/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2015 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "cfgcleanup.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tm-constrs.h"
#include "sel-sched.h"

/* This file should be included last.  */
#include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	   /* size of the stack frame, not including
				      the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	   /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	   /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		   /* mask of saved registers.  */
  unsigned int gr_used_mask;	   /* mask of registers in use as gr spill
				      registers or long-term scratches.  */
  int n_spilled;		   /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];/* Frame related registers.  */
  int n_input_regs;		   /* number of input registers used.  */
  int n_local_regs;		   /* number of local registers used.  */
  int n_output_regs;		   /* number of output registers used.  */
  int n_rotate_regs;		   /* number of rotating registers used.  */
  char need_regstk;		   /* true if a .regstk directive needed.  */
  char initialized;		   /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);
static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
				       const_tree, bool);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (machine_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
					      const unsigned char *sel);
#define MAX_VECT_LEN	8

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL,              0, 0, false, false, false, NULL, false }
};
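
/* For reference, the "model" attribute above is applied in user source as,
   e.g.,

     static int foo __attribute__ ((model ("small")));

   which requests the small address area that
   ia64_handle_model_attribute below validates and records.  */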
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef  TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ia64_libgcc_floating_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
struct gcc_target targetm = TARGET_INITIALIZER;
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    addr_area = ADDR_AREA_SMALL;
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));

      if (TREE_CODE (id) == IDENTIFIER_NODE)
	name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
	name = TREE_STRING_POINTER (id);
      else
	abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
	   size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
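
/* So, for example, storing a register or literal zero straight to memory
   is allowed, while (set (mem:DI ...) (const_int 5)) is not: a nonzero
   integral constant must first be loaded into a register.  */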
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
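
/* Worked example: ROP = 0xff0 with RSHIFT = 4 shifts down to op = 0xff,
   and exact_log2 (0xff + 1) = 8, i.e. an 8-bit field deposited at bit
   position 4.  A non-contiguous mask such as 0xf0f gives op + 1 that is
   not a power of two, so exact_log2 returns a negative value and the
   match fails.  */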
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}
static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}
static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}
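
/* To summarize, the accepted address forms are a plain base register,
   (post_inc REG), (post_dec REG), and (post_modify REG (plus REG X))
   where X is either another base register or a constant displacement in
   [-256, 255]; the argument pointer is excluded from the auto-increment
   forms.  */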
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}
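
/* For instance, under the rules above the constant SYM + 0x4000 is
   legitimate for an aligned-offset symbol (0x4000 & 0x3fff == 0), while
   SYM + 0x2001 must instead be split into a symbolic load plus an
   explicit addition.  */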
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (dest, tmp));
	}
    }

  return true;
}
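
/* The hi/lo split above sign-extends the low 14 bits of the offset.
   E.g. for an original offset of 0x3000:
     lo = ((0x3000 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1000
     hi = 0x3000 - (-0x1000) = 0x4000
   so hi + lo reconstructs the offset with lo in [-0x2000, 0x1fff],
   the range of a signed 14-bit immediate.  */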
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (mode, sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	addend = 0;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx_insn *insn, *first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base,
				    plus_constant (Pmode, base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
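
/* Illustration of the REG case above: a load from (mem:TI (reg p)) becomes
   out[0] = (mem:DI (post_inc p)), reading the low word and advancing p by
   8, and out[1] = (mem:DI (post_dec p)), reading the high word and moving
   p back down, so the pointer ends where it started without needing a
   scratch register; when DEAD is true the final post_dec is simply
   omitted.  */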
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
			     REGNO (operands[0])+2,
			     base, 0))
	dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
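
/* E.g. given (subreg:XF (reg:TI r)), the TImode register is stored into a
   16-byte stack temporary and re-read as (mem:XF ...), replacing the
   troublesome subreg with a plain memory operand.  */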
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}
      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret;
      rtx_insn *insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given a NaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     a NaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* Unordered relational operators do not raise FP_INVALID
	     when given a NaN operand.  */
	case UNLT:   magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
	case UNLE:   magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	case UNGT:   magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
	case UNGE:   magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	  /* Not supported.  */
	default: gcc_unreachable ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
						   ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
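
/* Worked example of the magic numbers: LE passes
   QCMP_LT | QCMP_EQ | QCMP_INV = 8 + 4 + 1 = 13, so _U_Qfcmp returns
   nonzero iff *op0 < *op1 or *op0 == *op1, raising FP_INVALID on NaN
   operands; the BImode flag is then produced by comparing that return
   value against zero with NCODE = NE.  */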
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
	{
	case V2SImode:
	  {
	    rtx t1, t2, mask;

	    /* Subtract (-(INT MAX) - 1) from both operands to make
	       them signed.  */
	    mask = GEN_INT (0x80000000);
	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
	    mask = force_reg (mode, mask);
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t1, op0, mask));
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t2, op1, mask));
	    op0 = t1;
	    op1 = t2;
	    code = GT;
	  }
	  break;

	case V8QImode:
	case V4HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));

	  code = EQ;
	  op0 = x;
	  op1 = CONST0_RTX (mode);
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (dest, x));

  return negate;
}
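
/* The two GTU tricks in concrete terms: for V2SI, x >u y exactly when
   (x - 0x80000000) >s (y - 0x80000000), since biasing both operands by
   INT_MIN maps the unsigned ordering onto the signed one.  For
   V8QI/V4HI, x >u y exactly when the saturating difference x -us y is
   nonzero, which is why the code tests EQ against zero and then inverts
   the sense via NEGATE.  */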
/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
}
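/* The expansion above is the usual mask select dest = (cmp & t) | (~cmp & f);
   the [1+negate] / [2-negate] indexing swaps the two arms when
   ia64_expand_vecint_compare had to reverse the sense of the comparison.  */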
/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
                           rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements.  */

void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
  struct expand_vec_perm_d d;
  bool ok;

  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  high = (highp ? nelt / 2 : 0);
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + high;
      d.perm[i * 2 + 1] = i + high + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
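/* For example, for V8QImode with HIGHP false this builds the permutation
   { 0, 8, 1, 9, 2, 10, 3, 11 }, an interleave of the low halves of the two
   operands; with HIGHP true it is { 4, 12, 5, 13, 6, 14, 7, 15 }.  */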
/* Return a vector of the sign-extension of VEC.  */

static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
  machine_mode mode = GET_MODE (vec);
  rtx zero = CONST0_RTX (mode);

  if (unsignedp)
    return zero;
  else
    {
      rtx sign = gen_reg_rtx (mode);
      bool neg;

      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
      gcc_assert (!neg);

      return sign;
    }
}
/* Emit an integral vector unpack operation.  */

void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}
/* Emit an integral vector widening sum operation.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  machine_mode wmode;
  rtx l, h, t, sign;

  sign = ia64_unpack_sign (operands[1], unsignedp);

  wmode = GET_MODE (operands[0]);
  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);

  ia64_unpack_assemble (l, operands[1], sign, false);
  ia64_unpack_assemble (h, operands[1], sign, true);

  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
  if (t != operands[0])
    emit_move_insn (operands[0], t);
}
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (! retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (! retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
             gen_rtx_REG (DImode, GR_REG (25)));
}
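/* Note that in the PIC case the gp register r1 is recorded in
   CALL_INSN_FUNCTION_USAGE, so the instruction that sets up gp before
   the call is known to be live and cannot be deleted as dead code.  */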
static void
reg_emitted (enum ia64_frame_regs r)
{
  if (emitted_frame_related_regs[r] == 0)
    emitted_frame_related_regs[r] = current_frame_info.r[r];
  else
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
}

static int
get_reg (enum ia64_frame_regs r)
{
  reg_emitted (r);
  return current_frame_info.r[r];
}

static int
is_emitted (int regno)
{
  unsigned int r;

  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
    if (emitted_frame_related_regs[r] == regno)
      return 1;
  return 0;
}
static void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.r[reg_save_gp])
    {
      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
    }
  else
    {
      HOST_WIDE_INT offset;
      rtx offset_r;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      offset_r = GEN_INT (offset);
      if (satisfies_constraint_I (offset_r))
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
      else
        {
          emit_move_insn (pic_offset_table_rtx, offset_r);
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
        tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
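/* An IA-64 function descriptor is two 8-byte words: the code entry point
   followed by the callee's gp.  The POST_INC above reads the entry point
   and leaves ADDR pointing at the gp word; the POST_DEC restores ADDR
   afterward for the case where it is still live after the call.  */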
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

        cmp_reg = mem;
      label:
        old_reg = cmp_reg;
        new_reg = cmp_reg op val;
        cmp_reg = compare-and-swap(mem, old_reg, new_reg)
        if (cmp_reg != old_reg)
          goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
                       rtx old_dst, rtx new_dst, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      if (code == MINUS)
        val = GEN_INT (-INTVAL (val));

      if (!old_dst)
        old_dst = gen_reg_rtx (mode);

      switch (model)
        {
        case MEMMODEL_ACQ_REL:
        case MEMMODEL_SEQ_CST:
        case MEMMODEL_SYNC_SEQ_CST:
          emit_insn (gen_memory_barrier ());
          /* FALLTHRU */
        case MEMMODEL_RELAXED:
        case MEMMODEL_ACQUIRE:
        case MEMMODEL_SYNC_ACQUIRE:
        case MEMMODEL_CONSUME:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_acq_si;
          else
            icode = CODE_FOR_fetchadd_acq_di;
          break;
        case MEMMODEL_RELEASE:
        case MEMMODEL_SYNC_RELEASE:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_rel_si;
          else
            icode = CODE_FOR_fetchadd_rel_di;
          break;

        default:
          gcc_unreachable ();
        }

      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      if (new_dst)
        {
          new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
                                         true, OPTAB_WIDEN);
          if (new_reg != new_dst)
            emit_move_insn (new_dst, new_reg);
        }
      return;
    }

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.
     For relaxed and release memory models, we don't need this.  But we
     also don't bother trying to prevent it either.  */
  gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
              || MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  emit_label (label);

  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  new_reg = cmp_reg;
  if (code == NOT)
    {
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
                                     true, OPTAB_DIRECT);
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
    }
  else
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
                                   true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_SYNC_ACQUIRE:
    case MEMMODEL_CONSUME:
      switch (mode)
        {
        case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
        case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
        case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
        case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
        default:
          gcc_unreachable ();
        }
      break;

    case MEMMODEL_RELEASE:
    case MEMMODEL_SYNC_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
    case MEMMODEL_SYNC_SEQ_CST:
      switch (mode)
        {
        case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
        case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
        case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
        case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}
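/* As an illustration: an SImode fetch-and-add of the constant 1 with
   MEMMODEL_SEQ_CST takes the fetchadd path above (a memory barrier followed
   by fetchadd4.acq), since 1 is one of the immediates fetchadd accepts
   (-16, -8, -4, -1, 1, 4, 8, 16).  Adding, say, 5 instead falls through
   to the ld.acq / cmpxchg.rel retry loop.  */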
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}
static void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
          out_state = 1;
        }
      else
        fputc (',', asm_out_file);
      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
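/* The typical result for the default ABI, where p1-p5 and p16-p63 are
   preserved across calls, is the single directive

        .pred.safe_across_calls p1-p5,p16-p63  */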
/* Globalize a declaration.  */

static void
ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
  if (version_attr)
    {
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
      const char *p = TREE_STRING_POINTER (v);
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
    }
  targetm.asm_out.globalize_label (stream, name);
  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
}
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (enum ia64_frame_regs r, int try_locals)
{
  int regno;

  if (emitted_frame_related_regs[r] != 0)
    {
      regno = emitted_frame_related_regs[r];
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
          && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
      else if (crtl->is_leaf
               && regno >= GR_REG (1) && regno <= GR_REG (31))
        current_frame_info.gr_used_mask |= 1 << regno;

      return regno;
    }

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (crtl->is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! df_regs_ever_live_p (regno)
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
            && ! is_emitted (regno))
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      while (regno < (80 - frame_pointer_needed))
        if (! is_emitted (LOC_REG (regno++)))
          {
            current_frame_info.n_local_regs = regno;
            return LOC_REG (regno - 1);
          }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  gcc_unreachable ();
}
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int min_regno;
  int max_regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Static stack checking uses r2 and r3.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    current_frame_info.gr_used_mask |= 0xc;

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (df_regs_ever_live_p (regno))
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (df_regs_ever_live_p (regno))
      break;
  i = regno - OUT_REG (0) + 1;

#ifndef PROFILE_HOOK
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (crtl->profile)
    i = MAX (i, 1);
#endif
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.r[reg_fp] == 0)
        {
          current_frame_info.r[reg_fp] = LOC_REG (79);
          current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
        }
    }

  if (! crtl->is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
      if (current_frame_info.r[reg_save_b0] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
         registers are clobbered, so we fall back to the stack.  */
      current_frame_info.r[reg_save_gp]
        = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
      if (current_frame_info.r[reg_save_gp] == 0)
        {
          SET_HARD_REG_BIT (mask, GR_REG (1));
          spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          extra_spill_size += 8;
          n_spilled += 1;
        }

      if (df_regs_ever_live_p (AR_PFS_REGNUM))
        {
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
          current_frame_info.r[reg_save_ar_pfs]
            = find_gr_spill (reg_save_ar_pfs, 1);
          if (current_frame_info.r[reg_save_ar_pfs] == 0)
            {
              extra_spill_size += 8;
              n_spilled += 1;
            }
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.

     If we have already emitted code for any of those registers,
     then it's already too late to change.  */
  min_regno = MIN (current_frame_info.r[reg_fp],
                   MIN (current_frame_info.r[reg_save_b0],
                        current_frame_info.r[reg_save_ar_pfs]));
  max_regno = MAX (current_frame_info.r[reg_fp],
                   MAX (current_frame_info.r[reg_save_b0],
                        current_frame_info.r[reg_save_ar_pfs]));
  if (min_regno > 0
      && min_regno + 2 == max_regno
      && (current_frame_info.r[reg_fp] == min_regno + 1
          || current_frame_info.r[reg_save_b0] == min_regno + 1
          || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_save_b0] == 0
          || emitted_frame_related_regs[reg_save_b0] == min_regno)
      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
          || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_fp] == 0
          || emitted_frame_related_regs[reg_fp] == min_regno + 2))
    {
      current_frame_info.r[reg_save_b0] = min_regno;
      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
      current_frame_info.r[reg_fp] = min_regno + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
      if (current_frame_info.r[reg_save_pr] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        df_set_regs_ever_live (regno, true);
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || df_regs_ever_live_p (AR_UNAT_REGNUM))
    {
      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.r[reg_save_ar_unat]
        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_unat] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (df_regs_ever_live_p (AR_LC_REGNUM))
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.r[reg_save_ar_lc]
        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_lc] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
  else
    pretend_args_size = crtl->args.pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + crtl->outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  However, if the function allocates dynamic stack space,
     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
     so we need to cope.  */
  if (crtl->is_leaf && !cfun->calls_alloca)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
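/* Example of the arithmetic above: a non-leaf function with 24 bytes of
   locals, one preserved FR to spill (spill_size == 16) and no pretend or
   outgoing args gets total_size = IA64_STACK_ALIGN (16 + 24) = 48 and
   spill_cfa_off = -16, i.e. the spill area starts just below the caller's
   16-byte scratch area.  */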
/* Worker function for TARGET_CAN_ELIMINATE.  */

bool
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == BR_REG (0) ? crtl->is_leaf : true);
}
/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = -current_frame_info.total_size;
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;

        case STACK_POINTER_REGNUM:
          offset = 0;
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = 16 - crtl->args.pretend_args_size;
          break;

        case STACK_POINTER_REGNUM:
          offset = (current_frame_info.total_size
                    + 16 - crtl->args.pretend_args_size);
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx_insn *init_after;         /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  rtx_insn *prev_insn[2];       /* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
static void
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}
static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (satisfies_constraint_N (disp_rtx))
        {
          *spill_fill_data.prev_addr[iter]
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                   gen_rtx_PLUS (DImode,
                                                 spill_fill_data.iter_reg[iter],
                                                 disp_rtx));
          add_reg_note (spill_fill_data.prev_insn[iter],
                        REG_INC, spill_fill_data.iter_reg[iter]);
        }
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (!satisfies_constraint_I (disp_rtx))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq;
      rtx_insn *insn;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (!satisfies_constraint_I (disp_rtx))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = get_insns ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx_insn *first = get_insns ();
          if (first)
            insn = emit_insn_before (seq, first);
          else
            insn = emit_insn (seq);
        }
      spill_fill_data.init_after = insn;
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
          rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;
  rtx mem;
  rtx_insn *insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      add_reg_note (insn, REG_CFA_OFFSET,
                    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
                                              plus_constant (Pmode,
                                                             base, off)),
                                 frame_reg));
    }
}
static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  rtx_insn *insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                                GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
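/* For example, for 96 stacked registers BACKING_STORE_SIZE gives
   (96 + 96/63 + 1) * 8 = 98 * 8 = 784 bytes: 8 bytes per register plus
   room for the NaT collection words that the RSE interleaves into the
   backing store, one per 63 registers, rounded up.  */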
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
   is the size of the backing store.  ??? This clobbers r2 and r3.  */

static void
ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                             int bs_size)
{
  rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
  rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));

  /* On the IA-64 there is a second stack in memory, namely the Backing Store
     of the Register Stack Engine.  We also need to probe it after checking
     that the 2 stacks don't overlap.  */
  emit_insn (gen_bsp_value (r3));
  emit_move_insn (r2, GEN_INT (-(first + size)));

  /* Compare current value of BSP and SP registers.  */
  emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
                                              r3, stack_pointer_rtx)));

  /* Compute the address of the probe for the Backing Store (which grows
     towards higher addresses).  We probe only at the first offset of
     the next page because some OS (eg Linux/ia64) only extend the
     backing store when this specific address is hit (but generate a SEGV
     on other address).  Page size is the worst case (4KB).  The reserve
     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
     Also compute the address of the last probe for the memory stack
     (which grows towards lower addresses).  */
  emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
  emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

  /* Compare them and raise SEGV if the former has topped the latter.  */
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
                                                                 r3, r2))));
  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
                                                const0_rtx),
                          const0_rtx));
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
                                                 GEN_INT (11))));

  /* Probe the Backing Store if necessary.  */
  if (bs_size > 0)
    emit_stack_probe (r3);

  /* Probe the memory stack if necessary.  */
  if (size == 0)
    ;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  else if (size <= PROBE_INTERVAL)
    emit_stack_probe (r2);

  /* The run-time loop is made up of 8 insns in the generic case while this
     compile-time loop is made up of 5+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
      emit_insn (gen_rtx_SET (r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
      emit_stack_probe (r2);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_insn (gen_rtx_SET (r2,
                                  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
          emit_stack_probe (r2);
        }

      emit_insn (gen_rtx_SET (r2,
                              plus_constant (Pmode, r2,
                                             (i - PROBE_INTERVAL) - size)));
      emit_stack_probe (r2);
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;

      emit_move_insn (r2, GEN_INT (-first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      if (rounded_size > (1 << 21))
        {
          emit_move_insn (r3, GEN_INT (-rounded_size));
          emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
        }
      else
        emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
                                                  GEN_INT (-rounded_size))));


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (r2, r2, r3));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      /* TEMP = SIZE - ROUNDED_SIZE.  */
      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
                                                     rounded_size - size)));
          emit_stack_probe (r2);
        }
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
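/* For instance, with a 4 KB PROBE_INTERVAL and SIZE == 10240, the unrolled
   case above probes at FIRST + 4096, FIRST + 8192 and finally FIRST + 10240;
   only frames larger than 4 * PROBE_INTERVAL pay for the run-time loop.  */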
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg1;
  xops[1] = reg2;
  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addl %0 = %1, %0", xops);
  fputs ("\t;;\n", asm_out_file);

  /* Probe at TEST_ADDR and branch.  */
  output_asm_insn ("probe.w.fault %0, 0", xops);
  fprintf (asm_out_file, "\tbr ");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
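/* With the default 4 KB probe interval, and r2/r3 as REG1/REG2, this emits
   a loop of the form

        .LPSRL0:
                cmp.eq p6, p7 = r2, r3
                (p6) br.cond.dpnt .LPSRE0
                addl r2 = -4096, r2
                ;;
                probe.w.fault r2, 0
                br .LPSRL0
        .LPSRE0:  */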
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

   [ varargs spill area ]
   [ fr register spill area ]
   [ br register spill area ]
   [ ar register spill area ]
   [ pr register spill area ]
   [ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue (void)
{
  rtx_insn *insn;
  rtx ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  if (flag_stack_usage_info)
    current_function_static_stack_size = current_frame_info.total_size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      HOST_WIDE_INT size = current_frame_info.total_size;
      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
                                        + current_frame_info.n_local_regs);

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
            ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
                                         size - STACK_CHECK_PROTECT,
                                         bs_size);
          else if (size + bs_size > STACK_CHECK_PROTECT)
            ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
        }
      else if (size + bs_size > 0)
        ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
    }

  if (dump_file)
    {
      fprintf (dump_file, "ia64 frame related registers "
               "recorded in current_frame_info.r[]:\n");
#define PRINTREG(a) if (current_frame_info.r[a]) \
        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
      PRINTREG(reg_fp);
      PRINTREG(reg_save_b0);
      PRINTREG(reg_save_pr);
      PRINTREG(reg_save_ar_pfs);
      PRINTREG(reg_save_ar_unat);
      PRINTREG(reg_save_ar_lc);
      PRINTREG(reg_save_gp);
#undef PRINTREG
    }

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
          break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
    }

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.r[reg_save_ar_pfs])
        {
          regno = current_frame_info.r[reg_save_ar_pfs];
          reg_emitted (reg_save_ar_pfs);
        }
      else
        regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
                                   GEN_INT (current_frame_info.n_input_regs),
                                   GEN_INT (current_frame_info.n_local_regs),
                                   GEN_INT (current_frame_info.n_output_regs),
                                   GEN_INT (current_frame_info.n_rotate_regs)));
      if (current_frame_info.r[reg_save_ar_pfs])
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (ar_pfs_save_reg,
                                     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
        }
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
                        stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Force the unwind info to recognize this as defining a new CFA,
         rather than some temp register setup.  */
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (satisfies_constraint_I (frame_size_rtx))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        gen_rtx_SET (stack_pointer_rtx,
                                     gen_rtx_PLUS (DImode,
                                                   stack_pointer_rtx,
                                                   frame_size_rtx)));
        }

      /* ??? At this point we must generate a magic insn that appears to
         modify the stack pointer, the frame pointer, and all spill
         iterators.  This would allow the most scheduling freedom.  For
         now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat])
        {
          ar_unat_save_reg
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
          reg_emitted (reg_save_ar_unat);
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
        }

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      if (current_frame_info.r[reg_save_ar_unat])
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
        }

      /* Even if we're not going to generate an epilogue, we still
         need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
        emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.r[reg_save_pr] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
          reg_emitted (reg_save_pr);
          insn = emit_move_insn (alt_reg, reg);

          /* ??? Denote pr spill/fill by a DImode move that modifies all
             64 hard registers.  */
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          insn = emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.r[reg_save_ar_unat] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.r[reg_save_ar_pfs] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.r[reg_save_ar_lc] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
          reg_emitted (reg_save_ar_lc);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Save the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.r[reg_save_b0] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
          reg_emitted (reg_save_b0);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  if (current_frame_info.r[reg_save_gp])
    {
      reg_emitted (reg_save_gp);
      insn = emit_move_insn (gen_rtx_REG (DImode,
                                          current_frame_info.r[reg_save_gp]),
                             pic_offset_table_rtx);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
                          + current_frame_info.spill_size));

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_spill (gen_gr_spill, reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (alt_reg, reg);
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        gcc_assert (!(cfa_off & 15));
        reg = gen_rtx_REG (XFmode, regno);
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
        cfa_off -= 16;
      }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();
}
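/* For example, a function with 2 inputs, 3 locals and 4 outputs and no
   rotating registers gets an alloc of the form

        alloc rN = ar.pfs, 2, 3, 4, 0

   where rN is the GR chosen for reg_save_ar_pfs, or a scratch GR when
   ar.pfs need not survive in a register.  */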
/* Output the textual info surrounding the prologue.  */

void
ia64_start_function (FILE *file, const char *fnname,
                     tree decl ATTRIBUTE_UNUSED)
{
#if TARGET_ABI_OPEN_VMS
  vms_start_function (fnname);
#endif

  fputs ("\t.proc ", file);
  assemble_name (file, fnname);
  fputc ('\n', file);
  ASM_OUTPUT_LABEL (file, fnname);
}
3869 /* Called after register allocation to add any instructions needed for the
3870 epilogue. Using an epilogue insn is favored compared to putting all of the
3871 instructions in output_function_prologue(), since it allows the scheduler
3872 to intermix instructions with the saves of the caller saved registers. In
3873 some cases, it might be necessary to emit a barrier instruction as the last
3874 insn to prevent such scheduling. */
void
ia64_expand_epilogue (int sibcall_p)
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.r[reg_save_pr] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
	  reg_emitted (reg_save_pr);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat] != 0)
	{
	  ar_unat_save_reg
	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
	  reg_emitted (reg_save_ar_unat);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.r[reg_save_ar_pfs] != 0)
    {
      reg_emitted (reg_save_ar_pfs);
      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_lc] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
	  reg_emitted (reg_save_ar_lc);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
	  reg_emitted (reg_save_b0);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
			  + current_frame_info.spill_size));

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	gcc_assert (!(cfa_off & 15));
	reg = gen_rtx_REG (XFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();

  if (current_frame_info.total_size
      || cfun->machine->ia64_eh_epilogue_sp
      || frame_pointer_needed)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (satisfies_constraint_I (frame_size_rtx))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA,
		    gen_rtx_SET (stack_pointer_rtx,
				 gen_rtx_PLUS (DImode,
					       stack_pointer_rtx,
					       frame_size_rtx)));
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved,
	 so r2 is the first available call clobbered register.  If
	 there was a frame_pointer register, we may have swapped the
	 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
	 sure we're using the string "r2" when emitting the register
	 name for the assembler.  */
      if (current_frame_info.r[reg_fp]
          && current_frame_info.r[reg_fp] == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	{
	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);

	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
				       const0_rtx, const0_rtx,
				       n_inputs, const0_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* ??? We need to mark the alloc as frame-related so that it gets
	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
	     But there's nothing dwarf2 related to be done wrt the register
	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
	     the empty parallel means dwarf2out will not see anything.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
	}
    }
}
/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return (void)
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.r[reg_save_b0] == 0
	      && current_frame_info.r[reg_save_pr] == 0
	      && current_frame_info.r[reg_save_ar_pfs] == 0
	      && current_frame_info.r[reg_save_ar_unat] == 0
	      && current_frame_info.r[reg_save_ar_lc] == 0);
    }
  return 0;
}
/* Return the magic cookie that we use to hold the return address
   during early compilation.  */

rtx
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL;
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
}
/* Split this value after reload, now that we know where the return
   address is saved.  */

void
ia64_split_return_addr_rtx (rtx dest)
{
  rtx src;

  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
	{
	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
	  reg_emitted (reg_save_b0);
	}
      else
	{
	  HOST_WIDE_INT off;
	  unsigned int regno;
	  rtx off_r;

	  /* Compute offset from CFA for BR0.  */
	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
	  off = (current_frame_info.spill_cfa_off
		 + current_frame_info.spill_size);
	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	      off -= 8;

	  /* Convert CFA offset to a register based offset.  */
	  if (frame_pointer_needed)
	    src = hard_frame_pointer_rtx;
	  else
	    {
	      src = stack_pointer_rtx;
	      off += current_frame_info.total_size;
	    }

	  /* Load address into scratch register.  */
	  off_r = GEN_INT (off);
	  if (satisfies_constraint_I (off_r))
	    emit_insn (gen_adddi3 (dest, src, off_r));
	  else
	    {
	      emit_move_insn (dest, off_r);
	      emit_insn (gen_adddi3 (dest, src, dest));
	    }

	  src = gen_rtx_MEM (Pmode, dest);
	}
    }
  else
    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
}
int
ia64_hard_regno_rename_ok (int from, int to)
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  unsigned int r;

  for (r = reg_fp; r <= reg_save_ar_lc; r++)
    if (to == current_frame_info.r[r]
	|| from == current_frame_info.r[r]
	|| to == emitted_frame_related_regs[r]
	|| from == emitted_frame_related_regs[r])
      return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  return 1;
}
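/* For illustration: under the even/oddness rule above, renaming p6 to p8 is
   acceptable since (from & 1) == (to & 1), while renaming p6 to p7 is
   rejected because the parity of the predicate register would change.  */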
/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == POINTER_SIZE / BITS_PER_UNIT
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
    {
      static const char * const directive[2][2] = {
	  /* 64-bit pointer */  /* 32-bit pointer */
	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
      };
      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
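/* For illustration: for an aligned 64-bit pointer to a function `foo', the
   directive table above makes this hook emit

	data8	@fptr(foo)

   while a 32-bit unaligned pointer would use data4.ua instead.  */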
/* Emit the function prologue.  */

static void
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.r[reg_save_b0] != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
    }
  if (current_frame_info.r[reg_save_ar_pfs] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.r[reg_save_ar_pfs];
      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
    }
  if (current_frame_info.r[reg_fp] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.r[reg_fp];
    }
  if (current_frame_info.r[reg_save_pr] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.r[reg_save_pr];
    }

  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (FILE *file)
{
  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
    return;

  fputs ("\t.body\n", file);
}
/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int i;

  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
      reg_emitted (reg_fp);
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}
int
ia64_dbx_register_number (int regno)
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.r[reg_fp])
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.r[reg_fp];
      else if (regno == current_frame_info.r[reg_fp])
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
/* Implement TARGET_TRAMPOLINE_INIT.

   The trampoline should set the static chain pointer to value placed
   into the trampoline and should branch to the specified routine.
   To make the normal indirect-subroutine calling convention work,
   the trampoline must look like a function descriptor; the first
   word being the target address and the second being the target's
   global pointer.

   We abuse the concept of a global pointer by arranging for it
   to point to the data we need to load.  The complete trampoline
   has the following form:

		+-------------------+ \
	TRAMP:	| __ia64_trampoline | |
		+-------------------+  > fake function descriptor
		| TRAMP+16          | |
		+-------------------+ /
		| target descriptor |
		+-------------------+
		| static link	    |
		+-------------------+
*/

static void
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx addr, addr_reg, tramp, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly */
  if (!TARGET_GNU_AS)
    {
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
	{
	  declared_ia64_trampoline = true;
	  (*targetm.asm_out.globalize_label) (asm_out_file,
					      "__ia64_trampoline");
	}
    }

  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  */
  addr_reg = copy_to_reg (addr);
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
  if (TARGET_ABI_OPEN_VMS)
    {
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
	 relocation against function symbols to make it identical to the
	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
	 strict ELF and dereference to get the bare code address.  */
      rtx reg = gen_reg_rtx (Pmode);
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
      emit_move_insn (reg, tramp);
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
      tramp = reg;
    }
  emit_move_insn (m_tramp, tramp);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The third word is the target descriptor.  */
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The fourth word is the static chain.  */
  emit_move_insn (m_tramp, static_chain);
}
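/* For illustration: after this function runs, the four 64-bit words at the
   trampoline address hold, in order,
     [0] the address of the __ia64_trampoline runtime helper,
     [1] TRAMP+16, the fake GP pointing at the data below,
     [2] the target's function descriptor, and
     [3] the static chain,
   which matches the diagram in the comment above.  */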
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

static void
ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
			     tree type, int * pretend_size,
			     int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);

  /* Skip the current argument.  */
  ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);

  if (next_cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.  */

static machine_mode
hfa_element_mode (const_tree type, bool nested)
{
  machine_mode element_mode = VOIDmode;
  machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
    return VOIDmode;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case LANG_TYPE:	case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
	  && TYPE_MODE (type) != TCmode)
	return GET_MODE_INNER (TYPE_MODE (type));
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested && TYPE_MODE (type) != TFmode)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }
}
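/* For illustration: struct { float x, y, z; } is an HFA and yields SFmode,
   and struct { double d[4]; } yields DFmode, but a mixed aggregate such as
   struct { float f; double d; } returns VOIDmode, as does any aggregate
   containing a 128-bit quad-precision (TFmode) member.  */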
/* Return the number of words required to hold a quantity of TYPE and MODE
   when passed as an argument.  */

static int
ia64_function_arg_words (const_tree type, machine_mode mode)
{
  int words;

  if (mode == BLKmode)
    words = int_size_in_bytes (type);
  else
    words = GET_MODE_SIZE (mode);

  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
}
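/* For illustration: a 12-byte BLKmode aggregate occupies
   (12 + 8 - 1) / 8 = 2 argument words, since UNITS_PER_WORD is 8 here.  */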
/* Return the number of registers that should be skipped so the current
   argument (described by TYPE and WORDS) will be properly aligned.

   Integer and float arguments larger than 8 bytes start at the next
   even boundary.  Aggregates larger than 8 bytes start at the next
   even boundary if the aggregate has 16 byte alignment.  Note that
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
   but are still to be aligned in registers.

   ??? The ABI does not specify how to handle aggregates with
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
   all as if they had 16 byte alignment.  Such aggregates can occur
   only if gcc extensions are used.  */

static int
ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
			  const_tree type, int words)
{
  /* No registers are skipped on VMS.  */
  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
    return 0;

  if (type
      && TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != REAL_TYPE)
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
  else
    return words > 1;
}
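/* For illustration: if cum->words is odd and the argument is a 16-byte
   aligned aggregate, this returns 1 and one argument register is skipped
   so that the aggregate starts on an even slot boundary.  */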
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

static rtx
ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
		     const_tree type, bool named, bool incoming)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  machine_mode hfa_mode = VOIDmode;

  /* For OPEN VMS, emit the instruction setting up the argument register here,
     when we know this will be together with the other arguments setup related
     insns.  This is not the conceptually best place to do this, but this is
     the easiest as we have convenient access to cumulative args info.  */

  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
      && named == 1)
    {
      unsigned HOST_WIDE_INT regval = cum->words;
      int i;

      for (i = 0; i < 8; i++)
	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);

      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
		      GEN_INT (regval));
    }

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      if (FLOAT_MODE_P (mode))
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
      else
	return gen_rtx_REG (mode, basereg + cum->words);
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  machine_mode gr_mode = DImode;
	  unsigned int gr_size;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));

	  gr_size = GET_MODE_SIZE (gr_mode);
	  offset += gr_size;
	  if (gr_size == UNITS_PER_WORD
	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
	    int_regs++;
	  else if (gr_size > UNITS_PER_WORD)
	    int_regs += gr_size / UNITS_PER_WORD;
	}
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    {
      int byte_size = ((mode == BLKmode)
		       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
	  && byte_size < UNITS_PER_WORD
	  && byte_size > 0)
	{
	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       (basereg + cum->words
							+ offset)),
					  const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
	}
      else
	return gen_rtx_REG (mode, basereg + cum->words + offset);
    }

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (named)
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
	 the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
	return gen_rtx_PARALLEL (mode,
		 gen_rtvec (1,
		   gen_rtx_EXPR_LIST (VOIDmode,
		     gen_rtx_REG (DImode, basereg + cum->words + offset),
		     const0_rtx)));
      else
	return gen_rtx_REG (mode, basereg + cum->words + offset);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* See comment above.  */
      machine_mode inner_mode =
	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (inner_mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
/* Implement TARGET_FUNCTION_ARG target hook.  */

static rtx
ia64_function_arg (cumulative_args_t cum, machine_mode mode,
		   const_tree type, bool named)
{
  return ia64_function_arg_1 (cum, mode, type, named, false);
}

/* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */

static rtx
ia64_function_incoming_arg (cumulative_args_t cum,
			    machine_mode mode,
			    const_tree type, bool named)
{
  return ia64_function_arg_1 (cum, mode, type, named, true);
}
/* Return number of bytes, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

static int
ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
}
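/* Worked example: with 6 of the 8 argument slots used and a 4-word
   argument, 2 slots remain, so (8 - 6) * 8 = 16 bytes are passed in
   registers and the remaining 16 bytes go on the stack.  */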
/* Return ivms_arg_type based on machine_mode.  */

static enum ivms_arg_type
ia64_arg_type (machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
      return FS;
    case DFmode:
      return FT;
    default:
      return I64;
    }
}
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

static void
ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    {
      cum->words += words + offset;
      return;
    }

  cum->atypes[cum->words] = ia64_arg_type (mode);
  cum->words += words + offset;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      cum->int_regs = cum->words;
      cum->fp_regs = cum->words;
      return;
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  So do TFmode FP values.
     If we have run out of FR registers, then other FP values must also go in
     general registers.  This can happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	cum->int_regs = cum->words;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}
/* Arguments with alignment larger than 8 bytes start at the next even
   boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */

static unsigned int
ia64_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
    return PARM_BOUNDARY * 2;

  if (type)
    {
      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
	return PARM_BOUNDARY * 2;
      else
	return PARM_BOUNDARY;
    }

  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
    return PARM_BOUNDARY * 2;
  else
    return PARM_BOUNDARY;
}
/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */

static bool
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* We can't perform a sibcall if the current function has the syscall_linkage
     attribute.  */
  if (lookup_attribute ("syscall_linkage",
			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    return false;

  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module unless
     TARGET_CONST_GP is set to true.  */
  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
}
/* Implement va_arg.  */

static tree
ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  /* Variable sized types are passed by reference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      tree ptrtype = build_pointer_type (type);
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (addr);
    }

  /* Aggregate arguments with alignment larger than 8 bytes start at
     the next even boundary.  Integer and floating point arguments
     do so if they are larger than 8 bytes, whether or not they are
     also aligned larger than 8 bytes.  */
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
      gimplify_assign (unshare_expr (valist), t, pre_p);
    }

  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
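/* Worked example: with UNITS_PER_WORD == 8, the alignment code above
   computes valist = (valist + 15) & -16, i.e. it rounds the va_list
   pointer up to the next 16-byte boundary before fetching the value.  */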
/* Return 1 if function return value returned in memory.  Return 0 if it is
   in a register.  */

static bool
ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
    {
      byte_size = int_size_in_bytes (valtype);
      if (byte_size < 0)
	return true;
    }

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return true;
      else
	return false;
    }
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return true;
  else
    return false;
}
/* Return rtx for register that holds the function return value.  */

static rtx
ia64_function_value (const_tree valtype,
		     const_tree fn_decl_or_type,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  machine_mode hfa_mode;
  int unsignedp;
  const_tree func = fn_decl_or_type;

  if (fn_decl_or_type
      && !DECL_P (fn_decl_or_type))
    func = NULL;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    {
      bool need_parallel = false;

      /* In big-endian mode, we need to manage the layout of aggregates
	 in the registers so that we get the bits properly aligned in
	 the highpart of the registers.  */
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
	need_parallel = true;

      /* Something like struct S { long double x; char a[0] } is not an
	 HFA structure, and therefore doesn't go in fp registers.  But
	 the middle-end will give it XFmode anyway, and XFmode values
	 don't normally fit in integer registers.  So we need to smuggle
	 the value inside a parallel.  */
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
	need_parallel = true;

      if (need_parallel)
	{
	  rtx loc[8];
	  int offset;
	  int bytesize;
	  int i;

	  offset = 0;
	  bytesize = int_size_in_bytes (valtype);
	  /* An empty PARALLEL is invalid here, but the return value
	     doesn't matter for empty structs.  */
	  if (bytesize == 0)
	    return gen_rtx_REG (mode, GR_RET_FIRST);
	  for (i = 0; offset < bytesize; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       GR_RET_FIRST + i),
					  GEN_INT (offset));
	      offset += UNITS_PER_WORD;
	    }
	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
	}

      mode = promote_function_mode (valtype, mode, &unsignedp,
				    func ? TREE_TYPE (func) : NULL_TREE,
				    true);

      return gen_rtx_REG (mode, GR_RET_FIRST);
    }
}
/* Worker function for TARGET_LIBCALL_VALUE.  */

static rtx
ia64_libcall_value (machine_mode mode,
		    const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode,
		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
			&& (mode) != TFmode)
		       ? FR_RET_FIRST : GR_RET_FIRST));
}
/* Worker function for FUNCTION_VALUE_REGNO_P.  */

static bool
ia64_function_value_regno_p (const unsigned int regno)
{
  return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
	  || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4 || size == 8);
  if (size == 4)
    fputs ("\tdata4.ua\t@dtprel(", file);
  else
    fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);
  fputs (")", file);
}
/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

static void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
			    rtx address ATTRIBUTE_UNUSED)
{
}
/* Print an operand to an assembler instruction.
   C	Swap and print a comparison operator.
   D	Print an FP comparison operator.
   E	Print 32 - constant, for SImode shifts as extract.
   e	Print 64 - constant, for DImode rotates.
   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
	a floating point register emitted normally.
   G	A floating point constant.
   I	Invert a predicate register by adding 1.
   J	Select the proper predicate register for a condition.
   j	Select the inverse predicate register for a condition.
   O	Append .acq for volatile load.
   P	Postincrement of a MEM.
   Q	Append .rel for volatile store.
   R	Print .s .d or nothing for a single, double or no truncation.
   S	Shift amount for shladd instruction.
   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
	for Intel assembler.
   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
	for Intel assembler.
   X	A pair of floating point registers.
   r	Print register name, or constant 0 as r0.  HP compatibility for
	Linux kernel.
   v	Print vector constant value as an 8-byte integer value.  */
static void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	case UNLT:
	  str = "nge";
	  break;
	case UNLE:
	  str = "ngt";
	  break;
	case UNGT:
	  str = "nle";
	  break;
	case UNGE:
	  str = "nlt";
	  break;
	case UNEQ:
	case LTGT:
	  gcc_unreachable ();
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else
	{
	  gcc_assert (GET_CODE (x) == REG);
	  str = reg_names [REGNO (x)];
	}
      fputs (str, file);
      return;

    case 'G':
      {
	long val[4];
	REAL_VALUE_TYPE rv;
	REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
	real_to_target (val, &rv, GET_MODE (x));
	if (GET_MODE (x) == SFmode)
	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
	else if (GET_MODE (x) == DFmode)
	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
					  & 0xffffffff,
					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
					  & 0xffffffff);
	else
	  output_operand_lossage ("invalid %%G mode");
      }
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
	fputs(".acq", file);
      return;

    case 'P':
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else
	      {
		gcc_assert (GET_CODE (x) == REG);
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
	fputs(".rel", file);
      return;

    case 'R':
      if (x == CONST0_RTX (GET_MODE (x)))
	fputs(".s", file);
      else if (x == CONST1_RTX (GET_MODE (x)))
	fputs(".d", file);
      else if (x == CONST2_RTX (GET_MODE (x)))
	;
      else
	output_operand_lossage ("invalid %%R value");
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'X':
      {
	unsigned int regno = REGNO (x);
	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
      }
      return;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case 'v':
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
      break;

    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = XINT (x, 0);

	    /* Guess top and bottom 10% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50
		&& br_prob_note_reliable_p (x))
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
		     || !br_prob_note_reliable_p (x))
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (CALL_P (current_output_insn))
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }
}
/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */

static bool
ia64_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '+' || code == ',');
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

static bool
ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      switch (outer_code)
	{
	case SET:
	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
	  return true;
	case PLUS:
	  if (satisfies_constraint_I (x))
	    *total = 0;
	  else if (satisfies_constraint_J (x))
	    *total = 1;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	default:
	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (1);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case FMA:
      *total = COSTS_N_INSNS (4);
      return true;

    case MULT:
      /* For multiplies wider than HImode, we have to go to the FPU,
	 which normally involves copies.  Plus there's the latency
	 of the multiply itself, and the latency of the instructions to
	 transfer integer regs to FP regs.  */
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (4);
      else if (GET_MODE_SIZE (mode) > 2)
	*total = COSTS_N_INSNS (10);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}
      /* FALLTHRU */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* We make divide expensive, so that divide-by-constant will be
	 optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
      return true;

    default:
      return false;
    }
}
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

static int
ia64_register_move_cost (machine_mode mode, reg_class_t from,
			 reg_class_t to)
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      reg_class_t tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as memory_move_cost
     to avoid spectacularly poor register class preferencing.  */
  if (mode == XFmode || mode == RFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
	return memory_move_cost (mode, to, false);
      else
	return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
	return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case GR_REGS:
    case FR_REGS:
    case FP_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      gcc_unreachable ();
    }

  return 2;
}
/* Calculate the cost of moving data of MODE from a register to or from
   memory.  */

static int
ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
		       reg_class_t rclass,
		       bool in ATTRIBUTE_UNUSED)
{
  if (rclass == GENERAL_REGS
      || rclass == FR_REGS
      || rclass == FP_REGS
      || rclass == GR_AND_FR_REGS)
    return 4;
  else
    return 10;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
   on RCLASS to use when copying X into that class.  */

static reg_class_t
ia64_preferred_reload_class (rtx x, reg_class_t rclass)
{
  switch (rclass)
    {
    case FR_REGS:
    case FP_REGS:
      /* Don't allow volatile mem reloads into floating point registers.
	 This is defined to force reload to choose the r/m case instead
	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
      if (MEM_P (x) && MEM_VOLATILE_P (x))
	return NO_REGS;

      /* Force all unrecognized constants into the constant pool.  */
      if (CONSTANT_P (x))
	return NO_REGS;
      break;

    default:
      break;
    }

  return rclass;
}
/* This function returns the register class required for a secondary
   register when copying between one of the registers in RCLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (enum reg_class rclass,
			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (rclass)
    {
    case BR_REGS:
    case AR_M_REGS:
    case AR_I_REGS:
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
	 interaction.  We end up with two pseudos with overlapping lifetimes
	 both of which are equiv to the same constant, and both which need
	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
	 changes depending on the path length, which means the qty_first_reg
	 check in make_regs_eqv can give different answers at different times.
	 At some point I'll probably need a reload_indi pattern to handle
	 this.

	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
	 non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
	return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
	 stack slot.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;
      break;

    case FR_REGS:
    case FP_REGS:
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
	return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
	 muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
	 in recog.c.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))
	return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
	return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
	return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 crtl->has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
	return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}
/* Implement targetm.unspec_may_trap_p hook.  */
static int
ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_LDA:
    case UNSPEC_LDS:
    case UNSPEC_LDSA:
    case UNSPEC_LDCCLR:
    case UNSPEC_CHKACLR:
    case UNSPEC_CHKS:
      /* These unspecs are just wrappers.  */
      return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
    }

  return default_unspec_may_trap_p (x, flags);
}
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
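/* Usage example: -mfixed-range=f32-f127 marks f32 through f127 as fixed
   and call-used, the kernel-mode case mentioned in the comment above;
   multiple ranges may be comma separated, e.g. -mfixed-range=f32-f127,r4-r7
   (register names here are purely illustrative).  */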
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
ia64_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) ia64_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  /* Numerous experiments show that IRA based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3)
    flag_ira_loop_pressure = 1;

  ia64_section_threshold = (global_options_set.x_g_switch_value
			    ? g_switch_value
			    : IA64_DEFAULT_GVALUE);

  init_machine_status = ia64_init_machine_status;

  if (align_functions <= 0)
    align_functions = 64;
  if (align_loops <= 0)
    align_loops = 32;
  if (TARGET_ABI_OPEN_VMS)
    flag_no_common = 1;

  ia64_override_options_after_change();
}
/* Implement targetm.override_options_after_change.  */

static void
ia64_override_options_after_change (void)
{
  if (optimize >= 3
      && !global_options_set.x_flag_selective_scheduling
      && !global_options_set.x_flag_selective_scheduling2)
    {
      flag_selective_scheduling2 = 1;
      flag_sel_sched_pipelining = 1;
    }
  if (mflag_sched_control_spec == 2)
    {
      /* Control speculation is on by default for the selective scheduler,
         but not for the Haifa scheduler.  */
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
    }
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
    {
      /* FIXME: remove this when we'd implement breaking autoinsns as
         a transformation.  */
      flag_auto_inc_dec = 0;
    }
}
/* Initialize the record of emitted frame related registers.  */

void
ia64_init_expanders (void)
{
  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
}

static struct machine_function *
ia64_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
static enum attr_type ia64_safe_type (rtx_insn *);

static enum attr_itanium_class
ia64_safe_itanium_class (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else if (DEBUG_INSN_P (insn))
    return ITANIUM_CLASS_IGNORE;
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
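/* Worked example of the rules above, for one instruction group:

     (p6) mov r4 = ...	; WRITE_COUNT(r4) = 1, FIRST_PRED = p6
     (p7) mov r4 = ...	; complement of p6: WRITE_COUNT(r4) becomes 2

   a further write to r4 in the same group would then require an insn
   group barrier (stop bit) before it.  */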
#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;
  RWS_FIELD_TYPE first_pred : 10;
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
#ifdef ENABLE_CHECKING
/* Bitmap whether a register has been written in the current insn.  */
HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
			   / HOST_BITS_PER_WIDEST_FAST_INT];

static inline void
rws_insn_set (int regno)
{
  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
  SET_HARD_REG_BIT (rws_insn, regno);
}

static inline int
rws_insn_test (int regno)
{
  return TEST_HARD_REG_BIT (rws_insn, regno);
}
#else
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
unsigned char rws_insn[2];

static inline void
rws_insn_set (int regno)
{
  if (regno == REG_AR_CFM)
    rws_insn[0] = 1;
  else if (regno == REG_VOLATILE)
    rws_insn[1] = 1;
}

static inline int
rws_insn_test (int regno)
{
  if (regno == REG_AR_CFM)
    return rws_insn[0];
  if (regno == REG_VOLATILE)
    return rws_insn[1];
  return 0;
}
#endif

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this,
   ia64_variable_issue will die when scheduling an alloc.  */
static int first_instruction;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */

struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
static void rws_update (int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *);
static int set_src_needs_barrier (rtx, struct reg_flags, int);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx_insn *);
static int safe_group_barrier_needed (rtx_insn *);
static int in_safe_group_barrier;
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (int regno, struct reg_flags flags, int pred)
{
  if (pred)
    rws_sum[regno].write_count++;
  else
    rws_sum[regno].write_count = 2;
  rws_sum[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws_sum[regno].written_by_and = flags.is_and;
  rws_sum[regno].written_by_or = flags.is_or;
  rws_sum[regno].first_pred = pred;
}
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_sum array.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  Treat
	     it like an unconditional write and do not try to check
	     for a complementary pred reg in an earlier write.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    {
	      rws_sum[regno].written_by_and = flags.is_and;
	      rws_sum[regno].written_by_or = flags.is_or;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate, assume we
	     need a barrier (don't check for complementary regs).  */
	  need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}
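/* As an illustration of a read access: group_barrier_needed (below) calls
   rws_access_regno (REG_VOLATILE, flags, 0) with flags.is_write clear after
   processing an insn; if a volatile asm earlier in the current group bumped
   REG_VOLATILE's write_count, that read access reports that a stop bit is
   needed.  */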
static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags)
{
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      /* There are four cases here:
	 (1) The destination is (pc), in which case this is a branch,
	 nothing here applies.
	 (2) The destination is ar.lc, in which case this is a
	 doloop_end_internal,
	 (3) The destination is an fp register, in which case this is
	 an fselect instruction.
	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
	 this is a check load.
	 In all cases, nothing we do in this function applies.  */
      return;

    default:
      if (COMPARISON_P (src)
	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      if (!ia64_spec_check_src_p (src))
	flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  if (ia64_spec_check_src_p (src))
    /* Avoid checking one register twice (in condition
       and in 'then' section) for ldc pattern.  */
    {
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);
    }

  need_barrier |= rtx_needs_barrier (src, flags, pred);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
    }
  return need_barrier;
}
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags);
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      gcc_assert (!pred);
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      gcc_assert (GET_CODE (cond) == REG
		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;
    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids a failure in rws_access_reg.  */
	  if (! rws_insn_test (REG_VOLATILE))
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */

      if (GET_CODE (x) == ASM_OPERANDS)
	for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	  if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	    need_barrier = 1;
      break;
    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  switch (GET_CODE (pat))
	    {
	    case SET:
	      update_set_flags (pat, &new_flags);
	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
	      break;

	    case USE:
	    case CALL:
	    case ASM_OPERANDS:
	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
	      break;

	    case CLOBBER:
	      if (REG_P (XEXP (pat, 0))
		  && extract_asm_operands (x) != NULL_RTX
		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
						     new_flags, pred);
		  new_flags = flags;
		}
	      break;

	    case RETURN:
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;

    case SUBREG:
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
      break;
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;
    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
      break;
      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;
      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:   case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:   case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;
    case VEC_SELECT:
      /* VEC_SELECT's second argument is a PARALLEL with integers that
	 describe the elements selected.  On ia64, those integers are
	 always constants.  Avoid walking the PARALLEL so that we don't
	 get confused with "normal" parallels and then die.  */
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LTOFF_DTPMOD:
	case UNSPEC_LTOFF_DTPREL:
	case UNSPEC_LTOFF_TPREL:
	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_FETCHADD_REL:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	case UNSPEC_GETF_EXP:
	case UNSPEC_SETF_EXP:
	case UNSPEC_FR_SQRT_RECIP_APPROX:
	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
	case UNSPEC_CHKACLR:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	case UNSPEC_COPYSIGN:
	case UNSPEC_FR_RECIP_APPROX_RES:
	  need_barrier  = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	case UNSPEC_CMPXCHG_REL:
	  need_barrier  = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	case UNSPECV_PROBE_STACK_RANGE:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	case UNSPECV_PROBE_STACK_ADDRESS:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case RETURN:
      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;
    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    gcc_unreachable ();
	  }
      break;
    }

  return need_barrier;
}
/* Clear out the state for group_barrier_needed at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}
/* Given the current state, determine whether a group barrier (a stop bit) is
   necessary before INSN.  Return nonzero if so.  This modifies the state to
   include the effects of INSN as a side-effect.  */

static int
group_barrier_needed (rtx_insn *insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
    case DEBUG_INSN:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
	flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      gcc_unreachable ();
    }

  if (first_instruction && important_for_bundling_p (insn))
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
/* Like group_barrier_needed, but do not clobber the current state.  */

static int
safe_group_barrier_needed (rtx_insn *insn)
{
  int saved_first_instruction;
  int t;

  saved_first_instruction = first_instruction;
  in_safe_group_barrier = 1;

  t = group_barrier_needed (insn);

  first_instruction = saved_first_instruction;
  in_safe_group_barrier = 0;

  return t;
}
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx_insn *insn;
  rtx_insn *last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (LABEL_P (insn))
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (NOTE_P (insn)
	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (NONJUMP_INSN_P (insn)
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed (insn))
	    {
	      gcc_assert (last_label);
	      if (dump)
		fprintf (dump, "Emitting stop before label %d\n",
			 INSN_UID (last_label));
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);

	      init_insn_group_barriers ();
	      last_label = 0;
	    }
	}
    }
}
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed (insn);
	    }
	}
    }
}
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};
/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx_insn *dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is the DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires stop bits to be added before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the array mentioned above.  */

static int stop_before_p = 0;

/* The following variable value is the length of the array `stops_p'.  */

static int clocks_length;

/* The following variable value is the number of data speculations in
   progress.  */
static int pending_data_specs = 0;

/* Number of memory references on current and three future processor
   cycles.  */
static char mem_ops_in_group[4];

/* Number of the current processor cycle (from scheduler's point of view).  */
static int current_cycle;

static rtx ia64_single_set (rtx_insn *);
static void ia64_emit_insn_before (rtx, rtx);
/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}
/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx_insn *insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_prologue_allocate_stack_pr:
    case CODE_FOR_epilogue_deallocate_stack:
    case CODE_FOR_epilogue_deallocate_stack_pr:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
   COST is the current cost, DW is dependency weakness.  */
static int
ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
		    int cost, dw_t dw)
{
  enum reg_note dep_type = (enum reg_note) dep_type1;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);

  /* Treat true memory dependencies separately.  Ignore apparent true
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
  if (dep_type == REG_DEP_TRUE
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
    return 0;

  if (dw == MIN_DEP_WEAK)
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
  else if (dw > MIN_DEP_WEAK)
    {
      /* Store and load are less likely to alias.  */
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
	/* Assume there will be no cache conflict for floating-point data.
	   For integer data, L1 conflict penalty is huge (17 cycles), so we
	   never assume it will not cause a conflict.  */
	return 0;
      else
	return cost;
    }

  if (dep_type != REG_DEP_OUTPUT)
    return cost;

  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}
/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  emit_insn_before (insn, before);
}
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because that
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn, *next, *next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	sd_iterator_def sd_it;
	dep_t dep;
	bool has_mem_op_consumer_p = false;

	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	  {
	    enum attr_itanium_class c;

	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
	      continue;

	    next = DEP_CON (dep);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	  }

	insn->call = has_mem_op_consumer_p;
      }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx_insn *insn;

  if (!sel_sched_p () && reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
	 insn != current_sched_info->next_tail;
	 insn = NEXT_INSN (insn))
      gcc_assert (!SCHED_GROUP_P (insn));
#endif
  last_scheduled_insn = NULL;
  init_insn_group_barriers ();

  current_cycle = 0;
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
}
/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int sched_verbose ATTRIBUTE_UNUSED,
			int max_ready ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Scheduling pass is now finished.  Free/reset static variable.  */
static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
			  int sched_verbose ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}
/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */
static bool
is_load_p (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
     && get_attr_check_load (insn) == CHECK_LOAD_NO);
}
/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
   array (taking into account the 3-cycle cache reference postponing for
   stores: see the Intel Itanium 2 Reference Manual for Software Development
   and Optimization).  */
static void
record_memory_reference (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;
    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;
    default:;
    }
}
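/* Illustration of the bucketing above: with current_cycle == 5, a load is
   counted in mem_ops_in_group[5 % 4], i.e. bucket 1 for the current cycle,
   while a store is counted in mem_ops_in_group[(5 + 3) % 4], i.e. bucket 0,
   which corresponds to a cycle three ticks in the future, modeling the
   postponed cache reference of stores.  */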
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
			int *pn_ready, int clock_var,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx_insn **e_ready = ready + n_ready;
  rtx_insn **insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx_insn *insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    rtx_insn *lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    rtx_insn *highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx_insn *insn = *insnp;
	    if (! safe_group_barrier_needed (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  current_cycle = clock_var;
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
    {
      int moved = 0;

      insnp = e_ready;
      /* Move down loads/stores, preserving relative order.  */
      while (insnp-- > ready + moved)
	while (insnp >= ready + moved)
	  {
	    rtx_insn *insn = *insnp;
	    if (! is_load_p (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    moved++;
	  }
      n_ready -= moved;
      ready += moved;
    }

  return 1;
}
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
		    int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
		     int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx_insn *insn,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d if we don't have to.  */
    {
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (CALL_P (insn))
	init_insn_group_barriers ();
      stops_p[INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}
/* We are choosing insn from the ready queue.  Return zero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  gcc_assert (insn && INSN_P (insn));

  /* Size of ALAT is 32.  As far as we perform conservative
     data speculation, we keep ALAT half-empty.  */
  if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
    return ready_index == 0 ? -1 : 1;

  if (ready_index == 0)
    return 0;

  if ((!reload_completed
       || !safe_group_barrier_needed (insn))
      && (!mflag_sched_mem_insns_hard_limit
	  || !is_load_p (insn)
	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
    return 0;

  return 1;
}
/* The following variable value is a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx_insn *dfa_pre_cycle_insn;

/* Returns 1 when a meaningful insn was scheduled between the last group
   barrier and LAST.  */
static int
scheduled_good_insn (rtx_insn *last)
{
  if (last && recog_memoized (last) >= 0)
    return 1;

  for ( ;
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
       && !stops_p[INSN_UID (last)];
       last = PREV_INSN (last))
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
       the ebb we're scheduling.  */
    if (INSN_P (last) && recog_memoized (last) >= 0)
      return 1;

  return 0;
}
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on given cycle CLOCK, and return zero if we should not sort
   the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
		    int clock, int *sort_p)
{
  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
	      || last_scheduled_insn);

  if ((reload_completed
       && (safe_group_barrier_needed (insn)
	   || (mflag_sched_stop_bits_after_every_cycle
	       && last_clock != clock
	       && last_scheduled_insn
	       && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
	  && (CALL_P (last_scheduled_insn)
	      || unknown_for_bundling_p (last_scheduled_insn))))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
	fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
	{
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || ! CALL_P (last_scheduled_insn));
	  else
	    *sort_p = 0;
	  return 1;
	}

      if (last_scheduled_insn)
	{
	  if (unknown_for_bundling_p (last_scheduled_insn))
	    state_reset (curr_state);
	  else
	    {
	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
	      state_transition (curr_state, dfa_stop_insn);
	      state_transition (curr_state, dfa_pre_cycle_insn);
	      state_transition (curr_state, NULL);
	    }
	}
    }
  return 0;
}
/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */
static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;

      stops_p = (char *) xrecalloc (stops_p, new_clocks_length,
				    clocks_length, 1);

      clocks_length = new_clocks_length;
    }
}
/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  state_t prev_cycle_state;
  rtx_insn *last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;
/* Allocates a scheduling context.  */
static void *
ia64_alloc_sched_context (void)
{
  return xmalloc (sizeof (struct _ia64_sched_context));
}
/* Initializes the _SC context with clean data, if CLEAN_P, and from
   the global context otherwise.  */
static void
ia64_init_sched_context (void *_sc, bool clean_p)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  sc->prev_cycle_state = xmalloc (dfa_state_size);
  if (clean_p)
    {
      state_reset (sc->prev_cycle_state);
      sc->last_scheduled_insn = NULL;
      memset (sc->rws_sum, 0, sizeof (rws_sum));
      memset (sc->rws_insn, 0, sizeof (rws_insn));
      sc->first_instruction = 1;
      sc->pending_data_specs = 0;
      sc->current_cycle = 0;
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
    }
  else
    {
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
      sc->last_scheduled_insn = last_scheduled_insn;
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
      sc->first_instruction = first_instruction;
      sc->pending_data_specs = pending_data_specs;
      sc->current_cycle = current_cycle;
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
    }
}
/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}
/* Clears the data in the _SC scheduling context.  */
static void
ia64_clear_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  free (sc->prev_cycle_state);
  sc->prev_cycle_state = NULL;
}

/* Frees the _SC scheduling context.  */
static void
ia64_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}

typedef rtx (* gen_func_t) (rtx, rtx);
/* Return a function that will generate a load of mode MODE_NO
   with speculation types TS.  */
static gen_func_t
get_spec_load_gen_function (ds_t ts, int mode_no)
{
  static gen_func_t gen_ld_[] = {
    gen_movbi,
    gen_movqi_internal,
    gen_movhi,
    gen_movsi,
    gen_movdi,
    gen_movsf,
    gen_movdf,
    gen_movxf,
    gen_movti,
    gen_zero_extendqidi2,
    gen_zero_extendhidi2,
    gen_zero_extendsidi2,
  };

  static gen_func_t gen_ld_a[] = {
    gen_movbi_advanced,
    gen_movqi_advanced,
    gen_movhi_advanced,
    gen_movsi_advanced,
    gen_movdi_advanced,
    gen_movsf_advanced,
    gen_movdf_advanced,
    gen_movxf_advanced,
    gen_movti_advanced,
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,
  };
  static gen_func_t gen_ld_s[] = {
    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,
  };
  static gen_func_t gen_ld_sa[] = {
    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,
  };
  static gen_func_t gen_ld_s_a[] = {
    gen_movbi_speculative_a,
    gen_movqi_speculative_a,
    gen_movhi_speculative_a,
    gen_movsi_speculative_a,
    gen_movdi_speculative_a,
    gen_movsf_speculative_a,
    gen_movdf_speculative_a,
    gen_movxf_speculative_a,
    gen_movti_speculative_a,
    gen_zero_extendqidi2_speculative_a,
    gen_zero_extendhidi2_speculative_a,
    gen_zero_extendsidi2_speculative_a,
  };

  gen_func_t *gen_ld;

  if (ts & BEGIN_DATA)
    {
      if (ts & BEGIN_CONTROL)
	gen_ld = gen_ld_sa;
      else
	gen_ld = gen_ld_a;
    }
  else if (ts & BEGIN_CONTROL)
    {
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
	  || ia64_needs_block_p (ts))
	gen_ld = gen_ld_s;
      else
	gen_ld = gen_ld_s_a;
    }
  else if (ts == 0)
    gen_ld = gen_ld_;
  else
    gcc_unreachable ();

  return gen_ld[mode_no];
}
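/* Usage sketch (illustrative): for a plain data-speculative DImode load,
   TS contains BEGIN_DATA only and ia64_mode_to_int (below) yields 4, so
   the function returns entry 4 of gen_ld_a[], the ld.a (advanced load)
   variant of the DImode move expander.  */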
/* Constants that help mapping 'machine_mode' to int.  */
enum
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8
  };

enum
  {
    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };
/* Return index of the MODE.  */
static int
ia64_mode_to_int (machine_mode mode)
{
  switch (mode)
    {
    case BImode: return 0; /* SPEC_MODE_FIRST  */
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case HImode: return 2;
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case DImode: return 4;
    case SFmode: return 5;
    case DFmode: return 6;
    case XFmode: return 7;
    case TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
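/* Worked example of the mapping: a QImode load that is zero-extended to
   DImode maps to 1 (SPEC_MODE_FOR_EXTEND_FIRST); since 1 lies within
   [SPEC_MODE_FOR_EXTEND_FIRST, SPEC_MODE_FOR_EXTEND_LAST], the caller adds
   SPEC_GEN_EXTEND_OFFSET (8 - 1 + 1 == 8), giving mode_no == 9, which is
   the gen_zero_extendqidi2* slot of the generator arrays above.  */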
/* Provide information about speculation capabilities.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB
      || *flags & SEL_SCHED)
    {
      int mask = 0;

      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
	  || (mflag_sched_ar_data_spec && reload_completed))
	{
	  mask |= BEGIN_DATA;

	  if (!sel_sched_p ()
	      && ((mflag_sched_br_in_data_spec && !reload_completed)
		  || (mflag_sched_ar_in_data_spec && reload_completed)))
	    mask |= BE_IN_DATA;
	}

      if (mflag_sched_control_spec
	  && (!sel_sched_p ()
	      || reload_completed))
	{
	  mask |= BEGIN_CONTROL;

	  if (!sel_sched_p () && mflag_sched_in_control_spec)
	    mask |= BE_IN_CONTROL;
	}

      spec_info->mask = mask;

      if (mask)
	{
	  *flags |= USE_DEPS_LIST | DO_SPECULATION;

	  if (mask & BE_IN_SPEC)
	    *flags |= NEW_BBS;

	  spec_info->flags = 0;

	  if ((mask & CONTROL_SPEC)
	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;

	  if (sched_verbose >= 1)
	    spec_info->dump = sched_dump;
	  else
	    spec_info->dump = 0;

	  if (mflag_sched_count_spec_in_critical_path)
	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
	}
    }
  else
    spec_info->mask = 0;
}
/* If INSN is an appropriate load return its mode.
   Return -1 otherwise.  */
static int
get_mode_no_for_insn (rtx_insn *insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
	{
	  /* Do not speculate into regs like ar.lc.  */
	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
	    return -1;

	  if (!MEM_P (mem))
	    return -1;

	  {
	    rtx mem_reg = XEXP (mem, 0);

	    if (!REG_P (mem_reg))
	      return -1;
	  }
	}

      mode_rtx = mem;
    }
  else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
	  || attr_class == ITANIUM_CLASS_CHK_S_I
	  || attr_class == ITANIUM_CLASS_CHK_S_F)
	/* Process chk.  */
	mode_rtx = reg;
      else
	return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}
/* If X is an unspec part of a speculative load, return its code.
   Return -1 otherwise.  */
static int
get_spec_unspec_code (const_rtx x)
{
  if (GET_CODE (x) != UNSPEC)
    return -1;

  {
    int code = XINT (x, 1);

    switch (code)
      {
      case UNSPEC_LDA:
      case UNSPEC_LDS:
      case UNSPEC_LDS_A:
      case UNSPEC_LDSA:
	return code;

      default:
	return -1;
      }
  }
}

/* Implement skip_rtx_p hook.  */
static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}
/* If INSN is a speculative load, return its UNSPEC code.
   Return -1 otherwise.  */
static int
get_insn_spec_code (const_rtx insn)
{
  rtx pat, reg, mem;

  pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);
  if (!REG_P (reg))
    return -1;

  mem = SET_SRC (pat);
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  return get_spec_unspec_code (mem);
}
/* If INSN is a speculative load, return a ds with the speculation types.
   Otherwise [if INSN is a normal instruction] return 0.  */
static ds_t
ia64_get_insn_spec_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA;

    case UNSPEC_LDS:
    case UNSPEC_LDS_A:
      return BEGIN_CONTROL;

    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}
/* If INSN is a speculative load, return a ds with the speculation types that
   will be checked.
   Otherwise [if INSN is a normal instruction] return 0.  */
static ds_t
ia64_get_insn_checked_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA | BEGIN_CONTROL;

    case UNSPEC_LDS:
      return BEGIN_CONTROL;

    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}
/* Generate a speculative pattern for INSN with speculative mode TS and
   machine mode index MODE_NO (which already encodes any needed
   ZERO_EXTEND).  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}
/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
	res = 0;
      else
	{
	  res = 1;
	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
	}
    }
  else
    res = -1;

  return res;
}
/* Return a function that will generate a check for speculation TS with mode
   MODE_NO.
   If simple check is needed, pass true for SIMPLE_CHECK_P.
   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
static gen_func_t
get_spec_check_gen_function (ds_t ts, int mode_no,
			     bool simple_check_p, bool clearing_check_p)
{
  static gen_func_t gen_ld_c_clr[] = {
    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,
  };
  static gen_func_t gen_ld_c_nc[] = {
    gen_movbi_nc,
    gen_movqi_nc,
    gen_movhi_nc,
    gen_movsi_nc,
    gen_movdi_nc,
    gen_movsf_nc,
    gen_movdf_nc,
    gen_movxf_nc,
    gen_movti_nc,
    gen_zero_extendqidi2_nc,
    gen_zero_extendhidi2_nc,
    gen_zero_extendsidi2_nc,
  };
  static gen_func_t gen_chk_a_clr[] = {
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
  };
  static gen_func_t gen_chk_a_nc[] = {
    gen_advanced_load_check_nc_bi,
    gen_advanced_load_check_nc_qi,
    gen_advanced_load_check_nc_hi,
    gen_advanced_load_check_nc_si,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_sf,
    gen_advanced_load_check_nc_df,
    gen_advanced_load_check_nc_xf,
    gen_advanced_load_check_nc_ti,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
  };
  static gen_func_t gen_chk_s[] = {
    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,
  };

  gen_func_t *gen_check;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa
	 ALAT entry will be allocated only if NAT bit is set to zero.
	 So it is enough to use ld.c here.  */

      if (simple_check_p)
	{
	  gcc_assert (mflag_sched_spec_ldc);

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	{
	  if (clearing_check_p)
	    gen_check = gen_chk_a_clr;
	  else
	    gen_check = gen_chk_a_nc;
	}
    }
  else if (ts & BEGIN_CONTROL)
    {
      if (simple_check_p)
	/* We might want to use ld.sa -> ld.c instead of
	   ld.s -> chk.s.  */
	{
	  gcc_assert (!ia64_needs_block_p (ts));

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	gen_check = gen_chk_s;
    }
  else
    gcc_unreachable ();

  gcc_assert (mode_no >= 0);
  return gen_check[mode_no];
}
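/* Usage sketch (illustrative): a clearing data-speculation check in DImode
   that cannot use the simple ld.c form selects gen_chk_a_clr[4], i.e.
   gen_advanced_load_check_clr_di, which emits the branchy chk.a.clr
   recovery check.  */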
/* Return nonzero if INSN needs a branchy recovery check.  */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}
/* Generate (or regenerate) a recovery check for INSN.  */
static rtx
ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
					   true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}
/* Return nonzero if X is a branchy recovery check.  */
static int
ia64_spec_check_p (rtx x)
{
  x = PATTERN (x);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}
/* Return nonzero if SRC belongs to a recovery check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
	{
	  t = XEXP (t, 0);

	  if (GET_CODE (t) == UNSPEC)
	    {
	      int code;

	      code = XINT (t, 1);

	      if (code == UNSPEC_LDCCLR
		  || code == UNSPEC_LDCNC
		  || code == UNSPEC_CHKACLR
		  || code == UNSPEC_CHKANC
		  || code == UNSPEC_CHKS)
		{
		  gcc_assert (code != 0);
		  return code;
		}
	    }
	}
    }
  return 0;
}
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes the state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
  rtx_insn *insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num;       /* insn number (0 - for initial state, 1 - for the 1st
			 insn and so on)  */
  int cost;           /* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns  */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following maps an insn number to the corresponding bundle state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of the next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;
/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = XNEW (struct bundle_state);
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}
/* The following function frees the given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}
/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}
/* Hashtable helpers.  */

struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
{
  static inline hashval_t hash (const bundle_state *);
  static inline bool equal (const bundle_state *, const bundle_state *);
};

/* The function returns the hash of BUNDLE_STATE.  */

inline hashval_t
bundle_state_hasher::hash (const bundle_state *state)
{
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}
8601 bundle_state_hasher::equal (const bundle_state
*state1
,
8602 const bundle_state
*state2
)
8604 return (state1
->insn_num
== state2
->insn_num
8605 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
8606 dfa_state_size
) == 0);
8609 /* Hash table of the bundle states. The key is dfa_state and insn_num
8610 of the bundle states. */
8612 static hash_table
<bundle_state_hasher
> *bundle_state_table
;
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with the given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  struct bundle_state **entry_ptr;

  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < (*entry_ptr)->cost
	   || (bundle_state->cost == (*entry_ptr)->cost
	       && ((*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || ((*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((*entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || ((*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && (*entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = **entry_ptr;
      **entry_ptr = *bundle_state;
      (*entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
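
/* Illustrative sketch (not part of the original sources): the chained
   conditional above is a lexicographic "is the new state better" test
   over (cost, accumulated_insns_num, branch_deviation,
   middle_bundle_stops), all minimized.  Written as a standalone
   predicate under the same assumptions it would read:

     static bool
     better_state_p (const struct bundle_state *s1,
                     const struct bundle_state *s2)
     {
       if (s1->cost != s2->cost)
         return s1->cost < s2->cost;
       if (s1->accumulated_insns_num != s2->accumulated_insns_num)
         return s1->accumulated_insns_num < s2->accumulated_insns_num;
       if (s1->branch_deviation != s2->branch_deviation)
         return s1->branch_deviation < s2->branch_deviation;
       return s1->middle_bundle_stops < s2->middle_bundle_stops;
     }

   The same ordering reappears when the best final state is chosen at
   the end of the forward pass in bundling () below.  */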
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = new hash_table<bundle_state_hasher> (50);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  delete bundle_state_table;
  bundle_state_table = NULL;
}



/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx_insn *ia64_nop;
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}
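
/* Illustrative note (not part of the original sources): both helpers
   rely on the generated automaton's convention that state_transition
   returns a negative value when the insn can be issued in the current
   cycle, so ">= 0" above is the failure case.  A minimal caller
   sketch, assuming CURR_STATE owns a freshly copied DFA state:

     if (!try_issue_nops (curr_state, 2))
       return;   // curr_state was already freed on failure
     if (!try_issue_insn (curr_state, insn))
       return;   // likewise
     // curr_state->dfa_state now reflects two nops plus INSN

   Because the failure paths free CURR_STATE, a caller must not touch
   the state after a FALSE return; issue_nops_and_insn below follows
   exactly this discipline.  */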
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it was successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx_insn *insn, int try_bundle_end_p,
		     int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
	curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      gcc_assert (!unknown_for_bundling_p (insn));

      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (unknown_for_bundling_p (insn))
	{
	  /* Finish the bundle containing the asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
  return;
}
/* The following function returns the position in the two-window bundle
   for the given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with 2
   values of position equal to 3 or 6.  We avoid generating F NOPs by
   putting templates containing F insns at the end of the template
   search, because of an undocumented anomaly in McKinley-derived cores
   which can cause stalls if an F-unit insn (including a NOP) is issued
   within a six-cycle window after reading certain application
   registers (such as ar.bsp).  Furthermore, power considerations also
   argue against the use of F-unit instructions unless they're really
   needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}
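
/* Illustrative note (not part of the original sources, assuming the
   bundle_name[] table defined earlier in this file): the template
   codes returned by get_template index that table, i.e. 0=.mii,
   1=.mmi, 2=.mfi, 3=.mmf, 4=.bbb, 5=.mbb, 6=.mib, 7=.mmb, 8=.mfb,
   9=.mlx -- which is also why ia64_add_bundle_selector_before below
   treats templates 4 and 5 (.bbb and .mbb) specially.  */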
/* True when INSN is important for bundling.  */

static bool
important_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}

/* The following function returns an insn important for insn bundling
   that follows INSN and comes before TAIL.  */

static rtx_insn *
get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL;
}

/* True when INSN is unknown, but important, for bundling.  */

static bool
unknown_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}
/* Add a bundle selector TEMPLATE0 before INSN.  */

static void
ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  if ((template0 == 4 || template0 == 5)
      && ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
	 to following nops, as br.call sets rp to the address of following
	 bundle and therefore an EH region end must be on a bundle
	 boundary.  */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
	{
	  do
	    insn = next_active_insn (insn);
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (CALL_P (insn))
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		note = NULL_RTX;
	      else
		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
	    }
	}
    }
#endif
}
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very fast.

   Unfortunately it is not possible to get information about inserting
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying information about new
   cycle ticks taken from the insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insns
   and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce memory for the algorithm.

   When we reach the end of an EBB (extended basic block), we choose
   the best sequence and then, moving back in the EBB, insert templates
   for the best alternative.  The templates are taken from querying the
   automaton state for each insn in the chosen bundle states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  */
static void
bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx_insn *insn, *next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  rtx_insn *b;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn_num++;
  if (insn_num == 0)
    return;
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->middle_bundle_stops = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;
  insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& !important_for_bundling_p (insn)
	&& GET_MODE (insn) == TImode)
      {
	PUT_MODE (insn, VOIDmode);
	for (next_insn = NEXT_INSN (insn);
	     next_insn != tail;
	     next_insn = NEXT_INSN (next_insn))
	  if (important_for_bundling_p (next_insn)
	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
	    {
	      PUT_MODE (next_insn, TImode);
	      break;
	    }
      }
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      gcc_assert (important_for_bundling_p (insn));
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
	   curr_state != NULL;
	   curr_state = next_state)
	{
	  pos = curr_state->accumulated_insns_num % 3;
	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  An asm insn is always
	     placed in a separate bundle.  */
	  only_bundle_end_p
	    = (next_insn != NULL_RTX
	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
	       && unknown_for_bundling_p (next_insn));
	  /* We may fill up the current bundle if it is the cycle end
	     without a group barrier.  */
	  bundle_end_p
	    = (only_bundle_end_p || next_insn == NULL_RTX
	       || (GET_MODE (next_insn) == TImode
		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
	      || type == TYPE_S)
	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
				 only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
			       only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
			       only_bundle_end_p);
	}
      gcc_assert (index_to_bundle_states [insn_num]);
      for (curr_state = index_to_bundle_states [insn_num];
	   curr_state != NULL;
	   curr_state = curr_state->next)
	if (verbose >= 2 && dump)
	  {
	    /* This structure is taken from generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
	    struct DFA_chip
	    {
	      unsigned short one_automaton_state;
	      unsigned short oneb_automaton_state;
	      unsigned short two_automaton_state;
	      unsigned short twob_automaton_state;
	    };

	    fprintf
	      (dump,
	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
	       curr_state->unique_num,
	       (curr_state->originator == NULL
		? -1 : curr_state->originator->unique_num),
	       curr_state->cost,
	       curr_state->before_nops_num, curr_state->after_nops_num,
	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
	       curr_state->middle_bundle_stops,
	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	       INSN_UID (insn));
	  }
    }

  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are just looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       with minimal inserted nops, and finally with branch insns placed
       in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
	&& (best_state == NULL || best_state->cost > curr_state->cost
	    || (best_state->cost == curr_state->cost
		&& (curr_state->accumulated_insns_num
		    < best_state->accumulated_insns_num
		    || (curr_state->accumulated_insns_num
			== best_state->accumulated_insns_num
			&& (curr_state->branch_deviation
			    < best_state->branch_deviation
			    || (curr_state->branch_deviation
				== best_state->branch_deviation
				&& curr_state->middle_bundle_stops
				< best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = unknown_for_bundling_p (insn);
      insn_num++;
      if (verbose >= 2 && dump)
	{
	  struct DFA_chip
	  {
	    unsigned short one_automaton_state;
	    unsigned short oneb_automaton_state;
	    unsigned short two_automaton_state;
	    unsigned short twob_automaton_state;
	  };

	  fprintf
	    (dump,
	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
	     curr_state->unique_num,
	     (curr_state->originator == NULL
	      ? -1 : curr_state->originator->unique_num),
	     curr_state->cost,
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     curr_state->middle_bundle_stops,
	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	     INSN_UID (insn));
	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  Two bundle window means that
	 the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
	  /* The following (negative template number) means that the
	     processor did one bundle rotation.  */
	  || (max_pos == 3 && template0 < 0))
	{
	  /* We are at the end of the window -- find template(s) for
	     its bundle(s).  */
	  pos = max_pos;
	  if (max_pos == 3)
	    template0 = get_template (curr_state->dfa_state, 3);
	  else
	    {
	      template1 = get_template (curr_state->dfa_state, 3);
	      template0 = get_template (curr_state->dfa_state, 6);
	    }
	}
      if (max_pos > 3 && template1 < 0)
	/* It may happen when we have the stop inside a bundle.  */
	{
	  gcc_assert (pos <= 3);
	  template1 = get_template (curr_state->dfa_state, 3);
	  pos += 3;
	}
      if (!asm_p)
	/* Emit nops after the current insn.  */
	for (i = 0; i < curr_state->after_nops_num; i++)
	  {
	    rtx nop_pat = gen_nop ();
	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
	    pos--;
	    gcc_assert (pos >= 0);
	    if (pos % 3 == 0)
	      {
		/* We are at the start of a bundle: emit the template
		   (it should be defined).  */
		gcc_assert (template0 >= 0);
		ia64_add_bundle_selector_before (template0, nop);
		/* If we have two bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (negative value).  */
		template0 = template1;
		template1 = -1;
	      }
	  }
      /* Move the position backward in the window.  Group barrier has
	 no slot.  Asm insn takes all bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	pos--;
      /* Long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	{
	  /* The current insn is at the bundle start: emit the
	     template.  */
	  gcc_assert (template0 >= 0);
	  ia64_add_bundle_selector_before (template0, insn);
	  b = PREV_INSN (insn);
	  insn = b;
	  /* See comment above in analogous place for emitting nops
	     after the insn.  */
	  template0 = template1;
	  template1 = -1;
	}
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
	{
	  rtx nop_pat = gen_nop ();
	  ia64_emit_insn_before (nop_pat, insn);
	  rtx_insn *nop = PREV_INSN (insn);
	  insn = nop;
	  pos--;
	  gcc_assert (pos >= 0);
	  if (pos % 3 == 0)
	    {
	      /* See comment above in analogous place for emitting nops
		 after the insn.  */
	      gcc_assert (template0 >= 0);
	      ia64_add_bundle_selector_before (template0, insn);
	      b = PREV_INSN (insn);
	      insn = b;
	      template0 = template1;
	      template1 = -1;
	    }
	}
    }

#ifdef ENABLE_CHECKING
  {
    /* Assert right calculation of middle_bundle_stops.  */
    int num = best_state->middle_bundle_stops;
    bool start_bundle = true, end_bundle = false;

    for (insn = NEXT_INSN (prev_head_insn);
	 insn && insn != tail;
	 insn = NEXT_INSN (insn))
      {
	if (!INSN_P (insn))
	  continue;
	if (recog_memoized (insn) == CODE_FOR_bundle_selector)
	  start_bundle = true;
	else
	  {
	    rtx_insn *next_insn;

	    for (next_insn = NEXT_INSN (insn);
		 next_insn && next_insn != tail;
		 next_insn = NEXT_INSN (next_insn))
	      if (INSN_P (next_insn)
		  && (ia64_safe_itanium_class (next_insn)
		      != ITANIUM_CLASS_IGNORE
		      || recog_memoized (next_insn)
		      == CODE_FOR_bundle_selector)
		  && GET_CODE (PATTERN (next_insn)) != USE
		  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
		break;

	    end_bundle = next_insn == NULL_RTX
	      || next_insn == tail
	      || (INSN_P (next_insn)
		  && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
	    if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
		&& !start_bundle && !end_bundle
		&& next_insn
		&& !unknown_for_bundling_p (next_insn))
	      num--;

	    start_bundle = false;
	  }
      }

    gcc_assert (num == 0);
  }
#endif

  free (index_to_bundle_states);
  finish_bundle_state_table ();
  dfa_clean_insn_cache ();
}
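
/* Illustrative sketch (not part of the original sources): stripped of
   the IA-64 details, the dynamic program implemented by bundling ()
   has this shape, using the helper names defined above:

     index_to_bundle_states[0] = initial state;
     for each important insn I (forward pass):
       for each state S in index_to_bundle_states[n - 1]:
         for nops in { 2, 1, 0 }:
           issue_nops_and_insn (S, nops, I, ...);
           // extends index_to_bundle_states[n]; insert_bundle_state
           // deduplicates on the (dfa_state, insn_num) key
     best = minimum over index_to_bundle_states[last] restricted to
            states whose final bundle is full, ordered by (cost, insns,
            branch deviation, middle-of-bundle stops);
     for (S = best; S->originator; S = S->originator)   // backward pass
       emit S->after_nops_num nops, bundle templates taken from
       get_template (), and S->before_nops_num nops around S->insn;

   The hash table keyed on (dfa_state, insn_num) is what keeps the
   number of live states per insn position bounded, which makes the
   search practical.  */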
/* The following function is called at the end of scheduling a BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
		current_sched_info->next_tail);
      if (sched_verbose && dump)
	fprintf (dump, "// finishing %d-%d\n",
		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
/* The following function inserts stop bits in a scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;
  int need_barrier_p = 0;
  int seen_good_insn = 0;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	  seen_good_insn = 0;
	  need_barrier_p = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    {
	      init_insn_group_barriers ();
	      seen_good_insn = 0;
	      need_barrier_p = 0;
	    }
	  else if (need_barrier_p || group_barrier_needed (insn)
		   || (mflag_sched_stop_bits_after_every_cycle
		       && GET_MODE (insn) == TImode
		       && seen_good_insn))
	    {
	      if (TARGET_EARLY_STOP_BITS)
		{
		  rtx_insn *last;

		  for (last = insn;
		       last != current_sched_info->prev_head;
		       last = PREV_INSN (last))
		    if (INSN_P (last) && GET_MODE (last) == TImode
			&& stops_p [INSN_UID (last)])
		      break;
		  if (last == current_sched_info->prev_head)
		    last = insn;
		  last = prev_active_insn (last);
		  if (last
		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
				     last);
		  init_insn_group_barriers ();
		  for (last = NEXT_INSN (last);
		       last != insn;
		       last = NEXT_INSN (last))
		    if (INSN_P (last))
		      {
			group_barrier_needed (last);
			if (recog_memoized (last) >= 0
			    && important_for_bundling_p (last))
			  seen_good_insn = 1;
		      }
		}
	      else
		{
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    insn);
		  init_insn_group_barriers ();
		  seen_good_insn = 0;
		}
	      group_barrier_needed (insn);
	      if (recog_memoized (insn) >= 0
		  && important_for_bundling_p (insn))
		seen_good_insn = 1;
	    }
	  else if (recog_memoized (insn) >= 0
		   && important_for_bundling_p (insn))
	    seen_good_insn = 1;
	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
	}
    }
}
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}
/* The following function initiates the variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn)
    = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
		  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if INSN produces an address for a
   load/store insn.  We will place such insns into an M slot because it
   decreases their latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  return insn->call;
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      int r;
      rtx_insn *head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (! LABEL_P (head))
	continue;
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
	head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
	 grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == BB_END (bb))
	      BB_END (bb) = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      rtx_insn *insn = BB_HEAD (bb);

      while (1)
	{
	  if (CALL_P (insn)
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx_insn *b =
		emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx_insn *a =
		emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (BB_HEAD (bb) == insn)
		BB_HEAD (bb) = b;
	      if (BB_END (bb) == insn)
		BB_END (bb) = a;
	    }

	  if (insn == BB_END (bb))
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && flag_schedule_insns_after_reload
      && dbg_cnt (ia64_sched2))
    {
      basic_block bb;
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      /* We can't let modulo-sched prevent us from scheduling any bbs,
	 since we need the final schedule to produce bundle information.  */
      FOR_EACH_BB_FN (bb, cfun)
	bb->flags &= ~BB_DISABLE_SCHEDULE;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}

      if (flag_selective_scheduling2
	  && !maybe_skip_selective_scheduling ())
	run_selective_scheduling ();
      else
	schedule_ebbs ();

      /* Redo alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
	 evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      rtx_insn *insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (insn)
	{
	  /* Skip over insns that expand to nothing.  */
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES)
	    {
	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
		saw_stop = 1;
	      insn = prev_active_insn (insn);
	    }
	  if (CALL_P (insn))
	    {
	      if (! saw_stop)
		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	      emit_insn (gen_break_f ());
	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	    }
	}
    }

  emit_predicate_relation_info ();

  if (flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
	|| regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);

      if (strcmp (section, ".sdata") == 0
	  || strncmp (section, ".sdata.", 7) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".sbss") == 0
	  || strncmp (section, ".sbss.", 6) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}
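
/* Illustrative example (not part of the original sources): assuming a
   small-data threshold of, say, 8 bytes (the -G value kept in
   ia64_section_threshold), a translation unit containing

     int counter;     // 4 bytes  -> sdata/sbss, gp-relative addressing
     char big[64];    // 64 bytes -> ordinary data/bss

   places only COUNTER in the short sections, enabling the shorter
   gp-relative addl/ld8 access sequence mentioned in the ??? comment
   above.  */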
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last one in the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
		  bool unwind, bool frame ATTRIBUTE_UNUSED)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
	fprintf (asm_out_file, "\t.label_state %d\n",
		 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
}
/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */

static void
process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
			bool unwind, bool frame)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);

  if (dest == stack_pointer_rtx)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);

	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

	  if (INTVAL (op1) < 0)
	    {
	      gcc_assert (!frame_pointer_needed);
	      if (unwind)
		fprintf (asm_out_file,
			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC "\n",
			 -INTVAL (op1));
	    }
	  else
	    process_epilogue (asm_out_file, insn, unwind, frame);
	}
      else
	{
	  gcc_assert (src == hard_frame_pointer_rtx);
	  process_epilogue (asm_out_file, insn, unwind, frame);
	}
    }
  else if (dest == hard_frame_pointer_rtx)
    {
      gcc_assert (src == stack_pointer_rtx);
      gcc_assert (frame_pointer_needed);

      if (unwind)
	fprintf (asm_out_file, "\t.vframe r%d\n",
		 ia64_dbx_register_number (REGNO (dest)));
    }
  else
    gcc_unreachable ();
}
/* This function processes a SET pattern for REG_CFA_REGISTER.  */

static void
process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int dest_regno = REGNO (dest);
  int src_regno;

  if (src == pc_rtx)
    {
      /* Saving the return address pointer.  */
      if (unwind)
	fprintf (asm_out_file, "\t.save rp, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      return;
    }

  src_regno = REGNO (src);

  switch (src_regno)
    {
    case PR_REG (0):
      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t.save pr, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_LC_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    default:
      /* Everything else should indicate being stored to memory.  */
      gcc_unreachable ();
    }
}
/* This function processes a SET pattern for REG_CFA_OFFSET.  */

static void
process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int src_regno = REGNO (src);
  const char *saveop;
  HOST_WIDE_INT off;
  rtx base;

  gcc_assert (MEM_P (dest));
  if (GET_CODE (XEXP (dest, 0)) == REG)
    {
      base = XEXP (dest, 0);
      off = 0;
    }
  else
    {
      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
      base = XEXP (XEXP (dest, 0), 0);
      off = INTVAL (XEXP (XEXP (dest, 0), 1));
    }

  if (base == hard_frame_pointer_rtx)
    {
      saveop = ".savepsp";
      off = - off;
    }
  else
    {
      gcc_assert (base == stack_pointer_rtx);
      saveop = ".savesp";
    }

  src_regno = REGNO (src);
  switch (src_regno)
    {
    case BR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_b0]);
      if (unwind)
	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case PR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_LC_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_PFS_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
      if (unwind)
	fprintf (asm_out_file, "\t.save.g 0x%x\n",
		 1 << (src_regno - GR_REG (4)));
      break;

    case BR_REG (1): case BR_REG (2): case BR_REG (3):
    case BR_REG (4): case BR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.b 0x%x\n",
		 1 << (src_regno - BR_REG (1)));
      break;

    case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.f 0x%x\n",
		 1 << (src_regno - FR_REG (2)));
      break;

    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
      if (unwind)
	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		 1 << (src_regno - FR_REG (12)));
      break;

    default:
      /* ??? For some reason we mark other general registers, even those
	 we can't represent in the unwind info.  Ignore them.  */
      break;
    }
}
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

static void
ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
{
  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
  bool frame = dwarf2out_do_frame ();
  rtx note, pat;
  bool handled_one;

  if (!unwind && !frame)
    return;

  if (NOTE_INSN_BASIC_BLOCK_P (insn))
    {
      last_block = NOTE_BASIC_BLOCK (insn)->next_bb
	== EXIT_BLOCK_PTR_FOR_FN (cfun);

      /* Restore unwind state from immediately before the epilogue.  */
      if (need_copy_state)
	{
	  if (unwind)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state %d\n",
		       cfun->machine->state_num);
	    }
	  need_copy_state = false;
	}
    }

  if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
    return;

  /* Look for the ALLOC insn.  */
  if (INSN_CODE (insn) == CODE_FOR_alloc)
    {
      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
      int dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
	 be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
	{
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	}
      else
	{
	  /* This must be an alloc before a sibcall.  We must drop the
	     old frame info.  The easiest way to drop the old frame
	     info is to ensure we had a ".restore sp" directive
	     followed by a new prologue.  If the procedure doesn't
	     have a memory-stack frame, we'll issue a dummy ".restore
	     sp" now.  */
	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* if haven't done process_epilogue() yet, do it now */
	    process_epilogue (asm_out_file, insn, unwind, frame);
	  if (unwind)
	    fprintf (asm_out_file, "\t.prologue\n");
	}
      return;
    }

  handled_one = false;
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    switch (REG_NOTE_KIND (note))
      {
      case REG_CFA_ADJUST_CFA:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
	handled_one = true;
	break;

      case REG_CFA_OFFSET:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_offset (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_CFA_REGISTER:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_register (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_FRAME_RELATED_EXPR:
      case REG_CFA_DEF_CFA:
      case REG_CFA_EXPRESSION:
      case REG_CFA_RESTORE:
      case REG_CFA_SET_VDRAP:
	/* Not used in the ia64 port.  */
	gcc_unreachable ();

      default:
	/* Not a frame-related note.  */
	break;
      }

  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
     explicit action to take.  No guessing required.  */
  gcc_assert (handled_one);
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
ia64_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
ia64_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}

/* Implement TARGET_DEBUG_UNWIND_INFO.  */

static enum unwind_info_type
ia64_debug_unwind_info (void)
{
  return UI_TARGET;
}
,
10339 IA64_BUILTIN_FABSQ
,
10340 IA64_BUILTIN_FLUSHRS
,
10342 IA64_BUILTIN_HUGE_VALQ
,
10346 static GTY(()) tree ia64_builtins
[(int) IA64_BUILTIN_max
];
static void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;
  tree decl;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype;
      tree float128_type = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (float128_type, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;

      ftype = build_function_type_list (float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
				   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (float128_type,
					float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

  /* Fwrite on VMS is non-standard.  */
#if TARGET_ABI_OPEN_VMS
  vms_patch_builtins ();
#endif

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,		\
			NULL, NULL_TREE)

  decl = def_builtin ("__builtin_ia64_bsp",
		      build_function_type_list (ptr_type_node, NULL_TREE),
		      IA64_BUILTIN_BSP);
  ia64_builtins[IA64_BUILTIN_BSP] = decl;

  decl = def_builtin ("__builtin_ia64_flushrs",
		      build_function_type_list (void_type_node, NULL_TREE),
		      IA64_BUILTIN_FLUSHRS);
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;

#undef def_builtin

  if (TARGET_HPUX)
    {
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinite");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef128");
    }
}
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Return the ia64 builtin for CODE.  */

static tree
ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IA64_BUILTIN_max)
    return error_mark_node;

  return ia64_builtins[code];
}
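
/* Illustrative usage note (not part of the original sources): the
   machine builtins registered above are directly callable from user
   code compiled for ia64.  A minimal sketch:

     void *bsp = __builtin_ia64_bsp ();   // current ar.bsp value
     __builtin_ia64_flushrs ();           // flush dirty stacked registers

     __float128 inf = __builtin_infq ();
     __float128 mag = __builtin_fabsq (-inf);

   BSP and FLUSHRS expand to inline insns in ia64_expand_builtin above,
   whereas __builtin_fabsq and __builtin_copysignq are expanded as
   ordinary calls (to __fabstf2 and __copysigntf3 respectively).  */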
/* On HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
	 visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
			     && maybe_assemble_visibility (decl));

      /* GNU as does not need anything here, but the HP linker does
	 need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
	  && TREE_CODE (decl) == FUNCTION_DECL)
	(*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
	(*targetm.asm_out.globalize_label) (file, name);
    }
}
/* Set SImode div/mod functions, init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
  abort_libfunc = init_one_libfunc ("decc$abort");
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}
/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

static void
ia64_soft_fp_init_libfuncs (void)
{
}
static bool
ia64_vms_valid_pointer_mode (machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}
/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}
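/* Hedged reading of the mask convention assumed above: bit 0 set
   means data needing relocations against local symbols must live in
   writable sections, bit 1 the same for global symbols.  So HP-UX
   (3) keeps every relocated object out of read-only segments, while
   generic ELF does likewise under PIC (3) and otherwise only demotes
   data with global relocations (2).  */
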
/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}
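/* Hedged example of the name matching above: with -fdata-sections, a
   small global such as

       int counter;    // emitted into .sdata.counter

   matches the ".sdata." prefix, so its section is tagged
   SECTION_SMALL and stays reachable through the gp-relative
   short-data addressing model.  */
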
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type whose address should be passed in out0, rather than
   in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
	  && ret_type
	  && TYPE_MODE (ret_type) == BLKmode
	  && TREE_ADDRESSABLE (ret_type)
	  && lang_GNU_CXX ());
}
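/* Illustrative C++ sketch of the rule above (hypothetical types):

       struct NT  { ~NT (); };      // non-trivial dtor -> TREE_ADDRESSABLE
       NT  f ();                    // return-slot address passed in out0
       struct POD { int i[64]; };
       POD g ();                    // trivially copyable -> address in r8
*/
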
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
	{
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
	  if (satisfies_constraint_I (vcall_offset_rtx))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
	{
	  if (!satisfies_constraint_J (vcall_offset_rtx))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      if (TARGET_ILP32)
	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
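/* Hedged summary of what the emitted thunk computes, in the notation
   of the comment before ia64_output_mi_thunk:

       this += delta;
       if (vcall_offset)
	 this += *(*this + vcall_offset);
       tail-call FUNCTION;   // "this" left in its incoming register
*/
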
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
		       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS ||
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}
static bool
ia64_scalar_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
      return true;

    case SFmode:
    case DFmode:
    case XFmode:
    case RFmode:
      return true;

    case TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case V8QImode:
    case V4HImode:
    case V2SImode:
      return true;

    case V2SFmode:
      return true;

    default:
      return false;
    }
}

/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P.  */

static bool
ia64_libgcc_floating_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
    case DFmode:
      return true;

    case XFmode:
#ifdef IA64_NO_LIBGCC_XFMODE
      return false;
#else
      return true;
#endif

    case TFmode:
#ifdef IA64_NO_LIBGCC_TFMODE
      return false;
#else
      return true;
#endif

    default:
      return false;
    }
}
/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
	fputs ("\tmovl out3 = @gprel(", file);
      else
	fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
	fputs (")\n", file);
      else
	fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);

  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);

  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
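/* Hedged summary of the _mcount calling convention implied by the
   assembly above: out0 = caller's ar.pfs (from the alloc), out1 =
   caller's gp (r1), out2 = caller's return address (b0), and out3 =
   the address of the per-call-site counter word, or r0 under
   NO_PROFILE_COUNTERS.  */
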
static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
		     VOIDmode, 3,
		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
		     ip, Pmode,
		     label, Pmode);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled as "g".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}
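/* Hedged examples of the resulting Itanium C++ manglings:

       void f (__float128);   // _Z1fg where TFmode maps to "g"
       void f (__float80);    // _Z1fu9__float80 on HP-UX, _Z1fe elsewhere
       void f (__fpreg);      // _Z1fu7__fpreg
*/
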
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
			const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}
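/* Hedged examples of user code rejected by the hooks above:

       __fpreg r;
       double d = (double) r;   // "invalid conversion from __fpreg"
       r = r + r;               // "invalid operation on __fpreg"
       __fpreg *p = &r;         // OK: unary & is permitted
*/
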
/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
				  tree name ATTRIBUTE_UNUSED,
				  tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
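/* Hedged usage sketch for the attribute handled above (HP-UX only):

       extern int foo (void) __attribute__((version_id ("20040821")));

   which, per the comment before the handler, is expected to produce
   an alias statement of the form  .alias foo "foo{20040821}".  */
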
/* Target hook for c_mode_for_suffix.  */

static machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}
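/* Hedged note: 0.5 and 0.375 are the constants consumed by the
   Newton-Raphson style refinement sequences in ia64.md (e.g. the
   square-root expanders); caching the CONST_DOUBLEs here avoids
   rebuilding them on every expansion.  */
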
static machine_mode
ia64_get_reg_raw_mode (int regno)
{
  if (FR_REGNO_P (regno))
    return XFmode;
  return default_get_reg_raw_mode (regno);
}
/* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
   anymore.  */

static bool
ia64_member_type_forces_blk (const_tree, machine_mode mode)
{
  return TARGET_HPUX && mode == TFmode;
}
/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
			    enum node_frequency freq ATTRIBUTE_UNUSED,
			    bool startup ATTRIBUTE_UNUSED,
			    bool exit ATTRIBUTE_UNUSED)
{
  return NULL;
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (target, x);

  rtx_insn *insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt)
{
  machine_mode v2mode;
  rtx x;

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
/* Try to expand a no-op permutation.  */

static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;

  for (i = 0; i < nelt; ++i)
    if (d->perm[i] != i)
      return false;

  if (!d->testing_p)
    emit_move_insn (d->target, d->op0);

  return true;
}
/* Try to expand D via a shrp instruction.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}
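/* Hedged worked example for the matcher above, V4HImode and
   little-endian: the two-operand permutation { 1, 2, 3, 4 } over
   {a0 a1 a2 a3} and {b0 b1 b2 b3} selects the window {a1 a2 a3 b0},
   so shift == 1 and the whole operation is a single shrp of the two
   64-bit register images by 16 bits.  */
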
/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}
/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case V2SImode:
    case V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}
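/* Hedged worked example: the V2SImode broadcast { 1, 1 } becomes
   perm2 = { 1, 3 } applied to vec_concat (op0, op0), i.e. selecting
   {x1, x1} out of {x0, x1, x0, x1} with one interleave-style
   vec_select.  */
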
/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}
/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}
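/* Hedged note on the ordering above: the helpers run from cheapest
   to most general; a single vec_select or shrp first, then
   broadcasts, then merging the two inputs into one vector with an
   interleave, and finally the mux2-and-merge fallback, which can
   realize any V4HImode two-operand constant permutation.  */
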
bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

#include "gt-ia64.h"