/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009, 2010, 2011 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
45 #include "diagnostic-core.h"
46 #include "sched-int.h"
49 #include "target-def.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
60 #include "tm-constrs.h"
61 #include "sel-sched.h"
63 #include "dwarf2out.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  Nonzero while a label is being emitted, so the
   two macros can coordinate.  */
int ia64_asm_output_label = 0;
/* Register names for ia64_expand_prologue, indexed from r32 upward
   (entry [i] names register r32+i, covering the 96 stacked registers
   r32..r127).  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* Names of the 8 incoming-argument registers, in0..in7.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
/* Names of the 80 local stacked registers, loc0..loc79.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
/* Names of the 8 outgoing-argument registers, out0..out7.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
105 /* Which cpu are we scheduling for. */
106 enum processor_type ia64_tune
= PROCESSOR_ITANIUM2
;
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  Zero-initialized; set during
   option processing.  */
static int ia64_flag_schedule_insns2;
/* Determines whether we run variable tracking in machine dependent
   reorg.  (NOTE(review): tail of the original comment was lost in
   extraction; reconstructed — verify against upstream.)  */
static int ia64_flag_var_tracking;
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  (NOTE(review): tail of the original comment was lost in
   extraction; reconstructed — verify against upstream.)  */
unsigned int ia64_section_threshold;
121 /* The following variable is used by the DFA insn scheduler. The value is
122 TRUE if we do insn bundling instead of insn scheduling. */
134 number_of_ia64_frame_regs
137 /* Structure to be filled in by ia64_compute_frame_size with register
138 save masks and offsets for the current function. */
140 struct ia64_frame_info
142 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
143 the caller's scratch area. */
144 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
145 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
146 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
147 HARD_REG_SET mask
; /* mask of saved registers. */
148 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
149 registers or long-term scratches. */
150 int n_spilled
; /* number of spilled registers. */
151 int r
[number_of_ia64_frame_regs
]; /* Frame related registers. */
152 int n_input_regs
; /* number of input registers used. */
153 int n_local_regs
; /* number of local registers used. */
154 int n_output_regs
; /* number of output registers used. */
155 int n_rotate_regs
; /* number of rotating registers used. */
157 char need_regstk
; /* true if a .regstk directive needed. */
158 char initialized
; /* true if the data is finalized. */
161 /* Current frame information calculated by ia64_compute_frame_size. */
162 static struct ia64_frame_info current_frame_info
;
163 /* The actual registers that are emitted. */
164 static int emitted_frame_related_regs
[number_of_ia64_frame_regs
];
166 static int ia64_first_cycle_multipass_dfa_lookahead (void);
167 static void ia64_dependencies_evaluation_hook (rtx
, rtx
);
168 static void ia64_init_dfa_pre_cycle_insn (void);
169 static rtx
ia64_dfa_pre_cycle_insn (void);
170 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx
);
171 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx
);
172 static int ia64_dfa_new_cycle (FILE *, int, rtx
, int, int, int *);
173 static void ia64_h_i_d_extended (void);
174 static void * ia64_alloc_sched_context (void);
175 static void ia64_init_sched_context (void *, bool);
176 static void ia64_set_sched_context (void *);
177 static void ia64_clear_sched_context (void *);
178 static void ia64_free_sched_context (void *);
179 static int ia64_mode_to_int (enum machine_mode
);
180 static void ia64_set_sched_flags (spec_info_t
);
181 static ds_t
ia64_get_insn_spec_ds (rtx
);
182 static ds_t
ia64_get_insn_checked_ds (rtx
);
183 static bool ia64_skip_rtx_p (const_rtx
);
184 static int ia64_speculate_insn (rtx
, ds_t
, rtx
*);
185 static bool ia64_needs_block_p (int);
186 static rtx
ia64_gen_spec_check (rtx
, rtx
, ds_t
);
187 static int ia64_spec_check_p (rtx
);
188 static int ia64_spec_check_src_p (rtx
);
189 static rtx
gen_tls_get_addr (void);
190 static rtx
gen_thread_pointer (void);
191 static int find_gr_spill (enum ia64_frame_regs
, int);
192 static int next_scratch_gr_reg (void);
193 static void mark_reg_gr_used_mask (rtx
, void *);
194 static void ia64_compute_frame_size (HOST_WIDE_INT
);
195 static void setup_spill_pointers (int, rtx
, HOST_WIDE_INT
);
196 static void finish_spill_pointers (void);
197 static rtx
spill_restore_mem (rtx
, HOST_WIDE_INT
);
198 static void do_spill (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
);
199 static void do_restore (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
);
200 static rtx
gen_movdi_x (rtx
, rtx
, rtx
);
201 static rtx
gen_fr_spill_x (rtx
, rtx
, rtx
);
202 static rtx
gen_fr_restore_x (rtx
, rtx
, rtx
);
204 static void ia64_option_override (void);
205 static void ia64_option_default_params (void);
206 static bool ia64_can_eliminate (const int, const int);
207 static enum machine_mode
hfa_element_mode (const_tree
, bool);
208 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
210 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
212 static rtx
ia64_function_arg_1 (const CUMULATIVE_ARGS
*, enum machine_mode
,
213 const_tree
, bool, bool);
214 static rtx
ia64_function_arg (CUMULATIVE_ARGS
*, enum machine_mode
,
216 static rtx
ia64_function_incoming_arg (CUMULATIVE_ARGS
*,
217 enum machine_mode
, const_tree
, bool);
218 static void ia64_function_arg_advance (CUMULATIVE_ARGS
*, enum machine_mode
,
220 static unsigned int ia64_function_arg_boundary (enum machine_mode
,
222 static bool ia64_function_ok_for_sibcall (tree
, tree
);
223 static bool ia64_return_in_memory (const_tree
, const_tree
);
224 static rtx
ia64_function_value (const_tree
, const_tree
, bool);
225 static rtx
ia64_libcall_value (enum machine_mode
, const_rtx
);
226 static bool ia64_function_value_regno_p (const unsigned int);
227 static int ia64_register_move_cost (enum machine_mode
, reg_class_t
,
229 static int ia64_memory_move_cost (enum machine_mode mode
, reg_class_t
,
231 static bool ia64_rtx_costs (rtx
, int, int, int *, bool);
232 static int ia64_unspec_may_trap_p (const_rtx
, unsigned);
233 static void fix_range (const char *);
234 static bool ia64_handle_option (size_t, const char *, int);
235 static struct machine_function
* ia64_init_machine_status (void);
236 static void emit_insn_group_barriers (FILE *);
237 static void emit_all_insn_group_barriers (FILE *);
238 static void final_emit_insn_group_barriers (FILE *);
239 static void emit_predicate_relation_info (void);
240 static void ia64_reorg (void);
241 static bool ia64_in_small_data_p (const_tree
);
242 static void process_epilogue (FILE *, rtx
, bool, bool);
244 static bool ia64_assemble_integer (rtx
, unsigned int, int);
245 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT
);
246 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT
);
247 static void ia64_output_function_end_prologue (FILE *);
249 static int ia64_issue_rate (void);
250 static int ia64_adjust_cost_2 (rtx
, int, rtx
, int, dw_t
);
251 static void ia64_sched_init (FILE *, int, int);
252 static void ia64_sched_init_global (FILE *, int, int);
253 static void ia64_sched_finish_global (FILE *, int);
254 static void ia64_sched_finish (FILE *, int);
255 static int ia64_dfa_sched_reorder (FILE *, int, rtx
*, int *, int, int);
256 static int ia64_sched_reorder (FILE *, int, rtx
*, int *, int);
257 static int ia64_sched_reorder2 (FILE *, int, rtx
*, int *, int);
258 static int ia64_variable_issue (FILE *, int, rtx
, int);
260 static void ia64_asm_unwind_emit (FILE *, rtx
);
261 static void ia64_asm_emit_except_personality (rtx
);
262 static void ia64_asm_init_sections (void);
264 static enum unwind_info_type
ia64_debug_unwind_info (void);
265 static enum unwind_info_type
ia64_except_unwind_info (struct gcc_options
*);
267 static struct bundle_state
*get_free_bundle_state (void);
268 static void free_bundle_state (struct bundle_state
*);
269 static void initiate_bundle_states (void);
270 static void finish_bundle_states (void);
271 static unsigned bundle_state_hash (const void *);
272 static int bundle_state_eq_p (const void *, const void *);
273 static int insert_bundle_state (struct bundle_state
*);
274 static void initiate_bundle_state_table (void);
275 static void finish_bundle_state_table (void);
276 static int try_issue_nops (struct bundle_state
*, int);
277 static int try_issue_insn (struct bundle_state
*, rtx
);
278 static void issue_nops_and_insn (struct bundle_state
*, int, rtx
, int, int);
279 static int get_max_pos (state_t
);
280 static int get_template (state_t
, int);
282 static rtx
get_next_important_insn (rtx
, rtx
);
283 static bool important_for_bundling_p (rtx
);
284 static void bundling (FILE *, int, rtx
, rtx
);
286 static void ia64_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
287 HOST_WIDE_INT
, tree
);
288 static void ia64_file_start (void);
289 static void ia64_globalize_decl_name (FILE *, tree
);
291 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED
;
292 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED
;
293 static section
*ia64_select_rtx_section (enum machine_mode
, rtx
,
294 unsigned HOST_WIDE_INT
);
295 static void ia64_output_dwarf_dtprel (FILE *, int, rtx
)
297 static unsigned int ia64_section_type_flags (tree
, const char *, int);
298 static void ia64_init_libfuncs (void)
300 static void ia64_hpux_init_libfuncs (void)
302 static void ia64_sysv4_init_libfuncs (void)
304 static void ia64_vms_init_libfuncs (void)
306 static void ia64_soft_fp_init_libfuncs (void)
308 static bool ia64_vms_valid_pointer_mode (enum machine_mode mode
)
310 static tree
ia64_vms_common_object_attribute (tree
*, tree
, tree
, int, bool *)
313 static tree
ia64_handle_model_attribute (tree
*, tree
, tree
, int, bool *);
314 static tree
ia64_handle_version_id_attribute (tree
*, tree
, tree
, int, bool *);
315 static void ia64_encode_section_info (tree
, rtx
, int);
316 static rtx
ia64_struct_value_rtx (tree
, int);
317 static tree
ia64_gimplify_va_arg (tree
, tree
, gimple_seq
*, gimple_seq
*);
318 static bool ia64_scalar_mode_supported_p (enum machine_mode mode
);
319 static bool ia64_vector_mode_supported_p (enum machine_mode mode
);
320 static bool ia64_cannot_force_const_mem (rtx
);
321 static const char *ia64_mangle_type (const_tree
);
322 static const char *ia64_invalid_conversion (const_tree
, const_tree
);
323 static const char *ia64_invalid_unary_op (int, const_tree
);
324 static const char *ia64_invalid_binary_op (int, const_tree
, const_tree
);
325 static enum machine_mode
ia64_c_mode_for_suffix (char);
326 static enum machine_mode
ia64_promote_function_mode (const_tree
,
331 static void ia64_trampoline_init (rtx
, tree
, rtx
);
332 static void ia64_override_options_after_change (void);
334 static void ia64_dwarf_handle_frame_unspec (const char *, rtx
, int);
335 static tree
ia64_builtin_decl (unsigned, bool);
337 static reg_class_t
ia64_preferred_reload_class (rtx
, reg_class_t
);
338 static enum machine_mode
ia64_get_reg_raw_mode (int regno
);
339 static section
* ia64_hpux_function_section (tree
, enum node_frequency
,
342 /* Table of valid machine attributes. */
343 static const struct attribute_spec ia64_attribute_table
[] =
345 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
346 affects_type_identity } */
347 { "syscall_linkage", 0, 0, false, true, true, NULL
, false },
348 { "model", 1, 1, true, false, false, ia64_handle_model_attribute
,
350 #if TARGET_ABI_OPEN_VMS
351 { "common_object", 1, 1, true, false, false,
352 ia64_vms_common_object_attribute
, false },
354 { "version_id", 1, 1, true, false, false,
355 ia64_handle_version_id_attribute
, false },
356 { NULL
, 0, 0, false, false, false, NULL
, false }
359 /* Implement overriding of the optimization options. */
360 static const struct default_options ia64_option_optimization_table
[] =
362 { OPT_LEVELS_1_PLUS
, OPT_fomit_frame_pointer
, NULL
, 1 },
363 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
364 SUBTARGET_OPTIMIZATION_OPTIONS
,
366 { OPT_LEVELS_NONE
, 0, NULL
, 0 }
369 /* Initialize the GCC target structure. */
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
373 #undef TARGET_INIT_BUILTINS
374 #define TARGET_INIT_BUILTINS ia64_init_builtins
376 #undef TARGET_EXPAND_BUILTIN
377 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
379 #undef TARGET_BUILTIN_DECL
380 #define TARGET_BUILTIN_DECL ia64_builtin_decl
382 #undef TARGET_ASM_BYTE_OP
383 #define TARGET_ASM_BYTE_OP "\tdata1\t"
384 #undef TARGET_ASM_ALIGNED_HI_OP
385 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
386 #undef TARGET_ASM_ALIGNED_SI_OP
387 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
388 #undef TARGET_ASM_ALIGNED_DI_OP
389 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
390 #undef TARGET_ASM_UNALIGNED_HI_OP
391 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
392 #undef TARGET_ASM_UNALIGNED_SI_OP
393 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
394 #undef TARGET_ASM_UNALIGNED_DI_OP
395 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER ia64_assemble_integer
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE ia64_option_override
401 #undef TARGET_OPTION_OPTIMIZATION_TABLE
402 #define TARGET_OPTION_OPTIMIZATION_TABLE ia64_option_optimization_table
403 #undef TARGET_OPTION_DEFAULT_PARAMS
404 #define TARGET_OPTION_DEFAULT_PARAMS ia64_option_default_params
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
408 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
409 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
410 #undef TARGET_ASM_FUNCTION_EPILOGUE
411 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
413 #undef TARGET_IN_SMALL_DATA_P
414 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
416 #undef TARGET_SCHED_ADJUST_COST_2
417 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
418 #undef TARGET_SCHED_ISSUE_RATE
419 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
420 #undef TARGET_SCHED_VARIABLE_ISSUE
421 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
422 #undef TARGET_SCHED_INIT
423 #define TARGET_SCHED_INIT ia64_sched_init
424 #undef TARGET_SCHED_FINISH
425 #define TARGET_SCHED_FINISH ia64_sched_finish
426 #undef TARGET_SCHED_INIT_GLOBAL
427 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
428 #undef TARGET_SCHED_FINISH_GLOBAL
429 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
430 #undef TARGET_SCHED_REORDER
431 #define TARGET_SCHED_REORDER ia64_sched_reorder
432 #undef TARGET_SCHED_REORDER2
433 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
435 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
436 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
438 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
439 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
441 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
442 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
443 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
444 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
446 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
447 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
448 ia64_first_cycle_multipass_dfa_lookahead_guard
450 #undef TARGET_SCHED_DFA_NEW_CYCLE
451 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
453 #undef TARGET_SCHED_H_I_D_EXTENDED
454 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
456 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
457 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
459 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
460 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
462 #undef TARGET_SCHED_SET_SCHED_CONTEXT
463 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
465 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
466 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
468 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
469 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
471 #undef TARGET_SCHED_SET_SCHED_FLAGS
472 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
474 #undef TARGET_SCHED_GET_INSN_SPEC_DS
475 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
477 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
478 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
480 #undef TARGET_SCHED_SPECULATE_INSN
481 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
483 #undef TARGET_SCHED_NEEDS_BLOCK_P
484 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
486 #undef TARGET_SCHED_GEN_SPEC_CHECK
487 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
489 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
490 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
491 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
493 #undef TARGET_SCHED_SKIP_RTX_P
494 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
496 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
497 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
498 #undef TARGET_ARG_PARTIAL_BYTES
499 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
500 #undef TARGET_FUNCTION_ARG
501 #define TARGET_FUNCTION_ARG ia64_function_arg
502 #undef TARGET_FUNCTION_INCOMING_ARG
503 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
504 #undef TARGET_FUNCTION_ARG_ADVANCE
505 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
506 #undef TARGET_FUNCTION_ARG_BOUNDARY
507 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
509 #undef TARGET_ASM_OUTPUT_MI_THUNK
510 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
511 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
512 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
514 #undef TARGET_ASM_FILE_START
515 #define TARGET_ASM_FILE_START ia64_file_start
517 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
518 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
520 #undef TARGET_REGISTER_MOVE_COST
521 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
522 #undef TARGET_MEMORY_MOVE_COST
523 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
524 #undef TARGET_RTX_COSTS
525 #define TARGET_RTX_COSTS ia64_rtx_costs
526 #undef TARGET_ADDRESS_COST
527 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
529 #undef TARGET_UNSPEC_MAY_TRAP_P
530 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
532 #undef TARGET_MACHINE_DEPENDENT_REORG
533 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
535 #undef TARGET_ENCODE_SECTION_INFO
536 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
538 #undef TARGET_SECTION_TYPE_FLAGS
539 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
542 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
543 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
549 /* ??? Investigate. */
551 #undef TARGET_PROMOTE_PROTOTYPES
552 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
555 #undef TARGET_FUNCTION_VALUE
556 #define TARGET_FUNCTION_VALUE ia64_function_value
557 #undef TARGET_LIBCALL_VALUE
558 #define TARGET_LIBCALL_VALUE ia64_libcall_value
559 #undef TARGET_FUNCTION_VALUE_REGNO_P
560 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
562 #undef TARGET_STRUCT_VALUE_RTX
563 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
564 #undef TARGET_RETURN_IN_MEMORY
565 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
566 #undef TARGET_SETUP_INCOMING_VARARGS
567 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
568 #undef TARGET_STRICT_ARGUMENT_NAMING
569 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
570 #undef TARGET_MUST_PASS_IN_STACK
571 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
572 #undef TARGET_GET_RAW_RESULT_MODE
573 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
574 #undef TARGET_GET_RAW_ARG_MODE
575 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
577 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
578 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
580 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
581 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ia64_dwarf_handle_frame_unspec
582 #undef TARGET_ASM_UNWIND_EMIT
583 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
584 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
585 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
589 #undef TARGET_DEBUG_UNWIND_INFO
590 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
591 #undef TARGET_EXCEPT_UNWIND_INFO
592 #define TARGET_EXCEPT_UNWIND_INFO ia64_except_unwind_info
594 #undef TARGET_SCALAR_MODE_SUPPORTED_P
595 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
596 #undef TARGET_VECTOR_MODE_SUPPORTED_P
597 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
599 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
600 in an order different from the specified program order. */
601 #undef TARGET_RELAXED_ORDERING
602 #define TARGET_RELAXED_ORDERING true
604 #undef TARGET_DEFAULT_TARGET_FLAGS
605 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
606 #undef TARGET_HANDLE_OPTION
607 #define TARGET_HANDLE_OPTION ia64_handle_option
609 #undef TARGET_CANNOT_FORCE_CONST_MEM
610 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
612 #undef TARGET_MANGLE_TYPE
613 #define TARGET_MANGLE_TYPE ia64_mangle_type
615 #undef TARGET_INVALID_CONVERSION
616 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
617 #undef TARGET_INVALID_UNARY_OP
618 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
619 #undef TARGET_INVALID_BINARY_OP
620 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
622 #undef TARGET_C_MODE_FOR_SUFFIX
623 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
625 #undef TARGET_CAN_ELIMINATE
626 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
628 #undef TARGET_TRAMPOLINE_INIT
629 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
631 #undef TARGET_INVALID_WITHIN_DOLOOP
632 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
634 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
635 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
637 #undef TARGET_PREFERRED_RELOAD_CLASS
638 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
640 struct gcc_target targetm
= TARGET_INITIALIZER
;
644 ADDR_AREA_NORMAL
, /* normal address area */
645 ADDR_AREA_SMALL
/* addressable by "addl" (-2MB < addr < 2MB) */
649 static GTY(()) tree small_ident1
;
650 static GTY(()) tree small_ident2
;
655 if (small_ident1
== 0)
657 small_ident1
= get_identifier ("small");
658 small_ident2
= get_identifier ("__small__");
662 /* Retrieve the address area that has been chosen for the given decl. */
664 static ia64_addr_area
665 ia64_get_addr_area (tree decl
)
669 model_attr
= lookup_attribute ("model", DECL_ATTRIBUTES (decl
));
675 id
= TREE_VALUE (TREE_VALUE (model_attr
));
676 if (id
== small_ident1
|| id
== small_ident2
)
677 return ADDR_AREA_SMALL
;
679 return ADDR_AREA_NORMAL
;
683 ia64_handle_model_attribute (tree
*node
, tree name
, tree args
,
684 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
686 ia64_addr_area addr_area
= ADDR_AREA_NORMAL
;
688 tree arg
, decl
= *node
;
691 arg
= TREE_VALUE (args
);
692 if (arg
== small_ident1
|| arg
== small_ident2
)
694 addr_area
= ADDR_AREA_SMALL
;
698 warning (OPT_Wattributes
, "invalid argument of %qE attribute",
700 *no_add_attrs
= true;
703 switch (TREE_CODE (decl
))
706 if ((DECL_CONTEXT (decl
) && TREE_CODE (DECL_CONTEXT (decl
))
708 && !TREE_STATIC (decl
))
710 error_at (DECL_SOURCE_LOCATION (decl
),
711 "an address area attribute cannot be specified for "
713 *no_add_attrs
= true;
715 area
= ia64_get_addr_area (decl
);
716 if (area
!= ADDR_AREA_NORMAL
&& addr_area
!= area
)
718 error ("address area of %q+D conflicts with previous "
719 "declaration", decl
);
720 *no_add_attrs
= true;
725 error_at (DECL_SOURCE_LOCATION (decl
),
726 "address area attribute cannot be specified for "
728 *no_add_attrs
= true;
732 warning (OPT_Wattributes
, "%qE attribute ignored",
734 *no_add_attrs
= true;
741 /* The section must have global and overlaid attributes. */
742 #define SECTION_VMS_OVERLAY SECTION_MACH_DEP
744 /* Part of the low level implementation of DEC Ada pragma Common_Object which
745 enables the shared use of variables stored in overlaid linker areas
746 corresponding to the use of Fortran COMMON. */
749 ia64_vms_common_object_attribute (tree
*node
, tree name
, tree args
,
750 int flags ATTRIBUTE_UNUSED
,
758 DECL_COMMON (decl
) = 1;
759 id
= TREE_VALUE (args
);
760 if (TREE_CODE (id
) == IDENTIFIER_NODE
)
761 val
= build_string (IDENTIFIER_LENGTH (id
), IDENTIFIER_POINTER (id
));
762 else if (TREE_CODE (id
) == STRING_CST
)
766 warning (OPT_Wattributes
,
767 "%qE attribute requires a string constant argument", name
);
768 *no_add_attrs
= true;
771 DECL_SECTION_NAME (decl
) = val
;
775 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
778 ia64_vms_output_aligned_decl_common (FILE *file
, tree decl
, const char *name
,
779 unsigned HOST_WIDE_INT size
,
782 tree attr
= DECL_ATTRIBUTES (decl
);
784 /* As common_object attribute set DECL_SECTION_NAME check it before
785 looking up the attribute. */
786 if (DECL_SECTION_NAME (decl
) && attr
)
787 attr
= lookup_attribute ("common_object", attr
);
793 /* Code from elfos.h. */
794 fprintf (file
, "%s", COMMON_ASM_OP
);
795 assemble_name (file
, name
);
796 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
797 size
, align
/ BITS_PER_UNIT
);
801 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
802 ASM_OUTPUT_LABEL (file
, name
);
803 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
807 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
810 ia64_vms_elf_asm_named_section (const char *name
, unsigned int flags
,
813 if (!(flags
& SECTION_VMS_OVERLAY
))
815 default_elf_asm_named_section (name
, flags
, decl
);
818 if (flags
!= (SECTION_VMS_OVERLAY
| SECTION_WRITE
))
821 if (flags
& SECTION_DECLARED
)
823 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
827 fprintf (asm_out_file
, "\t.section\t%s,\"awgO\"\n", name
);
831 ia64_encode_addr_area (tree decl
, rtx symbol
)
835 flags
= SYMBOL_REF_FLAGS (symbol
);
836 switch (ia64_get_addr_area (decl
))
838 case ADDR_AREA_NORMAL
: break;
839 case ADDR_AREA_SMALL
: flags
|= SYMBOL_FLAG_SMALL_ADDR
; break;
840 default: gcc_unreachable ();
842 SYMBOL_REF_FLAGS (symbol
) = flags
;
846 ia64_encode_section_info (tree decl
, rtx rtl
, int first
)
848 default_encode_section_info (decl
, rtl
, first
);
850 /* Careful not to prod global register variables. */
851 if (TREE_CODE (decl
) == VAR_DECL
852 && GET_CODE (DECL_RTL (decl
)) == MEM
853 && GET_CODE (XEXP (DECL_RTL (decl
), 0)) == SYMBOL_REF
854 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
)))
855 ia64_encode_addr_area (decl
, XEXP (rtl
, 0));
858 /* Return 1 if the operands of a move are ok. */
861 ia64_move_ok (rtx dst
, rtx src
)
863 /* If we're under init_recog_no_volatile, we'll not be able to use
864 memory_operand. So check the code directly and don't worry about
865 the validity of the underlying address, which should have been
866 checked elsewhere anyway. */
867 if (GET_CODE (dst
) != MEM
)
869 if (GET_CODE (src
) == MEM
)
871 if (register_operand (src
, VOIDmode
))
874 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
875 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
876 return src
== const0_rtx
;
878 return satisfies_constraint_G (src
);
881 /* Return 1 if the operands are ok for a floating point load pair. */
884 ia64_load_pair_ok (rtx dst
, rtx src
)
886 if (GET_CODE (dst
) != REG
|| !FP_REGNO_P (REGNO (dst
)))
888 if (GET_CODE (src
) != MEM
|| MEM_VOLATILE_P (src
))
890 switch (GET_CODE (XEXP (src
, 0)))
899 rtx adjust
= XEXP (XEXP (XEXP (src
, 0), 1), 1);
901 if (GET_CODE (adjust
) != CONST_INT
902 || INTVAL (adjust
) != GET_MODE_SIZE (GET_MODE (src
)))
913 addp4_optimize_ok (rtx op1
, rtx op2
)
915 return (basereg_operand (op1
, GET_MODE(op1
)) !=
916 basereg_operand (op2
, GET_MODE(op2
)));
919 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
920 Return the length of the field, or <= 0 on failure. */
923 ia64_depz_field_mask (rtx rop
, rtx rshift
)
925 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
926 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
928 /* Get rid of the zero bits we're shifting in. */
931 /* We must now have a solid block of 1's at bit 0. */
932 return exact_log2 (op
+ 1);
935 /* Return the TLS model to use for ADDR. */
937 static enum tls_model
938 tls_symbolic_operand_type (rtx addr
)
940 enum tls_model tls_kind
= TLS_MODEL_NONE
;
942 if (GET_CODE (addr
) == CONST
)
944 if (GET_CODE (XEXP (addr
, 0)) == PLUS
945 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
)
946 tls_kind
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr
, 0), 0));
948 else if (GET_CODE (addr
) == SYMBOL_REF
)
949 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
954 /* Return true if X is a constant that is valid for some immediate
955 field in an instruction. */
958 ia64_legitimate_constant_p (rtx x
)
960 switch (GET_CODE (x
))
967 if (GET_MODE (x
) == VOIDmode
|| GET_MODE (x
) == SFmode
968 || GET_MODE (x
) == DFmode
)
970 return satisfies_constraint_G (x
);
974 /* ??? Short term workaround for PR 28490. We must make the code here
975 match the code in ia64_expand_move and move_operand, even though they
976 are both technically wrong. */
977 if (tls_symbolic_operand_type (x
) == 0)
979 HOST_WIDE_INT addend
= 0;
982 if (GET_CODE (op
) == CONST
983 && GET_CODE (XEXP (op
, 0)) == PLUS
984 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
986 addend
= INTVAL (XEXP (XEXP (op
, 0), 1));
987 op
= XEXP (XEXP (op
, 0), 0);
990 if (any_offset_symbol_operand (op
, GET_MODE (op
))
991 || function_operand (op
, GET_MODE (op
)))
993 if (aligned_offset_symbol_operand (op
, GET_MODE (op
)))
994 return (addend
& 0x3fff) == 0;
1001 enum machine_mode mode
= GET_MODE (x
);
1003 if (mode
== V2SFmode
)
1004 return satisfies_constraint_Y (x
);
1006 return (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1007 && GET_MODE_SIZE (mode
) <= 8);
1015 /* Don't allow TLS addresses to get spilled to memory. */
1018 ia64_cannot_force_const_mem (rtx x
)
1020 if (GET_MODE (x
) == RFmode
)
1022 return tls_symbolic_operand_type (x
) != 0;
1025 /* Expand a symbolic constant load. */
1028 ia64_expand_load_address (rtx dest
, rtx src
)
1030 gcc_assert (GET_CODE (dest
) == REG
);
1032 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1033 having to pointer-extend the value afterward. Other forms of address
1034 computation below are also more natural to compute as 64-bit quantities.
1035 If we've been given an SImode destination register, change it. */
1036 if (GET_MODE (dest
) != Pmode
)
1037 dest
= gen_rtx_REG_offset (dest
, Pmode
, REGNO (dest
),
1038 byte_lowpart_offset (Pmode
, GET_MODE (dest
)));
1042 if (small_addr_symbolic_operand (src
, VOIDmode
))
1045 if (TARGET_AUTO_PIC
)
1046 emit_insn (gen_load_gprel64 (dest
, src
));
1047 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (src
))
1048 emit_insn (gen_load_fptr (dest
, src
));
1049 else if (sdata_symbolic_operand (src
, VOIDmode
))
1050 emit_insn (gen_load_gprel (dest
, src
));
1053 HOST_WIDE_INT addend
= 0;
1056 /* We did split constant offsets in ia64_expand_move, and we did try
1057 to keep them split in move_operand, but we also allowed reload to
1058 rematerialize arbitrary constants rather than spill the value to
1059 the stack and reload it. So we have to be prepared here to split
1060 them apart again. */
1061 if (GET_CODE (src
) == CONST
)
1063 HOST_WIDE_INT hi
, lo
;
1065 hi
= INTVAL (XEXP (XEXP (src
, 0), 1));
1066 lo
= ((hi
& 0x3fff) ^ 0x2000) - 0x2000;
1072 src
= plus_constant (XEXP (XEXP (src
, 0), 0), hi
);
1076 tmp
= gen_rtx_HIGH (Pmode
, src
);
1077 tmp
= gen_rtx_PLUS (Pmode
, tmp
, pic_offset_table_rtx
);
1078 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
1080 tmp
= gen_rtx_LO_SUM (Pmode
, dest
, src
);
1081 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
1085 tmp
= gen_rtx_PLUS (Pmode
, dest
, GEN_INT (addend
));
1086 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
1093 static GTY(()) rtx gen_tls_tga
;
1095 gen_tls_get_addr (void)
1098 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
1102 static GTY(()) rtx thread_pointer_rtx
;
1104 gen_thread_pointer (void)
1106 if (!thread_pointer_rtx
)
1107 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
1108 return thread_pointer_rtx
;
1112 ia64_expand_tls_address (enum tls_model tls_kind
, rtx op0
, rtx op1
,
1113 rtx orig_op1
, HOST_WIDE_INT addend
)
1115 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
1117 HOST_WIDE_INT addend_lo
, addend_hi
;
1121 case TLS_MODEL_GLOBAL_DYNAMIC
:
1124 tga_op1
= gen_reg_rtx (Pmode
);
1125 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
1127 tga_op2
= gen_reg_rtx (Pmode
);
1128 emit_insn (gen_load_dtprel (tga_op2
, op1
));
1130 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1131 LCT_CONST
, Pmode
, 2, tga_op1
,
1132 Pmode
, tga_op2
, Pmode
);
1134 insns
= get_insns ();
1137 if (GET_MODE (op0
) != Pmode
)
1139 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
1142 case TLS_MODEL_LOCAL_DYNAMIC
:
1143 /* ??? This isn't the completely proper way to do local-dynamic
1144 If the call to __tls_get_addr is used only by a single symbol,
1145 then we should (somehow) move the dtprel to the second arg
1146 to avoid the extra add. */
1149 tga_op1
= gen_reg_rtx (Pmode
);
1150 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
1152 tga_op2
= const0_rtx
;
1154 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1155 LCT_CONST
, Pmode
, 2, tga_op1
,
1156 Pmode
, tga_op2
, Pmode
);
1158 insns
= get_insns ();
1161 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
1163 tmp
= gen_reg_rtx (Pmode
);
1164 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
1166 if (!register_operand (op0
, Pmode
))
1167 op0
= gen_reg_rtx (Pmode
);
1170 emit_insn (gen_load_dtprel (op0
, op1
));
1171 emit_insn (gen_adddi3 (op0
, tmp
, op0
));
1174 emit_insn (gen_add_dtprel (op0
, op1
, tmp
));
1177 case TLS_MODEL_INITIAL_EXEC
:
1178 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
1179 addend_hi
= addend
- addend_lo
;
1181 op1
= plus_constant (op1
, addend_hi
);
1184 tmp
= gen_reg_rtx (Pmode
);
1185 emit_insn (gen_load_tprel (tmp
, op1
));
1187 if (!register_operand (op0
, Pmode
))
1188 op0
= gen_reg_rtx (Pmode
);
1189 emit_insn (gen_adddi3 (op0
, tmp
, gen_thread_pointer ()));
1192 case TLS_MODEL_LOCAL_EXEC
:
1193 if (!register_operand (op0
, Pmode
))
1194 op0
= gen_reg_rtx (Pmode
);
1200 emit_insn (gen_load_tprel (op0
, op1
));
1201 emit_insn (gen_adddi3 (op0
, op0
, gen_thread_pointer ()));
1204 emit_insn (gen_add_tprel (op0
, op1
, gen_thread_pointer ()));
1212 op0
= expand_simple_binop (Pmode
, PLUS
, op0
, GEN_INT (addend
),
1213 orig_op0
, 1, OPTAB_DIRECT
);
1214 if (orig_op0
== op0
)
1216 if (GET_MODE (orig_op0
) == Pmode
)
1218 return gen_lowpart (GET_MODE (orig_op0
), op0
);
1222 ia64_expand_move (rtx op0
, rtx op1
)
1224 enum machine_mode mode
= GET_MODE (op0
);
1226 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
1227 op1
= force_reg (mode
, op1
);
1229 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
1231 HOST_WIDE_INT addend
= 0;
1232 enum tls_model tls_kind
;
1235 if (GET_CODE (op1
) == CONST
1236 && GET_CODE (XEXP (op1
, 0)) == PLUS
1237 && GET_CODE (XEXP (XEXP (op1
, 0), 1)) == CONST_INT
)
1239 addend
= INTVAL (XEXP (XEXP (op1
, 0), 1));
1240 sym
= XEXP (XEXP (op1
, 0), 0);
1243 tls_kind
= tls_symbolic_operand_type (sym
);
1245 return ia64_expand_tls_address (tls_kind
, op0
, sym
, op1
, addend
);
1247 if (any_offset_symbol_operand (sym
, mode
))
1249 else if (aligned_offset_symbol_operand (sym
, mode
))
1251 HOST_WIDE_INT addend_lo
, addend_hi
;
1253 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
1254 addend_hi
= addend
- addend_lo
;
1258 op1
= plus_constant (sym
, addend_hi
);
1267 if (reload_completed
)
1269 /* We really should have taken care of this offset earlier. */
1270 gcc_assert (addend
== 0);
1271 if (ia64_expand_load_address (op0
, op1
))
1277 rtx subtarget
= !can_create_pseudo_p () ? op0
: gen_reg_rtx (mode
);
1279 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, op1
));
1281 op1
= expand_simple_binop (mode
, PLUS
, subtarget
,
1282 GEN_INT (addend
), op0
, 1, OPTAB_DIRECT
);
1291 /* Split a move from OP1 to OP0 conditional on COND. */
1294 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
1296 rtx insn
, first
= get_last_insn ();
1298 emit_move_insn (op0
, op1
);
1300 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
1302 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
1306 /* Split a post-reload TImode or TFmode reference into two DImode
1307 components. This is made extra difficult by the fact that we do
1308 not get any scratch registers to work with, because reload cannot
1309 be prevented from giving us a scratch that overlaps the register
1310 pair involved. So instead, when addressing memory, we tweak the
1311 pointer register up and back down with POST_INCs. Or up and not
1312 back down when we can get away with it.
1314 REVERSED is true when the loads must be done in reversed order
1315 (high word first) for correctness. DEAD is true when the pointer
1316 dies with the second insn we generate and therefore the second
1317 address must not carry a postmodify.
1319 May return an insn which is to be emitted after the moves. */
1322 ia64_split_tmode (rtx out
[2], rtx in
, bool reversed
, bool dead
)
1326 switch (GET_CODE (in
))
1329 out
[reversed
] = gen_rtx_REG (DImode
, REGNO (in
));
1330 out
[!reversed
] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1335 /* Cannot occur reversed. */
1336 gcc_assert (!reversed
);
1338 if (GET_MODE (in
) != TFmode
)
1339 split_double (in
, &out
[0], &out
[1]);
1341 /* split_double does not understand how to split a TFmode
1342 quantity into a pair of DImode constants. */
1345 unsigned HOST_WIDE_INT p
[2];
1346 long l
[4]; /* TFmode is 128 bits */
1348 REAL_VALUE_FROM_CONST_DOUBLE (r
, in
);
1349 real_to_target (l
, &r
, TFmode
);
1351 if (FLOAT_WORDS_BIG_ENDIAN
)
1353 p
[0] = (((unsigned HOST_WIDE_INT
) l
[0]) << 32) + l
[1];
1354 p
[1] = (((unsigned HOST_WIDE_INT
) l
[2]) << 32) + l
[3];
1358 p
[0] = (((unsigned HOST_WIDE_INT
) l
[1]) << 32) + l
[0];
1359 p
[1] = (((unsigned HOST_WIDE_INT
) l
[3]) << 32) + l
[2];
1361 out
[0] = GEN_INT (p
[0]);
1362 out
[1] = GEN_INT (p
[1]);
1368 rtx base
= XEXP (in
, 0);
1371 switch (GET_CODE (base
))
1376 out
[0] = adjust_automodify_address
1377 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1378 out
[1] = adjust_automodify_address
1379 (in
, DImode
, dead
? 0 : gen_rtx_POST_DEC (Pmode
, base
), 8);
1383 /* Reversal requires a pre-increment, which can only
1384 be done as a separate insn. */
1385 emit_insn (gen_adddi3 (base
, base
, GEN_INT (8)));
1386 out
[0] = adjust_automodify_address
1387 (in
, DImode
, gen_rtx_POST_DEC (Pmode
, base
), 8);
1388 out
[1] = adjust_address (in
, DImode
, 0);
1393 gcc_assert (!reversed
&& !dead
);
1395 /* Just do the increment in two steps. */
1396 out
[0] = adjust_automodify_address (in
, DImode
, 0, 0);
1397 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1401 gcc_assert (!reversed
&& !dead
);
1403 /* Add 8, subtract 24. */
1404 base
= XEXP (base
, 0);
1405 out
[0] = adjust_automodify_address
1406 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1407 out
[1] = adjust_automodify_address
1409 gen_rtx_POST_MODIFY (Pmode
, base
, plus_constant (base
, -24)),
1414 gcc_assert (!reversed
&& !dead
);
1416 /* Extract and adjust the modification. This case is
1417 trickier than the others, because we might have an
1418 index register, or we might have a combined offset that
1419 doesn't fit a signed 9-bit displacement field. We can
1420 assume the incoming expression is already legitimate. */
1421 offset
= XEXP (base
, 1);
1422 base
= XEXP (base
, 0);
1424 out
[0] = adjust_automodify_address
1425 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1427 if (GET_CODE (XEXP (offset
, 1)) == REG
)
1429 /* Can't adjust the postmodify to match. Emit the
1430 original, then a separate addition insn. */
1431 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1432 fixup
= gen_adddi3 (base
, base
, GEN_INT (-8));
1436 gcc_assert (GET_CODE (XEXP (offset
, 1)) == CONST_INT
);
1437 if (INTVAL (XEXP (offset
, 1)) < -256 + 8)
1439 /* Again the postmodify cannot be made to match,
1440 but in this case it's more efficient to get rid
1441 of the postmodify entirely and fix up with an
1443 out
[1] = adjust_automodify_address (in
, DImode
, base
, 8);
1445 (base
, base
, GEN_INT (INTVAL (XEXP (offset
, 1)) - 8));
1449 /* Combined offset still fits in the displacement field.
1450 (We cannot overflow it at the high end.) */
1451 out
[1] = adjust_automodify_address
1452 (in
, DImode
, gen_rtx_POST_MODIFY
1453 (Pmode
, base
, gen_rtx_PLUS
1455 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8))),
1474 /* Split a TImode or TFmode move instruction after reload.
1475 This is used by *movtf_internal and *movti_internal. */
1477 ia64_split_tmode_move (rtx operands
[])
1479 rtx in
[2], out
[2], insn
;
1482 bool reversed
= false;
1484 /* It is possible for reload to decide to overwrite a pointer with
1485 the value it points to. In that case we have to do the loads in
1486 the appropriate order so that the pointer is not destroyed too
1487 early. Also we must not generate a postmodify for that second
1488 load, or rws_access_regno will die. */
1489 if (GET_CODE (operands
[1]) == MEM
1490 && reg_overlap_mentioned_p (operands
[0], operands
[1]))
1492 rtx base
= XEXP (operands
[1], 0);
1493 while (GET_CODE (base
) != REG
)
1494 base
= XEXP (base
, 0);
1496 if (REGNO (base
) == REGNO (operands
[0]))
1500 /* Another reason to do the moves in reversed order is if the first
1501 element of the target register pair is also the second element of
1502 the source register pair. */
1503 if (GET_CODE (operands
[0]) == REG
&& GET_CODE (operands
[1]) == REG
1504 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
1507 fixup
[0] = ia64_split_tmode (in
, operands
[1], reversed
, dead
);
1508 fixup
[1] = ia64_split_tmode (out
, operands
[0], reversed
, dead
);
1510 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1511 if (GET_CODE (EXP) == MEM \
1512 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1513 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1514 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1515 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1517 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[0], in
[0]));
1518 MAYBE_ADD_REG_INC_NOTE (insn
, in
[0]);
1519 MAYBE_ADD_REG_INC_NOTE (insn
, out
[0]);
1521 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[1], in
[1]));
1522 MAYBE_ADD_REG_INC_NOTE (insn
, in
[1]);
1523 MAYBE_ADD_REG_INC_NOTE (insn
, out
[1]);
1526 emit_insn (fixup
[0]);
1528 emit_insn (fixup
[1]);
1530 #undef MAYBE_ADD_REG_INC_NOTE
1533 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1534 through memory plus an extra GR scratch register. Except that you can
1535 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1536 SECONDARY_RELOAD_CLASS, but not both.
1538 We got into problems in the first place by allowing a construct like
1539 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1540 This solution attempts to prevent this situation from occurring. When
1541 we see something like the above, we spill the inner register to memory. */
1544 spill_xfmode_rfmode_operand (rtx in
, int force
, enum machine_mode mode
)
1546 if (GET_CODE (in
) == SUBREG
1547 && GET_MODE (SUBREG_REG (in
)) == TImode
1548 && GET_CODE (SUBREG_REG (in
)) == REG
)
1550 rtx memt
= assign_stack_temp (TImode
, 16, 0);
1551 emit_move_insn (memt
, SUBREG_REG (in
));
1552 return adjust_address (memt
, mode
, 0);
1554 else if (force
&& GET_CODE (in
) == REG
)
1556 rtx memx
= assign_stack_temp (mode
, 16, 0);
1557 emit_move_insn (memx
, in
);
1564 /* Expand the movxf or movrf pattern (MODE says which) with the given
1565 OPERANDS, returning true if the pattern should then invoke
1569 ia64_expand_movxf_movrf (enum machine_mode mode
, rtx operands
[])
1571 rtx op0
= operands
[0];
1573 if (GET_CODE (op0
) == SUBREG
)
1574 op0
= SUBREG_REG (op0
);
1576 /* We must support XFmode loads into general registers for stdarg/vararg,
1577 unprototyped calls, and a rare case where a long double is passed as
1578 an argument after a float HFA fills the FP registers. We split them into
1579 DImode loads for convenience. We also need to support XFmode stores
1580 for the last case. This case does not happen for stdarg/vararg routines,
1581 because we do a block store to memory of unnamed arguments. */
1583 if (GET_CODE (op0
) == REG
&& GR_REGNO_P (REGNO (op0
)))
1587 /* We're hoping to transform everything that deals with XFmode
1588 quantities and GR registers early in the compiler. */
1589 gcc_assert (can_create_pseudo_p ());
1591 /* Struct to register can just use TImode instead. */
1592 if ((GET_CODE (operands
[1]) == SUBREG
1593 && GET_MODE (SUBREG_REG (operands
[1])) == TImode
)
1594 || (GET_CODE (operands
[1]) == REG
1595 && GR_REGNO_P (REGNO (operands
[1]))))
1597 rtx op1
= operands
[1];
1599 if (GET_CODE (op1
) == SUBREG
)
1600 op1
= SUBREG_REG (op1
);
1602 op1
= gen_rtx_REG (TImode
, REGNO (op1
));
1604 emit_move_insn (gen_rtx_REG (TImode
, REGNO (op0
)), op1
);
1608 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
1610 /* Don't word-swap when reading in the constant. */
1611 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
)),
1612 operand_subword (operands
[1], WORDS_BIG_ENDIAN
,
1614 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
) + 1),
1615 operand_subword (operands
[1], !WORDS_BIG_ENDIAN
,
1620 /* If the quantity is in a register not known to be GR, spill it. */
1621 if (register_operand (operands
[1], mode
))
1622 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 1, mode
);
1624 gcc_assert (GET_CODE (operands
[1]) == MEM
);
1626 /* Don't word-swap when reading in the value. */
1627 out
[0] = gen_rtx_REG (DImode
, REGNO (op0
));
1628 out
[1] = gen_rtx_REG (DImode
, REGNO (op0
) + 1);
1630 emit_move_insn (out
[0], adjust_address (operands
[1], DImode
, 0));
1631 emit_move_insn (out
[1], adjust_address (operands
[1], DImode
, 8));
1635 if (GET_CODE (operands
[1]) == REG
&& GR_REGNO_P (REGNO (operands
[1])))
1637 /* We're hoping to transform everything that deals with XFmode
1638 quantities and GR registers early in the compiler. */
1639 gcc_assert (can_create_pseudo_p ());
1641 /* Op0 can't be a GR_REG here, as that case is handled above.
1642 If op0 is a register, then we spill op1, so that we now have a
1643 MEM operand. This requires creating an XFmode subreg of a TImode reg
1644 to force the spill. */
1645 if (register_operand (operands
[0], mode
))
1647 rtx op1
= gen_rtx_REG (TImode
, REGNO (operands
[1]));
1648 op1
= gen_rtx_SUBREG (mode
, op1
, 0);
1649 operands
[1] = spill_xfmode_rfmode_operand (op1
, 0, mode
);
1656 gcc_assert (GET_CODE (operands
[0]) == MEM
);
1658 /* Don't word-swap when writing out the value. */
1659 in
[0] = gen_rtx_REG (DImode
, REGNO (operands
[1]));
1660 in
[1] = gen_rtx_REG (DImode
, REGNO (operands
[1]) + 1);
1662 emit_move_insn (adjust_address (operands
[0], DImode
, 0), in
[0]);
1663 emit_move_insn (adjust_address (operands
[0], DImode
, 8), in
[1]);
1668 if (!reload_in_progress
&& !reload_completed
)
1670 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 0, mode
);
1672 if (GET_MODE (op0
) == TImode
&& GET_CODE (op0
) == REG
)
1674 rtx memt
, memx
, in
= operands
[1];
1675 if (CONSTANT_P (in
))
1676 in
= validize_mem (force_const_mem (mode
, in
));
1677 if (GET_CODE (in
) == MEM
)
1678 memt
= adjust_address (in
, TImode
, 0);
1681 memt
= assign_stack_temp (TImode
, 16, 0);
1682 memx
= adjust_address (memt
, mode
, 0);
1683 emit_move_insn (memx
, in
);
1685 emit_move_insn (op0
, memt
);
1689 if (!ia64_move_ok (operands
[0], operands
[1]))
1690 operands
[1] = force_reg (mode
, operands
[1]);
1696 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1697 with the expression that holds the compare result (in VOIDmode). */
1699 static GTY(()) rtx cmptf_libfunc
;
1702 ia64_expand_compare (rtx
*expr
, rtx
*op0
, rtx
*op1
)
1704 enum rtx_code code
= GET_CODE (*expr
);
1707 /* If we have a BImode input, then we already have a compare result, and
1708 do not need to emit another comparison. */
1709 if (GET_MODE (*op0
) == BImode
)
1711 gcc_assert ((code
== NE
|| code
== EQ
) && *op1
== const0_rtx
);
1714 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1715 magic number as its third argument, that indicates what to do.
1716 The return value is an integer to be compared against zero. */
1717 else if (TARGET_HPUX
&& GET_MODE (*op0
) == TFmode
)
1720 QCMP_INV
= 1, /* Raise FP_INVALID on SNaN as a side effect. */
1727 enum rtx_code ncode
;
1730 gcc_assert (cmptf_libfunc
&& GET_MODE (*op1
) == TFmode
);
1733 /* 1 = equal, 0 = not equal. Equality operators do
1734 not raise FP_INVALID when given an SNaN operand. */
1735 case EQ
: magic
= QCMP_EQ
; ncode
= NE
; break;
1736 case NE
: magic
= QCMP_EQ
; ncode
= EQ
; break;
1737 /* isunordered() from C99. */
1738 case UNORDERED
: magic
= QCMP_UNORD
; ncode
= NE
; break;
1739 case ORDERED
: magic
= QCMP_UNORD
; ncode
= EQ
; break;
1740 /* Relational operators raise FP_INVALID when given
1742 case LT
: magic
= QCMP_LT
|QCMP_INV
; ncode
= NE
; break;
1743 case LE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1744 case GT
: magic
= QCMP_GT
|QCMP_INV
; ncode
= NE
; break;
1745 case GE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1746 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1747 Expanders for buneq etc. weuld have to be added to ia64.md
1748 for this to be useful. */
1749 default: gcc_unreachable ();
1754 ret
= emit_library_call_value (cmptf_libfunc
, 0, LCT_CONST
, DImode
, 3,
1755 *op0
, TFmode
, *op1
, TFmode
,
1756 GEN_INT (magic
), DImode
);
1757 cmp
= gen_reg_rtx (BImode
);
1758 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1759 gen_rtx_fmt_ee (ncode
, BImode
,
1762 insns
= get_insns ();
1765 emit_libcall_block (insns
, cmp
, cmp
,
1766 gen_rtx_fmt_ee (code
, BImode
, *op0
, *op1
));
1771 cmp
= gen_reg_rtx (BImode
);
1772 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1773 gen_rtx_fmt_ee (code
, BImode
, *op0
, *op1
)));
1777 *expr
= gen_rtx_fmt_ee (code
, VOIDmode
, cmp
, const0_rtx
);
1782 /* Generate an integral vector comparison. Return true if the condition has
1783 been reversed, and so the sense of the comparison should be inverted. */
1786 ia64_expand_vecint_compare (enum rtx_code code
, enum machine_mode mode
,
1787 rtx dest
, rtx op0
, rtx op1
)
1789 bool negate
= false;
1792 /* Canonicalize the comparison to EQ, GT, GTU. */
1803 code
= reverse_condition (code
);
1809 code
= reverse_condition (code
);
1815 code
= swap_condition (code
);
1816 x
= op0
, op0
= op1
, op1
= x
;
1823 /* Unsigned parallel compare is not supported by the hardware. Play some
1824 tricks to turn this into a signed comparison against 0. */
1833 /* Subtract (-(INT MAX) - 1) from both operands to make
1835 mask
= GEN_INT (0x80000000);
1836 mask
= gen_rtx_CONST_VECTOR (V2SImode
, gen_rtvec (2, mask
, mask
));
1837 mask
= force_reg (mode
, mask
);
1838 t1
= gen_reg_rtx (mode
);
1839 emit_insn (gen_subv2si3 (t1
, op0
, mask
));
1840 t2
= gen_reg_rtx (mode
);
1841 emit_insn (gen_subv2si3 (t2
, op1
, mask
));
1850 /* Perform a parallel unsigned saturating subtraction. */
1851 x
= gen_reg_rtx (mode
);
1852 emit_insn (gen_rtx_SET (VOIDmode
, x
,
1853 gen_rtx_US_MINUS (mode
, op0
, op1
)));
1857 op1
= CONST0_RTX (mode
);
1866 x
= gen_rtx_fmt_ee (code
, mode
, op0
, op1
);
1867 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
1872 /* Emit an integral vector conditional move. */
1875 ia64_expand_vecint_cmov (rtx operands
[])
1877 enum machine_mode mode
= GET_MODE (operands
[0]);
1878 enum rtx_code code
= GET_CODE (operands
[3]);
1882 cmp
= gen_reg_rtx (mode
);
1883 negate
= ia64_expand_vecint_compare (code
, mode
, cmp
,
1884 operands
[4], operands
[5]);
1886 ot
= operands
[1+negate
];
1887 of
= operands
[2-negate
];
1889 if (ot
== CONST0_RTX (mode
))
1891 if (of
== CONST0_RTX (mode
))
1893 emit_move_insn (operands
[0], ot
);
1897 x
= gen_rtx_NOT (mode
, cmp
);
1898 x
= gen_rtx_AND (mode
, x
, of
);
1899 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1901 else if (of
== CONST0_RTX (mode
))
1903 x
= gen_rtx_AND (mode
, cmp
, ot
);
1904 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1910 t
= gen_reg_rtx (mode
);
1911 x
= gen_rtx_AND (mode
, cmp
, operands
[1+negate
]);
1912 emit_insn (gen_rtx_SET (VOIDmode
, t
, x
));
1914 f
= gen_reg_rtx (mode
);
1915 x
= gen_rtx_NOT (mode
, cmp
);
1916 x
= gen_rtx_AND (mode
, x
, operands
[2-negate
]);
1917 emit_insn (gen_rtx_SET (VOIDmode
, f
, x
));
1919 x
= gen_rtx_IOR (mode
, t
, f
);
1920 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1924 /* Emit an integral vector min or max operation. Return true if all done. */
1927 ia64_expand_vecint_minmax (enum rtx_code code
, enum machine_mode mode
,
1932 /* These four combinations are supported directly. */
1933 if (mode
== V8QImode
&& (code
== UMIN
|| code
== UMAX
))
1935 if (mode
== V4HImode
&& (code
== SMIN
|| code
== SMAX
))
1938 /* This combination can be implemented with only saturating subtraction. */
1939 if (mode
== V4HImode
&& code
== UMAX
)
1941 rtx x
, tmp
= gen_reg_rtx (mode
);
1943 x
= gen_rtx_US_MINUS (mode
, operands
[1], operands
[2]);
1944 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, x
));
1946 emit_insn (gen_addv4hi3 (operands
[0], tmp
, operands
[2]));
1950 /* Everything else implemented via vector comparisons. */
1951 xops
[0] = operands
[0];
1952 xops
[4] = xops
[1] = operands
[1];
1953 xops
[5] = xops
[2] = operands
[2];
1972 xops
[3] = gen_rtx_fmt_ee (code
, VOIDmode
, operands
[1], operands
[2]);
1974 ia64_expand_vecint_cmov (xops
);
1978 /* The vectors LO and HI each contain N halves of a double-wide vector.
1979 Reassemble either the first N/2 or the second N/2 elements. */
1982 ia64_unpack_assemble (rtx out
, rtx lo
, rtx hi
, bool highp
)
1984 enum machine_mode mode
= GET_MODE (lo
);
1985 rtx (*gen
) (rtx
, rtx
, rtx
);
1991 gen
= highp
? gen_vec_interleave_highv8qi
: gen_vec_interleave_lowv8qi
;
1994 gen
= highp
? gen_vec_interleave_highv4hi
: gen_vec_interleave_lowv4hi
;
2000 x
= gen_lowpart (mode
, out
);
2001 if (TARGET_BIG_ENDIAN
)
2002 x
= gen (x
, hi
, lo
);
2004 x
= gen (x
, lo
, hi
);
2008 /* Return a vector of the sign-extension of VEC. */
2011 ia64_unpack_sign (rtx vec
, bool unsignedp
)
2013 enum machine_mode mode
= GET_MODE (vec
);
2014 rtx zero
= CONST0_RTX (mode
);
2020 rtx sign
= gen_reg_rtx (mode
);
2023 neg
= ia64_expand_vecint_compare (LT
, mode
, sign
, vec
, zero
);
2030 /* Emit an integral vector unpack operation. */
2033 ia64_expand_unpack (rtx operands
[3], bool unsignedp
, bool highp
)
2035 rtx sign
= ia64_unpack_sign (operands
[1], unsignedp
);
2036 ia64_unpack_assemble (operands
[0], operands
[1], sign
, highp
);
2039 /* Emit an integral vector widening sum operations. */
2042 ia64_expand_widen_sum (rtx operands
[3], bool unsignedp
)
2044 enum machine_mode wmode
;
2047 sign
= ia64_unpack_sign (operands
[1], unsignedp
);
2049 wmode
= GET_MODE (operands
[0]);
2050 l
= gen_reg_rtx (wmode
);
2051 h
= gen_reg_rtx (wmode
);
2053 ia64_unpack_assemble (l
, operands
[1], sign
, false);
2054 ia64_unpack_assemble (h
, operands
[1], sign
, true);
2056 t
= expand_binop (wmode
, add_optab
, l
, operands
[2], NULL
, 0, OPTAB_DIRECT
);
2057 t
= expand_binop (wmode
, add_optab
, h
, t
, operands
[0], 0, OPTAB_DIRECT
);
2058 if (t
!= operands
[0])
2059 emit_move_insn (operands
[0], t
);
2062 /* Emit a signed or unsigned V8QI dot product operation. */
2065 ia64_expand_dot_prod_v8qi (rtx operands
[4], bool unsignedp
)
2067 rtx op1
, op2
, sn1
, sn2
, l1
, l2
, h1
, h2
;
2068 rtx p1
, p2
, p3
, p4
, s1
, s2
, s3
;
2072 sn1
= ia64_unpack_sign (op1
, unsignedp
);
2073 sn2
= ia64_unpack_sign (op2
, unsignedp
);
2075 l1
= gen_reg_rtx (V4HImode
);
2076 l2
= gen_reg_rtx (V4HImode
);
2077 h1
= gen_reg_rtx (V4HImode
);
2078 h2
= gen_reg_rtx (V4HImode
);
2079 ia64_unpack_assemble (l1
, op1
, sn1
, false);
2080 ia64_unpack_assemble (l2
, op2
, sn2
, false);
2081 ia64_unpack_assemble (h1
, op1
, sn1
, true);
2082 ia64_unpack_assemble (h2
, op2
, sn2
, true);
2084 p1
= gen_reg_rtx (V2SImode
);
2085 p2
= gen_reg_rtx (V2SImode
);
2086 p3
= gen_reg_rtx (V2SImode
);
2087 p4
= gen_reg_rtx (V2SImode
);
2088 emit_insn (gen_pmpy2_even (p1
, l1
, l2
));
2089 emit_insn (gen_pmpy2_even (p2
, h1
, h2
));
2090 emit_insn (gen_pmpy2_odd (p3
, l1
, l2
));
2091 emit_insn (gen_pmpy2_odd (p4
, h1
, h2
));
2093 s1
= gen_reg_rtx (V2SImode
);
2094 s2
= gen_reg_rtx (V2SImode
);
2095 s3
= gen_reg_rtx (V2SImode
);
2096 emit_insn (gen_addv2si3 (s1
, p1
, p2
));
2097 emit_insn (gen_addv2si3 (s2
, p3
, p4
));
2098 emit_insn (gen_addv2si3 (s3
, s1
, operands
[3]));
2099 emit_insn (gen_addv2si3 (operands
[0], s2
, s3
));
2102 /* Emit the appropriate sequence for a call. */
2105 ia64_expand_call (rtx retval
, rtx addr
, rtx nextarg ATTRIBUTE_UNUSED
,
2110 addr
= XEXP (addr
, 0);
2111 addr
= convert_memory_address (DImode
, addr
);
2112 b0
= gen_rtx_REG (DImode
, R_BR (0));
2114 /* ??? Should do this for functions known to bind local too. */
2115 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
2118 insn
= gen_sibcall_nogp (addr
);
2120 insn
= gen_call_nogp (addr
, b0
);
2122 insn
= gen_call_value_nogp (retval
, addr
, b0
);
2123 insn
= emit_call_insn (insn
);
2128 insn
= gen_sibcall_gp (addr
);
2130 insn
= gen_call_gp (addr
, b0
);
2132 insn
= gen_call_value_gp (retval
, addr
, b0
);
2133 insn
= emit_call_insn (insn
);
2135 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
2139 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
2141 if (TARGET_ABI_OPEN_VMS
)
2142 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
2143 gen_rtx_REG (DImode
, GR_REG (25)));
2147 reg_emitted (enum ia64_frame_regs r
)
2149 if (emitted_frame_related_regs
[r
] == 0)
2150 emitted_frame_related_regs
[r
] = current_frame_info
.r
[r
];
2152 gcc_assert (emitted_frame_related_regs
[r
] == current_frame_info
.r
[r
]);
2156 get_reg (enum ia64_frame_regs r
)
2159 return current_frame_info
.r
[r
];
2163 is_emitted (int regno
)
2167 for (r
= reg_fp
; r
< number_of_ia64_frame_regs
; r
++)
2168 if (emitted_frame_related_regs
[r
] == regno
)
2174 ia64_reload_gp (void)
2178 if (current_frame_info
.r
[reg_save_gp
])
2180 tmp
= gen_rtx_REG (DImode
, get_reg (reg_save_gp
));
2184 HOST_WIDE_INT offset
;
2187 offset
= (current_frame_info
.spill_cfa_off
2188 + current_frame_info
.spill_size
);
2189 if (frame_pointer_needed
)
2191 tmp
= hard_frame_pointer_rtx
;
2196 tmp
= stack_pointer_rtx
;
2197 offset
= current_frame_info
.total_size
- offset
;
2200 offset_r
= GEN_INT (offset
);
2201 if (satisfies_constraint_I (offset_r
))
2202 emit_insn (gen_adddi3 (pic_offset_table_rtx
, tmp
, offset_r
));
2205 emit_move_insn (pic_offset_table_rtx
, offset_r
);
2206 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
2207 pic_offset_table_rtx
, tmp
));
2210 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
2213 emit_move_insn (pic_offset_table_rtx
, tmp
);
2217 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
2218 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
2221 bool is_desc
= false;
2223 /* If we find we're calling through a register, then we're actually
2224 calling through a descriptor, so load up the values. */
2225 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
2230 /* ??? We are currently constrained to *not* use peep2, because
2231 we can legitimately change the global lifetime of the GP
2232 (in the form of killing where previously live). This is
2233 because a call through a descriptor doesn't use the previous
2234 value of the GP, while a direct call does, and we do not
2235 commit to either form until the split here.
2237 That said, this means that we lack precise life info for
2238 whether ADDR is dead after this call. This is not terribly
2239 important, since we can fix things up essentially for free
2240 with the POST_DEC below, but it's nice to not use it when we
2241 can immediately tell it's not necessary. */
2242 addr_dead_p
= ((noreturn_p
|| sibcall_p
2243 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
2245 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
2247 /* Load the code address into scratch_b. */
2248 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
2249 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2250 emit_move_insn (scratch_r
, tmp
);
2251 emit_move_insn (scratch_b
, scratch_r
);
2253 /* Load the GP address. If ADDR is not dead here, then we must
2254 revert the change made above via the POST_INCREMENT. */
2256 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
2259 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2260 emit_move_insn (pic_offset_table_rtx
, tmp
);
2267 insn
= gen_sibcall_nogp (addr
);
2269 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
2271 insn
= gen_call_nogp (addr
, retaddr
);
2272 emit_call_insn (insn
);
2274 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
2278 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2280 This differs from the generic code in that we know about the zero-extending
2281 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2282 also know that ld.acq+cmpxchg.rel equals a full barrier.
2284 The loop we want to generate looks like
2289 new_reg = cmp_reg op val;
2290 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2291 if (cmp_reg != old_reg)
2294 Note that we only do the plain load from memory once. Subsequent
2295 iterations use the value loaded by the compare-and-swap pattern. */
2298 ia64_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
2299 rtx old_dst
, rtx new_dst
)
2301 enum machine_mode mode
= GET_MODE (mem
);
2302 rtx old_reg
, new_reg
, cmp_reg
, ar_ccv
, label
;
2303 enum insn_code icode
;
2305 /* Special case for using fetchadd. */
2306 if ((mode
== SImode
|| mode
== DImode
)
2307 && (code
== PLUS
|| code
== MINUS
)
2308 && fetchadd_operand (val
, mode
))
2311 val
= GEN_INT (-INTVAL (val
));
2314 old_dst
= gen_reg_rtx (mode
);
2316 emit_insn (gen_memory_barrier ());
2319 icode
= CODE_FOR_fetchadd_acq_si
;
2321 icode
= CODE_FOR_fetchadd_acq_di
;
2322 emit_insn (GEN_FCN (icode
) (old_dst
, mem
, val
));
2326 new_reg
= expand_simple_binop (mode
, PLUS
, old_dst
, val
, new_dst
,
2328 if (new_reg
!= new_dst
)
2329 emit_move_insn (new_dst
, new_reg
);
2334 /* Because of the volatile mem read, we get an ld.acq, which is the
2335 front half of the full barrier. The end half is the cmpxchg.rel. */
2336 gcc_assert (MEM_VOLATILE_P (mem
));
2338 old_reg
= gen_reg_rtx (DImode
);
2339 cmp_reg
= gen_reg_rtx (DImode
);
2340 label
= gen_label_rtx ();
2344 val
= simplify_gen_subreg (DImode
, val
, mode
, 0);
2345 emit_insn (gen_extend_insn (cmp_reg
, mem
, DImode
, mode
, 1));
2348 emit_move_insn (cmp_reg
, mem
);
2352 ar_ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
2353 emit_move_insn (old_reg
, cmp_reg
);
2354 emit_move_insn (ar_ccv
, cmp_reg
);
2357 emit_move_insn (old_dst
, gen_lowpart (mode
, cmp_reg
));
2362 new_reg
= expand_simple_binop (DImode
, AND
, new_reg
, val
, NULL_RTX
,
2363 true, OPTAB_DIRECT
);
2364 new_reg
= expand_simple_unop (DImode
, code
, new_reg
, NULL_RTX
, true);
2367 new_reg
= expand_simple_binop (DImode
, code
, new_reg
, val
, NULL_RTX
,
2368 true, OPTAB_DIRECT
);
2371 new_reg
= gen_lowpart (mode
, new_reg
);
2373 emit_move_insn (new_dst
, new_reg
);
2377 case QImode
: icode
= CODE_FOR_cmpxchg_rel_qi
; break;
2378 case HImode
: icode
= CODE_FOR_cmpxchg_rel_hi
; break;
2379 case SImode
: icode
= CODE_FOR_cmpxchg_rel_si
; break;
2380 case DImode
: icode
= CODE_FOR_cmpxchg_rel_di
; break;
2385 emit_insn (GEN_FCN (icode
) (cmp_reg
, mem
, ar_ccv
, new_reg
));
2387 emit_cmp_and_jump_insns (cmp_reg
, old_reg
, NE
, NULL
, DImode
, true, label
);
2390 /* Begin the assembly file. */
2393 ia64_file_start (void)
2395 /* Variable tracking should be run after all optimizations which change order
2396 of insns. It also needs a valid CFG. This can't be done in
2397 ia64_option_override, because flag_var_tracking is finalized after
2399 ia64_flag_var_tracking
= flag_var_tracking
;
2400 flag_var_tracking
= 0;
2402 default_file_start ();
2403 emit_safe_across_calls ();
2407 emit_safe_across_calls (void)
2409 unsigned int rs
, re
;
2416 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
2420 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
2424 fputs ("\t.pred.safe_across_calls ", asm_out_file
);
2428 fputc (',', asm_out_file
);
2430 fprintf (asm_out_file
, "p%u", rs
);
2432 fprintf (asm_out_file
, "p%u-p%u", rs
, re
- 1);
2436 fputc ('\n', asm_out_file
);
2439 /* Globalize a declaration. */
2442 ia64_globalize_decl_name (FILE * stream
, tree decl
)
2444 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
2445 tree version_attr
= lookup_attribute ("version_id", DECL_ATTRIBUTES (decl
));
2448 tree v
= TREE_VALUE (TREE_VALUE (version_attr
));
2449 const char *p
= TREE_STRING_POINTER (v
);
2450 fprintf (stream
, "\t.alias %s#, \"%s{%s}\"\n", name
, name
, p
);
2452 targetm
.asm_out
.globalize_label (stream
, name
);
2453 if (TREE_CODE (decl
) == FUNCTION_DECL
)
2454 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "function");
2457 /* Helper function for ia64_compute_frame_size: find an appropriate general
2458 register to spill some special register to. SPECIAL_SPILL_MASK contains
2459 bits in GR0 to GR31 that have already been allocated by this routine.
2460 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2463 find_gr_spill (enum ia64_frame_regs r
, int try_locals
)
2467 if (emitted_frame_related_regs
[r
] != 0)
2469 regno
= emitted_frame_related_regs
[r
];
2470 if (regno
>= LOC_REG (0) && regno
< LOC_REG (80 - frame_pointer_needed
)
2471 && current_frame_info
.n_local_regs
< regno
- LOC_REG (0) + 1)
2472 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2473 else if (current_function_is_leaf
2474 && regno
>= GR_REG (1) && regno
<= GR_REG (31))
2475 current_frame_info
.gr_used_mask
|= 1 << regno
;
2480 /* If this is a leaf function, first try an otherwise unused
2481 call-clobbered register. */
2482 if (current_function_is_leaf
)
2484 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2485 if (! df_regs_ever_live_p (regno
)
2486 && call_used_regs
[regno
]
2487 && ! fixed_regs
[regno
]
2488 && ! global_regs
[regno
]
2489 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0
2490 && ! is_emitted (regno
))
2492 current_frame_info
.gr_used_mask
|= 1 << regno
;
2499 regno
= current_frame_info
.n_local_regs
;
2500 /* If there is a frame pointer, then we can't use loc79, because
2501 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2502 reg_name switching code in ia64_expand_prologue. */
2503 while (regno
< (80 - frame_pointer_needed
))
2504 if (! is_emitted (LOC_REG (regno
++)))
2506 current_frame_info
.n_local_regs
= regno
;
2507 return LOC_REG (regno
- 1);
2511 /* Failed to find a general register to spill to. Must use stack. */
2515 /* In order to make for nice schedules, we try to allocate every temporary
2516 to a different register. We must of course stay away from call-saved,
2517 fixed, and global registers. We must also stay away from registers
2518 allocated in current_frame_info.gr_used_mask, since those include regs
2519 used all through the prologue.
2521 Any register allocated here must be used immediately. The idea is to
2522 aid scheduling, not to solve data flow problems. */
2524 static int last_scratch_gr_reg
;
2527 next_scratch_gr_reg (void)
2531 for (i
= 0; i
< 32; ++i
)
2533 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
2534 if (call_used_regs
[regno
]
2535 && ! fixed_regs
[regno
]
2536 && ! global_regs
[regno
]
2537 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
2539 last_scratch_gr_reg
= regno
;
2544 /* There must be _something_ available. */
2548 /* Helper function for ia64_compute_frame_size, called through
2549 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2552 mark_reg_gr_used_mask (rtx reg
, void *data ATTRIBUTE_UNUSED
)
2554 unsigned int regno
= REGNO (reg
);
2557 unsigned int i
, n
= hard_regno_nregs
[regno
][GET_MODE (reg
)];
2558 for (i
= 0; i
< n
; ++i
)
2559 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
2564 /* Returns the number of bytes offset between the frame pointer and the stack
2565 pointer for the current function. SIZE is the number of bytes of space
2566 needed for local variables. */
2569 ia64_compute_frame_size (HOST_WIDE_INT size
)
2571 HOST_WIDE_INT total_size
;
2572 HOST_WIDE_INT spill_size
= 0;
2573 HOST_WIDE_INT extra_spill_size
= 0;
2574 HOST_WIDE_INT pretend_args_size
;
2577 int spilled_gr_p
= 0;
2578 int spilled_fr_p
= 0;
2584 if (current_frame_info
.initialized
)
2587 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
2588 CLEAR_HARD_REG_SET (mask
);
2590 /* Don't allocate scratches to the return register. */
2591 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
2593 /* Don't allocate scratches to the EH scratch registers. */
2594 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2595 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
2596 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2597 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
2599 /* Find the size of the register stack frame. We have only 80 local
2600 registers, because we reserve 8 for the inputs and 8 for the
2603 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2604 since we'll be adjusting that down later. */
2605 regno
= LOC_REG (78) + ! frame_pointer_needed
;
2606 for (; regno
>= LOC_REG (0); regno
--)
2607 if (df_regs_ever_live_p (regno
) && !is_emitted (regno
))
2609 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2611 /* For functions marked with the syscall_linkage attribute, we must mark
2612 all eight input registers as in use, so that locals aren't visible to
2615 if (cfun
->machine
->n_varargs
> 0
2616 || lookup_attribute ("syscall_linkage",
2617 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
2618 current_frame_info
.n_input_regs
= 8;
2621 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
2622 if (df_regs_ever_live_p (regno
))
2624 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
2627 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
2628 if (df_regs_ever_live_p (regno
))
2630 i
= regno
- OUT_REG (0) + 1;
2632 #ifndef PROFILE_HOOK
2633 /* When -p profiling, we need one output register for the mcount argument.
2634 Likewise for -a profiling for the bb_init_func argument. For -ax
2635 profiling, we need two output registers for the two bb_init_trace_func
2640 current_frame_info
.n_output_regs
= i
;
2642 /* ??? No rotating register support yet. */
2643 current_frame_info
.n_rotate_regs
= 0;
2645 /* Discover which registers need spilling, and how much room that
2646 will take. Begin with floating point and general registers,
2647 which will always wind up on the stack. */
2649 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
2650 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2652 SET_HARD_REG_BIT (mask
, regno
);
2658 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2659 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2661 SET_HARD_REG_BIT (mask
, regno
);
2667 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
2668 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2670 SET_HARD_REG_BIT (mask
, regno
);
2675 /* Now come all special registers that might get saved in other
2676 general registers. */
2678 if (frame_pointer_needed
)
2680 current_frame_info
.r
[reg_fp
] = find_gr_spill (reg_fp
, 1);
2681 /* If we did not get a register, then we take LOC79. This is guaranteed
2682 to be free, even if regs_ever_live is already set, because this is
2683 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2684 as we don't count loc79 above. */
2685 if (current_frame_info
.r
[reg_fp
] == 0)
2687 current_frame_info
.r
[reg_fp
] = LOC_REG (79);
2688 current_frame_info
.n_local_regs
= LOC_REG (79) - LOC_REG (0) + 1;
2692 if (! current_function_is_leaf
)
2694 /* Emit a save of BR0 if we call other functions. Do this even
2695 if this function doesn't return, as EH depends on this to be
2696 able to unwind the stack. */
2697 SET_HARD_REG_BIT (mask
, BR_REG (0));
2699 current_frame_info
.r
[reg_save_b0
] = find_gr_spill (reg_save_b0
, 1);
2700 if (current_frame_info
.r
[reg_save_b0
] == 0)
2702 extra_spill_size
+= 8;
2706 /* Similarly for ar.pfs. */
2707 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2708 current_frame_info
.r
[reg_save_ar_pfs
] = find_gr_spill (reg_save_ar_pfs
, 1);
2709 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2711 extra_spill_size
+= 8;
2715 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2716 registers are clobbered, so we fall back to the stack. */
2717 current_frame_info
.r
[reg_save_gp
]
2718 = (cfun
->calls_setjmp
? 0 : find_gr_spill (reg_save_gp
, 1));
2719 if (current_frame_info
.r
[reg_save_gp
] == 0)
2721 SET_HARD_REG_BIT (mask
, GR_REG (1));
2728 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs
[BR_REG (0)])
2730 SET_HARD_REG_BIT (mask
, BR_REG (0));
2731 extra_spill_size
+= 8;
2735 if (df_regs_ever_live_p (AR_PFS_REGNUM
))
2737 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2738 current_frame_info
.r
[reg_save_ar_pfs
]
2739 = find_gr_spill (reg_save_ar_pfs
, 1);
2740 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2742 extra_spill_size
+= 8;
2748 /* Unwind descriptor hackery: things are most efficient if we allocate
2749 consecutive GR save registers for RP, PFS, FP in that order. However,
2750 it is absolutely critical that FP get the only hard register that's
2751 guaranteed to be free, so we allocated it first. If all three did
2752 happen to be allocated hard regs, and are consecutive, rearrange them
2753 into the preferred order now.
2755 If we have already emitted code for any of those registers,
2756 then it's already too late to change. */
2757 min_regno
= MIN (current_frame_info
.r
[reg_fp
],
2758 MIN (current_frame_info
.r
[reg_save_b0
],
2759 current_frame_info
.r
[reg_save_ar_pfs
]));
2760 max_regno
= MAX (current_frame_info
.r
[reg_fp
],
2761 MAX (current_frame_info
.r
[reg_save_b0
],
2762 current_frame_info
.r
[reg_save_ar_pfs
]));
2764 && min_regno
+ 2 == max_regno
2765 && (current_frame_info
.r
[reg_fp
] == min_regno
+ 1
2766 || current_frame_info
.r
[reg_save_b0
] == min_regno
+ 1
2767 || current_frame_info
.r
[reg_save_ar_pfs
] == min_regno
+ 1)
2768 && (emitted_frame_related_regs
[reg_save_b0
] == 0
2769 || emitted_frame_related_regs
[reg_save_b0
] == min_regno
)
2770 && (emitted_frame_related_regs
[reg_save_ar_pfs
] == 0
2771 || emitted_frame_related_regs
[reg_save_ar_pfs
] == min_regno
+ 1)
2772 && (emitted_frame_related_regs
[reg_fp
] == 0
2773 || emitted_frame_related_regs
[reg_fp
] == min_regno
+ 2))
2775 current_frame_info
.r
[reg_save_b0
] = min_regno
;
2776 current_frame_info
.r
[reg_save_ar_pfs
] = min_regno
+ 1;
2777 current_frame_info
.r
[reg_fp
] = min_regno
+ 2;
2780 /* See if we need to store the predicate register block. */
2781 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2782 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2784 if (regno
<= PR_REG (63))
2786 SET_HARD_REG_BIT (mask
, PR_REG (0));
2787 current_frame_info
.r
[reg_save_pr
] = find_gr_spill (reg_save_pr
, 1);
2788 if (current_frame_info
.r
[reg_save_pr
] == 0)
2790 extra_spill_size
+= 8;
2794 /* ??? Mark them all as used so that register renaming and such
2795 are free to use them. */
2796 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2797 df_set_regs_ever_live (regno
, true);
2800 /* If we're forced to use st8.spill, we're forced to save and restore
2801 ar.unat as well. The check for existing liveness allows inline asm
2802 to touch ar.unat. */
2803 if (spilled_gr_p
|| cfun
->machine
->n_varargs
2804 || df_regs_ever_live_p (AR_UNAT_REGNUM
))
2806 df_set_regs_ever_live (AR_UNAT_REGNUM
, true);
2807 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
2808 current_frame_info
.r
[reg_save_ar_unat
]
2809 = find_gr_spill (reg_save_ar_unat
, spill_size
== 0);
2810 if (current_frame_info
.r
[reg_save_ar_unat
] == 0)
2812 extra_spill_size
+= 8;
2817 if (df_regs_ever_live_p (AR_LC_REGNUM
))
2819 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
2820 current_frame_info
.r
[reg_save_ar_lc
]
2821 = find_gr_spill (reg_save_ar_lc
, spill_size
== 0);
2822 if (current_frame_info
.r
[reg_save_ar_lc
] == 0)
2824 extra_spill_size
+= 8;
2829 /* If we have an odd number of words of pretend arguments written to
2830 the stack, then the FR save area will be unaligned. We round the
2831 size of this area up to keep things 16 byte aligned. */
2833 pretend_args_size
= IA64_STACK_ALIGN (crtl
->args
.pretend_args_size
);
2835 pretend_args_size
= crtl
->args
.pretend_args_size
;
2837 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
2838 + crtl
->outgoing_args_size
);
2839 total_size
= IA64_STACK_ALIGN (total_size
);
2841 /* We always use the 16-byte scratch area provided by the caller, but
2842 if we are a leaf function, there's no one to which we need to provide
2844 if (current_function_is_leaf
)
2845 total_size
= MAX (0, total_size
- 16);
2847 current_frame_info
.total_size
= total_size
;
2848 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
2849 current_frame_info
.spill_size
= spill_size
;
2850 current_frame_info
.extra_spill_size
= extra_spill_size
;
2851 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
2852 current_frame_info
.n_spilled
= n_spilled
;
2853 current_frame_info
.initialized
= reload_completed
;
2856 /* Worker function for TARGET_CAN_ELIMINATE. */
2859 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED
, const int to
)
2861 return (to
== BR_REG (0) ? current_function_is_leaf
: true);
2864 /* Compute the initial difference between the specified pair of registers. */
2867 ia64_initial_elimination_offset (int from
, int to
)
2869 HOST_WIDE_INT offset
;
2871 ia64_compute_frame_size (get_frame_size ());
2874 case FRAME_POINTER_REGNUM
:
2877 case HARD_FRAME_POINTER_REGNUM
:
2878 if (current_function_is_leaf
)
2879 offset
= -current_frame_info
.total_size
;
2881 offset
= -(current_frame_info
.total_size
2882 - crtl
->outgoing_args_size
- 16);
2885 case STACK_POINTER_REGNUM
:
2886 if (current_function_is_leaf
)
2889 offset
= 16 + crtl
->outgoing_args_size
;
2897 case ARG_POINTER_REGNUM
:
2898 /* Arguments start above the 16 byte save area, unless stdarg
2899 in which case we store through the 16 byte save area. */
2902 case HARD_FRAME_POINTER_REGNUM
:
2903 offset
= 16 - crtl
->args
.pretend_args_size
;
2906 case STACK_POINTER_REGNUM
:
2907 offset
= (current_frame_info
.total_size
2908 + 16 - crtl
->args
.pretend_args_size
);
2923 /* If there are more than a trivial number of register spills, we use
2924 two interleaved iterators so that we can get two memory references
2927 In order to simplify things in the prologue and epilogue expanders,
2928 we use helper functions to fix up the memory references after the
2929 fact with the appropriate offsets to a POST_MODIFY memory mode.
2930 The following data structure tracks the state of the two iterators
2931 while insns are being emitted. */
2933 struct spill_fill_data
2935 rtx init_after
; /* point at which to emit initializations */
2936 rtx init_reg
[2]; /* initial base register */
2937 rtx iter_reg
[2]; /* the iterator registers */
2938 rtx
*prev_addr
[2]; /* address of last memory use */
2939 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
2940 HOST_WIDE_INT prev_off
[2]; /* last offset */
2941 int n_iter
; /* number of iterators in use */
2942 int next_iter
; /* next iterator to use */
2943 unsigned int save_gr_used_mask
;
2946 static struct spill_fill_data spill_fill_data
;
2949 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
2953 spill_fill_data
.init_after
= get_last_insn ();
2954 spill_fill_data
.init_reg
[0] = init_reg
;
2955 spill_fill_data
.init_reg
[1] = init_reg
;
2956 spill_fill_data
.prev_addr
[0] = NULL
;
2957 spill_fill_data
.prev_addr
[1] = NULL
;
2958 spill_fill_data
.prev_insn
[0] = NULL
;
2959 spill_fill_data
.prev_insn
[1] = NULL
;
2960 spill_fill_data
.prev_off
[0] = cfa_off
;
2961 spill_fill_data
.prev_off
[1] = cfa_off
;
2962 spill_fill_data
.next_iter
= 0;
2963 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
2965 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
2966 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
2968 int regno
= next_scratch_gr_reg ();
2969 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
2970 current_frame_info
.gr_used_mask
|= 1 << regno
;
2975 finish_spill_pointers (void)
2977 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
2981 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
2983 int iter
= spill_fill_data
.next_iter
;
2984 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
2985 rtx disp_rtx
= GEN_INT (disp
);
2988 if (spill_fill_data
.prev_addr
[iter
])
2990 if (satisfies_constraint_N (disp_rtx
))
2992 *spill_fill_data
.prev_addr
[iter
]
2993 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
2994 gen_rtx_PLUS (DImode
,
2995 spill_fill_data
.iter_reg
[iter
],
2997 add_reg_note (spill_fill_data
.prev_insn
[iter
],
2998 REG_INC
, spill_fill_data
.iter_reg
[iter
]);
3002 /* ??? Could use register post_modify for loads. */
3003 if (!satisfies_constraint_I (disp_rtx
))
3005 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
3006 emit_move_insn (tmp
, disp_rtx
);
3009 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
3010 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
3013 /* Micro-optimization: if we've created a frame pointer, it's at
3014 CFA 0, which may allow the real iterator to be initialized lower,
3015 slightly increasing parallelism. Also, if there are few saves
3016 it may eliminate the iterator entirely. */
3018 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
3019 && frame_pointer_needed
)
3021 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
3022 set_mem_alias_set (mem
, get_varargs_alias_set ());
3030 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
3031 spill_fill_data
.init_reg
[iter
]);
3036 if (!satisfies_constraint_I (disp_rtx
))
3038 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
3039 emit_move_insn (tmp
, disp_rtx
);
3043 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
3044 spill_fill_data
.init_reg
[iter
],
3051 /* Careful for being the first insn in a sequence. */
3052 if (spill_fill_data
.init_after
)
3053 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
3056 rtx first
= get_insns ();
3058 insn
= emit_insn_before (seq
, first
);
3060 insn
= emit_insn (seq
);
3062 spill_fill_data
.init_after
= insn
;
3065 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
3067 /* ??? Not all of the spills are for varargs, but some of them are.
3068 The rest of the spills belong in an alias set of their own. But
3069 it doesn't actually hurt to include them here. */
3070 set_mem_alias_set (mem
, get_varargs_alias_set ());
3072 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
3073 spill_fill_data
.prev_off
[iter
] = cfa_off
;
3075 if (++iter
>= spill_fill_data
.n_iter
)
3077 spill_fill_data
.next_iter
= iter
;
3083 do_spill (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
,
3086 int iter
= spill_fill_data
.next_iter
;
3089 mem
= spill_restore_mem (reg
, cfa_off
);
3090 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
3091 spill_fill_data
.prev_insn
[iter
] = insn
;
3098 RTX_FRAME_RELATED_P (insn
) = 1;
3100 /* Don't even pretend that the unwind code can intuit its way
3101 through a pair of interleaved post_modify iterators. Just
3102 provide the correct answer. */
3104 if (frame_pointer_needed
)
3106 base
= hard_frame_pointer_rtx
;
3111 base
= stack_pointer_rtx
;
3112 off
= current_frame_info
.total_size
- cfa_off
;
3115 add_reg_note (insn
, REG_CFA_OFFSET
,
3116 gen_rtx_SET (VOIDmode
,
3117 gen_rtx_MEM (GET_MODE (reg
),
3118 plus_constant (base
, off
)),
3124 do_restore (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
)
3126 int iter
= spill_fill_data
.next_iter
;
3129 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
3130 GEN_INT (cfa_off
)));
3131 spill_fill_data
.prev_insn
[iter
] = insn
;
3134 /* Wrapper functions that discards the CONST_INT spill offset. These
3135 exist so that we can give gr_spill/gr_fill the offset they need and
3136 use a consistent function interface. */
3139 gen_movdi_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3141 return gen_movdi (dest
, src
);
3145 gen_fr_spill_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3147 return gen_fr_spill (dest
, src
);
3151 gen_fr_restore_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3153 return gen_fr_restore (dest
, src
);
3156 /* Called after register allocation to add any instructions needed for the
3157 prologue. Using a prologue insn is favored compared to putting all of the
3158 instructions in output_function_prologue(), since it allows the scheduler
3159 to intermix instructions with the saves of the caller saved registers. In
3160 some cases, it might be necessary to emit a barrier instruction as the last
3161 insn to prevent such scheduling.
3163 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3164 so that the debug info generation code can handle them properly.
3166 The register save area is layed out like so:
3168 [ varargs spill area ]
3169 [ fr register spill area ]
3170 [ br register spill area ]
3171 [ ar register spill area ]
3172 [ pr register spill area ]
3173 [ gr register spill area ] */
3175 /* ??? Get inefficient code when the frame size is larger than can fit in an
3176 adds instruction. */
3179 ia64_expand_prologue (void)
3181 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
3182 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
3185 ia64_compute_frame_size (get_frame_size ());
3186 last_scratch_gr_reg
= 15;
3188 if (flag_stack_usage
)
3189 current_function_static_stack_size
= current_frame_info
.total_size
;
3193 fprintf (dump_file
, "ia64 frame related registers "
3194 "recorded in current_frame_info.r[]:\n");
3195 #define PRINTREG(a) if (current_frame_info.r[a]) \
3196 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3198 PRINTREG(reg_save_b0
);
3199 PRINTREG(reg_save_pr
);
3200 PRINTREG(reg_save_ar_pfs
);
3201 PRINTREG(reg_save_ar_unat
);
3202 PRINTREG(reg_save_ar_lc
);
3203 PRINTREG(reg_save_gp
);
3207 /* If there is no epilogue, then we don't need some prologue insns.
3208 We need to avoid emitting the dead prologue insns, because flow
3209 will complain about them. */
3215 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
3216 if ((e
->flags
& EDGE_FAKE
) == 0
3217 && (e
->flags
& EDGE_FALLTHRU
) != 0)
3219 epilogue_p
= (e
!= NULL
);
3224 /* Set the local, input, and output register names. We need to do this
3225 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3226 half. If we use in/loc/out register names, then we get assembler errors
3227 in crtn.S because there is no alloc insn or regstk directive in there. */
3228 if (! TARGET_REG_NAMES
)
3230 int inputs
= current_frame_info
.n_input_regs
;
3231 int locals
= current_frame_info
.n_local_regs
;
3232 int outputs
= current_frame_info
.n_output_regs
;
3234 for (i
= 0; i
< inputs
; i
++)
3235 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
3236 for (i
= 0; i
< locals
; i
++)
3237 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
3238 for (i
= 0; i
< outputs
; i
++)
3239 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
3242 /* Set the frame pointer register name. The regnum is logically loc79,
3243 but of course we'll not have allocated that many locals. Rather than
3244 worrying about renumbering the existing rtxs, we adjust the name. */
3245 /* ??? This code means that we can never use one local register when
3246 there is a frame pointer. loc79 gets wasted in this case, as it is
3247 renamed to a register that will never be used. See also the try_locals
3248 code in find_gr_spill. */
3249 if (current_frame_info
.r
[reg_fp
])
3251 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3252 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3253 = reg_names
[current_frame_info
.r
[reg_fp
]];
3254 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
3257 /* We don't need an alloc instruction if we've used no outputs or locals. */
3258 if (current_frame_info
.n_local_regs
== 0
3259 && current_frame_info
.n_output_regs
== 0
3260 && current_frame_info
.n_input_regs
<= crtl
->args
.info
.int_regs
3261 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3263 /* If there is no alloc, but there are input registers used, then we
3264 need a .regstk directive. */
3265 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
3266 ar_pfs_save_reg
= NULL_RTX
;
3270 current_frame_info
.need_regstk
= 0;
3272 if (current_frame_info
.r
[reg_save_ar_pfs
])
3274 regno
= current_frame_info
.r
[reg_save_ar_pfs
];
3275 reg_emitted (reg_save_ar_pfs
);
3278 regno
= next_scratch_gr_reg ();
3279 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
3281 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
3282 GEN_INT (current_frame_info
.n_input_regs
),
3283 GEN_INT (current_frame_info
.n_local_regs
),
3284 GEN_INT (current_frame_info
.n_output_regs
),
3285 GEN_INT (current_frame_info
.n_rotate_regs
)));
3286 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.r
[reg_save_ar_pfs
] != 0);
3289 /* Set up frame pointer, stack pointer, and spill iterators. */
3291 n_varargs
= cfun
->machine
->n_varargs
;
3292 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
3293 stack_pointer_rtx
, 0);
3295 if (frame_pointer_needed
)
3297 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3298 RTX_FRAME_RELATED_P (insn
) = 1;
3300 /* Force the unwind info to recognize this as defining a new CFA,
3301 rather than some temp register setup. */
3302 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL_RTX
);
3305 if (current_frame_info
.total_size
!= 0)
3307 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
3310 if (satisfies_constraint_I (frame_size_rtx
))
3311 offset
= frame_size_rtx
;
3314 regno
= next_scratch_gr_reg ();
3315 offset
= gen_rtx_REG (DImode
, regno
);
3316 emit_move_insn (offset
, frame_size_rtx
);
3319 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
3320 stack_pointer_rtx
, offset
));
3322 if (! frame_pointer_needed
)
3324 RTX_FRAME_RELATED_P (insn
) = 1;
3325 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
3326 gen_rtx_SET (VOIDmode
,
3328 gen_rtx_PLUS (DImode
,
3333 /* ??? At this point we must generate a magic insn that appears to
3334 modify the stack pointer, the frame pointer, and all spill
3335 iterators. This would allow the most scheduling freedom. For
3336 now, just hard stop. */
3337 emit_insn (gen_blockage ());
3340 /* Must copy out ar.unat before doing any integer spills. */
3341 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3343 if (current_frame_info
.r
[reg_save_ar_unat
])
3346 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3347 reg_emitted (reg_save_ar_unat
);
3351 alt_regno
= next_scratch_gr_reg ();
3352 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3353 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3356 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3357 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
3358 if (current_frame_info
.r
[reg_save_ar_unat
])
3360 RTX_FRAME_RELATED_P (insn
) = 1;
3361 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3364 /* Even if we're not going to generate an epilogue, we still
3365 need to save the register so that EH works. */
3366 if (! epilogue_p
&& current_frame_info
.r
[reg_save_ar_unat
])
3367 emit_insn (gen_prologue_use (ar_unat_save_reg
));
3370 ar_unat_save_reg
= NULL_RTX
;
3372 /* Spill all varargs registers. Do this before spilling any GR registers,
3373 since we want the UNAT bits for the GR registers to override the UNAT
3374 bits from varargs, which we don't care about. */
3377 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
3379 reg
= gen_rtx_REG (DImode
, regno
);
3380 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
3383 /* Locate the bottom of the register save area. */
3384 cfa_off
= (current_frame_info
.spill_cfa_off
3385 + current_frame_info
.spill_size
3386 + current_frame_info
.extra_spill_size
);
3388 /* Save the predicate register block either in a register or in memory. */
3389 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3391 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3392 if (current_frame_info
.r
[reg_save_pr
] != 0)
3394 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3395 reg_emitted (reg_save_pr
);
3396 insn
= emit_move_insn (alt_reg
, reg
);
3398 /* ??? Denote pr spill/fill by a DImode move that modifies all
3399 64 hard registers. */
3400 RTX_FRAME_RELATED_P (insn
) = 1;
3401 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3403 /* Even if we're not going to generate an epilogue, we still
3404 need to save the register so that EH works. */
3406 emit_insn (gen_prologue_use (alt_reg
));
3410 alt_regno
= next_scratch_gr_reg ();
3411 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3412 insn
= emit_move_insn (alt_reg
, reg
);
3413 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3418 /* Handle AR regs in numerical order. All of them get special handling. */
3419 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
3420 && current_frame_info
.r
[reg_save_ar_unat
] == 0)
3422 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3423 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
3427 /* The alloc insn already copied ar.pfs into a general register. The
3428 only thing we have to do now is copy that register to a stack slot
3429 if we'd not allocated a local register for the job. */
3430 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
3431 && current_frame_info
.r
[reg_save_ar_pfs
] == 0)
3433 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3434 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
3438 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3440 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3441 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3443 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3444 reg_emitted (reg_save_ar_lc
);
3445 insn
= emit_move_insn (alt_reg
, reg
);
3446 RTX_FRAME_RELATED_P (insn
) = 1;
3447 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3449 /* Even if we're not going to generate an epilogue, we still
3450 need to save the register so that EH works. */
3452 emit_insn (gen_prologue_use (alt_reg
));
3456 alt_regno
= next_scratch_gr_reg ();
3457 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3458 emit_move_insn (alt_reg
, reg
);
3459 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3464 /* Save the return pointer. */
3465 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3467 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3468 if (current_frame_info
.r
[reg_save_b0
] != 0)
3470 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3471 reg_emitted (reg_save_b0
);
3472 insn
= emit_move_insn (alt_reg
, reg
);
3473 RTX_FRAME_RELATED_P (insn
) = 1;
3474 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3476 /* Even if we're not going to generate an epilogue, we still
3477 need to save the register so that EH works. */
3479 emit_insn (gen_prologue_use (alt_reg
));
3483 alt_regno
= next_scratch_gr_reg ();
3484 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3485 emit_move_insn (alt_reg
, reg
);
3486 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3491 if (current_frame_info
.r
[reg_save_gp
])
3493 reg_emitted (reg_save_gp
);
3494 insn
= emit_move_insn (gen_rtx_REG (DImode
,
3495 current_frame_info
.r
[reg_save_gp
]),
3496 pic_offset_table_rtx
);
3499 /* We should now be at the base of the gr/br/fr spill area. */
3500 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3501 + current_frame_info
.spill_size
));
3503 /* Spill all general registers. */
3504 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3505 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3507 reg
= gen_rtx_REG (DImode
, regno
);
3508 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
3512 /* Spill the rest of the BR registers. */
3513 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3514 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3516 alt_regno
= next_scratch_gr_reg ();
3517 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3518 reg
= gen_rtx_REG (DImode
, regno
);
3519 emit_move_insn (alt_reg
, reg
);
3520 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3524 /* Align the frame and spill all FR registers. */
3525 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3526 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3528 gcc_assert (!(cfa_off
& 15));
3529 reg
= gen_rtx_REG (XFmode
, regno
);
3530 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
3534 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3536 finish_spill_pointers ();
3539 /* Output the textual info surrounding the prologue. */
3542 ia64_start_function (FILE *file
, const char *fnname
,
3543 tree decl ATTRIBUTE_UNUSED
)
3545 #if VMS_DEBUGGING_INFO
3547 && strncmp (vms_debug_main
, fnname
, strlen (vms_debug_main
)) == 0)
3549 targetm
.asm_out
.globalize_label (asm_out_file
, VMS_DEBUG_MAIN_POINTER
);
3550 ASM_OUTPUT_DEF (asm_out_file
, VMS_DEBUG_MAIN_POINTER
, fnname
);
3551 dwarf2out_vms_debug_main_pointer ();
3556 fputs ("\t.proc ", file
);
3557 assemble_name (file
, fnname
);
3559 ASM_OUTPUT_LABEL (file
, fnname
);
3562 /* Called after register allocation to add any instructions needed for the
3563 epilogue. Using an epilogue insn is favored compared to putting all of the
3564 instructions in output_function_prologue(), since it allows the scheduler
3565 to intermix instructions with the saves of the caller saved registers. In
3566 some cases, it might be necessary to emit a barrier instruction as the last
3567 insn to prevent such scheduling. */
3570 ia64_expand_epilogue (int sibcall_p
)
3572 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
3573 int regno
, alt_regno
, cfa_off
;
3575 ia64_compute_frame_size (get_frame_size ());
3577 /* If there is a frame pointer, then we use it instead of the stack
3578 pointer, so that the stack pointer does not need to be valid when
3579 the epilogue starts. See EXIT_IGNORE_STACK. */
3580 if (frame_pointer_needed
)
3581 setup_spill_pointers (current_frame_info
.n_spilled
,
3582 hard_frame_pointer_rtx
, 0);
3584 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
3585 current_frame_info
.total_size
);
3587 if (current_frame_info
.total_size
!= 0)
3589 /* ??? At this point we must generate a magic insn that appears to
3590 modify the spill iterators and the frame pointer. This would
3591 allow the most scheduling freedom. For now, just hard stop. */
3592 emit_insn (gen_blockage ());
3595 /* Locate the bottom of the register save area. */
3596 cfa_off
= (current_frame_info
.spill_cfa_off
3597 + current_frame_info
.spill_size
3598 + current_frame_info
.extra_spill_size
);
3600 /* Restore the predicate registers. */
3601 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3603 if (current_frame_info
.r
[reg_save_pr
] != 0)
3605 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3606 reg_emitted (reg_save_pr
);
3610 alt_regno
= next_scratch_gr_reg ();
3611 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3612 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3615 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3616 emit_move_insn (reg
, alt_reg
);
3619 /* Restore the application registers. */
3621 /* Load the saved unat from the stack, but do not restore it until
3622 after the GRs have been restored. */
3623 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3625 if (current_frame_info
.r
[reg_save_ar_unat
] != 0)
3628 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3629 reg_emitted (reg_save_ar_unat
);
3633 alt_regno
= next_scratch_gr_reg ();
3634 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3635 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3636 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
3641 ar_unat_save_reg
= NULL_RTX
;
3643 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0)
3645 reg_emitted (reg_save_ar_pfs
);
3646 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_pfs
]);
3647 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3648 emit_move_insn (reg
, alt_reg
);
3650 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3652 alt_regno
= next_scratch_gr_reg ();
3653 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3654 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3656 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3657 emit_move_insn (reg
, alt_reg
);
3660 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3662 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3664 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3665 reg_emitted (reg_save_ar_lc
);
3669 alt_regno
= next_scratch_gr_reg ();
3670 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3671 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3674 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3675 emit_move_insn (reg
, alt_reg
);
3678 /* Restore the return pointer. */
3679 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3681 if (current_frame_info
.r
[reg_save_b0
] != 0)
3683 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3684 reg_emitted (reg_save_b0
);
3688 alt_regno
= next_scratch_gr_reg ();
3689 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3690 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3693 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3694 emit_move_insn (reg
, alt_reg
);
3697 /* We should now be at the base of the gr/br/fr spill area. */
3698 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3699 + current_frame_info
.spill_size
));
3701 /* The GP may be stored on the stack in the prologue, but it's
3702 never restored in the epilogue. Skip the stack slot. */
3703 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
3706 /* Restore all general registers. */
3707 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
3708 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3710 reg
= gen_rtx_REG (DImode
, regno
);
3711 do_restore (gen_gr_restore
, reg
, cfa_off
);
3715 /* Restore the branch registers. */
3716 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3717 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3719 alt_regno
= next_scratch_gr_reg ();
3720 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3721 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3723 reg
= gen_rtx_REG (DImode
, regno
);
3724 emit_move_insn (reg
, alt_reg
);
3727 /* Restore floating point registers. */
3728 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3729 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3731 gcc_assert (!(cfa_off
& 15));
3732 reg
= gen_rtx_REG (XFmode
, regno
);
3733 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
3737 /* Restore ar.unat for real. */
3738 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3740 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3741 emit_move_insn (reg
, ar_unat_save_reg
);
3744 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3746 finish_spill_pointers ();
3748 if (current_frame_info
.total_size
3749 || cfun
->machine
->ia64_eh_epilogue_sp
3750 || frame_pointer_needed
)
3752 /* ??? At this point we must generate a magic insn that appears to
3753 modify the spill iterators, the stack pointer, and the frame
3754 pointer. This would allow the most scheduling freedom. For now,
3756 emit_insn (gen_blockage ());
3759 if (cfun
->machine
->ia64_eh_epilogue_sp
)
3760 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
3761 else if (frame_pointer_needed
)
3763 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
3764 RTX_FRAME_RELATED_P (insn
) = 1;
3765 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
3767 else if (current_frame_info
.total_size
)
3769 rtx offset
, frame_size_rtx
;
3771 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
3772 if (satisfies_constraint_I (frame_size_rtx
))
3773 offset
= frame_size_rtx
;
3776 regno
= next_scratch_gr_reg ();
3777 offset
= gen_rtx_REG (DImode
, regno
);
3778 emit_move_insn (offset
, frame_size_rtx
);
3781 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
3784 RTX_FRAME_RELATED_P (insn
) = 1;
3785 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
3786 gen_rtx_SET (VOIDmode
,
3788 gen_rtx_PLUS (DImode
,
3793 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
3794 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
3797 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
3800 int fp
= GR_REG (2);
3801 /* We need a throw away register here, r0 and r1 are reserved,
3802 so r2 is the first available call clobbered register. If
3803 there was a frame_pointer register, we may have swapped the
3804 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3805 sure we're using the string "r2" when emitting the register
3806 name for the assembler. */
3807 if (current_frame_info
.r
[reg_fp
]
3808 && current_frame_info
.r
[reg_fp
] == GR_REG (2))
3809 fp
= HARD_FRAME_POINTER_REGNUM
;
3811 /* We must emit an alloc to force the input registers to become output
3812 registers. Otherwise, if the callee tries to pass its parameters
3813 through to another call without an intervening alloc, then these
3815 /* ??? We don't need to preserve all input registers. We only need to
3816 preserve those input registers used as arguments to the sibling call.
3817 It is unclear how to compute that number here. */
3818 if (current_frame_info
.n_input_regs
!= 0)
3820 rtx n_inputs
= GEN_INT (current_frame_info
.n_input_regs
);
3821 insn
= emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
3822 const0_rtx
, const0_rtx
,
3823 n_inputs
, const0_rtx
));
3824 RTX_FRAME_RELATED_P (insn
) = 1;
3829 /* Return 1 if br.ret can do all the work required to return from a
3833 ia64_direct_return (void)
3835 if (reload_completed
&& ! frame_pointer_needed
)
3837 ia64_compute_frame_size (get_frame_size ());
3839 return (current_frame_info
.total_size
== 0
3840 && current_frame_info
.n_spilled
== 0
3841 && current_frame_info
.r
[reg_save_b0
] == 0
3842 && current_frame_info
.r
[reg_save_pr
] == 0
3843 && current_frame_info
.r
[reg_save_ar_pfs
] == 0
3844 && current_frame_info
.r
[reg_save_ar_unat
] == 0
3845 && current_frame_info
.r
[reg_save_ar_lc
] == 0);
3850 /* Return the magic cookie that we use to hold the return address
3851 during early compilation. */
3854 ia64_return_addr_rtx (HOST_WIDE_INT count
, rtx frame ATTRIBUTE_UNUSED
)
3858 return gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_RET_ADDR
);
3861 /* Split this value after reload, now that we know where the return
3862 address is saved. */
3865 ia64_split_return_addr_rtx (rtx dest
)
3869 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3871 if (current_frame_info
.r
[reg_save_b0
] != 0)
3873 src
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3874 reg_emitted (reg_save_b0
);
3882 /* Compute offset from CFA for BR0. */
3883 /* ??? Must be kept in sync with ia64_expand_prologue. */
3884 off
= (current_frame_info
.spill_cfa_off
3885 + current_frame_info
.spill_size
);
3886 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3887 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3890 /* Convert CFA offset to a register based offset. */
3891 if (frame_pointer_needed
)
3892 src
= hard_frame_pointer_rtx
;
3895 src
= stack_pointer_rtx
;
3896 off
+= current_frame_info
.total_size
;
3899 /* Load address into scratch register. */
3900 off_r
= GEN_INT (off
);
3901 if (satisfies_constraint_I (off_r
))
3902 emit_insn (gen_adddi3 (dest
, src
, off_r
));
3905 emit_move_insn (dest
, off_r
);
3906 emit_insn (gen_adddi3 (dest
, src
, dest
));
3909 src
= gen_rtx_MEM (Pmode
, dest
);
3913 src
= gen_rtx_REG (DImode
, BR_REG (0));
3915 emit_move_insn (dest
, src
);
3919 ia64_hard_regno_rename_ok (int from
, int to
)
3921 /* Don't clobber any of the registers we reserved for the prologue. */
3924 for (r
= reg_fp
; r
<= reg_save_ar_lc
; r
++)
3925 if (to
== current_frame_info
.r
[r
]
3926 || from
== current_frame_info
.r
[r
]
3927 || to
== emitted_frame_related_regs
[r
]
3928 || from
== emitted_frame_related_regs
[r
])
3931 /* Don't use output registers outside the register frame. */
3932 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
3935 /* Retain even/oddness on predicate register pairs. */
3936 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
3937 return (from
& 1) == (to
& 1);
3942 /* Target hook for assembling integer objects. Handle word-sized
3943 aligned objects and detect the cases when @fptr is needed. */
3946 ia64_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3948 if (size
== POINTER_SIZE
/ BITS_PER_UNIT
3949 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
3950 && GET_CODE (x
) == SYMBOL_REF
3951 && SYMBOL_REF_FUNCTION_P (x
))
3953 static const char * const directive
[2][2] = {
3954 /* 64-bit pointer */ /* 32-bit pointer */
3955 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3956 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3958 fputs (directive
[(aligned_p
!= 0)][POINTER_SIZE
== 32], asm_out_file
);
3959 output_addr_const (asm_out_file
, x
);
3960 fputs (")\n", asm_out_file
);
3963 return default_assemble_integer (x
, size
, aligned_p
);
3966 /* Emit the function prologue. */
3969 ia64_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3971 int mask
, grsave
, grsave_prev
;
3973 if (current_frame_info
.need_regstk
)
3974 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
3975 current_frame_info
.n_input_regs
,
3976 current_frame_info
.n_local_regs
,
3977 current_frame_info
.n_output_regs
,
3978 current_frame_info
.n_rotate_regs
);
3980 if (ia64_except_unwind_info (&global_options
) != UI_TARGET
)
3983 /* Emit the .prologue directive. */
3986 grsave
= grsave_prev
= 0;
3987 if (current_frame_info
.r
[reg_save_b0
] != 0)
3990 grsave
= grsave_prev
= current_frame_info
.r
[reg_save_b0
];
3992 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0
3993 && (grsave_prev
== 0
3994 || current_frame_info
.r
[reg_save_ar_pfs
] == grsave_prev
+ 1))
3997 if (grsave_prev
== 0)
3998 grsave
= current_frame_info
.r
[reg_save_ar_pfs
];
3999 grsave_prev
= current_frame_info
.r
[reg_save_ar_pfs
];
4001 if (current_frame_info
.r
[reg_fp
] != 0
4002 && (grsave_prev
== 0
4003 || current_frame_info
.r
[reg_fp
] == grsave_prev
+ 1))
4006 if (grsave_prev
== 0)
4007 grsave
= HARD_FRAME_POINTER_REGNUM
;
4008 grsave_prev
= current_frame_info
.r
[reg_fp
];
4010 if (current_frame_info
.r
[reg_save_pr
] != 0
4011 && (grsave_prev
== 0
4012 || current_frame_info
.r
[reg_save_pr
] == grsave_prev
+ 1))
4015 if (grsave_prev
== 0)
4016 grsave
= current_frame_info
.r
[reg_save_pr
];
4019 if (mask
&& TARGET_GNU_AS
)
4020 fprintf (file
, "\t.prologue %d, %d\n", mask
,
4021 ia64_dbx_register_number (grsave
));
4023 fputs ("\t.prologue\n", file
);
4025 /* Emit a .spill directive, if necessary, to relocate the base of
4026 the register spill area. */
4027 if (current_frame_info
.spill_cfa_off
!= -16)
4028 fprintf (file
, "\t.spill %ld\n",
4029 (long) (current_frame_info
.spill_cfa_off
4030 + current_frame_info
.spill_size
));
4033 /* Emit the .body directive at the scheduled end of the prologue. */
4036 ia64_output_function_end_prologue (FILE *file
)
4038 if (ia64_except_unwind_info (&global_options
) != UI_TARGET
)
4041 fputs ("\t.body\n", file
);
4044 /* Emit the function epilogue. */
4047 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
4048 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4052 if (current_frame_info
.r
[reg_fp
])
4054 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
4055 reg_names
[HARD_FRAME_POINTER_REGNUM
]
4056 = reg_names
[current_frame_info
.r
[reg_fp
]];
4057 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
4058 reg_emitted (reg_fp
);
4060 if (! TARGET_REG_NAMES
)
4062 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
4063 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
4064 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
4065 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
4066 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
4067 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
4070 current_frame_info
.initialized
= 0;
4074 ia64_dbx_register_number (int regno
)
4076 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4077 from its home at loc79 to something inside the register frame. We
4078 must perform the same renumbering here for the debug info. */
4079 if (current_frame_info
.r
[reg_fp
])
4081 if (regno
== HARD_FRAME_POINTER_REGNUM
)
4082 regno
= current_frame_info
.r
[reg_fp
];
4083 else if (regno
== current_frame_info
.r
[reg_fp
])
4084 regno
= HARD_FRAME_POINTER_REGNUM
;
4087 if (IN_REGNO_P (regno
))
4088 return 32 + regno
- IN_REG (0);
4089 else if (LOC_REGNO_P (regno
))
4090 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
4091 else if (OUT_REGNO_P (regno
))
4092 return (32 + current_frame_info
.n_input_regs
4093 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
4098 /* Implement TARGET_TRAMPOLINE_INIT.
4100 The trampoline should set the static chain pointer to value placed
4101 into the trampoline and should branch to the specified routine.
4102 To make the normal indirect-subroutine calling convention work,
4103 the trampoline must look like a function descriptor; the first
4104 word being the target address and the second being the target's
4107 We abuse the concept of a global pointer by arranging for it
4108 to point to the data we need to load. The complete trampoline
4109 has the following form:
4111 +-------------------+ \
4112 TRAMP: | __ia64_trampoline | |
4113 +-------------------+ > fake function descriptor
4115 +-------------------+ /
4116 | target descriptor |
4117 +-------------------+
4119 +-------------------+
4123 ia64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx static_chain
)
4125 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4126 rtx addr
, addr_reg
, tramp
, eight
= GEN_INT (8);
4128 /* The Intel assembler requires that the global __ia64_trampoline symbol
4129 be declared explicitly */
4132 static bool declared_ia64_trampoline
= false;
4134 if (!declared_ia64_trampoline
)
4136 declared_ia64_trampoline
= true;
4137 (*targetm
.asm_out
.globalize_label
) (asm_out_file
,
4138 "__ia64_trampoline");
4142 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4143 addr
= convert_memory_address (Pmode
, XEXP (m_tramp
, 0));
4144 fnaddr
= convert_memory_address (Pmode
, fnaddr
);
4145 static_chain
= convert_memory_address (Pmode
, static_chain
);
4147 /* Load up our iterator. */
4148 addr_reg
= copy_to_reg (addr
);
4149 m_tramp
= adjust_automodify_address (m_tramp
, Pmode
, addr_reg
, 0);
4151 /* The first two words are the fake descriptor:
4152 __ia64_trampoline, ADDR+16. */
4153 tramp
= gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline");
4154 if (TARGET_ABI_OPEN_VMS
)
4156 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4157 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4158 relocation against function symbols to make it identical to the
4159 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4160 strict ELF and dereference to get the bare code address. */
4161 rtx reg
= gen_reg_rtx (Pmode
);
4162 SYMBOL_REF_FLAGS (tramp
) |= SYMBOL_FLAG_FUNCTION
;
4163 emit_move_insn (reg
, tramp
);
4164 emit_move_insn (reg
, gen_rtx_MEM (Pmode
, reg
));
4167 emit_move_insn (m_tramp
, tramp
);
4168 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
4169 m_tramp
= adjust_automodify_address (m_tramp
, VOIDmode
, NULL
, 8);
4171 emit_move_insn (m_tramp
, force_reg (Pmode
, plus_constant (addr
, 16)));
4172 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
4173 m_tramp
= adjust_automodify_address (m_tramp
, VOIDmode
, NULL
, 8);
4175 /* The third word is the target descriptor. */
4176 emit_move_insn (m_tramp
, force_reg (Pmode
, fnaddr
));
4177 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
4178 m_tramp
= adjust_automodify_address (m_tramp
, VOIDmode
, NULL
, 8);
4180 /* The fourth word is the static chain. */
4181 emit_move_insn (m_tramp
, static_chain
);
4184 /* Do any needed setup for a variadic function. CUM has not been updated
4185 for the last named argument which has type TYPE and mode MODE.
4187 We generate the actual spill instructions during prologue generation. */
4190 ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4191 tree type
, int * pretend_size
,
4192 int second_time ATTRIBUTE_UNUSED
)
4194 CUMULATIVE_ARGS next_cum
= *cum
;
4196 /* Skip the current argument. */
4197 ia64_function_arg_advance (&next_cum
, mode
, type
, 1);
4199 if (next_cum
.words
< MAX_ARGUMENT_SLOTS
)
4201 int n
= MAX_ARGUMENT_SLOTS
- next_cum
.words
;
4202 *pretend_size
= n
* UNITS_PER_WORD
;
4203 cfun
->machine
->n_varargs
= n
;
4207 /* Check whether TYPE is a homogeneous floating point aggregate. If
4208 it is, return the mode of the floating point type that appears
4209 in all leafs. If it is not, return VOIDmode.
4211 An aggregate is a homogeneous floating point aggregate is if all
4212 fields/elements in it have the same floating point type (e.g,
4213 SFmode). 128-bit quad-precision floats are excluded.
4215 Variable sized aggregates should never arrive here, since we should
4216 have already decided to pass them by reference. Top-level zero-sized
4217 aggregates are excluded because our parallels crash the middle-end. */
4219 static enum machine_mode
4220 hfa_element_mode (const_tree type
, bool nested
)
4222 enum machine_mode element_mode
= VOIDmode
;
4223 enum machine_mode mode
;
4224 enum tree_code code
= TREE_CODE (type
);
4225 int know_element_mode
= 0;
4228 if (!nested
&& (!TYPE_SIZE (type
) || integer_zerop (TYPE_SIZE (type
))))
4233 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
4234 case BOOLEAN_TYPE
: case POINTER_TYPE
:
4235 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
4236 case LANG_TYPE
: case FUNCTION_TYPE
:
4239 /* Fortran complex types are supposed to be HFAs, so we need to handle
4240 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4243 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
4244 && TYPE_MODE (type
) != TCmode
)
4245 return GET_MODE_INNER (TYPE_MODE (type
));
4250 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4251 mode if this is contained within an aggregate. */
4252 if (nested
&& TYPE_MODE (type
) != TFmode
)
4253 return TYPE_MODE (type
);
4258 return hfa_element_mode (TREE_TYPE (type
), 1);
4262 case QUAL_UNION_TYPE
:
4263 for (t
= TYPE_FIELDS (type
); t
; t
= DECL_CHAIN (t
))
4265 if (TREE_CODE (t
) != FIELD_DECL
)
4268 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
4269 if (know_element_mode
)
4271 if (mode
!= element_mode
)
4274 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
4278 know_element_mode
= 1;
4279 element_mode
= mode
;
4282 return element_mode
;
4285 /* If we reach here, we probably have some front-end specific type
4286 that the backend doesn't know about. This can happen via the
4287 aggregate_value_p call in init_function_start. All we can do is
4288 ignore unknown tree types. */
4295 /* Return the number of words required to hold a quantity of TYPE and MODE
4296 when passed as an argument. */
4298 ia64_function_arg_words (const_tree type
, enum machine_mode mode
)
4302 if (mode
== BLKmode
)
4303 words
= int_size_in_bytes (type
);
4305 words
= GET_MODE_SIZE (mode
);
4307 return (words
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
; /* round up */
4310 /* Return the number of registers that should be skipped so the current
4311 argument (described by TYPE and WORDS) will be properly aligned.
4313 Integer and float arguments larger than 8 bytes start at the next
4314 even boundary. Aggregates larger than 8 bytes start at the next
4315 even boundary if the aggregate has 16 byte alignment. Note that
4316 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4317 but are still to be aligned in registers.
4319 ??? The ABI does not specify how to handle aggregates with
4320 alignment from 9 to 15 bytes, or greater than 16. We handle them
4321 all as if they had 16 byte alignment. Such aggregates can occur
4322 only if gcc extensions are used. */
4324 ia64_function_arg_offset (const CUMULATIVE_ARGS
*cum
,
4325 const_tree type
, int words
)
4327 /* No registers are skipped on VMS. */
4328 if (TARGET_ABI_OPEN_VMS
|| (cum
->words
& 1) == 0)
4332 && TREE_CODE (type
) != INTEGER_TYPE
4333 && TREE_CODE (type
) != REAL_TYPE
)
4334 return TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
;
4339 /* Return rtx for register where argument is passed, or zero if it is passed
4341 /* ??? 128-bit quad-precision floats are always passed in general
4345 ia64_function_arg_1 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4346 const_tree type
, bool named
, bool incoming
)
4348 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
4349 int words
= ia64_function_arg_words (type
, mode
);
4350 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4351 enum machine_mode hfa_mode
= VOIDmode
;
4353 /* For OPEN VMS, emit the instruction setting up the argument register here,
4354 when we know this will be together with the other arguments setup related
4355 insns. This is not the conceptually best place to do this, but this is
4356 the easiest as we have convenient access to cumulative args info. */
4358 if (TARGET_ABI_OPEN_VMS
&& mode
== VOIDmode
&& type
== void_type_node
4361 unsigned HOST_WIDE_INT regval
= cum
->words
;
4364 for (i
= 0; i
< 8; i
++)
4365 regval
|= ((int) cum
->atypes
[i
]) << (i
* 3 + 8);
4367 emit_move_insn (gen_rtx_REG (DImode
, GR_REG (25)),
4371 /* If all argument slots are used, then it must go on the stack. */
4372 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
4375 /* Check for and handle homogeneous FP aggregates. */
4377 hfa_mode
= hfa_element_mode (type
, 0);
4379 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4380 and unprototyped hfas are passed specially. */
4381 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
4385 int fp_regs
= cum
->fp_regs
;
4386 int int_regs
= cum
->words
+ offset
;
4387 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4391 /* If prototyped, pass it in FR regs then GR regs.
4392 If not prototyped, pass it in both FR and GR regs.
4394 If this is an SFmode aggregate, then it is possible to run out of
4395 FR regs while GR regs are still left. In that case, we pass the
4396 remaining part in the GR regs. */
4398 /* Fill the FP regs. We do this always. We stop if we reach the end
4399 of the argument, the last FP register, or the last argument slot. */
4401 byte_size
= ((mode
== BLKmode
)
4402 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4403 args_byte_size
= int_regs
* UNITS_PER_WORD
;
4405 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
4406 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
4408 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4409 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
4413 args_byte_size
+= hfa_size
;
4417 /* If no prototype, then the whole thing must go in GR regs. */
4418 if (! cum
->prototype
)
4420 /* If this is an SFmode aggregate, then we might have some left over
4421 that needs to go in GR regs. */
4422 else if (byte_size
!= offset
)
4423 int_regs
+= offset
/ UNITS_PER_WORD
;
4425 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4427 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
4429 enum machine_mode gr_mode
= DImode
;
4430 unsigned int gr_size
;
4432 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4433 then this goes in a GR reg left adjusted/little endian, right
4434 adjusted/big endian. */
4435 /* ??? Currently this is handled wrong, because 4-byte hunks are
4436 always right adjusted/little endian. */
4439 /* If we have an even 4 byte hunk because the aggregate is a
4440 multiple of 4 bytes in size, then this goes in a GR reg right
4441 adjusted/little endian. */
4442 else if (byte_size
- offset
== 4)
4445 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4446 gen_rtx_REG (gr_mode
, (basereg
4450 gr_size
= GET_MODE_SIZE (gr_mode
);
4452 if (gr_size
== UNITS_PER_WORD
4453 || (gr_size
< UNITS_PER_WORD
&& offset
% UNITS_PER_WORD
== 0))
4455 else if (gr_size
> UNITS_PER_WORD
)
4456 int_regs
+= gr_size
/ UNITS_PER_WORD
;
4458 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4461 /* On OpenVMS variable argument is either in Rn or Fn. */
4462 else if (TARGET_ABI_OPEN_VMS
&& named
== 0)
4464 if (FLOAT_MODE_P (mode
))
4465 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->words
);
4467 return gen_rtx_REG (mode
, basereg
+ cum
->words
);
4470 /* Integral and aggregates go in general registers. If we have run out of
4471 FR registers, then FP values must also go in general registers. This can
4472 happen when we have a SFmode HFA. */
4473 else if (mode
== TFmode
|| mode
== TCmode
4474 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
4476 int byte_size
= ((mode
== BLKmode
)
4477 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4478 if (BYTES_BIG_ENDIAN
4479 && (mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
4480 && byte_size
< UNITS_PER_WORD
4483 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4484 gen_rtx_REG (DImode
,
4485 (basereg
+ cum
->words
4488 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
4491 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
4495 /* If there is a prototype, then FP values go in a FR register when
4496 named, and in a GR register when unnamed. */
4497 else if (cum
->prototype
)
4500 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
4501 /* In big-endian mode, an anonymous SFmode value must be represented
4502 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4503 the value into the high half of the general register. */
4504 else if (BYTES_BIG_ENDIAN
&& mode
== SFmode
)
4505 return gen_rtx_PARALLEL (mode
,
4507 gen_rtx_EXPR_LIST (VOIDmode
,
4508 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
),
4511 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
4513 /* If there is no prototype, then FP values go in both FR and GR
4517 /* See comment above. */
4518 enum machine_mode inner_mode
=
4519 (BYTES_BIG_ENDIAN
&& mode
== SFmode
) ? DImode
: mode
;
4521 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4522 gen_rtx_REG (mode
, (FR_ARG_FIRST
4525 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4526 gen_rtx_REG (inner_mode
,
4527 (basereg
+ cum
->words
4531 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
4535 /* Implement TARGET_FUNCTION_ARG target hook.
   Thin wrapper: forwards all arguments to ia64_function_arg_1 with its
   final flag set to false (presumably "incoming" == false, i.e. the
   outgoing-argument case -- TODO confirm against ia64_function_arg_1).
   NOTE(review): this chunk is a corrupted extract -- the file's original
   line numbers are fused into the text and several physical lines
   (return type, braces) are missing.  Comments describe only what is
   visible.  */
4538 ia64_function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4539 const_tree type
, bool named
)
4541 return ia64_function_arg_1 (cum
, mode
, type
, named
, false);
4544 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.
   Same as ia64_function_arg, but forwards to ia64_function_arg_1 with
   the final flag true (the incoming-argument case).  */
4547 ia64_function_incoming_arg (CUMULATIVE_ARGS
*cum
,
4548 enum machine_mode mode
,
4549 const_tree type
, bool named
)
4551 return ia64_function_arg_1 (cum
, mode
, type
, named
, true);
4554 /* Return number of bytes, at the beginning of the argument, that must be
4555 put in registers.  0 if the argument is entirely in registers or entirely
   on the stack.  (NOTE(review): the closing line of this comment was
   dropped by the extraction; wording reconstructed -- verify against
   upstream.)  */
4559 ia64_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4560 tree type
, bool named ATTRIBUTE_UNUSED
)
/* Size of the argument in words and its alignment padding, both computed
   by helpers defined elsewhere in this file.  */
4562 int words
= ia64_function_arg_words (type
, mode
);
4563 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4565 /* If all argument slots are used, then it must go on the stack.  */
4566 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
/* NOTE(review): the body of this branch (presumably "return 0;") is
   missing from the extract -- gap in the fused line numbers.  */
4569 /* It doesn't matter whether the argument goes in FR or GR regs.  If
4570 it fits within the 8 argument slots, then it goes entirely in
4571 registers.  If it extends past the last argument slot, then the rest
4572 goes on the stack.  */
4574 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
/* NOTE(review): this branch's body ("return 0;"?) is also missing.
   Otherwise, return the bytes that still fit in the register slots.  */
4577 return (MAX_ARGUMENT_SLOTS
- cum
->words
- offset
) * UNITS_PER_WORD
;
4580 /* Return ivms_arg_type based on machine_mode. */
4582 static enum ivms_arg_type
4583 ia64_arg_type (enum machine_mode mode
)
4596 /* Update CUM to point after this argument. This is patterned after
4597 ia64_function_arg. */
4600 ia64_function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4601 const_tree type
, bool named
)
4603 int words
= ia64_function_arg_words (type
, mode
);
4604 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4605 enum machine_mode hfa_mode
= VOIDmode
;
4607 /* If all arg slots are already full, then there is nothing to do. */
4608 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
4610 cum
->words
+= words
+ offset
;
4614 cum
->atypes
[cum
->words
] = ia64_arg_type (mode
);
4615 cum
->words
+= words
+ offset
;
4617 /* Check for and handle homogeneous FP aggregates. */
4619 hfa_mode
= hfa_element_mode (type
, 0);
4621 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4622 and unprototyped hfas are passed specially. */
4623 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
4625 int fp_regs
= cum
->fp_regs
;
4626 /* This is the original value of cum->words + offset. */
4627 int int_regs
= cum
->words
- words
;
4628 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4632 /* If prototyped, pass it in FR regs then GR regs.
4633 If not prototyped, pass it in both FR and GR regs.
4635 If this is an SFmode aggregate, then it is possible to run out of
4636 FR regs while GR regs are still left. In that case, we pass the
4637 remaining part in the GR regs. */
4639 /* Fill the FP regs. We do this always. We stop if we reach the end
4640 of the argument, the last FP register, or the last argument slot. */
4642 byte_size
= ((mode
== BLKmode
)
4643 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4644 args_byte_size
= int_regs
* UNITS_PER_WORD
;
4646 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
4647 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
4650 args_byte_size
+= hfa_size
;
4654 cum
->fp_regs
= fp_regs
;
4657 /* On OpenVMS variable argument is either in Rn or Fn. */
4658 else if (TARGET_ABI_OPEN_VMS
&& named
== 0)
4660 cum
->int_regs
= cum
->words
;
4661 cum
->fp_regs
= cum
->words
;
4664 /* Integral and aggregates go in general registers. So do TFmode FP values.
4665 If we have run out of FR registers, then other FP values must also go in
4666 general registers. This can happen when we have a SFmode HFA. */
4667 else if (mode
== TFmode
|| mode
== TCmode
4668 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
4669 cum
->int_regs
= cum
->words
;
4671 /* If there is a prototype, then FP values go in a FR register when
4672 named, and in a GR register when unnamed. */
4673 else if (cum
->prototype
)
4676 cum
->int_regs
= cum
->words
;
4678 /* ??? Complex types should not reach here. */
4679 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
4681 /* If there is no prototype, then FP values go in both FR and GR
4685 /* ??? Complex types should not reach here. */
4686 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
4687 cum
->int_regs
= cum
->words
;
4691 /* Arguments with alignment larger than 8 bytes start at the next even
4692 boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
4693 even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
/* Returns the argument boundary in bits: PARM_BOUNDARY, or twice that for
   the over-aligned cases described above.  NOTE(review): the return type,
   braces and the "if (type)" split between the type path and the mode
   path appear to be missing from this extract.  */
4696 ia64_function_arg_boundary (enum machine_mode mode
, const_tree type
)
4698 if (mode
== TFmode
&& TARGET_HPUX
&& TARGET_ILP32
)
4699 return PARM_BOUNDARY
* 2;
/* Type-based path: honor a declared alignment wider than one slot.  */
4703 if (TYPE_ALIGN (type
) > PARM_BOUNDARY
)
4704 return PARM_BOUNDARY
* 2;
4706 return PARM_BOUNDARY
;
/* Mode-based path (no type available): use the mode's bit size.  */
4709 if (GET_MODE_BITSIZE (mode
) > PARM_BOUNDARY
)
4710 return PARM_BOUNDARY
* 2;
4712 return PARM_BOUNDARY
;
4715 /* True if it is OK to do sibling call optimization for the specified
4716 call expression EXP.  DECL will be the called function, or NULL if
4717 this is an indirect call.  */
4719 ia64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
4721 /* We can't perform a sibcall if the current function has the syscall_linkage
   attribute.  (NOTE(review): comment close and the branch body --
   presumably "return false;" -- are missing from this extract.)  */
4723 if (lookup_attribute ("syscall_linkage",
4724 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
4727 /* We must always return with our current GP.  This means we can
4728 only sibcall to functions defined in the current module unless
4729 TARGET_CONST_GP is set to true.  */
4730 return (decl
&& (*targetm
.binds_local_p
) (decl
)) || TARGET_CONST_GP
;
4734 /* Implement va_arg.  */
/* Gimplify a va_arg access for the IA-64 ABI.  NOTE(review): the
   signature is truncated in this extract -- the trailing "gimple_seq
   *post_p" parameter line is missing (post_p is used below), as are the
   return type and braces.  */
4737 ia64_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
4740 /* Variable sized types are passed by reference.  */
4741 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
/* Fetch a pointer from the va_list and dereference it.  */
4743 tree ptrtype
= build_pointer_type (type
);
4744 tree addr
= std_gimplify_va_arg_expr (valist
, ptrtype
, pre_p
, post_p
);
4745 return build_va_arg_indirect_ref (addr
);
4748 /* Aggregate arguments with alignment larger than 8 bytes start at
4749 the next even boundary.  Integer and floating point arguments
4750 do so if they are larger than 8 bytes, whether or not they are
4751 also aligned larger than 8 bytes.  */
4752 if ((TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == INTEGER_TYPE
)
4753 ? int_size_in_bytes (type
) > 8 : TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
/* Round valist up to the next 2*UNITS_PER_WORD boundary:
   valist = (valist + 2*UPW - 1) & -(2*UPW).  */
4755 tree t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (valist
), valist
,
4756 size_int (2 * UNITS_PER_WORD
- 1));
4757 t
= fold_convert (sizetype
, t
);
4758 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4759 size_int (-2 * UNITS_PER_WORD
));
4760 t
= fold_convert (TREE_TYPE (valist
), t
);
4761 gimplify_assign (unshare_expr (valist
), t
, pre_p
);
/* Delegate the actual fetch to the generic implementation.  */
4764 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4767 /* Return 1 if function return value returned in memory.  Return 0 if it is
   in a register.  (NOTE(review): comment close reconstructed; this
   extract also drops every "return" statement body below -- only the
   conditions survive.)  */
4771 ia64_return_in_memory (const_tree valtype
, const_tree fntype ATTRIBUTE_UNUSED
)
4773 enum machine_mode mode
;
4774 enum machine_mode hfa_mode
;
4775 HOST_WIDE_INT byte_size
;
/* Size of the return value in bytes; for BLKmode use the type's size.  */
4777 mode
= TYPE_MODE (valtype
)
;
4778 byte_size
= GET_MODE_SIZE (mode
);
4779 if (mode
== BLKmode
)
4781 byte_size
= int_size_in_bytes (valtype
);
4786 /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
4788 hfa_mode
= hfa_element_mode (valtype
, 0);
4789 if (hfa_mode
!= VOIDmode
)
4791 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
/* Too many HFA elements for the FP return registers -> memory.  */
4793 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
/* Non-HFA: memory if larger than the integer return-slot budget.  */
4798 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
4804 /* Return rtx for register that holds the function return value. */
4807 ia64_function_value (const_tree valtype
,
4808 const_tree fn_decl_or_type
,
4809 bool outgoing ATTRIBUTE_UNUSED
)
4811 enum machine_mode mode
;
4812 enum machine_mode hfa_mode
;
4814 const_tree func
= fn_decl_or_type
;
4817 && !DECL_P (fn_decl_or_type
))
4820 mode
= TYPE_MODE (valtype
);
4821 hfa_mode
= hfa_element_mode (valtype
, 0);
4823 if (hfa_mode
!= VOIDmode
)
4831 hfa_size
= GET_MODE_SIZE (hfa_mode
);
4832 byte_size
= ((mode
== BLKmode
)
4833 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
4835 for (i
= 0; offset
< byte_size
; i
++)
4837 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4838 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
4842 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4844 else if (FLOAT_TYPE_P (valtype
) && mode
!= TFmode
&& mode
!= TCmode
)
4845 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
4848 bool need_parallel
= false;
4850 /* In big-endian mode, we need to manage the layout of aggregates
4851 in the registers so that we get the bits properly aligned in
4852 the highpart of the registers. */
4853 if (BYTES_BIG_ENDIAN
4854 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
4855 need_parallel
= true;
4857 /* Something like struct S { long double x; char a[0] } is not an
4858 HFA structure, and therefore doesn't go in fp registers. But
4859 the middle-end will give it XFmode anyway, and XFmode values
4860 don't normally fit in integer registers. So we need to smuggle
4861 the value inside a parallel. */
4862 else if (mode
== XFmode
|| mode
== XCmode
|| mode
== RFmode
)
4863 need_parallel
= true;
4873 bytesize
= int_size_in_bytes (valtype
);
4874 /* An empty PARALLEL is invalid here, but the return value
4875 doesn't matter for empty structs. */
4877 return gen_rtx_REG (mode
, GR_RET_FIRST
);
4878 for (i
= 0; offset
< bytesize
; i
++)
4880 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4881 gen_rtx_REG (DImode
,
4884 offset
+= UNITS_PER_WORD
;
4886 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4889 mode
= ia64_promote_function_mode (valtype
, mode
, &unsignedp
,
4890 func
? TREE_TYPE (func
) : NULL_TREE
,
4893 return gen_rtx_REG (mode
, GR_RET_FIRST
);
4897 /* Worker function for TARGET_LIBCALL_VALUE.
   Library-call return values: scalar or complex floating-point modes
   (except TFmode) come back in FR_RET_FIRST, everything else in
   GR_RET_FIRST.  */
4900 ia64_libcall_value (enum machine_mode mode
,
4901 const_rtx fun ATTRIBUTE_UNUSED
)
4903 return gen_rtx_REG (mode
,
4904 (((GET_MODE_CLASS (mode
) == MODE_FLOAT
4905 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
4906 && (mode
) != TFmode
)
4907 ? FR_RET_FIRST
: GR_RET_FIRST
));
4910 /* Worker function for FUNCTION_VALUE_REGNO_P.
   True iff REGNO is one of the general or floating-point return-value
   registers.  */
4913 ia64_function_value_regno_p (const unsigned int regno
)
4915 return ((regno
>= GR_RET_FIRST
&& regno
<= GR_RET_LAST
)
4916 || (regno
>= FR_RET_FIRST
&& regno
<= FR_RET_LAST
));
4919 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4920 We need to emit DTP-relative relocations.  */
/* Emits a data4.ua/data8.ua @dtprel() directive for X into FILE.
   NOTE(review): the "if (size == 4) ... else" lines selecting between
   the two fputs calls, and the closing paren output, are missing from
   this extract (gap 4925 -> 4927 -> 4929).  */
4923 ia64_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
4925 gcc_assert (size
== 4 || size
== 8);
4927 fputs ("\tdata4.ua\t@dtprel(", file
);
4929 fputs ("\tdata8.ua\t@dtprel(", file
);
4930 output_addr_const (file
, x
);
4934 /* Print a memory address as an operand to reference that memory location.  */
4936 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4937 also call this from ia64_print_operand for memory addresses.  */
/* Deliberately(?) empty -- both parameters are ATTRIBUTE_UNUSED.
   NOTE(review): any body lines were dropped by the extraction
   (gap 4941 -> 4945), so this may not actually be empty upstream.  */
4940 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED
,
4941 rtx address ATTRIBUTE_UNUSED
)
4945 /* Print an operand to an assembler instruction.
4946 C Swap and print a comparison operator.
4947 D Print an FP comparison operator.
4948 E Print 32 - constant, for SImode shifts as extract.
4949 e Print 64 - constant, for DImode rotates.
4950 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4951 a floating point register emitted normally.
4952 G A floating point constant.
4953 I Invert a predicate register by adding 1.
4954 J Select the proper predicate register for a condition.
4955 j Select the inverse predicate register for a condition.
4956 O Append .acq for volatile load.
4957 P Postincrement of a MEM.
4958 Q Append .rel for volatile store.
4959 R Print .s .d or nothing for a single, double or no truncation.
4960 S Shift amount for shladd instruction.
4961 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4962 for Intel assembler.
4963 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4964 for Intel assembler.
4965 X A pair of floating point registers.
4966 r Print register name, or constant 0 as r0. HP compatibility for
4968 v Print vector constant value as an 8-byte integer value. */
4971 ia64_print_operand (FILE * file
, rtx x
, int code
)
4978 /* Handled below. */
4983 enum rtx_code c
= swap_condition (GET_CODE (x
));
4984 fputs (GET_RTX_NAME (c
), file
);
4989 switch (GET_CODE (x
))
5013 str
= GET_RTX_NAME (GET_CODE (x
));
5020 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
5024 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
5028 if (x
== CONST0_RTX (GET_MODE (x
)))
5029 str
= reg_names
[FR_REG (0)];
5030 else if (x
== CONST1_RTX (GET_MODE (x
)))
5031 str
= reg_names
[FR_REG (1)];
5034 gcc_assert (GET_CODE (x
) == REG
);
5035 str
= reg_names
[REGNO (x
)];
5044 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
5045 real_to_target (val
, &rv
, GET_MODE (x
));
5046 if (GET_MODE (x
) == SFmode
)
5047 fprintf (file
, "0x%08lx", val
[0] & 0xffffffff);
5048 else if (GET_MODE (x
) == DFmode
)
5049 fprintf (file
, "0x%08lx%08lx", (WORDS_BIG_ENDIAN
? val
[0] : val
[1])
5051 (WORDS_BIG_ENDIAN
? val
[1] : val
[0])
5054 output_operand_lossage ("invalid %%G mode");
5059 fputs (reg_names
[REGNO (x
) + 1], file
);
5065 unsigned int regno
= REGNO (XEXP (x
, 0));
5066 if (GET_CODE (x
) == EQ
)
5070 fputs (reg_names
[regno
], file
);
5075 if (MEM_VOLATILE_P (x
))
5076 fputs(".acq", file
);
5081 HOST_WIDE_INT value
;
5083 switch (GET_CODE (XEXP (x
, 0)))
5089 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5090 if (GET_CODE (x
) == CONST_INT
)
5094 gcc_assert (GET_CODE (x
) == REG
);
5095 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
5101 value
= GET_MODE_SIZE (GET_MODE (x
));
5105 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
5109 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
5114 if (MEM_VOLATILE_P (x
))
5115 fputs(".rel", file
);
5119 if (x
== CONST0_RTX (GET_MODE (x
)))
5121 else if (x
== CONST1_RTX (GET_MODE (x
)))
5123 else if (x
== CONST2_RTX (GET_MODE (x
)))
5126 output_operand_lossage ("invalid %%R value");
5130 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5134 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
5136 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
5142 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
5144 const char *prefix
= "0x";
5145 if (INTVAL (x
) & 0x80000000)
5147 fprintf (file
, "0xffffffff");
5150 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
5157 unsigned int regno
= REGNO (x
);
5158 fprintf (file
, "%s, %s", reg_names
[regno
], reg_names
[regno
+ 1]);
5163 /* If this operand is the constant zero, write it as register zero.
5164 Any register, zero, or CONST_INT value is OK here. */
5165 if (GET_CODE (x
) == REG
)
5166 fputs (reg_names
[REGNO (x
)], file
);
5167 else if (x
== CONST0_RTX (GET_MODE (x
)))
5169 else if (GET_CODE (x
) == CONST_INT
)
5170 output_addr_const (file
, x
);
5172 output_operand_lossage ("invalid %%r value");
5176 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
5177 x
= simplify_subreg (DImode
, x
, GET_MODE (x
), 0);
5184 /* For conditional branches, returns or calls, substitute
5185 sptk, dptk, dpnt, or spnt for %s. */
5186 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
5189 int pred_val
= INTVAL (XEXP (x
, 0));
5191 /* Guess top and bottom 10% statically predicted. */
5192 if (pred_val
< REG_BR_PROB_BASE
/ 50
5193 && br_prob_note_reliable_p (x
))
5195 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
5197 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98
5198 || !br_prob_note_reliable_p (x
))
5203 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
5208 fputs (which
, file
);
5213 x
= current_insn_predicate
;
5216 unsigned int regno
= REGNO (XEXP (x
, 0));
5217 if (GET_CODE (x
) == EQ
)
5219 fprintf (file
, "(%s) ", reg_names
[regno
]);
5224 output_operand_lossage ("ia64_print_operand: unknown code");
5228 switch (GET_CODE (x
))
5230 /* This happens for the spill/restore instructions. */
5235 /* ... fall through ... */
5238 fputs (reg_names
[REGNO (x
)], file
);
5243 rtx addr
= XEXP (x
, 0);
5244 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
5245 addr
= XEXP (addr
, 0);
5246 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
5251 output_addr_const (file
, x
);
5258 /* Compute a (partial) cost for rtx X. Return true if the complete
5259 cost has been computed, and false if subexpressions should be
5260 scanned. In either case, *TOTAL contains the cost result. */
5261 /* ??? This is incomplete. */
5264 ia64_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
5265 bool speed ATTRIBUTE_UNUSED
)
5273 *total
= satisfies_constraint_J (x
) ? 0 : COSTS_N_INSNS (1);
5276 if (satisfies_constraint_I (x
))
5278 else if (satisfies_constraint_J (x
))
5281 *total
= COSTS_N_INSNS (1);
5284 if (satisfies_constraint_K (x
) || satisfies_constraint_L (x
))
5287 *total
= COSTS_N_INSNS (1);
5292 *total
= COSTS_N_INSNS (1);
5298 *total
= COSTS_N_INSNS (3);
5302 *total
= COSTS_N_INSNS (4);
5306 /* For multiplies wider than HImode, we have to go to the FPU,
5307 which normally involves copies. Plus there's the latency
5308 of the multiply itself, and the latency of the instructions to
5309 transfer integer regs to FP regs. */
5310 if (FLOAT_MODE_P (GET_MODE (x
)))
5311 *total
= COSTS_N_INSNS (4);
5312 else if (GET_MODE_SIZE (GET_MODE (x
)) > 2)
5313 *total
= COSTS_N_INSNS (10);
5315 *total
= COSTS_N_INSNS (2);
5320 if (FLOAT_MODE_P (GET_MODE (x
)))
5322 *total
= COSTS_N_INSNS (4);
5330 *total
= COSTS_N_INSNS (1);
5337 /* We make divide expensive, so that divide-by-constant will be
5338 optimized to a multiply. */
5339 *total
= COSTS_N_INSNS (60);
5347 /* Calculate the cost of moving data from a register in class FROM to
5348 one in class TO, using MODE. */
5351 ia64_register_move_cost (enum machine_mode mode
, reg_class_t from_i
,
5354 enum reg_class from
= (enum reg_class
) from_i
;
5355 enum reg_class to
= (enum reg_class
) to_i
;
5357 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5358 if (to
== ADDL_REGS
)
5360 if (from
== ADDL_REGS
)
5363 /* All costs are symmetric, so reduce cases by putting the
5364 lower number class as the destination. */
5367 enum reg_class tmp
= to
;
5368 to
= from
, from
= tmp
;
5371 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5372 so that we get secondary memory reloads. Between FR_REGS,
5373 we have to make this at least as expensive as memory_move_cost
5374 to avoid spectacularly poor register class preferencing. */
5375 if (mode
== XFmode
|| mode
== RFmode
)
5377 if (to
!= GR_REGS
|| from
!= GR_REGS
)
5378 return memory_move_cost (mode
, to
, false);
5386 /* Moving between PR registers takes two insns. */
5387 if (from
== PR_REGS
)
5389 /* Moving between PR and anything but GR is impossible. */
5390 if (from
!= GR_REGS
)
5391 return memory_move_cost (mode
, to
, false);
5395 /* Moving between BR and anything but GR is impossible. */
5396 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
5397 return memory_move_cost (mode
, to
, false);
5402 /* Moving between AR and anything but GR is impossible. */
5403 if (from
!= GR_REGS
)
5404 return memory_move_cost (mode
, to
, false);
5410 case GR_AND_FR_REGS
:
5411 case GR_AND_BR_REGS
:
5422 /* Calculate the cost of moving data of MODE from a register to or from
5426 ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
5428 bool in ATTRIBUTE_UNUSED
)
5430 if (rclass
== GENERAL_REGS
5431 || rclass
== FR_REGS
5432 || rclass
== FP_REGS
5433 || rclass
== GR_AND_FR_REGS
)
5439 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5440 on RCLASS to use when copying X into that class. */
5443 ia64_preferred_reload_class (rtx x
, reg_class_t rclass
)
5449 /* Don't allow volatile mem reloads into floating point registers.
5450 This is defined to force reload to choose the r/m case instead
5451 of the f/f case when reloading (set (reg fX) (mem/v)). */
5452 if (MEM_P (x
) && MEM_VOLATILE_P (x
))
5455 /* Force all unrecognized constants into the constant pool. */
5473 /* This function returns the register class required for a secondary
5474 register when copying between one of the registers in RCLASS, and X,
5475 using MODE. A return value of NO_REGS means that no secondary register
5479 ia64_secondary_reload_class (enum reg_class rclass
,
5480 enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
5484 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
5485 regno
= true_regnum (x
);
5492 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5493 interaction. We end up with two pseudos with overlapping lifetimes
5494 both of which are equiv to the same constant, and both which need
5495 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5496 changes depending on the path length, which means the qty_first_reg
5497 check in make_regs_eqv can give different answers at different times.
5498 At some point I'll probably need a reload_indi pattern to handle
5501 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5502 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5503 non-general registers for good measure. */
5504 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
5507 /* This is needed if a pseudo used as a call_operand gets spilled to a
5509 if (GET_CODE (x
) == MEM
)
5515 /* Need to go through general registers to get to other class regs. */
5516 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
5519 /* This can happen when a paradoxical subreg is an operand to the
5521 /* ??? This shouldn't be necessary after instruction scheduling is
5522 enabled, because paradoxical subregs are not accepted by
5523 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5524 stop the paradoxical subreg stupidity in the *_operand functions
5526 if (GET_CODE (x
) == MEM
5527 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
5528 || GET_MODE (x
) == QImode
))
5531 /* This can happen because of the ior/and/etc patterns that accept FP
5532 registers as operands. If the third operand is a constant, then it
5533 needs to be reloaded into a FP register. */
5534 if (GET_CODE (x
) == CONST_INT
)
5537 /* This can happen because of register elimination in a muldi3 insn.
5538 E.g. `26107 * (unsigned long)&u'. */
5539 if (GET_CODE (x
) == PLUS
)
5544 /* ??? This happens if we cse/gcse a BImode value across a call,
5545 and the function has a nonlocal goto. This is because global
5546 does not allocate call crossing pseudos to hard registers when
5547 crtl->has_nonlocal_goto is true. This is relatively
5548 common for C++ programs that use exceptions. To reproduce,
5549 return NO_REGS and compile libstdc++. */
5550 if (GET_CODE (x
) == MEM
)
5553 /* This can happen when we take a BImode subreg of a DImode value,
5554 and that DImode value winds up in some non-GR register. */
5555 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
5567 /* Implement targetm.unspec_may_trap_p hook. */
5569 ia64_unspec_may_trap_p (const_rtx x
, unsigned flags
)
5571 if (GET_CODE (x
) == UNSPEC
)
5573 switch (XINT (x
, 1))
5579 case UNSPEC_CHKACLR
:
5581 /* These unspecs are just wrappers. */
5582 return may_trap_p_1 (XVECEXP (x
, 0, 0), flags
);
5586 return default_unspec_may_trap_p (x
, flags
);
5590 /* Parse the -mfixed-range= option string. */
5593 fix_range (const char *const_str
)
5596 char *str
, *dash
, *comma
;
5598 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5599 REG2 are either register names or register numbers. The effect
5600 of this option is to mark the registers in the range from REG1 to
5601 REG2 as ``fixed'' so they won't be used by the compiler. This is
5602 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5604 i
= strlen (const_str
);
5605 str
= (char *) alloca (i
+ 1);
5606 memcpy (str
, const_str
, i
+ 1);
5610 dash
= strchr (str
, '-');
5613 warning (0, "value of -mfixed-range must have form REG1-REG2");
5618 comma
= strchr (dash
+ 1, ',');
5622 first
= decode_reg_name (str
);
5625 warning (0, "unknown register name: %s", str
);
5629 last
= decode_reg_name (dash
+ 1);
5632 warning (0, "unknown register name: %s", dash
+ 1);
5640 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
5644 for (i
= first
; i
<= last
; ++i
)
5645 fixed_regs
[i
] = call_used_regs
[i
] = 1;
5655 /* Implement TARGET_HANDLE_OPTION. */
5658 ia64_handle_option (size_t code
, const char *arg
, int value
)
5662 case OPT_mfixed_range_
:
5666 case OPT_mtls_size_
:
5667 if (value
!= 14 && value
!= 22 && value
!= 64)
5668 error ("bad value %<%s%> for -mtls-size= switch", arg
);
5675 const char *name
; /* processor name or nickname. */
5676 enum processor_type processor
;
5678 const processor_alias_table
[] =
5680 {"itanium2", PROCESSOR_ITANIUM2
},
5681 {"mckinley", PROCESSOR_ITANIUM2
},
5683 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
5686 for (i
= 0; i
< pta_size
; i
++)
5687 if (!strcmp (arg
, processor_alias_table
[i
].name
))
5689 ia64_tune
= processor_alias_table
[i
].processor
;
5693 error ("bad value %<%s%> for -mtune= switch", arg
);
5702 /* Implement TARGET_OPTION_OVERRIDE. */
5705 ia64_option_override (void)
5707 if (TARGET_AUTO_PIC
)
5708 target_flags
|= MASK_CONST_GP
;
5710 /* Numerous experiment shows that IRA based loop pressure
5711 calculation works better for RTL loop invariant motion on targets
5712 with enough (>= 32) registers. It is an expensive optimization.
5713 So it is on only for peak performance. */
5715 flag_ira_loop_pressure
= 1;
5718 ia64_section_threshold
= (global_options_set
.x_g_switch_value
5720 : IA64_DEFAULT_GVALUE
);
5722 init_machine_status
= ia64_init_machine_status
;
5724 if (align_functions
<= 0)
5725 align_functions
= 64;
5726 if (align_loops
<= 0)
5728 if (TARGET_ABI_OPEN_VMS
)
5731 ia64_override_options_after_change();
5734 /* Implement targetm.override_options_after_change. */
5737 ia64_override_options_after_change (void)
5739 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
5740 flag_schedule_insns_after_reload
= 0;
5743 && !global_options_set
.x_flag_selective_scheduling
5744 && !global_options_set
.x_flag_selective_scheduling2
)
5746 flag_selective_scheduling2
= 1;
5747 flag_sel_sched_pipelining
= 1;
5749 if (mflag_sched_control_spec
== 2)
5751 /* Control speculation is on by default for the selective scheduler,
5752 but not for the Haifa scheduler. */
5753 mflag_sched_control_spec
= flag_selective_scheduling2
? 1 : 0;
5755 if (flag_sel_sched_pipelining
&& flag_auto_inc_dec
)
5757 /* FIXME: remove this when we'd implement breaking autoinsns as
5758 a transformation. */
5759 flag_auto_inc_dec
= 0;
5763 /* Initialize the record of emitted frame related registers.  */
/* Zeroes the file-scope emitted_frame_related_regs array; called at the
   start of RTL expansion for each function.  */
5765 void ia64_init_expanders (void)
5767 memset (&emitted_frame_related_regs
, 0, sizeof (emitted_frame_related_regs
));
/* Allocate a zero-initialized, garbage-collected machine_function;
   installed as init_machine_status in ia64_option_override.  */
5770 static struct machine_function
*
5771 ia64_init_machine_status (void)
5773 return ggc_alloc_cleared_machine_function ();
/* Forward declarations for the two "safe" attribute accessors below.  */
5776 static enum attr_itanium_class
ia64_safe_itanium_class (rtx
);
5777 static enum attr_type
ia64_safe_type (rtx
);
/* Like get_attr_itanium_class, but tolerates unrecognizable insns:
   returns ITANIUM_CLASS_IGNORE for debug insns and
   ITANIUM_CLASS_UNKNOWN when recog fails.  */
5779 static enum attr_itanium_class
5780 ia64_safe_itanium_class (rtx insn
)
5782 if (recog_memoized (insn
) >= 0)
5783 return get_attr_itanium_class (insn
);
5784 else if (DEBUG_INSN_P (insn
))
5785 return ITANIUM_CLASS_IGNORE
;
5787 return ITANIUM_CLASS_UNKNOWN
;
/* Like get_attr_type, but returns TYPE_UNKNOWN instead of failing when
   the insn cannot be recognized.  */
5790 static enum attr_type
5791 ia64_safe_type (rtx insn
)
5793 if (recog_memoized (insn
) >= 0)
5794 return get_attr_type (insn
);
5796 return TYPE_UNKNOWN
;
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;
  RWS_FIELD_TYPE first_pred : 10;
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};
5850 /* Cumulative info for the current instruction group. */
5851 struct reg_write_state rws_sum
[NUM_REGS
];
5852 #ifdef ENABLE_CHECKING
5853 /* Bitmap whether a register has been written in the current insn. */
5854 HARD_REG_ELT_TYPE rws_insn
[(NUM_REGS
+ HOST_BITS_PER_WIDEST_FAST_INT
- 1)
5855 / HOST_BITS_PER_WIDEST_FAST_INT
];
5858 rws_insn_set (int regno
)
5860 gcc_assert (!TEST_HARD_REG_BIT (rws_insn
, regno
));
5861 SET_HARD_REG_BIT (rws_insn
, regno
);
5865 rws_insn_test (int regno
)
5867 return TEST_HARD_REG_BIT (rws_insn
, regno
);
5870 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5871 unsigned char rws_insn
[2];
5874 rws_insn_set (int regno
)
5876 if (regno
== REG_AR_CFM
)
5878 else if (regno
== REG_VOLATILE
)
5883 rws_insn_test (int regno
)
5885 if (regno
== REG_AR_CFM
)
5887 if (regno
== REG_VOLATILE
)
5893 /* Indicates whether this is the first instruction after a stop bit,
5894 in which case we don't need another stop bit. Without this,
5895 ia64_variable_issue will die when scheduling an alloc. */
5896 static int first_instruction
;
5898 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5899 RTL for one instruction. */
5902 unsigned int is_write
: 1; /* Is register being written? */
5903 unsigned int is_fp
: 1; /* Is register used as part of an fp op? */
5904 unsigned int is_branch
: 1; /* Is register used as part of a branch? */
5905 unsigned int is_and
: 1; /* Is register used as part of and.orcm? */
5906 unsigned int is_or
: 1; /* Is register used as part of or.andcm? */
5907 unsigned int is_sibcall
: 1; /* Is this a sibling or normal call? */
5910 static void rws_update (int, struct reg_flags
, int);
5911 static int rws_access_regno (int, struct reg_flags
, int);
5912 static int rws_access_reg (rtx
, struct reg_flags
, int);
5913 static void update_set_flags (rtx
, struct reg_flags
*);
5914 static int set_src_needs_barrier (rtx
, struct reg_flags
, int);
5915 static int rtx_needs_barrier (rtx
, struct reg_flags
, int);
5916 static void init_insn_group_barriers (void);
5917 static int group_barrier_needed (rtx
);
5918 static int safe_group_barrier_needed (rtx
);
5919 static int in_safe_group_barrier
;
5921 /* Update *RWS for REGNO, which is being written by the current instruction,
5922 with predicate PRED, and associated register flags in FLAGS. */
5925 rws_update (int regno
, struct reg_flags flags
, int pred
)
5928 rws_sum
[regno
].write_count
++;
5930 rws_sum
[regno
].write_count
= 2;
5931 rws_sum
[regno
].written_by_fp
|= flags
.is_fp
;
5932 /* ??? Not tracking and/or across differing predicates. */
5933 rws_sum
[regno
].written_by_and
= flags
.is_and
;
5934 rws_sum
[regno
].written_by_or
= flags
.is_or
;
5935 rws_sum
[regno
].first_pred
= pred
;
5938 /* Handle an access to register REGNO of type FLAGS using predicate register
5939 PRED. Update rws_sum array. Return 1 if this access creates
5940 a dependency with an earlier instruction in the same group. */
5943 rws_access_regno (int regno
, struct reg_flags flags
, int pred
)
5945 int need_barrier
= 0;
5947 gcc_assert (regno
< NUM_REGS
);
5949 if (! PR_REGNO_P (regno
))
5950 flags
.is_and
= flags
.is_or
= 0;
5956 rws_insn_set (regno
);
5957 write_count
= rws_sum
[regno
].write_count
;
5959 switch (write_count
)
5962 /* The register has not been written yet. */
5963 if (!in_safe_group_barrier
)
5964 rws_update (regno
, flags
, pred
);
5968 /* The register has been written via a predicate. Treat
5969 it like a unconditional write and do not try to check
5970 for complementary pred reg in earlier write. */
5971 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5973 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5977 if (!in_safe_group_barrier
)
5978 rws_update (regno
, flags
, pred
);
5982 /* The register has been unconditionally written already. We
5984 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
5986 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
5990 if (!in_safe_group_barrier
)
5992 rws_sum
[regno
].written_by_and
= flags
.is_and
;
5993 rws_sum
[regno
].written_by_or
= flags
.is_or
;
6003 if (flags
.is_branch
)
6005 /* Branches have several RAW exceptions that allow to avoid
6008 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
6009 /* RAW dependencies on branch regs are permissible as long
6010 as the writer is a non-branch instruction. Since we
6011 never generate code that uses a branch register written
6012 by a branch instruction, handling this case is
6016 if (REGNO_REG_CLASS (regno
) == PR_REGS
6017 && ! rws_sum
[regno
].written_by_fp
)
6018 /* The predicates of a branch are available within the
6019 same insn group as long as the predicate was written by
6020 something other than a floating-point instruction. */
6024 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
6026 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
6029 switch (rws_sum
[regno
].write_count
)
6032 /* The register has not been written yet. */
6036 /* The register has been written via a predicate, assume we
6037 need a barrier (don't check for complementary regs). */
6042 /* The register has been unconditionally written already. We
6052 return need_barrier
;
6056 rws_access_reg (rtx reg
, struct reg_flags flags
, int pred
)
6058 int regno
= REGNO (reg
);
6059 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
6062 return rws_access_regno (regno
, flags
, pred
);
6065 int need_barrier
= 0;
6067 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
6068 return need_barrier
;
6072 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
6073 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
6076 update_set_flags (rtx x
, struct reg_flags
*pflags
)
6078 rtx src
= SET_SRC (x
);
6080 switch (GET_CODE (src
))
6086 /* There are four cases here:
6087 (1) The destination is (pc), in which case this is a branch,
6088 nothing here applies.
6089 (2) The destination is ar.lc, in which case this is a
6090 doloop_end_internal,
6091 (3) The destination is an fp register, in which case this is
6092 an fselect instruction.
6093 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6094 this is a check load.
6095 In all cases, nothing we do in this function applies. */
6099 if (COMPARISON_P (src
)
6100 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src
, 0))))
6101 /* Set pflags->is_fp to 1 so that we know we're dealing
6102 with a floating point comparison when processing the
6103 destination of the SET. */
6106 /* Discover if this is a parallel comparison. We only handle
6107 and.orcm and or.andcm at present, since we must retain a
6108 strict inverse on the predicate pair. */
6109 else if (GET_CODE (src
) == AND
)
6111 else if (GET_CODE (src
) == IOR
)
6118 /* Subroutine of rtx_needs_barrier; this function determines whether the
6119 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6120 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6124 set_src_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
6126 int need_barrier
= 0;
6128 rtx src
= SET_SRC (x
);
6130 if (GET_CODE (src
) == CALL
)
6131 /* We don't need to worry about the result registers that
6132 get written by subroutine call. */
6133 return rtx_needs_barrier (src
, flags
, pred
);
6134 else if (SET_DEST (x
) == pc_rtx
)
6136 /* X is a conditional branch. */
6137 /* ??? This seems redundant, as the caller sets this bit for
6139 if (!ia64_spec_check_src_p (src
))
6140 flags
.is_branch
= 1;
6141 return rtx_needs_barrier (src
, flags
, pred
);
6144 if (ia64_spec_check_src_p (src
))
6145 /* Avoid checking one register twice (in condition
6146 and in 'then' section) for ldc pattern. */
6148 gcc_assert (REG_P (XEXP (src
, 2)));
6149 need_barrier
= rtx_needs_barrier (XEXP (src
, 2), flags
, pred
);
6151 /* We process MEM below. */
6152 src
= XEXP (src
, 1);
6155 need_barrier
|= rtx_needs_barrier (src
, flags
, pred
);
6158 if (GET_CODE (dst
) == ZERO_EXTRACT
)
6160 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
6161 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
6163 return need_barrier
;
6166 /* Handle an access to rtx X of type FLAGS using predicate register
6167 PRED. Return 1 if this access creates a dependency with an earlier
6168 instruction in the same group. */
6171 rtx_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
6174 int is_complemented
= 0;
6175 int need_barrier
= 0;
6176 const char *format_ptr
;
6177 struct reg_flags new_flags
;
6185 switch (GET_CODE (x
))
6188 update_set_flags (x
, &new_flags
);
6189 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
);
6190 if (GET_CODE (SET_SRC (x
)) != CALL
)
6192 new_flags
.is_write
= 1;
6193 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
6198 new_flags
.is_write
= 0;
6199 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
6201 /* Avoid multiple register writes, in case this is a pattern with
6202 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6203 if (! flags
.is_sibcall
&& ! rws_insn_test (REG_AR_CFM
))
6205 new_flags
.is_write
= 1;
6206 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
6207 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
6208 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6213 /* X is a predicated instruction. */
6215 cond
= COND_EXEC_TEST (x
);
6217 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
6219 if (GET_CODE (cond
) == EQ
)
6220 is_complemented
= 1;
6221 cond
= XEXP (cond
, 0);
6222 gcc_assert (GET_CODE (cond
) == REG
6223 && REGNO_REG_CLASS (REGNO (cond
)) == PR_REGS
);
6224 pred
= REGNO (cond
);
6225 if (is_complemented
)
6228 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
6229 return need_barrier
;
6233 /* Clobber & use are for earlier compiler-phases only. */
6238 /* We always emit stop bits for traditional asms. We emit stop bits
6239 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6240 if (GET_CODE (x
) != ASM_OPERANDS
6241 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
6243 /* Avoid writing the register multiple times if we have multiple
6244 asm outputs. This avoids a failure in rws_access_reg. */
6245 if (! rws_insn_test (REG_VOLATILE
))
6247 new_flags
.is_write
= 1;
6248 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
6253 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6254 We cannot just fall through here since then we would be confused
6255 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6256 traditional asms unlike their normal usage. */
6258 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
6259 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
6264 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
6266 rtx pat
= XVECEXP (x
, 0, i
);
6267 switch (GET_CODE (pat
))
6270 update_set_flags (pat
, &new_flags
);
6271 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
);
6277 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
6281 if (REG_P (XEXP (pat
, 0))
6282 && extract_asm_operands (x
) != NULL_RTX
6283 && REGNO (XEXP (pat
, 0)) != AR_UNAT_REGNUM
)
6285 new_flags
.is_write
= 1;
6286 need_barrier
|= rtx_needs_barrier (XEXP (pat
, 0),
6299 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
6301 rtx pat
= XVECEXP (x
, 0, i
);
6302 if (GET_CODE (pat
) == SET
)
6304 if (GET_CODE (SET_SRC (pat
)) != CALL
)
6306 new_flags
.is_write
= 1;
6307 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
6311 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
6312 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
6317 need_barrier
|= rtx_needs_barrier (SUBREG_REG (x
), flags
, pred
);
6320 if (REGNO (x
) == AR_UNAT_REGNUM
)
6322 for (i
= 0; i
< 64; ++i
)
6323 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
6326 need_barrier
= rws_access_reg (x
, flags
, pred
);
6330 /* Find the regs used in memory address computation. */
6331 new_flags
.is_write
= 0;
6332 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
6335 case CONST_INT
: case CONST_DOUBLE
: case CONST_VECTOR
:
6336 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
6339 /* Operators with side-effects. */
6340 case POST_INC
: case POST_DEC
:
6341 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
6343 new_flags
.is_write
= 0;
6344 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6345 new_flags
.is_write
= 1;
6346 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6350 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
6352 new_flags
.is_write
= 0;
6353 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6354 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
6355 new_flags
.is_write
= 1;
6356 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6359 /* Handle common unary and binary ops for efficiency. */
6360 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
6361 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
6362 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
6363 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
6364 case NE
: case EQ
: case GE
: case GT
: case LE
:
6365 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
6366 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
6367 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
6370 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
6371 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
6372 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
6373 case SQRT
: case FFS
: case POPCOUNT
:
6374 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
6378 /* VEC_SELECT's second argument is a PARALLEL with integers that
6379 describe the elements selected. On ia64, those integers are
6380 always constants. Avoid walking the PARALLEL so that we don't
6381 get confused with "normal" parallels and then die. */
6382 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
6386 switch (XINT (x
, 1))
6388 case UNSPEC_LTOFF_DTPMOD
:
6389 case UNSPEC_LTOFF_DTPREL
:
6391 case UNSPEC_LTOFF_TPREL
:
6393 case UNSPEC_PRED_REL_MUTEX
:
6394 case UNSPEC_PIC_CALL
:
6396 case UNSPEC_FETCHADD_ACQ
:
6397 case UNSPEC_BSP_VALUE
:
6398 case UNSPEC_FLUSHRS
:
6399 case UNSPEC_BUNDLE_SELECTOR
:
6402 case UNSPEC_GR_SPILL
:
6403 case UNSPEC_GR_RESTORE
:
6405 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
6406 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
6408 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6409 new_flags
.is_write
= (XINT (x
, 1) == UNSPEC_GR_SPILL
);
6410 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
6415 case UNSPEC_FR_SPILL
:
6416 case UNSPEC_FR_RESTORE
:
6417 case UNSPEC_GETF_EXP
:
6418 case UNSPEC_SETF_EXP
:
6420 case UNSPEC_FR_SQRT_RECIP_APPROX
:
6421 case UNSPEC_FR_SQRT_RECIP_APPROX_RES
:
6426 case UNSPEC_CHKACLR
:
6428 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6431 case UNSPEC_FR_RECIP_APPROX
:
6433 case UNSPEC_COPYSIGN
:
6434 case UNSPEC_FR_RECIP_APPROX_RES
:
6435 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6436 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
6439 case UNSPEC_CMPXCHG_ACQ
:
6440 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
6441 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
6449 case UNSPEC_VOLATILE
:
6450 switch (XINT (x
, 1))
6453 /* Alloc must always be the first instruction of a group.
6454 We force this by always returning true. */
6455 /* ??? We might get better scheduling if we explicitly check for
6456 input/local/output register dependencies, and modify the
6457 scheduler so that alloc is always reordered to the start of
6458 the current group. We could then eliminate all of the
6459 first_instruction code. */
6460 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
6462 new_flags
.is_write
= 1;
6463 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6466 case UNSPECV_SET_BSP
:
6470 case UNSPECV_BLOCKAGE
:
6471 case UNSPECV_INSN_GROUP_BARRIER
:
6473 case UNSPECV_PSAC_ALL
:
6474 case UNSPECV_PSAC_NORMAL
:
6483 new_flags
.is_write
= 0;
6484 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
6485 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
6487 new_flags
.is_write
= 1;
6488 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
6489 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6493 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
6494 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
6495 switch (format_ptr
[i
])
6497 case '0': /* unused field */
6498 case 'i': /* integer */
6499 case 'n': /* note */
6500 case 'w': /* wide integer */
6501 case 's': /* pointer to string */
6502 case 'S': /* optional pointer to string */
6506 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
6511 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
6512 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
6521 return need_barrier
;
6524 /* Clear out the state for group_barrier_needed at the start of a
6525 sequence of insns. */
6528 init_insn_group_barriers (void)
6530 memset (rws_sum
, 0, sizeof (rws_sum
));
6531 first_instruction
= 1;
6534 /* Given the current state, determine whether a group barrier (a stop bit) is
6535 necessary before INSN. Return nonzero if so. This modifies the state to
6536 include the effects of INSN as a side-effect. */
6539 group_barrier_needed (rtx insn
)
6542 int need_barrier
= 0;
6543 struct reg_flags flags
;
6545 memset (&flags
, 0, sizeof (flags
));
6546 switch (GET_CODE (insn
))
6553 /* A barrier doesn't imply an instruction group boundary. */
6557 memset (rws_insn
, 0, sizeof (rws_insn
));
6561 flags
.is_branch
= 1;
6562 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
6563 memset (rws_insn
, 0, sizeof (rws_insn
));
6565 /* Don't bundle a call following another call. */
6566 if ((pat
= prev_active_insn (insn
))
6567 && GET_CODE (pat
) == CALL_INSN
)
6573 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
6577 if (!ia64_spec_check_p (insn
))
6578 flags
.is_branch
= 1;
6580 /* Don't bundle a jump following a call. */
6581 if ((pat
= prev_active_insn (insn
))
6582 && GET_CODE (pat
) == CALL_INSN
)
6590 if (GET_CODE (PATTERN (insn
)) == USE
6591 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6592 /* Don't care about USE and CLOBBER "insns"---those are used to
6593 indicate to the optimizer that it shouldn't get rid of
6594 certain operations. */
6597 pat
= PATTERN (insn
);
6599 /* Ug. Hack hacks hacked elsewhere. */
6600 switch (recog_memoized (insn
))
6602 /* We play dependency tricks with the epilogue in order
6603 to get proper schedules. Undo this for dv analysis. */
6604 case CODE_FOR_epilogue_deallocate_stack
:
6605 case CODE_FOR_prologue_allocate_stack
:
6606 pat
= XVECEXP (pat
, 0, 0);
6609 /* The pattern we use for br.cloop confuses the code above.
6610 The second element of the vector is representative. */
6611 case CODE_FOR_doloop_end_internal
:
6612 pat
= XVECEXP (pat
, 0, 1);
6615 /* Doesn't generate code. */
6616 case CODE_FOR_pred_rel_mutex
:
6617 case CODE_FOR_prologue_use
:
6624 memset (rws_insn
, 0, sizeof (rws_insn
));
6625 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
6627 /* Check to see if the previous instruction was a volatile
6630 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
6638 if (first_instruction
&& INSN_P (insn
)
6639 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
6640 && GET_CODE (PATTERN (insn
)) != USE
6641 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
6644 first_instruction
= 0;
6647 return need_barrier
;
6650 /* Like group_barrier_needed, but do not clobber the current state. */
6653 safe_group_barrier_needed (rtx insn
)
6655 int saved_first_instruction
;
6658 saved_first_instruction
= first_instruction
;
6659 in_safe_group_barrier
= 1;
6661 t
= group_barrier_needed (insn
);
6663 first_instruction
= saved_first_instruction
;
6664 in_safe_group_barrier
= 0;
6669 /* Scan the current function and insert stop bits as necessary to
6670 eliminate dependencies. This function assumes that a final
6671 instruction scheduling pass has been run which has already
6672 inserted most of the necessary stop bits. This function only
6673 inserts new ones at basic block boundaries, since these are
6674 invisible to the scheduler. */
6677 emit_insn_group_barriers (FILE *dump
)
6681 int insns_since_last_label
= 0;
6683 init_insn_group_barriers ();
6685 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6687 if (GET_CODE (insn
) == CODE_LABEL
)
6689 if (insns_since_last_label
)
6691 insns_since_last_label
= 0;
6693 else if (GET_CODE (insn
) == NOTE
6694 && NOTE_KIND (insn
) == NOTE_INSN_BASIC_BLOCK
)
6696 if (insns_since_last_label
)
6698 insns_since_last_label
= 0;
6700 else if (GET_CODE (insn
) == INSN
6701 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
6702 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
6704 init_insn_group_barriers ();
6707 else if (NONDEBUG_INSN_P (insn
))
6709 insns_since_last_label
= 1;
6711 if (group_barrier_needed (insn
))
6716 fprintf (dump
, "Emitting stop before label %d\n",
6717 INSN_UID (last_label
));
6718 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
6721 init_insn_group_barriers ();
6729 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6730 This function has to emit all necessary group barriers. */
6733 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
6737 init_insn_group_barriers ();
6739 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6741 if (GET_CODE (insn
) == BARRIER
)
6743 rtx last
= prev_active_insn (insn
);
6747 if (GET_CODE (last
) == JUMP_INSN
6748 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
6749 last
= prev_active_insn (last
);
6750 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
6751 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
6753 init_insn_group_barriers ();
6755 else if (NONDEBUG_INSN_P (insn
))
6757 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
6758 init_insn_group_barriers ();
6759 else if (group_barrier_needed (insn
))
6761 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
6762 init_insn_group_barriers ();
6763 group_barrier_needed (insn
);
6771 /* Instruction scheduling support. */
6773 #define NR_BUNDLES 10
6775 /* A list of names of all available bundles. */
6777 static const char *bundle_name
[NR_BUNDLES
] =
6783 #if NR_BUNDLES == 10
6793 /* Nonzero if we should insert stop bits into the schedule. */
6795 int ia64_final_schedule
= 0;
6797 /* Codes of the corresponding queried units: */
6799 static int _0mii_
, _0mmi_
, _0mfi_
, _0mmf_
;
6800 static int _0bbb_
, _0mbb_
, _0mib_
, _0mmb_
, _0mfb_
, _0mlx_
;
6802 static int _1mii_
, _1mmi_
, _1mfi_
, _1mmf_
;
6803 static int _1bbb_
, _1mbb_
, _1mib_
, _1mmb_
, _1mfb_
, _1mlx_
;
6805 static int pos_1
, pos_2
, pos_3
, pos_4
, pos_5
, pos_6
;
6807 /* The following variable value is an insn group barrier. */
6809 static rtx dfa_stop_insn
;
6811 /* The following variable value is the last issued insn. */
6813 static rtx last_scheduled_insn
;
6815 /* The following variable value is pointer to a DFA state used as
6816 temporary variable. */
6818 static state_t temp_dfa_state
= NULL
;
6820 /* The following variable value is DFA state after issuing the last
6823 static state_t prev_cycle_state
= NULL
;
6825 /* The following array element values are TRUE if the corresponding
6826 insn requires to add stop bits before it. */
6828 static char *stops_p
= NULL
;
6830 /* The following variable is used to set up the mentioned above array. */
6832 static int stop_before_p
= 0;
6834 /* The following variable value is length of the arrays `clocks' and
6837 static int clocks_length
;
6839 /* The following variable value is number of data speculations in progress. */
6840 static int pending_data_specs
= 0;
6842 /* Number of memory references on current and three future processor cycles. */
6843 static char mem_ops_in_group
[4];
6845 /* Number of current processor cycle (from scheduler's point of view). */
6846 static int current_cycle
;
6848 static rtx
ia64_single_set (rtx
);
6849 static void ia64_emit_insn_before (rtx
, rtx
);
6851 /* Map a bundle number to its pseudo-op. */
6854 get_bundle_name (int b
)
6856 return bundle_name
[b
];
/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  /* Itanium can issue up to six instructions (two bundles) per cycle.  */
  return 6;
}
6868 /* Helper function - like single_set, but look inside COND_EXEC. */
6871 ia64_single_set (rtx insn
)
6873 rtx x
= PATTERN (insn
), ret
;
6874 if (GET_CODE (x
) == COND_EXEC
)
6875 x
= COND_EXEC_CODE (x
);
6876 if (GET_CODE (x
) == SET
)
6879 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6880 Although they are not classical single set, the second set is there just
6881 to protect it from moving past FP-relative stack accesses. */
6882 switch (recog_memoized (insn
))
6884 case CODE_FOR_prologue_allocate_stack
:
6885 case CODE_FOR_epilogue_deallocate_stack
:
6886 ret
= XVECEXP (x
, 0, 0);
6890 ret
= single_set_2 (insn
, x
);
6897 /* Adjust the cost of a scheduling dependency.
6898 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
6899 COST is the current cost, DW is dependency weakness. */
6901 ia64_adjust_cost_2 (rtx insn
, int dep_type1
, rtx dep_insn
, int cost
, dw_t dw
)
6903 enum reg_note dep_type
= (enum reg_note
) dep_type1
;
6904 enum attr_itanium_class dep_class
;
6905 enum attr_itanium_class insn_class
;
6907 insn_class
= ia64_safe_itanium_class (insn
);
6908 dep_class
= ia64_safe_itanium_class (dep_insn
);
6910 /* Treat true memory dependencies separately. Ignore apparent true
6911 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6912 if (dep_type
== REG_DEP_TRUE
6913 && (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
)
6914 && (insn_class
== ITANIUM_CLASS_BR
|| insn_class
== ITANIUM_CLASS_SCALL
))
6917 if (dw
== MIN_DEP_WEAK
)
6918 /* Store and load are likely to alias, use higher cost to avoid stall. */
6919 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST
);
6920 else if (dw
> MIN_DEP_WEAK
)
6922 /* Store and load are less likely to alias. */
6923 if (mflag_sched_fp_mem_deps_zero_cost
&& dep_class
== ITANIUM_CLASS_STF
)
6924 /* Assume there will be no cache conflict for floating-point data.
6925 For integer data, L1 conflict penalty is huge (17 cycles), so we
6926 never assume it will not cause a conflict. */
6932 if (dep_type
!= REG_DEP_OUTPUT
)
6935 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
6936 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
6942 /* Like emit_insn_before, but skip cycle_display notes.
6943 ??? When cycle display notes are implemented, update this. */
6946 ia64_emit_insn_before (rtx insn
, rtx before
)
6948 emit_insn_before (insn
, before
);
6951 /* The following function marks insns who produce addresses for load
6952 and store insns. Such insns will be placed into M slots because it
6953 decrease latency time for Itanium1 (see function
6954 `ia64_produce_address_p' and the DFA descriptions). */
6957 ia64_dependencies_evaluation_hook (rtx head
, rtx tail
)
6959 rtx insn
, next
, next_tail
;
6961 /* Before reload, which_alternative is not set, which means that
6962 ia64_safe_itanium_class will produce wrong results for (at least)
6963 move instructions. */
6964 if (!reload_completed
)
6967 next_tail
= NEXT_INSN (tail
);
6968 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6971 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6973 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
6975 sd_iterator_def sd_it
;
6977 bool has_mem_op_consumer_p
= false;
6979 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
6981 enum attr_itanium_class c
;
6983 if (DEP_TYPE (dep
) != REG_DEP_TRUE
)
6986 next
= DEP_CON (dep
);
6987 c
= ia64_safe_itanium_class (next
);
6988 if ((c
== ITANIUM_CLASS_ST
6989 || c
== ITANIUM_CLASS_STF
)
6990 && ia64_st_address_bypass_p (insn
, next
))
6992 has_mem_op_consumer_p
= true;
6995 else if ((c
== ITANIUM_CLASS_LD
6996 || c
== ITANIUM_CLASS_FLD
6997 || c
== ITANIUM_CLASS_FLDP
)
6998 && ia64_ld_address_bypass_p (insn
, next
))
7000 has_mem_op_consumer_p
= true;
7005 insn
->call
= has_mem_op_consumer_p
;
7009 /* We're beginning a new block. Initialize data structures as necessary. */
7012 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
7013 int sched_verbose ATTRIBUTE_UNUSED
,
7014 int max_ready ATTRIBUTE_UNUSED
)
7016 #ifdef ENABLE_CHECKING
7019 if (!sel_sched_p () && reload_completed
)
7020 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
7021 insn
!= current_sched_info
->next_tail
;
7022 insn
= NEXT_INSN (insn
))
7023 gcc_assert (!SCHED_GROUP_P (insn
));
7025 last_scheduled_insn
= NULL_RTX
;
7026 init_insn_group_barriers ();
7029 memset (mem_ops_in_group
, 0, sizeof (mem_ops_in_group
));
7032 /* We're beginning a scheduling pass. Check assertion. */
7035 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
7036 int sched_verbose ATTRIBUTE_UNUSED
,
7037 int max_ready ATTRIBUTE_UNUSED
)
7039 gcc_assert (pending_data_specs
== 0);
7042 /* Scheduling pass is now finished. Free/reset static variable. */
7044 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED
,
7045 int sched_verbose ATTRIBUTE_UNUSED
)
7047 gcc_assert (pending_data_specs
== 0);
7050 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7051 speculation check), FALSE otherwise. */
7053 is_load_p (rtx insn
)
7055 enum attr_itanium_class insn_class
= ia64_safe_itanium_class (insn
);
7058 ((insn_class
== ITANIUM_CLASS_LD
|| insn_class
== ITANIUM_CLASS_FLD
)
7059 && get_attr_check_load (insn
) == CHECK_LOAD_NO
);
7062 /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7063 (taking account for 3-cycle cache reference postponing for stores: Intel
7064 Itanium 2 Reference Manual for Software Development and Optimization,
7067 record_memory_reference (rtx insn
)
7069 enum attr_itanium_class insn_class
= ia64_safe_itanium_class (insn
);
7071 switch (insn_class
) {
7072 case ITANIUM_CLASS_FLD
:
7073 case ITANIUM_CLASS_LD
:
7074 mem_ops_in_group
[current_cycle
% 4]++;
7076 case ITANIUM_CLASS_STF
:
7077 case ITANIUM_CLASS_ST
:
7078 mem_ops_in_group
[(current_cycle
+ 3) % 4]++;
7084 /* We are about to being issuing insns for this clock cycle.
7085 Override the default sort algorithm to better slot instructions. */
7088 ia64_dfa_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
,
7089 int *pn_ready
, int clock_var
,
7093 int n_ready
= *pn_ready
;
7094 rtx
*e_ready
= ready
+ n_ready
;
7098 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
7100 if (reorder_type
== 0)
7102 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7104 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
7105 if (insnp
< e_ready
)
7108 enum attr_type t
= ia64_safe_type (insn
);
7109 if (t
== TYPE_UNKNOWN
)
7111 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
7112 || asm_noperands (PATTERN (insn
)) >= 0)
7114 rtx lowest
= ready
[n_asms
];
7115 ready
[n_asms
] = insn
;
7121 rtx highest
= ready
[n_ready
- 1];
7122 ready
[n_ready
- 1] = insn
;
7129 if (n_asms
< n_ready
)
7131 /* Some normal insns to process. Skip the asms. */
7135 else if (n_ready
> 0)
7139 if (ia64_final_schedule
)
7142 int nr_need_stop
= 0;
7144 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
7145 if (safe_group_barrier_needed (*insnp
))
7148 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
7150 if (reorder_type
== 0)
7153 /* Move down everything that needs a stop bit, preserving
7155 while (insnp
-- > ready
+ deleted
)
7156 while (insnp
>= ready
+ deleted
)
7159 if (! safe_group_barrier_needed (insn
))
7161 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
7169 current_cycle
= clock_var
;
7170 if (reload_completed
&& mem_ops_in_group
[clock_var
% 4] >= ia64_max_memory_insns
)
7175 /* Move down loads/stores, preserving relative order. */
7176 while (insnp
-- > ready
+ moved
)
7177 while (insnp
>= ready
+ moved
)
7180 if (! is_load_p (insn
))
7182 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
7193 /* We are about to being issuing insns for this clock cycle. Override
7194 the default sort algorithm to better slot instructions. */
7197 ia64_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
7200 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
7201 pn_ready
, clock_var
, 0);
7204 /* Like ia64_sched_reorder, but called after issuing each insn.
7205 Override the default sort algorithm to better slot instructions. */
7208 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
7209 int sched_verbose ATTRIBUTE_UNUSED
, rtx
*ready
,
7210 int *pn_ready
, int clock_var
)
7212 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
7216 /* We are about to issue INSN. Return the number of insns left on the
7217 ready queue that can be issued this cycle. */
7220 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
7221 int sched_verbose ATTRIBUTE_UNUSED
,
7222 rtx insn ATTRIBUTE_UNUSED
,
7223 int can_issue_more ATTRIBUTE_UNUSED
)
7225 if (sched_deps_info
->generate_spec_deps
&& !sel_sched_p ())
7226 /* Modulo scheduling does not extend h_i_d when emitting
7227 new instructions. Don't use h_i_d, if we don't have to. */
7229 if (DONE_SPEC (insn
) & BEGIN_DATA
)
7230 pending_data_specs
++;
7231 if (CHECK_SPEC (insn
) & BEGIN_DATA
)
7232 pending_data_specs
--;
7235 if (DEBUG_INSN_P (insn
))
7238 last_scheduled_insn
= insn
;
7239 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
7240 if (reload_completed
)
7242 int needed
= group_barrier_needed (insn
);
7244 gcc_assert (!needed
);
7245 if (GET_CODE (insn
) == CALL_INSN
)
7246 init_insn_group_barriers ();
7247 stops_p
[INSN_UID (insn
)] = stop_before_p
;
7250 record_memory_reference (insn
);
7255 /* We are choosing insn from the ready queue. Return nonzero if INSN
7259 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn
)
7261 gcc_assert (insn
&& INSN_P (insn
));
7262 return ((!reload_completed
7263 || !safe_group_barrier_needed (insn
))
7264 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn
)
7265 && (!mflag_sched_mem_insns_hard_limit
7266 || !is_load_p (insn
)
7267 || mem_ops_in_group
[current_cycle
% 4] < ia64_max_memory_insns
));
7270 /* We are choosing insn from the ready queue. Return nonzero if INSN
7274 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn
)
7276 gcc_assert (insn
&& INSN_P (insn
));
7277 /* Size of ALAT is 32. As far as we perform conservative data speculation,
7278 we keep ALAT half-empty. */
7279 return (pending_data_specs
< 16
7280 || !(TODO_SPEC (insn
) & BEGIN_DATA
));
7283 /* The following variable value is pseudo-insn used by the DFA insn
7284 scheduler to change the DFA state when the simulated clock is
7287 static rtx dfa_pre_cycle_insn
;
7289 /* Returns 1 when a meaningful insn was scheduled between the last group
7290 barrier and LAST. */
7292 scheduled_good_insn (rtx last
)
7294 if (last
&& recog_memoized (last
) >= 0)
7298 last
!= NULL
&& !NOTE_INSN_BASIC_BLOCK_P (last
)
7299 && !stops_p
[INSN_UID (last
)];
7300 last
= PREV_INSN (last
))
7301 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7302 the ebb we're scheduling. */
7303 if (INSN_P (last
) && recog_memoized (last
) >= 0)
7309 /* We are about to being issuing INSN. Return nonzero if we cannot
7310 issue it on given cycle CLOCK and return zero if we should not sort
7311 the ready queue on the next clock start. */
7314 ia64_dfa_new_cycle (FILE *dump
, int verbose
, rtx insn
, int last_clock
,
7315 int clock
, int *sort_p
)
7317 gcc_assert (insn
&& INSN_P (insn
));
7319 if (DEBUG_INSN_P (insn
))
7322 /* When a group barrier is needed for insn, last_scheduled_insn
7324 gcc_assert (!(reload_completed
&& safe_group_barrier_needed (insn
))
7325 || last_scheduled_insn
);
7327 if ((reload_completed
7328 && (safe_group_barrier_needed (insn
)
7329 || (mflag_sched_stop_bits_after_every_cycle
7330 && last_clock
!= clock
7331 && last_scheduled_insn
7332 && scheduled_good_insn (last_scheduled_insn
))))
7333 || (last_scheduled_insn
7334 && (GET_CODE (last_scheduled_insn
) == CALL_INSN
7335 || GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
7336 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)))
7338 init_insn_group_barriers ();
7340 if (verbose
&& dump
)
7341 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
7342 last_clock
== clock
? " + cycle advance" : "");
7345 current_cycle
= clock
;
7346 mem_ops_in_group
[current_cycle
% 4] = 0;
7348 if (last_clock
== clock
)
7350 state_transition (curr_state
, dfa_stop_insn
);
7351 if (TARGET_EARLY_STOP_BITS
)
7352 *sort_p
= (last_scheduled_insn
== NULL_RTX
7353 || GET_CODE (last_scheduled_insn
) != CALL_INSN
);
7359 if (last_scheduled_insn
)
7361 if (GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
7362 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)
7363 state_reset (curr_state
);
7366 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
7367 state_transition (curr_state
, dfa_stop_insn
);
7368 state_transition (curr_state
, dfa_pre_cycle_insn
);
7369 state_transition (curr_state
, NULL
);
7376 /* Implement targetm.sched.h_i_d_extended hook.
7377 Extend internal data structures. */
7379 ia64_h_i_d_extended (void)
7381 if (stops_p
!= NULL
)
7383 int new_clocks_length
= get_max_uid () * 3 / 2;
7384 stops_p
= (char *) xrecalloc (stops_p
, new_clocks_length
, clocks_length
, 1);
7385 clocks_length
= new_clocks_length
;
7390 /* This structure describes the data used by the backend to guide scheduling.
7391 When the current scheduling point is switched, this data should be saved
7392 and restored later, if the scheduler returns to this point. */
7393 struct _ia64_sched_context
7395 state_t prev_cycle_state
;
7396 rtx last_scheduled_insn
;
7397 struct reg_write_state rws_sum
[NUM_REGS
];
7398 struct reg_write_state rws_insn
[NUM_REGS
];
7399 int first_instruction
;
7400 int pending_data_specs
;
7402 char mem_ops_in_group
[4];
7404 typedef struct _ia64_sched_context
*ia64_sched_context_t
;
7406 /* Allocates a scheduling context. */
7408 ia64_alloc_sched_context (void)
7410 return xmalloc (sizeof (struct _ia64_sched_context
));
7413 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7414 the global context otherwise. */
7416 ia64_init_sched_context (void *_sc
, bool clean_p
)
7418 ia64_sched_context_t sc
= (ia64_sched_context_t
) _sc
;
7420 sc
->prev_cycle_state
= xmalloc (dfa_state_size
);
7423 state_reset (sc
->prev_cycle_state
);
7424 sc
->last_scheduled_insn
= NULL_RTX
;
7425 memset (sc
->rws_sum
, 0, sizeof (rws_sum
));
7426 memset (sc
->rws_insn
, 0, sizeof (rws_insn
));
7427 sc
->first_instruction
= 1;
7428 sc
->pending_data_specs
= 0;
7429 sc
->current_cycle
= 0;
7430 memset (sc
->mem_ops_in_group
, 0, sizeof (mem_ops_in_group
));
7434 memcpy (sc
->prev_cycle_state
, prev_cycle_state
, dfa_state_size
);
7435 sc
->last_scheduled_insn
= last_scheduled_insn
;
7436 memcpy (sc
->rws_sum
, rws_sum
, sizeof (rws_sum
));
7437 memcpy (sc
->rws_insn
, rws_insn
, sizeof (rws_insn
));
7438 sc
->first_instruction
= first_instruction
;
7439 sc
->pending_data_specs
= pending_data_specs
;
7440 sc
->current_cycle
= current_cycle
;
7441 memcpy (sc
->mem_ops_in_group
, mem_ops_in_group
, sizeof (mem_ops_in_group
));
7445 /* Sets the global scheduling context to the one pointed to by _SC. */
7447 ia64_set_sched_context (void *_sc
)
7449 ia64_sched_context_t sc
= (ia64_sched_context_t
) _sc
;
7451 gcc_assert (sc
!= NULL
);
7453 memcpy (prev_cycle_state
, sc
->prev_cycle_state
, dfa_state_size
);
7454 last_scheduled_insn
= sc
->last_scheduled_insn
;
7455 memcpy (rws_sum
, sc
->rws_sum
, sizeof (rws_sum
));
7456 memcpy (rws_insn
, sc
->rws_insn
, sizeof (rws_insn
));
7457 first_instruction
= sc
->first_instruction
;
7458 pending_data_specs
= sc
->pending_data_specs
;
7459 current_cycle
= sc
->current_cycle
;
7460 memcpy (mem_ops_in_group
, sc
->mem_ops_in_group
, sizeof (mem_ops_in_group
));
7463 /* Clears the data in the _SC scheduling context. */
7465 ia64_clear_sched_context (void *_sc
)
7467 ia64_sched_context_t sc
= (ia64_sched_context_t
) _sc
;
7469 free (sc
->prev_cycle_state
);
7470 sc
->prev_cycle_state
= NULL
;
7473 /* Frees the _SC scheduling context. */
7475 ia64_free_sched_context (void *_sc
)
7477 gcc_assert (_sc
!= NULL
);
7482 typedef rtx (* gen_func_t
) (rtx
, rtx
);
7484 /* Return a function that will generate a load of mode MODE_NO
7485 with speculation types TS. */
7487 get_spec_load_gen_function (ds_t ts
, int mode_no
)
7489 static gen_func_t gen_ld_
[] = {
7499 gen_zero_extendqidi2
,
7500 gen_zero_extendhidi2
,
7501 gen_zero_extendsidi2
,
7504 static gen_func_t gen_ld_a
[] = {
7514 gen_zero_extendqidi2_advanced
,
7515 gen_zero_extendhidi2_advanced
,
7516 gen_zero_extendsidi2_advanced
,
7518 static gen_func_t gen_ld_s
[] = {
7519 gen_movbi_speculative
,
7520 gen_movqi_speculative
,
7521 gen_movhi_speculative
,
7522 gen_movsi_speculative
,
7523 gen_movdi_speculative
,
7524 gen_movsf_speculative
,
7525 gen_movdf_speculative
,
7526 gen_movxf_speculative
,
7527 gen_movti_speculative
,
7528 gen_zero_extendqidi2_speculative
,
7529 gen_zero_extendhidi2_speculative
,
7530 gen_zero_extendsidi2_speculative
,
7532 static gen_func_t gen_ld_sa
[] = {
7533 gen_movbi_speculative_advanced
,
7534 gen_movqi_speculative_advanced
,
7535 gen_movhi_speculative_advanced
,
7536 gen_movsi_speculative_advanced
,
7537 gen_movdi_speculative_advanced
,
7538 gen_movsf_speculative_advanced
,
7539 gen_movdf_speculative_advanced
,
7540 gen_movxf_speculative_advanced
,
7541 gen_movti_speculative_advanced
,
7542 gen_zero_extendqidi2_speculative_advanced
,
7543 gen_zero_extendhidi2_speculative_advanced
,
7544 gen_zero_extendsidi2_speculative_advanced
,
7546 static gen_func_t gen_ld_s_a
[] = {
7547 gen_movbi_speculative_a
,
7548 gen_movqi_speculative_a
,
7549 gen_movhi_speculative_a
,
7550 gen_movsi_speculative_a
,
7551 gen_movdi_speculative_a
,
7552 gen_movsf_speculative_a
,
7553 gen_movdf_speculative_a
,
7554 gen_movxf_speculative_a
,
7555 gen_movti_speculative_a
,
7556 gen_zero_extendqidi2_speculative_a
,
7557 gen_zero_extendhidi2_speculative_a
,
7558 gen_zero_extendsidi2_speculative_a
,
7563 if (ts
& BEGIN_DATA
)
7565 if (ts
& BEGIN_CONTROL
)
7570 else if (ts
& BEGIN_CONTROL
)
7572 if ((spec_info
->flags
& SEL_SCHED_SPEC_DONT_CHECK_CONTROL
)
7573 || ia64_needs_block_p (ts
))
7576 gen_ld
= gen_ld_s_a
;
7583 return gen_ld
[mode_no
];
7586 /* Constants that help mapping 'enum machine_mode' to int. */
7589 SPEC_MODE_INVALID
= -1,
7590 SPEC_MODE_FIRST
= 0,
7591 SPEC_MODE_FOR_EXTEND_FIRST
= 1,
7592 SPEC_MODE_FOR_EXTEND_LAST
= 3,
7598 /* Offset to reach ZERO_EXTEND patterns. */
7599 SPEC_GEN_EXTEND_OFFSET
= SPEC_MODE_LAST
- SPEC_MODE_FOR_EXTEND_FIRST
+ 1
7602 /* Return index of the MODE. */
7604 ia64_mode_to_int (enum machine_mode mode
)
7608 case BImode
: return 0; /* SPEC_MODE_FIRST */
7609 case QImode
: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7610 case HImode
: return 2;
7611 case SImode
: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7612 case DImode
: return 4;
7613 case SFmode
: return 5;
7614 case DFmode
: return 6;
7615 case XFmode
: return 7;
7617 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7618 mentioned in itanium[12].md. Predicate fp_register_operand also
7619 needs to be defined. Bottom line: better disable for now. */
7620 return SPEC_MODE_INVALID
;
7621 default: return SPEC_MODE_INVALID
;
7625 /* Provide information about speculation capabilities. */
7627 ia64_set_sched_flags (spec_info_t spec_info
)
7629 unsigned int *flags
= &(current_sched_info
->flags
);
7631 if (*flags
& SCHED_RGN
7632 || *flags
& SCHED_EBB
7633 || *flags
& SEL_SCHED
)
7637 if ((mflag_sched_br_data_spec
&& !reload_completed
&& optimize
> 0)
7638 || (mflag_sched_ar_data_spec
&& reload_completed
))
7643 && ((mflag_sched_br_in_data_spec
&& !reload_completed
)
7644 || (mflag_sched_ar_in_data_spec
&& reload_completed
)))
7648 if (mflag_sched_control_spec
7650 || reload_completed
))
7652 mask
|= BEGIN_CONTROL
;
7654 if (!sel_sched_p () && mflag_sched_in_control_spec
)
7655 mask
|= BE_IN_CONTROL
;
7658 spec_info
->mask
= mask
;
7662 *flags
|= USE_DEPS_LIST
| DO_SPECULATION
;
7664 if (mask
& BE_IN_SPEC
)
7667 spec_info
->flags
= 0;
7669 if ((mask
& DATA_SPEC
) && mflag_sched_prefer_non_data_spec_insns
)
7670 spec_info
->flags
|= PREFER_NON_DATA_SPEC
;
7672 if (mask
& CONTROL_SPEC
)
7674 if (mflag_sched_prefer_non_control_spec_insns
)
7675 spec_info
->flags
|= PREFER_NON_CONTROL_SPEC
;
7677 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec
)
7678 spec_info
->flags
|= SEL_SCHED_SPEC_DONT_CHECK_CONTROL
;
7681 if (sched_verbose
>= 1)
7682 spec_info
->dump
= sched_dump
;
7684 spec_info
->dump
= 0;
7686 if (mflag_sched_count_spec_in_critical_path
)
7687 spec_info
->flags
|= COUNT_SPEC_IN_CRITICAL_PATH
;
7691 spec_info
->mask
= 0;
7694 /* If INSN is an appropriate load return its mode.
7695 Return -1 otherwise. */
7697 get_mode_no_for_insn (rtx insn
)
7699 rtx reg
, mem
, mode_rtx
;
7703 extract_insn_cached (insn
);
7705 /* We use WHICH_ALTERNATIVE only after reload. This will
7706 guarantee that reload won't touch a speculative insn. */
7708 if (recog_data
.n_operands
!= 2)
7711 reg
= recog_data
.operand
[0];
7712 mem
= recog_data
.operand
[1];
7714 /* We should use MEM's mode since REG's mode in presence of
7715 ZERO_EXTEND will always be DImode. */
7716 if (get_attr_speculable1 (insn
) == SPECULABLE1_YES
)
7717 /* Process non-speculative ld. */
7719 if (!reload_completed
)
7721 /* Do not speculate into regs like ar.lc. */
7722 if (!REG_P (reg
) || AR_REGNO_P (REGNO (reg
)))
7729 rtx mem_reg
= XEXP (mem
, 0);
7731 if (!REG_P (mem_reg
))
7737 else if (get_attr_speculable2 (insn
) == SPECULABLE2_YES
)
7739 gcc_assert (REG_P (reg
) && MEM_P (mem
));
7745 else if (get_attr_data_speculative (insn
) == DATA_SPECULATIVE_YES
7746 || get_attr_control_speculative (insn
) == CONTROL_SPECULATIVE_YES
7747 || get_attr_check_load (insn
) == CHECK_LOAD_YES
)
7748 /* Process speculative ld or ld.c. */
7750 gcc_assert (REG_P (reg
) && MEM_P (mem
));
7755 enum attr_itanium_class attr_class
= get_attr_itanium_class (insn
);
7757 if (attr_class
== ITANIUM_CLASS_CHK_A
7758 || attr_class
== ITANIUM_CLASS_CHK_S_I
7759 || attr_class
== ITANIUM_CLASS_CHK_S_F
)
7766 mode_no
= ia64_mode_to_int (GET_MODE (mode_rtx
));
7768 if (mode_no
== SPEC_MODE_INVALID
)
7771 extend_p
= (GET_MODE (reg
) != GET_MODE (mode_rtx
));
7775 if (!(SPEC_MODE_FOR_EXTEND_FIRST
<= mode_no
7776 && mode_no
<= SPEC_MODE_FOR_EXTEND_LAST
))
7779 mode_no
+= SPEC_GEN_EXTEND_OFFSET
;
7785 /* If X is an unspec part of a speculative load, return its code.
7786 Return -1 otherwise. */
7788 get_spec_unspec_code (const_rtx x
)
7790 if (GET_CODE (x
) != UNSPEC
)
7812 /* Implement skip_rtx_p hook. */
7814 ia64_skip_rtx_p (const_rtx x
)
7816 return get_spec_unspec_code (x
) != -1;
7819 /* If INSN is a speculative load, return its UNSPEC code.
7820 Return -1 otherwise. */
7822 get_insn_spec_code (const_rtx insn
)
7826 pat
= PATTERN (insn
);
7828 if (GET_CODE (pat
) == COND_EXEC
)
7829 pat
= COND_EXEC_CODE (pat
);
7831 if (GET_CODE (pat
) != SET
)
7834 reg
= SET_DEST (pat
);
7838 mem
= SET_SRC (pat
);
7839 if (GET_CODE (mem
) == ZERO_EXTEND
)
7840 mem
= XEXP (mem
, 0);
7842 return get_spec_unspec_code (mem
);
7845 /* If INSN is a speculative load, return a ds with the speculation types.
7846 Otherwise [if INSN is a normal instruction] return 0. */
7848 ia64_get_insn_spec_ds (rtx insn
)
7850 int code
= get_insn_spec_code (insn
);
7859 return BEGIN_CONTROL
;
7862 return BEGIN_DATA
| BEGIN_CONTROL
;
7869 /* If INSN is a speculative load return a ds with the speculation types that
7871 Otherwise [if INSN is a normal instruction] return 0. */
7873 ia64_get_insn_checked_ds (rtx insn
)
7875 int code
= get_insn_spec_code (insn
);
7880 return BEGIN_DATA
| BEGIN_CONTROL
;
7883 return BEGIN_CONTROL
;
7887 return BEGIN_DATA
| BEGIN_CONTROL
;
7894 /* If GEN_P is true, calculate the index of needed speculation check and return
7895 speculative pattern for INSN with speculative mode TS, machine mode
7896 MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
7897 If GEN_P is false, just calculate the index of needed speculation check. */
7899 ia64_gen_spec_load (rtx insn
, ds_t ts
, int mode_no
)
7902 gen_func_t gen_load
;
7904 gen_load
= get_spec_load_gen_function (ts
, mode_no
);
7906 new_pat
= gen_load (copy_rtx (recog_data
.operand
[0]),
7907 copy_rtx (recog_data
.operand
[1]));
7909 pat
= PATTERN (insn
);
7910 if (GET_CODE (pat
) == COND_EXEC
)
7911 new_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (COND_EXEC_TEST (pat
)),
7918 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED
,
7919 ds_t ds ATTRIBUTE_UNUSED
)
7924 /* Implement targetm.sched.speculate_insn hook.
7925 Check if the INSN can be TS speculative.
7926 If 'no' - return -1.
7927 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7928 If current pattern of the INSN already provides TS speculation,
7931 ia64_speculate_insn (rtx insn
, ds_t ts
, rtx
*new_pat
)
7936 gcc_assert (!(ts
& ~SPECULATIVE
));
7938 if (ia64_spec_check_p (insn
))
7941 if ((ts
& BE_IN_SPEC
)
7942 && !insn_can_be_in_speculative_p (insn
, ts
))
7945 mode_no
= get_mode_no_for_insn (insn
);
7947 if (mode_no
!= SPEC_MODE_INVALID
)
7949 if (ia64_get_insn_spec_ds (insn
) == ds_get_speculation_types (ts
))
7954 *new_pat
= ia64_gen_spec_load (insn
, ts
, mode_no
);
7963 /* Return a function that will generate a check for speculation TS with mode
7965 If simple check is needed, pass true for SIMPLE_CHECK_P.
7966 If clearing check is needed, pass true for CLEARING_CHECK_P. */
7968 get_spec_check_gen_function (ds_t ts
, int mode_no
,
7969 bool simple_check_p
, bool clearing_check_p
)
7971 static gen_func_t gen_ld_c_clr
[] = {
7981 gen_zero_extendqidi2_clr
,
7982 gen_zero_extendhidi2_clr
,
7983 gen_zero_extendsidi2_clr
,
7985 static gen_func_t gen_ld_c_nc
[] = {
7995 gen_zero_extendqidi2_nc
,
7996 gen_zero_extendhidi2_nc
,
7997 gen_zero_extendsidi2_nc
,
7999 static gen_func_t gen_chk_a_clr
[] = {
8000 gen_advanced_load_check_clr_bi
,
8001 gen_advanced_load_check_clr_qi
,
8002 gen_advanced_load_check_clr_hi
,
8003 gen_advanced_load_check_clr_si
,
8004 gen_advanced_load_check_clr_di
,
8005 gen_advanced_load_check_clr_sf
,
8006 gen_advanced_load_check_clr_df
,
8007 gen_advanced_load_check_clr_xf
,
8008 gen_advanced_load_check_clr_ti
,
8009 gen_advanced_load_check_clr_di
,
8010 gen_advanced_load_check_clr_di
,
8011 gen_advanced_load_check_clr_di
,
8013 static gen_func_t gen_chk_a_nc
[] = {
8014 gen_advanced_load_check_nc_bi
,
8015 gen_advanced_load_check_nc_qi
,
8016 gen_advanced_load_check_nc_hi
,
8017 gen_advanced_load_check_nc_si
,
8018 gen_advanced_load_check_nc_di
,
8019 gen_advanced_load_check_nc_sf
,
8020 gen_advanced_load_check_nc_df
,
8021 gen_advanced_load_check_nc_xf
,
8022 gen_advanced_load_check_nc_ti
,
8023 gen_advanced_load_check_nc_di
,
8024 gen_advanced_load_check_nc_di
,
8025 gen_advanced_load_check_nc_di
,
8027 static gen_func_t gen_chk_s
[] = {
8028 gen_speculation_check_bi
,
8029 gen_speculation_check_qi
,
8030 gen_speculation_check_hi
,
8031 gen_speculation_check_si
,
8032 gen_speculation_check_di
,
8033 gen_speculation_check_sf
,
8034 gen_speculation_check_df
,
8035 gen_speculation_check_xf
,
8036 gen_speculation_check_ti
,
8037 gen_speculation_check_di
,
8038 gen_speculation_check_di
,
8039 gen_speculation_check_di
,
8042 gen_func_t
*gen_check
;
8044 if (ts
& BEGIN_DATA
)
8046 /* We don't need recovery because even if this is ld.sa
8047 ALAT entry will be allocated only if NAT bit is set to zero.
8048 So it is enough to use ld.c here. */
8052 gcc_assert (mflag_sched_spec_ldc
);
8054 if (clearing_check_p
)
8055 gen_check
= gen_ld_c_clr
;
8057 gen_check
= gen_ld_c_nc
;
8061 if (clearing_check_p
)
8062 gen_check
= gen_chk_a_clr
;
8064 gen_check
= gen_chk_a_nc
;
8067 else if (ts
& BEGIN_CONTROL
)
8070 /* We might want to use ld.sa -> ld.c instead of
8073 gcc_assert (!ia64_needs_block_p (ts
));
8075 if (clearing_check_p
)
8076 gen_check
= gen_ld_c_clr
;
8078 gen_check
= gen_ld_c_nc
;
8082 gen_check
= gen_chk_s
;
8088 gcc_assert (mode_no
>= 0);
8089 return gen_check
[mode_no
];
8092 /* Return nonzero, if INSN needs branchy recovery check. */
8094 ia64_needs_block_p (ds_t ts
)
8096 if (ts
& BEGIN_DATA
)
8097 return !mflag_sched_spec_ldc
;
8099 gcc_assert ((ts
& BEGIN_CONTROL
) != 0);
8101 return !(mflag_sched_spec_control_ldc
&& mflag_sched_spec_ldc
);
8104 /* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
8105 If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
8106 Otherwise, generate a simple check. */
8108 ia64_gen_spec_check (rtx insn
, rtx label
, ds_t ds
)
8110 rtx op1
, pat
, check_pat
;
8111 gen_func_t gen_check
;
8114 mode_no
= get_mode_no_for_insn (insn
);
8115 gcc_assert (mode_no
>= 0);
8121 gcc_assert (!ia64_needs_block_p (ds
));
8122 op1
= copy_rtx (recog_data
.operand
[1]);
8125 gen_check
= get_spec_check_gen_function (ds
, mode_no
, label
== NULL_RTX
,
8128 check_pat
= gen_check (copy_rtx (recog_data
.operand
[0]), op1
);
8130 pat
= PATTERN (insn
);
8131 if (GET_CODE (pat
) == COND_EXEC
)
8132 check_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (COND_EXEC_TEST (pat
)),
8138 /* Return nonzero, if X is branchy recovery check. */
8140 ia64_spec_check_p (rtx x
)
8143 if (GET_CODE (x
) == COND_EXEC
)
8144 x
= COND_EXEC_CODE (x
);
8145 if (GET_CODE (x
) == SET
)
8146 return ia64_spec_check_src_p (SET_SRC (x
));
8150 /* Return nonzero, if SRC belongs to recovery check. */
8152 ia64_spec_check_src_p (rtx src
)
8154 if (GET_CODE (src
) == IF_THEN_ELSE
)
8159 if (GET_CODE (t
) == NE
)
8163 if (GET_CODE (t
) == UNSPEC
)
8169 if (code
== UNSPEC_LDCCLR
8170 || code
== UNSPEC_LDCNC
8171 || code
== UNSPEC_CHKACLR
8172 || code
== UNSPEC_CHKANC
8173 || code
== UNSPEC_CHKS
)
8175 gcc_assert (code
!= 0);
8185 /* The following page contains abstract data `bundle states' which are
8186 used for bundling insns (inserting nops and template generation). */
8188 /* The following describes state of insn bundling. */
8192 /* Unique bundle state number to identify them in the debugging
8195 rtx insn
; /* corresponding insn, NULL for the 1st and the last state */
8196 /* number nops before and after the insn */
8197 short before_nops_num
, after_nops_num
;
8198 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
8200 int cost
; /* cost of the state in cycles */
8201 int accumulated_insns_num
; /* number of all previous insns including
8202 nops. L is considered as 2 insns */
8203 int branch_deviation
; /* deviation of previous branches from 3rd slots */
8204 int middle_bundle_stops
; /* number of stop bits in the middle of bundles */
8205 struct bundle_state
*next
; /* next state with the same insn_num */
8206 struct bundle_state
*originator
; /* originator (previous insn state) */
8207 /* All bundle states are in the following chain. */
8208 struct bundle_state
*allocated_states_chain
;
8209 /* The DFA State after issuing the insn and the nops. */
8213 /* The following is map insn number to the corresponding bundle state. */
8215 static struct bundle_state
**index_to_bundle_states
;
8217 /* The unique number of next bundle state. */
8219 static int bundle_states_num
;
8221 /* All allocated bundle states are in the following chain. */
8223 static struct bundle_state
*allocated_bundle_states_chain
;
8225 /* All allocated but not used bundle states are in the following
8228 static struct bundle_state
*free_bundle_state_chain
;
8231 /* The following function returns a free bundle state. */
8233 static struct bundle_state
*
8234 get_free_bundle_state (void)
8236 struct bundle_state
*result
;
8238 if (free_bundle_state_chain
!= NULL
)
8240 result
= free_bundle_state_chain
;
8241 free_bundle_state_chain
= result
->next
;
8245 result
= XNEW (struct bundle_state
);
8246 result
->dfa_state
= xmalloc (dfa_state_size
);
8247 result
->allocated_states_chain
= allocated_bundle_states_chain
;
8248 allocated_bundle_states_chain
= result
;
8250 result
->unique_num
= bundle_states_num
++;
8255 /* The following function frees given bundle state. */
8258 free_bundle_state (struct bundle_state
*state
)
8260 state
->next
= free_bundle_state_chain
;
8261 free_bundle_state_chain
= state
;
8264 /* Start work with abstract data `bundle states'. */
8267 initiate_bundle_states (void)
8269 bundle_states_num
= 0;
8270 free_bundle_state_chain
= NULL
;
8271 allocated_bundle_states_chain
= NULL
;
8274 /* Finish work with abstract data `bundle states'. */
8277 finish_bundle_states (void)
8279 struct bundle_state
*curr_state
, *next_state
;
8281 for (curr_state
= allocated_bundle_states_chain
;
8283 curr_state
= next_state
)
8285 next_state
= curr_state
->allocated_states_chain
;
8286 free (curr_state
->dfa_state
);
8291 /* Hash table of the bundle states. The key is dfa_state and insn_num
8292 of the bundle states. */
8294 static htab_t bundle_state_table
;
8296 /* The function returns hash of BUNDLE_STATE. */
8299 bundle_state_hash (const void *bundle_state
)
8301 const struct bundle_state
*const state
8302 = (const struct bundle_state
*) bundle_state
;
8305 for (result
= i
= 0; i
< dfa_state_size
; i
++)
8306 result
+= (((unsigned char *) state
->dfa_state
) [i
]
8307 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
8308 return result
+ state
->insn_num
;
8311 /* The function returns nonzero if the bundle state keys are equal. */
8314 bundle_state_eq_p (const void *bundle_state_1
, const void *bundle_state_2
)
8316 const struct bundle_state
*const state1
8317 = (const struct bundle_state
*) bundle_state_1
;
8318 const struct bundle_state
*const state2
8319 = (const struct bundle_state
*) bundle_state_2
;
8321 return (state1
->insn_num
== state2
->insn_num
8322 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
8323 dfa_state_size
) == 0);
8326 /* The function inserts the BUNDLE_STATE into the hash table. The
8327 function returns nonzero if the bundle has been inserted into the
8328 table. The table contains the best bundle state with given key. */
8331 insert_bundle_state (struct bundle_state
*bundle_state
)
8335 entry_ptr
= htab_find_slot (bundle_state_table
, bundle_state
, INSERT
);
8336 if (*entry_ptr
== NULL
)
8338 bundle_state
->next
= index_to_bundle_states
[bundle_state
->insn_num
];
8339 index_to_bundle_states
[bundle_state
->insn_num
] = bundle_state
;
8340 *entry_ptr
= (void *) bundle_state
;
8343 else if (bundle_state
->cost
< ((struct bundle_state
*) *entry_ptr
)->cost
8344 || (bundle_state
->cost
== ((struct bundle_state
*) *entry_ptr
)->cost
8345 && (((struct bundle_state
*)*entry_ptr
)->accumulated_insns_num
8346 > bundle_state
->accumulated_insns_num
8347 || (((struct bundle_state
*)
8348 *entry_ptr
)->accumulated_insns_num
8349 == bundle_state
->accumulated_insns_num
8350 && (((struct bundle_state
*)
8351 *entry_ptr
)->branch_deviation
8352 > bundle_state
->branch_deviation
8353 || (((struct bundle_state
*)
8354 *entry_ptr
)->branch_deviation
8355 == bundle_state
->branch_deviation
8356 && ((struct bundle_state
*)
8357 *entry_ptr
)->middle_bundle_stops
8358 > bundle_state
->middle_bundle_stops
))))))
8361 struct bundle_state temp
;
8363 temp
= *(struct bundle_state
*) *entry_ptr
;
8364 *(struct bundle_state
*) *entry_ptr
= *bundle_state
;
8365 ((struct bundle_state
*) *entry_ptr
)->next
= temp
.next
;
8366 *bundle_state
= temp
;
8371 /* Start work with the hash table. */
8374 initiate_bundle_state_table (void)
8376 bundle_state_table
= htab_create (50, bundle_state_hash
, bundle_state_eq_p
,
8380 /* Finish work with the hash table. */
8383 finish_bundle_state_table (void)
8385 htab_delete (bundle_state_table
);
8390 /* The following variable is a insn `nop' used to check bundle states
8391 with different number of inserted nops. */
8393 static rtx ia64_nop
;
8395 /* The following function tries to issue NOPS_NUM nops for the current
8396 state without advancing processor cycle. If it failed, the
8397 function returns FALSE and frees the current state. */
8400 try_issue_nops (struct bundle_state
*curr_state
, int nops_num
)
8404 for (i
= 0; i
< nops_num
; i
++)
8405 if (state_transition (curr_state
->dfa_state
, ia64_nop
) >= 0)
8407 free_bundle_state (curr_state
);
8413 /* The following function tries to issue INSN for the current
8414 state without advancing processor cycle. If it failed, the
8415 function returns FALSE and frees the current state. */
8418 try_issue_insn (struct bundle_state
*curr_state
, rtx insn
)
8420 if (insn
&& state_transition (curr_state
->dfa_state
, insn
) >= 0)
8422 free_bundle_state (curr_state
);
8428 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8429 starting with ORIGINATOR without advancing processor cycle. If
8430 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8431 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
8432 If it was successful, the function creates new bundle state and
8433 insert into the hash table and into `index_to_bundle_states'. */
8436 issue_nops_and_insn (struct bundle_state
*originator
, int before_nops_num
,
8437 rtx insn
, int try_bundle_end_p
, int only_bundle_end_p
)
8439 struct bundle_state
*curr_state
;
8441 curr_state
= get_free_bundle_state ();
8442 memcpy (curr_state
->dfa_state
, originator
->dfa_state
, dfa_state_size
);
8443 curr_state
->insn
= insn
;
8444 curr_state
->insn_num
= originator
->insn_num
+ 1;
8445 curr_state
->cost
= originator
->cost
;
8446 curr_state
->originator
= originator
;
8447 curr_state
->before_nops_num
= before_nops_num
;
8448 curr_state
->after_nops_num
= 0;
8449 curr_state
->accumulated_insns_num
8450 = originator
->accumulated_insns_num
+ before_nops_num
;
8451 curr_state
->branch_deviation
= originator
->branch_deviation
;
8452 curr_state
->middle_bundle_stops
= originator
->middle_bundle_stops
;
8454 if (INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
)
8456 gcc_assert (GET_MODE (insn
) != TImode
);
8457 if (!try_issue_nops (curr_state
, before_nops_num
))
8459 if (!try_issue_insn (curr_state
, insn
))
8461 memcpy (temp_dfa_state
, curr_state
->dfa_state
, dfa_state_size
);
8462 if (curr_state
->accumulated_insns_num
% 3 != 0)
8463 curr_state
->middle_bundle_stops
++;
8464 if (state_transition (temp_dfa_state
, dfa_pre_cycle_insn
) >= 0
8465 && curr_state
->accumulated_insns_num
% 3 != 0)
8467 free_bundle_state (curr_state
);
8471 else if (GET_MODE (insn
) != TImode
)
8473 if (!try_issue_nops (curr_state
, before_nops_num
))
8475 if (!try_issue_insn (curr_state
, insn
))
8477 curr_state
->accumulated_insns_num
++;
8478 gcc_assert (GET_CODE (PATTERN (insn
)) != ASM_INPUT
8479 && asm_noperands (PATTERN (insn
)) < 0);
8481 if (ia64_safe_type (insn
) == TYPE_L
)
8482 curr_state
->accumulated_insns_num
++;
8486 /* If this is an insn that must be first in a group, then don't allow
8487 nops to be emitted before it. Currently, alloc is the only such
8488 supported instruction. */
8489 /* ??? The bundling automatons should handle this for us, but they do
8490 not yet have support for the first_insn attribute. */
8491 if (before_nops_num
> 0 && get_attr_first_insn (insn
) == FIRST_INSN_YES
)
8493 free_bundle_state (curr_state
);
8497 state_transition (curr_state
->dfa_state
, dfa_pre_cycle_insn
);
8498 state_transition (curr_state
->dfa_state
, NULL
);
8500 if (!try_issue_nops (curr_state
, before_nops_num
))
8502 if (!try_issue_insn (curr_state
, insn
))
8504 curr_state
->accumulated_insns_num
++;
8505 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
8506 || asm_noperands (PATTERN (insn
)) >= 0)
8508 /* Finish bundle containing asm insn. */
8509 curr_state
->after_nops_num
8510 = 3 - curr_state
->accumulated_insns_num
% 3;
8511 curr_state
->accumulated_insns_num
8512 += 3 - curr_state
->accumulated_insns_num
% 3;
8514 else if (ia64_safe_type (insn
) == TYPE_L
)
8515 curr_state
->accumulated_insns_num
++;
8517 if (ia64_safe_type (insn
) == TYPE_B
)
8518 curr_state
->branch_deviation
8519 += 2 - (curr_state
->accumulated_insns_num
- 1) % 3;
8520 if (try_bundle_end_p
&& curr_state
->accumulated_insns_num
% 3 != 0)
8522 if (!only_bundle_end_p
&& insert_bundle_state (curr_state
))
8525 struct bundle_state
*curr_state1
;
8526 struct bundle_state
*allocated_states_chain
;
8528 curr_state1
= get_free_bundle_state ();
8529 dfa_state
= curr_state1
->dfa_state
;
8530 allocated_states_chain
= curr_state1
->allocated_states_chain
;
8531 *curr_state1
= *curr_state
;
8532 curr_state1
->dfa_state
= dfa_state
;
8533 curr_state1
->allocated_states_chain
= allocated_states_chain
;
8534 memcpy (curr_state1
->dfa_state
, curr_state
->dfa_state
,
8536 curr_state
= curr_state1
;
8538 if (!try_issue_nops (curr_state
,
8539 3 - curr_state
->accumulated_insns_num
% 3))
8541 curr_state
->after_nops_num
8542 = 3 - curr_state
->accumulated_insns_num
% 3;
8543 curr_state
->accumulated_insns_num
8544 += 3 - curr_state
->accumulated_insns_num
% 3;
8546 if (!insert_bundle_state (curr_state
))
8547 free_bundle_state (curr_state
);
8551 /* The following function returns position in the two window bundle
8555 get_max_pos (state_t state
)
8557 if (cpu_unit_reservation_p (state
, pos_6
))
8559 else if (cpu_unit_reservation_p (state
, pos_5
))
8561 else if (cpu_unit_reservation_p (state
, pos_4
))
8563 else if (cpu_unit_reservation_p (state
, pos_3
))
8565 else if (cpu_unit_reservation_p (state
, pos_2
))
8567 else if (cpu_unit_reservation_p (state
, pos_1
))
8573 /* The function returns code of a possible template for given position
8574 and state. The function should be called only with 2 values of
8575 position equal to 3 or 6. We avoid generating F NOPs by putting
8576 templates containing F insns at the end of the template search
8577 because undocumented anomaly in McKinley derived cores which can
8578 cause stalls if an F-unit insn (including a NOP) is issued within a
8579 six-cycle window after reading certain application registers (such
8580 as ar.bsp). Furthermore, power-considerations also argue against
8581 the use of F-unit instructions unless they're really needed. */
8584 get_template (state_t state
, int pos
)
8589 if (cpu_unit_reservation_p (state
, _0mmi_
))
8591 else if (cpu_unit_reservation_p (state
, _0mii_
))
8593 else if (cpu_unit_reservation_p (state
, _0mmb_
))
8595 else if (cpu_unit_reservation_p (state
, _0mib_
))
8597 else if (cpu_unit_reservation_p (state
, _0mbb_
))
8599 else if (cpu_unit_reservation_p (state
, _0bbb_
))
8601 else if (cpu_unit_reservation_p (state
, _0mmf_
))
8603 else if (cpu_unit_reservation_p (state
, _0mfi_
))
8605 else if (cpu_unit_reservation_p (state
, _0mfb_
))
8607 else if (cpu_unit_reservation_p (state
, _0mlx_
))
8612 if (cpu_unit_reservation_p (state
, _1mmi_
))
8614 else if (cpu_unit_reservation_p (state
, _1mii_
))
8616 else if (cpu_unit_reservation_p (state
, _1mmb_
))
8618 else if (cpu_unit_reservation_p (state
, _1mib_
))
8620 else if (cpu_unit_reservation_p (state
, _1mbb_
))
8622 else if (cpu_unit_reservation_p (state
, _1bbb_
))
8624 else if (_1mmf_
>= 0 && cpu_unit_reservation_p (state
, _1mmf_
))
8626 else if (cpu_unit_reservation_p (state
, _1mfi_
))
8628 else if (cpu_unit_reservation_p (state
, _1mfb_
))
8630 else if (cpu_unit_reservation_p (state
, _1mlx_
))
8639 /* True when INSN is important for bundling. */
8641 important_for_bundling_p (rtx insn
)
8643 return (INSN_P (insn
)
8644 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
8645 && GET_CODE (PATTERN (insn
)) != USE
8646 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
8649 /* The following function returns an insn important for insn bundling
8650 followed by INSN and before TAIL. */
8653 get_next_important_insn (rtx insn
, rtx tail
)
8655 for (; insn
&& insn
!= tail
; insn
= NEXT_INSN (insn
))
8656 if (important_for_bundling_p (insn
))
8661 /* Add a bundle selector TEMPLATE0 before INSN. */
8664 ia64_add_bundle_selector_before (int template0
, rtx insn
)
8666 rtx b
= gen_bundle_selector (GEN_INT (template0
));
8668 ia64_emit_insn_before (b
, insn
);
8669 #if NR_BUNDLES == 10
8670 if ((template0
== 4 || template0
== 5)
8671 && ia64_except_unwind_info (&global_options
) == UI_TARGET
)
8674 rtx note
= NULL_RTX
;
8676 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8677 first or second slot. If it is and has REG_EH_NOTE set, copy it
8678 to following nops, as br.call sets rp to the address of following
8679 bundle and therefore an EH region end must be on a bundle
8681 insn
= PREV_INSN (insn
);
8682 for (i
= 0; i
< 3; i
++)
8685 insn
= next_active_insn (insn
);
8686 while (GET_CODE (insn
) == INSN
8687 && get_attr_empty (insn
) == EMPTY_YES
);
8688 if (GET_CODE (insn
) == CALL_INSN
)
8689 note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
8694 gcc_assert ((code
= recog_memoized (insn
)) == CODE_FOR_nop
8695 || code
== CODE_FOR_nop_b
);
8696 if (find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
))
8699 add_reg_note (insn
, REG_EH_REGION
, XEXP (note
, 0));
8706 /* The following function does insn bundling. Bundling means
8707 inserting templates and nop insns to fit insn groups into permitted
8708 templates. Instruction scheduling uses NDFA (non-deterministic
8709 finite automata) encoding informations about the templates and the
8710 inserted nops. Nondeterminism of the automata permits follows
8711 all possible insn sequences very fast.
8713 Unfortunately it is not possible to get information about inserting
8714 nop insns and used templates from the automata states. The
8715 automata only says that we can issue an insn possibly inserting
8716 some nops before it and using some template. Therefore insn
8717 bundling in this function is implemented by using DFA
8718 (deterministic finite automata). We follow all possible insn
8719 sequences by inserting 0-2 nops (that is what the NDFA describe for
8720 insn scheduling) before/after each insn being bundled. We know the
8721 start of simulated processor cycle from insn scheduling (insn
8722 starting a new cycle has TImode).
8724 Simple implementation of insn bundling would create enormous
8725 number of possible insn sequences satisfying information about new
8726 cycle ticks taken from the insn scheduling. To make the algorithm
8727 practical we use dynamic programming. Each decision (about
8728 inserting nops and implicitly about previous decisions) is described
8729 by structure bundle_state (see above). If we generate the same
8730 bundle state (key is automaton state after issuing the insns and
8731 nops for it), we reuse already generated one. As consequence we
8732 reject some decisions which cannot improve the solution and
8733 reduce memory for the algorithm.
8735 When we reach the end of EBB (extended basic block), we choose the
8736 best sequence and then, moving back in EBB, insert templates for
8737 the best alternative. The templates are taken from querying
8738 automaton state for each insn in chosen bundle states.
8740 So the algorithm makes two (forward and backward) passes through
8744 bundling (FILE *dump
, int verbose
, rtx prev_head_insn
, rtx tail
)
8746 struct bundle_state
*curr_state
, *next_state
, *best_state
;
8747 rtx insn
, next_insn
;
8749 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
8750 int pos
= 0, max_pos
, template0
, template1
;
8753 enum attr_type type
;
8756 /* Count insns in the EBB. */
8757 for (insn
= NEXT_INSN (prev_head_insn
);
8758 insn
&& insn
!= tail
;
8759 insn
= NEXT_INSN (insn
))
8765 dfa_clean_insn_cache ();
8766 initiate_bundle_state_table ();
8767 index_to_bundle_states
= XNEWVEC (struct bundle_state
*, insn_num
+ 2);
8768 /* First (forward) pass -- generation of bundle states. */
8769 curr_state
= get_free_bundle_state ();
8770 curr_state
->insn
= NULL
;
8771 curr_state
->before_nops_num
= 0;
8772 curr_state
->after_nops_num
= 0;
8773 curr_state
->insn_num
= 0;
8774 curr_state
->cost
= 0;
8775 curr_state
->accumulated_insns_num
= 0;
8776 curr_state
->branch_deviation
= 0;
8777 curr_state
->middle_bundle_stops
= 0;
8778 curr_state
->next
= NULL
;
8779 curr_state
->originator
= NULL
;
8780 state_reset (curr_state
->dfa_state
);
8781 index_to_bundle_states
[0] = curr_state
;
8783 /* Shift cycle mark if it is put on insn which could be ignored. */
8784 for (insn
= NEXT_INSN (prev_head_insn
);
8786 insn
= NEXT_INSN (insn
))
8788 && (ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
8789 || GET_CODE (PATTERN (insn
)) == USE
8790 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
8791 && GET_MODE (insn
) == TImode
)
8793 PUT_MODE (insn
, VOIDmode
);
8794 for (next_insn
= NEXT_INSN (insn
);
8796 next_insn
= NEXT_INSN (next_insn
))
8797 if (INSN_P (next_insn
)
8798 && ia64_safe_itanium_class (next_insn
) != ITANIUM_CLASS_IGNORE
8799 && GET_CODE (PATTERN (next_insn
)) != USE
8800 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
8801 && INSN_CODE (next_insn
) != CODE_FOR_insn_group_barrier
)
8803 PUT_MODE (next_insn
, TImode
);
8807 /* Forward pass: generation of bundle states. */
8808 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
8812 gcc_assert (INSN_P (insn
)
8813 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
8814 && GET_CODE (PATTERN (insn
)) != USE
8815 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
8816 type
= ia64_safe_type (insn
);
8817 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
8819 index_to_bundle_states
[insn_num
] = NULL
;
8820 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
8822 curr_state
= next_state
)
8824 pos
= curr_state
->accumulated_insns_num
% 3;
8825 next_state
= curr_state
->next
;
8826 /* We must fill up the current bundle in order to start a
8827 subsequent asm insn in a new bundle. Asm insn is always
8828 placed in a separate bundle. */
8830 = (next_insn
!= NULL_RTX
8831 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
8832 && ia64_safe_type (next_insn
) == TYPE_UNKNOWN
);
8833 /* We may fill up the current bundle if it is the cycle end
8834 without a group barrier. */
8836 = (only_bundle_end_p
|| next_insn
== NULL_RTX
8837 || (GET_MODE (next_insn
) == TImode
8838 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
8839 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
8841 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
8843 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
8845 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
8848 gcc_assert (index_to_bundle_states
[insn_num
]);
8849 for (curr_state
= index_to_bundle_states
[insn_num
];
8851 curr_state
= curr_state
->next
)
8852 if (verbose
>= 2 && dump
)
8854 /* This structure is taken from generated code of the
8855 pipeline hazard recognizer (see file insn-attrtab.c).
8856 Please don't forget to change the structure if a new
8857 automaton is added to .md file. */
8860 unsigned short one_automaton_state
;
8861 unsigned short oneb_automaton_state
;
8862 unsigned short two_automaton_state
;
8863 unsigned short twob_automaton_state
;
8868 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8869 curr_state
->unique_num
,
8870 (curr_state
->originator
== NULL
8871 ? -1 : curr_state
->originator
->unique_num
),
8873 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
8874 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
8875 curr_state
->middle_bundle_stops
,
8876 ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
,
8881 /* We should find a solution because the 2nd insn scheduling has
8883 gcc_assert (index_to_bundle_states
[insn_num
]);
8884 /* Find a state corresponding to the best insn sequence. */
8886 for (curr_state
= index_to_bundle_states
[insn_num
];
8888 curr_state
= curr_state
->next
)
8889 /* We are just looking at the states with fully filled up last
8890 bundle. The first we prefer insn sequences with minimal cost
8891 then with minimal inserted nops and finally with branch insns
8892 placed in the 3rd slots. */
8893 if (curr_state
->accumulated_insns_num
% 3 == 0
8894 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
8895 || (best_state
->cost
== curr_state
->cost
8896 && (curr_state
->accumulated_insns_num
8897 < best_state
->accumulated_insns_num
8898 || (curr_state
->accumulated_insns_num
8899 == best_state
->accumulated_insns_num
8900 && (curr_state
->branch_deviation
8901 < best_state
->branch_deviation
8902 || (curr_state
->branch_deviation
8903 == best_state
->branch_deviation
8904 && curr_state
->middle_bundle_stops
8905 < best_state
->middle_bundle_stops
)))))))
8906 best_state
= curr_state
;
8907 /* Second (backward) pass: adding nops and templates. */
8908 gcc_assert (best_state
);
8909 insn_num
= best_state
->before_nops_num
;
8910 template0
= template1
= -1;
8911 for (curr_state
= best_state
;
8912 curr_state
->originator
!= NULL
;
8913 curr_state
= curr_state
->originator
)
8915 insn
= curr_state
->insn
;
8916 asm_p
= (GET_CODE (PATTERN (insn
)) == ASM_INPUT
8917 || asm_noperands (PATTERN (insn
)) >= 0);
8919 if (verbose
>= 2 && dump
)
8923 unsigned short one_automaton_state
;
8924 unsigned short oneb_automaton_state
;
8925 unsigned short two_automaton_state
;
8926 unsigned short twob_automaton_state
;
8931 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8932 curr_state
->unique_num
,
8933 (curr_state
->originator
== NULL
8934 ? -1 : curr_state
->originator
->unique_num
),
8936 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
8937 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
8938 curr_state
->middle_bundle_stops
,
8939 ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
,
8942 /* Find the position in the current bundle window. The window can
8943 contain at most two bundles. Two bundle window means that
8944 the processor will make two bundle rotation. */
8945 max_pos
= get_max_pos (curr_state
->dfa_state
);
8947 /* The following (negative template number) means that the
8948 processor did one bundle rotation. */
8949 || (max_pos
== 3 && template0
< 0))
8951 /* We are at the end of the window -- find template(s) for
8955 template0
= get_template (curr_state
->dfa_state
, 3);
8958 template1
= get_template (curr_state
->dfa_state
, 3);
8959 template0
= get_template (curr_state
->dfa_state
, 6);
8962 if (max_pos
> 3 && template1
< 0)
8963 /* It may happen when we have the stop inside a bundle. */
8965 gcc_assert (pos
<= 3);
8966 template1
= get_template (curr_state
->dfa_state
, 3);
8970 /* Emit nops after the current insn. */
8971 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
8974 emit_insn_after (nop
, insn
);
8976 gcc_assert (pos
>= 0);
8979 /* We are at the start of a bundle: emit the template
8980 (it should be defined). */
8981 gcc_assert (template0
>= 0);
8982 ia64_add_bundle_selector_before (template0
, nop
);
8983 /* If we have two bundle window, we make one bundle
8984 rotation. Otherwise template0 will be undefined
8985 (negative value). */
8986 template0
= template1
;
8990 /* Move the position backward in the window. Group barrier has
8991 no slot. Asm insn takes all bundle. */
8992 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
8993 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
8994 && asm_noperands (PATTERN (insn
)) < 0)
8996 /* Long insn takes 2 slots. */
8997 if (ia64_safe_type (insn
) == TYPE_L
)
8999 gcc_assert (pos
>= 0);
9001 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
9002 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
9003 && asm_noperands (PATTERN (insn
)) < 0)
9005 /* The current insn is at the bundle start: emit the
9007 gcc_assert (template0
>= 0);
9008 ia64_add_bundle_selector_before (template0
, insn
);
9009 b
= PREV_INSN (insn
);
9011 /* See comment above in analogous place for emitting nops
9013 template0
= template1
;
9016 /* Emit nops after the current insn. */
9017 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
9020 ia64_emit_insn_before (nop
, insn
);
9021 nop
= PREV_INSN (insn
);
9024 gcc_assert (pos
>= 0);
9027 /* See comment above in analogous place for emitting nops
9029 gcc_assert (template0
>= 0);
9030 ia64_add_bundle_selector_before (template0
, insn
);
9031 b
= PREV_INSN (insn
);
9033 template0
= template1
;
9039 #ifdef ENABLE_CHECKING
9041 /* Assert right calculation of middle_bundle_stops. */
9042 int num
= best_state
->middle_bundle_stops
;
9043 bool start_bundle
= true, end_bundle
= false;
9045 for (insn
= NEXT_INSN (prev_head_insn
);
9046 insn
&& insn
!= tail
;
9047 insn
= NEXT_INSN (insn
))
9051 if (recog_memoized (insn
) == CODE_FOR_bundle_selector
)
9052 start_bundle
= true;
9057 for (next_insn
= NEXT_INSN (insn
);
9058 next_insn
&& next_insn
!= tail
;
9059 next_insn
= NEXT_INSN (next_insn
))
9060 if (INSN_P (next_insn
)
9061 && (ia64_safe_itanium_class (next_insn
)
9062 != ITANIUM_CLASS_IGNORE
9063 || recog_memoized (next_insn
)
9064 == CODE_FOR_bundle_selector
)
9065 && GET_CODE (PATTERN (next_insn
)) != USE
9066 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
9069 end_bundle
= next_insn
== NULL_RTX
9070 || next_insn
== tail
9071 || (INSN_P (next_insn
)
9072 && recog_memoized (next_insn
)
9073 == CODE_FOR_bundle_selector
);
9074 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
9075 && !start_bundle
&& !end_bundle
9077 && GET_CODE (PATTERN (next_insn
)) != ASM_INPUT
9078 && asm_noperands (PATTERN (next_insn
)) < 0)
9081 start_bundle
= false;
9085 gcc_assert (num
== 0);
9089 free (index_to_bundle_states
);
9090 finish_bundle_state_table ();
9092 dfa_clean_insn_cache ();
9095 /* The following function is called at the end of scheduling BB or
9096 EBB. After reload, it inserts stop bits and does insn bundling. */
9099 ia64_sched_finish (FILE *dump
, int sched_verbose
)
9102 fprintf (dump
, "// Finishing schedule.\n");
9103 if (!reload_completed
)
9105 if (reload_completed
)
9107 final_emit_insn_group_barriers (dump
);
9108 bundling (dump
, sched_verbose
, current_sched_info
->prev_head
,
9109 current_sched_info
->next_tail
);
9110 if (sched_verbose
&& dump
)
9111 fprintf (dump
, "// finishing %d-%d\n",
9112 INSN_UID (NEXT_INSN (current_sched_info
->prev_head
)),
9113 INSN_UID (PREV_INSN (current_sched_info
->next_tail
)));
9119 /* The following function inserts stop bits in scheduled BB or EBB. */
9122 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
9125 int need_barrier_p
= 0;
9126 int seen_good_insn
= 0;
9128 init_insn_group_barriers ();
9130 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
9131 insn
!= current_sched_info
->next_tail
;
9132 insn
= NEXT_INSN (insn
))
9134 if (GET_CODE (insn
) == BARRIER
)
9136 rtx last
= prev_active_insn (insn
);
9140 if (GET_CODE (last
) == JUMP_INSN
9141 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
9142 last
= prev_active_insn (last
);
9143 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
9144 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
9146 init_insn_group_barriers ();
9150 else if (NONDEBUG_INSN_P (insn
))
9152 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
9154 init_insn_group_barriers ();
9158 else if (need_barrier_p
|| group_barrier_needed (insn
)
9159 || (mflag_sched_stop_bits_after_every_cycle
9160 && GET_MODE (insn
) == TImode
9163 if (TARGET_EARLY_STOP_BITS
)
9168 last
!= current_sched_info
->prev_head
;
9169 last
= PREV_INSN (last
))
9170 if (INSN_P (last
) && GET_MODE (last
) == TImode
9171 && stops_p
[INSN_UID (last
)])
9173 if (last
== current_sched_info
->prev_head
)
9175 last
= prev_active_insn (last
);
9177 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
9178 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9180 init_insn_group_barriers ();
9181 for (last
= NEXT_INSN (last
);
9183 last
= NEXT_INSN (last
))
9186 group_barrier_needed (last
);
9187 if (recog_memoized (last
) >= 0
9188 && important_for_bundling_p (last
))
9194 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9196 init_insn_group_barriers ();
9199 group_barrier_needed (insn
);
9200 if (recog_memoized (insn
) >= 0
9201 && important_for_bundling_p (insn
))
9204 else if (recog_memoized (insn
) >= 0
9205 && important_for_bundling_p (insn
))
9207 need_barrier_p
= (GET_CODE (insn
) == CALL_INSN
9208 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
9209 || asm_noperands (PATTERN (insn
)) >= 0);
9216 /* If the following function returns TRUE, we will use the DFA
9220 ia64_first_cycle_multipass_dfa_lookahead (void)
9222 return (reload_completed
? 6 : 4);
9225 /* The following function initiates variable `dfa_pre_cycle_insn'. */
9228 ia64_init_dfa_pre_cycle_insn (void)
9230 if (temp_dfa_state
== NULL
)
9232 dfa_state_size
= state_size ();
9233 temp_dfa_state
= xmalloc (dfa_state_size
);
9234 prev_cycle_state
= xmalloc (dfa_state_size
);
9236 dfa_pre_cycle_insn
= make_insn_raw (gen_pre_cycle ());
9237 PREV_INSN (dfa_pre_cycle_insn
) = NEXT_INSN (dfa_pre_cycle_insn
) = NULL_RTX
;
9238 recog_memoized (dfa_pre_cycle_insn
);
9239 dfa_stop_insn
= make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9240 PREV_INSN (dfa_stop_insn
) = NEXT_INSN (dfa_stop_insn
) = NULL_RTX
;
9241 recog_memoized (dfa_stop_insn
);
9244 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9245 used by the DFA insn scheduler. */
9248 ia64_dfa_pre_cycle_insn (void)
9250 return dfa_pre_cycle_insn
;
9253 /* The following function returns TRUE if PRODUCER (of type ilog or
9254 ld) produces address for CONSUMER (of type st or stf). */
9257 ia64_st_address_bypass_p (rtx producer
, rtx consumer
)
9261 gcc_assert (producer
&& consumer
);
9262 dest
= ia64_single_set (producer
);
9264 reg
= SET_DEST (dest
);
9266 if (GET_CODE (reg
) == SUBREG
)
9267 reg
= SUBREG_REG (reg
);
9268 gcc_assert (GET_CODE (reg
) == REG
);
9270 dest
= ia64_single_set (consumer
);
9272 mem
= SET_DEST (dest
);
9273 gcc_assert (mem
&& GET_CODE (mem
) == MEM
);
9274 return reg_mentioned_p (reg
, mem
);
9277 /* The following function returns TRUE if PRODUCER (of type ilog or
9278 ld) produces address for CONSUMER (of type ld or fld). */
9281 ia64_ld_address_bypass_p (rtx producer
, rtx consumer
)
9283 rtx dest
, src
, reg
, mem
;
9285 gcc_assert (producer
&& consumer
);
9286 dest
= ia64_single_set (producer
);
9288 reg
= SET_DEST (dest
);
9290 if (GET_CODE (reg
) == SUBREG
)
9291 reg
= SUBREG_REG (reg
);
9292 gcc_assert (GET_CODE (reg
) == REG
);
9294 src
= ia64_single_set (consumer
);
9296 mem
= SET_SRC (src
);
9299 if (GET_CODE (mem
) == UNSPEC
&& XVECLEN (mem
, 0) > 0)
9300 mem
= XVECEXP (mem
, 0, 0);
9301 else if (GET_CODE (mem
) == IF_THEN_ELSE
)
9302 /* ??? Is this bypass necessary for ld.c? */
9304 gcc_assert (XINT (XEXP (XEXP (mem
, 0), 0), 1) == UNSPEC_LDCCLR
);
9305 mem
= XEXP (mem
, 1);
9308 while (GET_CODE (mem
) == SUBREG
|| GET_CODE (mem
) == ZERO_EXTEND
)
9309 mem
= XEXP (mem
, 0);
9311 if (GET_CODE (mem
) == UNSPEC
)
9313 int c
= XINT (mem
, 1);
9315 gcc_assert (c
== UNSPEC_LDA
|| c
== UNSPEC_LDS
|| c
== UNSPEC_LDS_A
9316 || c
== UNSPEC_LDSA
);
9317 mem
= XVECEXP (mem
, 0, 0);
9320 /* Note that LO_SUM is used for GOT loads. */
9321 gcc_assert (GET_CODE (mem
) == LO_SUM
|| GET_CODE (mem
) == MEM
);
9323 return reg_mentioned_p (reg
, mem
);
9326 /* The following function returns TRUE if INSN produces address for a
9327 load/store insn. We will place such insns into M slot because it
9328 decreases its latency time. */
9331 ia64_produce_address_p (rtx insn
)
9337 /* Emit pseudo-ops for the assembler to describe predicate relations.
9338 At present this assumes that we only consider predicate pairs to
9339 be mutex, and that the assembler can deduce proper values from
9340 straight-line code. */
9343 emit_predicate_relation_info (void)
9347 FOR_EACH_BB_REVERSE (bb
)
9350 rtx head
= BB_HEAD (bb
);
9352 /* We only need such notes at code labels. */
9353 if (GET_CODE (head
) != CODE_LABEL
)
9355 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head
)))
9356 head
= NEXT_INSN (head
);
9358 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9359 grabbing the entire block of predicate registers. */
9360 for (r
= PR_REG (2); r
< PR_REG (64); r
+= 2)
9361 if (REGNO_REG_SET_P (df_get_live_in (bb
), r
))
9363 rtx p
= gen_rtx_REG (BImode
, r
);
9364 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
9365 if (head
== BB_END (bb
))
9371 /* Look for conditional calls that do not return, and protect predicate
9372 relations around them. Otherwise the assembler will assume the call
9373 returns, and complain about uses of call-clobbered predicates after
9375 FOR_EACH_BB_REVERSE (bb
)
9377 rtx insn
= BB_HEAD (bb
);
9381 if (GET_CODE (insn
) == CALL_INSN
9382 && GET_CODE (PATTERN (insn
)) == COND_EXEC
9383 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
9385 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
9386 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
9387 if (BB_HEAD (bb
) == insn
)
9389 if (BB_END (bb
) == insn
)
9393 if (insn
== BB_END (bb
))
9395 insn
= NEXT_INSN (insn
);
9400 /* Perform machine dependent operations on the rtl chain INSNS. */
9405 /* We are freeing block_for_insn in the toplev to keep compatibility
9406 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9407 compute_bb_for_insn ();
9409 /* If optimizing, we'll have split before scheduling. */
9413 if (optimize
&& ia64_flag_schedule_insns2
9414 && dbg_cnt (ia64_sched2
))
9416 timevar_push (TV_SCHED2
);
9417 ia64_final_schedule
= 1;
9419 initiate_bundle_states ();
9420 ia64_nop
= make_insn_raw (gen_nop ());
9421 PREV_INSN (ia64_nop
) = NEXT_INSN (ia64_nop
) = NULL_RTX
;
9422 recog_memoized (ia64_nop
);
9423 clocks_length
= get_max_uid () + 1;
9424 stops_p
= XCNEWVEC (char, clocks_length
);
9426 if (ia64_tune
== PROCESSOR_ITANIUM2
)
9428 pos_1
= get_cpu_unit_code ("2_1");
9429 pos_2
= get_cpu_unit_code ("2_2");
9430 pos_3
= get_cpu_unit_code ("2_3");
9431 pos_4
= get_cpu_unit_code ("2_4");
9432 pos_5
= get_cpu_unit_code ("2_5");
9433 pos_6
= get_cpu_unit_code ("2_6");
9434 _0mii_
= get_cpu_unit_code ("2b_0mii.");
9435 _0mmi_
= get_cpu_unit_code ("2b_0mmi.");
9436 _0mfi_
= get_cpu_unit_code ("2b_0mfi.");
9437 _0mmf_
= get_cpu_unit_code ("2b_0mmf.");
9438 _0bbb_
= get_cpu_unit_code ("2b_0bbb.");
9439 _0mbb_
= get_cpu_unit_code ("2b_0mbb.");
9440 _0mib_
= get_cpu_unit_code ("2b_0mib.");
9441 _0mmb_
= get_cpu_unit_code ("2b_0mmb.");
9442 _0mfb_
= get_cpu_unit_code ("2b_0mfb.");
9443 _0mlx_
= get_cpu_unit_code ("2b_0mlx.");
9444 _1mii_
= get_cpu_unit_code ("2b_1mii.");
9445 _1mmi_
= get_cpu_unit_code ("2b_1mmi.");
9446 _1mfi_
= get_cpu_unit_code ("2b_1mfi.");
9447 _1mmf_
= get_cpu_unit_code ("2b_1mmf.");
9448 _1bbb_
= get_cpu_unit_code ("2b_1bbb.");
9449 _1mbb_
= get_cpu_unit_code ("2b_1mbb.");
9450 _1mib_
= get_cpu_unit_code ("2b_1mib.");
9451 _1mmb_
= get_cpu_unit_code ("2b_1mmb.");
9452 _1mfb_
= get_cpu_unit_code ("2b_1mfb.");
9453 _1mlx_
= get_cpu_unit_code ("2b_1mlx.");
9457 pos_1
= get_cpu_unit_code ("1_1");
9458 pos_2
= get_cpu_unit_code ("1_2");
9459 pos_3
= get_cpu_unit_code ("1_3");
9460 pos_4
= get_cpu_unit_code ("1_4");
9461 pos_5
= get_cpu_unit_code ("1_5");
9462 pos_6
= get_cpu_unit_code ("1_6");
9463 _0mii_
= get_cpu_unit_code ("1b_0mii.");
9464 _0mmi_
= get_cpu_unit_code ("1b_0mmi.");
9465 _0mfi_
= get_cpu_unit_code ("1b_0mfi.");
9466 _0mmf_
= get_cpu_unit_code ("1b_0mmf.");
9467 _0bbb_
= get_cpu_unit_code ("1b_0bbb.");
9468 _0mbb_
= get_cpu_unit_code ("1b_0mbb.");
9469 _0mib_
= get_cpu_unit_code ("1b_0mib.");
9470 _0mmb_
= get_cpu_unit_code ("1b_0mmb.");
9471 _0mfb_
= get_cpu_unit_code ("1b_0mfb.");
9472 _0mlx_
= get_cpu_unit_code ("1b_0mlx.");
9473 _1mii_
= get_cpu_unit_code ("1b_1mii.");
9474 _1mmi_
= get_cpu_unit_code ("1b_1mmi.");
9475 _1mfi_
= get_cpu_unit_code ("1b_1mfi.");
9476 _1mmf_
= get_cpu_unit_code ("1b_1mmf.");
9477 _1bbb_
= get_cpu_unit_code ("1b_1bbb.");
9478 _1mbb_
= get_cpu_unit_code ("1b_1mbb.");
9479 _1mib_
= get_cpu_unit_code ("1b_1mib.");
9480 _1mmb_
= get_cpu_unit_code ("1b_1mmb.");
9481 _1mfb_
= get_cpu_unit_code ("1b_1mfb.");
9482 _1mlx_
= get_cpu_unit_code ("1b_1mlx.");
9485 if (flag_selective_scheduling2
9486 && !maybe_skip_selective_scheduling ())
9487 run_selective_scheduling ();
9491 /* Redo alignment computation, as it might gone wrong. */
9492 compute_alignments ();
9494 /* We cannot reuse this one because it has been corrupted by the
9496 finish_bundle_states ();
9499 emit_insn_group_barriers (dump_file
);
9501 ia64_final_schedule
= 0;
9502 timevar_pop (TV_SCHED2
);
9505 emit_all_insn_group_barriers (dump_file
);
9509 /* A call must not be the last instruction in a function, so that the
9510 return address is still within the function, so that unwinding works
9511 properly. Note that IA-64 differs from dwarf2 on this point. */
9512 if (ia64_except_unwind_info (&global_options
) == UI_TARGET
)
9517 insn
= get_last_insn ();
9518 if (! INSN_P (insn
))
9519 insn
= prev_active_insn (insn
);
9522 /* Skip over insns that expand to nothing. */
9523 while (GET_CODE (insn
) == INSN
9524 && get_attr_empty (insn
) == EMPTY_YES
)
9526 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
9527 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
9529 insn
= prev_active_insn (insn
);
9531 if (GET_CODE (insn
) == CALL_INSN
)
9534 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9535 emit_insn (gen_break_f ());
9536 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9541 emit_predicate_relation_info ();
9543 if (ia64_flag_var_tracking
)
9545 timevar_push (TV_VAR_TRACKING
);
9546 variable_tracking_main ();
9547 timevar_pop (TV_VAR_TRACKING
);
9549 df_finish_pass (false);
9552 /* Return true if REGNO is used by the epilogue. */
9555 ia64_epilogue_uses (int regno
)
9560 /* With a call to a function in another module, we will write a new
9561 value to "gp". After returning from such a call, we need to make
9562 sure the function restores the original gp-value, even if the
9563 function itself does not use the gp anymore. */
9564 return !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
);
9566 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9567 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9568 /* For functions defined with the syscall_linkage attribute, all
9569 input registers are marked as live at all function exits. This
9570 prevents the register allocator from using the input registers,
9571 which in turn makes it possible to restart a system call after
9572 an interrupt without having to save/restore the input registers.
9573 This also prevents kernel data from leaking to application code. */
9574 return lookup_attribute ("syscall_linkage",
9575 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
9578 /* Conditional return patterns can't represent the use of `b0' as
9579 the return address, so we force the value live this way. */
9583 /* Likewise for ar.pfs, which is used by br.ret. */
9591 /* Return true if REGNO is used by the frame unwinder. */
9594 ia64_eh_uses (int regno
)
9598 if (! reload_completed
)
9604 for (r
= reg_save_b0
; r
<= reg_save_ar_lc
; r
++)
9605 if (regno
== current_frame_info
.r
[r
]
9606 || regno
== emitted_frame_related_regs
[r
])
9612 /* Return true if this goes in small data/bss. */
9614 /* ??? We could also support own long data here. Generating movl/add/ld8
9615 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9616 code faster because there is one less load. This also includes incomplete
9617 types which can't go in sdata/sbss. */
9620 ia64_in_small_data_p (const_tree exp
)
9622 if (TARGET_NO_SDATA
)
9625 /* We want to merge strings, so we never consider them small data. */
9626 if (TREE_CODE (exp
) == STRING_CST
)
9629 /* Functions are never small data. */
9630 if (TREE_CODE (exp
) == FUNCTION_DECL
)
9633 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
9635 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
9637 if (strcmp (section
, ".sdata") == 0
9638 || strncmp (section
, ".sdata.", 7) == 0
9639 || strncmp (section
, ".gnu.linkonce.s.", 16) == 0
9640 || strcmp (section
, ".sbss") == 0
9641 || strncmp (section
, ".sbss.", 6) == 0
9642 || strncmp (section
, ".gnu.linkonce.sb.", 17) == 0)
9647 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
9649 /* If this is an incomplete type with size 0, then we can't put it
9650 in sdata because it might be too big when completed. */
9651 if (size
> 0 && size
<= ia64_section_threshold
)
9658 /* Output assembly directives for prologue regions. */
9660 /* The current basic block number. */
9662 static bool last_block
;
9664 /* True if we need a copy_state command at the start of the next block. */
9666 static bool need_copy_state
;
9668 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9669 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9672 /* Emit a debugging label after a call-frame-related insn. We'd
9673 rather output the label right away, but we'd have to output it
9674 after, not before, the instruction, and the instruction has not
9675 been output yet. So we emit the label after the insn, delete it to
9676 avoid introducing basic blocks, and mark it as preserved, such that
9677 it is still output, given that it is referenced in debug info. */
9680 ia64_emit_deleted_label_after_insn (rtx insn
)
9682 char label
[MAX_ARTIFICIAL_LABEL_BYTES
];
9683 rtx lb
= gen_label_rtx ();
9684 rtx label_insn
= emit_label_after (lb
, insn
);
9686 LABEL_PRESERVE_P (lb
) = 1;
9688 delete_insn (label_insn
);
9690 ASM_GENERATE_INTERNAL_LABEL (label
, "L", CODE_LABEL_NUMBER (label_insn
));
9692 return xstrdup (label
);
9695 /* Define the CFA after INSN with the steady-state definition. */
9698 ia64_dwarf2out_def_steady_cfa (rtx insn
, bool frame
)
9700 rtx fp
= frame_pointer_needed
9701 ? hard_frame_pointer_rtx
9702 : stack_pointer_rtx
;
9703 const char *label
= ia64_emit_deleted_label_after_insn (insn
);
9710 ia64_initial_elimination_offset
9711 (REGNO (arg_pointer_rtx
), REGNO (fp
))
9712 + ARG_POINTER_CFA_OFFSET (current_function_decl
));
9715 /* All we need to do here is avoid a crash in the generic dwarf2
9716 processing. The real CFA definition is set up above. */
9719 ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label
),
9720 rtx
ARG_UNUSED (pattern
),
9723 gcc_assert (index
== UNSPECV_ALLOC
);
9726 /* The generic dwarf2 frame debug info generator does not define a
9727 separate region for the very end of the epilogue, so refrain from
9728 doing so in the IA64-specific code as well. */
9730 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
9732 /* The function emits unwind directives for the start of an epilogue. */
9735 process_epilogue (FILE *asm_out_file
, rtx insn
, bool unwind
, bool frame
)
9737 /* If this isn't the last block of the function, then we need to label the
9738 current state, and copy it back in at the start of the next block. */
9743 fprintf (asm_out_file
, "\t.label_state %d\n",
9744 ++cfun
->machine
->state_num
);
9745 need_copy_state
= true;
9749 fprintf (asm_out_file
, "\t.restore sp\n");
9750 if (IA64_CHANGE_CFA_IN_EPILOGUE
&& frame
)
9751 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn
),
9752 STACK_POINTER_REGNUM
, INCOMING_FRAME_SP_OFFSET
);
9755 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9758 process_cfa_adjust_cfa (FILE *asm_out_file
, rtx pat
, rtx insn
,
9759 bool unwind
, bool frame
)
9761 rtx dest
= SET_DEST (pat
);
9762 rtx src
= SET_SRC (pat
);
9764 if (dest
== stack_pointer_rtx
)
9766 if (GET_CODE (src
) == PLUS
)
9768 rtx op0
= XEXP (src
, 0);
9769 rtx op1
= XEXP (src
, 1);
9771 gcc_assert (op0
== dest
&& GET_CODE (op1
) == CONST_INT
);
9773 if (INTVAL (op1
) < 0)
9775 gcc_assert (!frame_pointer_needed
);
9777 fprintf (asm_out_file
,
9778 "\t.fframe "HOST_WIDE_INT_PRINT_DEC
"\n",
9780 ia64_dwarf2out_def_steady_cfa (insn
, frame
);
9783 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
9787 gcc_assert (src
== hard_frame_pointer_rtx
);
9788 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
9791 else if (dest
== hard_frame_pointer_rtx
)
9793 gcc_assert (src
== stack_pointer_rtx
);
9794 gcc_assert (frame_pointer_needed
);
9797 fprintf (asm_out_file
, "\t.vframe r%d\n",
9798 ia64_dbx_register_number (REGNO (dest
)));
9799 ia64_dwarf2out_def_steady_cfa (insn
, frame
);
9805 /* This function processes a SET pattern for REG_CFA_REGISTER. */
9808 process_cfa_register (FILE *asm_out_file
, rtx pat
, bool unwind
)
9810 rtx dest
= SET_DEST (pat
);
9811 rtx src
= SET_SRC (pat
);
9813 int dest_regno
= REGNO (dest
);
9814 int src_regno
= REGNO (src
);
9819 /* Saving return address pointer. */
9820 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_b0
]);
9822 fprintf (asm_out_file
, "\t.save rp, r%d\n",
9823 ia64_dbx_register_number (dest_regno
));
9827 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_pr
]);
9829 fprintf (asm_out_file
, "\t.save pr, r%d\n",
9830 ia64_dbx_register_number (dest_regno
));
9833 case AR_UNAT_REGNUM
:
9834 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_unat
]);
9836 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
9837 ia64_dbx_register_number (dest_regno
));
9841 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_lc
]);
9843 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
9844 ia64_dbx_register_number (dest_regno
));
9848 /* Everything else should indicate being stored to memory. */
9853 /* This function processes a SET pattern for REG_CFA_OFFSET. */
9856 process_cfa_offset (FILE *asm_out_file
, rtx pat
, bool unwind
)
9858 rtx dest
= SET_DEST (pat
);
9859 rtx src
= SET_SRC (pat
);
9860 int src_regno
= REGNO (src
);
9865 gcc_assert (MEM_P (dest
));
9866 if (GET_CODE (XEXP (dest
, 0)) == REG
)
9868 base
= XEXP (dest
, 0);
9873 gcc_assert (GET_CODE (XEXP (dest
, 0)) == PLUS
9874 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
);
9875 base
= XEXP (XEXP (dest
, 0), 0);
9876 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
9879 if (base
== hard_frame_pointer_rtx
)
9881 saveop
= ".savepsp";
9886 gcc_assert (base
== stack_pointer_rtx
);
9890 src_regno
= REGNO (src
);
9894 gcc_assert (!current_frame_info
.r
[reg_save_b0
]);
9896 fprintf (asm_out_file
, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC
"\n",
9901 gcc_assert (!current_frame_info
.r
[reg_save_pr
]);
9903 fprintf (asm_out_file
, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC
"\n",
9908 gcc_assert (!current_frame_info
.r
[reg_save_ar_lc
]);
9910 fprintf (asm_out_file
, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC
"\n",
9915 gcc_assert (!current_frame_info
.r
[reg_save_ar_pfs
]);
9917 fprintf (asm_out_file
, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC
"\n",
9921 case AR_UNAT_REGNUM
:
9922 gcc_assert (!current_frame_info
.r
[reg_save_ar_unat
]);
9924 fprintf (asm_out_file
, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC
"\n",
9933 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
9934 1 << (src_regno
- GR_REG (4)));
9943 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
9944 1 << (src_regno
- BR_REG (1)));
9952 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
9953 1 << (src_regno
- FR_REG (2)));
9956 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9957 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9958 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9959 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9961 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
9962 1 << (src_regno
- FR_REG (12)));
9966 /* ??? For some reason we mark other general registers, even those
9967 we can't represent in the unwind info. Ignore them. */
9972 /* This function looks at a single insn and emits any directives
9973 required to unwind this insn. */
9976 ia64_asm_unwind_emit (FILE *asm_out_file
, rtx insn
)
9978 bool unwind
= ia64_except_unwind_info (&global_options
) == UI_TARGET
;
9979 bool frame
= dwarf2out_do_frame ();
9983 if (!unwind
&& !frame
)
9986 if (NOTE_INSN_BASIC_BLOCK_P (insn
))
9988 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
== EXIT_BLOCK_PTR
;
9990 /* Restore unwind state from immediately before the epilogue. */
9991 if (need_copy_state
)
9995 fprintf (asm_out_file
, "\t.body\n");
9996 fprintf (asm_out_file
, "\t.copy_state %d\n",
9997 cfun
->machine
->state_num
);
9999 if (IA64_CHANGE_CFA_IN_EPILOGUE
)
10000 ia64_dwarf2out_def_steady_cfa (insn
, frame
);
10001 need_copy_state
= false;
10005 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
10008 /* Look for the ALLOC insn. */
10009 if (INSN_CODE (insn
) == CODE_FOR_alloc
)
10011 rtx dest
= SET_DEST (XVECEXP (PATTERN (insn
), 0, 0));
10012 int dest_regno
= REGNO (dest
);
10014 /* If this is the final destination for ar.pfs, then this must
10015 be the alloc in the prologue. */
10016 if (dest_regno
== current_frame_info
.r
[reg_save_ar_pfs
])
10019 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
10020 ia64_dbx_register_number (dest_regno
));
10024 /* This must be an alloc before a sibcall. We must drop the
10025 old frame info. The easiest way to drop the old frame
10026 info is to ensure we had a ".restore sp" directive
10027 followed by a new prologue. If the procedure doesn't
10028 have a memory-stack frame, we'll issue a dummy ".restore
10030 if (current_frame_info
.total_size
== 0 && !frame_pointer_needed
)
10031 /* if haven't done process_epilogue() yet, do it now */
10032 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
10034 fprintf (asm_out_file
, "\t.prologue\n");
10039 handled_one
= false;
10040 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
10041 switch (REG_NOTE_KIND (note
))
10043 case REG_CFA_ADJUST_CFA
:
10044 pat
= XEXP (note
, 0);
10046 pat
= PATTERN (insn
);
10047 process_cfa_adjust_cfa (asm_out_file
, pat
, insn
, unwind
, frame
);
10048 handled_one
= true;
10051 case REG_CFA_OFFSET
:
10052 pat
= XEXP (note
, 0);
10054 pat
= PATTERN (insn
);
10055 process_cfa_offset (asm_out_file
, pat
, unwind
);
10056 handled_one
= true;
10059 case REG_CFA_REGISTER
:
10060 pat
= XEXP (note
, 0);
10062 pat
= PATTERN (insn
);
10063 process_cfa_register (asm_out_file
, pat
, unwind
);
10064 handled_one
= true;
10067 case REG_FRAME_RELATED_EXPR
:
10068 case REG_CFA_DEF_CFA
:
10069 case REG_CFA_EXPRESSION
:
10070 case REG_CFA_RESTORE
:
10071 case REG_CFA_SET_VDRAP
:
10072 /* Not used in the ia64 port. */
10073 gcc_unreachable ();
10076 /* Not a frame-related note. */
10080 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10081 explicit action to take. No guessing required. */
10082 gcc_assert (handled_one
);
10085 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10088 ia64_asm_emit_except_personality (rtx personality
)
10090 fputs ("\t.personality\t", asm_out_file
);
10091 output_addr_const (asm_out_file
, personality
);
10092 fputc ('\n', asm_out_file
);
10095 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10098 ia64_asm_init_sections (void)
10100 exception_section
= get_unnamed_section (0, output_section_asm_op
,
10104 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10106 static enum unwind_info_type
10107 ia64_debug_unwind_info (void)
10112 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
10114 static enum unwind_info_type
10115 ia64_except_unwind_info (struct gcc_options
*opts
)
10117 /* Honor the --enable-sjlj-exceptions configure switch. */
10118 #ifdef CONFIG_UNWIND_EXCEPTIONS
10119 if (CONFIG_UNWIND_EXCEPTIONS
)
10123 /* For simplicity elsewhere in this file, indicate that all unwind
10124 info is disabled if we're not emitting unwind tables. */
10125 if (!opts
->x_flag_exceptions
&& !opts
->x_flag_unwind_tables
)
10134 IA64_BUILTIN_COPYSIGNQ
,
10135 IA64_BUILTIN_FABSQ
,
10136 IA64_BUILTIN_FLUSHRS
,
10138 IA64_BUILTIN_HUGE_VALQ
,
10142 static GTY(()) tree ia64_builtins
[(int) IA64_BUILTIN_max
];
10145 ia64_init_builtins (void)
10151 /* The __fpreg type. */
10152 fpreg_type
= make_node (REAL_TYPE
);
10153 TYPE_PRECISION (fpreg_type
) = 82;
10154 layout_type (fpreg_type
);
10155 (*lang_hooks
.types
.register_builtin_type
) (fpreg_type
, "__fpreg");
10157 /* The __float80 type. */
10158 float80_type
= make_node (REAL_TYPE
);
10159 TYPE_PRECISION (float80_type
) = 80;
10160 layout_type (float80_type
);
10161 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
10163 /* The __float128 type. */
10167 tree float128_type
= make_node (REAL_TYPE
);
10169 TYPE_PRECISION (float128_type
) = 128;
10170 layout_type (float128_type
);
10171 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
10173 /* TFmode support builtins. */
10174 ftype
= build_function_type (float128_type
, void_list_node
);
10175 decl
= add_builtin_function ("__builtin_infq", ftype
,
10176 IA64_BUILTIN_INFQ
, BUILT_IN_MD
,
10178 ia64_builtins
[IA64_BUILTIN_INFQ
] = decl
;
10180 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
10181 IA64_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
10183 ia64_builtins
[IA64_BUILTIN_HUGE_VALQ
] = decl
;
10185 ftype
= build_function_type_list (float128_type
,
10188 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
10189 IA64_BUILTIN_FABSQ
, BUILT_IN_MD
,
10190 "__fabstf2", NULL_TREE
);
10191 TREE_READONLY (decl
) = 1;
10192 ia64_builtins
[IA64_BUILTIN_FABSQ
] = decl
;
10194 ftype
= build_function_type_list (float128_type
,
10198 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
10199 IA64_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
10200 "__copysigntf3", NULL_TREE
);
10201 TREE_READONLY (decl
) = 1;
10202 ia64_builtins
[IA64_BUILTIN_COPYSIGNQ
] = decl
;
10205 /* Under HPUX, this is a synonym for "long double". */
10206 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
10209 /* Fwrite on VMS is non-standard. */
10210 if (TARGET_ABI_OPEN_VMS
)
10212 implicit_built_in_decls
[(int) BUILT_IN_FWRITE
] = NULL_TREE
;
10213 implicit_built_in_decls
[(int) BUILT_IN_FWRITE_UNLOCKED
] = NULL_TREE
;
10216 #define def_builtin(name, type, code) \
10217 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10220 decl
= def_builtin ("__builtin_ia64_bsp",
10221 build_function_type (ptr_type_node
, void_list_node
),
10223 ia64_builtins
[IA64_BUILTIN_BSP
] = decl
;
10225 decl
= def_builtin ("__builtin_ia64_flushrs",
10226 build_function_type (void_type_node
, void_list_node
),
10227 IA64_BUILTIN_FLUSHRS
);
10228 ia64_builtins
[IA64_BUILTIN_FLUSHRS
] = decl
;
10234 if (built_in_decls
[BUILT_IN_FINITE
])
10235 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITE
],
10237 if (built_in_decls
[BUILT_IN_FINITEF
])
10238 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITEF
],
10240 if (built_in_decls
[BUILT_IN_FINITEL
])
10241 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITEL
],
10247 ia64_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
10248 enum machine_mode mode ATTRIBUTE_UNUSED
,
10249 int ignore ATTRIBUTE_UNUSED
)
10251 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
10252 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
10256 case IA64_BUILTIN_BSP
:
10257 if (! target
|| ! register_operand (target
, DImode
))
10258 target
= gen_reg_rtx (DImode
);
10259 emit_insn (gen_bsp_value (target
));
10260 #ifdef POINTERS_EXTEND_UNSIGNED
10261 target
= convert_memory_address (ptr_mode
, target
);
10265 case IA64_BUILTIN_FLUSHRS
:
10266 emit_insn (gen_flushrs ());
10269 case IA64_BUILTIN_INFQ
:
10270 case IA64_BUILTIN_HUGE_VALQ
:
10272 enum machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
10273 REAL_VALUE_TYPE inf
;
10277 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, target_mode
);
10279 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
10282 target
= gen_reg_rtx (target_mode
);
10284 emit_move_insn (target
, tmp
);
10288 case IA64_BUILTIN_FABSQ
:
10289 case IA64_BUILTIN_COPYSIGNQ
:
10290 return expand_call (exp
, target
, ignore
);
10293 gcc_unreachable ();
10299 /* Return the ia64 builtin for CODE. */
10302 ia64_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
10304 if (code
>= IA64_BUILTIN_max
)
10305 return error_mark_node
;
10307 return ia64_builtins
[code
];
10310 /* For the HP-UX IA64 aggregate parameters are passed stored in the
10311 most significant bits of the stack slot. */
10314 ia64_hpux_function_arg_padding (enum machine_mode mode
, const_tree type
)
10316 /* Exception to normal case for structures/unions/etc. */
10318 if (type
&& AGGREGATE_TYPE_P (type
)
10319 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
10322 /* Fall back to the default. */
10323 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
10326 /* Emit text to declare externally defined variables and functions, because
10327 the Intel assembler does not support undefined externals. */
10330 ia64_asm_output_external (FILE *file
, tree decl
, const char *name
)
10332 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10333 set in order to avoid putting out names that are never really
10335 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)))
10337 /* maybe_assemble_visibility will return 1 if the assembler
10338 visibility directive is output. */
10339 int need_visibility
= ((*targetm
.binds_local_p
) (decl
)
10340 && maybe_assemble_visibility (decl
));
10342 #ifdef DO_CRTL_NAMES
10346 /* GNU as does not need anything here, but the HP linker does
10347 need something for external functions. */
10348 if ((TARGET_HPUX_LD
|| !TARGET_GNU_AS
)
10349 && TREE_CODE (decl
) == FUNCTION_DECL
)
10350 (*targetm
.asm_out
.globalize_decl_name
) (file
, decl
);
10351 else if (need_visibility
&& !TARGET_GNU_AS
)
10352 (*targetm
.asm_out
.globalize_label
) (file
, name
);
10356 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
10357 modes of word_mode and larger. Rename the TFmode libfuncs using the
10358 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10359 backward compatibility. */
10362 ia64_init_libfuncs (void)
10364 set_optab_libfunc (sdiv_optab
, SImode
, "__divsi3");
10365 set_optab_libfunc (udiv_optab
, SImode
, "__udivsi3");
10366 set_optab_libfunc (smod_optab
, SImode
, "__modsi3");
10367 set_optab_libfunc (umod_optab
, SImode
, "__umodsi3");
10369 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
10370 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
10371 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
10372 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
10373 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
10375 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
10376 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
10377 set_conv_libfunc (sext_optab
, TFmode
, XFmode
, "_U_Qfcnvff_f80_to_quad");
10378 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
10379 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
10380 set_conv_libfunc (trunc_optab
, XFmode
, TFmode
, "_U_Qfcnvff_quad_to_f80");
10382 set_conv_libfunc (sfix_optab
, SImode
, TFmode
, "_U_Qfcnvfxt_quad_to_sgl");
10383 set_conv_libfunc (sfix_optab
, DImode
, TFmode
, "_U_Qfcnvfxt_quad_to_dbl");
10384 set_conv_libfunc (sfix_optab
, TImode
, TFmode
, "_U_Qfcnvfxt_quad_to_quad");
10385 set_conv_libfunc (ufix_optab
, SImode
, TFmode
, "_U_Qfcnvfxut_quad_to_sgl");
10386 set_conv_libfunc (ufix_optab
, DImode
, TFmode
, "_U_Qfcnvfxut_quad_to_dbl");
10388 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
, "_U_Qfcnvxf_sgl_to_quad");
10389 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
, "_U_Qfcnvxf_dbl_to_quad");
10390 set_conv_libfunc (sfloat_optab
, TFmode
, TImode
, "_U_Qfcnvxf_quad_to_quad");
10391 /* HP-UX 11.23 libc does not have a function for unsigned
10392 SImode-to-TFmode conversion. */
10393 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
, "_U_Qfcnvxuf_dbl_to_quad");
10396 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10399 ia64_hpux_init_libfuncs (void)
10401 ia64_init_libfuncs ();
10403 /* The HP SI millicode division and mod functions expect DI arguments.
10404 By turning them off completely we avoid using both libgcc and the
10405 non-standard millicode routines and use the HP DI millicode routines
10408 set_optab_libfunc (sdiv_optab
, SImode
, 0);
10409 set_optab_libfunc (udiv_optab
, SImode
, 0);
10410 set_optab_libfunc (smod_optab
, SImode
, 0);
10411 set_optab_libfunc (umod_optab
, SImode
, 0);
10413 set_optab_libfunc (sdiv_optab
, DImode
, "__milli_divI");
10414 set_optab_libfunc (udiv_optab
, DImode
, "__milli_divU");
10415 set_optab_libfunc (smod_optab
, DImode
, "__milli_remI");
10416 set_optab_libfunc (umod_optab
, DImode
, "__milli_remU");
10418 /* HP-UX libc has TF min/max/abs routines in it. */
10419 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qfmin");
10420 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
10421 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
10423 /* ia64_expand_compare uses this. */
10424 cmptf_libfunc
= init_one_libfunc ("_U_Qfcmp");
10426 /* These should never be used. */
10427 set_optab_libfunc (eq_optab
, TFmode
, 0);
10428 set_optab_libfunc (ne_optab
, TFmode
, 0);
10429 set_optab_libfunc (gt_optab
, TFmode
, 0);
10430 set_optab_libfunc (ge_optab
, TFmode
, 0);
10431 set_optab_libfunc (lt_optab
, TFmode
, 0);
10432 set_optab_libfunc (le_optab
, TFmode
, 0);
10435 /* Rename the division and modulus functions in VMS. */
10438 ia64_vms_init_libfuncs (void)
10440 set_optab_libfunc (sdiv_optab
, SImode
, "OTS$DIV_I");
10441 set_optab_libfunc (sdiv_optab
, DImode
, "OTS$DIV_L");
10442 set_optab_libfunc (udiv_optab
, SImode
, "OTS$DIV_UI");
10443 set_optab_libfunc (udiv_optab
, DImode
, "OTS$DIV_UL");
10444 set_optab_libfunc (smod_optab
, SImode
, "OTS$REM_I");
10445 set_optab_libfunc (smod_optab
, DImode
, "OTS$REM_L");
10446 set_optab_libfunc (umod_optab
, SImode
, "OTS$REM_UI");
10447 set_optab_libfunc (umod_optab
, DImode
, "OTS$REM_UL");
10448 abort_libfunc
= init_one_libfunc ("decc$abort");
10449 memcmp_libfunc
= init_one_libfunc ("decc$memcmp");
10450 #ifdef MEM_LIBFUNCS_INIT
10455 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10456 the HPUX conventions. */
10459 ia64_sysv4_init_libfuncs (void)
10461 ia64_init_libfuncs ();
10463 /* These functions are not part of the HPUX TFmode interface. We
10464 use them instead of _U_Qfcmp, which doesn't work the way we
10466 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
10467 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
10468 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
10469 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
10470 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
10471 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
10473 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10474 glibc doesn't have them. */
/* Libfunc setup when TFmode is provided by soft-fp in libgcc; nothing
   to rename here.  NOTE(review): body reconstructed as empty from the
   surrounding context — confirm against the original ia64.c.  */

static void
ia64_soft_fp_init_libfuncs (void)
{
}
10485 ia64_vms_valid_pointer_mode (enum machine_mode mode
)
10487 return (mode
== SImode
|| mode
== DImode
);
/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}
10498 /* For others, relax this so that relocations to local data goes in
10499 read-only segments, but we still cannot allow global relocations
10500 in read-only segments. */
10503 ia64_reloc_rw_mask (void)
10505 return flag_pic
? 3 : 2;
10508 /* Return the section to use for X. The only special thing we do here
10509 is to honor small data. */
10512 ia64_select_rtx_section (enum machine_mode mode
, rtx x
,
10513 unsigned HOST_WIDE_INT align
)
10515 if (GET_MODE_SIZE (mode
) > 0
10516 && GET_MODE_SIZE (mode
) <= ia64_section_threshold
10517 && !TARGET_NO_SDATA
)
10518 return sdata_section
;
10520 return default_elf_select_rtx_section (mode
, x
, align
);
10523 static unsigned int
10524 ia64_section_type_flags (tree decl
, const char *name
, int reloc
)
10526 unsigned int flags
= 0;
10528 if (strcmp (name
, ".sdata") == 0
10529 || strncmp (name
, ".sdata.", 7) == 0
10530 || strncmp (name
, ".gnu.linkonce.s.", 16) == 0
10531 || strncmp (name
, ".sdata2.", 8) == 0
10532 || strncmp (name
, ".gnu.linkonce.s2.", 17) == 0
10533 || strcmp (name
, ".sbss") == 0
10534 || strncmp (name
, ".sbss.", 6) == 0
10535 || strncmp (name
, ".gnu.linkonce.sb.", 17) == 0)
10536 flags
= SECTION_SMALL
;
10538 #if TARGET_ABI_OPEN_VMS
10539 if (decl
&& DECL_ATTRIBUTES (decl
)
10540 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl
)))
10541 flags
|= SECTION_VMS_OVERLAY
;
10544 flags
|= default_section_type_flags (decl
, name
, reloc
);
10548 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10549 structure type and that the address of that type should be passed
10550 in out0, rather than in r8. */
10553 ia64_struct_retval_addr_is_first_parm_p (tree fntype
)
10555 tree ret_type
= TREE_TYPE (fntype
);
10557 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10558 as the structure return address parameter, if the return value
10559 type has a non-trivial copy constructor or destructor. It is not
10560 clear if this same convention should be used for other
10561 programming languages. Until G++ 3.4, we incorrectly used r8 for
10562 these return values. */
10563 return (abi_version_at_least (2)
10565 && TYPE_MODE (ret_type
) == BLKmode
10566 && TREE_ADDRESSABLE (ret_type
)
10567 && strcmp (lang_hooks
.name
, "GNU C++") == 0);
10570 /* Output the assembler code for a thunk function. THUNK_DECL is the
10571 declaration for the thunk function itself, FUNCTION is the decl for
10572 the target function. DELTA is an immediate constant offset to be
10573 added to THIS. If VCALL_OFFSET is nonzero, the word at
10574 *(*this + vcall_offset) should be added to THIS. */
10577 ia64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
10578 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
10581 rtx this_rtx
, insn
, funexp
;
10582 unsigned int this_parmno
;
10583 unsigned int this_regno
;
10586 reload_completed
= 1;
10587 epilogue_completed
= 1;
10589 /* Set things up as ia64_expand_prologue might. */
10590 last_scratch_gr_reg
= 15;
10592 memset (¤t_frame_info
, 0, sizeof (current_frame_info
));
10593 current_frame_info
.spill_cfa_off
= -16;
10594 current_frame_info
.n_input_regs
= 1;
10595 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
10597 /* Mark the end of the (empty) prologue. */
10598 emit_note (NOTE_INSN_PROLOGUE_END
);
10600 /* Figure out whether "this" will be the first parameter (the
10601 typical case) or the second parameter (as happens when the
10602 virtual function returns certain class objects). */
10604 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk
))
10606 this_regno
= IN_REG (this_parmno
);
10607 if (!TARGET_REG_NAMES
)
10608 reg_names
[this_regno
] = ia64_reg_numbers
[this_parmno
];
10610 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
10612 /* Apply the constant offset, if required. */
10613 delta_rtx
= GEN_INT (delta
);
10616 rtx tmp
= gen_rtx_REG (ptr_mode
, this_regno
);
10617 REG_POINTER (tmp
) = 1;
10618 if (delta
&& satisfies_constraint_I (delta_rtx
))
10620 emit_insn (gen_ptr_extend_plus_imm (this_rtx
, tmp
, delta_rtx
));
10624 emit_insn (gen_ptr_extend (this_rtx
, tmp
));
10628 if (!satisfies_constraint_I (delta_rtx
))
10630 rtx tmp
= gen_rtx_REG (Pmode
, 2);
10631 emit_move_insn (tmp
, delta_rtx
);
10634 emit_insn (gen_adddi3 (this_rtx
, this_rtx
, delta_rtx
));
10637 /* Apply the offset from the vtable, if required. */
10640 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
10641 rtx tmp
= gen_rtx_REG (Pmode
, 2);
10645 rtx t
= gen_rtx_REG (ptr_mode
, 2);
10646 REG_POINTER (t
) = 1;
10647 emit_move_insn (t
, gen_rtx_MEM (ptr_mode
, this_rtx
));
10648 if (satisfies_constraint_I (vcall_offset_rtx
))
10650 emit_insn (gen_ptr_extend_plus_imm (tmp
, t
, vcall_offset_rtx
));
10654 emit_insn (gen_ptr_extend (tmp
, t
));
10657 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this_rtx
));
10661 if (!satisfies_constraint_J (vcall_offset_rtx
))
10663 rtx tmp2
= gen_rtx_REG (Pmode
, next_scratch_gr_reg ());
10664 emit_move_insn (tmp2
, vcall_offset_rtx
);
10665 vcall_offset_rtx
= tmp2
;
10667 emit_insn (gen_adddi3 (tmp
, tmp
, vcall_offset_rtx
));
10671 emit_insn (gen_zero_extendsidi2 (tmp
, gen_rtx_MEM (ptr_mode
, tmp
)));
10673 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
10675 emit_insn (gen_adddi3 (this_rtx
, this_rtx
, tmp
));
10678 /* Generate a tail call to the target function. */
10679 if (! TREE_USED (function
))
10681 assemble_external (function
);
10682 TREE_USED (function
) = 1;
10684 funexp
= XEXP (DECL_RTL (function
), 0);
10685 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
10686 ia64_expand_call (NULL_RTX
, funexp
, NULL_RTX
, 1);
10687 insn
= get_last_insn ();
10688 SIBLING_CALL_P (insn
) = 1;
10690 /* Code generation for calls relies on splitting. */
10691 reload_completed
= 1;
10692 epilogue_completed
= 1;
10693 try_split (PATTERN (insn
), insn
, 0);
10697 /* Run just enough of rest_of_compilation to get the insns emitted.
10698 There's not really enough bulk here to make other passes such as
10699 instruction scheduling worth while. Note that use_thunk calls
10700 assemble_start_function and assemble_end_function. */
10702 insn_locators_alloc ();
10703 emit_all_insn_group_barriers (NULL
);
10704 insn
= get_insns ();
10705 shorten_branches (insn
);
10706 final_start_function (insn
, file
, 1);
10707 final (insn
, file
, 1);
10708 final_end_function ();
10710 reload_completed
= 0;
10711 epilogue_completed
= 0;
10714 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10717 ia64_struct_value_rtx (tree fntype
,
10718 int incoming ATTRIBUTE_UNUSED
)
10720 if (TARGET_ABI_OPEN_VMS
||
10721 (fntype
&& ia64_struct_retval_addr_is_first_parm_p (fntype
)))
10723 return gen_rtx_REG (Pmode
, GR_REG (8));
10727 ia64_scalar_mode_supported_p (enum machine_mode mode
)
10753 ia64_vector_mode_supported_p (enum machine_mode mode
)
10770 /* Implement the FUNCTION_PROFILER macro. */
10773 ia64_output_function_profiler (FILE *file
, int labelno
)
10775 bool indirect_call
;
10777 /* If the function needs a static chain and the static chain
10778 register is r15, we use an indirect call so as to bypass
10779 the PLT stub in case the executable is dynamically linked,
10780 because the stub clobbers r15 as per 5.3.6 of the psABI.
10781 We don't need to do that in non canonical PIC mode. */
10783 if (cfun
->static_chain_decl
&& !TARGET_NO_PIC
&& !TARGET_AUTO_PIC
)
10785 gcc_assert (STATIC_CHAIN_REGNUM
== 15);
10786 indirect_call
= true;
10789 indirect_call
= false;
10792 fputs ("\t.prologue 4, r40\n", file
);
10794 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file
);
10795 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file
);
10797 if (NO_PROFILE_COUNTERS
)
10798 fputs ("\tmov out3 = r0\n", file
);
10802 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
10804 if (TARGET_AUTO_PIC
)
10805 fputs ("\tmovl out3 = @gprel(", file
);
10807 fputs ("\taddl out3 = @ltoff(", file
);
10808 assemble_name (file
, buf
);
10809 if (TARGET_AUTO_PIC
)
10810 fputs (")\n", file
);
10812 fputs ("), r1\n", file
);
10816 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file
);
10817 fputs ("\t;;\n", file
);
10819 fputs ("\t.save rp, r42\n", file
);
10820 fputs ("\tmov out2 = b0\n", file
);
10822 fputs ("\tld8 r14 = [r14]\n\t;;\n", file
);
10823 fputs ("\t.body\n", file
);
10824 fputs ("\tmov out1 = r1\n", file
);
10827 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file
);
10828 fputs ("\tmov b6 = r16\n", file
);
10829 fputs ("\tld8 r1 = [r14]\n", file
);
10830 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file
);
10833 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file
);
10836 static GTY(()) rtx mcount_func_rtx
;
10838 gen_mcount_func_rtx (void)
10840 if (!mcount_func_rtx
)
10841 mcount_func_rtx
= init_one_libfunc ("_mcount");
10842 return mcount_func_rtx
;
10846 ia64_profile_hook (int labelno
)
10850 if (NO_PROFILE_COUNTERS
)
10851 label
= const0_rtx
;
10855 const char *label_name
;
10856 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
10857 label_name
= (*targetm
.strip_name_encoding
) (ggc_strdup (buf
));
10858 label
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
10859 SYMBOL_REF_FLAGS (label
) = SYMBOL_FLAG_LOCAL
;
10861 ip
= gen_reg_rtx (Pmode
);
10862 emit_insn (gen_ip_value (ip
));
10863 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL
,
10865 gen_rtx_REG (Pmode
, BR_REG (0)), Pmode
,
10870 /* Return the mangling of TYPE if it is an extended fundamental type. */
10872 static const char *
10873 ia64_mangle_type (const_tree type
)
10875 type
= TYPE_MAIN_VARIANT (type
);
10877 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
10878 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
10881 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10883 if (!TARGET_HPUX
&& TYPE_MODE (type
) == TFmode
)
10885 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10886 an extended mangling. Elsewhere, "e" is available since long
10887 double is 80 bits. */
10888 if (TYPE_MODE (type
) == XFmode
)
10889 return TARGET_HPUX
? "u9__float80" : "e";
10890 if (TYPE_MODE (type
) == RFmode
)
10891 return "u7__fpreg";
10895 /* Return the diagnostic message string if conversion from FROMTYPE to
10896 TOTYPE is not allowed, NULL otherwise. */
10897 static const char *
10898 ia64_invalid_conversion (const_tree fromtype
, const_tree totype
)
10900 /* Reject nontrivial conversion to or from __fpreg. */
10901 if (TYPE_MODE (fromtype
) == RFmode
10902 && TYPE_MODE (totype
) != RFmode
10903 && TYPE_MODE (totype
) != VOIDmode
)
10904 return N_("invalid conversion from %<__fpreg%>");
10905 if (TYPE_MODE (totype
) == RFmode
10906 && TYPE_MODE (fromtype
) != RFmode
)
10907 return N_("invalid conversion to %<__fpreg%>");
10911 /* Return the diagnostic message string if the unary operation OP is
10912 not permitted on TYPE, NULL otherwise. */
10913 static const char *
10914 ia64_invalid_unary_op (int op
, const_tree type
)
10916 /* Reject operations on __fpreg other than unary + or &. */
10917 if (TYPE_MODE (type
) == RFmode
10918 && op
!= CONVERT_EXPR
10919 && op
!= ADDR_EXPR
)
10920 return N_("invalid operation on %<__fpreg%>");
10924 /* Return the diagnostic message string if the binary operation OP is
10925 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10926 static const char *
10927 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
, const_tree type2
)
10929 /* Reject operations on __fpreg. */
10930 if (TYPE_MODE (type1
) == RFmode
|| TYPE_MODE (type2
) == RFmode
)
10931 return N_("invalid operation on %<__fpreg%>");
10935 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
10937 ia64_option_default_params (void)
10939 /* Let the scheduler form additional regions. */
10940 set_default_param_value (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS
, 2);
10942 /* Set the default values for cache-related parameters. */
10943 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES
, 6);
10944 set_default_param_value (PARAM_L1_CACHE_LINE_SIZE
, 32);
10946 set_default_param_value (PARAM_SCHED_MEM_TRUE_DEP_COST
, 4);
10949 /* HP-UX version_id attribute.
10950 For object foo, if the version_id is set to 1234 put out an alias
10951 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything
10952 other than an alias statement because it is an illegal symbol name. */
10955 ia64_handle_version_id_attribute (tree
*node ATTRIBUTE_UNUSED
,
10956 tree name ATTRIBUTE_UNUSED
,
10958 int flags ATTRIBUTE_UNUSED
,
10959 bool *no_add_attrs
)
10961 tree arg
= TREE_VALUE (args
);
10963 if (TREE_CODE (arg
) != STRING_CST
)
10965 error("version attribute is not a string");
10966 *no_add_attrs
= true;
10972 /* Target hook for c_mode_for_suffix.  */
/* NOTE(review): the function body is missing from this extract -- only
   the signature survives.  Restore the suffix -> machine-mode mapping
   from the original gcc/config/ia64/ia64.c rather than guessing.  */
10974 static enum machine_mode
10975 ia64_c_mode_for_suffix (char suffix
)
10985 static enum machine_mode
10986 ia64_promote_function_mode (const_tree type
,
10987 enum machine_mode mode
,
10989 const_tree funtype
,
10992 /* Special processing required for OpenVMS ... */
10994 if (!TARGET_ABI_OPEN_VMS
)
10995 return default_promote_function_mode(type
, mode
, punsignedp
, funtype
,
10998 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10999 HP OpenVMS I64 Version 8.2EFT,
11000 chapter 4 "OpenVMS I64 Conventions"
11001 section 4.7 "Procedure Linkage"
11002 subsection 4.7.5.2, "Normal Register Parameters"
11004 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
11005 values passed in registers are zero-filled; signed integral values as
11006 well as unsigned 32-bit integral values are sign-extended to 64 bits.
11007 For all other types passed in the general registers, unused bits are
11010 if (!AGGREGATE_TYPE_P (type
)
11011 && GET_MODE_CLASS (mode
) == MODE_INT
11012 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
)
11014 if (mode
== SImode
)
11019 return promote_mode (type
, mode
, punsignedp
);
11022 static GTY(()) rtx ia64_dconst_0_5_rtx
;
11025 ia64_dconst_0_5 (void)
11027 if (! ia64_dconst_0_5_rtx
)
11029 REAL_VALUE_TYPE rv
;
11030 real_from_string (&rv
, "0.5");
11031 ia64_dconst_0_5_rtx
= const_double_from_real_value (rv
, DFmode
);
11033 return ia64_dconst_0_5_rtx
;
11036 static GTY(()) rtx ia64_dconst_0_375_rtx
;
11039 ia64_dconst_0_375 (void)
11041 if (! ia64_dconst_0_375_rtx
)
11043 REAL_VALUE_TYPE rv
;
11044 real_from_string (&rv
, "0.375");
11045 ia64_dconst_0_375_rtx
= const_double_from_real_value (rv
, DFmode
);
11047 return ia64_dconst_0_375_rtx
;
11050 static enum machine_mode
11051 ia64_get_reg_raw_mode (int regno
)
11053 if (FR_REGNO_P (regno
))
11055 return default_get_reg_raw_mode(regno
);
11058 /* Always default to .text section until HP-UX linker is fixed. */
11060 ATTRIBUTE_UNUSED
static section
*
11061 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED
,
11062 enum node_frequency freq ATTRIBUTE_UNUSED
,
11063 bool startup ATTRIBUTE_UNUSED
,
11064 bool exit ATTRIBUTE_UNUSED
)
11069 #include "gt-ia64.h"