[thirdparty/gcc.git] / gcc / config / ia64 / ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by James E. Wilson <wilson@cygnus.com> and
6 David Mosberger <davidm@hpl.hp.com>.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "libfuncs.h"
45 #include "diagnostic-core.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "gimple.h"
55 #include "intl.h"
56 #include "df.h"
57 #include "debug.h"
58 #include "params.h"
59 #include "dbgcnt.h"
60 #include "tm-constrs.h"
61 #include "sel-sched.h"
62 #include "reload.h"
63 #include "dwarf2out.h"
64 #include "opts.h"
65
66 /* This is used for communication between ASM_OUTPUT_LABEL and
67 ASM_OUTPUT_LABELREF. */
68 int ia64_asm_output_label = 0;
69
70 /* Register names for ia64_expand_prologue. */
71 static const char * const ia64_reg_numbers[96] =
72 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
73 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
74 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
75 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
76 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
77 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
78 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
79 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
80 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
81 "r104","r105","r106","r107","r108","r109","r110","r111",
82 "r112","r113","r114","r115","r116","r117","r118","r119",
83 "r120","r121","r122","r123","r124","r125","r126","r127"};
84
85 /* ??? These strings could be shared with REGISTER_NAMES. */
86 static const char * const ia64_input_reg_names[8] =
87 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
88
89 /* ??? These strings could be shared with REGISTER_NAMES. */
90 static const char * const ia64_local_reg_names[80] =
91 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
92 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
93 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
94 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
95 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
96 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
97 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
98 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
99 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
100 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
101
102 /* ??? These strings could be shared with REGISTER_NAMES. */
103 static const char * const ia64_output_reg_names[8] =
104 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
105
106 /* Variables which are this size or smaller are put in the sdata/sbss
107 sections. */
108
109 unsigned int ia64_section_threshold;
110
111 /* The following variable is used by the DFA insn scheduler. The value is
112 TRUE if we do insn bundling instead of insn scheduling. */
113 int bundling_p = 0;
114
115 enum ia64_frame_regs
116 {
117 reg_fp,
118 reg_save_b0,
119 reg_save_pr,
120 reg_save_ar_pfs,
121 reg_save_ar_unat,
122 reg_save_ar_lc,
123 reg_save_gp,
124 number_of_ia64_frame_regs
125 };
126
127 /* Structure to be filled in by ia64_compute_frame_size with register
128 save masks and offsets for the current function. */
129
130 struct ia64_frame_info
131 {
132 HOST_WIDE_INT total_size; /* size of the stack frame, not including
133 the caller's scratch area. */
134 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
135 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
136 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
137 HARD_REG_SET mask; /* mask of saved registers. */
138 unsigned int gr_used_mask; /* mask of registers in use as gr spill
139 registers or long-term scratches. */
140 int n_spilled; /* number of spilled registers. */
141 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
142 int n_input_regs; /* number of input registers used. */
143 int n_local_regs; /* number of local registers used. */
144 int n_output_regs; /* number of output registers used. */
145 int n_rotate_regs; /* number of rotating registers used. */
146
147 char need_regstk; /* true if a .regstk directive needed. */
148 char initialized; /* true if the data is finalized. */
149 };
150
151 /* Current frame information calculated by ia64_compute_frame_size. */
152 static struct ia64_frame_info current_frame_info;
153 /* The actual registers that are emitted. */
154 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
155 \f
156 static int ia64_first_cycle_multipass_dfa_lookahead (void);
157 static void ia64_dependencies_evaluation_hook (rtx, rtx);
158 static void ia64_init_dfa_pre_cycle_insn (void);
159 static rtx ia64_dfa_pre_cycle_insn (void);
160 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
161 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
162 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
163 static void ia64_h_i_d_extended (void);
164 static void * ia64_alloc_sched_context (void);
165 static void ia64_init_sched_context (void *, bool);
166 static void ia64_set_sched_context (void *);
167 static void ia64_clear_sched_context (void *);
168 static void ia64_free_sched_context (void *);
169 static int ia64_mode_to_int (enum machine_mode);
170 static void ia64_set_sched_flags (spec_info_t);
171 static ds_t ia64_get_insn_spec_ds (rtx);
172 static ds_t ia64_get_insn_checked_ds (rtx);
173 static bool ia64_skip_rtx_p (const_rtx);
174 static int ia64_speculate_insn (rtx, ds_t, rtx *);
175 static bool ia64_needs_block_p (int);
176 static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
177 static int ia64_spec_check_p (rtx);
178 static int ia64_spec_check_src_p (rtx);
179 static rtx gen_tls_get_addr (void);
180 static rtx gen_thread_pointer (void);
181 static int find_gr_spill (enum ia64_frame_regs, int);
182 static int next_scratch_gr_reg (void);
183 static void mark_reg_gr_used_mask (rtx, void *);
184 static void ia64_compute_frame_size (HOST_WIDE_INT);
185 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
186 static void finish_spill_pointers (void);
187 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
188 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
189 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
190 static rtx gen_movdi_x (rtx, rtx, rtx);
191 static rtx gen_fr_spill_x (rtx, rtx, rtx);
192 static rtx gen_fr_restore_x (rtx, rtx, rtx);
193
194 static void ia64_option_override (void);
195 static void ia64_option_default_params (void);
196 static bool ia64_can_eliminate (const int, const int);
197 static enum machine_mode hfa_element_mode (const_tree, bool);
198 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
199 tree, int *, int);
200 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
201 tree, bool);
202 static rtx ia64_function_arg_1 (const CUMULATIVE_ARGS *, enum machine_mode,
203 const_tree, bool, bool);
204 static rtx ia64_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
205 const_tree, bool);
206 static rtx ia64_function_incoming_arg (CUMULATIVE_ARGS *,
207 enum machine_mode, const_tree, bool);
208 static void ia64_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
209 const_tree, bool);
210 static unsigned int ia64_function_arg_boundary (enum machine_mode,
211 const_tree);
212 static bool ia64_function_ok_for_sibcall (tree, tree);
213 static bool ia64_return_in_memory (const_tree, const_tree);
214 static rtx ia64_function_value (const_tree, const_tree, bool);
215 static rtx ia64_libcall_value (enum machine_mode, const_rtx);
216 static bool ia64_function_value_regno_p (const unsigned int);
217 static int ia64_register_move_cost (enum machine_mode, reg_class_t,
218 reg_class_t);
219 static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
220 bool);
221 static bool ia64_rtx_costs (rtx, int, int, int *, bool);
222 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
223 static void fix_range (const char *);
224 static bool ia64_handle_option (struct gcc_options *, struct gcc_options *,
225 const struct cl_decoded_option *, location_t);
226 static struct machine_function * ia64_init_machine_status (void);
227 static void emit_insn_group_barriers (FILE *);
228 static void emit_all_insn_group_barriers (FILE *);
229 static void final_emit_insn_group_barriers (FILE *);
230 static void emit_predicate_relation_info (void);
231 static void ia64_reorg (void);
232 static bool ia64_in_small_data_p (const_tree);
233 static void process_epilogue (FILE *, rtx, bool, bool);
234
235 static bool ia64_assemble_integer (rtx, unsigned int, int);
236 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
237 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
238 static void ia64_output_function_end_prologue (FILE *);
239
240 static int ia64_issue_rate (void);
241 static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
242 static void ia64_sched_init (FILE *, int, int);
243 static void ia64_sched_init_global (FILE *, int, int);
244 static void ia64_sched_finish_global (FILE *, int);
245 static void ia64_sched_finish (FILE *, int);
246 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
247 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
248 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
249 static int ia64_variable_issue (FILE *, int, rtx, int);
250
251 static void ia64_asm_unwind_emit (FILE *, rtx);
252 static void ia64_asm_emit_except_personality (rtx);
253 static void ia64_asm_init_sections (void);
254
255 static enum unwind_info_type ia64_debug_unwind_info (void);
256 static enum unwind_info_type ia64_except_unwind_info (struct gcc_options *);
257
258 static struct bundle_state *get_free_bundle_state (void);
259 static void free_bundle_state (struct bundle_state *);
260 static void initiate_bundle_states (void);
261 static void finish_bundle_states (void);
262 static unsigned bundle_state_hash (const void *);
263 static int bundle_state_eq_p (const void *, const void *);
264 static int insert_bundle_state (struct bundle_state *);
265 static void initiate_bundle_state_table (void);
266 static void finish_bundle_state_table (void);
267 static int try_issue_nops (struct bundle_state *, int);
268 static int try_issue_insn (struct bundle_state *, rtx);
269 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
270 static int get_max_pos (state_t);
271 static int get_template (state_t, int);
272
273 static rtx get_next_important_insn (rtx, rtx);
274 static bool important_for_bundling_p (rtx);
275 static void bundling (FILE *, int, rtx, rtx);
276
277 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
278 HOST_WIDE_INT, tree);
279 static void ia64_file_start (void);
280 static void ia64_globalize_decl_name (FILE *, tree);
281
282 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
283 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
284 static section *ia64_select_rtx_section (enum machine_mode, rtx,
285 unsigned HOST_WIDE_INT);
286 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
287 ATTRIBUTE_UNUSED;
288 static unsigned int ia64_section_type_flags (tree, const char *, int);
289 static void ia64_init_libfuncs (void)
290 ATTRIBUTE_UNUSED;
291 static void ia64_hpux_init_libfuncs (void)
292 ATTRIBUTE_UNUSED;
293 static void ia64_sysv4_init_libfuncs (void)
294 ATTRIBUTE_UNUSED;
295 static void ia64_vms_init_libfuncs (void)
296 ATTRIBUTE_UNUSED;
297 static void ia64_soft_fp_init_libfuncs (void)
298 ATTRIBUTE_UNUSED;
299 static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
300 ATTRIBUTE_UNUSED;
301 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
302 ATTRIBUTE_UNUSED;
303
304 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
305 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
306 static void ia64_encode_section_info (tree, rtx, int);
307 static rtx ia64_struct_value_rtx (tree, int);
308 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
309 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
310 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
311 static bool ia64_legitimate_constant_p (enum machine_mode, rtx);
312 static bool ia64_cannot_force_const_mem (enum machine_mode, rtx);
313 static const char *ia64_mangle_type (const_tree);
314 static const char *ia64_invalid_conversion (const_tree, const_tree);
315 static const char *ia64_invalid_unary_op (int, const_tree);
316 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
317 static enum machine_mode ia64_c_mode_for_suffix (char);
318 static enum machine_mode ia64_promote_function_mode (const_tree,
319 enum machine_mode,
320 int *,
321 const_tree,
322 int);
323 static void ia64_trampoline_init (rtx, tree, rtx);
324 static void ia64_override_options_after_change (void);
325
326 static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int);
327 static tree ia64_builtin_decl (unsigned, bool);
328
329 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
330 static enum machine_mode ia64_get_reg_raw_mode (int regno);
331 static section * ia64_hpux_function_section (tree, enum node_frequency,
332 bool, bool);
333 \f
334 /* Table of valid machine attributes. */
335 static const struct attribute_spec ia64_attribute_table[] =
336 {
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
338 affects_type_identity } */
339 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
340 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
341 false },
342 #if TARGET_ABI_OPEN_VMS
343 { "common_object", 1, 1, true, false, false,
344 ia64_vms_common_object_attribute, false },
345 #endif
346 { "version_id", 1, 1, true, false, false,
347 ia64_handle_version_id_attribute, false },
348 { NULL, 0, 0, false, false, false, NULL, false }
349 };
350
351 /* Implement overriding of the optimization options. */
352 static const struct default_options ia64_option_optimization_table[] =
353 {
354 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
355 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
356 SUBTARGET_OPTIMIZATION_OPTIONS,
357 #endif
358 { OPT_LEVELS_NONE, 0, NULL, 0 }
359 };
360
361 /* Initialize the GCC target structure. */
362 #undef TARGET_ATTRIBUTE_TABLE
363 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
364
365 #undef TARGET_INIT_BUILTINS
366 #define TARGET_INIT_BUILTINS ia64_init_builtins
367
368 #undef TARGET_EXPAND_BUILTIN
369 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
370
371 #undef TARGET_BUILTIN_DECL
372 #define TARGET_BUILTIN_DECL ia64_builtin_decl
373
374 #undef TARGET_ASM_BYTE_OP
375 #define TARGET_ASM_BYTE_OP "\tdata1\t"
376 #undef TARGET_ASM_ALIGNED_HI_OP
377 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
378 #undef TARGET_ASM_ALIGNED_SI_OP
379 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
380 #undef TARGET_ASM_ALIGNED_DI_OP
381 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
382 #undef TARGET_ASM_UNALIGNED_HI_OP
383 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
384 #undef TARGET_ASM_UNALIGNED_SI_OP
385 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
386 #undef TARGET_ASM_UNALIGNED_DI_OP
387 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
388 #undef TARGET_ASM_INTEGER
389 #define TARGET_ASM_INTEGER ia64_assemble_integer
390
391 #undef TARGET_OPTION_OVERRIDE
392 #define TARGET_OPTION_OVERRIDE ia64_option_override
393 #undef TARGET_OPTION_OPTIMIZATION_TABLE
394 #define TARGET_OPTION_OPTIMIZATION_TABLE ia64_option_optimization_table
395 #undef TARGET_OPTION_DEFAULT_PARAMS
396 #define TARGET_OPTION_DEFAULT_PARAMS ia64_option_default_params
397
398 #undef TARGET_ASM_FUNCTION_PROLOGUE
399 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
400 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
401 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
402 #undef TARGET_ASM_FUNCTION_EPILOGUE
403 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
404
405 #undef TARGET_IN_SMALL_DATA_P
406 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
407
408 #undef TARGET_SCHED_ADJUST_COST_2
409 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
412 #undef TARGET_SCHED_VARIABLE_ISSUE
413 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
414 #undef TARGET_SCHED_INIT
415 #define TARGET_SCHED_INIT ia64_sched_init
416 #undef TARGET_SCHED_FINISH
417 #define TARGET_SCHED_FINISH ia64_sched_finish
418 #undef TARGET_SCHED_INIT_GLOBAL
419 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
420 #undef TARGET_SCHED_FINISH_GLOBAL
421 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
422 #undef TARGET_SCHED_REORDER
423 #define TARGET_SCHED_REORDER ia64_sched_reorder
424 #undef TARGET_SCHED_REORDER2
425 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
426
427 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
428 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
429
430 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
431 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
432
433 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
434 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
435 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
436 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
437
438 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
439 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
440 ia64_first_cycle_multipass_dfa_lookahead_guard
441
442 #undef TARGET_SCHED_DFA_NEW_CYCLE
443 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
444
445 #undef TARGET_SCHED_H_I_D_EXTENDED
446 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
447
448 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
449 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
450
451 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
452 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
453
454 #undef TARGET_SCHED_SET_SCHED_CONTEXT
455 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
456
457 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
458 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
459
460 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
461 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
462
463 #undef TARGET_SCHED_SET_SCHED_FLAGS
464 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
465
466 #undef TARGET_SCHED_GET_INSN_SPEC_DS
467 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
468
469 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
470 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
471
472 #undef TARGET_SCHED_SPECULATE_INSN
473 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
474
475 #undef TARGET_SCHED_NEEDS_BLOCK_P
476 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
477
478 #undef TARGET_SCHED_GEN_SPEC_CHECK
479 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
480
481 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
482 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
483 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
484
485 #undef TARGET_SCHED_SKIP_RTX_P
486 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
487
488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
489 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
490 #undef TARGET_ARG_PARTIAL_BYTES
491 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
492 #undef TARGET_FUNCTION_ARG
493 #define TARGET_FUNCTION_ARG ia64_function_arg
494 #undef TARGET_FUNCTION_INCOMING_ARG
495 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
496 #undef TARGET_FUNCTION_ARG_ADVANCE
497 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
498 #undef TARGET_FUNCTION_ARG_BOUNDARY
499 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
500
501 #undef TARGET_ASM_OUTPUT_MI_THUNK
502 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
503 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
504 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
505
506 #undef TARGET_ASM_FILE_START
507 #define TARGET_ASM_FILE_START ia64_file_start
508
509 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
510 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
511
512 #undef TARGET_REGISTER_MOVE_COST
513 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
514 #undef TARGET_MEMORY_MOVE_COST
515 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
516 #undef TARGET_RTX_COSTS
517 #define TARGET_RTX_COSTS ia64_rtx_costs
518 #undef TARGET_ADDRESS_COST
519 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
520
521 #undef TARGET_UNSPEC_MAY_TRAP_P
522 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
523
524 #undef TARGET_MACHINE_DEPENDENT_REORG
525 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
526
527 #undef TARGET_ENCODE_SECTION_INFO
528 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
529
530 #undef TARGET_SECTION_TYPE_FLAGS
531 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
532
533 #ifdef HAVE_AS_TLS
534 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
535 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
536 #endif
537
538 #undef TARGET_PROMOTE_FUNCTION_MODE
539 #define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
540
541 /* ??? Investigate. */
542 #if 0
543 #undef TARGET_PROMOTE_PROTOTYPES
544 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
545 #endif
546
547 #undef TARGET_FUNCTION_VALUE
548 #define TARGET_FUNCTION_VALUE ia64_function_value
549 #undef TARGET_LIBCALL_VALUE
550 #define TARGET_LIBCALL_VALUE ia64_libcall_value
551 #undef TARGET_FUNCTION_VALUE_REGNO_P
552 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
553
554 #undef TARGET_STRUCT_VALUE_RTX
555 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
556 #undef TARGET_RETURN_IN_MEMORY
557 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
558 #undef TARGET_SETUP_INCOMING_VARARGS
559 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
560 #undef TARGET_STRICT_ARGUMENT_NAMING
561 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
562 #undef TARGET_MUST_PASS_IN_STACK
563 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
564 #undef TARGET_GET_RAW_RESULT_MODE
565 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
566 #undef TARGET_GET_RAW_ARG_MODE
567 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
568
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
571
572 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
573 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ia64_dwarf_handle_frame_unspec
574 #undef TARGET_ASM_UNWIND_EMIT
575 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
576 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
577 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
578 #undef TARGET_ASM_INIT_SECTIONS
579 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
580
581 #undef TARGET_DEBUG_UNWIND_INFO
582 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
583 #undef TARGET_EXCEPT_UNWIND_INFO
584 #define TARGET_EXCEPT_UNWIND_INFO ia64_except_unwind_info
585
586 #undef TARGET_SCALAR_MODE_SUPPORTED_P
587 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
588 #undef TARGET_VECTOR_MODE_SUPPORTED_P
589 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
590
591 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
592 in an order different from the specified program order. */
593 #undef TARGET_RELAXED_ORDERING
594 #define TARGET_RELAXED_ORDERING true
595
596 #undef TARGET_DEFAULT_TARGET_FLAGS
597 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
598 #undef TARGET_HANDLE_OPTION
599 #define TARGET_HANDLE_OPTION ia64_handle_option
600
601 #undef TARGET_LEGITIMATE_CONSTANT_P
602 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
603
604 #undef TARGET_CANNOT_FORCE_CONST_MEM
605 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
606
607 #undef TARGET_MANGLE_TYPE
608 #define TARGET_MANGLE_TYPE ia64_mangle_type
609
610 #undef TARGET_INVALID_CONVERSION
611 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
612 #undef TARGET_INVALID_UNARY_OP
613 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
614 #undef TARGET_INVALID_BINARY_OP
615 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
616
617 #undef TARGET_C_MODE_FOR_SUFFIX
618 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
619
620 #undef TARGET_CAN_ELIMINATE
621 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
622
623 #undef TARGET_TRAMPOLINE_INIT
624 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
625
626 #undef TARGET_INVALID_WITHIN_DOLOOP
627 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
628
629 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
630 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
631
632 #undef TARGET_PREFERRED_RELOAD_CLASS
633 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
634
635 #undef TARGET_DELAY_SCHED2
636 #define TARGET_DELAY_SCHED2 true
637
638 /* Variable tracking should be run after all optimizations which
639 change order of insns. It also needs a valid CFG. */
640 #undef TARGET_DELAY_VARTRACK
641 #define TARGET_DELAY_VARTRACK true
642
643 struct gcc_target targetm = TARGET_INITIALIZER;
644 \f
645 typedef enum
646 {
647 ADDR_AREA_NORMAL, /* normal address area */
648 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
649 }
650 ia64_addr_area;
651
652 static GTY(()) tree small_ident1;
653 static GTY(()) tree small_ident2;
654
655 static void
656 init_idents (void)
657 {
658 if (small_ident1 == 0)
659 {
660 small_ident1 = get_identifier ("small");
661 small_ident2 = get_identifier ("__small__");
662 }
663 }
664
665 /* Retrieve the address area that has been chosen for the given decl. */
666
667 static ia64_addr_area
668 ia64_get_addr_area (tree decl)
669 {
670 tree model_attr;
671
672 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
673 if (model_attr)
674 {
675 tree id;
676
677 init_idents ();
678 id = TREE_VALUE (TREE_VALUE (model_attr));
679 if (id == small_ident1 || id == small_ident2)
680 return ADDR_AREA_SMALL;
681 }
682 return ADDR_AREA_NORMAL;
683 }
684
685 static tree
686 ia64_handle_model_attribute (tree *node, tree name, tree args,
687 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
688 {
689 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
690 ia64_addr_area area;
691 tree arg, decl = *node;
692
693 init_idents ();
694 arg = TREE_VALUE (args);
695 if (arg == small_ident1 || arg == small_ident2)
696 {
697 addr_area = ADDR_AREA_SMALL;
698 }
699 else
700 {
701 warning (OPT_Wattributes, "invalid argument of %qE attribute",
702 name);
703 *no_add_attrs = true;
704 }
705
706 switch (TREE_CODE (decl))
707 {
708 case VAR_DECL:
709 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
710 == FUNCTION_DECL)
711 && !TREE_STATIC (decl))
712 {
713 error_at (DECL_SOURCE_LOCATION (decl),
714 "an address area attribute cannot be specified for "
715 "local variables");
716 *no_add_attrs = true;
717 }
718 area = ia64_get_addr_area (decl);
719 if (area != ADDR_AREA_NORMAL && addr_area != area)
720 {
721 error ("address area of %q+D conflicts with previous "
722 "declaration", decl);
723 *no_add_attrs = true;
724 }
725 break;
726
727 case FUNCTION_DECL:
728 error_at (DECL_SOURCE_LOCATION (decl),
729 "address area attribute cannot be specified for "
730 "functions");
731 *no_add_attrs = true;
732 break;
733
734 default:
735 warning (OPT_Wattributes, "%qE attribute ignored",
736 name);
737 *no_add_attrs = true;
738 break;
739 }
740
741 return NULL_TREE;
742 }
743
744 /* The section must have global and overlaid attributes. */
745 #define SECTION_VMS_OVERLAY SECTION_MACH_DEP
746
747 /* Part of the low level implementation of DEC Ada pragma Common_Object which
748 enables the shared use of variables stored in overlaid linker areas
749 corresponding to the use of Fortran COMMON. */
750
751 static tree
752 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
753 int flags ATTRIBUTE_UNUSED,
754 bool *no_add_attrs)
755 {
756 tree decl = *node;
757 tree id, val;
758 if (! DECL_P (decl))
759 abort ();
760
761 DECL_COMMON (decl) = 1;
762 id = TREE_VALUE (args);
763 if (TREE_CODE (id) == IDENTIFIER_NODE)
764 val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
765 else if (TREE_CODE (id) == STRING_CST)
766 val = id;
767 else
768 {
769 warning (OPT_Wattributes,
770 "%qE attribute requires a string constant argument", name);
771 *no_add_attrs = true;
772 return NULL_TREE;
773 }
774 DECL_SECTION_NAME (decl) = val;
775 return NULL_TREE;
776 }
777
778 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
779
780 void
781 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
782 unsigned HOST_WIDE_INT size,
783 unsigned int align)
784 {
785 tree attr = DECL_ATTRIBUTES (decl);
786
787 /* As the common_object attribute sets DECL_SECTION_NAME, check it before
788 looking up the attribute. */
789 if (DECL_SECTION_NAME (decl) && attr)
790 attr = lookup_attribute ("common_object", attr);
791 else
792 attr = NULL_TREE;
793
794 if (!attr)
795 {
796 /* Code from elfos.h. */
797 fprintf (file, "%s", COMMON_ASM_OP);
798 assemble_name (file, name);
799 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
800 size, align / BITS_PER_UNIT);
801 }
802 else
803 {
804 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
805 ASM_OUTPUT_LABEL (file, name);
806 ASM_OUTPUT_SKIP (file, size ? size : 1);
807 }
808 }
809
810 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
811
812 void
813 ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
814 tree decl)
815 {
816 if (!(flags & SECTION_VMS_OVERLAY))
817 {
818 default_elf_asm_named_section (name, flags, decl);
819 return;
820 }
821 if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
822 abort ();
823
824 if (flags & SECTION_DECLARED)
825 {
826 fprintf (asm_out_file, "\t.section\t%s\n", name);
827 return;
828 }
829
830 fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
831 }
832
833 static void
834 ia64_encode_addr_area (tree decl, rtx symbol)
835 {
836 int flags;
837
838 flags = SYMBOL_REF_FLAGS (symbol);
839 switch (ia64_get_addr_area (decl))
840 {
841 case ADDR_AREA_NORMAL: break;
842 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
843 default: gcc_unreachable ();
844 }
845 SYMBOL_REF_FLAGS (symbol) = flags;
846 }
847
848 static void
849 ia64_encode_section_info (tree decl, rtx rtl, int first)
850 {
851 default_encode_section_info (decl, rtl, first);
852
853 /* Careful not to prod global register variables. */
854 if (TREE_CODE (decl) == VAR_DECL
855 && GET_CODE (DECL_RTL (decl)) == MEM
856 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
857 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
858 ia64_encode_addr_area (decl, XEXP (rtl, 0));
859 }
860 \f
861 /* Return 1 if the operands of a move are ok. */
862
863 int
864 ia64_move_ok (rtx dst, rtx src)
865 {
866 /* If we're under init_recog_no_volatile, we'll not be able to use
867 memory_operand. So check the code directly and don't worry about
868 the validity of the underlying address, which should have been
869 checked elsewhere anyway. */
870 if (GET_CODE (dst) != MEM)
871 return 1;
872 if (GET_CODE (src) == MEM)
873 return 0;
874 if (register_operand (src, VOIDmode))
875 return 1;
876
877 /* Otherwise, this must be a constant, and that constant must be either 0, 0.0, or 1.0. */
878 if (INTEGRAL_MODE_P (GET_MODE (dst)))
879 return src == const0_rtx;
880 else
881 return satisfies_constraint_G (src);
882 }
883
884 /* Return 1 if the operands are ok for a floating point load pair. */
885
886 int
887 ia64_load_pair_ok (rtx dst, rtx src)
888 {
889 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
890 return 0;
891 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
892 return 0;
893 switch (GET_CODE (XEXP (src, 0)))
894 {
895 case REG:
896 case POST_INC:
897 break;
898 case POST_DEC:
899 return 0;
900 case POST_MODIFY:
901 {
902 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
903
904 if (GET_CODE (adjust) != CONST_INT
905 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
906 return 0;
907 }
908 break;
909 default:
910 abort ();
911 }
912 return 1;
913 }
914
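/* Return nonzero if exactly one of OP1 and OP2 satisfies basereg_operand,
   i.e. the two predicate results compared with != below differ.  */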
915 int
916 addp4_optimize_ok (rtx op1, rtx op2)
917 {
918 return (basereg_operand (op1, GET_MODE(op1)) !=
919 basereg_operand (op2, GET_MODE(op2)));
920 }
921
922 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
923 Return the length of the field, or <= 0 on failure. */
924
925 int
926 ia64_depz_field_mask (rtx rop, rtx rshift)
927 {
928 unsigned HOST_WIDE_INT op = INTVAL (rop);
929 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
930
931 /* Get rid of the zero bits we're shifting in. */
932 op >>= shift;
933
934 /* We must now have a solid block of 1's at bit 0. */
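/* Illustrative values (not from the original source): op == 0xff here
   gives exact_log2 (0x100) == 8, an 8-bit field; a non-contiguous mask
   such as 0x5 gives exact_log2 (0x6) == -1, i.e. failure.  */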
935 return exact_log2 (op + 1);
936 }
937
938 /* Return the TLS model to use for ADDR. */
939
940 static enum tls_model
941 tls_symbolic_operand_type (rtx addr)
942 {
943 enum tls_model tls_kind = TLS_MODEL_NONE;
944
945 if (GET_CODE (addr) == CONST)
946 {
947 if (GET_CODE (XEXP (addr, 0)) == PLUS
948 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
949 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
950 }
951 else if (GET_CODE (addr) == SYMBOL_REF)
952 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
953
954 return tls_kind;
955 }
956
957 /* Return true if X is a constant that is valid for some immediate
958 field in an instruction. */
959
960 static bool
961 ia64_legitimate_constant_p (enum machine_mode mode, rtx x)
962 {
963 switch (GET_CODE (x))
964 {
965 case CONST_INT:
966 case LABEL_REF:
967 return true;
968
969 case CONST_DOUBLE:
970 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
971 return true;
972 return satisfies_constraint_G (x);
973
974 case CONST:
975 case SYMBOL_REF:
976 /* ??? Short term workaround for PR 28490. We must make the code here
977 match the code in ia64_expand_move and move_operand, even though they
978 are both technically wrong. */
979 if (tls_symbolic_operand_type (x) == 0)
980 {
981 HOST_WIDE_INT addend = 0;
982 rtx op = x;
983
984 if (GET_CODE (op) == CONST
985 && GET_CODE (XEXP (op, 0)) == PLUS
986 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
987 {
988 addend = INTVAL (XEXP (XEXP (op, 0), 1));
989 op = XEXP (XEXP (op, 0), 0);
990 }
991
992 if (any_offset_symbol_operand (op, mode)
993 || function_operand (op, mode))
994 return true;
995 if (aligned_offset_symbol_operand (op, mode))
996 return (addend & 0x3fff) == 0;
997 return false;
998 }
999 return false;
1000
1001 case CONST_VECTOR:
1002 if (mode == V2SFmode)
1003 return satisfies_constraint_Y (x);
1004
1005 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1006 && GET_MODE_SIZE (mode) <= 8);
1007
1008 default:
1009 return false;
1010 }
1011 }
1012
1013 /* Don't allow TLS addresses to get spilled to memory. */
1014
1015 static bool
1016 ia64_cannot_force_const_mem (enum machine_mode mode, rtx x)
1017 {
1018 if (mode == RFmode)
1019 return true;
1020 return tls_symbolic_operand_type (x) != 0;
1021 }
1022
1023 /* Expand a symbolic constant load. */
1024
1025 bool
1026 ia64_expand_load_address (rtx dest, rtx src)
1027 {
1028 gcc_assert (GET_CODE (dest) == REG);
1029
1030 /* ILP32 mode still loads 64 bits of data from the GOT. This avoids
1031 having to pointer-extend the value afterward. Other forms of address
1032 computation below are also more natural to compute as 64-bit quantities.
1033 If we've been given an SImode destination register, change it. */
1034 if (GET_MODE (dest) != Pmode)
1035 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1036 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1037
1038 if (TARGET_NO_PIC)
1039 return false;
1040 if (small_addr_symbolic_operand (src, VOIDmode))
1041 return false;
1042
1043 if (TARGET_AUTO_PIC)
1044 emit_insn (gen_load_gprel64 (dest, src));
1045 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1046 emit_insn (gen_load_fptr (dest, src));
1047 else if (sdata_symbolic_operand (src, VOIDmode))
1048 emit_insn (gen_load_gprel (dest, src));
1049 else
1050 {
1051 HOST_WIDE_INT addend = 0;
1052 rtx tmp;
1053
1054 /* We did split constant offsets in ia64_expand_move, and we did try
1055 to keep them split in move_operand, but we also allowed reload to
1056 rematerialize arbitrary constants rather than spill the value to
1057 the stack and reload it. So we have to be prepared here to split
1058 them apart again. */
1059 if (GET_CODE (src) == CONST)
1060 {
1061 HOST_WIDE_INT hi, lo;
1062
1063 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1064 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1065 hi = hi - lo;
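/* This splits the offset into a low part sign-extended to 14 bits
   (lo ranges over -0x2000 .. 0x1fff) and a high part that is a multiple
   of 0x4000; the same 0x3fff mask appears in ia64_legitimate_constant_p
   for aligned_offset_symbol_operand.  */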
1066
1067 if (lo != 0)
1068 {
1069 addend = lo;
1070 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
1071 }
1072 }
1073
1074 tmp = gen_rtx_HIGH (Pmode, src);
1075 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1076 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1077
1078 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
1079 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1080
1081 if (addend)
1082 {
1083 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1084 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1085 }
1086 }
1087
1088 return true;
1089 }
1090
1091 static GTY(()) rtx gen_tls_tga;
1092 static rtx
1093 gen_tls_get_addr (void)
1094 {
1095 if (!gen_tls_tga)
1096 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1097 return gen_tls_tga;
1098 }
1099
1100 static GTY(()) rtx thread_pointer_rtx;
1101 static rtx
1102 gen_thread_pointer (void)
1103 {
1104 if (!thread_pointer_rtx)
1105 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1106 return thread_pointer_rtx;
1107 }
1108
1109 static rtx
1110 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1111 rtx orig_op1, HOST_WIDE_INT addend)
1112 {
1113 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1114 rtx orig_op0 = op0;
1115 HOST_WIDE_INT addend_lo, addend_hi;
1116
1117 switch (tls_kind)
1118 {
1119 case TLS_MODEL_GLOBAL_DYNAMIC:
1120 start_sequence ();
1121
1122 tga_op1 = gen_reg_rtx (Pmode);
1123 emit_insn (gen_load_dtpmod (tga_op1, op1));
1124
1125 tga_op2 = gen_reg_rtx (Pmode);
1126 emit_insn (gen_load_dtprel (tga_op2, op1));
1127
1128 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1129 LCT_CONST, Pmode, 2, tga_op1,
1130 Pmode, tga_op2, Pmode);
1131
1132 insns = get_insns ();
1133 end_sequence ();
1134
1135 if (GET_MODE (op0) != Pmode)
1136 op0 = tga_ret;
1137 emit_libcall_block (insns, op0, tga_ret, op1);
1138 break;
1139
1140 case TLS_MODEL_LOCAL_DYNAMIC:
1141 /* ??? This isn't the completely proper way to do local-dynamic.
1142 If the call to __tls_get_addr is used only by a single symbol,
1143 then we should (somehow) move the dtprel to the second arg
1144 to avoid the extra add. */
1145 start_sequence ();
1146
1147 tga_op1 = gen_reg_rtx (Pmode);
1148 emit_insn (gen_load_dtpmod (tga_op1, op1));
1149
1150 tga_op2 = const0_rtx;
1151
1152 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1153 LCT_CONST, Pmode, 2, tga_op1,
1154 Pmode, tga_op2, Pmode);
1155
1156 insns = get_insns ();
1157 end_sequence ();
1158
1159 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1160 UNSPEC_LD_BASE);
1161 tmp = gen_reg_rtx (Pmode);
1162 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1163
1164 if (!register_operand (op0, Pmode))
1165 op0 = gen_reg_rtx (Pmode);
1166 if (TARGET_TLS64)
1167 {
1168 emit_insn (gen_load_dtprel (op0, op1));
1169 emit_insn (gen_adddi3 (op0, tmp, op0));
1170 }
1171 else
1172 emit_insn (gen_add_dtprel (op0, op1, tmp));
1173 break;
1174
1175 case TLS_MODEL_INITIAL_EXEC:
1176 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1177 addend_hi = addend - addend_lo;
1178
1179 op1 = plus_constant (op1, addend_hi);
1180 addend = addend_lo;
1181
1182 tmp = gen_reg_rtx (Pmode);
1183 emit_insn (gen_load_tprel (tmp, op1));
1184
1185 if (!register_operand (op0, Pmode))
1186 op0 = gen_reg_rtx (Pmode);
1187 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1188 break;
1189
1190 case TLS_MODEL_LOCAL_EXEC:
1191 if (!register_operand (op0, Pmode))
1192 op0 = gen_reg_rtx (Pmode);
1193
1194 op1 = orig_op1;
1195 addend = 0;
1196 if (TARGET_TLS64)
1197 {
1198 emit_insn (gen_load_tprel (op0, op1));
1199 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1200 }
1201 else
1202 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1203 break;
1204
1205 default:
1206 gcc_unreachable ();
1207 }
1208
1209 if (addend)
1210 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1211 orig_op0, 1, OPTAB_DIRECT);
1212 if (orig_op0 == op0)
1213 return NULL_RTX;
1214 if (GET_MODE (orig_op0) == Pmode)
1215 return op0;
1216 return gen_lowpart (GET_MODE (orig_op0), op0);
1217 }
1218
1219 rtx
1220 ia64_expand_move (rtx op0, rtx op1)
1221 {
1222 enum machine_mode mode = GET_MODE (op0);
1223
1224 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1225 op1 = force_reg (mode, op1);
1226
1227 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1228 {
1229 HOST_WIDE_INT addend = 0;
1230 enum tls_model tls_kind;
1231 rtx sym = op1;
1232
1233 if (GET_CODE (op1) == CONST
1234 && GET_CODE (XEXP (op1, 0)) == PLUS
1235 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1236 {
1237 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1238 sym = XEXP (XEXP (op1, 0), 0);
1239 }
1240
1241 tls_kind = tls_symbolic_operand_type (sym);
1242 if (tls_kind)
1243 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1244
1245 if (any_offset_symbol_operand (sym, mode))
1246 addend = 0;
1247 else if (aligned_offset_symbol_operand (sym, mode))
1248 {
1249 HOST_WIDE_INT addend_lo, addend_hi;
1250
1251 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1252 addend_hi = addend - addend_lo;
1253
1254 if (addend_lo != 0)
1255 {
1256 op1 = plus_constant (sym, addend_hi);
1257 addend = addend_lo;
1258 }
1259 else
1260 addend = 0;
1261 }
1262 else
1263 op1 = sym;
1264
1265 if (reload_completed)
1266 {
1267 /* We really should have taken care of this offset earlier. */
1268 gcc_assert (addend == 0);
1269 if (ia64_expand_load_address (op0, op1))
1270 return NULL_RTX;
1271 }
1272
1273 if (addend)
1274 {
1275 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1276
1277 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1278
1279 op1 = expand_simple_binop (mode, PLUS, subtarget,
1280 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1281 if (op0 == op1)
1282 return NULL_RTX;
1283 }
1284 }
1285
1286 return op1;
1287 }
1288
1289 /* Split a move from OP1 to OP0 conditional on COND. */
1290
1291 void
1292 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1293 {
1294 rtx insn, first = get_last_insn ();
1295
1296 emit_move_insn (op0, op1);
1297
1298 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1299 if (INSN_P (insn))
1300 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1301 PATTERN (insn));
1302 }
1303
1304 /* Split a post-reload TImode or TFmode reference into two DImode
1305 components. This is made extra difficult by the fact that we do
1306 not get any scratch registers to work with, because reload cannot
1307 be prevented from giving us a scratch that overlaps the register
1308 pair involved. So instead, when addressing memory, we tweak the
1309 pointer register up and back down with POST_INCs. Or up and not
1310 back down when we can get away with it.
1311
1312 REVERSED is true when the loads must be done in reversed order
1313 (high word first) for correctness. DEAD is true when the pointer
1314 dies with the second insn we generate and therefore the second
1315 address must not carry a postmodify.
1316
1317 May return an insn which is to be emitted after the moves. */
1318
1319 static rtx
1320 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1321 {
1322 rtx fixup = 0;
1323
1324 switch (GET_CODE (in))
1325 {
1326 case REG:
1327 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1328 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1329 break;
1330
1331 case CONST_INT:
1332 case CONST_DOUBLE:
1333 /* Cannot occur reversed. */
1334 gcc_assert (!reversed);
1335
1336 if (GET_MODE (in) != TFmode)
1337 split_double (in, &out[0], &out[1]);
1338 else
1339 /* split_double does not understand how to split a TFmode
1340 quantity into a pair of DImode constants. */
1341 {
1342 REAL_VALUE_TYPE r;
1343 unsigned HOST_WIDE_INT p[2];
1344 long l[4]; /* TFmode is 128 bits */
1345
1346 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1347 real_to_target (l, &r, TFmode);
1348
1349 if (FLOAT_WORDS_BIG_ENDIAN)
1350 {
1351 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1352 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1353 }
1354 else
1355 {
1356 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1357 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1358 }
1359 out[0] = GEN_INT (p[0]);
1360 out[1] = GEN_INT (p[1]);
1361 }
1362 break;
1363
1364 case MEM:
1365 {
1366 rtx base = XEXP (in, 0);
1367 rtx offset;
1368
1369 switch (GET_CODE (base))
1370 {
1371 case REG:
1372 if (!reversed)
1373 {
1374 out[0] = adjust_automodify_address
1375 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1376 out[1] = adjust_automodify_address
1377 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1378 }
1379 else
1380 {
1381 /* Reversal requires a pre-increment, which can only
1382 be done as a separate insn. */
1383 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1384 out[0] = adjust_automodify_address
1385 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1386 out[1] = adjust_address (in, DImode, 0);
1387 }
1388 break;
1389
1390 case POST_INC:
1391 gcc_assert (!reversed && !dead);
1392
1393 /* Just do the increment in two steps. */
1394 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1395 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1396 break;
1397
1398 case POST_DEC:
1399 gcc_assert (!reversed && !dead);
1400
1401 /* Add 8, subtract 24. */
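/* (The original POST_DEC of a 16-byte value would subtract 16; a POST_INC
   of 8 for the first word followed by a POST_MODIFY of -24 on the second
   nets the same -16 adjustment.)  */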
1402 base = XEXP (base, 0);
1403 out[0] = adjust_automodify_address
1404 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1405 out[1] = adjust_automodify_address
1406 (in, DImode,
1407 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1408 8);
1409 break;
1410
1411 case POST_MODIFY:
1412 gcc_assert (!reversed && !dead);
1413
1414 /* Extract and adjust the modification. This case is
1415 trickier than the others, because we might have an
1416 index register, or we might have a combined offset that
1417 doesn't fit a signed 9-bit displacement field. We can
1418 assume the incoming expression is already legitimate. */
1419 offset = XEXP (base, 1);
1420 base = XEXP (base, 0);
1421
1422 out[0] = adjust_automodify_address
1423 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1424
1425 if (GET_CODE (XEXP (offset, 1)) == REG)
1426 {
1427 /* Can't adjust the postmodify to match. Emit the
1428 original, then a separate addition insn. */
1429 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1430 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1431 }
1432 else
1433 {
1434 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
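/* The postmodify displacement is a signed 9-bit field (-256 .. 255);
   subtracting 8 must not push it below -256, hence the -256 + 8 bound
   tested next.  */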
1435 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1436 {
1437 /* Again the postmodify cannot be made to match,
1438 but in this case it's more efficient to get rid
1439 of the postmodify entirely and fix up with an
1440 add insn. */
1441 out[1] = adjust_automodify_address (in, DImode, base, 8);
1442 fixup = gen_adddi3
1443 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1444 }
1445 else
1446 {
1447 /* Combined offset still fits in the displacement field.
1448 (We cannot overflow it at the high end.) */
1449 out[1] = adjust_automodify_address
1450 (in, DImode, gen_rtx_POST_MODIFY
1451 (Pmode, base, gen_rtx_PLUS
1452 (Pmode, base,
1453 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1454 8);
1455 }
1456 }
1457 break;
1458
1459 default:
1460 gcc_unreachable ();
1461 }
1462 break;
1463 }
1464
1465 default:
1466 gcc_unreachable ();
1467 }
1468
1469 return fixup;
1470 }
1471
1472 /* Split a TImode or TFmode move instruction after reload.
1473 This is used by *movtf_internal and *movti_internal. */
1474 void
1475 ia64_split_tmode_move (rtx operands[])
1476 {
1477 rtx in[2], out[2], insn;
1478 rtx fixup[2];
1479 bool dead = false;
1480 bool reversed = false;
1481
1482 /* It is possible for reload to decide to overwrite a pointer with
1483 the value it points to. In that case we have to do the loads in
1484 the appropriate order so that the pointer is not destroyed too
1485 early. Also we must not generate a postmodify for that second
1486 load, or rws_access_regno will die. */
1487 if (GET_CODE (operands[1]) == MEM
1488 && reg_overlap_mentioned_p (operands[0], operands[1]))
1489 {
1490 rtx base = XEXP (operands[1], 0);
1491 while (GET_CODE (base) != REG)
1492 base = XEXP (base, 0);
1493
1494 if (REGNO (base) == REGNO (operands[0]))
1495 reversed = true;
1496 dead = true;
1497 }
1498 /* Another reason to do the moves in reversed order is if the first
1499 element of the target register pair is also the second element of
1500 the source register pair. */
1501 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1502 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1503 reversed = true;
1504
1505 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1506 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1507
1508 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1509 if (GET_CODE (EXP) == MEM \
1510 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1511 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1512 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1513 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1514
1515 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1516 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1517 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1518
1519 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1520 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1521 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1522
1523 if (fixup[0])
1524 emit_insn (fixup[0]);
1525 if (fixup[1])
1526 emit_insn (fixup[1]);
1527
1528 #undef MAYBE_ADD_REG_INC_NOTE
1529 }
1530
1531 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1532 through memory plus an extra GR scratch register. Except that you can
1533 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1534 SECONDARY_RELOAD_CLASS, but not both.
1535
1536 We got into problems in the first place by allowing a construct like
1537 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1538 This solution attempts to prevent this situation from occurring. When
1539 we see something like the above, we spill the inner register to memory. */
1540
1541 static rtx
1542 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1543 {
1544 if (GET_CODE (in) == SUBREG
1545 && GET_MODE (SUBREG_REG (in)) == TImode
1546 && GET_CODE (SUBREG_REG (in)) == REG)
1547 {
1548 rtx memt = assign_stack_temp (TImode, 16, 0);
1549 emit_move_insn (memt, SUBREG_REG (in));
1550 return adjust_address (memt, mode, 0);
1551 }
1552 else if (force && GET_CODE (in) == REG)
1553 {
1554 rtx memx = assign_stack_temp (mode, 16, 0);
1555 emit_move_insn (memx, in);
1556 return memx;
1557 }
1558 else
1559 return in;
1560 }
1561
1562 /* Expand the movxf or movrf pattern (MODE says which) with the given
1563 OPERANDS, returning true if the pattern should then invoke
1564 DONE. */
1565
1566 bool
1567 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1568 {
1569 rtx op0 = operands[0];
1570
1571 if (GET_CODE (op0) == SUBREG)
1572 op0 = SUBREG_REG (op0);
1573
1574 /* We must support XFmode loads into general registers for stdarg/vararg,
1575 unprototyped calls, and a rare case where a long double is passed as
1576 an argument after a float HFA fills the FP registers. We split them into
1577 DImode loads for convenience. We also need to support XFmode stores
1578 for the last case. This case does not happen for stdarg/vararg routines,
1579 because we do a block store to memory of unnamed arguments. */
1580
1581 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1582 {
1583 rtx out[2];
1584
1585 /* We're hoping to transform everything that deals with XFmode
1586 quantities and GR registers early in the compiler. */
1587 gcc_assert (can_create_pseudo_p ());
1588
1589 /* Struct to register can just use TImode instead. */
1590 if ((GET_CODE (operands[1]) == SUBREG
1591 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1592 || (GET_CODE (operands[1]) == REG
1593 && GR_REGNO_P (REGNO (operands[1]))))
1594 {
1595 rtx op1 = operands[1];
1596
1597 if (GET_CODE (op1) == SUBREG)
1598 op1 = SUBREG_REG (op1);
1599 else
1600 op1 = gen_rtx_REG (TImode, REGNO (op1));
1601
1602 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1603 return true;
1604 }
1605
1606 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1607 {
1608 /* Don't word-swap when reading in the constant. */
1609 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1610 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1611 0, mode));
1612 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1613 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1614 0, mode));
1615 return true;
1616 }
1617
1618 /* If the quantity is in a register not known to be GR, spill it. */
1619 if (register_operand (operands[1], mode))
1620 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1621
1622 gcc_assert (GET_CODE (operands[1]) == MEM);
1623
1624 /* Don't word-swap when reading in the value. */
1625 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1626 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1627
1628 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1629 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1630 return true;
1631 }
1632
1633 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1634 {
1635 /* We're hoping to transform everything that deals with XFmode
1636 quantities and GR registers early in the compiler. */
1637 gcc_assert (can_create_pseudo_p ());
1638
1639 /* Op0 can't be a GR_REG here, as that case is handled above.
1640 If op0 is a register, then we spill op1, so that we now have a
1641 MEM operand. This requires creating an XFmode subreg of a TImode reg
1642 to force the spill. */
1643 if (register_operand (operands[0], mode))
1644 {
1645 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1646 op1 = gen_rtx_SUBREG (mode, op1, 0);
1647 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1648 }
1649
1650 else
1651 {
1652 rtx in[2];
1653
1654 gcc_assert (GET_CODE (operands[0]) == MEM);
1655
1656 /* Don't word-swap when writing out the value. */
1657 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1658 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1659
1660 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1661 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1662 return true;
1663 }
1664 }
1665
1666 if (!reload_in_progress && !reload_completed)
1667 {
1668 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1669
1670 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1671 {
1672 rtx memt, memx, in = operands[1];
1673 if (CONSTANT_P (in))
1674 in = validize_mem (force_const_mem (mode, in));
1675 if (GET_CODE (in) == MEM)
1676 memt = adjust_address (in, TImode, 0);
1677 else
1678 {
1679 memt = assign_stack_temp (TImode, 16, 0);
1680 memx = adjust_address (memt, mode, 0);
1681 emit_move_insn (memx, in);
1682 }
1683 emit_move_insn (op0, memt);
1684 return true;
1685 }
1686
1687 if (!ia64_move_ok (operands[0], operands[1]))
1688 operands[1] = force_reg (mode, operands[1]);
1689 }
1690
1691 return false;
1692 }
1693
1694 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1695 with the expression that holds the compare result (in VOIDmode). */
1696
1697 static GTY(()) rtx cmptf_libfunc;
1698
1699 void
1700 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1701 {
1702 enum rtx_code code = GET_CODE (*expr);
1703 rtx cmp;
1704
1705 /* If we have a BImode input, then we already have a compare result, and
1706 do not need to emit another comparison. */
1707 if (GET_MODE (*op0) == BImode)
1708 {
1709 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1710 cmp = *op0;
1711 }
1712 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1713 magic number as its third argument, which indicates what to do.
1714 The return value is an integer to be compared against zero. */
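   /* Illustrative only: for a source-level TFmode comparison `a > b' on
      HP-UX, the expansion below amounts to

        ret = _U_Qfcmp (a, b, QCMP_GT | QCMP_INV);
        cmp = (ret != 0);

      and the original comparison is then replaced by a test of CMP
      against zero.  */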
1715 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1716 {
1717 enum qfcmp_magic {
1718 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1719 QCMP_UNORD = 2,
1720 QCMP_EQ = 4,
1721 QCMP_LT = 8,
1722 QCMP_GT = 16
1723 };
1724 int magic;
1725 enum rtx_code ncode;
1726 rtx ret, insns;
1727
1728 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1729 switch (code)
1730 {
1731 /* 1 = equal, 0 = not equal. Equality operators do
1732 not raise FP_INVALID when given an SNaN operand. */
1733 case EQ: magic = QCMP_EQ; ncode = NE; break;
1734 case NE: magic = QCMP_EQ; ncode = EQ; break;
1735 /* isunordered() from C99. */
1736 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1737 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1738 /* Relational operators raise FP_INVALID when given
1739 an SNaN operand. */
1740 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1741 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1742 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1743 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1744 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1745 Expanders for buneq etc. would have to be added to ia64.md
1746 for this to be useful. */
1747 default: gcc_unreachable ();
1748 }
1749
1750 start_sequence ();
1751
1752 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1753 *op0, TFmode, *op1, TFmode,
1754 GEN_INT (magic), DImode);
1755 cmp = gen_reg_rtx (BImode);
1756 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1757 gen_rtx_fmt_ee (ncode, BImode,
1758 ret, const0_rtx)));
1759
1760 insns = get_insns ();
1761 end_sequence ();
1762
1763 emit_libcall_block (insns, cmp, cmp,
1764 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1765 code = NE;
1766 }
1767 else
1768 {
1769 cmp = gen_reg_rtx (BImode);
1770 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1771 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1772 code = NE;
1773 }
1774
1775 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1776 *op0 = cmp;
1777 *op1 = const0_rtx;
1778 }
1779
1780 /* Generate an integral vector comparison. Return true if the condition has
1781 been reversed, and so the sense of the comparison should be inverted. */
1782
1783 static bool
1784 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1785 rtx dest, rtx op0, rtx op1)
1786 {
1787 bool negate = false;
1788 rtx x;
1789
1790 /* Canonicalize the comparison to EQ, GT, GTU. */
1791 switch (code)
1792 {
1793 case EQ:
1794 case GT:
1795 case GTU:
1796 break;
1797
1798 case NE:
1799 case LE:
1800 case LEU:
1801 code = reverse_condition (code);
1802 negate = true;
1803 break;
1804
1805 case GE:
1806 case GEU:
1807 code = reverse_condition (code);
1808 negate = true;
1809 /* FALLTHRU */
1810
1811 case LT:
1812 case LTU:
1813 code = swap_condition (code);
1814 x = op0, op0 = op1, op1 = x;
1815 break;
1816
1817 default:
1818 gcc_unreachable ();
1819 }
1820
1821 /* Unsigned parallel compare is not supported by the hardware. Play some
1822 tricks to turn this into a signed comparison against 0. */
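   /* Illustrative only -- the rewrites used below are:

        V2SI:       a >u b  <=>  (a - 0x80000000) >s (b - 0x80000000)
        V8QI/V4HI:  a >u b  <=>  sat_sub_u (a, b) != 0

      The second form is emitted as an EQ against zero with NEGATE flipped.  */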
1823 if (code == GTU)
1824 {
1825 switch (mode)
1826 {
1827 case V2SImode:
1828 {
1829 rtx t1, t2, mask;
1830
1831 /* Subtract (-(INT MAX) - 1) from both operands to make
1832 them signed. */
1833 mask = GEN_INT (0x80000000);
1834 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1835 mask = force_reg (mode, mask);
1836 t1 = gen_reg_rtx (mode);
1837 emit_insn (gen_subv2si3 (t1, op0, mask));
1838 t2 = gen_reg_rtx (mode);
1839 emit_insn (gen_subv2si3 (t2, op1, mask));
1840 op0 = t1;
1841 op1 = t2;
1842 code = GT;
1843 }
1844 break;
1845
1846 case V8QImode:
1847 case V4HImode:
1848 /* Perform a parallel unsigned saturating subtraction. */
1849 x = gen_reg_rtx (mode);
1850 emit_insn (gen_rtx_SET (VOIDmode, x,
1851 gen_rtx_US_MINUS (mode, op0, op1)));
1852
1853 code = EQ;
1854 op0 = x;
1855 op1 = CONST0_RTX (mode);
1856 negate = !negate;
1857 break;
1858
1859 default:
1860 gcc_unreachable ();
1861 }
1862 }
1863
1864 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1865 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1866
1867 return negate;
1868 }
1869
1870 /* Emit an integral vector conditional move. */
1871
1872 void
1873 ia64_expand_vecint_cmov (rtx operands[])
1874 {
1875 enum machine_mode mode = GET_MODE (operands[0]);
1876 enum rtx_code code = GET_CODE (operands[3]);
1877 bool negate;
1878 rtx cmp, x, ot, of;
1879
1880 cmp = gen_reg_rtx (mode);
1881 negate = ia64_expand_vecint_compare (code, mode, cmp,
1882 operands[4], operands[5]);
1883
1884 ot = operands[1+negate];
1885 of = operands[2-negate];
1886
1887 if (ot == CONST0_RTX (mode))
1888 {
1889 if (of == CONST0_RTX (mode))
1890 {
1891 emit_move_insn (operands[0], ot);
1892 return;
1893 }
1894
1895 x = gen_rtx_NOT (mode, cmp);
1896 x = gen_rtx_AND (mode, x, of);
1897 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1898 }
1899 else if (of == CONST0_RTX (mode))
1900 {
1901 x = gen_rtx_AND (mode, cmp, ot);
1902 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1903 }
1904 else
1905 {
1906 rtx t, f;
1907
1908 t = gen_reg_rtx (mode);
1909 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1910 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1911
1912 f = gen_reg_rtx (mode);
1913 x = gen_rtx_NOT (mode, cmp);
1914 x = gen_rtx_AND (mode, x, operands[2-negate]);
1915 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1916
1917 x = gen_rtx_IOR (mode, t, f);
1918 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1919 }
1920 }
1921
1922 /* Emit an integral vector min or max operation. Return true if all done. */
1923
1924 bool
1925 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1926 rtx operands[])
1927 {
1928 rtx xops[6];
1929
1930 /* These four combinations are supported directly. */
1931 if (mode == V8QImode && (code == UMIN || code == UMAX))
1932 return false;
1933 if (mode == V4HImode && (code == SMIN || code == SMAX))
1934 return false;
1935
1936 /* This combination can be implemented with only saturating subtraction. */
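   /* Illustrative identity:  umax (a, b) == sat_sub_u (a, b) + b, since the
      unsigned saturating subtraction yields a - b when a >= b and 0
      otherwise, and the final addition cannot overflow.  */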
1937 if (mode == V4HImode && code == UMAX)
1938 {
1939 rtx x, tmp = gen_reg_rtx (mode);
1940
1941 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1942 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1943
1944 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1945 return true;
1946 }
1947
1948 /* Everything else implemented via vector comparisons. */
1949 xops[0] = operands[0];
1950 xops[4] = xops[1] = operands[1];
1951 xops[5] = xops[2] = operands[2];
1952
1953 switch (code)
1954 {
1955 case UMIN:
1956 code = LTU;
1957 break;
1958 case UMAX:
1959 code = GTU;
1960 break;
1961 case SMIN:
1962 code = LT;
1963 break;
1964 case SMAX:
1965 code = GT;
1966 break;
1967 default:
1968 gcc_unreachable ();
1969 }
1970 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1971
1972 ia64_expand_vecint_cmov (xops);
1973 return true;
1974 }
1975
1976 /* The vectors LO and HI each contain N halves of a double-wide vector.
1977 Reassemble either the first N/2 or the second N/2 elements. */
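 /* For example (illustrative only): in the unpack use below, with
    little-endian V8QI data D and sign/zero vector S, the low interleave
    yields { d0,s0,d1,s1,d2,s2,d3,s3 }, which viewed as V4HI is the first
    four elements widened; the high interleave yields the remaining four.  */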
1978
1979 void
1980 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
1981 {
1982 enum machine_mode mode = GET_MODE (lo);
1983 rtx (*gen) (rtx, rtx, rtx);
1984 rtx x;
1985
1986 switch (mode)
1987 {
1988 case V8QImode:
1989 gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
1990 break;
1991 case V4HImode:
1992 gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
1993 break;
1994 default:
1995 gcc_unreachable ();
1996 }
1997
1998 x = gen_lowpart (mode, out);
1999 if (TARGET_BIG_ENDIAN)
2000 x = gen (x, hi, lo);
2001 else
2002 x = gen (x, lo, hi);
2003 emit_insn (x);
2004 }
2005
2006 /* Return a vector of the sign-extension of VEC. */
2007
2008 static rtx
2009 ia64_unpack_sign (rtx vec, bool unsignedp)
2010 {
2011 enum machine_mode mode = GET_MODE (vec);
2012 rtx zero = CONST0_RTX (mode);
2013
2014 if (unsignedp)
2015 return zero;
2016 else
2017 {
2018 rtx sign = gen_reg_rtx (mode);
2019 bool neg;
2020
2021 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2022 gcc_assert (!neg);
2023
2024 return sign;
2025 }
2026 }
2027
2028 /* Emit an integral vector unpack operation. */
2029
2030 void
2031 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2032 {
2033 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2034 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2035 }
2036
2037 /* Emit an integral vector widening sum operation. */
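 /* Illustrative only: the input vector is unpacked, together with its
    sign/zero vector, into low and high halves L and H in the wider mode,
    and the result is computed as H + (L + ACC), where ACC is operand 2.  */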
2038
2039 void
2040 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2041 {
2042 enum machine_mode wmode;
2043 rtx l, h, t, sign;
2044
2045 sign = ia64_unpack_sign (operands[1], unsignedp);
2046
2047 wmode = GET_MODE (operands[0]);
2048 l = gen_reg_rtx (wmode);
2049 h = gen_reg_rtx (wmode);
2050
2051 ia64_unpack_assemble (l, operands[1], sign, false);
2052 ia64_unpack_assemble (h, operands[1], sign, true);
2053
2054 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2055 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2056 if (t != operands[0])
2057 emit_move_insn (operands[0], t);
2058 }
2059
2060 /* Emit a signed or unsigned V8QI dot product operation. */
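 /* Illustrative only: both V8QI operands are widened into V4HI halves,
    the halves are multiplied element-wise with pmpy2 into even/odd V2SI
    products P1..P4, and the result is P1 + P2 + P3 + P4 + operand 3.  */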
2061
2062 void
2063 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2064 {
2065 rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
2066 rtx p1, p2, p3, p4, s1, s2, s3;
2067
2068 op1 = operands[1];
2069 op2 = operands[2];
2070 sn1 = ia64_unpack_sign (op1, unsignedp);
2071 sn2 = ia64_unpack_sign (op2, unsignedp);
2072
2073 l1 = gen_reg_rtx (V4HImode);
2074 l2 = gen_reg_rtx (V4HImode);
2075 h1 = gen_reg_rtx (V4HImode);
2076 h2 = gen_reg_rtx (V4HImode);
2077 ia64_unpack_assemble (l1, op1, sn1, false);
2078 ia64_unpack_assemble (l2, op2, sn2, false);
2079 ia64_unpack_assemble (h1, op1, sn1, true);
2080 ia64_unpack_assemble (h2, op2, sn2, true);
2081
2082 p1 = gen_reg_rtx (V2SImode);
2083 p2 = gen_reg_rtx (V2SImode);
2084 p3 = gen_reg_rtx (V2SImode);
2085 p4 = gen_reg_rtx (V2SImode);
2086 emit_insn (gen_pmpy2_even (p1, l1, l2));
2087 emit_insn (gen_pmpy2_even (p2, h1, h2));
2088 emit_insn (gen_pmpy2_odd (p3, l1, l2));
2089 emit_insn (gen_pmpy2_odd (p4, h1, h2));
2090
2091 s1 = gen_reg_rtx (V2SImode);
2092 s2 = gen_reg_rtx (V2SImode);
2093 s3 = gen_reg_rtx (V2SImode);
2094 emit_insn (gen_addv2si3 (s1, p1, p2));
2095 emit_insn (gen_addv2si3 (s2, p3, p4));
2096 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2097 emit_insn (gen_addv2si3 (operands[0], s2, s3));
2098 }
2099
2100 /* Emit the appropriate sequence for a call. */
2101
2102 void
2103 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2104 int sibcall_p)
2105 {
2106 rtx insn, b0;
2107
2108 addr = XEXP (addr, 0);
2109 addr = convert_memory_address (DImode, addr);
2110 b0 = gen_rtx_REG (DImode, R_BR (0));
2111
2112 /* ??? Should do this for functions known to bind local too. */
2113 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2114 {
2115 if (sibcall_p)
2116 insn = gen_sibcall_nogp (addr);
2117 else if (! retval)
2118 insn = gen_call_nogp (addr, b0);
2119 else
2120 insn = gen_call_value_nogp (retval, addr, b0);
2121 insn = emit_call_insn (insn);
2122 }
2123 else
2124 {
2125 if (sibcall_p)
2126 insn = gen_sibcall_gp (addr);
2127 else if (! retval)
2128 insn = gen_call_gp (addr, b0);
2129 else
2130 insn = gen_call_value_gp (retval, addr, b0);
2131 insn = emit_call_insn (insn);
2132
2133 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2134 }
2135
2136 if (sibcall_p)
2137 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2138
2139 if (TARGET_ABI_OPEN_VMS)
2140 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2141 gen_rtx_REG (DImode, GR_REG (25)));
2142 }
2143
2144 static void
2145 reg_emitted (enum ia64_frame_regs r)
2146 {
2147 if (emitted_frame_related_regs[r] == 0)
2148 emitted_frame_related_regs[r] = current_frame_info.r[r];
2149 else
2150 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2151 }
2152
2153 static int
2154 get_reg (enum ia64_frame_regs r)
2155 {
2156 reg_emitted (r);
2157 return current_frame_info.r[r];
2158 }
2159
2160 static bool
2161 is_emitted (int regno)
2162 {
2163 unsigned int r;
2164
2165 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2166 if (emitted_frame_related_regs[r] == regno)
2167 return true;
2168 return false;
2169 }
2170
2171 void
2172 ia64_reload_gp (void)
2173 {
2174 rtx tmp;
2175
2176 if (current_frame_info.r[reg_save_gp])
2177 {
2178 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2179 }
2180 else
2181 {
2182 HOST_WIDE_INT offset;
2183 rtx offset_r;
2184
2185 offset = (current_frame_info.spill_cfa_off
2186 + current_frame_info.spill_size);
2187 if (frame_pointer_needed)
2188 {
2189 tmp = hard_frame_pointer_rtx;
2190 offset = -offset;
2191 }
2192 else
2193 {
2194 tmp = stack_pointer_rtx;
2195 offset = current_frame_info.total_size - offset;
2196 }
2197
2198 offset_r = GEN_INT (offset);
2199 if (satisfies_constraint_I (offset_r))
2200 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2201 else
2202 {
2203 emit_move_insn (pic_offset_table_rtx, offset_r);
2204 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2205 pic_offset_table_rtx, tmp));
2206 }
2207
2208 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2209 }
2210
2211 emit_move_insn (pic_offset_table_rtx, tmp);
2212 }
2213
2214 void
2215 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2216 rtx scratch_b, int noreturn_p, int sibcall_p)
2217 {
2218 rtx insn;
2219 bool is_desc = false;
2220
2221 /* If we find we're calling through a register, then we're actually
2222 calling through a descriptor, so load up the values. */
2223 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2224 {
2225 rtx tmp;
2226 bool addr_dead_p;
2227
2228 /* ??? We are currently constrained to *not* use peep2, because
2229 we can legitimately change the global lifetime of the GP
2230 (in the form of killing where previously live). This is
2231 because a call through a descriptor doesn't use the previous
2232 value of the GP, while a direct call does, and we do not
2233 commit to either form until the split here.
2234
2235 That said, this means that we lack precise life info for
2236 whether ADDR is dead after this call. This is not terribly
2237 important, since we can fix things up essentially for free
2238 with the POST_DEC below, but it's nice to not use it when we
2239 can immediately tell it's not necessary. */
2240 addr_dead_p = ((noreturn_p || sibcall_p
2241 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2242 REGNO (addr)))
2243 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2244
2245 /* Load the code address into scratch_b. */
2246 tmp = gen_rtx_POST_INC (Pmode, addr);
2247 tmp = gen_rtx_MEM (Pmode, tmp);
2248 emit_move_insn (scratch_r, tmp);
2249 emit_move_insn (scratch_b, scratch_r);
2250
2251 /* Load the GP address. If ADDR is not dead here, then we must
2252 revert the change made above via the POST_INCREMENT. */
2253 if (!addr_dead_p)
2254 tmp = gen_rtx_POST_DEC (Pmode, addr);
2255 else
2256 tmp = addr;
2257 tmp = gen_rtx_MEM (Pmode, tmp);
2258 emit_move_insn (pic_offset_table_rtx, tmp);
2259
2260 is_desc = true;
2261 addr = scratch_b;
2262 }
2263
2264 if (sibcall_p)
2265 insn = gen_sibcall_nogp (addr);
2266 else if (retval)
2267 insn = gen_call_value_nogp (retval, addr, retaddr);
2268 else
2269 insn = gen_call_nogp (addr, retaddr);
2270 emit_call_insn (insn);
2271
2272 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2273 ia64_reload_gp ();
2274 }
2275
2276 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2277
2278 This differs from the generic code in that we know about the zero-extending
2279 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2280 also know that ld.acq+cmpxchg.rel equals a full barrier.
2281
2282 The loop we want to generate looks like
2283
2284 cmp_reg = mem;
2285 label:
2286 old_reg = cmp_reg;
2287 new_reg = cmp_reg op val;
2288 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2289 if (cmp_reg != old_reg)
2290 goto label;
2291
2292 Note that we only do the plain load from memory once. Subsequent
2293 iterations use the value loaded by the compare-and-swap pattern. */
2294
2295 void
2296 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2297 rtx old_dst, rtx new_dst)
2298 {
2299 enum machine_mode mode = GET_MODE (mem);
2300 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2301 enum insn_code icode;
2302
2303 /* Special case for using fetchadd. */
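   /* Illustrative note: fetchadd_operand only accepts the immediates that
      the fetchadd instruction can encode (-16, -8, -4, -1, 1, 4, 8, 16), so
      e.g. an atomic `x += 4' on an SImode location uses fetchadd4.acq here
      instead of the cmpxchg loop below.  */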
2304 if ((mode == SImode || mode == DImode)
2305 && (code == PLUS || code == MINUS)
2306 && fetchadd_operand (val, mode))
2307 {
2308 if (code == MINUS)
2309 val = GEN_INT (-INTVAL (val));
2310
2311 if (!old_dst)
2312 old_dst = gen_reg_rtx (mode);
2313
2314 emit_insn (gen_memory_barrier ());
2315
2316 if (mode == SImode)
2317 icode = CODE_FOR_fetchadd_acq_si;
2318 else
2319 icode = CODE_FOR_fetchadd_acq_di;
2320 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2321
2322 if (new_dst)
2323 {
2324 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2325 true, OPTAB_WIDEN);
2326 if (new_reg != new_dst)
2327 emit_move_insn (new_dst, new_reg);
2328 }
2329 return;
2330 }
2331
2332 /* Because of the volatile mem read, we get an ld.acq, which is the
2333 front half of the full barrier. The back half is the cmpxchg.rel. */
2334 gcc_assert (MEM_VOLATILE_P (mem));
2335
2336 old_reg = gen_reg_rtx (DImode);
2337 cmp_reg = gen_reg_rtx (DImode);
2338 label = gen_label_rtx ();
2339
2340 if (mode != DImode)
2341 {
2342 val = simplify_gen_subreg (DImode, val, mode, 0);
2343 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2344 }
2345 else
2346 emit_move_insn (cmp_reg, mem);
2347
2348 emit_label (label);
2349
2350 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2351 emit_move_insn (old_reg, cmp_reg);
2352 emit_move_insn (ar_ccv, cmp_reg);
2353
2354 if (old_dst)
2355 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2356
2357 new_reg = cmp_reg;
2358 if (code == NOT)
2359 {
2360 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2361 true, OPTAB_DIRECT);
2362 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2363 }
2364 else
2365 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2366 true, OPTAB_DIRECT);
2367
2368 if (mode != DImode)
2369 new_reg = gen_lowpart (mode, new_reg);
2370 if (new_dst)
2371 emit_move_insn (new_dst, new_reg);
2372
2373 switch (mode)
2374 {
2375 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2376 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2377 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2378 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2379 default:
2380 gcc_unreachable ();
2381 }
2382
2383 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2384
2385 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2386 }
2387 \f
2388 /* Begin the assembly file. */
2389
2390 static void
2391 ia64_file_start (void)
2392 {
2393 default_file_start ();
2394 emit_safe_across_calls ();
2395 }
2396
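 /* Emit a .pred.safe_across_calls directive naming the ranges of predicate
    registers that are not call-used.  With the default register conventions
    the output is (illustrative only)

	.pred.safe_across_calls p1-p5,p16-p63

    which tells the assembler those predicates are preserved across calls.  */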
2397 void
2398 emit_safe_across_calls (void)
2399 {
2400 unsigned int rs, re;
2401 int out_state;
2402
2403 rs = 1;
2404 out_state = 0;
2405 while (1)
2406 {
2407 while (rs < 64 && call_used_regs[PR_REG (rs)])
2408 rs++;
2409 if (rs >= 64)
2410 break;
2411 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2412 continue;
2413 if (out_state == 0)
2414 {
2415 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2416 out_state = 1;
2417 }
2418 else
2419 fputc (',', asm_out_file);
2420 if (re == rs + 1)
2421 fprintf (asm_out_file, "p%u", rs);
2422 else
2423 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2424 rs = re + 1;
2425 }
2426 if (out_state)
2427 fputc ('\n', asm_out_file);
2428 }
2429
2430 /* Globalize a declaration. */
2431
2432 static void
2433 ia64_globalize_decl_name (FILE * stream, tree decl)
2434 {
2435 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2436 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2437 if (version_attr)
2438 {
2439 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2440 const char *p = TREE_STRING_POINTER (v);
2441 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2442 }
2443 targetm.asm_out.globalize_label (stream, name);
2444 if (TREE_CODE (decl) == FUNCTION_DECL)
2445 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2446 }
2447
2448 /* Helper function for ia64_compute_frame_size: find an appropriate general
2449 register to spill some special register to. Registers already chosen by
2450 this routine are tracked in current_frame_info.gr_used_mask and n_local_regs.
2451 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2452
2453 static int
2454 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2455 {
2456 int regno;
2457
2458 if (emitted_frame_related_regs[r] != 0)
2459 {
2460 regno = emitted_frame_related_regs[r];
2461 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2462 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2463 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2464 else if (current_function_is_leaf
2465 && regno >= GR_REG (1) && regno <= GR_REG (31))
2466 current_frame_info.gr_used_mask |= 1 << regno;
2467
2468 return regno;
2469 }
2470
2471 /* If this is a leaf function, first try an otherwise unused
2472 call-clobbered register. */
2473 if (current_function_is_leaf)
2474 {
2475 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2476 if (! df_regs_ever_live_p (regno)
2477 && call_used_regs[regno]
2478 && ! fixed_regs[regno]
2479 && ! global_regs[regno]
2480 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2481 && ! is_emitted (regno))
2482 {
2483 current_frame_info.gr_used_mask |= 1 << regno;
2484 return regno;
2485 }
2486 }
2487
2488 if (try_locals)
2489 {
2490 regno = current_frame_info.n_local_regs;
2491 /* If there is a frame pointer, then we can't use loc79, because
2492 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2493 reg_name switching code in ia64_expand_prologue. */
2494 while (regno < (80 - frame_pointer_needed))
2495 if (! is_emitted (LOC_REG (regno++)))
2496 {
2497 current_frame_info.n_local_regs = regno;
2498 return LOC_REG (regno - 1);
2499 }
2500 }
2501
2502 /* Failed to find a general register to spill to. Must use stack. */
2503 return 0;
2504 }
2505
2506 /* In order to make for nice schedules, we try to allocate every temporary
2507 to a different register. We must of course stay away from call-saved,
2508 fixed, and global registers. We must also stay away from registers
2509 allocated in current_frame_info.gr_used_mask, since those include regs
2510 used all through the prologue.
2511
2512 Any register allocated here must be used immediately. The idea is to
2513 aid scheduling, not to solve data flow problems. */
2514
2515 static int last_scratch_gr_reg;
2516
2517 static int
2518 next_scratch_gr_reg (void)
2519 {
2520 int i, regno;
2521
2522 for (i = 0; i < 32; ++i)
2523 {
2524 regno = (last_scratch_gr_reg + i + 1) & 31;
2525 if (call_used_regs[regno]
2526 && ! fixed_regs[regno]
2527 && ! global_regs[regno]
2528 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2529 {
2530 last_scratch_gr_reg = regno;
2531 return regno;
2532 }
2533 }
2534
2535 /* There must be _something_ available. */
2536 gcc_unreachable ();
2537 }
2538
2539 /* Helper function for ia64_compute_frame_size, called through
2540 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2541
2542 static void
2543 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2544 {
2545 unsigned int regno = REGNO (reg);
2546 if (regno < 32)
2547 {
2548 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2549 for (i = 0; i < n; ++i)
2550 current_frame_info.gr_used_mask |= 1 << (regno + i);
2551 }
2552 }
2553
2554
2555 /* Compute the frame layout for the current function and record it in
2556 current_frame_info. SIZE is the number of bytes of space needed for
2557 local variables. */
2558
2559 static void
2560 ia64_compute_frame_size (HOST_WIDE_INT size)
2561 {
2562 HOST_WIDE_INT total_size;
2563 HOST_WIDE_INT spill_size = 0;
2564 HOST_WIDE_INT extra_spill_size = 0;
2565 HOST_WIDE_INT pretend_args_size;
2566 HARD_REG_SET mask;
2567 int n_spilled = 0;
2568 int spilled_gr_p = 0;
2569 int spilled_fr_p = 0;
2570 unsigned int regno;
2571 int min_regno;
2572 int max_regno;
2573 int i;
2574
2575 if (current_frame_info.initialized)
2576 return;
2577
2578 memset (&current_frame_info, 0, sizeof current_frame_info);
2579 CLEAR_HARD_REG_SET (mask);
2580
2581 /* Don't allocate scratches to the return register. */
2582 diddle_return_value (mark_reg_gr_used_mask, NULL);
2583
2584 /* Don't allocate scratches to the EH scratch registers. */
2585 if (cfun->machine->ia64_eh_epilogue_sp)
2586 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2587 if (cfun->machine->ia64_eh_epilogue_bsp)
2588 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2589
2590 /* Find the size of the register stack frame. We have only 80 local
2591 registers, because we reserve 8 for the inputs and 8 for the
2592 outputs. */
2593
2594 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2595 since we'll be adjusting that down later. */
2596 regno = LOC_REG (78) + ! frame_pointer_needed;
2597 for (; regno >= LOC_REG (0); regno--)
2598 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2599 break;
2600 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2601
2602 /* For functions marked with the syscall_linkage attribute, we must mark
2603 all eight input registers as in use, so that locals aren't visible to
2604 the caller. */
2605
2606 if (cfun->machine->n_varargs > 0
2607 || lookup_attribute ("syscall_linkage",
2608 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2609 current_frame_info.n_input_regs = 8;
2610 else
2611 {
2612 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2613 if (df_regs_ever_live_p (regno))
2614 break;
2615 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2616 }
2617
2618 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2619 if (df_regs_ever_live_p (regno))
2620 break;
2621 i = regno - OUT_REG (0) + 1;
2622
2623 #ifndef PROFILE_HOOK
2624 /* When -p profiling, we need one output register for the mcount argument.
2625 Likewise for -a profiling for the bb_init_func argument. For -ax
2626 profiling, we need two output registers for the two bb_init_trace_func
2627 arguments. */
2628 if (crtl->profile)
2629 i = MAX (i, 1);
2630 #endif
2631 current_frame_info.n_output_regs = i;
2632
2633 /* ??? No rotating register support yet. */
2634 current_frame_info.n_rotate_regs = 0;
2635
2636 /* Discover which registers need spilling, and how much room that
2637 will take. Begin with floating point and general registers,
2638 which will always wind up on the stack. */
2639
2640 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2641 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2642 {
2643 SET_HARD_REG_BIT (mask, regno);
2644 spill_size += 16;
2645 n_spilled += 1;
2646 spilled_fr_p = 1;
2647 }
2648
2649 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2650 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2651 {
2652 SET_HARD_REG_BIT (mask, regno);
2653 spill_size += 8;
2654 n_spilled += 1;
2655 spilled_gr_p = 1;
2656 }
2657
2658 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2659 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2660 {
2661 SET_HARD_REG_BIT (mask, regno);
2662 spill_size += 8;
2663 n_spilled += 1;
2664 }
2665
2666 /* Now come all special registers that might get saved in other
2667 general registers. */
2668
2669 if (frame_pointer_needed)
2670 {
2671 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2672 /* If we did not get a register, then we take LOC79. This is guaranteed
2673 to be free, even if regs_ever_live is already set, because this is
2674 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2675 as we don't count loc79 above. */
2676 if (current_frame_info.r[reg_fp] == 0)
2677 {
2678 current_frame_info.r[reg_fp] = LOC_REG (79);
2679 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2680 }
2681 }
2682
2683 if (! current_function_is_leaf)
2684 {
2685 /* Emit a save of BR0 if we call other functions. Do this even
2686 if this function doesn't return, as EH depends on this to be
2687 able to unwind the stack. */
2688 SET_HARD_REG_BIT (mask, BR_REG (0));
2689
2690 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2691 if (current_frame_info.r[reg_save_b0] == 0)
2692 {
2693 extra_spill_size += 8;
2694 n_spilled += 1;
2695 }
2696
2697 /* Similarly for ar.pfs. */
2698 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2699 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2700 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2701 {
2702 extra_spill_size += 8;
2703 n_spilled += 1;
2704 }
2705
2706 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2707 registers are clobbered, so we fall back to the stack. */
2708 current_frame_info.r[reg_save_gp]
2709 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2710 if (current_frame_info.r[reg_save_gp] == 0)
2711 {
2712 SET_HARD_REG_BIT (mask, GR_REG (1));
2713 spill_size += 8;
2714 n_spilled += 1;
2715 }
2716 }
2717 else
2718 {
2719 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2720 {
2721 SET_HARD_REG_BIT (mask, BR_REG (0));
2722 extra_spill_size += 8;
2723 n_spilled += 1;
2724 }
2725
2726 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2727 {
2728 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2729 current_frame_info.r[reg_save_ar_pfs]
2730 = find_gr_spill (reg_save_ar_pfs, 1);
2731 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2732 {
2733 extra_spill_size += 8;
2734 n_spilled += 1;
2735 }
2736 }
2737 }
2738
2739 /* Unwind descriptor hackery: things are most efficient if we allocate
2740 consecutive GR save registers for RP, PFS, FP in that order. However,
2741 it is absolutely critical that FP get the only hard register that's
2742 guaranteed to be free, so we allocated it first. If all three did
2743 happen to be allocated hard regs, and are consecutive, rearrange them
2744 into the preferred order now.
2745
2746 If we have already emitted code for any of those registers,
2747 then it's already too late to change. */
2748 min_regno = MIN (current_frame_info.r[reg_fp],
2749 MIN (current_frame_info.r[reg_save_b0],
2750 current_frame_info.r[reg_save_ar_pfs]));
2751 max_regno = MAX (current_frame_info.r[reg_fp],
2752 MAX (current_frame_info.r[reg_save_b0],
2753 current_frame_info.r[reg_save_ar_pfs]));
2754 if (min_regno > 0
2755 && min_regno + 2 == max_regno
2756 && (current_frame_info.r[reg_fp] == min_regno + 1
2757 || current_frame_info.r[reg_save_b0] == min_regno + 1
2758 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2759 && (emitted_frame_related_regs[reg_save_b0] == 0
2760 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2761 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2762 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2763 && (emitted_frame_related_regs[reg_fp] == 0
2764 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2765 {
2766 current_frame_info.r[reg_save_b0] = min_regno;
2767 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2768 current_frame_info.r[reg_fp] = min_regno + 2;
2769 }
2770
2771 /* See if we need to store the predicate register block. */
2772 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2773 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2774 break;
2775 if (regno <= PR_REG (63))
2776 {
2777 SET_HARD_REG_BIT (mask, PR_REG (0));
2778 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2779 if (current_frame_info.r[reg_save_pr] == 0)
2780 {
2781 extra_spill_size += 8;
2782 n_spilled += 1;
2783 }
2784
2785 /* ??? Mark them all as used so that register renaming and such
2786 are free to use them. */
2787 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2788 df_set_regs_ever_live (regno, true);
2789 }
2790
2791 /* If we're forced to use st8.spill, we're forced to save and restore
2792 ar.unat as well. The check for existing liveness allows inline asm
2793 to touch ar.unat. */
2794 if (spilled_gr_p || cfun->machine->n_varargs
2795 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2796 {
2797 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2798 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2799 current_frame_info.r[reg_save_ar_unat]
2800 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2801 if (current_frame_info.r[reg_save_ar_unat] == 0)
2802 {
2803 extra_spill_size += 8;
2804 n_spilled += 1;
2805 }
2806 }
2807
2808 if (df_regs_ever_live_p (AR_LC_REGNUM))
2809 {
2810 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2811 current_frame_info.r[reg_save_ar_lc]
2812 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2813 if (current_frame_info.r[reg_save_ar_lc] == 0)
2814 {
2815 extra_spill_size += 8;
2816 n_spilled += 1;
2817 }
2818 }
2819
2820 /* If we have an odd number of words of pretend arguments written to
2821 the stack, then the FR save area will be unaligned. We round the
2822 size of this area up to keep things 16 byte aligned. */
2823 if (spilled_fr_p)
2824 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2825 else
2826 pretend_args_size = crtl->args.pretend_args_size;
2827
2828 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2829 + crtl->outgoing_args_size);
2830 total_size = IA64_STACK_ALIGN (total_size);
2831
2832 /* We always use the 16-byte scratch area provided by the caller, but
2833 if we are a leaf function, there's no one to which we need to provide
2834 a scratch area. */
2835 if (current_function_is_leaf)
2836 total_size = MAX (0, total_size - 16);
2837
2838 current_frame_info.total_size = total_size;
2839 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2840 current_frame_info.spill_size = spill_size;
2841 current_frame_info.extra_spill_size = extra_spill_size;
2842 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2843 current_frame_info.n_spilled = n_spilled;
2844 current_frame_info.initialized = reload_completed;
2845 }
2846
2847 /* Worker function for TARGET_CAN_ELIMINATE. */
2848
2849 bool
2850 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2851 {
2852 return (to == BR_REG (0) ? current_function_is_leaf : true);
2853 }
2854
2855 /* Compute the initial difference between the specified pair of registers. */
2856
2857 HOST_WIDE_INT
2858 ia64_initial_elimination_offset (int from, int to)
2859 {
2860 HOST_WIDE_INT offset;
2861
2862 ia64_compute_frame_size (get_frame_size ());
2863 switch (from)
2864 {
2865 case FRAME_POINTER_REGNUM:
2866 switch (to)
2867 {
2868 case HARD_FRAME_POINTER_REGNUM:
2869 if (current_function_is_leaf)
2870 offset = -current_frame_info.total_size;
2871 else
2872 offset = -(current_frame_info.total_size
2873 - crtl->outgoing_args_size - 16);
2874 break;
2875
2876 case STACK_POINTER_REGNUM:
2877 if (current_function_is_leaf)
2878 offset = 0;
2879 else
2880 offset = 16 + crtl->outgoing_args_size;
2881 break;
2882
2883 default:
2884 gcc_unreachable ();
2885 }
2886 break;
2887
2888 case ARG_POINTER_REGNUM:
2889 /* Arguments start above the 16 byte save area, unless stdarg
2890 in which case we store through the 16 byte save area. */
2891 switch (to)
2892 {
2893 case HARD_FRAME_POINTER_REGNUM:
2894 offset = 16 - crtl->args.pretend_args_size;
2895 break;
2896
2897 case STACK_POINTER_REGNUM:
2898 offset = (current_frame_info.total_size
2899 + 16 - crtl->args.pretend_args_size);
2900 break;
2901
2902 default:
2903 gcc_unreachable ();
2904 }
2905 break;
2906
2907 default:
2908 gcc_unreachable ();
2909 }
2910
2911 return offset;
2912 }
2913
2914 /* If there are more than a trivial number of register spills, we use
2915 two interleaved iterators so that we can get two memory references
2916 per insn group.
2917
2918 In order to simplify things in the prologue and epilogue expanders,
2919 we use helper functions to fix up the memory references after the
2920 fact with the appropriate offsets to a POST_MODIFY memory mode.
2921 The following data structure tracks the state of the two iterators
2922 while insns are being emitted. */
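 /* For example (illustrative only): once a second reference through the same
    iterator is seen at a compatible offset, the previous address is rewritten
    in place into a post-modify form, roughly

      (mem (post_modify (reg iter) (plus (reg iter) (const_int disp))))

    so that each spill or fill also advances its iterator for free.  */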
2923
2924 struct spill_fill_data
2925 {
2926 rtx init_after; /* point at which to emit initializations */
2927 rtx init_reg[2]; /* initial base register */
2928 rtx iter_reg[2]; /* the iterator registers */
2929 rtx *prev_addr[2]; /* address of last memory use */
2930 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2931 HOST_WIDE_INT prev_off[2]; /* last offset */
2932 int n_iter; /* number of iterators in use */
2933 int next_iter; /* next iterator to use */
2934 unsigned int save_gr_used_mask;
2935 };
2936
2937 static struct spill_fill_data spill_fill_data;
2938
2939 static void
2940 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2941 {
2942 int i;
2943
2944 spill_fill_data.init_after = get_last_insn ();
2945 spill_fill_data.init_reg[0] = init_reg;
2946 spill_fill_data.init_reg[1] = init_reg;
2947 spill_fill_data.prev_addr[0] = NULL;
2948 spill_fill_data.prev_addr[1] = NULL;
2949 spill_fill_data.prev_insn[0] = NULL;
2950 spill_fill_data.prev_insn[1] = NULL;
2951 spill_fill_data.prev_off[0] = cfa_off;
2952 spill_fill_data.prev_off[1] = cfa_off;
2953 spill_fill_data.next_iter = 0;
2954 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2955
2956 spill_fill_data.n_iter = 1 + (n_spills > 2);
2957 for (i = 0; i < spill_fill_data.n_iter; ++i)
2958 {
2959 int regno = next_scratch_gr_reg ();
2960 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2961 current_frame_info.gr_used_mask |= 1 << regno;
2962 }
2963 }
2964
2965 static void
2966 finish_spill_pointers (void)
2967 {
2968 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2969 }
2970
2971 static rtx
2972 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2973 {
2974 int iter = spill_fill_data.next_iter;
2975 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2976 rtx disp_rtx = GEN_INT (disp);
2977 rtx mem;
2978
2979 if (spill_fill_data.prev_addr[iter])
2980 {
2981 if (satisfies_constraint_N (disp_rtx))
2982 {
2983 *spill_fill_data.prev_addr[iter]
2984 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2985 gen_rtx_PLUS (DImode,
2986 spill_fill_data.iter_reg[iter],
2987 disp_rtx));
2988 add_reg_note (spill_fill_data.prev_insn[iter],
2989 REG_INC, spill_fill_data.iter_reg[iter]);
2990 }
2991 else
2992 {
2993 /* ??? Could use register post_modify for loads. */
2994 if (!satisfies_constraint_I (disp_rtx))
2995 {
2996 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2997 emit_move_insn (tmp, disp_rtx);
2998 disp_rtx = tmp;
2999 }
3000 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3001 spill_fill_data.iter_reg[iter], disp_rtx));
3002 }
3003 }
3004 /* Micro-optimization: if we've created a frame pointer, it's at
3005 CFA 0, which may allow the real iterator to be initialized lower,
3006 slightly increasing parallelism. Also, if there are few saves
3007 it may eliminate the iterator entirely. */
3008 else if (disp == 0
3009 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3010 && frame_pointer_needed)
3011 {
3012 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3013 set_mem_alias_set (mem, get_varargs_alias_set ());
3014 return mem;
3015 }
3016 else
3017 {
3018 rtx seq, insn;
3019
3020 if (disp == 0)
3021 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3022 spill_fill_data.init_reg[iter]);
3023 else
3024 {
3025 start_sequence ();
3026
3027 if (!satisfies_constraint_I (disp_rtx))
3028 {
3029 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3030 emit_move_insn (tmp, disp_rtx);
3031 disp_rtx = tmp;
3032 }
3033
3034 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3035 spill_fill_data.init_reg[iter],
3036 disp_rtx));
3037
3038 seq = get_insns ();
3039 end_sequence ();
3040 }
3041
3042 /* Be careful in case this is the first insn in the sequence. */
3043 if (spill_fill_data.init_after)
3044 insn = emit_insn_after (seq, spill_fill_data.init_after);
3045 else
3046 {
3047 rtx first = get_insns ();
3048 if (first)
3049 insn = emit_insn_before (seq, first);
3050 else
3051 insn = emit_insn (seq);
3052 }
3053 spill_fill_data.init_after = insn;
3054 }
3055
3056 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3057
3058 /* ??? Not all of the spills are for varargs, but some of them are.
3059 The rest of the spills belong in an alias set of their own. But
3060 it doesn't actually hurt to include them here. */
3061 set_mem_alias_set (mem, get_varargs_alias_set ());
3062
3063 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3064 spill_fill_data.prev_off[iter] = cfa_off;
3065
3066 if (++iter >= spill_fill_data.n_iter)
3067 iter = 0;
3068 spill_fill_data.next_iter = iter;
3069
3070 return mem;
3071 }
3072
3073 static void
3074 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3075 rtx frame_reg)
3076 {
3077 int iter = spill_fill_data.next_iter;
3078 rtx mem, insn;
3079
3080 mem = spill_restore_mem (reg, cfa_off);
3081 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3082 spill_fill_data.prev_insn[iter] = insn;
3083
3084 if (frame_reg)
3085 {
3086 rtx base;
3087 HOST_WIDE_INT off;
3088
3089 RTX_FRAME_RELATED_P (insn) = 1;
3090
3091 /* Don't even pretend that the unwind code can intuit its way
3092 through a pair of interleaved post_modify iterators. Just
3093 provide the correct answer. */
3094
3095 if (frame_pointer_needed)
3096 {
3097 base = hard_frame_pointer_rtx;
3098 off = - cfa_off;
3099 }
3100 else
3101 {
3102 base = stack_pointer_rtx;
3103 off = current_frame_info.total_size - cfa_off;
3104 }
3105
3106 add_reg_note (insn, REG_CFA_OFFSET,
3107 gen_rtx_SET (VOIDmode,
3108 gen_rtx_MEM (GET_MODE (reg),
3109 plus_constant (base, off)),
3110 frame_reg));
3111 }
3112 }
3113
3114 static void
3115 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3116 {
3117 int iter = spill_fill_data.next_iter;
3118 rtx insn;
3119
3120 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3121 GEN_INT (cfa_off)));
3122 spill_fill_data.prev_insn[iter] = insn;
3123 }
3124
3125 /* Wrapper functions that discard the CONST_INT spill offset. These
3126 exist so that we can give gr_spill/gr_fill the offset they need and
3127 use a consistent function interface. */
3128
3129 static rtx
3130 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3131 {
3132 return gen_movdi (dest, src);
3133 }
3134
3135 static rtx
3136 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3137 {
3138 return gen_fr_spill (dest, src);
3139 }
3140
3141 static rtx
3142 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3143 {
3144 return gen_fr_restore (dest, src);
3145 }
3146
3147 /* Called after register allocation to add any instructions needed for the
3148 prologue. Using a prologue insn is favored compared to putting all of the
3149 instructions in output_function_prologue(), since it allows the scheduler
3150 to intermix instructions with the saves of the call-saved registers. In
3151 some cases, it might be necessary to emit a barrier instruction as the last
3152 insn to prevent such scheduling.
3153
3154 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3155 so that the debug info generation code can handle them properly.
3156
3157 The register save area is laid out like so:
3158 cfa+16
3159 [ varargs spill area ]
3160 [ fr register spill area ]
3161 [ br register spill area ]
3162 [ ar register spill area ]
3163 [ pr register spill area ]
3164 [ gr register spill area ] */
3165
3166 /* ??? Get inefficient code when the frame size is larger than can fit in an
3167 adds instruction. */
3168
3169 void
3170 ia64_expand_prologue (void)
3171 {
3172 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3173 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3174 rtx reg, alt_reg;
3175
3176 ia64_compute_frame_size (get_frame_size ());
3177 last_scratch_gr_reg = 15;
3178
3179 if (flag_stack_usage_info)
3180 current_function_static_stack_size = current_frame_info.total_size;
3181
3182 if (dump_file)
3183 {
3184 fprintf (dump_file, "ia64 frame related registers "
3185 "recorded in current_frame_info.r[]:\n");
3186 #define PRINTREG(a) if (current_frame_info.r[a]) \
3187 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3188 PRINTREG(reg_fp);
3189 PRINTREG(reg_save_b0);
3190 PRINTREG(reg_save_pr);
3191 PRINTREG(reg_save_ar_pfs);
3192 PRINTREG(reg_save_ar_unat);
3193 PRINTREG(reg_save_ar_lc);
3194 PRINTREG(reg_save_gp);
3195 #undef PRINTREG
3196 }
3197
3198 /* If there is no epilogue, then we don't need some prologue insns.
3199 We need to avoid emitting the dead prologue insns, because flow
3200 will complain about them. */
3201 if (optimize)
3202 {
3203 edge e;
3204 edge_iterator ei;
3205
3206 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3207 if ((e->flags & EDGE_FAKE) == 0
3208 && (e->flags & EDGE_FALLTHRU) != 0)
3209 break;
3210 epilogue_p = (e != NULL);
3211 }
3212 else
3213 epilogue_p = 1;
3214
3215 /* Set the local, input, and output register names. We need to do this
3216 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3217 half. If we use in/loc/out register names, then we get assembler errors
3218 in crtn.S because there is no alloc insn or regstk directive in there. */
3219 if (! TARGET_REG_NAMES)
3220 {
3221 int inputs = current_frame_info.n_input_regs;
3222 int locals = current_frame_info.n_local_regs;
3223 int outputs = current_frame_info.n_output_regs;
3224
3225 for (i = 0; i < inputs; i++)
3226 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3227 for (i = 0; i < locals; i++)
3228 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3229 for (i = 0; i < outputs; i++)
3230 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3231 }
3232
3233 /* Set the frame pointer register name. The regnum is logically loc79,
3234 but of course we'll not have allocated that many locals. Rather than
3235 worrying about renumbering the existing rtxs, we adjust the name. */
3236 /* ??? This code means that we can never use one local register when
3237 there is a frame pointer. loc79 gets wasted in this case, as it is
3238 renamed to a register that will never be used. See also the try_locals
3239 code in find_gr_spill. */
3240 if (current_frame_info.r[reg_fp])
3241 {
3242 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3243 reg_names[HARD_FRAME_POINTER_REGNUM]
3244 = reg_names[current_frame_info.r[reg_fp]];
3245 reg_names[current_frame_info.r[reg_fp]] = tmp;
3246 }
3247
3248 /* We don't need an alloc instruction if we've used no outputs or locals. */
3249 if (current_frame_info.n_local_regs == 0
3250 && current_frame_info.n_output_regs == 0
3251 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3252 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3253 {
3254 /* If there is no alloc, but there are input registers used, then we
3255 need a .regstk directive. */
3256 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3257 ar_pfs_save_reg = NULL_RTX;
3258 }
3259 else
3260 {
3261 current_frame_info.need_regstk = 0;
3262
3263 if (current_frame_info.r[reg_save_ar_pfs])
3264 {
3265 regno = current_frame_info.r[reg_save_ar_pfs];
3266 reg_emitted (reg_save_ar_pfs);
3267 }
3268 else
3269 regno = next_scratch_gr_reg ();
3270 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3271
3272 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3273 GEN_INT (current_frame_info.n_input_regs),
3274 GEN_INT (current_frame_info.n_local_regs),
3275 GEN_INT (current_frame_info.n_output_regs),
3276 GEN_INT (current_frame_info.n_rotate_regs)));
3277 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3278 }
3279
3280 /* Set up frame pointer, stack pointer, and spill iterators. */
3281
3282 n_varargs = cfun->machine->n_varargs;
3283 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3284 stack_pointer_rtx, 0);
3285
3286 if (frame_pointer_needed)
3287 {
3288 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3289 RTX_FRAME_RELATED_P (insn) = 1;
3290
3291 /* Force the unwind info to recognize this as defining a new CFA,
3292 rather than some temp register setup. */
3293 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3294 }
3295
3296 if (current_frame_info.total_size != 0)
3297 {
3298 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3299 rtx offset;
3300
3301 if (satisfies_constraint_I (frame_size_rtx))
3302 offset = frame_size_rtx;
3303 else
3304 {
3305 regno = next_scratch_gr_reg ();
3306 offset = gen_rtx_REG (DImode, regno);
3307 emit_move_insn (offset, frame_size_rtx);
3308 }
3309
3310 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3311 stack_pointer_rtx, offset));
3312
3313 if (! frame_pointer_needed)
3314 {
3315 RTX_FRAME_RELATED_P (insn) = 1;
3316 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3317 gen_rtx_SET (VOIDmode,
3318 stack_pointer_rtx,
3319 gen_rtx_PLUS (DImode,
3320 stack_pointer_rtx,
3321 frame_size_rtx)));
3322 }
3323
3324 /* ??? At this point we must generate a magic insn that appears to
3325 modify the stack pointer, the frame pointer, and all spill
3326 iterators. This would allow the most scheduling freedom. For
3327 now, just hard stop. */
3328 emit_insn (gen_blockage ());
3329 }
3330
3331 /* Must copy out ar.unat before doing any integer spills. */
3332 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3333 {
3334 if (current_frame_info.r[reg_save_ar_unat])
3335 {
3336 ar_unat_save_reg
3337 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3338 reg_emitted (reg_save_ar_unat);
3339 }
3340 else
3341 {
3342 alt_regno = next_scratch_gr_reg ();
3343 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3344 current_frame_info.gr_used_mask |= 1 << alt_regno;
3345 }
3346
3347 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3348 insn = emit_move_insn (ar_unat_save_reg, reg);
3349 if (current_frame_info.r[reg_save_ar_unat])
3350 {
3351 RTX_FRAME_RELATED_P (insn) = 1;
3352 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3353 }
3354
3355 /* Even if we're not going to generate an epilogue, we still
3356 need to save the register so that EH works. */
3357 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3358 emit_insn (gen_prologue_use (ar_unat_save_reg));
3359 }
3360 else
3361 ar_unat_save_reg = NULL_RTX;
3362
3363 /* Spill all varargs registers. Do this before spilling any GR registers,
3364 since we want the UNAT bits for the GR registers to override the UNAT
3365 bits from varargs, which we don't care about. */
3366
3367 cfa_off = -16;
3368 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3369 {
3370 reg = gen_rtx_REG (DImode, regno);
3371 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3372 }
3373
3374 /* Locate the bottom of the register save area. */
3375 cfa_off = (current_frame_info.spill_cfa_off
3376 + current_frame_info.spill_size
3377 + current_frame_info.extra_spill_size);
3378
3379 /* Save the predicate register block either in a register or in memory. */
3380 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3381 {
3382 reg = gen_rtx_REG (DImode, PR_REG (0));
3383 if (current_frame_info.r[reg_save_pr] != 0)
3384 {
3385 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3386 reg_emitted (reg_save_pr);
3387 insn = emit_move_insn (alt_reg, reg);
3388
3389 /* ??? Denote pr spill/fill by a DImode move that modifies all
3390 64 hard registers. */
3391 RTX_FRAME_RELATED_P (insn) = 1;
3392 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3393
3394 /* Even if we're not going to generate an epilogue, we still
3395 need to save the register so that EH works. */
3396 if (! epilogue_p)
3397 emit_insn (gen_prologue_use (alt_reg));
3398 }
3399 else
3400 {
3401 alt_regno = next_scratch_gr_reg ();
3402 alt_reg = gen_rtx_REG (DImode, alt_regno);
3403 insn = emit_move_insn (alt_reg, reg);
3404 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3405 cfa_off -= 8;
3406 }
3407 }
3408
3409 /* Handle AR regs in numerical order. All of them get special handling. */
3410 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3411 && current_frame_info.r[reg_save_ar_unat] == 0)
3412 {
3413 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3414 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3415 cfa_off -= 8;
3416 }
3417
3418 /* The alloc insn already copied ar.pfs into a general register. The
3419 only thing we have to do now is copy that register to a stack slot
3420 if we'd not allocated a local register for the job. */
3421 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3422 && current_frame_info.r[reg_save_ar_pfs] == 0)
3423 {
3424 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3425 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3426 cfa_off -= 8;
3427 }
3428
3429 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3430 {
3431 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3432 if (current_frame_info.r[reg_save_ar_lc] != 0)
3433 {
3434 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3435 reg_emitted (reg_save_ar_lc);
3436 insn = emit_move_insn (alt_reg, reg);
3437 RTX_FRAME_RELATED_P (insn) = 1;
3438 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3439
3440 /* Even if we're not going to generate an epilogue, we still
3441 need to save the register so that EH works. */
3442 if (! epilogue_p)
3443 emit_insn (gen_prologue_use (alt_reg));
3444 }
3445 else
3446 {
3447 alt_regno = next_scratch_gr_reg ();
3448 alt_reg = gen_rtx_REG (DImode, alt_regno);
3449 emit_move_insn (alt_reg, reg);
3450 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3451 cfa_off -= 8;
3452 }
3453 }
3454
3455 /* Save the return pointer. */
3456 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3457 {
3458 reg = gen_rtx_REG (DImode, BR_REG (0));
3459 if (current_frame_info.r[reg_save_b0] != 0)
3460 {
3461 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3462 reg_emitted (reg_save_b0);
3463 insn = emit_move_insn (alt_reg, reg);
3464 RTX_FRAME_RELATED_P (insn) = 1;
3465 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3466
3467 /* Even if we're not going to generate an epilogue, we still
3468 need to save the register so that EH works. */
3469 if (! epilogue_p)
3470 emit_insn (gen_prologue_use (alt_reg));
3471 }
3472 else
3473 {
3474 alt_regno = next_scratch_gr_reg ();
3475 alt_reg = gen_rtx_REG (DImode, alt_regno);
3476 emit_move_insn (alt_reg, reg);
3477 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3478 cfa_off -= 8;
3479 }
3480 }
3481
3482 if (current_frame_info.r[reg_save_gp])
3483 {
3484 reg_emitted (reg_save_gp);
3485 insn = emit_move_insn (gen_rtx_REG (DImode,
3486 current_frame_info.r[reg_save_gp]),
3487 pic_offset_table_rtx);
3488 }
3489
3490 /* We should now be at the base of the gr/br/fr spill area. */
3491 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3492 + current_frame_info.spill_size));
3493
3494 /* Spill all general registers. */
3495 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3496 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3497 {
3498 reg = gen_rtx_REG (DImode, regno);
3499 do_spill (gen_gr_spill, reg, cfa_off, reg);
3500 cfa_off -= 8;
3501 }
3502
3503 /* Spill the rest of the BR registers. */
3504 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3505 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3506 {
3507 alt_regno = next_scratch_gr_reg ();
3508 alt_reg = gen_rtx_REG (DImode, alt_regno);
3509 reg = gen_rtx_REG (DImode, regno);
3510 emit_move_insn (alt_reg, reg);
3511 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3512 cfa_off -= 8;
3513 }
3514
3515 /* Align the frame and spill all FR registers. */
3516 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3517 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3518 {
3519 gcc_assert (!(cfa_off & 15));
3520 reg = gen_rtx_REG (XFmode, regno);
3521 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3522 cfa_off -= 16;
3523 }
3524
3525 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3526
3527 finish_spill_pointers ();
3528 }
3529
3530 /* Output the textual info surrounding the prologue. */
3531
3532 void
3533 ia64_start_function (FILE *file, const char *fnname,
3534 tree decl ATTRIBUTE_UNUSED)
3535 {
3536 #if VMS_DEBUGGING_INFO
3537 if (vms_debug_main
3538 && debug_info_level > DINFO_LEVEL_NONE
3539 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3540 {
3541 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3542 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3543 dwarf2out_vms_debug_main_pointer ();
3544 vms_debug_main = 0;
3545 }
3546 #endif
3547
3548 fputs ("\t.proc ", file);
3549 assemble_name (file, fnname);
3550 fputc ('\n', file);
3551 ASM_OUTPUT_LABEL (file, fnname);
3552 }
3553
3554 /* Called after register allocation to add any instructions needed for the
3555 epilogue. Using an epilogue insn is favored compared to putting all of the
3556 instructions in output_function_prologue(), since it allows the scheduler
3557 to intermix instructions with the saves of the caller saved registers. In
3558 some cases, it might be necessary to emit a barrier instruction as the last
3559 insn to prevent such scheduling. */
3560
3561 void
3562 ia64_expand_epilogue (int sibcall_p)
3563 {
3564 rtx insn, reg, alt_reg, ar_unat_save_reg;
3565 int regno, alt_regno, cfa_off;
3566
3567 ia64_compute_frame_size (get_frame_size ());
3568
3569 /* If there is a frame pointer, then we use it instead of the stack
3570 pointer, so that the stack pointer does not need to be valid when
3571 the epilogue starts. See EXIT_IGNORE_STACK. */
3572 if (frame_pointer_needed)
3573 setup_spill_pointers (current_frame_info.n_spilled,
3574 hard_frame_pointer_rtx, 0);
3575 else
3576 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3577 current_frame_info.total_size);
3578
3579 if (current_frame_info.total_size != 0)
3580 {
3581 /* ??? At this point we must generate a magic insn that appears to
3582 modify the spill iterators and the frame pointer. This would
3583 allow the most scheduling freedom. For now, just hard stop. */
3584 emit_insn (gen_blockage ());
3585 }
3586
3587 /* Locate the bottom of the register save area. */
3588 cfa_off = (current_frame_info.spill_cfa_off
3589 + current_frame_info.spill_size
3590 + current_frame_info.extra_spill_size);
3591
3592 /* Restore the predicate registers. */
3593 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3594 {
3595 if (current_frame_info.r[reg_save_pr] != 0)
3596 {
3597 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3598 reg_emitted (reg_save_pr);
3599 }
3600 else
3601 {
3602 alt_regno = next_scratch_gr_reg ();
3603 alt_reg = gen_rtx_REG (DImode, alt_regno);
3604 do_restore (gen_movdi_x, alt_reg, cfa_off);
3605 cfa_off -= 8;
3606 }
3607 reg = gen_rtx_REG (DImode, PR_REG (0));
3608 emit_move_insn (reg, alt_reg);
3609 }
3610
3611 /* Restore the application registers. */
3612
3613 /* Load the saved unat from the stack, but do not restore it until
3614 after the GRs have been restored. */
3615 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3616 {
3617 if (current_frame_info.r[reg_save_ar_unat] != 0)
3618 {
3619 ar_unat_save_reg
3620 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3621 reg_emitted (reg_save_ar_unat);
3622 }
3623 else
3624 {
3625 alt_regno = next_scratch_gr_reg ();
3626 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3627 current_frame_info.gr_used_mask |= 1 << alt_regno;
3628 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3629 cfa_off -= 8;
3630 }
3631 }
3632 else
3633 ar_unat_save_reg = NULL_RTX;
3634
3635 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3636 {
3637 reg_emitted (reg_save_ar_pfs);
3638 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3639 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3640 emit_move_insn (reg, alt_reg);
3641 }
3642 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3643 {
3644 alt_regno = next_scratch_gr_reg ();
3645 alt_reg = gen_rtx_REG (DImode, alt_regno);
3646 do_restore (gen_movdi_x, alt_reg, cfa_off);
3647 cfa_off -= 8;
3648 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3649 emit_move_insn (reg, alt_reg);
3650 }
3651
3652 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3653 {
3654 if (current_frame_info.r[reg_save_ar_lc] != 0)
3655 {
3656 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3657 reg_emitted (reg_save_ar_lc);
3658 }
3659 else
3660 {
3661 alt_regno = next_scratch_gr_reg ();
3662 alt_reg = gen_rtx_REG (DImode, alt_regno);
3663 do_restore (gen_movdi_x, alt_reg, cfa_off);
3664 cfa_off -= 8;
3665 }
3666 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3667 emit_move_insn (reg, alt_reg);
3668 }
3669
3670 /* Restore the return pointer. */
3671 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3672 {
3673 if (current_frame_info.r[reg_save_b0] != 0)
3674 {
3675 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3676 reg_emitted (reg_save_b0);
3677 }
3678 else
3679 {
3680 alt_regno = next_scratch_gr_reg ();
3681 alt_reg = gen_rtx_REG (DImode, alt_regno);
3682 do_restore (gen_movdi_x, alt_reg, cfa_off);
3683 cfa_off -= 8;
3684 }
3685 reg = gen_rtx_REG (DImode, BR_REG (0));
3686 emit_move_insn (reg, alt_reg);
3687 }
3688
3689 /* We should now be at the base of the gr/br/fr spill area. */
3690 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3691 + current_frame_info.spill_size));
3692
3693 /* The GP may be stored on the stack in the prologue, but it's
3694 never restored in the epilogue. Skip the stack slot. */
3695 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3696 cfa_off -= 8;
3697
3698 /* Restore all general registers. */
3699 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3700 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3701 {
3702 reg = gen_rtx_REG (DImode, regno);
3703 do_restore (gen_gr_restore, reg, cfa_off);
3704 cfa_off -= 8;
3705 }
3706
3707 /* Restore the branch registers. */
3708 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3709 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3710 {
3711 alt_regno = next_scratch_gr_reg ();
3712 alt_reg = gen_rtx_REG (DImode, alt_regno);
3713 do_restore (gen_movdi_x, alt_reg, cfa_off);
3714 cfa_off -= 8;
3715 reg = gen_rtx_REG (DImode, regno);
3716 emit_move_insn (reg, alt_reg);
3717 }
3718
3719 /* Restore floating point registers. */
3720 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3721 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3722 {
3723 gcc_assert (!(cfa_off & 15));
3724 reg = gen_rtx_REG (XFmode, regno);
3725 do_restore (gen_fr_restore_x, reg, cfa_off);
3726 cfa_off -= 16;
3727 }
3728
3729 /* Restore ar.unat for real. */
3730 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3731 {
3732 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3733 emit_move_insn (reg, ar_unat_save_reg);
3734 }
3735
3736 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3737
3738 finish_spill_pointers ();
3739
3740 if (current_frame_info.total_size
3741 || cfun->machine->ia64_eh_epilogue_sp
3742 || frame_pointer_needed)
3743 {
3744 /* ??? At this point we must generate a magic insn that appears to
3745 modify the spill iterators, the stack pointer, and the frame
3746 pointer. This would allow the most scheduling freedom. For now,
3747 just hard stop. */
3748 emit_insn (gen_blockage ());
3749 }
3750
3751 if (cfun->machine->ia64_eh_epilogue_sp)
3752 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3753 else if (frame_pointer_needed)
3754 {
3755 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3756 RTX_FRAME_RELATED_P (insn) = 1;
3757 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
3758 }
3759 else if (current_frame_info.total_size)
3760 {
3761 rtx offset, frame_size_rtx;
3762
3763 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3764 if (satisfies_constraint_I (frame_size_rtx))
3765 offset = frame_size_rtx;
3766 else
3767 {
3768 regno = next_scratch_gr_reg ();
3769 offset = gen_rtx_REG (DImode, regno);
3770 emit_move_insn (offset, frame_size_rtx);
3771 }
3772
3773 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3774 offset));
3775
3776 RTX_FRAME_RELATED_P (insn) = 1;
3777 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3778 gen_rtx_SET (VOIDmode,
3779 stack_pointer_rtx,
3780 gen_rtx_PLUS (DImode,
3781 stack_pointer_rtx,
3782 frame_size_rtx)));
3783 }
3784
3785 if (cfun->machine->ia64_eh_epilogue_bsp)
3786 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3787
3788 if (! sibcall_p)
3789 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3790 else
3791 {
3792 int fp = GR_REG (2);
3793 /* We need a throwaway register here; r0 and r1 are reserved,
3794 so r2 is the first available call-clobbered register. If
3795 there was a frame_pointer register, we may have swapped the
3796 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3797 sure we're using the string "r2" when emitting the register
3798 name for the assembler. */
3799 if (current_frame_info.r[reg_fp]
3800 && current_frame_info.r[reg_fp] == GR_REG (2))
3801 fp = HARD_FRAME_POINTER_REGNUM;
3802
3803 /* We must emit an alloc to force the input registers to become output
3804 registers. Otherwise, if the callee tries to pass its parameters
3805 through to another call without an intervening alloc, then these
3806 values get lost. */
3807 /* ??? We don't need to preserve all input registers. We only need to
3808 preserve those input registers used as arguments to the sibling call.
3809 It is unclear how to compute that number here. */
3810 if (current_frame_info.n_input_regs != 0)
3811 {
3812 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3813 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3814 const0_rtx, const0_rtx,
3815 n_inputs, const0_rtx));
3816 RTX_FRAME_RELATED_P (insn) = 1;
3817 }
3818 }
3819 }
3820
3821 /* Return 1 if br.ret can do all the work required to return from a
3822 function. */
3823
3824 int
3825 ia64_direct_return (void)
3826 {
3827 if (reload_completed && ! frame_pointer_needed)
3828 {
3829 ia64_compute_frame_size (get_frame_size ());
3830
3831 return (current_frame_info.total_size == 0
3832 && current_frame_info.n_spilled == 0
3833 && current_frame_info.r[reg_save_b0] == 0
3834 && current_frame_info.r[reg_save_pr] == 0
3835 && current_frame_info.r[reg_save_ar_pfs] == 0
3836 && current_frame_info.r[reg_save_ar_unat] == 0
3837 && current_frame_info.r[reg_save_ar_lc] == 0);
3838 }
3839 return 0;
3840 }
3841
3842 /* Return the magic cookie that we use to hold the return address
3843 during early compilation. */
3844
3845 rtx
3846 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3847 {
3848 if (count != 0)
3849 return NULL;
3850 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3851 }
3852
3853 /* Split this value after reload, now that we know where the return
3854 address is saved. */
3855
3856 void
3857 ia64_split_return_addr_rtx (rtx dest)
3858 {
3859 rtx src;
3860
3861 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3862 {
3863 if (current_frame_info.r[reg_save_b0] != 0)
3864 {
3865 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3866 reg_emitted (reg_save_b0);
3867 }
3868 else
3869 {
3870 HOST_WIDE_INT off;
3871 unsigned int regno;
3872 rtx off_r;
3873
3874 /* Compute offset from CFA for BR0. */
3875 /* ??? Must be kept in sync with ia64_expand_prologue. */
3876 off = (current_frame_info.spill_cfa_off
3877 + current_frame_info.spill_size);
3878 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3879 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3880 off -= 8;
3881
3882 /* Convert CFA offset to a register based offset. */
3883 if (frame_pointer_needed)
3884 src = hard_frame_pointer_rtx;
3885 else
3886 {
3887 src = stack_pointer_rtx;
3888 off += current_frame_info.total_size;
3889 }
3890
3891 /* Load address into scratch register. */
3892 off_r = GEN_INT (off);
3893 if (satisfies_constraint_I (off_r))
3894 emit_insn (gen_adddi3 (dest, src, off_r));
3895 else
3896 {
3897 emit_move_insn (dest, off_r);
3898 emit_insn (gen_adddi3 (dest, src, dest));
3899 }
3900
3901 src = gen_rtx_MEM (Pmode, dest);
3902 }
3903 }
3904 else
3905 src = gen_rtx_REG (DImode, BR_REG (0));
3906
3907 emit_move_insn (dest, src);
3908 }
3909
3910 int
3911 ia64_hard_regno_rename_ok (int from, int to)
3912 {
3913 /* Don't clobber any of the registers we reserved for the prologue. */
3914 unsigned int r;
3915
3916 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3917 if (to == current_frame_info.r[r]
3918 || from == current_frame_info.r[r]
3919 || to == emitted_frame_related_regs[r]
3920 || from == emitted_frame_related_regs[r])
3921 return 0;
3922
3923 /* Don't use output registers outside the register frame. */
3924 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3925 return 0;
3926
3927 /* Retain even/oddness on predicate register pairs. */
3928 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3929 return (from & 1) == (to & 1);
3930
3931 return 1;
3932 }
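/* For example, under the even/odd rule above p6 may be renamed to p8,
   but not to p7.  */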
3933
3934 /* Target hook for assembling integer objects. Handle word-sized
3935 aligned objects and detect the cases when @fptr is needed. */
3936
3937 static bool
3938 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3939 {
3940 if (size == POINTER_SIZE / BITS_PER_UNIT
3941 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3942 && GET_CODE (x) == SYMBOL_REF
3943 && SYMBOL_REF_FUNCTION_P (x))
3944 {
3945 static const char * const directive[2][2] = {
3946 /* 64-bit pointer */ /* 32-bit pointer */
3947 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3948 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3949 };
3950 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3951 output_addr_const (asm_out_file, x);
3952 fputs (")\n", asm_out_file);
3953 return true;
3954 }
3955 return default_assemble_integer (x, size, aligned_p);
3956 }
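/* For example, a pointer-sized, aligned reference to a function "foo"
   in 64-bit code comes out as "data8 @fptr(foo)" (or data8.ua when
   unaligned), which asks the linker for an official function descriptor
   rather than a raw code address.  */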
3957
3958 /* Emit the function prologue. */
3959
3960 static void
3961 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3962 {
3963 int mask, grsave, grsave_prev;
3964
3965 if (current_frame_info.need_regstk)
3966 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3967 current_frame_info.n_input_regs,
3968 current_frame_info.n_local_regs,
3969 current_frame_info.n_output_regs,
3970 current_frame_info.n_rotate_regs);
3971
3972 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
3973 return;
3974
3975 /* Emit the .prologue directive. */
3976
3977 mask = 0;
3978 grsave = grsave_prev = 0;
3979 if (current_frame_info.r[reg_save_b0] != 0)
3980 {
3981 mask |= 8;
3982 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3983 }
3984 if (current_frame_info.r[reg_save_ar_pfs] != 0
3985 && (grsave_prev == 0
3986 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3987 {
3988 mask |= 4;
3989 if (grsave_prev == 0)
3990 grsave = current_frame_info.r[reg_save_ar_pfs];
3991 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3992 }
3993 if (current_frame_info.r[reg_fp] != 0
3994 && (grsave_prev == 0
3995 || current_frame_info.r[reg_fp] == grsave_prev + 1))
3996 {
3997 mask |= 2;
3998 if (grsave_prev == 0)
3999 grsave = HARD_FRAME_POINTER_REGNUM;
4000 grsave_prev = current_frame_info.r[reg_fp];
4001 }
4002 if (current_frame_info.r[reg_save_pr] != 0
4003 && (grsave_prev == 0
4004 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4005 {
4006 mask |= 1;
4007 if (grsave_prev == 0)
4008 grsave = current_frame_info.r[reg_save_pr];
4009 }
4010
4011 if (mask && TARGET_GNU_AS)
4012 fprintf (file, "\t.prologue %d, %d\n", mask,
4013 ia64_dbx_register_number (grsave));
4014 else
4015 fputs ("\t.prologue\n", file);
4016
4017 /* Emit a .spill directive, if necessary, to relocate the base of
4018 the register spill area. */
4019 if (current_frame_info.spill_cfa_off != -16)
4020 fprintf (file, "\t.spill %ld\n",
4021 (long) (current_frame_info.spill_cfa_off
4022 + current_frame_info.spill_size));
4023 }
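/* For example, if b0 was saved in a general register and ar.pfs in the
   next consecutive one, the mask above becomes 0x8 | 0x4 = 0xc, grsave
   is the register holding b0, and under the GNU assembler we emit
   ".prologue 12, N" where N is that register's debug number.  */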
4024
4025 /* Emit the .body directive at the scheduled end of the prologue. */
4026
4027 static void
4028 ia64_output_function_end_prologue (FILE *file)
4029 {
4030 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4031 return;
4032
4033 fputs ("\t.body\n", file);
4034 }
4035
4036 /* Emit the function epilogue. */
4037
4038 static void
4039 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4040 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4041 {
4042 int i;
4043
4044 if (current_frame_info.r[reg_fp])
4045 {
4046 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4047 reg_names[HARD_FRAME_POINTER_REGNUM]
4048 = reg_names[current_frame_info.r[reg_fp]];
4049 reg_names[current_frame_info.r[reg_fp]] = tmp;
4050 reg_emitted (reg_fp);
4051 }
4052 if (! TARGET_REG_NAMES)
4053 {
4054 for (i = 0; i < current_frame_info.n_input_regs; i++)
4055 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4056 for (i = 0; i < current_frame_info.n_local_regs; i++)
4057 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4058 for (i = 0; i < current_frame_info.n_output_regs; i++)
4059 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4060 }
4061
4062 current_frame_info.initialized = 0;
4063 }
4064
4065 int
4066 ia64_dbx_register_number (int regno)
4067 {
4068 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4069 from its home at loc79 to something inside the register frame. We
4070 must perform the same renumbering here for the debug info. */
4071 if (current_frame_info.r[reg_fp])
4072 {
4073 if (regno == HARD_FRAME_POINTER_REGNUM)
4074 regno = current_frame_info.r[reg_fp];
4075 else if (regno == current_frame_info.r[reg_fp])
4076 regno = HARD_FRAME_POINTER_REGNUM;
4077 }
4078
4079 if (IN_REGNO_P (regno))
4080 return 32 + regno - IN_REG (0);
4081 else if (LOC_REGNO_P (regno))
4082 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4083 else if (OUT_REGNO_P (regno))
4084 return (32 + current_frame_info.n_input_regs
4085 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4086 else
4087 return regno;
4088 }
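/* For example, with 2 input and 3 local registers, in1 maps to debug
   register 33, loc0 to 34, and out0 to 37; registers outside the
   stacked frame keep their hardware numbers.  */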
4089
4090 /* Implement TARGET_TRAMPOLINE_INIT.
4091
4092 The trampoline should set the static chain pointer to value placed
4093 into the trampoline and should branch to the specified routine.
4094 To make the normal indirect-subroutine calling convention work,
4095 the trampoline must look like a function descriptor; the first
4096 word being the target address and the second being the target's
4097 global pointer.
4098
4099 We abuse the concept of a global pointer by arranging for it
4100 to point to the data we need to load. The complete trampoline
4101 has the following form:
4102
4103 +-------------------+ \
4104 TRAMP: | __ia64_trampoline | |
4105 +-------------------+ > fake function descriptor
4106 | TRAMP+16 | |
4107 +-------------------+ /
4108 | target descriptor |
4109 +-------------------+
4110 | static link |
4111 +-------------------+
4112 */
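/* Concretely, on a typical 64-bit ELF target the initialized trampoline
   is four 8-byte words: the address of __ia64_trampoline, TRAMP+16, the
   target descriptor, and the static chain value.  A call through the
   fake descriptor therefore enters __ia64_trampoline with gp = TRAMP+16,
   from which it can load the real target and static chain.  */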
4113
4114 static void
4115 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4116 {
4117 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4118 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4119
4120 /* The Intel assembler requires that the global __ia64_trampoline symbol
4121 be declared explicitly. */
4122 if (!TARGET_GNU_AS)
4123 {
4124 static bool declared_ia64_trampoline = false;
4125
4126 if (!declared_ia64_trampoline)
4127 {
4128 declared_ia64_trampoline = true;
4129 (*targetm.asm_out.globalize_label) (asm_out_file,
4130 "__ia64_trampoline");
4131 }
4132 }
4133
4134 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4135 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4136 fnaddr = convert_memory_address (Pmode, fnaddr);
4137 static_chain = convert_memory_address (Pmode, static_chain);
4138
4139 /* Load up our iterator. */
4140 addr_reg = copy_to_reg (addr);
4141 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4142
4143 /* The first two words are the fake descriptor:
4144 __ia64_trampoline, ADDR+16. */
4145 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4146 if (TARGET_ABI_OPEN_VMS)
4147 {
4148 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4149 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4150 relocation against function symbols to make it identical to the
4151 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4152 strict ELF and dereference to get the bare code address. */
4153 rtx reg = gen_reg_rtx (Pmode);
4154 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4155 emit_move_insn (reg, tramp);
4156 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4157 tramp = reg;
4158 }
4159 emit_move_insn (m_tramp, tramp);
4160 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4161 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4162
4163 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4164 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4165 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4166
4167 /* The third word is the target descriptor. */
4168 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4169 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4170 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4171
4172 /* The fourth word is the static chain. */
4173 emit_move_insn (m_tramp, static_chain);
4174 }
4175 \f
4176 /* Do any needed setup for a variadic function. CUM has not been updated
4177 for the last named argument which has type TYPE and mode MODE.
4178
4179 We generate the actual spill instructions during prologue generation. */
4180
4181 static void
4182 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4183 tree type, int * pretend_size,
4184 int second_time ATTRIBUTE_UNUSED)
4185 {
4186 CUMULATIVE_ARGS next_cum = *cum;
4187
4188 /* Skip the current argument. */
4189 ia64_function_arg_advance (&next_cum, mode, type, 1);
4190
4191 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4192 {
4193 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4194 *pretend_size = n * UNITS_PER_WORD;
4195 cfun->machine->n_varargs = n;
4196 }
4197 }
4198
4199 /* Check whether TYPE is a homogeneous floating point aggregate. If
4200 it is, return the mode of the floating point type that appears
4201 in all leaves. If it is not, return VOIDmode.
4202
4203 An aggregate is a homogeneous floating point aggregate if all
4204 fields/elements in it have the same floating point type (e.g.,
4205 SFmode). 128-bit quad-precision floats are excluded.
4206
4207 Variable sized aggregates should never arrive here, since we should
4208 have already decided to pass them by reference. Top-level zero-sized
4209 aggregates are excluded because our parallels crash the middle-end. */
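/* E.g. struct { float x, y; } and float[4] both yield SFmode, while
   struct { float x; double y; } or any aggregate containing a TFmode
   quad yields VOIDmode.  */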
4210
4211 static enum machine_mode
4212 hfa_element_mode (const_tree type, bool nested)
4213 {
4214 enum machine_mode element_mode = VOIDmode;
4215 enum machine_mode mode;
4216 enum tree_code code = TREE_CODE (type);
4217 int know_element_mode = 0;
4218 tree t;
4219
4220 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4221 return VOIDmode;
4222
4223 switch (code)
4224 {
4225 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4226 case BOOLEAN_TYPE: case POINTER_TYPE:
4227 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4228 case LANG_TYPE: case FUNCTION_TYPE:
4229 return VOIDmode;
4230
4231 /* Fortran complex types are supposed to be HFAs, so we need to handle
4232 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4233 types though. */
4234 case COMPLEX_TYPE:
4235 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4236 && TYPE_MODE (type) != TCmode)
4237 return GET_MODE_INNER (TYPE_MODE (type));
4238 else
4239 return VOIDmode;
4240
4241 case REAL_TYPE:
4242 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4243 mode if this is contained within an aggregate. */
4244 if (nested && TYPE_MODE (type) != TFmode)
4245 return TYPE_MODE (type);
4246 else
4247 return VOIDmode;
4248
4249 case ARRAY_TYPE:
4250 return hfa_element_mode (TREE_TYPE (type), 1);
4251
4252 case RECORD_TYPE:
4253 case UNION_TYPE:
4254 case QUAL_UNION_TYPE:
4255 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4256 {
4257 if (TREE_CODE (t) != FIELD_DECL)
4258 continue;
4259
4260 mode = hfa_element_mode (TREE_TYPE (t), 1);
4261 if (know_element_mode)
4262 {
4263 if (mode != element_mode)
4264 return VOIDmode;
4265 }
4266 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4267 return VOIDmode;
4268 else
4269 {
4270 know_element_mode = 1;
4271 element_mode = mode;
4272 }
4273 }
4274 return element_mode;
4275
4276 default:
4277 /* If we reach here, we probably have some front-end specific type
4278 that the backend doesn't know about. This can happen via the
4279 aggregate_value_p call in init_function_start. All we can do is
4280 ignore unknown tree types. */
4281 return VOIDmode;
4282 }
4283
4284 return VOIDmode;
4285 }
4286
4287 /* Return the number of words required to hold a quantity of TYPE and MODE
4288 when passed as an argument. */
4289 static int
4290 ia64_function_arg_words (const_tree type, enum machine_mode mode)
4291 {
4292 int words;
4293
4294 if (mode == BLKmode)
4295 words = int_size_in_bytes (type);
4296 else
4297 words = GET_MODE_SIZE (mode);
4298
4299 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4300 }
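/* E.g. with 8-byte argument words, a 12-byte BLKmode aggregate occupies
   (12 + 8 - 1) / 8 = 2 argument slots.  */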
4301
4302 /* Return the number of registers that should be skipped so the current
4303 argument (described by TYPE and WORDS) will be properly aligned.
4304
4305 Integer and float arguments larger than 8 bytes start at the next
4306 even boundary. Aggregates larger than 8 bytes start at the next
4307 even boundary if the aggregate has 16 byte alignment. Note that
4308 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4309 but are still to be aligned in registers.
4310
4311 ??? The ABI does not specify how to handle aggregates with
4312 alignment from 9 to 15 bytes, or greater than 16. We handle them
4313 all as if they had 16 byte alignment. Such aggregates can occur
4314 only if gcc extensions are used. */
4315 static int
4316 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4317 const_tree type, int words)
4318 {
4319 /* No registers are skipped on VMS. */
4320 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4321 return 0;
4322
4323 if (type
4324 && TREE_CODE (type) != INTEGER_TYPE
4325 && TREE_CODE (type) != REAL_TYPE)
4326 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4327 else
4328 return words > 1;
4329 }
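/* E.g. when cum->words is odd, a 16-byte-aligned aggregate is preceded
   by one skipped slot so that it starts on an even slot, while an
   8-byte integer (words == 1) is not.  */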
4330
4331 /* Return rtx for register where argument is passed, or zero if it is passed
4332 on the stack. */
4333 /* ??? 128-bit quad-precision floats are always passed in general
4334 registers. */
4335
4336 static rtx
4337 ia64_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
4338 const_tree type, bool named, bool incoming)
4339 {
4340 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4341 int words = ia64_function_arg_words (type, mode);
4342 int offset = ia64_function_arg_offset (cum, type, words);
4343 enum machine_mode hfa_mode = VOIDmode;
4344
4345 /* For OPEN VMS, emit the instruction setting up the argument register here,
4346 when we know this will be together with the other arguments setup related
4347 insns. This is not the conceptually best place to do this, but this is
4348 the easiest as we have convenient access to cumulative args info. */
4349
4350 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4351 && named == 1)
4352 {
4353 unsigned HOST_WIDE_INT regval = cum->words;
4354 int i;
4355
4356 for (i = 0; i < 8; i++)
4357 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4358
4359 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4360 GEN_INT (regval));
4361 }
4362
4363 /* If all argument slots are used, then it must go on the stack. */
4364 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4365 return 0;
4366
4367 /* Check for and handle homogeneous FP aggregates. */
4368 if (type)
4369 hfa_mode = hfa_element_mode (type, 0);
4370
4371 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4372 and unprototyped hfas are passed specially. */
4373 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4374 {
4375 rtx loc[16];
4376 int i = 0;
4377 int fp_regs = cum->fp_regs;
4378 int int_regs = cum->words + offset;
4379 int hfa_size = GET_MODE_SIZE (hfa_mode);
4380 int byte_size;
4381 int args_byte_size;
4382
4383 /* If prototyped, pass it in FR regs then GR regs.
4384 If not prototyped, pass it in both FR and GR regs.
4385
4386 If this is an SFmode aggregate, then it is possible to run out of
4387 FR regs while GR regs are still left. In that case, we pass the
4388 remaining part in the GR regs. */
4389
4390 /* Fill the FP regs. We do this always. We stop if we reach the end
4391 of the argument, the last FP register, or the last argument slot. */
4392
4393 byte_size = ((mode == BLKmode)
4394 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4395 args_byte_size = int_regs * UNITS_PER_WORD;
4396 offset = 0;
4397 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4398 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4399 {
4400 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4401 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4402 + fp_regs)),
4403 GEN_INT (offset));
4404 offset += hfa_size;
4405 args_byte_size += hfa_size;
4406 fp_regs++;
4407 }
4408
4409 /* If no prototype, then the whole thing must go in GR regs. */
4410 if (! cum->prototype)
4411 offset = 0;
4412 /* If this is an SFmode aggregate, then we might have some left over
4413 that needs to go in GR regs. */
4414 else if (byte_size != offset)
4415 int_regs += offset / UNITS_PER_WORD;
4416
4417 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4418
4419 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4420 {
4421 enum machine_mode gr_mode = DImode;
4422 unsigned int gr_size;
4423
4424 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4425 then this goes in a GR reg left adjusted/little endian, right
4426 adjusted/big endian. */
4427 /* ??? Currently this is handled wrong, because 4-byte hunks are
4428 always right adjusted/little endian. */
4429 if (offset & 0x4)
4430 gr_mode = SImode;
4431 /* If we have an even 4 byte hunk because the aggregate is a
4432 multiple of 4 bytes in size, then this goes in a GR reg right
4433 adjusted/little endian. */
4434 else if (byte_size - offset == 4)
4435 gr_mode = SImode;
4436
4437 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4438 gen_rtx_REG (gr_mode, (basereg
4439 + int_regs)),
4440 GEN_INT (offset));
4441
4442 gr_size = GET_MODE_SIZE (gr_mode);
4443 offset += gr_size;
4444 if (gr_size == UNITS_PER_WORD
4445 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4446 int_regs++;
4447 else if (gr_size > UNITS_PER_WORD)
4448 int_regs += gr_size / UNITS_PER_WORD;
4449 }
4450 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4451 }
4452
4453 /* On OpenVMS a variable argument is passed in either Rn or Fn. */
4454 else if (TARGET_ABI_OPEN_VMS && named == 0)
4455 {
4456 if (FLOAT_MODE_P (mode))
4457 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4458 else
4459 return gen_rtx_REG (mode, basereg + cum->words);
4460 }
4461
4462 /* Integral and aggregates go in general registers. If we have run out of
4463 FR registers, then FP values must also go in general registers. This can
4464 happen when we have a SFmode HFA. */
4465 else if (mode == TFmode || mode == TCmode
4466 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4467 {
4468 int byte_size = ((mode == BLKmode)
4469 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4470 if (BYTES_BIG_ENDIAN
4471 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4472 && byte_size < UNITS_PER_WORD
4473 && byte_size > 0)
4474 {
4475 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4476 gen_rtx_REG (DImode,
4477 (basereg + cum->words
4478 + offset)),
4479 const0_rtx);
4480 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4481 }
4482 else
4483 return gen_rtx_REG (mode, basereg + cum->words + offset);
4484
4485 }
4486
4487 /* If there is a prototype, then FP values go in a FR register when
4488 named, and in a GR register when unnamed. */
4489 else if (cum->prototype)
4490 {
4491 if (named)
4492 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4493 /* In big-endian mode, an anonymous SFmode value must be represented
4494 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4495 the value into the high half of the general register. */
4496 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4497 return gen_rtx_PARALLEL (mode,
4498 gen_rtvec (1,
4499 gen_rtx_EXPR_LIST (VOIDmode,
4500 gen_rtx_REG (DImode, basereg + cum->words + offset),
4501 const0_rtx)));
4502 else
4503 return gen_rtx_REG (mode, basereg + cum->words + offset);
4504 }
4505 /* If there is no prototype, then FP values go in both FR and GR
4506 registers. */
4507 else
4508 {
4509 /* See comment above. */
4510 enum machine_mode inner_mode =
4511 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4512
4513 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4514 gen_rtx_REG (mode, (FR_ARG_FIRST
4515 + cum->fp_regs)),
4516 const0_rtx);
4517 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4518 gen_rtx_REG (inner_mode,
4519 (basereg + cum->words
4520 + offset)),
4521 const0_rtx);
4522
4523 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4524 }
4525 }
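/* As an illustration of the HFA case above: a prototyped, named argument
   of type struct { float a, b, c, d; }, passed while the FP argument
   registers are still free, becomes a PARALLEL of four SFmode registers
   starting at FR_ARG_FIRST, at byte offsets 0, 4, 8 and 12, with no GR
   portion.  */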
4526
4527 /* Implement the TARGET_FUNCTION_ARG target hook. */
4528
4529 static rtx
4530 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4531 const_tree type, bool named)
4532 {
4533 return ia64_function_arg_1 (cum, mode, type, named, false);
4534 }
4535
4536 /* Implement the TARGET_FUNCTION_INCOMING_ARG target hook. */
4537
4538 static rtx
4539 ia64_function_incoming_arg (CUMULATIVE_ARGS *cum,
4540 enum machine_mode mode,
4541 const_tree type, bool named)
4542 {
4543 return ia64_function_arg_1 (cum, mode, type, named, true);
4544 }
4545
4546 /* Return the number of bytes, at the beginning of the argument, that must be
4547 put in registers. Return 0 if the argument is entirely in registers or
4548 entirely in memory. */
4549
4550 static int
4551 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4552 tree type, bool named ATTRIBUTE_UNUSED)
4553 {
4554 int words = ia64_function_arg_words (type, mode);
4555 int offset = ia64_function_arg_offset (cum, type, words);
4556
4557 /* If all argument slots are used, then it must go on the stack. */
4558 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4559 return 0;
4560
4561 /* It doesn't matter whether the argument goes in FR or GR regs. If
4562 it fits within the 8 argument slots, then it goes entirely in
4563 registers. If it extends past the last argument slot, then the rest
4564 goes on the stack. */
4565
4566 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4567 return 0;
4568
4569 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4570 }
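/* For example, a 3-word aggregate arriving when 6 of the 8 argument
   slots are already used gets (8 - 6) * 8 = 16 bytes in registers and
   the remaining word on the stack.  */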
4571
4572 /* Return ivms_arg_type based on machine_mode. */
4573
4574 static enum ivms_arg_type
4575 ia64_arg_type (enum machine_mode mode)
4576 {
4577 switch (mode)
4578 {
4579 case SFmode:
4580 return FS;
4581 case DFmode:
4582 return FT;
4583 default:
4584 return I64;
4585 }
4586 }
4587
4588 /* Update CUM to point after this argument. This is patterned after
4589 ia64_function_arg. */
4590
4591 static void
4592 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4593 const_tree type, bool named)
4594 {
4595 int words = ia64_function_arg_words (type, mode);
4596 int offset = ia64_function_arg_offset (cum, type, words);
4597 enum machine_mode hfa_mode = VOIDmode;
4598
4599 /* If all arg slots are already full, then there is nothing to do. */
4600 if (cum->words >= MAX_ARGUMENT_SLOTS)
4601 {
4602 cum->words += words + offset;
4603 return;
4604 }
4605
4606 cum->atypes[cum->words] = ia64_arg_type (mode);
4607 cum->words += words + offset;
4608
4609 /* Check for and handle homogeneous FP aggregates. */
4610 if (type)
4611 hfa_mode = hfa_element_mode (type, 0);
4612
4613 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4614 and unprototyped hfas are passed specially. */
4615 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4616 {
4617 int fp_regs = cum->fp_regs;
4618 /* This is the original value of cum->words + offset. */
4619 int int_regs = cum->words - words;
4620 int hfa_size = GET_MODE_SIZE (hfa_mode);
4621 int byte_size;
4622 int args_byte_size;
4623
4624 /* If prototyped, pass it in FR regs then GR regs.
4625 If not prototyped, pass it in both FR and GR regs.
4626
4627 If this is an SFmode aggregate, then it is possible to run out of
4628 FR regs while GR regs are still left. In that case, we pass the
4629 remaining part in the GR regs. */
4630
4631 /* Fill the FP regs. We do this always. We stop if we reach the end
4632 of the argument, the last FP register, or the last argument slot. */
4633
4634 byte_size = ((mode == BLKmode)
4635 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4636 args_byte_size = int_regs * UNITS_PER_WORD;
4637 offset = 0;
4638 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4639 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4640 {
4641 offset += hfa_size;
4642 args_byte_size += hfa_size;
4643 fp_regs++;
4644 }
4645
4646 cum->fp_regs = fp_regs;
4647 }
4648
4649 /* On OpenVMS a variable argument is passed in either Rn or Fn. */
4650 else if (TARGET_ABI_OPEN_VMS && named == 0)
4651 {
4652 cum->int_regs = cum->words;
4653 cum->fp_regs = cum->words;
4654 }
4655
4656 /* Integral and aggregates go in general registers. So do TFmode FP values.
4657 If we have run out of FR registers, then other FP values must also go in
4658 general registers. This can happen when we have a SFmode HFA. */
4659 else if (mode == TFmode || mode == TCmode
4660 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4661 cum->int_regs = cum->words;
4662
4663 /* If there is a prototype, then FP values go in a FR register when
4664 named, and in a GR register when unnamed. */
4665 else if (cum->prototype)
4666 {
4667 if (! named)
4668 cum->int_regs = cum->words;
4669 else
4670 /* ??? Complex types should not reach here. */
4671 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4672 }
4673 /* If there is no prototype, then FP values go in both FR and GR
4674 registers. */
4675 else
4676 {
4677 /* ??? Complex types should not reach here. */
4678 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4679 cum->int_regs = cum->words;
4680 }
4681 }
4682
4683 /* Arguments with alignment larger than 8 bytes start at the next even
4684 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
4685 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4686
4687 static unsigned int
4688 ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
4689 {
4690 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4691 return PARM_BOUNDARY * 2;
4692
4693 if (type)
4694 {
4695 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4696 return PARM_BOUNDARY * 2;
4697 else
4698 return PARM_BOUNDARY;
4699 }
4700
4701 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4702 return PARM_BOUNDARY * 2;
4703 else
4704 return PARM_BOUNDARY;
4705 }
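/* E.g. with PARM_BOUNDARY of 64, a 16-byte-aligned struct reports a
   128-bit boundary, while a plain 4-byte int reports 64 bits.  */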
4706
4707 /* True if it is OK to do sibling call optimization for the specified
4708 call expression EXP. DECL will be the called function, or NULL if
4709 this is an indirect call. */
4710 static bool
4711 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4712 {
4713 /* We can't perform a sibcall if the current function has the syscall_linkage
4714 attribute. */
4715 if (lookup_attribute ("syscall_linkage",
4716 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4717 return false;
4718
4719 /* We must always return with our current GP. This means we can
4720 only sibcall to functions defined in the current module unless
4721 TARGET_CONST_GP is set to true. */
4722 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4723 }
4724 \f
4725
4726 /* Implement va_arg. */
4727
4728 static tree
4729 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4730 gimple_seq *post_p)
4731 {
4732 /* Variable sized types are passed by reference. */
4733 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4734 {
4735 tree ptrtype = build_pointer_type (type);
4736 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4737 return build_va_arg_indirect_ref (addr);
4738 }
4739
4740 /* Aggregate arguments with alignment larger than 8 bytes start at
4741 the next even boundary. Integer and floating point arguments
4742 do so if they are larger than 8 bytes, whether or not they are
4743 also aligned larger than 8 bytes. */
4744 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4745 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4746 {
4747 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4748 size_int (2 * UNITS_PER_WORD - 1));
4749 t = fold_convert (sizetype, t);
4750 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4751 size_int (-2 * UNITS_PER_WORD));
4752 t = fold_convert (TREE_TYPE (valist), t);
4753 gimplify_assign (unshare_expr (valist), t, pre_p);
4754 }
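/* The computation above is the usual align-up idiom: with 8-byte words
   it advances valist to (valist + 15) & -16 before the standard va_arg
   expansion takes over.  */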
4755
4756 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4757 }
4758 \f
4759 /* Return true if the function return value is returned in memory, and
4760 false if it is in a register. */
4761
4762 static bool
4763 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4764 {
4765 enum machine_mode mode;
4766 enum machine_mode hfa_mode;
4767 HOST_WIDE_INT byte_size;
4768
4769 mode = TYPE_MODE (valtype);
4770 byte_size = GET_MODE_SIZE (mode);
4771 if (mode == BLKmode)
4772 {
4773 byte_size = int_size_in_bytes (valtype);
4774 if (byte_size < 0)
4775 return true;
4776 }
4777
4778 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4779
4780 hfa_mode = hfa_element_mode (valtype, 0);
4781 if (hfa_mode != VOIDmode)
4782 {
4783 int hfa_size = GET_MODE_SIZE (hfa_mode);
4784
4785 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4786 return true;
4787 else
4788 return false;
4789 }
4790 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4791 return true;
4792 else
4793 return false;
4794 }
4795
4796 /* Return rtx for register that holds the function return value. */
4797
4798 static rtx
4799 ia64_function_value (const_tree valtype,
4800 const_tree fn_decl_or_type,
4801 bool outgoing ATTRIBUTE_UNUSED)
4802 {
4803 enum machine_mode mode;
4804 enum machine_mode hfa_mode;
4805 int unsignedp;
4806 const_tree func = fn_decl_or_type;
4807
4808 if (fn_decl_or_type
4809 && !DECL_P (fn_decl_or_type))
4810 func = NULL;
4811
4812 mode = TYPE_MODE (valtype);
4813 hfa_mode = hfa_element_mode (valtype, 0);
4814
4815 if (hfa_mode != VOIDmode)
4816 {
4817 rtx loc[8];
4818 int i;
4819 int hfa_size;
4820 int byte_size;
4821 int offset;
4822
4823 hfa_size = GET_MODE_SIZE (hfa_mode);
4824 byte_size = ((mode == BLKmode)
4825 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4826 offset = 0;
4827 for (i = 0; offset < byte_size; i++)
4828 {
4829 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4830 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4831 GEN_INT (offset));
4832 offset += hfa_size;
4833 }
4834 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4835 }
4836 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4837 return gen_rtx_REG (mode, FR_ARG_FIRST);
4838 else
4839 {
4840 bool need_parallel = false;
4841
4842 /* In big-endian mode, we need to manage the layout of aggregates
4843 in the registers so that we get the bits properly aligned in
4844 the highpart of the registers. */
4845 if (BYTES_BIG_ENDIAN
4846 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4847 need_parallel = true;
4848
4849 /* Something like struct S { long double x; char a[0] } is not an
4850 HFA structure, and therefore doesn't go in fp registers. But
4851 the middle-end will give it XFmode anyway, and XFmode values
4852 don't normally fit in integer registers. So we need to smuggle
4853 the value inside a parallel. */
4854 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4855 need_parallel = true;
4856
4857 if (need_parallel)
4858 {
4859 rtx loc[8];
4860 int offset;
4861 int bytesize;
4862 int i;
4863
4864 offset = 0;
4865 bytesize = int_size_in_bytes (valtype);
4866 /* An empty PARALLEL is invalid here, but the return value
4867 doesn't matter for empty structs. */
4868 if (bytesize == 0)
4869 return gen_rtx_REG (mode, GR_RET_FIRST);
4870 for (i = 0; offset < bytesize; i++)
4871 {
4872 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4873 gen_rtx_REG (DImode,
4874 GR_RET_FIRST + i),
4875 GEN_INT (offset));
4876 offset += UNITS_PER_WORD;
4877 }
4878 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4879 }
4880
4881 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4882 func ? TREE_TYPE (func) : NULL_TREE,
4883 true);
4884
4885 return gen_rtx_REG (mode, GR_RET_FIRST);
4886 }
4887 }
4888
4889 /* Worker function for TARGET_LIBCALL_VALUE. */
4890
4891 static rtx
4892 ia64_libcall_value (enum machine_mode mode,
4893 const_rtx fun ATTRIBUTE_UNUSED)
4894 {
4895 return gen_rtx_REG (mode,
4896 (((GET_MODE_CLASS (mode) == MODE_FLOAT
4897 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4898 && (mode) != TFmode)
4899 ? FR_RET_FIRST : GR_RET_FIRST));
4900 }
4901
4902 /* Worker function for FUNCTION_VALUE_REGNO_P. */
4903
4904 static bool
4905 ia64_function_value_regno_p (const unsigned int regno)
4906 {
4907 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
4908 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
4909 }
4910
4911 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4912 We need to emit DTP-relative relocations. */
4913
4914 static void
4915 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4916 {
4917 gcc_assert (size == 4 || size == 8);
4918 if (size == 4)
4919 fputs ("\tdata4.ua\t@dtprel(", file);
4920 else
4921 fputs ("\tdata8.ua\t@dtprel(", file);
4922 output_addr_const (file, x);
4923 fputs (")", file);
4924 }
4925
4926 /* Print a memory address as an operand to reference that memory location. */
4927
4928 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4929 also call this from ia64_print_operand for memory addresses. */
4930
4931 void
4932 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4933 rtx address ATTRIBUTE_UNUSED)
4934 {
4935 }
4936
4937 /* Print an operand to an assembler instruction.
4938 C Swap and print a comparison operator.
4939 D Print an FP comparison operator.
4940 E Print 32 - constant, for SImode shifts as extract.
4941 e Print 64 - constant, for DImode rotates.
4942 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4943 a floating point register emitted normally.
4944 G A floating point constant.
4945 I Invert a predicate register by adding 1.
4946 J Select the proper predicate register for a condition.
4947 j Select the inverse predicate register for a condition.
4948 O Append .acq for volatile load.
4949 P Postincrement of a MEM.
4950 Q Append .rel for volatile store.
4951 R Print .s, .d, or nothing for a single, double, or no truncation.
4952 S Shift amount for shladd instruction.
4953 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4954 for Intel assembler.
4955 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4956 for Intel assembler.
4957 X A pair of floating point registers.
4958 r Print register name, or constant 0 as r0. HP compatibility for
4959 Linux kernel.
4960 v Print vector constant value as an 8-byte integer value. */
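/* For instance, %E applied to the constant 24 prints 8 (i.e. 32 - 24),
   and %e applied to 24 prints 40 (i.e. 64 - 24).  */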
4961
4962 void
4963 ia64_print_operand (FILE * file, rtx x, int code)
4964 {
4965 const char *str;
4966
4967 switch (code)
4968 {
4969 case 0:
4970 /* Handled below. */
4971 break;
4972
4973 case 'C':
4974 {
4975 enum rtx_code c = swap_condition (GET_CODE (x));
4976 fputs (GET_RTX_NAME (c), file);
4977 return;
4978 }
4979
4980 case 'D':
4981 switch (GET_CODE (x))
4982 {
4983 case NE:
4984 str = "neq";
4985 break;
4986 case UNORDERED:
4987 str = "unord";
4988 break;
4989 case ORDERED:
4990 str = "ord";
4991 break;
4992 case UNLT:
4993 str = "nge";
4994 break;
4995 case UNLE:
4996 str = "ngt";
4997 break;
4998 case UNGT:
4999 str = "nle";
5000 break;
5001 case UNGE:
5002 str = "nlt";
5003 break;
5004 default:
5005 str = GET_RTX_NAME (GET_CODE (x));
5006 break;
5007 }
5008 fputs (str, file);
5009 return;
5010
5011 case 'E':
5012 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5013 return;
5014
5015 case 'e':
5016 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5017 return;
5018
5019 case 'F':
5020 if (x == CONST0_RTX (GET_MODE (x)))
5021 str = reg_names [FR_REG (0)];
5022 else if (x == CONST1_RTX (GET_MODE (x)))
5023 str = reg_names [FR_REG (1)];
5024 else
5025 {
5026 gcc_assert (GET_CODE (x) == REG);
5027 str = reg_names [REGNO (x)];
5028 }
5029 fputs (str, file);
5030 return;
5031
5032 case 'G':
5033 {
5034 long val[4];
5035 REAL_VALUE_TYPE rv;
5036 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5037 real_to_target (val, &rv, GET_MODE (x));
5038 if (GET_MODE (x) == SFmode)
5039 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5040 else if (GET_MODE (x) == DFmode)
5041 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5042 & 0xffffffff,
5043 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5044 & 0xffffffff);
5045 else
5046 output_operand_lossage ("invalid %%G mode");
5047 }
5048 return;
5049
5050 case 'I':
5051 fputs (reg_names [REGNO (x) + 1], file);
5052 return;
5053
5054 case 'J':
5055 case 'j':
5056 {
5057 unsigned int regno = REGNO (XEXP (x, 0));
5058 if (GET_CODE (x) == EQ)
5059 regno += 1;
5060 if (code == 'j')
5061 regno ^= 1;
5062 fputs (reg_names [regno], file);
5063 }
5064 return;
5065
5066 case 'O':
5067 if (MEM_VOLATILE_P (x))
5068 fputs(".acq", file);
5069 return;
5070
5071 case 'P':
5072 {
5073 HOST_WIDE_INT value;
5074
5075 switch (GET_CODE (XEXP (x, 0)))
5076 {
5077 default:
5078 return;
5079
5080 case POST_MODIFY:
5081 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5082 if (GET_CODE (x) == CONST_INT)
5083 value = INTVAL (x);
5084 else
5085 {
5086 gcc_assert (GET_CODE (x) == REG);
5087 fprintf (file, ", %s", reg_names[REGNO (x)]);
5088 return;
5089 }
5090 break;
5091
5092 case POST_INC:
5093 value = GET_MODE_SIZE (GET_MODE (x));
5094 break;
5095
5096 case POST_DEC:
5097 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5098 break;
5099 }
5100
5101 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5102 return;
5103 }
5104
5105 case 'Q':
5106 if (MEM_VOLATILE_P (x))
5107 fputs(".rel", file);
5108 return;
5109
5110 case 'R':
5111 if (x == CONST0_RTX (GET_MODE (x)))
5112 fputs(".s", file);
5113 else if (x == CONST1_RTX (GET_MODE (x)))
5114 fputs(".d", file);
5115 else if (x == CONST2_RTX (GET_MODE (x)))
5116 ;
5117 else
5118 output_operand_lossage ("invalid %%R value");
5119 return;
5120
5121 case 'S':
5122 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5123 return;
5124
5125 case 'T':
5126 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5127 {
5128 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5129 return;
5130 }
5131 break;
5132
5133 case 'U':
5134 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5135 {
5136 const char *prefix = "0x";
5137 if (INTVAL (x) & 0x80000000)
5138 {
5139 fprintf (file, "0xffffffff");
5140 prefix = "";
5141 }
5142 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5143 return;
5144 }
5145 break;
5146
5147 case 'X':
5148 {
5149 unsigned int regno = REGNO (x);
5150 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5151 }
5152 return;
5153
5154 case 'r':
5155 /* If this operand is the constant zero, write it as register zero.
5156 Any register, zero, or CONST_INT value is OK here. */
5157 if (GET_CODE (x) == REG)
5158 fputs (reg_names[REGNO (x)], file);
5159 else if (x == CONST0_RTX (GET_MODE (x)))
5160 fputs ("r0", file);
5161 else if (GET_CODE (x) == CONST_INT)
5162 output_addr_const (file, x);
5163 else
5164 output_operand_lossage ("invalid %%r value");
5165 return;
5166
5167 case 'v':
5168 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5169 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5170 break;
5171
5172 case '+':
5173 {
5174 const char *which;
5175
5176 /* For conditional branches, returns or calls, substitute
5177 sptk, dptk, dpnt, or spnt for %s. */
5178 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5179 if (x)
5180 {
5181 int pred_val = INTVAL (XEXP (x, 0));
5182
5183 /* Guess top and bottom 10% statically predicted. */
5184 if (pred_val < REG_BR_PROB_BASE / 50
5185 && br_prob_note_reliable_p (x))
5186 which = ".spnt";
5187 else if (pred_val < REG_BR_PROB_BASE / 2)
5188 which = ".dpnt";
5189 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5190 || !br_prob_note_reliable_p (x))
5191 which = ".dptk";
5192 else
5193 which = ".sptk";
5194 }
5195 else if (GET_CODE (current_output_insn) == CALL_INSN)
5196 which = ".sptk";
5197 else
5198 which = ".dptk";
5199
5200 fputs (which, file);
5201 return;
5202 }
5203
5204 case ',':
5205 x = current_insn_predicate;
5206 if (x)
5207 {
5208 unsigned int regno = REGNO (XEXP (x, 0));
5209 if (GET_CODE (x) == EQ)
5210 regno += 1;
5211 fprintf (file, "(%s) ", reg_names [regno]);
5212 }
5213 return;
5214
5215 default:
5216 output_operand_lossage ("ia64_print_operand: unknown code");
5217 return;
5218 }
5219
5220 switch (GET_CODE (x))
5221 {
5222 /* This happens for the spill/restore instructions. */
5223 case POST_INC:
5224 case POST_DEC:
5225 case POST_MODIFY:
5226 x = XEXP (x, 0);
5227 /* ... fall through ... */
5228
5229 case REG:
5230 fputs (reg_names [REGNO (x)], file);
5231 break;
5232
5233 case MEM:
5234 {
5235 rtx addr = XEXP (x, 0);
5236 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5237 addr = XEXP (addr, 0);
5238 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5239 break;
5240 }
5241
5242 default:
5243 output_addr_const (file, x);
5244 break;
5245 }
5246
5247 return;
5248 }
5249 \f
5250 /* Compute a (partial) cost for rtx X. Return true if the complete
5251 cost has been computed, and false if subexpressions should be
5252 scanned. In either case, *TOTAL contains the cost result. */
5253 /* ??? This is incomplete. */
5254
5255 static bool
5256 ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5257 bool speed ATTRIBUTE_UNUSED)
5258 {
5259 switch (code)
5260 {
5261 case CONST_INT:
5262 switch (outer_code)
5263 {
5264 case SET:
5265 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5266 return true;
5267 case PLUS:
5268 if (satisfies_constraint_I (x))
5269 *total = 0;
5270 else if (satisfies_constraint_J (x))
5271 *total = 1;
5272 else
5273 *total = COSTS_N_INSNS (1);
5274 return true;
5275 default:
5276 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5277 *total = 0;
5278 else
5279 *total = COSTS_N_INSNS (1);
5280 return true;
5281 }
5282
5283 case CONST_DOUBLE:
5284 *total = COSTS_N_INSNS (1);
5285 return true;
5286
5287 case CONST:
5288 case SYMBOL_REF:
5289 case LABEL_REF:
5290 *total = COSTS_N_INSNS (3);
5291 return true;
5292
5293 case FMA:
5294 *total = COSTS_N_INSNS (4);
5295 return true;
5296
5297 case MULT:
5298 /* For multiplies wider than HImode, we have to go to the FPU,
5299 which normally involves copies. Plus there's the latency
5300 of the multiply itself, and the latency of the instructions to
5301 transfer integer regs to FP regs. */
5302 if (FLOAT_MODE_P (GET_MODE (x)))
5303 *total = COSTS_N_INSNS (4);
5304 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5305 *total = COSTS_N_INSNS (10);
5306 else
5307 *total = COSTS_N_INSNS (2);
5308 return true;
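/* For instance, a DImode multiply is done in the FP unit (roughly a
   setf.sig/xma/getf.sig sequence), which is what the cost of 10 above
   approximates; this is an illustration, not an exact latency count. */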
5309
5310 case PLUS:
5311 case MINUS:
5312 if (FLOAT_MODE_P (GET_MODE (x)))
5313 {
5314 *total = COSTS_N_INSNS (4);
5315 return true;
5316 }
5317 /* FALLTHRU */
5318
5319 case ASHIFT:
5320 case ASHIFTRT:
5321 case LSHIFTRT:
5322 *total = COSTS_N_INSNS (1);
5323 return true;
5324
5325 case DIV:
5326 case UDIV:
5327 case MOD:
5328 case UMOD:
5329 /* We make divide expensive, so that divide-by-constant will be
5330 optimized to a multiply. */
5331 *total = COSTS_N_INSNS (60);
5332 return true;
5333
5334 default:
5335 return false;
5336 }
5337 }
5338
5339 /* Calculate the cost of moving data from a register in class FROM to
5340 one in class TO, using MODE. */
5341
5342 static int
5343 ia64_register_move_cost (enum machine_mode mode, reg_class_t from,
5344 reg_class_t to)
5345 {
5346 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5347 if (to == ADDL_REGS)
5348 to = GR_REGS;
5349 if (from == ADDL_REGS)
5350 from = GR_REGS;
5351
5352 /* All costs are symmetric, so reduce cases by putting the
5353 lower number class as the destination. */
5354 if (from < to)
5355 {
5356 reg_class_t tmp = to;
5357 to = from, from = tmp;
5358 }
5359
5360 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5361 so that we get secondary memory reloads. Between FR_REGS,
5362 we have to make this at least as expensive as memory_move_cost
5363 to avoid spectacularly poor register class preferencing. */
5364 if (mode == XFmode || mode == RFmode)
5365 {
5366 if (to != GR_REGS || from != GR_REGS)
5367 return memory_move_cost (mode, to, false);
5368 else
5369 return 3;
5370 }
5371
5372 switch (to)
5373 {
5374 case PR_REGS:
5375 /* Moving between PR registers takes two insns. */
5376 if (from == PR_REGS)
5377 return 3;
5378 /* Moving between PR and anything but GR is impossible. */
5379 if (from != GR_REGS)
5380 return memory_move_cost (mode, to, false);
5381 break;
5382
5383 case BR_REGS:
5384 /* Moving between BR and anything but GR is impossible. */
5385 if (from != GR_REGS && from != GR_AND_BR_REGS)
5386 return memory_move_cost (mode, to, false);
5387 break;
5388
5389 case AR_I_REGS:
5390 case AR_M_REGS:
5391 /* Moving between AR and anything but GR is impossible. */
5392 if (from != GR_REGS)
5393 return memory_move_cost (mode, to, false);
5394 break;
5395
5396 case GR_REGS:
5397 case FR_REGS:
5398 case FP_REGS:
5399 case GR_AND_FR_REGS:
5400 case GR_AND_BR_REGS:
5401 case ALL_REGS:
5402 break;
5403
5404 default:
5405 gcc_unreachable ();
5406 }
5407
5408 return 2;
5409 }
5410
5411 /* Calculate the cost of moving data of MODE from a register to or from
5412 memory. */
5413
5414 static int
5415 ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5416 reg_class_t rclass,
5417 bool in ATTRIBUTE_UNUSED)
5418 {
5419 if (rclass == GENERAL_REGS
5420 || rclass == FR_REGS
5421 || rclass == FP_REGS
5422 || rclass == GR_AND_FR_REGS)
5423 return 4;
5424 else
5425 return 10;
5426 }
5427
5428 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5429 on RCLASS to use when copying X into that class. */
5430
5431 static reg_class_t
5432 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5433 {
5434 switch (rclass)
5435 {
5436 case FR_REGS:
5437 case FP_REGS:
5438 /* Don't allow volatile mem reloads into floating point registers.
5439 This is defined to force reload to choose the r/m case instead
5440 of the f/f case when reloading (set (reg fX) (mem/v)). */
5441 if (MEM_P (x) && MEM_VOLATILE_P (x))
5442 return NO_REGS;
5443
5444 /* Force all unrecognized constants into the constant pool. */
5445 if (CONSTANT_P (x))
5446 return NO_REGS;
5447 break;
5448
5449 case AR_M_REGS:
5450 case AR_I_REGS:
5451 if (!OBJECT_P (x))
5452 return NO_REGS;
5453 break;
5454
5455 default:
5456 break;
5457 }
5458
5459 return rclass;
5460 }
5461
5462 /* This function returns the register class required for a secondary
5463 register when copying between one of the registers in RCLASS, and X,
5464 using MODE. A return value of NO_REGS means that no secondary register
5465 is required. */
5466
5467 enum reg_class
5468 ia64_secondary_reload_class (enum reg_class rclass,
5469 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5470 {
5471 int regno = -1;
5472
5473 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5474 regno = true_regnum (x);
5475
5476 switch (rclass)
5477 {
5478 case BR_REGS:
5479 case AR_M_REGS:
5480 case AR_I_REGS:
5481 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5482 interaction. We end up with two pseudos with overlapping lifetimes
5483 both of which are equiv to the same constant, and both which need
5484 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5485 changes depending on the path length, which means the qty_first_reg
5486 check in make_regs_eqv can give different answers at different times.
5487 At some point I'll probably need a reload_indi pattern to handle
5488 this.
5489
5490 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5491 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5492 non-general registers for good measure. */
5493 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5494 return GR_REGS;
5495
5496 /* This is needed if a pseudo used as a call_operand gets spilled to a
5497 stack slot. */
5498 if (GET_CODE (x) == MEM)
5499 return GR_REGS;
5500 break;
5501
5502 case FR_REGS:
5503 case FP_REGS:
5504 /* Need to go through general registers to get to other class regs. */
5505 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5506 return GR_REGS;
5507
5508 /* This can happen when a paradoxical subreg is an operand to the
5509 muldi3 pattern. */
5510 /* ??? This shouldn't be necessary after instruction scheduling is
5511 enabled, because paradoxical subregs are not accepted by
5512 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5513 stop the paradoxical subreg stupidity in the *_operand functions
5514 in recog.c. */
5515 if (GET_CODE (x) == MEM
5516 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5517 || GET_MODE (x) == QImode))
5518 return GR_REGS;
5519
5520 /* This can happen because of the ior/and/etc patterns that accept FP
5521 registers as operands. If the third operand is a constant, then it
5522 needs to be reloaded into a FP register. */
5523 if (GET_CODE (x) == CONST_INT)
5524 return GR_REGS;
5525
5526 /* This can happen because of register elimination in a muldi3 insn.
5527 E.g. `26107 * (unsigned long)&u'. */
5528 if (GET_CODE (x) == PLUS)
5529 return GR_REGS;
5530 break;
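/* For example, copying a CONST_INT into an FR register returns GR_REGS via
   the check above: the constant is moved into a general register first and
   then transferred to the FP register, since no insn moves an immediate
   directly into an FP register. This sketches the common case; reload
   picks the actual insns. */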
5531
5532 case PR_REGS:
5533 /* ??? This happens if we cse/gcse a BImode value across a call,
5534 and the function has a nonlocal goto. This is because global
5535 does not allocate call crossing pseudos to hard registers when
5536 crtl->has_nonlocal_goto is true. This is relatively
5537 common for C++ programs that use exceptions. To reproduce,
5538 return NO_REGS and compile libstdc++. */
5539 if (GET_CODE (x) == MEM)
5540 return GR_REGS;
5541
5542 /* This can happen when we take a BImode subreg of a DImode value,
5543 and that DImode value winds up in some non-GR register. */
5544 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5545 return GR_REGS;
5546 break;
5547
5548 default:
5549 break;
5550 }
5551
5552 return NO_REGS;
5553 }
5554
5555 \f
5556 /* Implement targetm.unspec_may_trap_p hook. */
5557 static int
5558 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5559 {
5560 if (GET_CODE (x) == UNSPEC)
5561 {
5562 switch (XINT (x, 1))
5563 {
5564 case UNSPEC_LDA:
5565 case UNSPEC_LDS:
5566 case UNSPEC_LDSA:
5567 case UNSPEC_LDCCLR:
5568 case UNSPEC_CHKACLR:
5569 case UNSPEC_CHKS:
5570 /* These unspecs are just wrappers. */
5571 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5572 }
5573 }
5574
5575 return default_unspec_may_trap_p (x, flags);
5576 }
5577
5578 \f
5579 /* Parse the -mfixed-range= option string. */
5580
5581 static void
5582 fix_range (const char *const_str)
5583 {
5584 int i, first, last;
5585 char *str, *dash, *comma;
5586
5587 /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
5588 REG2 are either register names or register numbers. The effect
5589 of this option is to mark the registers in the range from REG1 to
5590 REG2 as ``fixed'' so they won't be used by the compiler. This is
5591 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
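/* Example option strings (hypothetical register choices):
   "-mfixed-range=f32-f127" or "-mfixed-range=f32-f63,f96-f127". */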
5592
5593 i = strlen (const_str);
5594 str = (char *) alloca (i + 1);
5595 memcpy (str, const_str, i + 1);
5596
5597 while (1)
5598 {
5599 dash = strchr (str, '-');
5600 if (!dash)
5601 {
5602 warning (0, "value of -mfixed-range must have form REG1-REG2");
5603 return;
5604 }
5605 *dash = '\0';
5606
5607 comma = strchr (dash + 1, ',');
5608 if (comma)
5609 *comma = '\0';
5610
5611 first = decode_reg_name (str);
5612 if (first < 0)
5613 {
5614 warning (0, "unknown register name: %s", str);
5615 return;
5616 }
5617
5618 last = decode_reg_name (dash + 1);
5619 if (last < 0)
5620 {
5621 warning (0, "unknown register name: %s", dash + 1);
5622 return;
5623 }
5624
5625 *dash = '-';
5626
5627 if (first > last)
5628 {
5629 warning (0, "%s-%s is an empty range", str, dash + 1);
5630 return;
5631 }
5632
5633 for (i = first; i <= last; ++i)
5634 fixed_regs[i] = call_used_regs[i] = 1;
5635
5636 if (!comma)
5637 break;
5638
5639 *comma = ',';
5640 str = comma + 1;
5641 }
5642 }
5643
5644 /* Implement TARGET_HANDLE_OPTION. */
5645
5646 static bool
5647 ia64_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
5648 struct gcc_options *opts_set ATTRIBUTE_UNUSED,
5649 const struct cl_decoded_option *decoded,
5650 location_t loc)
5651 {
5652 size_t code = decoded->opt_index;
5653 const char *arg = decoded->arg;
5654 int value = decoded->value;
5655
5656 switch (code)
5657 {
5658 case OPT_mtls_size_:
5659 if (value != 14 && value != 22 && value != 64)
5660 error_at (loc, "bad value %<%s%> for -mtls-size= switch", arg);
5661 return true;
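/* E.g. -mtls-size=22 is accepted, while -mtls-size=16 produces the
   error above; only 14, 22 and 64 pass the check. */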
5662
5663 default:
5664 return true;
5665 }
5666 }
5667
5668 /* Implement TARGET_OPTION_OVERRIDE. */
5669
5670 static void
5671 ia64_option_override (void)
5672 {
5673 unsigned int i;
5674 cl_deferred_option *opt;
5675 VEC(cl_deferred_option,heap) *vec
5676 = (VEC(cl_deferred_option,heap) *) ia64_deferred_options;
5677
5678 FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
5679 {
5680 switch (opt->opt_index)
5681 {
5682 case OPT_mfixed_range_:
5683 fix_range (opt->arg);
5684 break;
5685
5686 default:
5687 gcc_unreachable ();
5688 }
5689 }
5690
5691 if (TARGET_AUTO_PIC)
5692 target_flags |= MASK_CONST_GP;
5693
5694 /* Numerous experiments show that IRA-based loop pressure
5695 calculation works better for RTL loop invariant motion on targets
5696 with enough (>= 32) registers. It is an expensive optimization,
5697 so it is enabled only when optimizing for peak performance. */
5698 if (optimize >= 3)
5699 flag_ira_loop_pressure = 1;
5700
5701
5702 ia64_section_threshold = (global_options_set.x_g_switch_value
5703 ? g_switch_value
5704 : IA64_DEFAULT_GVALUE);
5705
5706 init_machine_status = ia64_init_machine_status;
5707
5708 if (align_functions <= 0)
5709 align_functions = 64;
5710 if (align_loops <= 0)
5711 align_loops = 32;
5712 if (TARGET_ABI_OPEN_VMS)
5713 flag_no_common = 1;
5714
5715 ia64_override_options_after_change();
5716 }
5717
5718 /* Implement targetm.override_options_after_change. */
5719
5720 static void
5721 ia64_override_options_after_change (void)
5722 {
5723 if (optimize >= 3
5724 && !global_options_set.x_flag_selective_scheduling
5725 && !global_options_set.x_flag_selective_scheduling2)
5726 {
5727 flag_selective_scheduling2 = 1;
5728 flag_sel_sched_pipelining = 1;
5729 }
5730 if (mflag_sched_control_spec == 2)
5731 {
5732 /* Control speculation is on by default for the selective scheduler,
5733 but not for the Haifa scheduler. */
5734 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5735 }
5736 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5737 {
5738 /* FIXME: remove this once breaking auto-inc insns apart is
5739 implemented as a transformation. */
5740 flag_auto_inc_dec = 0;
5741 }
5742 }
5743
5744 /* Initialize the record of emitted frame related registers. */
5745
5746 void ia64_init_expanders (void)
5747 {
5748 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5749 }
5750
5751 static struct machine_function *
5752 ia64_init_machine_status (void)
5753 {
5754 return ggc_alloc_cleared_machine_function ();
5755 }
5756 \f
5757 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5758 static enum attr_type ia64_safe_type (rtx);
5759
5760 static enum attr_itanium_class
5761 ia64_safe_itanium_class (rtx insn)
5762 {
5763 if (recog_memoized (insn) >= 0)
5764 return get_attr_itanium_class (insn);
5765 else if (DEBUG_INSN_P (insn))
5766 return ITANIUM_CLASS_IGNORE;
5767 else
5768 return ITANIUM_CLASS_UNKNOWN;
5769 }
5770
5771 static enum attr_type
5772 ia64_safe_type (rtx insn)
5773 {
5774 if (recog_memoized (insn) >= 0)
5775 return get_attr_type (insn);
5776 else
5777 return TYPE_UNKNOWN;
5778 }
5779 \f
5780 /* The following collection of routines emits instruction group stop bits as
5781 necessary to avoid dependencies. */
5782
5783 /* Need to track some additional registers as far as serialization is
5784 concerned so we can properly handle br.call and br.ret. We could
5785 make these registers visible to gcc, but since these registers are
5786 never explicitly used in gcc generated code, it seems wasteful to
5787 do so (plus it would make the call and return patterns needlessly
5788 complex). */
5789 #define REG_RP (BR_REG (0))
5790 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5791 /* This is used for volatile asms which may require a stop bit immediately
5792 before and after them. */
5793 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5794 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5795 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5796
5797 /* For each register, we keep track of how it has been written in the
5798 current instruction group.
5799
5800 If a register is written unconditionally (no qualifying predicate),
5801 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5802
5803 If a register is written if its qualifying predicate P is true, we
5804 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5805 may be written again by the complement of P (P^1) and when this happens,
5806 WRITE_COUNT gets set to 2.
5807
5808 The result of this is that whenever an insn attempts to write a register
5809 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5810
5811 If a predicate register is written by a floating-point insn, we set
5812 WRITTEN_BY_FP to true.
5813
5814 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5815 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5816
5817 #if GCC_VERSION >= 4000
5818 #define RWS_FIELD_TYPE __extension__ unsigned short
5819 #else
5820 #define RWS_FIELD_TYPE unsigned int
5821 #endif
5822 struct reg_write_state
5823 {
5824 RWS_FIELD_TYPE write_count : 2;
5825 RWS_FIELD_TYPE first_pred : 10;
5826 RWS_FIELD_TYPE written_by_fp : 1;
5827 RWS_FIELD_TYPE written_by_and : 1;
5828 RWS_FIELD_TYPE written_by_or : 1;
5829 };
5830
5831 /* Cumulative info for the current instruction group. */
5832 struct reg_write_state rws_sum[NUM_REGS];
5833 #ifdef ENABLE_CHECKING
5834 /* Bitmap whether a register has been written in the current insn. */
5835 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5836 / HOST_BITS_PER_WIDEST_FAST_INT];
5837
5838 static inline void
5839 rws_insn_set (int regno)
5840 {
5841 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5842 SET_HARD_REG_BIT (rws_insn, regno);
5843 }
5844
5845 static inline int
5846 rws_insn_test (int regno)
5847 {
5848 return TEST_HARD_REG_BIT (rws_insn, regno);
5849 }
5850 #else
5851 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5852 unsigned char rws_insn[2];
5853
5854 static inline void
5855 rws_insn_set (int regno)
5856 {
5857 if (regno == REG_AR_CFM)
5858 rws_insn[0] = 1;
5859 else if (regno == REG_VOLATILE)
5860 rws_insn[1] = 1;
5861 }
5862
5863 static inline int
5864 rws_insn_test (int regno)
5865 {
5866 if (regno == REG_AR_CFM)
5867 return rws_insn[0];
5868 if (regno == REG_VOLATILE)
5869 return rws_insn[1];
5870 return 0;
5871 }
5872 #endif
5873
5874 /* Indicates whether this is the first instruction after a stop bit,
5875 in which case we don't need another stop bit. Without this,
5876 ia64_variable_issue will die when scheduling an alloc. */
5877 static int first_instruction;
5878
5879 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5880 RTL for one instruction. */
5881 struct reg_flags
5882 {
5883 unsigned int is_write : 1; /* Is register being written? */
5884 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5885 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5886 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5887 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5888 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5889 };
5890
5891 static void rws_update (int, struct reg_flags, int);
5892 static int rws_access_regno (int, struct reg_flags, int);
5893 static int rws_access_reg (rtx, struct reg_flags, int);
5894 static void update_set_flags (rtx, struct reg_flags *);
5895 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5896 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5897 static void init_insn_group_barriers (void);
5898 static int group_barrier_needed (rtx);
5899 static int safe_group_barrier_needed (rtx);
5900 static int in_safe_group_barrier;
5901
5902 /* Update the write state in rws_sum for REGNO, which is being written by the
5903 current instruction, with predicate PRED and associated register flags FLAGS. */
5904
5905 static void
5906 rws_update (int regno, struct reg_flags flags, int pred)
5907 {
5908 if (pred)
5909 rws_sum[regno].write_count++;
5910 else
5911 rws_sum[regno].write_count = 2;
5912 rws_sum[regno].written_by_fp |= flags.is_fp;
5913 /* ??? Not tracking and/or across differing predicates. */
5914 rws_sum[regno].written_by_and = flags.is_and;
5915 rws_sum[regno].written_by_or = flags.is_or;
5916 rws_sum[regno].first_pred = pred;
5917 }
5918
5919 /* Handle an access to register REGNO of type FLAGS using predicate register
5920 PRED. Update rws_sum array. Return 1 if this access creates
5921 a dependency with an earlier instruction in the same group. */
5922
5923 static int
5924 rws_access_regno (int regno, struct reg_flags flags, int pred)
5925 {
5926 int need_barrier = 0;
5927
5928 gcc_assert (regno < NUM_REGS);
5929
5930 if (! PR_REGNO_P (regno))
5931 flags.is_and = flags.is_or = 0;
5932
5933 if (flags.is_write)
5934 {
5935 int write_count;
5936
5937 rws_insn_set (regno);
5938 write_count = rws_sum[regno].write_count;
5939
5940 switch (write_count)
5941 {
5942 case 0:
5943 /* The register has not been written yet. */
5944 if (!in_safe_group_barrier)
5945 rws_update (regno, flags, pred);
5946 break;
5947
5948 case 1:
5949 /* The register has been written via a predicate. Treat
5950 it like an unconditional write and do not try to check
5951 for a complementary predicate register in the earlier write. */
5952 if (flags.is_and && rws_sum[regno].written_by_and)
5953 ;
5954 else if (flags.is_or && rws_sum[regno].written_by_or)
5955 ;
5956 else
5957 need_barrier = 1;
5958 if (!in_safe_group_barrier)
5959 rws_update (regno, flags, pred);
5960 break;
5961
5962 case 2:
5963 /* The register has been unconditionally written already. We
5964 need a barrier. */
5965 if (flags.is_and && rws_sum[regno].written_by_and)
5966 ;
5967 else if (flags.is_or && rws_sum[regno].written_by_or)
5968 ;
5969 else
5970 need_barrier = 1;
5971 if (!in_safe_group_barrier)
5972 {
5973 rws_sum[regno].written_by_and = flags.is_and;
5974 rws_sum[regno].written_by_or = flags.is_or;
5975 }
5976 break;
5977
5978 default:
5979 gcc_unreachable ();
5980 }
5981 }
5982 else
5983 {
5984 if (flags.is_branch)
5985 {
5986 /* Branches have several RAW exceptions that allow us to avoid
5987 barriers. */
5988
5989 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5990 /* RAW dependencies on branch regs are permissible as long
5991 as the writer is a non-branch instruction. Since we
5992 never generate code that uses a branch register written
5993 by a branch instruction, handling this case is
5994 easy. */
5995 return 0;
5996
5997 if (REGNO_REG_CLASS (regno) == PR_REGS
5998 && ! rws_sum[regno].written_by_fp)
5999 /* The predicates of a branch are available within the
6000 same insn group as long as the predicate was written by
6001 something other than a floating-point instruction. */
6002 return 0;
6003 }
6004
6005 if (flags.is_and && rws_sum[regno].written_by_and)
6006 return 0;
6007 if (flags.is_or && rws_sum[regno].written_by_or)
6008 return 0;
6009
6010 switch (rws_sum[regno].write_count)
6011 {
6012 case 0:
6013 /* The register has not been written yet. */
6014 break;
6015
6016 case 1:
6017 /* The register has been written via a predicate, assume we
6018 need a barrier (don't check for complementary regs). */
6019 need_barrier = 1;
6020 break;
6021
6022 case 2:
6023 /* The register has been unconditionally written already. We
6024 need a barrier. */
6025 need_barrier = 1;
6026 break;
6027
6028 default:
6029 gcc_unreachable ();
6030 }
6031 }
6032
6033 return need_barrier;
6034 }
6035
6036 static int
6037 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6038 {
6039 int regno = REGNO (reg);
6040 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6041
6042 if (n == 1)
6043 return rws_access_regno (regno, flags, pred);
6044 else
6045 {
6046 int need_barrier = 0;
6047 while (--n >= 0)
6048 need_barrier |= rws_access_regno (regno + n, flags, pred);
6049 return need_barrier;
6050 }
6051 }
6052
6053 /* Examine X, which is a SET rtx, and update the register flags
6054 stored in *PFLAGS. */
6055
6056 static void
6057 update_set_flags (rtx x, struct reg_flags *pflags)
6058 {
6059 rtx src = SET_SRC (x);
6060
6061 switch (GET_CODE (src))
6062 {
6063 case CALL:
6064 return;
6065
6066 case IF_THEN_ELSE:
6067 /* There are four cases here:
6068 (1) The destination is (pc), in which case this is a branch,
6069 nothing here applies.
6070 (2) The destination is ar.lc, in which case this is a
6071 doloop_end_internal,
6072 (3) The destination is an fp register, in which case this is
6073 an fselect instruction.
6074 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6075 this is a check load.
6076 In all cases, nothing we do in this function applies. */
6077 return;
6078
6079 default:
6080 if (COMPARISON_P (src)
6081 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6082 /* Set pflags->is_fp to 1 so that we know we're dealing
6083 with a floating point comparison when processing the
6084 destination of the SET. */
6085 pflags->is_fp = 1;
6086
6087 /* Discover if this is a parallel comparison. We only handle
6088 and.orcm and or.andcm at present, since we must retain a
6089 strict inverse on the predicate pair. */
6090 else if (GET_CODE (src) == AND)
6091 pflags->is_and = 1;
6092 else if (GET_CODE (src) == IOR)
6093 pflags->is_or = 1;
6094
6095 break;
6096 }
6097 }
6098
6099 /* Subroutine of rtx_needs_barrier; this function determines whether the
6100 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6101 are as in rtx_needs_barrier. */
6103
6104 static int
6105 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6106 {
6107 int need_barrier = 0;
6108 rtx dst;
6109 rtx src = SET_SRC (x);
6110
6111 if (GET_CODE (src) == CALL)
6112 /* We don't need to worry about the result registers that
6113 get written by subroutine call. */
6114 return rtx_needs_barrier (src, flags, pred);
6115 else if (SET_DEST (x) == pc_rtx)
6116 {
6117 /* X is a conditional branch. */
6118 /* ??? This seems redundant, as the caller sets this bit for
6119 all JUMP_INSNs. */
6120 if (!ia64_spec_check_src_p (src))
6121 flags.is_branch = 1;
6122 return rtx_needs_barrier (src, flags, pred);
6123 }
6124
6125 if (ia64_spec_check_src_p (src))
6126 /* Avoid checking one register twice (in condition
6127 and in 'then' section) for ldc pattern. */
6128 {
6129 gcc_assert (REG_P (XEXP (src, 2)));
6130 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6131
6132 /* We process MEM below. */
6133 src = XEXP (src, 1);
6134 }
6135
6136 need_barrier |= rtx_needs_barrier (src, flags, pred);
6137
6138 dst = SET_DEST (x);
6139 if (GET_CODE (dst) == ZERO_EXTRACT)
6140 {
6141 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6142 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6143 }
6144 return need_barrier;
6145 }
6146
6147 /* Handle an access to rtx X of type FLAGS using predicate register
6148 PRED. Return 1 if this access creates a dependency with an earlier
6149 instruction in the same group. */
6150
6151 static int
6152 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6153 {
6154 int i, j;
6155 int is_complemented = 0;
6156 int need_barrier = 0;
6157 const char *format_ptr;
6158 struct reg_flags new_flags;
6159 rtx cond;
6160
6161 if (! x)
6162 return 0;
6163
6164 new_flags = flags;
6165
6166 switch (GET_CODE (x))
6167 {
6168 case SET:
6169 update_set_flags (x, &new_flags);
6170 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6171 if (GET_CODE (SET_SRC (x)) != CALL)
6172 {
6173 new_flags.is_write = 1;
6174 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6175 }
6176 break;
6177
6178 case CALL:
6179 new_flags.is_write = 0;
6180 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6181
6182 /* Avoid multiple register writes, in case this is a pattern with
6183 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6184 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6185 {
6186 new_flags.is_write = 1;
6187 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6188 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6189 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6190 }
6191 break;
6192
6193 case COND_EXEC:
6194 /* X is a predicated instruction. */
6195
6196 cond = COND_EXEC_TEST (x);
6197 gcc_assert (!pred);
6198 need_barrier = rtx_needs_barrier (cond, flags, 0);
6199
6200 if (GET_CODE (cond) == EQ)
6201 is_complemented = 1;
6202 cond = XEXP (cond, 0);
6203 gcc_assert (GET_CODE (cond) == REG
6204 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6205 pred = REGNO (cond);
6206 if (is_complemented)
6207 ++pred;
6208
6209 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6210 return need_barrier;
6211
6212 case CLOBBER:
6213 case USE:
6214 /* Clobber & use are for earlier compiler-phases only. */
6215 break;
6216
6217 case ASM_OPERANDS:
6218 case ASM_INPUT:
6219 /* We always emit stop bits for traditional asms. We emit stop bits
6220 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6221 if (GET_CODE (x) != ASM_OPERANDS
6222 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6223 {
6224 /* Avoid writing the register multiple times if we have multiple
6225 asm outputs. This avoids a failure in rws_access_reg. */
6226 if (! rws_insn_test (REG_VOLATILE))
6227 {
6228 new_flags.is_write = 1;
6229 rws_access_regno (REG_VOLATILE, new_flags, pred);
6230 }
6231 return 1;
6232 }
6233
6234 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6235 We cannot just fall through here since then we would be confused
6236 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6237 a traditional asm, unlike its normal usage. */
6238
6239 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6240 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6241 need_barrier = 1;
6242 break;
6243
6244 case PARALLEL:
6245 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6246 {
6247 rtx pat = XVECEXP (x, 0, i);
6248 switch (GET_CODE (pat))
6249 {
6250 case SET:
6251 update_set_flags (pat, &new_flags);
6252 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6253 break;
6254
6255 case USE:
6256 case CALL:
6257 case ASM_OPERANDS:
6258 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6259 break;
6260
6261 case CLOBBER:
6262 if (REG_P (XEXP (pat, 0))
6263 && extract_asm_operands (x) != NULL_RTX
6264 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6265 {
6266 new_flags.is_write = 1;
6267 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6268 new_flags, pred);
6269 new_flags = flags;
6270 }
6271 break;
6272
6273 case RETURN:
6274 break;
6275
6276 default:
6277 gcc_unreachable ();
6278 }
6279 }
6280 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6281 {
6282 rtx pat = XVECEXP (x, 0, i);
6283 if (GET_CODE (pat) == SET)
6284 {
6285 if (GET_CODE (SET_SRC (pat)) != CALL)
6286 {
6287 new_flags.is_write = 1;
6288 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6289 pred);
6290 }
6291 }
6292 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6293 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6294 }
6295 break;
6296
6297 case SUBREG:
6298 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6299 break;
6300 case REG:
6301 if (REGNO (x) == AR_UNAT_REGNUM)
6302 {
6303 for (i = 0; i < 64; ++i)
6304 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6305 }
6306 else
6307 need_barrier = rws_access_reg (x, flags, pred);
6308 break;
6309
6310 case MEM:
6311 /* Find the regs used in memory address computation. */
6312 new_flags.is_write = 0;
6313 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6314 break;
6315
6316 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6317 case SYMBOL_REF: case LABEL_REF: case CONST:
6318 break;
6319
6320 /* Operators with side-effects. */
6321 case POST_INC: case POST_DEC:
6322 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6323
6324 new_flags.is_write = 0;
6325 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6326 new_flags.is_write = 1;
6327 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6328 break;
6329
6330 case POST_MODIFY:
6331 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6332
6333 new_flags.is_write = 0;
6334 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6335 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6336 new_flags.is_write = 1;
6337 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6338 break;
6339
6340 /* Handle common unary and binary ops for efficiency. */
6341 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6342 case MOD: case UDIV: case UMOD: case AND: case IOR:
6343 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6344 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6345 case NE: case EQ: case GE: case GT: case LE:
6346 case LT: case GEU: case GTU: case LEU: case LTU:
6347 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6348 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6349 break;
6350
6351 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6352 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6353 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6354 case SQRT: case FFS: case POPCOUNT:
6355 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6356 break;
6357
6358 case VEC_SELECT:
6359 /* VEC_SELECT's second argument is a PARALLEL with integers that
6360 describe the elements selected. On ia64, those integers are
6361 always constants. Avoid walking the PARALLEL so that we don't
6362 get confused with "normal" parallels and then die. */
6363 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6364 break;
6365
6366 case UNSPEC:
6367 switch (XINT (x, 1))
6368 {
6369 case UNSPEC_LTOFF_DTPMOD:
6370 case UNSPEC_LTOFF_DTPREL:
6371 case UNSPEC_DTPREL:
6372 case UNSPEC_LTOFF_TPREL:
6373 case UNSPEC_TPREL:
6374 case UNSPEC_PRED_REL_MUTEX:
6375 case UNSPEC_PIC_CALL:
6376 case UNSPEC_MF:
6377 case UNSPEC_FETCHADD_ACQ:
6378 case UNSPEC_BSP_VALUE:
6379 case UNSPEC_FLUSHRS:
6380 case UNSPEC_BUNDLE_SELECTOR:
6381 break;
6382
6383 case UNSPEC_GR_SPILL:
6384 case UNSPEC_GR_RESTORE:
6385 {
6386 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6387 HOST_WIDE_INT bit = (offset >> 3) & 63;
6388
6389 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6390 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6391 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6392 new_flags, pred);
6393 break;
6394 }
6395
6396 case UNSPEC_FR_SPILL:
6397 case UNSPEC_FR_RESTORE:
6398 case UNSPEC_GETF_EXP:
6399 case UNSPEC_SETF_EXP:
6400 case UNSPEC_ADDP4:
6401 case UNSPEC_FR_SQRT_RECIP_APPROX:
6402 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6403 case UNSPEC_LDA:
6404 case UNSPEC_LDS:
6405 case UNSPEC_LDS_A:
6406 case UNSPEC_LDSA:
6407 case UNSPEC_CHKACLR:
6408 case UNSPEC_CHKS:
6409 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6410 break;
6411
6412 case UNSPEC_FR_RECIP_APPROX:
6413 case UNSPEC_SHRP:
6414 case UNSPEC_COPYSIGN:
6415 case UNSPEC_FR_RECIP_APPROX_RES:
6416 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6417 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6418 break;
6419
6420 case UNSPEC_CMPXCHG_ACQ:
6421 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6422 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6423 break;
6424
6425 default:
6426 gcc_unreachable ();
6427 }
6428 break;
6429
6430 case UNSPEC_VOLATILE:
6431 switch (XINT (x, 1))
6432 {
6433 case UNSPECV_ALLOC:
6434 /* Alloc must always be the first instruction of a group.
6435 We force this by always returning true. */
6436 /* ??? We might get better scheduling if we explicitly check for
6437 input/local/output register dependencies, and modify the
6438 scheduler so that alloc is always reordered to the start of
6439 the current group. We could then eliminate all of the
6440 first_instruction code. */
6441 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6442
6443 new_flags.is_write = 1;
6444 rws_access_regno (REG_AR_CFM, new_flags, pred);
6445 return 1;
6446
6447 case UNSPECV_SET_BSP:
6448 need_barrier = 1;
6449 break;
6450
6451 case UNSPECV_BLOCKAGE:
6452 case UNSPECV_INSN_GROUP_BARRIER:
6453 case UNSPECV_BREAK:
6454 case UNSPECV_PSAC_ALL:
6455 case UNSPECV_PSAC_NORMAL:
6456 return 0;
6457
6458 default:
6459 gcc_unreachable ();
6460 }
6461 break;
6462
6463 case RETURN:
6464 new_flags.is_write = 0;
6465 need_barrier = rws_access_regno (REG_RP, flags, pred);
6466 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6467
6468 new_flags.is_write = 1;
6469 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6470 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6471 break;
6472
6473 default:
6474 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6475 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6476 switch (format_ptr[i])
6477 {
6478 case '0': /* unused field */
6479 case 'i': /* integer */
6480 case 'n': /* note */
6481 case 'w': /* wide integer */
6482 case 's': /* pointer to string */
6483 case 'S': /* optional pointer to string */
6484 break;
6485
6486 case 'e':
6487 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6488 need_barrier = 1;
6489 break;
6490
6491 case 'E':
6492 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6493 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6494 need_barrier = 1;
6495 break;
6496
6497 default:
6498 gcc_unreachable ();
6499 }
6500 break;
6501 }
6502 return need_barrier;
6503 }
6504
6505 /* Clear out the state for group_barrier_needed at the start of a
6506 sequence of insns. */
6507
6508 static void
6509 init_insn_group_barriers (void)
6510 {
6511 memset (rws_sum, 0, sizeof (rws_sum));
6512 first_instruction = 1;
6513 }
6514
6515 /* Given the current state, determine whether a group barrier (a stop bit) is
6516 necessary before INSN. Return nonzero if so. This modifies the state to
6517 include the effects of INSN as a side-effect. */
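/* For instance, "add r14 = r2, r3" followed in the same group by
   "ld8 r15 = [r14]" is a RAW dependence on r14; group_barrier_needed
   returns nonzero for the load, so a stop bit (";;") is inserted between
   the two (illustrative assembly, not taken from actual output). */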
6518
6519 static int
6520 group_barrier_needed (rtx insn)
6521 {
6522 rtx pat;
6523 int need_barrier = 0;
6524 struct reg_flags flags;
6525
6526 memset (&flags, 0, sizeof (flags));
6527 switch (GET_CODE (insn))
6528 {
6529 case NOTE:
6530 case DEBUG_INSN:
6531 break;
6532
6533 case BARRIER:
6534 /* A barrier doesn't imply an instruction group boundary. */
6535 break;
6536
6537 case CODE_LABEL:
6538 memset (rws_insn, 0, sizeof (rws_insn));
6539 return 1;
6540
6541 case CALL_INSN:
6542 flags.is_branch = 1;
6543 flags.is_sibcall = SIBLING_CALL_P (insn);
6544 memset (rws_insn, 0, sizeof (rws_insn));
6545
6546 /* Don't bundle a call following another call. */
6547 if ((pat = prev_active_insn (insn))
6548 && GET_CODE (pat) == CALL_INSN)
6549 {
6550 need_barrier = 1;
6551 break;
6552 }
6553
6554 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6555 break;
6556
6557 case JUMP_INSN:
6558 if (!ia64_spec_check_p (insn))
6559 flags.is_branch = 1;
6560
6561 /* Don't bundle a jump following a call. */
6562 if ((pat = prev_active_insn (insn))
6563 && GET_CODE (pat) == CALL_INSN)
6564 {
6565 need_barrier = 1;
6566 break;
6567 }
6568 /* FALLTHRU */
6569
6570 case INSN:
6571 if (GET_CODE (PATTERN (insn)) == USE
6572 || GET_CODE (PATTERN (insn)) == CLOBBER)
6573 /* Don't care about USE and CLOBBER "insns"---those are used to
6574 indicate to the optimizer that it shouldn't get rid of
6575 certain operations. */
6576 break;
6577
6578 pat = PATTERN (insn);
6579
6580 /* Ug. Hack hacks hacked elsewhere. */
6581 switch (recog_memoized (insn))
6582 {
6583 /* We play dependency tricks with the epilogue in order
6584 to get proper schedules. Undo this for dv analysis. */
6585 case CODE_FOR_epilogue_deallocate_stack:
6586 case CODE_FOR_prologue_allocate_stack:
6587 pat = XVECEXP (pat, 0, 0);
6588 break;
6589
6590 /* The pattern we use for br.cloop confuses the code above.
6591 The second element of the vector is representative. */
6592 case CODE_FOR_doloop_end_internal:
6593 pat = XVECEXP (pat, 0, 1);
6594 break;
6595
6596 /* Doesn't generate code. */
6597 case CODE_FOR_pred_rel_mutex:
6598 case CODE_FOR_prologue_use:
6599 return 0;
6600
6601 default:
6602 break;
6603 }
6604
6605 memset (rws_insn, 0, sizeof (rws_insn));
6606 need_barrier = rtx_needs_barrier (pat, flags, 0);
6607
6608 /* Check to see if the previous instruction was a volatile
6609 asm. */
6610 if (! need_barrier)
6611 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6612
6613 break;
6614
6615 default:
6616 gcc_unreachable ();
6617 }
6618
6619 if (first_instruction && INSN_P (insn)
6620 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6621 && GET_CODE (PATTERN (insn)) != USE
6622 && GET_CODE (PATTERN (insn)) != CLOBBER)
6623 {
6624 need_barrier = 0;
6625 first_instruction = 0;
6626 }
6627
6628 return need_barrier;
6629 }
6630
6631 /* Like group_barrier_needed, but do not clobber the current state. */
6632
6633 static int
6634 safe_group_barrier_needed (rtx insn)
6635 {
6636 int saved_first_instruction;
6637 int t;
6638
6639 saved_first_instruction = first_instruction;
6640 in_safe_group_barrier = 1;
6641
6642 t = group_barrier_needed (insn);
6643
6644 first_instruction = saved_first_instruction;
6645 in_safe_group_barrier = 0;
6646
6647 return t;
6648 }
6649
6650 /* Scan the current function and insert stop bits as necessary to
6651 eliminate dependencies. This function assumes that a final
6652 instruction scheduling pass has been run which has already
6653 inserted most of the necessary stop bits. This function only
6654 inserts new ones at basic block boundaries, since these are
6655 invisible to the scheduler. */
6656
6657 static void
6658 emit_insn_group_barriers (FILE *dump)
6659 {
6660 rtx insn;
6661 rtx last_label = 0;
6662 int insns_since_last_label = 0;
6663
6664 init_insn_group_barriers ();
6665
6666 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6667 {
6668 if (GET_CODE (insn) == CODE_LABEL)
6669 {
6670 if (insns_since_last_label)
6671 last_label = insn;
6672 insns_since_last_label = 0;
6673 }
6674 else if (GET_CODE (insn) == NOTE
6675 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6676 {
6677 if (insns_since_last_label)
6678 last_label = insn;
6679 insns_since_last_label = 0;
6680 }
6681 else if (GET_CODE (insn) == INSN
6682 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6683 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6684 {
6685 init_insn_group_barriers ();
6686 last_label = 0;
6687 }
6688 else if (NONDEBUG_INSN_P (insn))
6689 {
6690 insns_since_last_label = 1;
6691
6692 if (group_barrier_needed (insn))
6693 {
6694 if (last_label)
6695 {
6696 if (dump)
6697 fprintf (dump, "Emitting stop before label %d\n",
6698 INSN_UID (last_label));
6699 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6700 insn = last_label;
6701
6702 init_insn_group_barriers ();
6703 last_label = 0;
6704 }
6705 }
6706 }
6707 }
6708 }
6709
6710 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6711 This function has to emit all necessary group barriers. */
6712
6713 static void
6714 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6715 {
6716 rtx insn;
6717
6718 init_insn_group_barriers ();
6719
6720 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6721 {
6722 if (GET_CODE (insn) == BARRIER)
6723 {
6724 rtx last = prev_active_insn (insn);
6725
6726 if (! last)
6727 continue;
6728 if (GET_CODE (last) == JUMP_INSN
6729 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6730 last = prev_active_insn (last);
6731 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6732 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6733
6734 init_insn_group_barriers ();
6735 }
6736 else if (NONDEBUG_INSN_P (insn))
6737 {
6738 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6739 init_insn_group_barriers ();
6740 else if (group_barrier_needed (insn))
6741 {
6742 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6743 init_insn_group_barriers ();
6744 group_barrier_needed (insn);
6745 }
6746 }
6747 }
6748 }
6749
6750 \f
6751
6752 /* Instruction scheduling support. */
6753
6754 #define NR_BUNDLES 10
6755
6756 /* A list of names of all available bundles. */
6757
6758 static const char *bundle_name [NR_BUNDLES] =
6759 {
6760 ".mii",
6761 ".mmi",
6762 ".mfi",
6763 ".mmf",
6764 #if NR_BUNDLES == 10
6765 ".bbb",
6766 ".mbb",
6767 #endif
6768 ".mib",
6769 ".mmb",
6770 ".mfb",
6771 ".mlx"
6772 };
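/* The names encode the three instruction slots of each bundle:
   m = memory, i = integer, f = floating point, b = branch, and "lx" is
   a long-immediate instruction occupying the last two slots. */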
6773
6774 /* Nonzero if we should insert stop bits into the schedule. */
6775
6776 int ia64_final_schedule = 0;
6777
6778 /* Codes of the corresponding queried units: */
6779
6780 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6781 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6782
6783 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6784 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6785
6786 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6787
6788 /* The following variable value is an insn group barrier. */
6789
6790 static rtx dfa_stop_insn;
6791
6792 /* The following variable value is the last issued insn. */
6793
6794 static rtx last_scheduled_insn;
6795
6796 /* The following variable is a pointer to a DFA state used as a
6797 temporary variable. */
6798
6799 static state_t temp_dfa_state = NULL;
6800
6801 /* The following variable is the DFA state after issuing the last
6802 insn. */
6803
6804 static state_t prev_cycle_state = NULL;
6805
6806 /* An element of the following array is TRUE if a stop bit must be
6807 added before the corresponding insn. */
6808
6809 static char *stops_p = NULL;
6810
6811 /* The following variable is used to set up the above-mentioned array. */
6812
6813 static int stop_before_p = 0;
6814
6815 /* The following variable is the length of the arrays `clocks' and
6816 `add_cycles'. */
6817
6818 static int clocks_length;
6819
6820 /* The following variable is the number of data speculations in progress. */
6821 static int pending_data_specs = 0;
6822
6823 /* Number of memory references on the current and the three following processor cycles. */
6824 static char mem_ops_in_group[4];
6825
6826 /* Number of the current processor cycle (from the scheduler's point of view). */
6827 static int current_cycle;
6828
6829 static rtx ia64_single_set (rtx);
6830 static void ia64_emit_insn_before (rtx, rtx);
6831
6832 /* Map a bundle number to its pseudo-op. */
6833
6834 const char *
6835 get_bundle_name (int b)
6836 {
6837 return bundle_name[b];
6838 }
6839
6840
6841 /* Return the maximum number of instructions a cpu can issue. */
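/* Six corresponds to two three-slot bundles per clock, the dispersal
   width of the Itanium and Itanium 2 cores. */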
6842
6843 static int
6844 ia64_issue_rate (void)
6845 {
6846 return 6;
6847 }
6848
6849 /* Helper function - like single_set, but look inside COND_EXEC. */
6850
6851 static rtx
6852 ia64_single_set (rtx insn)
6853 {
6854 rtx x = PATTERN (insn), ret;
6855 if (GET_CODE (x) == COND_EXEC)
6856 x = COND_EXEC_CODE (x);
6857 if (GET_CODE (x) == SET)
6858 return x;
6859
6860 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6861 Although they are not classical single set, the second set is there just
6862 to protect it from moving past FP-relative stack accesses. */
6863 switch (recog_memoized (insn))
6864 {
6865 case CODE_FOR_prologue_allocate_stack:
6866 case CODE_FOR_epilogue_deallocate_stack:
6867 ret = XVECEXP (x, 0, 0);
6868 break;
6869
6870 default:
6871 ret = single_set_2 (insn, x);
6872 break;
6873 }
6874
6875 return ret;
6876 }
6877
6878 /* Adjust the cost of a scheduling dependency.
6879 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6880 COST is the current cost, DW is dependency weakness. */
6881 static int
6882 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6883 {
6884 enum reg_note dep_type = (enum reg_note) dep_type1;
6885 enum attr_itanium_class dep_class;
6886 enum attr_itanium_class insn_class;
6887
6888 insn_class = ia64_safe_itanium_class (insn);
6889 dep_class = ia64_safe_itanium_class (dep_insn);
6890
6891 /* Treat true memory dependencies separately. Ignore apparent true
6892 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6893 if (dep_type == REG_DEP_TRUE
6894 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6895 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6896 return 0;
6897
6898 if (dw == MIN_DEP_WEAK)
6899 /* Store and load are likely to alias, use higher cost to avoid stall. */
6900 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6901 else if (dw > MIN_DEP_WEAK)
6902 {
6903 /* Store and load are less likely to alias. */
6904 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6905 /* Assume there will be no cache conflict for floating-point data.
6906 For integer data, L1 conflict penalty is huge (17 cycles), so we
6907 never assume it will not cause a conflict. */
6908 return 0;
6909 else
6910 return cost;
6911 }
6912
6913 if (dep_type != REG_DEP_OUTPUT)
6914 return cost;
6915
6916 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6917 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6918 return 0;
6919
6920 return cost;
6921 }
6922
6923 /* Like emit_insn_before, but skip cycle_display notes.
6924 ??? When cycle display notes are implemented, update this. */
6925
6926 static void
6927 ia64_emit_insn_before (rtx insn, rtx before)
6928 {
6929 emit_insn_before (insn, before);
6930 }
6931
6932 /* The following function marks insns that produce addresses for load
6933 and store insns. Such insns will be placed into M slots because this
6934 decreases latency for Itanium 1 (see function
6935 `ia64_produce_address_p' and the DFA descriptions). */
6936
6937 static void
6938 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6939 {
6940 rtx insn, next, next_tail;
6941
6942 /* Before reload, which_alternative is not set, which means that
6943 ia64_safe_itanium_class will produce wrong results for (at least)
6944 move instructions. */
6945 if (!reload_completed)
6946 return;
6947
6948 next_tail = NEXT_INSN (tail);
6949 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6950 if (INSN_P (insn))
6951 insn->call = 0;
6952 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6953 if (INSN_P (insn)
6954 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6955 {
6956 sd_iterator_def sd_it;
6957 dep_t dep;
6958 bool has_mem_op_consumer_p = false;
6959
6960 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6961 {
6962 enum attr_itanium_class c;
6963
6964 if (DEP_TYPE (dep) != REG_DEP_TRUE)
6965 continue;
6966
6967 next = DEP_CON (dep);
6968 c = ia64_safe_itanium_class (next);
6969 if ((c == ITANIUM_CLASS_ST
6970 || c == ITANIUM_CLASS_STF)
6971 && ia64_st_address_bypass_p (insn, next))
6972 {
6973 has_mem_op_consumer_p = true;
6974 break;
6975 }
6976 else if ((c == ITANIUM_CLASS_LD
6977 || c == ITANIUM_CLASS_FLD
6978 || c == ITANIUM_CLASS_FLDP)
6979 && ia64_ld_address_bypass_p (insn, next))
6980 {
6981 has_mem_op_consumer_p = true;
6982 break;
6983 }
6984 }
6985
6986 insn->call = has_mem_op_consumer_p;
6987 }
6988 }
6989
6990 /* We're beginning a new block. Initialize data structures as necessary. */
6991
6992 static void
6993 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6994 int sched_verbose ATTRIBUTE_UNUSED,
6995 int max_ready ATTRIBUTE_UNUSED)
6996 {
6997 #ifdef ENABLE_CHECKING
6998 rtx insn;
6999
7000 if (!sel_sched_p () && reload_completed)
7001 for (insn = NEXT_INSN (current_sched_info->prev_head);
7002 insn != current_sched_info->next_tail;
7003 insn = NEXT_INSN (insn))
7004 gcc_assert (!SCHED_GROUP_P (insn));
7005 #endif
7006 last_scheduled_insn = NULL_RTX;
7007 init_insn_group_barriers ();
7008
7009 current_cycle = 0;
7010 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7011 }
7012
7013 /* We're beginning a scheduling pass. Check assertion. */
7014
7015 static void
7016 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7017 int sched_verbose ATTRIBUTE_UNUSED,
7018 int max_ready ATTRIBUTE_UNUSED)
7019 {
7020 gcc_assert (pending_data_specs == 0);
7021 }
7022
7023 /* Scheduling pass is now finished. Free/reset static variable. */
7024 static void
7025 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7026 int sched_verbose ATTRIBUTE_UNUSED)
7027 {
7028 gcc_assert (pending_data_specs == 0);
7029 }
7030
7031 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7032 speculation check), FALSE otherwise. */
7033 static bool
7034 is_load_p (rtx insn)
7035 {
7036 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7037
7038 return
7039 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7040 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7041 }
7042
7043 /* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global
7044 array, taking into account the 3-cycle cache-reference postponement for
7045 stores (see the Intel Itanium 2 Reference Manual for Software Development
7046 and Optimization, section 6.7.3.1). */
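/* E.g. a store issued on cycle C is counted in
   mem_ops_in_group[(C + 3) % 4], i.e. against cycle C + 3, while a load
   counts against the current cycle. */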
7047 static void
7048 record_memory_reference (rtx insn)
7049 {
7050 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7051
7052 switch (insn_class)
{
7053 case ITANIUM_CLASS_FLD:
7054 case ITANIUM_CLASS_LD:
7055 mem_ops_in_group[current_cycle % 4]++;
7056 break;
7057 case ITANIUM_CLASS_STF:
7058 case ITANIUM_CLASS_ST:
7059 mem_ops_in_group[(current_cycle + 3) % 4]++;
7060 break;
7061 default:;
7062 }
7063 }
7064
7065 /* We are about to begin issuing insns for this clock cycle.
7066 Override the default sort algorithm to better slot instructions. */
7067
7068 static int
7069 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
7070 int *pn_ready, int clock_var,
7071 int reorder_type)
7072 {
7073 int n_asms;
7074 int n_ready = *pn_ready;
7075 rtx *e_ready = ready + n_ready;
7076 rtx *insnp;
7077
7078 if (sched_verbose)
7079 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7080
7081 if (reorder_type == 0)
7082 {
7083 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7084 n_asms = 0;
7085 for (insnp = ready; insnp < e_ready; insnp++)
7086 if (insnp < e_ready)
7087 {
7088 rtx insn = *insnp;
7089 enum attr_type t = ia64_safe_type (insn);
7090 if (t == TYPE_UNKNOWN)
7091 {
7092 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7093 || asm_noperands (PATTERN (insn)) >= 0)
7094 {
7095 rtx lowest = ready[n_asms];
7096 ready[n_asms] = insn;
7097 *insnp = lowest;
7098 n_asms++;
7099 }
7100 else
7101 {
7102 rtx highest = ready[n_ready - 1];
7103 ready[n_ready - 1] = insn;
7104 *insnp = highest;
7105 return 1;
7106 }
7107 }
7108 }
7109
7110 if (n_asms < n_ready)
7111 {
7112 /* Some normal insns to process. Skip the asms. */
7113 ready += n_asms;
7114 n_ready -= n_asms;
7115 }
7116 else if (n_ready > 0)
7117 return 1;
7118 }
7119
7120 if (ia64_final_schedule)
7121 {
7122 int deleted = 0;
7123 int nr_need_stop = 0;
7124
7125 for (insnp = ready; insnp < e_ready; insnp++)
7126 if (safe_group_barrier_needed (*insnp))
7127 nr_need_stop++;
7128
7129 if (reorder_type == 1 && n_ready == nr_need_stop)
7130 return 0;
7131 if (reorder_type == 0)
7132 return 1;
7133 insnp = e_ready;
7134 /* Move down everything that needs a stop bit, preserving
7135 relative order. */
7136 while (insnp-- > ready + deleted)
7137 while (insnp >= ready + deleted)
7138 {
7139 rtx insn = *insnp;
7140 if (! safe_group_barrier_needed (insn))
7141 break;
7142 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7143 *ready = insn;
7144 deleted++;
7145 }
7146 n_ready -= deleted;
7147 ready += deleted;
7148 }
7149
7150 current_cycle = clock_var;
7151 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7152 {
7153 int moved = 0;
7154
7155 insnp = e_ready;
7156 /* Move down loads/stores, preserving relative order. */
7157 while (insnp-- > ready + moved)
7158 while (insnp >= ready + moved)
7159 {
7160 rtx insn = *insnp;
7161 if (! is_load_p (insn))
7162 break;
7163 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7164 *ready = insn;
7165 moved++;
7166 }
7167 n_ready -= moved;
7168 ready += moved;
7169 }
7170
7171 return 1;
7172 }
7173
7174 /* We are about to begin issuing insns for this clock cycle. Override
7175 the default sort algorithm to better slot instructions. */
7176
7177 static int
7178 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7179 int clock_var)
7180 {
7181 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7182 pn_ready, clock_var, 0);
7183 }
7184
7185 /* Like ia64_sched_reorder, but called after issuing each insn.
7186 Override the default sort algorithm to better slot instructions. */
7187
7188 static int
7189 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7190 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7191 int *pn_ready, int clock_var)
7192 {
7193 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7194 clock_var, 1);
7195 }
7196
7197 /* We are about to issue INSN. Return the number of insns left on the
7198 ready queue that can be issued this cycle. */
7199
7200 static int
7201 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7202 int sched_verbose ATTRIBUTE_UNUSED,
7203 rtx insn ATTRIBUTE_UNUSED,
7204 int can_issue_more ATTRIBUTE_UNUSED)
7205 {
7206 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7207 /* Modulo scheduling does not extend h_i_d when emitting
7208 new instructions. Don't use h_i_d if we don't have to. */
7209 {
7210 if (DONE_SPEC (insn) & BEGIN_DATA)
7211 pending_data_specs++;
7212 if (CHECK_SPEC (insn) & BEGIN_DATA)
7213 pending_data_specs--;
7214 }
7215
7216 if (DEBUG_INSN_P (insn))
7217 return 1;
7218
7219 last_scheduled_insn = insn;
7220 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7221 if (reload_completed)
7222 {
7223 int needed = group_barrier_needed (insn);
7224
7225 gcc_assert (!needed);
7226 if (GET_CODE (insn) == CALL_INSN)
7227 init_insn_group_barriers ();
7228 stops_p [INSN_UID (insn)] = stop_before_p;
7229 stop_before_p = 0;
7230
7231 record_memory_reference (insn);
7232 }
7233 return 1;
7234 }
7235
7236 /* We are choosing insn from the ready queue. Return nonzero if INSN
7237 can be chosen. */
7238
7239 static int
7240 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7241 {
7242 gcc_assert (insn && INSN_P (insn));
7243 return ((!reload_completed
7244 || !safe_group_barrier_needed (insn))
7245 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7246 && (!mflag_sched_mem_insns_hard_limit
7247 || !is_load_p (insn)
7248 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7249 }
7250
7251 /* We are choosing insn from the ready queue. Return nonzero if INSN
7252 can be chosen, taking the data speculation (ALAT) limit into account. */
7253
7254 static bool
7255 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7256 {
7257 gcc_assert (insn && INSN_P (insn));
7258 /* Size of ALAT is 32. As far as we perform conservative data speculation,
7259 we keep ALAT half-empty. */
7260 return (pending_data_specs < 16
7261 || !(TODO_SPEC (insn) & BEGIN_DATA));
7262 }
7263
7264 /* The following variable holds a pseudo-insn used by the DFA insn
7265 scheduler to change the DFA state when the simulated clock is
7266 increased. */
7267
7268 static rtx dfa_pre_cycle_insn;
7269
7270 /* Returns 1 when a meaningful insn was scheduled between the last group
7271 barrier and LAST. */
7272 static int
7273 scheduled_good_insn (rtx last)
7274 {
7275 if (last && recog_memoized (last) >= 0)
7276 return 1;
7277
7278 for ( ;
7279 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7280 && !stops_p[INSN_UID (last)];
7281 last = PREV_INSN (last))
7282 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7283 the ebb we're scheduling. */
7284 if (INSN_P (last) && recog_memoized (last) >= 0)
7285 return 1;
7286
7287 return 0;
7288 }
7289
7290 /* We are about to begin issuing INSN. Return nonzero if we cannot
7291 issue it on the given cycle CLOCK; *SORT_P is cleared if the ready
7292 queue should not be sorted on the next clock start. */
7293
7294 static int
7295 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7296 int clock, int *sort_p)
7297 {
7298 gcc_assert (insn && INSN_P (insn));
7299
7300 if (DEBUG_INSN_P (insn))
7301 return 0;
7302
7303 /* When a group barrier is needed for insn, last_scheduled_insn
7304 should be set. */
7305 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7306 || last_scheduled_insn);
7307
7308 if ((reload_completed
7309 && (safe_group_barrier_needed (insn)
7310 || (mflag_sched_stop_bits_after_every_cycle
7311 && last_clock != clock
7312 && last_scheduled_insn
7313 && scheduled_good_insn (last_scheduled_insn))))
7314 || (last_scheduled_insn
7315 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7316 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7317 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7318 {
7319 init_insn_group_barriers ();
7320
7321 if (verbose && dump)
7322 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7323 last_clock == clock ? " + cycle advance" : "");
7324
7325 stop_before_p = 1;
7326 current_cycle = clock;
7327 mem_ops_in_group[current_cycle % 4] = 0;
7328
7329 if (last_clock == clock)
7330 {
7331 state_transition (curr_state, dfa_stop_insn);
7332 if (TARGET_EARLY_STOP_BITS)
7333 *sort_p = (last_scheduled_insn == NULL_RTX
7334 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7335 else
7336 *sort_p = 0;
7337 return 1;
7338 }
7339
7340 if (last_scheduled_insn)
7341 {
7342 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7343 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7344 state_reset (curr_state);
7345 else
7346 {
7347 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7348 state_transition (curr_state, dfa_stop_insn);
7349 state_transition (curr_state, dfa_pre_cycle_insn);
7350 state_transition (curr_state, NULL);
7351 }
7352 }
7353 }
7354 return 0;
7355 }
7356
7357 /* Implement targetm.sched.h_i_d_extended hook.
7358 Extend internal data structures. */
7359 static void
7360 ia64_h_i_d_extended (void)
7361 {
7362 if (stops_p != NULL)
7363 {
7364 int new_clocks_length = get_max_uid () * 3 / 2;
7365 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7366 clocks_length = new_clocks_length;
7367 }
7368 }
7369 \f
7370
7371 /* This structure describes the data used by the backend to guide scheduling.
7372 When the current scheduling point is switched, this data should be saved
7373 and restored later, if the scheduler returns to this point. */
7374 struct _ia64_sched_context
7375 {
7376 state_t prev_cycle_state;
7377 rtx last_scheduled_insn;
7378 struct reg_write_state rws_sum[NUM_REGS];
7379 struct reg_write_state rws_insn[NUM_REGS];
7380 int first_instruction;
7381 int pending_data_specs;
7382 int current_cycle;
7383 char mem_ops_in_group[4];
7384 };
7385 typedef struct _ia64_sched_context *ia64_sched_context_t;
7386
7387 /* Allocates a scheduling context. */
7388 static void *
7389 ia64_alloc_sched_context (void)
7390 {
7391 return xmalloc (sizeof (struct _ia64_sched_context));
7392 }
7393
7394 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7395 the global context otherwise. */
7396 static void
7397 ia64_init_sched_context (void *_sc, bool clean_p)
7398 {
7399 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7400
7401 sc->prev_cycle_state = xmalloc (dfa_state_size);
7402 if (clean_p)
7403 {
7404 state_reset (sc->prev_cycle_state);
7405 sc->last_scheduled_insn = NULL_RTX;
7406 memset (sc->rws_sum, 0, sizeof (rws_sum));
7407 memset (sc->rws_insn, 0, sizeof (rws_insn));
7408 sc->first_instruction = 1;
7409 sc->pending_data_specs = 0;
7410 sc->current_cycle = 0;
7411 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7412 }
7413 else
7414 {
7415 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7416 sc->last_scheduled_insn = last_scheduled_insn;
7417 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7418 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7419 sc->first_instruction = first_instruction;
7420 sc->pending_data_specs = pending_data_specs;
7421 sc->current_cycle = current_cycle;
7422 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7423 }
7424 }
7425
7426 /* Sets the global scheduling context to the one pointed to by _SC. */
7427 static void
7428 ia64_set_sched_context (void *_sc)
7429 {
7430 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7431
7432 gcc_assert (sc != NULL);
7433
7434 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7435 last_scheduled_insn = sc->last_scheduled_insn;
7436 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7437 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7438 first_instruction = sc->first_instruction;
7439 pending_data_specs = sc->pending_data_specs;
7440 current_cycle = sc->current_cycle;
7441 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7442 }
7443
7444 /* Clears the data in the _SC scheduling context. */
7445 static void
7446 ia64_clear_sched_context (void *_sc)
7447 {
7448 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7449
7450 free (sc->prev_cycle_state);
7451 sc->prev_cycle_state = NULL;
7452 }
7453
7454 /* Frees the _SC scheduling context. */
7455 static void
7456 ia64_free_sched_context (void *_sc)
7457 {
7458 gcc_assert (_sc != NULL);
7459
7460 free (_sc);
7461 }
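/* Editorial sketch, not part of the original ia64.c: the context hooks
   above are a plain save/restore of the backend's global scheduling
   state.  The toy below (hypothetical toy_ names, only two of the saved
   fields) shows the same pattern in isolation; it is an illustration of
   the design, not the selective scheduler's actual calling sequence.
   Guarded by "#if 0" so it does not affect compilation.  */
#if 0
#include <string.h>

static int toy_current_cycle;
static char toy_mem_ops_in_group[4];

struct toy_sched_context
{
  int current_cycle;
  char mem_ops_in_group[4];
};

/* Capture the globals into *SC (cf. ia64_init_sched_context with
   CLEAN_P false).  */
static void
toy_save_context (struct toy_sched_context *sc)
{
  sc->current_cycle = toy_current_cycle;
  memcpy (sc->mem_ops_in_group, toy_mem_ops_in_group,
          sizeof (toy_mem_ops_in_group));
}

/* Copy *SC back into the globals (cf. ia64_set_sched_context).  */
static void
toy_restore_context (const struct toy_sched_context *sc)
{
  toy_current_cycle = sc->current_cycle;
  memcpy (toy_mem_ops_in_group, sc->mem_ops_in_group,
          sizeof (toy_mem_ops_in_group));
}
#endif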
7462
7463 typedef rtx (* gen_func_t) (rtx, rtx);
7464
7465 /* Return a function that will generate a load with mode index MODE_NO
7466 and speculation types TS. */
7467 static gen_func_t
7468 get_spec_load_gen_function (ds_t ts, int mode_no)
7469 {
7470 static gen_func_t gen_ld_[] = {
7471 gen_movbi,
7472 gen_movqi_internal,
7473 gen_movhi_internal,
7474 gen_movsi_internal,
7475 gen_movdi_internal,
7476 gen_movsf_internal,
7477 gen_movdf_internal,
7478 gen_movxf_internal,
7479 gen_movti_internal,
7480 gen_zero_extendqidi2,
7481 gen_zero_extendhidi2,
7482 gen_zero_extendsidi2,
7483 };
7484
7485 static gen_func_t gen_ld_a[] = {
7486 gen_movbi_advanced,
7487 gen_movqi_advanced,
7488 gen_movhi_advanced,
7489 gen_movsi_advanced,
7490 gen_movdi_advanced,
7491 gen_movsf_advanced,
7492 gen_movdf_advanced,
7493 gen_movxf_advanced,
7494 gen_movti_advanced,
7495 gen_zero_extendqidi2_advanced,
7496 gen_zero_extendhidi2_advanced,
7497 gen_zero_extendsidi2_advanced,
7498 };
7499 static gen_func_t gen_ld_s[] = {
7500 gen_movbi_speculative,
7501 gen_movqi_speculative,
7502 gen_movhi_speculative,
7503 gen_movsi_speculative,
7504 gen_movdi_speculative,
7505 gen_movsf_speculative,
7506 gen_movdf_speculative,
7507 gen_movxf_speculative,
7508 gen_movti_speculative,
7509 gen_zero_extendqidi2_speculative,
7510 gen_zero_extendhidi2_speculative,
7511 gen_zero_extendsidi2_speculative,
7512 };
7513 static gen_func_t gen_ld_sa[] = {
7514 gen_movbi_speculative_advanced,
7515 gen_movqi_speculative_advanced,
7516 gen_movhi_speculative_advanced,
7517 gen_movsi_speculative_advanced,
7518 gen_movdi_speculative_advanced,
7519 gen_movsf_speculative_advanced,
7520 gen_movdf_speculative_advanced,
7521 gen_movxf_speculative_advanced,
7522 gen_movti_speculative_advanced,
7523 gen_zero_extendqidi2_speculative_advanced,
7524 gen_zero_extendhidi2_speculative_advanced,
7525 gen_zero_extendsidi2_speculative_advanced,
7526 };
7527 static gen_func_t gen_ld_s_a[] = {
7528 gen_movbi_speculative_a,
7529 gen_movqi_speculative_a,
7530 gen_movhi_speculative_a,
7531 gen_movsi_speculative_a,
7532 gen_movdi_speculative_a,
7533 gen_movsf_speculative_a,
7534 gen_movdf_speculative_a,
7535 gen_movxf_speculative_a,
7536 gen_movti_speculative_a,
7537 gen_zero_extendqidi2_speculative_a,
7538 gen_zero_extendhidi2_speculative_a,
7539 gen_zero_extendsidi2_speculative_a,
7540 };
7541
7542 gen_func_t *gen_ld;
7543
7544 if (ts & BEGIN_DATA)
7545 {
7546 if (ts & BEGIN_CONTROL)
7547 gen_ld = gen_ld_sa;
7548 else
7549 gen_ld = gen_ld_a;
7550 }
7551 else if (ts & BEGIN_CONTROL)
7552 {
7553 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7554 || ia64_needs_block_p (ts))
7555 gen_ld = gen_ld_s;
7556 else
7557 gen_ld = gen_ld_s_a;
7558 }
7559 else if (ts == 0)
7560 gen_ld = gen_ld_;
7561 else
7562 gcc_unreachable ();
7563
7564 return gen_ld[mode_no];
7565 }
7566
7567 /* Constants that help mapping 'enum machine_mode' to int. */
7568 enum SPEC_MODES
7569 {
7570 SPEC_MODE_INVALID = -1,
7571 SPEC_MODE_FIRST = 0,
7572 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7573 SPEC_MODE_FOR_EXTEND_LAST = 3,
7574 SPEC_MODE_LAST = 8
7575 };
7576
7577 enum
7578 {
7579 /* Offset to reach ZERO_EXTEND patterns. */
7580 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7581 };
7582
7583 /* Return index of the MODE. */
7584 static int
7585 ia64_mode_to_int (enum machine_mode mode)
7586 {
7587 switch (mode)
7588 {
7589 case BImode: return 0; /* SPEC_MODE_FIRST */
7590 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7591 case HImode: return 2;
7592 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7593 case DImode: return 4;
7594 case SFmode: return 5;
7595 case DFmode: return 6;
7596 case XFmode: return 7;
7597 case TImode:
7598 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7599 mentioned in itanium[12].md. Predicate fp_register_operand also
7600 needs to be defined. Bottom line: better disable for now. */
7601 return SPEC_MODE_INVALID;
7602 default: return SPEC_MODE_INVALID;
7603 }
7604 }
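/* Editorial sketch, not part of the original ia64.c: how an index from
   ia64_mode_to_int selects an entry of the 12-element gen_ld_* and
   gen_check tables.  Indices 0..8 are the plain BImode..TImode moves;
   a zero-extending QImode/HImode/SImode load (indices 1..3) is shifted
   by SPEC_GEN_EXTEND_OFFSET (8) onto the gen_zero_extend*di2 entries at
   indices 9..11, exactly as get_mode_no_for_insn does below.  The
   sketch_ name is hypothetical; guarded by "#if 0".  */
#if 0
static int
sketch_gen_table_index (int mode_no, int extend_p)
{
  if (extend_p)
    {
      /* Only QImode, HImode and SImode have zero-extending variants.  */
      if (mode_no < SPEC_MODE_FOR_EXTEND_FIRST
          || mode_no > SPEC_MODE_FOR_EXTEND_LAST)
        return SPEC_MODE_INVALID;
      return mode_no + SPEC_GEN_EXTEND_OFFSET;  /* 1..3 -> 9..11 */
    }
  return mode_no;  /* 0..8 unchanged */
}
#endif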
7605
7606 /* Provide information about speculation capabilities. */
7607 static void
7608 ia64_set_sched_flags (spec_info_t spec_info)
7609 {
7610 unsigned int *flags = &(current_sched_info->flags);
7611
7612 if (*flags & SCHED_RGN
7613 || *flags & SCHED_EBB
7614 || *flags & SEL_SCHED)
7615 {
7616 int mask = 0;
7617
7618 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7619 || (mflag_sched_ar_data_spec && reload_completed))
7620 {
7621 mask |= BEGIN_DATA;
7622
7623 if (!sel_sched_p ()
7624 && ((mflag_sched_br_in_data_spec && !reload_completed)
7625 || (mflag_sched_ar_in_data_spec && reload_completed)))
7626 mask |= BE_IN_DATA;
7627 }
7628
7629 if (mflag_sched_control_spec
7630 && (!sel_sched_p ()
7631 || reload_completed))
7632 {
7633 mask |= BEGIN_CONTROL;
7634
7635 if (!sel_sched_p () && mflag_sched_in_control_spec)
7636 mask |= BE_IN_CONTROL;
7637 }
7638
7639 spec_info->mask = mask;
7640
7641 if (mask)
7642 {
7643 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7644
7645 if (mask & BE_IN_SPEC)
7646 *flags |= NEW_BBS;
7647
7648 spec_info->flags = 0;
7649
7650 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7651 spec_info->flags |= PREFER_NON_DATA_SPEC;
7652
7653 if (mask & CONTROL_SPEC)
7654 {
7655 if (mflag_sched_prefer_non_control_spec_insns)
7656 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7657
7658 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7659 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7660 }
7661
7662 if (sched_verbose >= 1)
7663 spec_info->dump = sched_dump;
7664 else
7665 spec_info->dump = 0;
7666
7667 if (mflag_sched_count_spec_in_critical_path)
7668 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7669 }
7670 }
7671 else
7672 spec_info->mask = 0;
7673 }
7674
7675 /* If INSN is an appropriate load, return the index of its mode
7676 (see ia64_mode_to_int). Return -1 otherwise. */
7677 static int
7678 get_mode_no_for_insn (rtx insn)
7679 {
7680 rtx reg, mem, mode_rtx;
7681 int mode_no;
7682 bool extend_p;
7683
7684 extract_insn_cached (insn);
7685
7686 /* We use WHICH_ALTERNATIVE only after reload. This will
7687 guarantee that reload won't touch a speculative insn. */
7688
7689 if (recog_data.n_operands != 2)
7690 return -1;
7691
7692 reg = recog_data.operand[0];
7693 mem = recog_data.operand[1];
7694
7695 /* We should use MEM's mode since REG's mode in presence of
7696 ZERO_EXTEND will always be DImode. */
7697 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7698 /* Process non-speculative ld. */
7699 {
7700 if (!reload_completed)
7701 {
7702 /* Do not speculate into regs like ar.lc. */
7703 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7704 return -1;
7705
7706 if (!MEM_P (mem))
7707 return -1;
7708
7709 {
7710 rtx mem_reg = XEXP (mem, 0);
7711
7712 if (!REG_P (mem_reg))
7713 return -1;
7714 }
7715
7716 mode_rtx = mem;
7717 }
7718 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7719 {
7720 gcc_assert (REG_P (reg) && MEM_P (mem));
7721 mode_rtx = mem;
7722 }
7723 else
7724 return -1;
7725 }
7726 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7727 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7728 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7729 /* Process speculative ld or ld.c. */
7730 {
7731 gcc_assert (REG_P (reg) && MEM_P (mem));
7732 mode_rtx = mem;
7733 }
7734 else
7735 {
7736 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7737
7738 if (attr_class == ITANIUM_CLASS_CHK_A
7739 || attr_class == ITANIUM_CLASS_CHK_S_I
7740 || attr_class == ITANIUM_CLASS_CHK_S_F)
7741 /* Process chk. */
7742 mode_rtx = reg;
7743 else
7744 return -1;
7745 }
7746
7747 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7748
7749 if (mode_no == SPEC_MODE_INVALID)
7750 return -1;
7751
7752 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7753
7754 if (extend_p)
7755 {
7756 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7757 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7758 return -1;
7759
7760 mode_no += SPEC_GEN_EXTEND_OFFSET;
7761 }
7762
7763 return mode_no;
7764 }
7765
7766 /* If X is an unspec part of a speculative load, return its code.
7767 Return -1 otherwise. */
7768 static int
7769 get_spec_unspec_code (const_rtx x)
7770 {
7771 if (GET_CODE (x) != UNSPEC)
7772 return -1;
7773
7774 {
7775 int code;
7776
7777 code = XINT (x, 1);
7778
7779 switch (code)
7780 {
7781 case UNSPEC_LDA:
7782 case UNSPEC_LDS:
7783 case UNSPEC_LDS_A:
7784 case UNSPEC_LDSA:
7785 return code;
7786
7787 default:
7788 return -1;
7789 }
7790 }
7791 }
7792
7793 /* Implement skip_rtx_p hook. */
7794 static bool
7795 ia64_skip_rtx_p (const_rtx x)
7796 {
7797 return get_spec_unspec_code (x) != -1;
7798 }
7799
7800 /* If INSN is a speculative load, return its UNSPEC code.
7801 Return -1 otherwise. */
7802 static int
7803 get_insn_spec_code (const_rtx insn)
7804 {
7805 rtx pat, reg, mem;
7806
7807 pat = PATTERN (insn);
7808
7809 if (GET_CODE (pat) == COND_EXEC)
7810 pat = COND_EXEC_CODE (pat);
7811
7812 if (GET_CODE (pat) != SET)
7813 return -1;
7814
7815 reg = SET_DEST (pat);
7816 if (!REG_P (reg))
7817 return -1;
7818
7819 mem = SET_SRC (pat);
7820 if (GET_CODE (mem) == ZERO_EXTEND)
7821 mem = XEXP (mem, 0);
7822
7823 return get_spec_unspec_code (mem);
7824 }
7825
7826 /* If INSN is a speculative load, return a ds with the speculation types.
7827 Otherwise [if INSN is a normal instruction] return 0. */
7828 static ds_t
7829 ia64_get_insn_spec_ds (rtx insn)
7830 {
7831 int code = get_insn_spec_code (insn);
7832
7833 switch (code)
7834 {
7835 case UNSPEC_LDA:
7836 return BEGIN_DATA;
7837
7838 case UNSPEC_LDS:
7839 case UNSPEC_LDS_A:
7840 return BEGIN_CONTROL;
7841
7842 case UNSPEC_LDSA:
7843 return BEGIN_DATA | BEGIN_CONTROL;
7844
7845 default:
7846 return 0;
7847 }
7848 }
7849
7850 /* If INSN is a speculative load return a ds with the speculation types that
7851 will be checked.
7852 Otherwise [if INSN is a normal instruction] return 0. */
7853 static ds_t
7854 ia64_get_insn_checked_ds (rtx insn)
7855 {
7856 int code = get_insn_spec_code (insn);
7857
7858 switch (code)
7859 {
7860 case UNSPEC_LDA:
7861 return BEGIN_DATA | BEGIN_CONTROL;
7862
7863 case UNSPEC_LDS:
7864 return BEGIN_CONTROL;
7865
7866 case UNSPEC_LDS_A:
7867 case UNSPEC_LDSA:
7868 return BEGIN_DATA | BEGIN_CONTROL;
7869
7870 default:
7871 return 0;
7872 }
7873 }
7874
7875 /* Return a speculative load pattern for INSN with speculation types
7876 TS and mode index MODE_NO. The operands are taken from recog_data,
7877 so INSN must already have been through get_mode_no_for_insn. If the
7878 original pattern is a COND_EXEC, the predicate is preserved. */
7879 static rtx
7880 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7881 {
7882 rtx pat, new_pat;
7883 gen_func_t gen_load;
7884
7885 gen_load = get_spec_load_gen_function (ts, mode_no);
7886
7887 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7888 copy_rtx (recog_data.operand[1]));
7889
7890 pat = PATTERN (insn);
7891 if (GET_CODE (pat) == COND_EXEC)
7892 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7893 new_pat);
7894
7895 return new_pat;
7896 }
7897
7898 static bool
7899 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7900 ds_t ds ATTRIBUTE_UNUSED)
7901 {
7902 return false;
7903 }
7904
7905 /* Implement targetm.sched.speculate_insn hook.
7906 Check if the INSN can be TS speculative.
7907 If 'no' - return -1.
7908 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7909 If current pattern of the INSN already provides TS speculation,
7910 return 0. */
7911 static int
7912 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7913 {
7914 int mode_no;
7915 int res;
7916
7917 gcc_assert (!(ts & ~SPECULATIVE));
7918
7919 if (ia64_spec_check_p (insn))
7920 return -1;
7921
7922 if ((ts & BE_IN_SPEC)
7923 && !insn_can_be_in_speculative_p (insn, ts))
7924 return -1;
7925
7926 mode_no = get_mode_no_for_insn (insn);
7927
7928 if (mode_no != SPEC_MODE_INVALID)
7929 {
7930 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7931 res = 0;
7932 else
7933 {
7934 res = 1;
7935 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7936 }
7937 }
7938 else
7939 res = -1;
7940
7941 return res;
7942 }
7943
7944 /* Return a function that will generate a check for speculation TS with mode
7945 MODE_NO.
7946 If simple check is needed, pass true for SIMPLE_CHECK_P.
7947 If clearing check is needed, pass true for CLEARING_CHECK_P. */
7948 static gen_func_t
7949 get_spec_check_gen_function (ds_t ts, int mode_no,
7950 bool simple_check_p, bool clearing_check_p)
7951 {
7952 static gen_func_t gen_ld_c_clr[] = {
7953 gen_movbi_clr,
7954 gen_movqi_clr,
7955 gen_movhi_clr,
7956 gen_movsi_clr,
7957 gen_movdi_clr,
7958 gen_movsf_clr,
7959 gen_movdf_clr,
7960 gen_movxf_clr,
7961 gen_movti_clr,
7962 gen_zero_extendqidi2_clr,
7963 gen_zero_extendhidi2_clr,
7964 gen_zero_extendsidi2_clr,
7965 };
7966 static gen_func_t gen_ld_c_nc[] = {
7967 gen_movbi_nc,
7968 gen_movqi_nc,
7969 gen_movhi_nc,
7970 gen_movsi_nc,
7971 gen_movdi_nc,
7972 gen_movsf_nc,
7973 gen_movdf_nc,
7974 gen_movxf_nc,
7975 gen_movti_nc,
7976 gen_zero_extendqidi2_nc,
7977 gen_zero_extendhidi2_nc,
7978 gen_zero_extendsidi2_nc,
7979 };
7980 static gen_func_t gen_chk_a_clr[] = {
7981 gen_advanced_load_check_clr_bi,
7982 gen_advanced_load_check_clr_qi,
7983 gen_advanced_load_check_clr_hi,
7984 gen_advanced_load_check_clr_si,
7985 gen_advanced_load_check_clr_di,
7986 gen_advanced_load_check_clr_sf,
7987 gen_advanced_load_check_clr_df,
7988 gen_advanced_load_check_clr_xf,
7989 gen_advanced_load_check_clr_ti,
7990 gen_advanced_load_check_clr_di,
7991 gen_advanced_load_check_clr_di,
7992 gen_advanced_load_check_clr_di,
7993 };
7994 static gen_func_t gen_chk_a_nc[] = {
7995 gen_advanced_load_check_nc_bi,
7996 gen_advanced_load_check_nc_qi,
7997 gen_advanced_load_check_nc_hi,
7998 gen_advanced_load_check_nc_si,
7999 gen_advanced_load_check_nc_di,
8000 gen_advanced_load_check_nc_sf,
8001 gen_advanced_load_check_nc_df,
8002 gen_advanced_load_check_nc_xf,
8003 gen_advanced_load_check_nc_ti,
8004 gen_advanced_load_check_nc_di,
8005 gen_advanced_load_check_nc_di,
8006 gen_advanced_load_check_nc_di,
8007 };
8008 static gen_func_t gen_chk_s[] = {
8009 gen_speculation_check_bi,
8010 gen_speculation_check_qi,
8011 gen_speculation_check_hi,
8012 gen_speculation_check_si,
8013 gen_speculation_check_di,
8014 gen_speculation_check_sf,
8015 gen_speculation_check_df,
8016 gen_speculation_check_xf,
8017 gen_speculation_check_ti,
8018 gen_speculation_check_di,
8019 gen_speculation_check_di,
8020 gen_speculation_check_di,
8021 };
8022
8023 gen_func_t *gen_check;
8024
8025 if (ts & BEGIN_DATA)
8026 {
8027 /* We don't need a recovery block here because even for ld.sa an
8028 ALAT entry is allocated only if the NAT bit is zero. So it is
8029 enough to use ld.c here. */
8030
8031 if (simple_check_p)
8032 {
8033 gcc_assert (mflag_sched_spec_ldc);
8034
8035 if (clearing_check_p)
8036 gen_check = gen_ld_c_clr;
8037 else
8038 gen_check = gen_ld_c_nc;
8039 }
8040 else
8041 {
8042 if (clearing_check_p)
8043 gen_check = gen_chk_a_clr;
8044 else
8045 gen_check = gen_chk_a_nc;
8046 }
8047 }
8048 else if (ts & BEGIN_CONTROL)
8049 {
8050 if (simple_check_p)
8051 /* We might want to use ld.sa -> ld.c instead of
8052 ld.s -> chk.s. */
8053 {
8054 gcc_assert (!ia64_needs_block_p (ts));
8055
8056 if (clearing_check_p)
8057 gen_check = gen_ld_c_clr;
8058 else
8059 gen_check = gen_ld_c_nc;
8060 }
8061 else
8062 {
8063 gen_check = gen_chk_s;
8064 }
8065 }
8066 else
8067 gcc_unreachable ();
8068
8069 gcc_assert (mode_no >= 0);
8070 return gen_check[mode_no];
8071 }
8072
8073 /* Return nonzero if speculation of type TS needs a branchy recovery check. */
8074 static bool
8075 ia64_needs_block_p (ds_t ts)
8076 {
8077 if (ts & BEGIN_DATA)
8078 return !mflag_sched_spec_ldc;
8079
8080 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8081
8082 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8083 }
8084
8085 /* Generate a recovery check for INSN with speculation types DS.
8086 If LABEL is nonzero, generate a branchy recovery check;
8087 otherwise generate a simple check. */
8088 static rtx
8089 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
8090 {
8091 rtx op1, pat, check_pat;
8092 gen_func_t gen_check;
8093 int mode_no;
8094
8095 mode_no = get_mode_no_for_insn (insn);
8096 gcc_assert (mode_no >= 0);
8097
8098 if (label)
8099 op1 = label;
8100 else
8101 {
8102 gcc_assert (!ia64_needs_block_p (ds));
8103 op1 = copy_rtx (recog_data.operand[1]);
8104 }
8105
8106 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8107 true);
8108
8109 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8110
8111 pat = PATTERN (insn);
8112 if (GET_CODE (pat) == COND_EXEC)
8113 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8114 check_pat);
8115
8116 return check_pat;
8117 }
8118
8119 /* Return nonzero if X is a speculation recovery check (simple or branchy). */
8120 static int
8121 ia64_spec_check_p (rtx x)
8122 {
8123 x = PATTERN (x);
8124 if (GET_CODE (x) == COND_EXEC)
8125 x = COND_EXEC_CODE (x);
8126 if (GET_CODE (x) == SET)
8127 return ia64_spec_check_src_p (SET_SRC (x));
8128 return 0;
8129 }
8130
8131 /* Return nonzero if SRC belongs to a recovery check. */
8132 static int
8133 ia64_spec_check_src_p (rtx src)
8134 {
8135 if (GET_CODE (src) == IF_THEN_ELSE)
8136 {
8137 rtx t;
8138
8139 t = XEXP (src, 0);
8140 if (GET_CODE (t) == NE)
8141 {
8142 t = XEXP (t, 0);
8143
8144 if (GET_CODE (t) == UNSPEC)
8145 {
8146 int code;
8147
8148 code = XINT (t, 1);
8149
8150 if (code == UNSPEC_LDCCLR
8151 || code == UNSPEC_LDCNC
8152 || code == UNSPEC_CHKACLR
8153 || code == UNSPEC_CHKANC
8154 || code == UNSPEC_CHKS)
8155 {
8156 gcc_assert (code != 0);
8157 return code;
8158 }
8159 }
8160 }
8161 }
8162 return 0;
8163 }
8164 \f
8165
8166 /* The following page contains abstract data `bundle states' which are
8167 used for bundling insns (inserting nops and template generation). */
8168
8169 /* The following describes state of insn bundling. */
8170
8171 struct bundle_state
8172 {
8173 /* Unique bundle state number to identify them in the debugging
8174 output */
8175 int unique_num;
8176 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
8177 /* number of nops before and after the insn */
8178 short before_nops_num, after_nops_num;
8179 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8180 insn) */
8181 int cost; /* cost of the state in cycles */
8182 int accumulated_insns_num; /* number of all previous insns including
8183 nops; an L-type insn counts as 2 insns */
8184 int branch_deviation; /* deviation of previous branches from 3rd slots */
8185 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8186 struct bundle_state *next; /* next state with the same insn_num */
8187 struct bundle_state *originator; /* originator (previous insn state) */
8188 /* All bundle states are in the following chain. */
8189 struct bundle_state *allocated_states_chain;
8190 /* The DFA State after issuing the insn and the nops. */
8191 state_t dfa_state;
8192 };
8193
8194 /* The following maps an insn number to the corresponding bundle state chain. */
8195
8196 static struct bundle_state **index_to_bundle_states;
8197
8198 /* The unique number of next bundle state. */
8199
8200 static int bundle_states_num;
8201
8202 /* All allocated bundle states are in the following chain. */
8203
8204 static struct bundle_state *allocated_bundle_states_chain;
8205
8206 /* All allocated but not used bundle states are in the following
8207 chain. */
8208
8209 static struct bundle_state *free_bundle_state_chain;
8210
8211
8212 /* The following function returns a free bundle state. */
8213
8214 static struct bundle_state *
8215 get_free_bundle_state (void)
8216 {
8217 struct bundle_state *result;
8218
8219 if (free_bundle_state_chain != NULL)
8220 {
8221 result = free_bundle_state_chain;
8222 free_bundle_state_chain = result->next;
8223 }
8224 else
8225 {
8226 result = XNEW (struct bundle_state);
8227 result->dfa_state = xmalloc (dfa_state_size);
8228 result->allocated_states_chain = allocated_bundle_states_chain;
8229 allocated_bundle_states_chain = result;
8230 }
8231 result->unique_num = bundle_states_num++;
8232 return result;
8233
8234 }
8235
8236 /* The following function frees given bundle state. */
8237
8238 static void
8239 free_bundle_state (struct bundle_state *state)
8240 {
8241 state->next = free_bundle_state_chain;
8242 free_bundle_state_chain = state;
8243 }
8244
8245 /* Start work with abstract data `bundle states'. */
8246
8247 static void
8248 initiate_bundle_states (void)
8249 {
8250 bundle_states_num = 0;
8251 free_bundle_state_chain = NULL;
8252 allocated_bundle_states_chain = NULL;
8253 }
8254
8255 /* Finish work with abstract data `bundle states'. */
8256
8257 static void
8258 finish_bundle_states (void)
8259 {
8260 struct bundle_state *curr_state, *next_state;
8261
8262 for (curr_state = allocated_bundle_states_chain;
8263 curr_state != NULL;
8264 curr_state = next_state)
8265 {
8266 next_state = curr_state->allocated_states_chain;
8267 free (curr_state->dfa_state);
8268 free (curr_state);
8269 }
8270 }
8271
8272 /* Hash table of the bundle states. The key is dfa_state and insn_num
8273 of the bundle states. */
8274
8275 static htab_t bundle_state_table;
8276
8277 /* The function returns hash of BUNDLE_STATE. */
8278
8279 static unsigned
8280 bundle_state_hash (const void *bundle_state)
8281 {
8282 const struct bundle_state *const state
8283 = (const struct bundle_state *) bundle_state;
8284 unsigned result, i;
8285
8286 for (result = i = 0; i < dfa_state_size; i++)
8287 result += (((unsigned char *) state->dfa_state) [i]
8288 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8289 return result + state->insn_num;
8290 }
8291
8292 /* The function returns nonzero if the bundle state keys are equal. */
8293
8294 static int
8295 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8296 {
8297 const struct bundle_state *const state1
8298 = (const struct bundle_state *) bundle_state_1;
8299 const struct bundle_state *const state2
8300 = (const struct bundle_state *) bundle_state_2;
8301
8302 return (state1->insn_num == state2->insn_num
8303 && memcmp (state1->dfa_state, state2->dfa_state,
8304 dfa_state_size) == 0);
8305 }
8306
8307 /* The function inserts the BUNDLE_STATE into the hash table. The
8308 function returns nonzero if the bundle has been inserted into the
8309 table. The table contains the best bundle state with given key. */
8310
8311 static int
8312 insert_bundle_state (struct bundle_state *bundle_state)
8313 {
8314 void **entry_ptr;
8315
8316 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8317 if (*entry_ptr == NULL)
8318 {
8319 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8320 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8321 *entry_ptr = (void *) bundle_state;
8322 return TRUE;
8323 }
8324 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8325 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8326 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8327 > bundle_state->accumulated_insns_num
8328 || (((struct bundle_state *)
8329 *entry_ptr)->accumulated_insns_num
8330 == bundle_state->accumulated_insns_num
8331 && (((struct bundle_state *)
8332 *entry_ptr)->branch_deviation
8333 > bundle_state->branch_deviation
8334 || (((struct bundle_state *)
8335 *entry_ptr)->branch_deviation
8336 == bundle_state->branch_deviation
8337 && ((struct bundle_state *)
8338 *entry_ptr)->middle_bundle_stops
8339 > bundle_state->middle_bundle_stops))))))
8340
8341 {
8342 struct bundle_state temp;
8343
8344 temp = *(struct bundle_state *) *entry_ptr;
8345 *(struct bundle_state *) *entry_ptr = *bundle_state;
8346 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8347 *bundle_state = temp;
8348 }
8349 return FALSE;
8350 }
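/* Editorial sketch, not part of the original ia64.c: the nested condition
   in insert_bundle_state above (and the best_state search in bundling ()
   below) is a lexicographic comparison over four fields, smaller being
   better in each.  Written as a standalone comparator over a toy struct
   (hypothetical toy_ names) it reads as follows; guarded by "#if 0".  */
#if 0
struct toy_bundle_key
{
  int cost;
  int accumulated_insns_num;
  int branch_deviation;
  int middle_bundle_stops;
};

/* Return nonzero if A is strictly better than B: lower cost first, then
   fewer accumulated insns (i.e. fewer nops), then smaller branch
   deviation, then fewer mid-bundle stop bits.  */
static int
toy_bundle_state_better_p (const struct toy_bundle_key *a,
                           const struct toy_bundle_key *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  if (a->branch_deviation != b->branch_deviation)
    return a->branch_deviation < b->branch_deviation;
  return a->middle_bundle_stops < b->middle_bundle_stops;
}
#endif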
8351
8352 /* Start work with the hash table. */
8353
8354 static void
8355 initiate_bundle_state_table (void)
8356 {
8357 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8358 (htab_del) 0);
8359 }
8360
8361 /* Finish work with the hash table. */
8362
8363 static void
8364 finish_bundle_state_table (void)
8365 {
8366 htab_delete (bundle_state_table);
8367 }
8368
8369 \f
8370
8371 /* The following variable is an insn `nop' used to check bundle states
8372 with different numbers of inserted nops. */
8373
8374 static rtx ia64_nop;
8375
8376 /* The following function tries to issue NOPS_NUM nops for the current
8377 state without advancing the processor cycle. If it fails, the
8378 function returns FALSE and frees the current state. */
8379
8380 static int
8381 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8382 {
8383 int i;
8384
8385 for (i = 0; i < nops_num; i++)
8386 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8387 {
8388 free_bundle_state (curr_state);
8389 return FALSE;
8390 }
8391 return TRUE;
8392 }
8393
8394 /* The following function tries to issue INSN for the current
8395 state without advancing the processor cycle. If it fails, the
8396 function returns FALSE and frees the current state. */
8397
8398 static int
8399 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8400 {
8401 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8402 {
8403 free_bundle_state (curr_state);
8404 return FALSE;
8405 }
8406 return TRUE;
8407 }
8408
8409 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8410 starting with ORIGINATOR without advancing the processor cycle. If
8411 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8412 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8413 If it succeeds, the function creates a new bundle state and inserts
8414 it into the hash table and into `index_to_bundle_states'. */
8415
8416 static void
8417 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8418 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8419 {
8420 struct bundle_state *curr_state;
8421
8422 curr_state = get_free_bundle_state ();
8423 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8424 curr_state->insn = insn;
8425 curr_state->insn_num = originator->insn_num + 1;
8426 curr_state->cost = originator->cost;
8427 curr_state->originator = originator;
8428 curr_state->before_nops_num = before_nops_num;
8429 curr_state->after_nops_num = 0;
8430 curr_state->accumulated_insns_num
8431 = originator->accumulated_insns_num + before_nops_num;
8432 curr_state->branch_deviation = originator->branch_deviation;
8433 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8434 gcc_assert (insn);
8435 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8436 {
8437 gcc_assert (GET_MODE (insn) != TImode);
8438 if (!try_issue_nops (curr_state, before_nops_num))
8439 return;
8440 if (!try_issue_insn (curr_state, insn))
8441 return;
8442 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8443 if (curr_state->accumulated_insns_num % 3 != 0)
8444 curr_state->middle_bundle_stops++;
8445 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8446 && curr_state->accumulated_insns_num % 3 != 0)
8447 {
8448 free_bundle_state (curr_state);
8449 return;
8450 }
8451 }
8452 else if (GET_MODE (insn) != TImode)
8453 {
8454 if (!try_issue_nops (curr_state, before_nops_num))
8455 return;
8456 if (!try_issue_insn (curr_state, insn))
8457 return;
8458 curr_state->accumulated_insns_num++;
8459 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8460 && asm_noperands (PATTERN (insn)) < 0);
8461
8462 if (ia64_safe_type (insn) == TYPE_L)
8463 curr_state->accumulated_insns_num++;
8464 }
8465 else
8466 {
8467 /* If this is an insn that must be first in a group, then don't allow
8468 nops to be emitted before it. Currently, alloc is the only such
8469 supported instruction. */
8470 /* ??? The bundling automatons should handle this for us, but they do
8471 not yet have support for the first_insn attribute. */
8472 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8473 {
8474 free_bundle_state (curr_state);
8475 return;
8476 }
8477
8478 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8479 state_transition (curr_state->dfa_state, NULL);
8480 curr_state->cost++;
8481 if (!try_issue_nops (curr_state, before_nops_num))
8482 return;
8483 if (!try_issue_insn (curr_state, insn))
8484 return;
8485 curr_state->accumulated_insns_num++;
8486 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8487 || asm_noperands (PATTERN (insn)) >= 0)
8488 {
8489 /* Finish bundle containing asm insn. */
8490 curr_state->after_nops_num
8491 = 3 - curr_state->accumulated_insns_num % 3;
8492 curr_state->accumulated_insns_num
8493 += 3 - curr_state->accumulated_insns_num % 3;
8494 }
8495 else if (ia64_safe_type (insn) == TYPE_L)
8496 curr_state->accumulated_insns_num++;
8497 }
8498 if (ia64_safe_type (insn) == TYPE_B)
8499 curr_state->branch_deviation
8500 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8501 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8502 {
8503 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8504 {
8505 state_t dfa_state;
8506 struct bundle_state *curr_state1;
8507 struct bundle_state *allocated_states_chain;
8508
8509 curr_state1 = get_free_bundle_state ();
8510 dfa_state = curr_state1->dfa_state;
8511 allocated_states_chain = curr_state1->allocated_states_chain;
8512 *curr_state1 = *curr_state;
8513 curr_state1->dfa_state = dfa_state;
8514 curr_state1->allocated_states_chain = allocated_states_chain;
8515 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8516 dfa_state_size);
8517 curr_state = curr_state1;
8518 }
8519 if (!try_issue_nops (curr_state,
8520 3 - curr_state->accumulated_insns_num % 3))
8521 return;
8522 curr_state->after_nops_num
8523 = 3 - curr_state->accumulated_insns_num % 3;
8524 curr_state->accumulated_insns_num
8525 += 3 - curr_state->accumulated_insns_num % 3;
8526 }
8527 if (!insert_bundle_state (curr_state))
8528 free_bundle_state (curr_state);
8529 return;
8530 }
8531
8532 /* The following function returns the position in the two-bundle
8533 window for the given STATE. */
8534
8535 static int
8536 get_max_pos (state_t state)
8537 {
8538 if (cpu_unit_reservation_p (state, pos_6))
8539 return 6;
8540 else if (cpu_unit_reservation_p (state, pos_5))
8541 return 5;
8542 else if (cpu_unit_reservation_p (state, pos_4))
8543 return 4;
8544 else if (cpu_unit_reservation_p (state, pos_3))
8545 return 3;
8546 else if (cpu_unit_reservation_p (state, pos_2))
8547 return 2;
8548 else if (cpu_unit_reservation_p (state, pos_1))
8549 return 1;
8550 else
8551 return 0;
8552 }
8553
8554 /* The function returns the code of a possible template for the given
8555 position and state. The function should be called only with POS
8556 equal to 3 or 6. We avoid generating F NOPs by putting templates
8557 containing F insns at the end of the template search because of an
8558 undocumented anomaly in McKinley-derived cores which can
8559 cause stalls if an F-unit insn (including a NOP) is issued within a
8560 six-cycle window after reading certain application registers (such
8561 as ar.bsp). Furthermore, power considerations also argue against
8562 the use of F-unit instructions unless they're really needed. */
8563
8564 static int
8565 get_template (state_t state, int pos)
8566 {
8567 switch (pos)
8568 {
8569 case 3:
8570 if (cpu_unit_reservation_p (state, _0mmi_))
8571 return 1;
8572 else if (cpu_unit_reservation_p (state, _0mii_))
8573 return 0;
8574 else if (cpu_unit_reservation_p (state, _0mmb_))
8575 return 7;
8576 else if (cpu_unit_reservation_p (state, _0mib_))
8577 return 6;
8578 else if (cpu_unit_reservation_p (state, _0mbb_))
8579 return 5;
8580 else if (cpu_unit_reservation_p (state, _0bbb_))
8581 return 4;
8582 else if (cpu_unit_reservation_p (state, _0mmf_))
8583 return 3;
8584 else if (cpu_unit_reservation_p (state, _0mfi_))
8585 return 2;
8586 else if (cpu_unit_reservation_p (state, _0mfb_))
8587 return 8;
8588 else if (cpu_unit_reservation_p (state, _0mlx_))
8589 return 9;
8590 else
8591 gcc_unreachable ();
8592 case 6:
8593 if (cpu_unit_reservation_p (state, _1mmi_))
8594 return 1;
8595 else if (cpu_unit_reservation_p (state, _1mii_))
8596 return 0;
8597 else if (cpu_unit_reservation_p (state, _1mmb_))
8598 return 7;
8599 else if (cpu_unit_reservation_p (state, _1mib_))
8600 return 6;
8601 else if (cpu_unit_reservation_p (state, _1mbb_))
8602 return 5;
8603 else if (cpu_unit_reservation_p (state, _1bbb_))
8604 return 4;
8605 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8606 return 3;
8607 else if (cpu_unit_reservation_p (state, _1mfi_))
8608 return 2;
8609 else if (cpu_unit_reservation_p (state, _1mfb_))
8610 return 8;
8611 else if (cpu_unit_reservation_p (state, _1mlx_))
8612 return 9;
8613 else
8614 gcc_unreachable ();
8615 default:
8616 gcc_unreachable ();
8617 }
8618 }
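/* Editorial note, not part of the original ia64.c: the values returned by
   get_template above index IA-64 bundle templates.  Assuming the ten
   bundle layout (NR_BUNDLES == 10, as the #if in
   ia64_add_bundle_selector_before below suggests), the unit names tested
   above imply the mapping sketched here; the bundle_selector handling in
   the machine description is the authoritative source.  Guarded by
   "#if 0"; the sketch_ name is hypothetical.  */
#if 0
static const char *const sketch_bundle_name[] =
{
  ".mii",  /* 0 */
  ".mmi",  /* 1 */
  ".mfi",  /* 2 */
  ".mmf",  /* 3 */
  ".bbb",  /* 4 */
  ".mbb",  /* 5 */
  ".mib",  /* 6 */
  ".mmb",  /* 7 */
  ".mfb",  /* 8 */
  ".mlx"   /* 9 */
};
#endif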
8619
8620 /* True when INSN is important for bundling. */
8621 static bool
8622 important_for_bundling_p (rtx insn)
8623 {
8624 return (INSN_P (insn)
8625 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8626 && GET_CODE (PATTERN (insn)) != USE
8627 && GET_CODE (PATTERN (insn)) != CLOBBER);
8628 }
8629
8630 /* The following function returns the first insn important for insn
8631 bundling starting at INSN and before TAIL. */
8632
8633 static rtx
8634 get_next_important_insn (rtx insn, rtx tail)
8635 {
8636 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8637 if (important_for_bundling_p (insn))
8638 return insn;
8639 return NULL_RTX;
8640 }
8641
8642 /* Add a bundle selector TEMPLATE0 before INSN. */
8643
8644 static void
8645 ia64_add_bundle_selector_before (int template0, rtx insn)
8646 {
8647 rtx b = gen_bundle_selector (GEN_INT (template0));
8648
8649 ia64_emit_insn_before (b, insn);
8650 #if NR_BUNDLES == 10
8651 if ((template0 == 4 || template0 == 5)
8652 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8653 {
8654 int i;
8655 rtx note = NULL_RTX;
8656
8657 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8658 first or second slot. If it is and has REG_EH_NOTE set, copy it
8659 to following nops, as br.call sets rp to the address of following
8660 bundle and therefore an EH region end must be on a bundle
8661 boundary. */
8662 insn = PREV_INSN (insn);
8663 for (i = 0; i < 3; i++)
8664 {
8665 do
8666 insn = next_active_insn (insn);
8667 while (GET_CODE (insn) == INSN
8668 && get_attr_empty (insn) == EMPTY_YES);
8669 if (GET_CODE (insn) == CALL_INSN)
8670 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8671 else if (note)
8672 {
8673 int code;
8674
8675 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8676 || code == CODE_FOR_nop_b);
8677 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8678 note = NULL_RTX;
8679 else
8680 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8681 }
8682 }
8683 }
8684 #endif
8685 }
8686
8687 /* The following function does insn bundling. Bundling means
8688 inserting templates and nop insns to fit insn groups into permitted
8689 templates. Instruction scheduling uses an NDFA (non-deterministic
8690 finite automaton) encoding information about the templates and the
8691 inserted nops. Nondeterminism of the automaton permits following
8692 all possible insn sequences very quickly.
8693 
8694 Unfortunately it is not possible to get information about inserted
8695 nop insns and used templates from the automaton states. The
8696 automaton only says that we can issue an insn, possibly inserting
8697 some nops before it and using some template. Therefore insn
8698 bundling in this function is implemented with a DFA
8699 (deterministic finite automaton). We follow all possible insn
8700 sequences by inserting 0-2 nops (that is what the NDFA describes for
8701 insn scheduling) before/after each insn being bundled. We know the
8702 start of a simulated processor cycle from insn scheduling (an insn
8703 starting a new cycle has TImode).
8704 
8705 A simple implementation of insn bundling would create an enormous
8706 number of possible insn sequences satisfying the information about new
8707 cycle ticks taken from the insn scheduling. To make the algorithm
8708 practical we use dynamic programming. Each decision (about
8709 inserting nops and implicitly about previous decisions) is described
8710 by structure bundle_state (see above). If we generate the same
8711 bundle state (the key is the automaton state after issuing the insns
8712 and nops for it), we reuse the already generated one. As a consequence
8713 we reject decisions which cannot improve the solution and
8714 reduce the memory used by the algorithm.
8715 
8716 When we reach the end of the EBB (extended basic block), we choose the
8717 best sequence and then, moving backward through the EBB, insert
8718 templates for the best alternative. The templates are taken by
8719 querying the automaton state for each insn in the chosen bundle states.
8720 
8721 So the algorithm makes two (forward and backward) passes through
8722 the EBB. */
8723
8724 static void
8725 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8726 {
8727 struct bundle_state *curr_state, *next_state, *best_state;
8728 rtx insn, next_insn;
8729 int insn_num;
8730 int i, bundle_end_p, only_bundle_end_p, asm_p;
8731 int pos = 0, max_pos, template0, template1;
8732 rtx b;
8733 rtx nop;
8734 enum attr_type type;
8735
8736 insn_num = 0;
8737 /* Count insns in the EBB. */
8738 for (insn = NEXT_INSN (prev_head_insn);
8739 insn && insn != tail;
8740 insn = NEXT_INSN (insn))
8741 if (INSN_P (insn))
8742 insn_num++;
8743 if (insn_num == 0)
8744 return;
8745 bundling_p = 1;
8746 dfa_clean_insn_cache ();
8747 initiate_bundle_state_table ();
8748 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8749 /* First (forward) pass -- generation of bundle states. */
8750 curr_state = get_free_bundle_state ();
8751 curr_state->insn = NULL;
8752 curr_state->before_nops_num = 0;
8753 curr_state->after_nops_num = 0;
8754 curr_state->insn_num = 0;
8755 curr_state->cost = 0;
8756 curr_state->accumulated_insns_num = 0;
8757 curr_state->branch_deviation = 0;
8758 curr_state->middle_bundle_stops = 0;
8759 curr_state->next = NULL;
8760 curr_state->originator = NULL;
8761 state_reset (curr_state->dfa_state);
8762 index_to_bundle_states [0] = curr_state;
8763 insn_num = 0;
8764 /* Shift cycle mark if it is put on insn which could be ignored. */
8765 for (insn = NEXT_INSN (prev_head_insn);
8766 insn != tail;
8767 insn = NEXT_INSN (insn))
8768 if (INSN_P (insn)
8769 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8770 || GET_CODE (PATTERN (insn)) == USE
8771 || GET_CODE (PATTERN (insn)) == CLOBBER)
8772 && GET_MODE (insn) == TImode)
8773 {
8774 PUT_MODE (insn, VOIDmode);
8775 for (next_insn = NEXT_INSN (insn);
8776 next_insn != tail;
8777 next_insn = NEXT_INSN (next_insn))
8778 if (INSN_P (next_insn)
8779 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8780 && GET_CODE (PATTERN (next_insn)) != USE
8781 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8782 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8783 {
8784 PUT_MODE (next_insn, TImode);
8785 break;
8786 }
8787 }
8788 /* Forward pass: generation of bundle states. */
8789 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8790 insn != NULL_RTX;
8791 insn = next_insn)
8792 {
8793 gcc_assert (INSN_P (insn)
8794 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8795 && GET_CODE (PATTERN (insn)) != USE
8796 && GET_CODE (PATTERN (insn)) != CLOBBER);
8797 type = ia64_safe_type (insn);
8798 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8799 insn_num++;
8800 index_to_bundle_states [insn_num] = NULL;
8801 for (curr_state = index_to_bundle_states [insn_num - 1];
8802 curr_state != NULL;
8803 curr_state = next_state)
8804 {
8805 pos = curr_state->accumulated_insns_num % 3;
8806 next_state = curr_state->next;
8807 /* We must fill up the current bundle in order to start a
8808 subsequent asm insn in a new bundle. Asm insn is always
8809 placed in a separate bundle. */
8810 only_bundle_end_p
8811 = (next_insn != NULL_RTX
8812 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8813 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8814 /* We may fill up the current bundle if it is the cycle end
8815 without a group barrier. */
8816 bundle_end_p
8817 = (only_bundle_end_p || next_insn == NULL_RTX
8818 || (GET_MODE (next_insn) == TImode
8819 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8820 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8821 || type == TYPE_S)
8822 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8823 only_bundle_end_p);
8824 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8825 only_bundle_end_p);
8826 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8827 only_bundle_end_p);
8828 }
8829 gcc_assert (index_to_bundle_states [insn_num]);
8830 for (curr_state = index_to_bundle_states [insn_num];
8831 curr_state != NULL;
8832 curr_state = curr_state->next)
8833 if (verbose >= 2 && dump)
8834 {
8835 /* This structure is taken from generated code of the
8836 pipeline hazard recognizer (see file insn-attrtab.c).
8837 Please don't forget to change the structure if a new
8838 automaton is added to .md file. */
8839 struct DFA_chip
8840 {
8841 unsigned short one_automaton_state;
8842 unsigned short oneb_automaton_state;
8843 unsigned short two_automaton_state;
8844 unsigned short twob_automaton_state;
8845 };
8846
8847 fprintf
8848 (dump,
8849 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8850 curr_state->unique_num,
8851 (curr_state->originator == NULL
8852 ? -1 : curr_state->originator->unique_num),
8853 curr_state->cost,
8854 curr_state->before_nops_num, curr_state->after_nops_num,
8855 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8856 curr_state->middle_bundle_stops,
8857 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8858 INSN_UID (insn));
8859 }
8860 }
8861
8862 /* We should find a solution because the 2nd insn scheduling has
8863 found one. */
8864 gcc_assert (index_to_bundle_states [insn_num]);
8865 /* Find a state corresponding to the best insn sequence. */
8866 best_state = NULL;
8867 for (curr_state = index_to_bundle_states [insn_num];
8868 curr_state != NULL;
8869 curr_state = curr_state->next)
8870 /* We are just looking at the states with a completely filled last
8871 bundle. First we prefer insn sequences with minimal cost, then
8872 with minimal inserted nops, and finally with branch insns placed
8873 in the 3rd slots. */
8874 if (curr_state->accumulated_insns_num % 3 == 0
8875 && (best_state == NULL || best_state->cost > curr_state->cost
8876 || (best_state->cost == curr_state->cost
8877 && (curr_state->accumulated_insns_num
8878 < best_state->accumulated_insns_num
8879 || (curr_state->accumulated_insns_num
8880 == best_state->accumulated_insns_num
8881 && (curr_state->branch_deviation
8882 < best_state->branch_deviation
8883 || (curr_state->branch_deviation
8884 == best_state->branch_deviation
8885 && curr_state->middle_bundle_stops
8886 < best_state->middle_bundle_stops)))))))
8887 best_state = curr_state;
8888 /* Second (backward) pass: adding nops and templates. */
8889 gcc_assert (best_state);
8890 insn_num = best_state->before_nops_num;
8891 template0 = template1 = -1;
8892 for (curr_state = best_state;
8893 curr_state->originator != NULL;
8894 curr_state = curr_state->originator)
8895 {
8896 insn = curr_state->insn;
8897 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8898 || asm_noperands (PATTERN (insn)) >= 0);
8899 insn_num++;
8900 if (verbose >= 2 && dump)
8901 {
8902 struct DFA_chip
8903 {
8904 unsigned short one_automaton_state;
8905 unsigned short oneb_automaton_state;
8906 unsigned short two_automaton_state;
8907 unsigned short twob_automaton_state;
8908 };
8909
8910 fprintf
8911 (dump,
8912 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8913 curr_state->unique_num,
8914 (curr_state->originator == NULL
8915 ? -1 : curr_state->originator->unique_num),
8916 curr_state->cost,
8917 curr_state->before_nops_num, curr_state->after_nops_num,
8918 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8919 curr_state->middle_bundle_stops,
8920 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8921 INSN_UID (insn));
8922 }
8923 /* Find the position in the current bundle window. The window can
8924 contain at most two bundles. A two-bundle window means that
8925 the processor will make two bundle rotations. */
8926 max_pos = get_max_pos (curr_state->dfa_state);
8927 if (max_pos == 6
8928 /* The following (negative template number) means that the
8929 processor did one bundle rotation. */
8930 || (max_pos == 3 && template0 < 0))
8931 {
8932 /* We are at the end of the window -- find template(s) for
8933 its bundle(s). */
8934 pos = max_pos;
8935 if (max_pos == 3)
8936 template0 = get_template (curr_state->dfa_state, 3);
8937 else
8938 {
8939 template1 = get_template (curr_state->dfa_state, 3);
8940 template0 = get_template (curr_state->dfa_state, 6);
8941 }
8942 }
8943 if (max_pos > 3 && template1 < 0)
8944 /* It may happen when we have the stop inside a bundle. */
8945 {
8946 gcc_assert (pos <= 3);
8947 template1 = get_template (curr_state->dfa_state, 3);
8948 pos += 3;
8949 }
8950 if (!asm_p)
8951 /* Emit nops after the current insn. */
8952 for (i = 0; i < curr_state->after_nops_num; i++)
8953 {
8954 nop = gen_nop ();
8955 emit_insn_after (nop, insn);
8956 pos--;
8957 gcc_assert (pos >= 0);
8958 if (pos % 3 == 0)
8959 {
8960 /* We are at the start of a bundle: emit the template
8961 (it should be defined). */
8962 gcc_assert (template0 >= 0);
8963 ia64_add_bundle_selector_before (template0, nop);
8964 /* If we have a two-bundle window, we make one bundle
8965 rotation. Otherwise template0 will be undefined
8966 (negative value). */
8967 template0 = template1;
8968 template1 = -1;
8969 }
8970 }
8971 /* Move the position backward in the window. Group barrier has
8972 no slot. Asm insn takes all bundle. */
8973 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8974 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8975 && asm_noperands (PATTERN (insn)) < 0)
8976 pos--;
8977 /* Long insn takes 2 slots. */
8978 if (ia64_safe_type (insn) == TYPE_L)
8979 pos--;
8980 gcc_assert (pos >= 0);
8981 if (pos % 3 == 0
8982 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8983 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8984 && asm_noperands (PATTERN (insn)) < 0)
8985 {
8986 /* The current insn is at the bundle start: emit the
8987 template. */
8988 gcc_assert (template0 >= 0);
8989 ia64_add_bundle_selector_before (template0, insn);
8990 b = PREV_INSN (insn);
8991 insn = b;
8992 /* See comment above in analogous place for emitting nops
8993 after the insn. */
8994 template0 = template1;
8995 template1 = -1;
8996 }
8997 /* Emit nops before the current insn. */
8998 for (i = 0; i < curr_state->before_nops_num; i++)
8999 {
9000 nop = gen_nop ();
9001 ia64_emit_insn_before (nop, insn);
9002 nop = PREV_INSN (insn);
9003 insn = nop;
9004 pos--;
9005 gcc_assert (pos >= 0);
9006 if (pos % 3 == 0)
9007 {
9008 /* See comment above in analogous place for emitting nops
9009 after the insn. */
9010 gcc_assert (template0 >= 0);
9011 ia64_add_bundle_selector_before (template0, insn);
9012 b = PREV_INSN (insn);
9013 insn = b;
9014 template0 = template1;
9015 template1 = -1;
9016 }
9017 }
9018 }
9019
9020 #ifdef ENABLE_CHECKING
9021 {
9022 /* Assert right calculation of middle_bundle_stops. */
9023 int num = best_state->middle_bundle_stops;
9024 bool start_bundle = true, end_bundle = false;
9025
9026 for (insn = NEXT_INSN (prev_head_insn);
9027 insn && insn != tail;
9028 insn = NEXT_INSN (insn))
9029 {
9030 if (!INSN_P (insn))
9031 continue;
9032 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9033 start_bundle = true;
9034 else
9035 {
9036 rtx next_insn;
9037
9038 for (next_insn = NEXT_INSN (insn);
9039 next_insn && next_insn != tail;
9040 next_insn = NEXT_INSN (next_insn))
9041 if (INSN_P (next_insn)
9042 && (ia64_safe_itanium_class (next_insn)
9043 != ITANIUM_CLASS_IGNORE
9044 || recog_memoized (next_insn)
9045 == CODE_FOR_bundle_selector)
9046 && GET_CODE (PATTERN (next_insn)) != USE
9047 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9048 break;
9049
9050 end_bundle = next_insn == NULL_RTX
9051 || next_insn == tail
9052 || (INSN_P (next_insn)
9053 && recog_memoized (next_insn)
9054 == CODE_FOR_bundle_selector);
9055 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9056 && !start_bundle && !end_bundle
9057 && next_insn
9058 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
9059 && asm_noperands (PATTERN (next_insn)) < 0)
9060 num--;
9061
9062 start_bundle = false;
9063 }
9064 }
9065
9066 gcc_assert (num == 0);
9067 }
9068 #endif
9069
9070 free (index_to_bundle_states);
9071 finish_bundle_state_table ();
9072 bundling_p = 0;
9073 dfa_clean_insn_cache ();
9074 }
9075
9076 /* The following function is called at the end of scheduling BB or
9077 EBB. After reload, it inserts stop bits and does insn bundling. */
9078
9079 static void
9080 ia64_sched_finish (FILE *dump, int sched_verbose)
9081 {
9082 if (sched_verbose)
9083 fprintf (dump, "// Finishing schedule.\n");
9084 if (!reload_completed)
9085 return;
9086 if (reload_completed)
9087 {
9088 final_emit_insn_group_barriers (dump);
9089 bundling (dump, sched_verbose, current_sched_info->prev_head,
9090 current_sched_info->next_tail);
9091 if (sched_verbose && dump)
9092 fprintf (dump, "// finishing %d-%d\n",
9093 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9094 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9095
9096 return;
9097 }
9098 }
9099
9100 /* The following function inserts stop bits in scheduled BB or EBB. */
9101
9102 static void
9103 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9104 {
9105 rtx insn;
9106 int need_barrier_p = 0;
9107 int seen_good_insn = 0;
9108
9109 init_insn_group_barriers ();
9110
9111 for (insn = NEXT_INSN (current_sched_info->prev_head);
9112 insn != current_sched_info->next_tail;
9113 insn = NEXT_INSN (insn))
9114 {
9115 if (GET_CODE (insn) == BARRIER)
9116 {
9117 rtx last = prev_active_insn (insn);
9118
9119 if (! last)
9120 continue;
9121 if (GET_CODE (last) == JUMP_INSN
9122 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9123 last = prev_active_insn (last);
9124 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9125 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9126
9127 init_insn_group_barriers ();
9128 seen_good_insn = 0;
9129 need_barrier_p = 0;
9130 }
9131 else if (NONDEBUG_INSN_P (insn))
9132 {
9133 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9134 {
9135 init_insn_group_barriers ();
9136 seen_good_insn = 0;
9137 need_barrier_p = 0;
9138 }
9139 else if (need_barrier_p || group_barrier_needed (insn)
9140 || (mflag_sched_stop_bits_after_every_cycle
9141 && GET_MODE (insn) == TImode
9142 && seen_good_insn))
9143 {
9144 if (TARGET_EARLY_STOP_BITS)
9145 {
9146 rtx last;
9147
9148 for (last = insn;
9149 last != current_sched_info->prev_head;
9150 last = PREV_INSN (last))
9151 if (INSN_P (last) && GET_MODE (last) == TImode
9152 && stops_p [INSN_UID (last)])
9153 break;
9154 if (last == current_sched_info->prev_head)
9155 last = insn;
9156 last = prev_active_insn (last);
9157 if (last
9158 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9159 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9160 last);
9161 init_insn_group_barriers ();
9162 for (last = NEXT_INSN (last);
9163 last != insn;
9164 last = NEXT_INSN (last))
9165 if (INSN_P (last))
9166 {
9167 group_barrier_needed (last);
9168 if (recog_memoized (last) >= 0
9169 && important_for_bundling_p (last))
9170 seen_good_insn = 1;
9171 }
9172 }
9173 else
9174 {
9175 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9176 insn);
9177 init_insn_group_barriers ();
9178 seen_good_insn = 0;
9179 }
9180 group_barrier_needed (insn);
9181 if (recog_memoized (insn) >= 0
9182 && important_for_bundling_p (insn))
9183 seen_good_insn = 1;
9184 }
9185 else if (recog_memoized (insn) >= 0
9186 && important_for_bundling_p (insn))
9187 seen_good_insn = 1;
9188 need_barrier_p = (GET_CODE (insn) == CALL_INSN
9189 || GET_CODE (PATTERN (insn)) == ASM_INPUT
9190 || asm_noperands (PATTERN (insn)) >= 0);
9191 }
9192 }
9193 }
9194
9195 \f
9196
9197 /* The following function returns how many ready insns the DFA
9198 insn scheduler may examine during first-cycle multipass lookahead. */
9199
9200 static int
9201 ia64_first_cycle_multipass_dfa_lookahead (void)
9202 {
9203 return (reload_completed ? 6 : 4);
9204 }
9205
9206 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
9207
9208 static void
9209 ia64_init_dfa_pre_cycle_insn (void)
9210 {
9211 if (temp_dfa_state == NULL)
9212 {
9213 dfa_state_size = state_size ();
9214 temp_dfa_state = xmalloc (dfa_state_size);
9215 prev_cycle_state = xmalloc (dfa_state_size);
9216 }
9217 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9218 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9219 recog_memoized (dfa_pre_cycle_insn);
9220 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9221 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9222 recog_memoized (dfa_stop_insn);
9223 }
9224
9225 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9226 used by the DFA insn scheduler. */
9227
9228 static rtx
9229 ia64_dfa_pre_cycle_insn (void)
9230 {
9231 return dfa_pre_cycle_insn;
9232 }
9233
9234 /* The following function returns TRUE if PRODUCER (of type ilog or
9235 ld) produces address for CONSUMER (of type st or stf). */
9236
9237 int
9238 ia64_st_address_bypass_p (rtx producer, rtx consumer)
9239 {
9240 rtx dest, reg, mem;
9241
9242 gcc_assert (producer && consumer);
9243 dest = ia64_single_set (producer);
9244 gcc_assert (dest);
9245 reg = SET_DEST (dest);
9246 gcc_assert (reg);
9247 if (GET_CODE (reg) == SUBREG)
9248 reg = SUBREG_REG (reg);
9249 gcc_assert (GET_CODE (reg) == REG);
9250
9251 dest = ia64_single_set (consumer);
9252 gcc_assert (dest);
9253 mem = SET_DEST (dest);
9254 gcc_assert (mem && GET_CODE (mem) == MEM);
9255 return reg_mentioned_p (reg, mem);
9256 }
9257
9258 /* The following function returns TRUE if PRODUCER (of type ilog or
9259 ld) produces address for CONSUMER (of type ld or fld). */
9260
9261 int
9262 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9263 {
9264 rtx dest, src, reg, mem;
9265
9266 gcc_assert (producer && consumer);
9267 dest = ia64_single_set (producer);
9268 gcc_assert (dest);
9269 reg = SET_DEST (dest);
9270 gcc_assert (reg);
9271 if (GET_CODE (reg) == SUBREG)
9272 reg = SUBREG_REG (reg);
9273 gcc_assert (GET_CODE (reg) == REG);
9274
9275 src = ia64_single_set (consumer);
9276 gcc_assert (src);
9277 mem = SET_SRC (src);
9278 gcc_assert (mem);
9279
9280 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9281 mem = XVECEXP (mem, 0, 0);
9282 else if (GET_CODE (mem) == IF_THEN_ELSE)
9283 /* ??? Is this bypass necessary for ld.c? */
9284 {
9285 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9286 mem = XEXP (mem, 1);
9287 }
9288
9289 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9290 mem = XEXP (mem, 0);
9291
9292 if (GET_CODE (mem) == UNSPEC)
9293 {
9294 int c = XINT (mem, 1);
9295
9296 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9297 || c == UNSPEC_LDSA);
9298 mem = XVECEXP (mem, 0, 0);
9299 }
9300
9301 /* Note that LO_SUM is used for GOT loads. */
9302 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9303
9304 return reg_mentioned_p (reg, mem);
9305 }
9306
9307 /* The following function returns TRUE if INSN produces an address for a
9308 load/store insn. We place such insns into an M slot because that
9309 decreases their latency. */
9310
9311 int
9312 ia64_produce_address_p (rtx insn)
9313 {
9314 return insn->call;
9315 }
9316
9317 \f
9318 /* Emit pseudo-ops for the assembler to describe predicate relations.
9319 At present this assumes that we only consider predicate pairs to
9320 be mutex, and that the assembler can deduce proper values from
9321 straight-line code. */
9322
9323 static void
9324 emit_predicate_relation_info (void)
9325 {
9326 basic_block bb;
9327
9328 FOR_EACH_BB_REVERSE (bb)
9329 {
9330 int r;
9331 rtx head = BB_HEAD (bb);
9332
9333 /* We only need such notes at code labels. */
9334 if (GET_CODE (head) != CODE_LABEL)
9335 continue;
9336 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9337 head = NEXT_INSN (head);
9338
9339 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9340 grabbing the entire block of predicate registers. */
9341 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9342 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9343 {
9344 rtx p = gen_rtx_REG (BImode, r);
9345 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9346 if (head == BB_END (bb))
9347 BB_END (bb) = n;
9348 head = n;
9349 }
9350 }
9351
9352 /* Look for conditional calls that do not return, and protect predicate
9353 relations around them. Otherwise the assembler will assume the call
9354 returns, and complain about uses of call-clobbered predicates after
9355 the call. */
9356 FOR_EACH_BB_REVERSE (bb)
9357 {
9358 rtx insn = BB_HEAD (bb);
9359
9360 while (1)
9361 {
9362 if (GET_CODE (insn) == CALL_INSN
9363 && GET_CODE (PATTERN (insn)) == COND_EXEC
9364 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9365 {
9366 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9367 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9368 if (BB_HEAD (bb) == insn)
9369 BB_HEAD (bb) = b;
9370 if (BB_END (bb) == insn)
9371 BB_END (bb) = a;
9372 }
9373
9374 if (insn == BB_END (bb))
9375 break;
9376 insn = NEXT_INSN (insn);
9377 }
9378 }
9379 }
9380
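/* Illustrative sketch, not part of the original sources: the pred_rel_mutex
   pattern emitted above corresponds to an assembler annotation along the
   lines of

       .pred.rel "mutex", p6, p7

   while the safe_across_calls_all/normal patterns bracket a noreturn
   conditional call so the assembler does not assume the call returns with
   its predicates intact.  The exact directive text comes from the
   corresponding patterns in ia64.md, not from this file.  */
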
9381 /* Perform machine dependent operations on the rtl chain INSNS. */
9382
9383 static void
9384 ia64_reorg (void)
9385 {
9386 /* We are freeing block_for_insn in the toplev to keep compatibility
9387 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9388 compute_bb_for_insn ();
9389
9390 /* If optimizing, we'll have split before scheduling. */
9391 if (optimize == 0)
9392 split_all_insns ();
9393
9394 if (optimize && flag_schedule_insns_after_reload
9395 && dbg_cnt (ia64_sched2))
9396 {
9397 basic_block bb;
9398 timevar_push (TV_SCHED2);
9399 ia64_final_schedule = 1;
9400
9401 /* We can't let modulo-sched prevent us from scheduling any bbs,
9402 since we need the final schedule to produce bundle information. */
9403 FOR_EACH_BB (bb)
9404 bb->flags &= ~BB_DISABLE_SCHEDULE;
9405
9406 initiate_bundle_states ();
9407 ia64_nop = make_insn_raw (gen_nop ());
9408 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9409 recog_memoized (ia64_nop);
9410 clocks_length = get_max_uid () + 1;
9411 stops_p = XCNEWVEC (char, clocks_length);
9412
9413 if (ia64_tune == PROCESSOR_ITANIUM2)
9414 {
9415 pos_1 = get_cpu_unit_code ("2_1");
9416 pos_2 = get_cpu_unit_code ("2_2");
9417 pos_3 = get_cpu_unit_code ("2_3");
9418 pos_4 = get_cpu_unit_code ("2_4");
9419 pos_5 = get_cpu_unit_code ("2_5");
9420 pos_6 = get_cpu_unit_code ("2_6");
9421 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9422 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9423 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9424 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9425 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9426 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9427 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9428 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9429 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9430 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9431 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9432 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9433 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9434 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9435 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9436 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9437 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9438 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9439 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9440 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9441 }
9442 else
9443 {
9444 pos_1 = get_cpu_unit_code ("1_1");
9445 pos_2 = get_cpu_unit_code ("1_2");
9446 pos_3 = get_cpu_unit_code ("1_3");
9447 pos_4 = get_cpu_unit_code ("1_4");
9448 pos_5 = get_cpu_unit_code ("1_5");
9449 pos_6 = get_cpu_unit_code ("1_6");
9450 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9451 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9452 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9453 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9454 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9455 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9456 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9457 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9458 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9459 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9460 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9461 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9462 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9463 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9464 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9465 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9466 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9467 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9468 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9469 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9470 }
9471
9472 if (flag_selective_scheduling2
9473 && !maybe_skip_selective_scheduling ())
9474 run_selective_scheduling ();
9475 else
9476 schedule_ebbs ();
9477
9478 /* Redo the alignment computation, as it might have gone wrong. */
9479 compute_alignments ();
9480
9481 /* We cannot reuse this one because it has been corrupted by the
9482 evil glat. */
9483 finish_bundle_states ();
9484 free (stops_p);
9485 stops_p = NULL;
9486 emit_insn_group_barriers (dump_file);
9487
9488 ia64_final_schedule = 0;
9489 timevar_pop (TV_SCHED2);
9490 }
9491 else
9492 emit_all_insn_group_barriers (dump_file);
9493
9494 df_analyze ();
9495
9496 /* A call must not be the last instruction in a function, so that the
9497 return address stays within the function and unwinding works
9498 properly. Note that IA-64 differs from dwarf2 on this point. */
9499 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9500 {
9501 rtx insn;
9502 int saw_stop = 0;
9503
9504 insn = get_last_insn ();
9505 if (! INSN_P (insn))
9506 insn = prev_active_insn (insn);
9507 if (insn)
9508 {
9509 /* Skip over insns that expand to nothing. */
9510 while (GET_CODE (insn) == INSN
9511 && get_attr_empty (insn) == EMPTY_YES)
9512 {
9513 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9514 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9515 saw_stop = 1;
9516 insn = prev_active_insn (insn);
9517 }
9518 if (GET_CODE (insn) == CALL_INSN)
9519 {
9520 if (! saw_stop)
9521 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9522 emit_insn (gen_break_f ());
9523 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9524 }
9525 }
9526 }
9527
9528 emit_predicate_relation_info ();
9529
9530 if (flag_var_tracking)
9531 {
9532 timevar_push (TV_VAR_TRACKING);
9533 variable_tracking_main ();
9534 timevar_pop (TV_VAR_TRACKING);
9535 }
9536 df_finish_pass (false);
9537 }
9538 \f
9539 /* Return true if REGNO is used by the epilogue. */
9540
9541 int
9542 ia64_epilogue_uses (int regno)
9543 {
9544 switch (regno)
9545 {
9546 case R_GR (1):
9547 /* With a call to a function in another module, we will write a new
9548 value to "gp". After returning from such a call, we need to make
9549 sure the function restores the original gp-value, even if the
9550 function itself does not use the gp anymore. */
9551 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9552
9553 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9554 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9555 /* For functions defined with the syscall_linkage attribute, all
9556 input registers are marked as live at all function exits. This
9557 prevents the register allocator from using the input registers,
9558 which in turn makes it possible to restart a system call after
9559 an interrupt without having to save/restore the input registers.
9560 This also prevents kernel data from leaking to application code. */
9561 return lookup_attribute ("syscall_linkage",
9562 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9563
9564 case R_BR (0):
9565 /* Conditional return patterns can't represent the use of `b0' as
9566 the return address, so we force the value live this way. */
9567 return 1;
9568
9569 case AR_PFS_REGNUM:
9570 /* Likewise for ar.pfs, which is used by br.ret. */
9571 return 1;
9572
9573 default:
9574 return 0;
9575 }
9576 }
9577
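/* Illustrative sketch, not part of the original sources: the syscall_linkage
   case above refers to a function attribute, e.g. a hypothetical kernel
   entry point declared as

       long sys_example (long arg) __attribute__ ((syscall_linkage));

   For such a function, in0-in7 are treated as live at every exit, so the
   register allocator never reuses the input registers.  */
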
9578 /* Return true if REGNO is used by the frame unwinder. */
9579
9580 int
9581 ia64_eh_uses (int regno)
9582 {
9583 unsigned int r;
9584
9585 if (! reload_completed)
9586 return 0;
9587
9588 if (regno == 0)
9589 return 0;
9590
9591 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9592 if (regno == current_frame_info.r[r]
9593 || regno == emitted_frame_related_regs[r])
9594 return 1;
9595
9596 return 0;
9597 }
9598 \f
9599 /* Return true if this goes in small data/bss. */
9600
9601 /* ??? We could also support our own long data here, generating movl/add/ld8
9602 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9603 code faster because there is one less load. This also includes incomplete
9604 types which can't go in sdata/sbss. */
9605
9606 static bool
9607 ia64_in_small_data_p (const_tree exp)
9608 {
9609 if (TARGET_NO_SDATA)
9610 return false;
9611
9612 /* We want to merge strings, so we never consider them small data. */
9613 if (TREE_CODE (exp) == STRING_CST)
9614 return false;
9615
9616 /* Functions are never small data. */
9617 if (TREE_CODE (exp) == FUNCTION_DECL)
9618 return false;
9619
9620 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9621 {
9622 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9623
9624 if (strcmp (section, ".sdata") == 0
9625 || strncmp (section, ".sdata.", 7) == 0
9626 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9627 || strcmp (section, ".sbss") == 0
9628 || strncmp (section, ".sbss.", 6) == 0
9629 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9630 return true;
9631 }
9632 else
9633 {
9634 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9635
9636 /* If this is an incomplete type with size 0, then we can't put it
9637 in sdata because it might be too big when completed. */
9638 if (size > 0 && size <= ia64_section_threshold)
9639 return true;
9640 }
9641
9642 return false;
9643 }
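
/* Illustrative sketch, not part of the original sources: a global whose size
   is at most ia64_section_threshold, such as

       static int counter;

   passes the test above and is placed in .sbss/.sdata, so it can be
   addressed gp-relatively (roughly "addl rN = @gprel(counter), r1" followed
   by a load) instead of materializing the full 64-bit address with movl.  */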
9644 \f
9645 /* Output assembly directives for prologue regions. */
9646
9647 /* The current basic block number. */
9648
9649 static bool last_block;
9650
9651 /* True if we need a copy_state command at the start of the next block. */
9652
9653 static bool need_copy_state;
9654
9655 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9656 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9657 #endif
9658
9659 /* Emit a debugging label after a call-frame-related insn. We'd
9660 rather output the label right away, but we'd have to output it
9661 after, not before, the instruction, and the instruction has not
9662 been output yet. So we emit the label after the insn, delete it to
9663 avoid introducing basic blocks, and mark it as preserved, such that
9664 it is still output, given that it is referenced in debug info. */
9665
9666 static const char *
9667 ia64_emit_deleted_label_after_insn (rtx insn)
9668 {
9669 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9670 rtx lb = gen_label_rtx ();
9671 rtx label_insn = emit_label_after (lb, insn);
9672
9673 LABEL_PRESERVE_P (lb) = 1;
9674
9675 delete_insn (label_insn);
9676
9677 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9678
9679 return xstrdup (label);
9680 }
9681
9682 /* Define the CFA after INSN with the steady-state definition. */
9683
9684 static void
9685 ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
9686 {
9687 rtx fp = frame_pointer_needed
9688 ? hard_frame_pointer_rtx
9689 : stack_pointer_rtx;
9690 const char *label = ia64_emit_deleted_label_after_insn (insn);
9691
9692 if (!frame)
9693 return;
9694
9695 dwarf2out_def_cfa
9696 (label, REGNO (fp),
9697 ia64_initial_elimination_offset
9698 (REGNO (arg_pointer_rtx), REGNO (fp))
9699 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9700 }
9701
9702 /* All we need to do here is avoid a crash in the generic dwarf2
9703 processing. The real CFA definition is set up above. */
9704
9705 static void
9706 ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label),
9707 rtx ARG_UNUSED (pattern),
9708 int index)
9709 {
9710 gcc_assert (index == UNSPECV_ALLOC);
9711 }
9712
9713 /* The generic dwarf2 frame debug info generator does not define a
9714 separate region for the very end of the epilogue, so refrain from
9715 doing so in the IA64-specific code as well. */
9716
9717 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
9718
9719 /* The function emits unwind directives for the start of an epilogue. */
9720
9721 static void
9722 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9723 {
9724 /* If this isn't the last block of the function, then we need to label the
9725 current state, and copy it back in at the start of the next block. */
9726
9727 if (!last_block)
9728 {
9729 if (unwind)
9730 fprintf (asm_out_file, "\t.label_state %d\n",
9731 ++cfun->machine->state_num);
9732 need_copy_state = true;
9733 }
9734
9735 if (unwind)
9736 fprintf (asm_out_file, "\t.restore sp\n");
9737 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9738 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9739 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9740 }
9741
9742 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9743
9744 static void
9745 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9746 bool unwind, bool frame)
9747 {
9748 rtx dest = SET_DEST (pat);
9749 rtx src = SET_SRC (pat);
9750
9751 if (dest == stack_pointer_rtx)
9752 {
9753 if (GET_CODE (src) == PLUS)
9754 {
9755 rtx op0 = XEXP (src, 0);
9756 rtx op1 = XEXP (src, 1);
9757
9758 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9759
9760 if (INTVAL (op1) < 0)
9761 {
9762 gcc_assert (!frame_pointer_needed);
9763 if (unwind)
9764 fprintf (asm_out_file,
9765 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9766 -INTVAL (op1));
9767 ia64_dwarf2out_def_steady_cfa (insn, frame);
9768 }
9769 else
9770 process_epilogue (asm_out_file, insn, unwind, frame);
9771 }
9772 else
9773 {
9774 gcc_assert (src == hard_frame_pointer_rtx);
9775 process_epilogue (asm_out_file, insn, unwind, frame);
9776 }
9777 }
9778 else if (dest == hard_frame_pointer_rtx)
9779 {
9780 gcc_assert (src == stack_pointer_rtx);
9781 gcc_assert (frame_pointer_needed);
9782
9783 if (unwind)
9784 fprintf (asm_out_file, "\t.vframe r%d\n",
9785 ia64_dbx_register_number (REGNO (dest)));
9786 ia64_dwarf2out_def_steady_cfa (insn, frame);
9787 }
9788 else
9789 gcc_unreachable ();
9790 }
9791
9792 /* This function processes a SET pattern for REG_CFA_REGISTER. */
9793
9794 static void
9795 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
9796 {
9797 rtx dest = SET_DEST (pat);
9798 rtx src = SET_SRC (pat);
9799
9800 int dest_regno = REGNO (dest);
9801 int src_regno = REGNO (src);
9802
9803 switch (src_regno)
9804 {
9805 case BR_REG (0):
9806 /* Saving return address pointer. */
9807 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9808 if (unwind)
9809 fprintf (asm_out_file, "\t.save rp, r%d\n",
9810 ia64_dbx_register_number (dest_regno));
9811 break;
9812
9813 case PR_REG (0):
9814 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9815 if (unwind)
9816 fprintf (asm_out_file, "\t.save pr, r%d\n",
9817 ia64_dbx_register_number (dest_regno));
9818 break;
9819
9820 case AR_UNAT_REGNUM:
9821 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9822 if (unwind)
9823 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9824 ia64_dbx_register_number (dest_regno));
9825 break;
9826
9827 case AR_LC_REGNUM:
9828 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9829 if (unwind)
9830 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9831 ia64_dbx_register_number (dest_regno));
9832 break;
9833
9834 default:
9835 /* Everything else should indicate being stored to memory. */
9836 gcc_unreachable ();
9837 }
9838 }
9839
9840 /* This function processes a SET pattern for REG_CFA_OFFSET. */
9841
9842 static void
9843 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
9844 {
9845 rtx dest = SET_DEST (pat);
9846 rtx src = SET_SRC (pat);
9847 int src_regno = REGNO (src);
9848 const char *saveop;
9849 HOST_WIDE_INT off;
9850 rtx base;
9851
9852 gcc_assert (MEM_P (dest));
9853 if (GET_CODE (XEXP (dest, 0)) == REG)
9854 {
9855 base = XEXP (dest, 0);
9856 off = 0;
9857 }
9858 else
9859 {
9860 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9861 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9862 base = XEXP (XEXP (dest, 0), 0);
9863 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9864 }
9865
9866 if (base == hard_frame_pointer_rtx)
9867 {
9868 saveop = ".savepsp";
9869 off = - off;
9870 }
9871 else
9872 {
9873 gcc_assert (base == stack_pointer_rtx);
9874 saveop = ".savesp";
9875 }
9876
9877 src_regno = REGNO (src);
9878 switch (src_regno)
9879 {
9880 case BR_REG (0):
9881 gcc_assert (!current_frame_info.r[reg_save_b0]);
9882 if (unwind)
9883 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
9884 saveop, off);
9885 break;
9886
9887 case PR_REG (0):
9888 gcc_assert (!current_frame_info.r[reg_save_pr]);
9889 if (unwind)
9890 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
9891 saveop, off);
9892 break;
9893
9894 case AR_LC_REGNUM:
9895 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9896 if (unwind)
9897 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
9898 saveop, off);
9899 break;
9900
9901 case AR_PFS_REGNUM:
9902 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9903 if (unwind)
9904 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
9905 saveop, off);
9906 break;
9907
9908 case AR_UNAT_REGNUM:
9909 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9910 if (unwind)
9911 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
9912 saveop, off);
9913 break;
9914
9915 case GR_REG (4):
9916 case GR_REG (5):
9917 case GR_REG (6):
9918 case GR_REG (7):
9919 if (unwind)
9920 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9921 1 << (src_regno - GR_REG (4)));
9922 break;
9923
9924 case BR_REG (1):
9925 case BR_REG (2):
9926 case BR_REG (3):
9927 case BR_REG (4):
9928 case BR_REG (5):
9929 if (unwind)
9930 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9931 1 << (src_regno - BR_REG (1)));
9932 break;
9933
9934 case FR_REG (2):
9935 case FR_REG (3):
9936 case FR_REG (4):
9937 case FR_REG (5):
9938 if (unwind)
9939 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9940 1 << (src_regno - FR_REG (2)));
9941 break;
9942
9943 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9944 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9945 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9946 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9947 if (unwind)
9948 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9949 1 << (src_regno - FR_REG (12)));
9950 break;
9951
9952 default:
9953 /* ??? For some reason we mark other general registers, even those
9954 we can't represent in the unwind info. Ignore them. */
9955 break;
9956 }
9957 }
9958
9959 /* This function looks at a single insn and emits any directives
9960 required to unwind this insn. */
9961
9962 static void
9963 ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
9964 {
9965 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
9966 bool frame = dwarf2out_do_frame ();
9967 rtx note, pat;
9968 bool handled_one;
9969
9970 if (!unwind && !frame)
9971 return;
9972
9973 if (NOTE_INSN_BASIC_BLOCK_P (insn))
9974 {
9975 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9976
9977 /* Restore unwind state from immediately before the epilogue. */
9978 if (need_copy_state)
9979 {
9980 if (unwind)
9981 {
9982 fprintf (asm_out_file, "\t.body\n");
9983 fprintf (asm_out_file, "\t.copy_state %d\n",
9984 cfun->machine->state_num);
9985 }
9986 if (IA64_CHANGE_CFA_IN_EPILOGUE)
9987 ia64_dwarf2out_def_steady_cfa (insn, frame);
9988 need_copy_state = false;
9989 }
9990 }
9991
9992 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9993 return;
9994
9995 /* Look for the ALLOC insn. */
9996 if (INSN_CODE (insn) == CODE_FOR_alloc)
9997 {
9998 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
9999 int dest_regno = REGNO (dest);
10000
10001 /* If this is the final destination for ar.pfs, then this must
10002 be the alloc in the prologue. */
10003 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10004 {
10005 if (unwind)
10006 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10007 ia64_dbx_register_number (dest_regno));
10008 }
10009 else
10010 {
10011 /* This must be an alloc before a sibcall. We must drop the
10012 old frame info. The easiest way to drop the old frame
10013 info is to ensure we had a ".restore sp" directive
10014 followed by a new prologue. If the procedure doesn't
10015 have a memory-stack frame, we'll issue a dummy ".restore
10016 sp" now. */
10017 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10018 /* If we haven't done process_epilogue () yet, do it now. */
10019 process_epilogue (asm_out_file, insn, unwind, frame);
10020 if (unwind)
10021 fprintf (asm_out_file, "\t.prologue\n");
10022 }
10023 return;
10024 }
10025
10026 handled_one = false;
10027 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10028 switch (REG_NOTE_KIND (note))
10029 {
10030 case REG_CFA_ADJUST_CFA:
10031 pat = XEXP (note, 0);
10032 if (pat == NULL)
10033 pat = PATTERN (insn);
10034 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10035 handled_one = true;
10036 break;
10037
10038 case REG_CFA_OFFSET:
10039 pat = XEXP (note, 0);
10040 if (pat == NULL)
10041 pat = PATTERN (insn);
10042 process_cfa_offset (asm_out_file, pat, unwind);
10043 handled_one = true;
10044 break;
10045
10046 case REG_CFA_REGISTER:
10047 pat = XEXP (note, 0);
10048 if (pat == NULL)
10049 pat = PATTERN (insn);
10050 process_cfa_register (asm_out_file, pat, unwind);
10051 handled_one = true;
10052 break;
10053
10054 case REG_FRAME_RELATED_EXPR:
10055 case REG_CFA_DEF_CFA:
10056 case REG_CFA_EXPRESSION:
10057 case REG_CFA_RESTORE:
10058 case REG_CFA_SET_VDRAP:
10059 /* Not used in the ia64 port. */
10060 gcc_unreachable ();
10061
10062 default:
10063 /* Not a frame-related note. */
10064 break;
10065 }
10066
10067 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10068 explicit action to take. No guessing required. */
10069 gcc_assert (handled_one);
10070 }
10071
10072 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10073
10074 static void
10075 ia64_asm_emit_except_personality (rtx personality)
10076 {
10077 fputs ("\t.personality\t", asm_out_file);
10078 output_addr_const (asm_out_file, personality);
10079 fputc ('\n', asm_out_file);
10080 }
10081
10082 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10083
10084 static void
10085 ia64_asm_init_sections (void)
10086 {
10087 exception_section = get_unnamed_section (0, output_section_asm_op,
10088 "\t.handlerdata");
10089 }
10090
10091 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10092
10093 static enum unwind_info_type
10094 ia64_debug_unwind_info (void)
10095 {
10096 return UI_TARGET;
10097 }
10098
10099 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
10100
10101 static enum unwind_info_type
10102 ia64_except_unwind_info (struct gcc_options *opts)
10103 {
10104 /* Honor the --enable-sjlj-exceptions configure switch. */
10105 #ifdef CONFIG_UNWIND_EXCEPTIONS
10106 if (CONFIG_UNWIND_EXCEPTIONS)
10107 return UI_SJLJ;
10108 #endif
10109
10110 /* For simplicity elsewhere in this file, indicate that all unwind
10111 info is disabled if we're not emitting unwind tables. */
10112 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
10113 return UI_NONE;
10114
10115 return UI_TARGET;
10116 }
10117 \f
10118 enum ia64_builtins
10119 {
10120 IA64_BUILTIN_BSP,
10121 IA64_BUILTIN_COPYSIGNQ,
10122 IA64_BUILTIN_FABSQ,
10123 IA64_BUILTIN_FLUSHRS,
10124 IA64_BUILTIN_INFQ,
10125 IA64_BUILTIN_HUGE_VALQ,
10126 IA64_BUILTIN_max
10127 };
10128
10129 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10130
10131 void
10132 ia64_init_builtins (void)
10133 {
10134 tree fpreg_type;
10135 tree float80_type;
10136 tree decl;
10137
10138 /* The __fpreg type. */
10139 fpreg_type = make_node (REAL_TYPE);
10140 TYPE_PRECISION (fpreg_type) = 82;
10141 layout_type (fpreg_type);
10142 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10143
10144 /* The __float80 type. */
10145 float80_type = make_node (REAL_TYPE);
10146 TYPE_PRECISION (float80_type) = 80;
10147 layout_type (float80_type);
10148 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10149
10150 /* The __float128 type. */
10151 if (!TARGET_HPUX)
10152 {
10153 tree ftype;
10154 tree float128_type = make_node (REAL_TYPE);
10155
10156 TYPE_PRECISION (float128_type) = 128;
10157 layout_type (float128_type);
10158 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10159
10160 /* TFmode support builtins. */
10161 ftype = build_function_type_list (float128_type, NULL_TREE);
10162 decl = add_builtin_function ("__builtin_infq", ftype,
10163 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10164 NULL, NULL_TREE);
10165 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10166
10167 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10168 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10169 NULL, NULL_TREE);
10170 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10171
10172 ftype = build_function_type_list (float128_type,
10173 float128_type,
10174 NULL_TREE);
10175 decl = add_builtin_function ("__builtin_fabsq", ftype,
10176 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10177 "__fabstf2", NULL_TREE);
10178 TREE_READONLY (decl) = 1;
10179 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10180
10181 ftype = build_function_type_list (float128_type,
10182 float128_type,
10183 float128_type,
10184 NULL_TREE);
10185 decl = add_builtin_function ("__builtin_copysignq", ftype,
10186 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10187 "__copysigntf3", NULL_TREE);
10188 TREE_READONLY (decl) = 1;
10189 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10190 }
10191 else
10192 /* Under HPUX, this is a synonym for "long double". */
10193 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10194 "__float128");
10195
10196 /* Fwrite on VMS is non-standard. */
10197 if (TARGET_ABI_OPEN_VMS)
10198 {
10199 implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
10200 implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
10201 }
10202
10203 #define def_builtin(name, type, code) \
10204 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10205 NULL, NULL_TREE)
10206
10207 decl = def_builtin ("__builtin_ia64_bsp",
10208 build_function_type_list (ptr_type_node, NULL_TREE),
10209 IA64_BUILTIN_BSP);
10210 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10211
10212 decl = def_builtin ("__builtin_ia64_flushrs",
10213 build_function_type_list (void_type_node, NULL_TREE),
10214 IA64_BUILTIN_FLUSHRS);
10215 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10216
10217 #undef def_builtin
10218
10219 if (TARGET_HPUX)
10220 {
10221 if (built_in_decls [BUILT_IN_FINITE])
10222 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
10223 "_Isfinite");
10224 if (built_in_decls [BUILT_IN_FINITEF])
10225 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
10226 "_Isfinitef");
10227 if (built_in_decls [BUILT_IN_FINITEL])
10228 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
10229 "_Isfinitef128");
10230 }
10231 }
10232
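/* Illustrative sketch, not part of the original sources: the machine-specific
   builtins registered above can be used from C roughly like this:

       void *backing_store_top = __builtin_ia64_bsp ();
       __builtin_ia64_flushrs ();
       __float128 x = __builtin_infq ();
       __float128 y = __builtin_copysignq (__builtin_fabsq (x), x);

   The __float128 examples apply only to non-HP-UX targets, where the type
   and the TFmode builtins are registered.  */
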
10233 rtx
10234 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10235 enum machine_mode mode ATTRIBUTE_UNUSED,
10236 int ignore ATTRIBUTE_UNUSED)
10237 {
10238 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10239 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10240
10241 switch (fcode)
10242 {
10243 case IA64_BUILTIN_BSP:
10244 if (! target || ! register_operand (target, DImode))
10245 target = gen_reg_rtx (DImode);
10246 emit_insn (gen_bsp_value (target));
10247 #ifdef POINTERS_EXTEND_UNSIGNED
10248 target = convert_memory_address (ptr_mode, target);
10249 #endif
10250 return target;
10251
10252 case IA64_BUILTIN_FLUSHRS:
10253 emit_insn (gen_flushrs ());
10254 return const0_rtx;
10255
10256 case IA64_BUILTIN_INFQ:
10257 case IA64_BUILTIN_HUGE_VALQ:
10258 {
10259 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10260 REAL_VALUE_TYPE inf;
10261 rtx tmp;
10262
10263 real_inf (&inf);
10264 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10265
10266 tmp = validize_mem (force_const_mem (target_mode, tmp));
10267
10268 if (target == 0)
10269 target = gen_reg_rtx (target_mode);
10270
10271 emit_move_insn (target, tmp);
10272 return target;
10273 }
10274
10275 case IA64_BUILTIN_FABSQ:
10276 case IA64_BUILTIN_COPYSIGNQ:
10277 return expand_call (exp, target, ignore);
10278
10279 default:
10280 gcc_unreachable ();
10281 }
10282
10283 return NULL_RTX;
10284 }
10285
10286 /* Return the ia64 builtin for CODE. */
10287
10288 static tree
10289 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10290 {
10291 if (code >= IA64_BUILTIN_max)
10292 return error_mark_node;
10293
10294 return ia64_builtins[code];
10295 }
10296
10297 /* On HP-UX IA64, aggregate parameters are passed in the most
10298 significant bits of the stack slot. */
10299
10300 enum direction
10301 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
10302 {
10303 /* Exception to normal case for structures/unions/etc. */
10304
10305 if (type && AGGREGATE_TYPE_P (type)
10306 && int_size_in_bytes (type) < UNITS_PER_WORD)
10307 return upward;
10308
10309 /* Fall back to the default. */
10310 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10311 }
10312
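/* Illustrative sketch, not part of the original sources: for a small
   aggregate passed by value, e.g.

       struct rgb { unsigned char r, g, b; };

   the three bytes end up in the most significant bits of the 8-byte slot
   ("upward" padding on big-endian HP-UX puts the data first), while other
   arguments fall back to DEFAULT_FUNCTION_ARG_PADDING.  */
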
10313 /* Emit text to declare externally defined variables and functions, because
10314 the Intel assembler does not support undefined externals. */
10315
10316 void
10317 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10318 {
10319 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10320 set in order to avoid putting out names that are never really
10321 used. */
10322 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10323 {
10324 /* maybe_assemble_visibility will return 1 if the assembler
10325 visibility directive is output. */
10326 int need_visibility = ((*targetm.binds_local_p) (decl)
10327 && maybe_assemble_visibility (decl));
10328
10329 #ifdef DO_CRTL_NAMES
10330 DO_CRTL_NAMES;
10331 #endif
10332
10333 /* GNU as does not need anything here, but the HP linker does
10334 need something for external functions. */
10335 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10336 && TREE_CODE (decl) == FUNCTION_DECL)
10337 (*targetm.asm_out.globalize_decl_name) (file, decl);
10338 else if (need_visibility && !TARGET_GNU_AS)
10339 (*targetm.asm_out.globalize_label) (file, name);
10340 }
10341 }
10342
10343 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10344 modes of word_mode and larger. Rename the TFmode libfuncs using the
10345 HPUX conventions. __divtf3 is used for XFmode; we need to keep it for
10346 backward compatibility. */
10347
10348 static void
10349 ia64_init_libfuncs (void)
10350 {
10351 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10352 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10353 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10354 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10355
10356 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10357 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10358 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10359 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10360 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10361
10362 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10363 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10364 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10365 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10366 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10367 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10368
10369 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10370 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10371 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10372 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10373 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10374
10375 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10376 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10377 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10378 /* HP-UX 11.23 libc does not have a function for unsigned
10379 SImode-to-TFmode conversion. */
10380 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10381 }
10382
10383 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10384
10385 static void
10386 ia64_hpux_init_libfuncs (void)
10387 {
10388 ia64_init_libfuncs ();
10389
10390 /* The HP SI millicode division and mod functions expect DI arguments.
10391 By turning them off completely we avoid using both libgcc and the
10392 non-standard millicode routines and use the HP DI millicode routines
10393 instead. */
10394
10395 set_optab_libfunc (sdiv_optab, SImode, 0);
10396 set_optab_libfunc (udiv_optab, SImode, 0);
10397 set_optab_libfunc (smod_optab, SImode, 0);
10398 set_optab_libfunc (umod_optab, SImode, 0);
10399
10400 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10401 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10402 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10403 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10404
10405 /* HP-UX libc has TF min/max/abs routines in it. */
10406 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10407 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10408 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10409
10410 /* ia64_expand_compare uses this. */
10411 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10412
10413 /* These should never be used. */
10414 set_optab_libfunc (eq_optab, TFmode, 0);
10415 set_optab_libfunc (ne_optab, TFmode, 0);
10416 set_optab_libfunc (gt_optab, TFmode, 0);
10417 set_optab_libfunc (ge_optab, TFmode, 0);
10418 set_optab_libfunc (lt_optab, TFmode, 0);
10419 set_optab_libfunc (le_optab, TFmode, 0);
10420 }
10421
10422 /* Rename the division and modulus functions in VMS. */
10423
10424 static void
10425 ia64_vms_init_libfuncs (void)
10426 {
10427 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10428 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10429 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10430 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10431 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10432 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10433 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10434 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10435 abort_libfunc = init_one_libfunc ("decc$abort");
10436 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10437 #ifdef MEM_LIBFUNCS_INIT
10438 MEM_LIBFUNCS_INIT;
10439 #endif
10440 }
10441
10442 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10443 the HPUX conventions. */
10444
10445 static void
10446 ia64_sysv4_init_libfuncs (void)
10447 {
10448 ia64_init_libfuncs ();
10449
10450 /* These functions are not part of the HPUX TFmode interface. We
10451 use them instead of _U_Qfcmp, which doesn't work the way we
10452 expect. */
10453 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10454 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10455 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10456 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10457 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10458 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10459
10460 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10461 glibc doesn't have them. */
10462 }
10463
10464 /* Use soft-fp. */
10465
10466 static void
10467 ia64_soft_fp_init_libfuncs (void)
10468 {
10469 }
10470
10471 static bool
10472 ia64_vms_valid_pointer_mode (enum machine_mode mode)
10473 {
10474 return (mode == SImode || mode == DImode);
10475 }
10476 \f
10477 /* For HPUX, it is illegal to have relocations in shared segments. */
10478
10479 static int
10480 ia64_hpux_reloc_rw_mask (void)
10481 {
10482 return 3;
10483 }
10484
10485 /* For others, relax this so that relocations to local data go in
10486 read-only segments, but we still cannot allow global relocations
10487 in read-only segments. */
10488
10489 static int
10490 ia64_reloc_rw_mask (void)
10491 {
10492 return flag_pic ? 3 : 2;
10493 }
10494
10495 /* Return the section to use for X. The only special thing we do here
10496 is to honor small data. */
10497
10498 static section *
10499 ia64_select_rtx_section (enum machine_mode mode, rtx x,
10500 unsigned HOST_WIDE_INT align)
10501 {
10502 if (GET_MODE_SIZE (mode) > 0
10503 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10504 && !TARGET_NO_SDATA)
10505 return sdata_section;
10506 else
10507 return default_elf_select_rtx_section (mode, x, align);
10508 }
10509
10510 static unsigned int
10511 ia64_section_type_flags (tree decl, const char *name, int reloc)
10512 {
10513 unsigned int flags = 0;
10514
10515 if (strcmp (name, ".sdata") == 0
10516 || strncmp (name, ".sdata.", 7) == 0
10517 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10518 || strncmp (name, ".sdata2.", 8) == 0
10519 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10520 || strcmp (name, ".sbss") == 0
10521 || strncmp (name, ".sbss.", 6) == 0
10522 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10523 flags = SECTION_SMALL;
10524
10525 #if TARGET_ABI_OPEN_VMS
10526 if (decl && DECL_ATTRIBUTES (decl)
10527 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10528 flags |= SECTION_VMS_OVERLAY;
10529 #endif
10530
10531 flags |= default_section_type_flags (decl, name, reloc);
10532 return flags;
10533 }
10534
10535 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10536 structure type and that the address of that type should be passed
10537 in out0, rather than in r8. */
10538
10539 static bool
10540 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10541 {
10542 tree ret_type = TREE_TYPE (fntype);
10543
10544 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10545 as the structure return address parameter, if the return value
10546 type has a non-trivial copy constructor or destructor. It is not
10547 clear if this same convention should be used for other
10548 programming languages. Until G++ 3.4, we incorrectly used r8 for
10549 these return values. */
10550 return (abi_version_at_least (2)
10551 && ret_type
10552 && TYPE_MODE (ret_type) == BLKmode
10553 && TREE_ADDRESSABLE (ret_type)
10554 && strcmp (lang_hooks.name, "GNU C++") == 0);
10555 }
10556
10557 /* Output the assembler code for a thunk function. THUNK_DECL is the
10558 declaration for the thunk function itself, FUNCTION is the decl for
10559 the target function. DELTA is an immediate constant offset to be
10560 added to THIS. If VCALL_OFFSET is nonzero, the word at
10561 *(*this + vcall_offset) should be added to THIS. */
10562
10563 static void
10564 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10565 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10566 tree function)
10567 {
10568 rtx this_rtx, insn, funexp;
10569 unsigned int this_parmno;
10570 unsigned int this_regno;
10571 rtx delta_rtx;
10572
10573 reload_completed = 1;
10574 epilogue_completed = 1;
10575
10576 /* Set things up as ia64_expand_prologue might. */
10577 last_scratch_gr_reg = 15;
10578
10579 memset (&current_frame_info, 0, sizeof (current_frame_info));
10580 current_frame_info.spill_cfa_off = -16;
10581 current_frame_info.n_input_regs = 1;
10582 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10583
10584 /* Mark the end of the (empty) prologue. */
10585 emit_note (NOTE_INSN_PROLOGUE_END);
10586
10587 /* Figure out whether "this" will be the first parameter (the
10588 typical case) or the second parameter (as happens when the
10589 virtual function returns certain class objects). */
10590 this_parmno
10591 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10592 ? 1 : 0);
10593 this_regno = IN_REG (this_parmno);
10594 if (!TARGET_REG_NAMES)
10595 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10596
10597 this_rtx = gen_rtx_REG (Pmode, this_regno);
10598
10599 /* Apply the constant offset, if required. */
10600 delta_rtx = GEN_INT (delta);
10601 if (TARGET_ILP32)
10602 {
10603 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10604 REG_POINTER (tmp) = 1;
10605 if (delta && satisfies_constraint_I (delta_rtx))
10606 {
10607 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10608 delta = 0;
10609 }
10610 else
10611 emit_insn (gen_ptr_extend (this_rtx, tmp));
10612 }
10613 if (delta)
10614 {
10615 if (!satisfies_constraint_I (delta_rtx))
10616 {
10617 rtx tmp = gen_rtx_REG (Pmode, 2);
10618 emit_move_insn (tmp, delta_rtx);
10619 delta_rtx = tmp;
10620 }
10621 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10622 }
10623
10624 /* Apply the offset from the vtable, if required. */
10625 if (vcall_offset)
10626 {
10627 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10628 rtx tmp = gen_rtx_REG (Pmode, 2);
10629
10630 if (TARGET_ILP32)
10631 {
10632 rtx t = gen_rtx_REG (ptr_mode, 2);
10633 REG_POINTER (t) = 1;
10634 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10635 if (satisfies_constraint_I (vcall_offset_rtx))
10636 {
10637 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10638 vcall_offset = 0;
10639 }
10640 else
10641 emit_insn (gen_ptr_extend (tmp, t));
10642 }
10643 else
10644 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10645
10646 if (vcall_offset)
10647 {
10648 if (!satisfies_constraint_J (vcall_offset_rtx))
10649 {
10650 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10651 emit_move_insn (tmp2, vcall_offset_rtx);
10652 vcall_offset_rtx = tmp2;
10653 }
10654 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10655 }
10656
10657 if (TARGET_ILP32)
10658 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10659 else
10660 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10661
10662 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10663 }
10664
10665 /* Generate a tail call to the target function. */
10666 if (! TREE_USED (function))
10667 {
10668 assemble_external (function);
10669 TREE_USED (function) = 1;
10670 }
10671 funexp = XEXP (DECL_RTL (function), 0);
10672 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10673 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10674 insn = get_last_insn ();
10675 SIBLING_CALL_P (insn) = 1;
10676
10677 /* Code generation for calls relies on splitting. */
10678 reload_completed = 1;
10679 epilogue_completed = 1;
10680 try_split (PATTERN (insn), insn, 0);
10681
10682 emit_barrier ();
10683
10684 /* Run just enough of rest_of_compilation to get the insns emitted.
10685 There's not really enough bulk here to make other passes such as
10686 instruction scheduling worthwhile. Note that use_thunk calls
10687 assemble_start_function and assemble_end_function. */
10688
10689 insn_locators_alloc ();
10690 emit_all_insn_group_barriers (NULL);
10691 insn = get_insns ();
10692 shorten_branches (insn);
10693 final_start_function (insn, file, 1);
10694 final (insn, file, 1);
10695 final_end_function ();
10696
10697 reload_completed = 0;
10698 epilogue_completed = 0;
10699 }
10700
10701 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10702
10703 static rtx
10704 ia64_struct_value_rtx (tree fntype,
10705 int incoming ATTRIBUTE_UNUSED)
10706 {
10707 if (TARGET_ABI_OPEN_VMS ||
10708 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10709 return NULL_RTX;
10710 return gen_rtx_REG (Pmode, GR_REG (8));
10711 }
10712
10713 static bool
10714 ia64_scalar_mode_supported_p (enum machine_mode mode)
10715 {
10716 switch (mode)
10717 {
10718 case QImode:
10719 case HImode:
10720 case SImode:
10721 case DImode:
10722 case TImode:
10723 return true;
10724
10725 case SFmode:
10726 case DFmode:
10727 case XFmode:
10728 case RFmode:
10729 return true;
10730
10731 case TFmode:
10732 return true;
10733
10734 default:
10735 return false;
10736 }
10737 }
10738
10739 static bool
10740 ia64_vector_mode_supported_p (enum machine_mode mode)
10741 {
10742 switch (mode)
10743 {
10744 case V8QImode:
10745 case V4HImode:
10746 case V2SImode:
10747 return true;
10748
10749 case V2SFmode:
10750 return true;
10751
10752 default:
10753 return false;
10754 }
10755 }
10756
10757 /* Implement the FUNCTION_PROFILER macro. */
10758
10759 void
10760 ia64_output_function_profiler (FILE *file, int labelno)
10761 {
10762 bool indirect_call;
10763
10764 /* If the function needs a static chain and the static chain
10765 register is r15, we use an indirect call so as to bypass
10766 the PLT stub in case the executable is dynamically linked,
10767 because the stub clobbers r15 as per 5.3.6 of the psABI.
10768 We don't need to do that in non-canonical PIC mode. */
10769
10770 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10771 {
10772 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10773 indirect_call = true;
10774 }
10775 else
10776 indirect_call = false;
10777
10778 if (TARGET_GNU_AS)
10779 fputs ("\t.prologue 4, r40\n", file);
10780 else
10781 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10782 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10783
10784 if (NO_PROFILE_COUNTERS)
10785 fputs ("\tmov out3 = r0\n", file);
10786 else
10787 {
10788 char buf[20];
10789 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10790
10791 if (TARGET_AUTO_PIC)
10792 fputs ("\tmovl out3 = @gprel(", file);
10793 else
10794 fputs ("\taddl out3 = @ltoff(", file);
10795 assemble_name (file, buf);
10796 if (TARGET_AUTO_PIC)
10797 fputs (")\n", file);
10798 else
10799 fputs ("), r1\n", file);
10800 }
10801
10802 if (indirect_call)
10803 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10804 fputs ("\t;;\n", file);
10805
10806 fputs ("\t.save rp, r42\n", file);
10807 fputs ("\tmov out2 = b0\n", file);
10808 if (indirect_call)
10809 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10810 fputs ("\t.body\n", file);
10811 fputs ("\tmov out1 = r1\n", file);
10812 if (indirect_call)
10813 {
10814 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10815 fputs ("\tmov b6 = r16\n", file);
10816 fputs ("\tld8 r1 = [r14]\n", file);
10817 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10818 }
10819 else
10820 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10821 }
10822
10823 static GTY(()) rtx mcount_func_rtx;
10824 static rtx
10825 gen_mcount_func_rtx (void)
10826 {
10827 if (!mcount_func_rtx)
10828 mcount_func_rtx = init_one_libfunc ("_mcount");
10829 return mcount_func_rtx;
10830 }
10831
10832 void
10833 ia64_profile_hook (int labelno)
10834 {
10835 rtx label, ip;
10836
10837 if (NO_PROFILE_COUNTERS)
10838 label = const0_rtx;
10839 else
10840 {
10841 char buf[30];
10842 const char *label_name;
10843 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10844 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10845 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10846 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10847 }
10848 ip = gen_reg_rtx (Pmode);
10849 emit_insn (gen_ip_value (ip));
10850 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10851 VOIDmode, 3,
10852 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10853 ip, Pmode,
10854 label, Pmode);
10855 }
10856
10857 /* Return the mangling of TYPE if it is an extended fundamental type. */
10858
10859 static const char *
10860 ia64_mangle_type (const_tree type)
10861 {
10862 type = TYPE_MAIN_VARIANT (type);
10863
10864 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10865 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10866 return NULL;
10867
10868 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10869 mangled as "e". */
10870 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10871 return "g";
10872 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10873 an extended mangling. Elsewhere, "e" is available since long
10874 double is 80 bits. */
10875 if (TYPE_MODE (type) == XFmode)
10876 return TARGET_HPUX ? "u9__float80" : "e";
10877 if (TYPE_MODE (type) == RFmode)
10878 return "u7__fpreg";
10879 return NULL;
10880 }
10881
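/* Illustrative sketch, not part of the original sources: with the manglings
   above, a C++ function such as

       void f (__float80);

   would mangle roughly as _Z1fe on Linux (where "e" is available) and as
   _Z1fu9__float80 on HP-UX; __float128 mangles with "g" outside HP-UX, and
   __fpreg always uses the vendor-extended name "u7__fpreg".  */
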
10882 /* Return the diagnostic message string if conversion from FROMTYPE to
10883 TOTYPE is not allowed, NULL otherwise. */
10884 static const char *
10885 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10886 {
10887 /* Reject nontrivial conversion to or from __fpreg. */
10888 if (TYPE_MODE (fromtype) == RFmode
10889 && TYPE_MODE (totype) != RFmode
10890 && TYPE_MODE (totype) != VOIDmode)
10891 return N_("invalid conversion from %<__fpreg%>");
10892 if (TYPE_MODE (totype) == RFmode
10893 && TYPE_MODE (fromtype) != RFmode)
10894 return N_("invalid conversion to %<__fpreg%>");
10895 return NULL;
10896 }
10897
10898 /* Return the diagnostic message string if the unary operation OP is
10899 not permitted on TYPE, NULL otherwise. */
10900 static const char *
10901 ia64_invalid_unary_op (int op, const_tree type)
10902 {
10903 /* Reject operations on __fpreg other than unary + or &. */
10904 if (TYPE_MODE (type) == RFmode
10905 && op != CONVERT_EXPR
10906 && op != ADDR_EXPR)
10907 return N_("invalid operation on %<__fpreg%>");
10908 return NULL;
10909 }
10910
10911 /* Return the diagnostic message string if the binary operation OP is
10912 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10913 static const char *
10914 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10915 {
10916 /* Reject operations on __fpreg. */
10917 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10918 return N_("invalid operation on %<__fpreg%>");
10919 return NULL;
10920 }
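/* Illustrative sketch (not part of the original source; variable names
   are hypothetical): taken together, the three hooks above restrict
   __fpreg at the C level roughly as

       __fpreg r;
       double d = r;        // "invalid conversion from __fpreg"
       __fpreg s = r + r;   // "invalid operation on __fpreg"
       __fpreg *p = &r;     // accepted: unary & (and unary +) are allowed

   i.e. only the address-of/identity operators and trivial conversions
   are permitted on RFmode values.  */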
10921
10922 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
10923 static void
10924 ia64_option_default_params (void)
10925 {
10926 /* Let the scheduler form additional regions. */
10927 set_default_param_value (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS, 2);
10928
10929 /* Set the default values for cache-related parameters. */
10930 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6);
10931 set_default_param_value (PARAM_L1_CACHE_LINE_SIZE, 32);
10932
10933 set_default_param_value (PARAM_SCHED_MEM_TRUE_DEP_COST, 4);
10934 }
10935
10936 /* HP-UX version_id attribute.
10937 For object foo, if the version_id is set to 1234, put out an alias
10938 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
10939 other than an alias statement because it is an illegal symbol name. */
10940
10941 static tree
10942 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10943 tree name ATTRIBUTE_UNUSED,
10944 tree args,
10945 int flags ATTRIBUTE_UNUSED,
10946 bool *no_add_attrs)
10947 {
10948 tree arg = TREE_VALUE (args);
10949
10950 if (TREE_CODE (arg) != STRING_CST)
10951 {
10952 error ("version attribute is not a string");
10953 *no_add_attrs = true;
10954 return NULL_TREE;
10955 }
10956 return NULL_TREE;
10957 }
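/* Illustrative usage (hypothetical declaration, HP-UX only): a source
   declaration such as

       extern int foo (void) __attribute__ ((version_id ("1234")));

   is expected to result in the alias directive

       .alias foo "foo{1234}"

   as described in the comment before the handler above.  */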
10958
10959 /* Target hook for c_mode_for_suffix. */
10960
10961 static enum machine_mode
10962 ia64_c_mode_for_suffix (char suffix)
10963 {
10964 if (suffix == 'q')
10965 return TFmode;
10966 if (suffix == 'w')
10967 return XFmode;
10968
10969 return VOIDmode;
10970 }
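/* Illustrative note (an assumption about how the C front end uses this
   hook): with the mapping above, a floating constant written as 1.0q
   gets TFmode (__float128) and one written as 1.0w gets XFmode
   (__float80).  */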
10971
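/* Promote the mode of function arguments and return values.  On OpenVMS
   the widening rules quoted below apply; otherwise defer to the generic
   promotion.  */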
10972 static enum machine_mode
10973 ia64_promote_function_mode (const_tree type,
10974 enum machine_mode mode,
10975 int *punsignedp,
10976 const_tree funtype,
10977 int for_return)
10978 {
10979 /* Special processing required for OpenVMS ... */
10980
10981 if (!TARGET_ABI_OPEN_VMS)
10982 return default_promote_function_mode (type, mode, punsignedp, funtype,
10983 for_return);
10984
10985 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10986 HP OpenVMS I64 Version 8.2EFT,
10987 chapter 4 "OpenVMS I64 Conventions"
10988 section 4.7 "Procedure Linkage"
10989 subsection 4.7.5.2, "Normal Register Parameters"
10990
10991 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10992 values passed in registers are zero-filled; signed integral values as
10993 well as unsigned 32-bit integral values are sign-extended to 64 bits.
10994 For all other types passed in the general registers, unused bits are
10995 undefined." */
10996
10997 if (for_return != 2
10998 && GET_MODE_CLASS (mode) == MODE_INT
10999 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
11000 {
11001 if (mode == SImode)
11002 *punsignedp = 0;
11003 return DImode;
11004 }
11005 else
11006 return promote_mode (type, mode, punsignedp);
11007 }
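/* Illustrative consequence (not part of the original source): under the
   OpenVMS rules above, an `unsigned int' (SImode) argument is widened to
   DImode with *punsignedp cleared, i.e. it is sign-extended to 64 bits,
   whereas narrower unsigned types such as `unsigned short' keep
   *punsignedp set and are therefore zero-extended.  */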
11008
11009 static GTY(()) rtx ia64_dconst_0_5_rtx;
11010
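/* Return a DFmode CONST_DOUBLE for the value 0.5, creating and caching
   it on first use.  */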
11011 rtx
11012 ia64_dconst_0_5 (void)
11013 {
11014 if (! ia64_dconst_0_5_rtx)
11015 {
11016 REAL_VALUE_TYPE rv;
11017 real_from_string (&rv, "0.5");
11018 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11019 }
11020 return ia64_dconst_0_5_rtx;
11021 }
11022
11023 static GTY(()) rtx ia64_dconst_0_375_rtx;
11024
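/* Likewise, return a cached DFmode CONST_DOUBLE for the value 0.375.  */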
11025 rtx
11026 ia64_dconst_0_375 (void)
11027 {
11028 if (! ia64_dconst_0_375_rtx)
11029 {
11030 REAL_VALUE_TYPE rv;
11031 real_from_string (&rv, "0.375");
11032 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11033 }
11034 return ia64_dconst_0_375_rtx;
11035 }
11036
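/* Raw register mode: floating-point registers are always handled in
   their full XFmode width (presumably so spills and fills preserve the
   complete register contents); all other registers use the default.  */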
11037 static enum machine_mode
11038 ia64_get_reg_raw_mode (int regno)
11039 {
11040 if (FR_REGNO_P (regno))
11041 return XFmode;
11042 return default_get_reg_raw_mode (regno);
11043 }
11044
11045 /* Always default to .text section until HP-UX linker is fixed. */
11046
11047 ATTRIBUTE_UNUSED static section *
11048 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11049 enum node_frequency freq ATTRIBUTE_UNUSED,
11050 bool startup ATTRIBUTE_UNUSED,
11051 bool exit ATTRIBUTE_UNUSED)
11052 {
11053 return NULL;
11054 }
11055
11056 #include "gt-ia64.h"