gcc/config/ia64/ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by James E. Wilson <wilson@cygnus.com> and
6 David Mosberger <davidm@hpl.hp.com>.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "libfuncs.h"
45 #include "diagnostic-core.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "gimple.h"
55 #include "intl.h"
56 #include "df.h"
57 #include "debug.h"
58 #include "params.h"
59 #include "dbgcnt.h"
60 #include "tm-constrs.h"
61 #include "sel-sched.h"
62 #include "reload.h"
63 #include "dwarf2out.h"
64
65 /* This is used for communication between ASM_OUTPUT_LABEL and
66 ASM_OUTPUT_LABELREF. */
67 int ia64_asm_output_label = 0;
68
69 /* Register names for ia64_expand_prologue. */
70 static const char * const ia64_reg_numbers[96] =
71 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
72 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
73 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
74 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
75 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
76 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
77 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
78 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
79 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
80 "r104","r105","r106","r107","r108","r109","r110","r111",
81 "r112","r113","r114","r115","r116","r117","r118","r119",
82 "r120","r121","r122","r123","r124","r125","r126","r127"};
83
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_input_reg_names[8] =
86 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
87
88 /* ??? These strings could be shared with REGISTER_NAMES. */
89 static const char * const ia64_local_reg_names[80] =
90 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
91 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
92 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
93 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
94 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
95 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
96 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
97 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
98 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
99 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
100
101 /* ??? These strings could be shared with REGISTER_NAMES. */
102 static const char * const ia64_output_reg_names[8] =
103 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
104
105 /* Which cpu are we scheduling for. */
106 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
107
108 /* Determines whether we run our final scheduling pass or not. We always
109 avoid the normal second scheduling pass. */
110 static int ia64_flag_schedule_insns2;
111
112 /* Determines whether we run variable tracking in machine dependent
113 reorganization. */
114 static int ia64_flag_var_tracking;
115
116 /* Variables which are this size or smaller are put in the sdata/sbss
117 sections. */
118
119 unsigned int ia64_section_threshold;
120
121 /* The following variable is used by the DFA insn scheduler. The value is
122 TRUE if we do insn bundling instead of insn scheduling. */
123 int bundling_p = 0;
124
125 enum ia64_frame_regs
126 {
127 reg_fp,
128 reg_save_b0,
129 reg_save_pr,
130 reg_save_ar_pfs,
131 reg_save_ar_unat,
132 reg_save_ar_lc,
133 reg_save_gp,
134 number_of_ia64_frame_regs
135 };
136
137 /* Structure to be filled in by ia64_compute_frame_size with register
138 save masks and offsets for the current function. */
139
140 struct ia64_frame_info
141 {
142 HOST_WIDE_INT total_size; /* size of the stack frame, not including
143 the caller's scratch area. */
144 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
145 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
146 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
147 HARD_REG_SET mask; /* mask of saved registers. */
148 unsigned int gr_used_mask; /* mask of registers in use as gr spill
149 registers or long-term scratches. */
150 int n_spilled; /* number of spilled registers. */
151 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
152 int n_input_regs; /* number of input registers used. */
153 int n_local_regs; /* number of local registers used. */
154 int n_output_regs; /* number of output registers used. */
155 int n_rotate_regs; /* number of rotating registers used. */
156
157 char need_regstk; /* true if a .regstk directive needed. */
158 char initialized; /* true if the data is finalized. */
159 };
160
161 /* Current frame information calculated by ia64_compute_frame_size. */
162 static struct ia64_frame_info current_frame_info;
163 /* The actual registers that are emitted. */
164 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
165 \f
166 static int ia64_first_cycle_multipass_dfa_lookahead (void);
167 static void ia64_dependencies_evaluation_hook (rtx, rtx);
168 static void ia64_init_dfa_pre_cycle_insn (void);
169 static rtx ia64_dfa_pre_cycle_insn (void);
170 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
171 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
172 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
173 static void ia64_h_i_d_extended (void);
174 static void * ia64_alloc_sched_context (void);
175 static void ia64_init_sched_context (void *, bool);
176 static void ia64_set_sched_context (void *);
177 static void ia64_clear_sched_context (void *);
178 static void ia64_free_sched_context (void *);
179 static int ia64_mode_to_int (enum machine_mode);
180 static void ia64_set_sched_flags (spec_info_t);
181 static ds_t ia64_get_insn_spec_ds (rtx);
182 static ds_t ia64_get_insn_checked_ds (rtx);
183 static bool ia64_skip_rtx_p (const_rtx);
184 static int ia64_speculate_insn (rtx, ds_t, rtx *);
185 static bool ia64_needs_block_p (int);
186 static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
187 static int ia64_spec_check_p (rtx);
188 static int ia64_spec_check_src_p (rtx);
189 static rtx gen_tls_get_addr (void);
190 static rtx gen_thread_pointer (void);
191 static int find_gr_spill (enum ia64_frame_regs, int);
192 static int next_scratch_gr_reg (void);
193 static void mark_reg_gr_used_mask (rtx, void *);
194 static void ia64_compute_frame_size (HOST_WIDE_INT);
195 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
196 static void finish_spill_pointers (void);
197 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
198 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
199 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
200 static rtx gen_movdi_x (rtx, rtx, rtx);
201 static rtx gen_fr_spill_x (rtx, rtx, rtx);
202 static rtx gen_fr_restore_x (rtx, rtx, rtx);
203
204 static void ia64_option_override (void);
205 static void ia64_option_default_params (void);
206 static bool ia64_can_eliminate (const int, const int);
207 static enum machine_mode hfa_element_mode (const_tree, bool);
208 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
209 tree, int *, int);
210 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
211 tree, bool);
212 static rtx ia64_function_arg_1 (const CUMULATIVE_ARGS *, enum machine_mode,
213 const_tree, bool, bool);
214 static rtx ia64_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
215 const_tree, bool);
216 static rtx ia64_function_incoming_arg (CUMULATIVE_ARGS *,
217 enum machine_mode, const_tree, bool);
218 static void ia64_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
219 const_tree, bool);
220 static unsigned int ia64_function_arg_boundary (enum machine_mode,
221 const_tree);
222 static bool ia64_function_ok_for_sibcall (tree, tree);
223 static bool ia64_return_in_memory (const_tree, const_tree);
224 static rtx ia64_function_value (const_tree, const_tree, bool);
225 static rtx ia64_libcall_value (enum machine_mode, const_rtx);
226 static bool ia64_function_value_regno_p (const unsigned int);
227 static int ia64_register_move_cost (enum machine_mode, reg_class_t,
228 reg_class_t);
229 static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
230 bool);
231 static bool ia64_rtx_costs (rtx, int, int, int *, bool);
232 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
233 static void fix_range (const char *);
234 static bool ia64_handle_option (size_t, const char *, int);
235 static struct machine_function * ia64_init_machine_status (void);
236 static void emit_insn_group_barriers (FILE *);
237 static void emit_all_insn_group_barriers (FILE *);
238 static void final_emit_insn_group_barriers (FILE *);
239 static void emit_predicate_relation_info (void);
240 static void ia64_reorg (void);
241 static bool ia64_in_small_data_p (const_tree);
242 static void process_epilogue (FILE *, rtx, bool, bool);
243
244 static bool ia64_assemble_integer (rtx, unsigned int, int);
245 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
246 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
247 static void ia64_output_function_end_prologue (FILE *);
248
249 static int ia64_issue_rate (void);
250 static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
251 static void ia64_sched_init (FILE *, int, int);
252 static void ia64_sched_init_global (FILE *, int, int);
253 static void ia64_sched_finish_global (FILE *, int);
254 static void ia64_sched_finish (FILE *, int);
255 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
256 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
257 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
258 static int ia64_variable_issue (FILE *, int, rtx, int);
259
260 static void ia64_asm_unwind_emit (FILE *, rtx);
261 static void ia64_asm_emit_except_personality (rtx);
262 static void ia64_asm_init_sections (void);
263
264 static enum unwind_info_type ia64_debug_unwind_info (void);
265 static enum unwind_info_type ia64_except_unwind_info (struct gcc_options *);
266
267 static struct bundle_state *get_free_bundle_state (void);
268 static void free_bundle_state (struct bundle_state *);
269 static void initiate_bundle_states (void);
270 static void finish_bundle_states (void);
271 static unsigned bundle_state_hash (const void *);
272 static int bundle_state_eq_p (const void *, const void *);
273 static int insert_bundle_state (struct bundle_state *);
274 static void initiate_bundle_state_table (void);
275 static void finish_bundle_state_table (void);
276 static int try_issue_nops (struct bundle_state *, int);
277 static int try_issue_insn (struct bundle_state *, rtx);
278 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
279 static int get_max_pos (state_t);
280 static int get_template (state_t, int);
281
282 static rtx get_next_important_insn (rtx, rtx);
283 static bool important_for_bundling_p (rtx);
284 static void bundling (FILE *, int, rtx, rtx);
285
286 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
287 HOST_WIDE_INT, tree);
288 static void ia64_file_start (void);
289 static void ia64_globalize_decl_name (FILE *, tree);
290
291 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
292 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
293 static section *ia64_select_rtx_section (enum machine_mode, rtx,
294 unsigned HOST_WIDE_INT);
295 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
296 ATTRIBUTE_UNUSED;
297 static unsigned int ia64_section_type_flags (tree, const char *, int);
298 static void ia64_init_libfuncs (void)
299 ATTRIBUTE_UNUSED;
300 static void ia64_hpux_init_libfuncs (void)
301 ATTRIBUTE_UNUSED;
302 static void ia64_sysv4_init_libfuncs (void)
303 ATTRIBUTE_UNUSED;
304 static void ia64_vms_init_libfuncs (void)
305 ATTRIBUTE_UNUSED;
306 static void ia64_soft_fp_init_libfuncs (void)
307 ATTRIBUTE_UNUSED;
308 static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
309 ATTRIBUTE_UNUSED;
310 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
311 ATTRIBUTE_UNUSED;
312
313 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
314 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
315 static void ia64_encode_section_info (tree, rtx, int);
316 static rtx ia64_struct_value_rtx (tree, int);
317 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
318 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
319 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
320 static bool ia64_cannot_force_const_mem (rtx);
321 static const char *ia64_mangle_type (const_tree);
322 static const char *ia64_invalid_conversion (const_tree, const_tree);
323 static const char *ia64_invalid_unary_op (int, const_tree);
324 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
325 static enum machine_mode ia64_c_mode_for_suffix (char);
326 static enum machine_mode ia64_promote_function_mode (const_tree,
327 enum machine_mode,
328 int *,
329 const_tree,
330 int);
331 static void ia64_trampoline_init (rtx, tree, rtx);
332 static void ia64_override_options_after_change (void);
333
334 static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int);
335 static tree ia64_builtin_decl (unsigned, bool);
336
337 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
338 static enum machine_mode ia64_get_reg_raw_mode (int regno);
339 static section * ia64_hpux_function_section (tree, enum node_frequency,
340 bool, bool);
341 \f
342 /* Table of valid machine attributes. */
343 static const struct attribute_spec ia64_attribute_table[] =
344 {
345 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
346 affects_type_identity } */
347 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
348 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
349 false },
350 #if TARGET_ABI_OPEN_VMS
351 { "common_object", 1, 1, true, false, false,
352 ia64_vms_common_object_attribute, false },
353 #endif
354 { "version_id", 1, 1, true, false, false,
355 ia64_handle_version_id_attribute, false },
356 { NULL, 0, 0, false, false, false, NULL, false }
357 };
358
359 /* Implement overriding of the optimization options. */
360 static const struct default_options ia64_option_optimization_table[] =
361 {
362 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
363 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
364 SUBTARGET_OPTIMIZATION_OPTIONS,
365 #endif
366 { OPT_LEVELS_NONE, 0, NULL, 0 }
367 };
368
369 /* Initialize the GCC target structure. */
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
372
373 #undef TARGET_INIT_BUILTINS
374 #define TARGET_INIT_BUILTINS ia64_init_builtins
375
376 #undef TARGET_EXPAND_BUILTIN
377 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
378
379 #undef TARGET_BUILTIN_DECL
380 #define TARGET_BUILTIN_DECL ia64_builtin_decl
381
382 #undef TARGET_ASM_BYTE_OP
383 #define TARGET_ASM_BYTE_OP "\tdata1\t"
384 #undef TARGET_ASM_ALIGNED_HI_OP
385 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
386 #undef TARGET_ASM_ALIGNED_SI_OP
387 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
388 #undef TARGET_ASM_ALIGNED_DI_OP
389 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
390 #undef TARGET_ASM_UNALIGNED_HI_OP
391 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
392 #undef TARGET_ASM_UNALIGNED_SI_OP
393 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
394 #undef TARGET_ASM_UNALIGNED_DI_OP
395 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER ia64_assemble_integer
398
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE ia64_option_override
401 #undef TARGET_OPTION_OPTIMIZATION_TABLE
402 #define TARGET_OPTION_OPTIMIZATION_TABLE ia64_option_optimization_table
403 #undef TARGET_OPTION_DEFAULT_PARAMS
404 #define TARGET_OPTION_DEFAULT_PARAMS ia64_option_default_params
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
408 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
409 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
410 #undef TARGET_ASM_FUNCTION_EPILOGUE
411 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
412
413 #undef TARGET_IN_SMALL_DATA_P
414 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
415
416 #undef TARGET_SCHED_ADJUST_COST_2
417 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
418 #undef TARGET_SCHED_ISSUE_RATE
419 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
420 #undef TARGET_SCHED_VARIABLE_ISSUE
421 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
422 #undef TARGET_SCHED_INIT
423 #define TARGET_SCHED_INIT ia64_sched_init
424 #undef TARGET_SCHED_FINISH
425 #define TARGET_SCHED_FINISH ia64_sched_finish
426 #undef TARGET_SCHED_INIT_GLOBAL
427 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
428 #undef TARGET_SCHED_FINISH_GLOBAL
429 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
430 #undef TARGET_SCHED_REORDER
431 #define TARGET_SCHED_REORDER ia64_sched_reorder
432 #undef TARGET_SCHED_REORDER2
433 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
434
435 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
436 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
437
438 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
439 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
440
441 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
442 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
443 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
444 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
445
446 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
447 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
448 ia64_first_cycle_multipass_dfa_lookahead_guard
449
450 #undef TARGET_SCHED_DFA_NEW_CYCLE
451 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
452
453 #undef TARGET_SCHED_H_I_D_EXTENDED
454 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
455
456 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
457 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
458
459 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
460 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
461
462 #undef TARGET_SCHED_SET_SCHED_CONTEXT
463 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
464
465 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
466 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
467
468 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
469 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
470
471 #undef TARGET_SCHED_SET_SCHED_FLAGS
472 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
473
474 #undef TARGET_SCHED_GET_INSN_SPEC_DS
475 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
476
477 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
478 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
479
480 #undef TARGET_SCHED_SPECULATE_INSN
481 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
482
483 #undef TARGET_SCHED_NEEDS_BLOCK_P
484 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
485
486 #undef TARGET_SCHED_GEN_SPEC_CHECK
487 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
488
489 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
490 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
491 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
492
493 #undef TARGET_SCHED_SKIP_RTX_P
494 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
495
496 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
497 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
498 #undef TARGET_ARG_PARTIAL_BYTES
499 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
500 #undef TARGET_FUNCTION_ARG
501 #define TARGET_FUNCTION_ARG ia64_function_arg
502 #undef TARGET_FUNCTION_INCOMING_ARG
503 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
504 #undef TARGET_FUNCTION_ARG_ADVANCE
505 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
506 #undef TARGET_FUNCTION_ARG_BOUNDARY
507 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
508
509 #undef TARGET_ASM_OUTPUT_MI_THUNK
510 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
511 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
512 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
513
514 #undef TARGET_ASM_FILE_START
515 #define TARGET_ASM_FILE_START ia64_file_start
516
517 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
518 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
519
520 #undef TARGET_REGISTER_MOVE_COST
521 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
522 #undef TARGET_MEMORY_MOVE_COST
523 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
524 #undef TARGET_RTX_COSTS
525 #define TARGET_RTX_COSTS ia64_rtx_costs
526 #undef TARGET_ADDRESS_COST
527 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
528
529 #undef TARGET_UNSPEC_MAY_TRAP_P
530 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
531
532 #undef TARGET_MACHINE_DEPENDENT_REORG
533 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
534
535 #undef TARGET_ENCODE_SECTION_INFO
536 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
537
538 #undef TARGET_SECTION_TYPE_FLAGS
539 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
540
541 #ifdef HAVE_AS_TLS
542 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
543 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
544 #endif
545
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
548
549 /* ??? Investigate. */
550 #if 0
551 #undef TARGET_PROMOTE_PROTOTYPES
552 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
553 #endif
554
555 #undef TARGET_FUNCTION_VALUE
556 #define TARGET_FUNCTION_VALUE ia64_function_value
557 #undef TARGET_LIBCALL_VALUE
558 #define TARGET_LIBCALL_VALUE ia64_libcall_value
559 #undef TARGET_FUNCTION_VALUE_REGNO_P
560 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
561
562 #undef TARGET_STRUCT_VALUE_RTX
563 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
564 #undef TARGET_RETURN_IN_MEMORY
565 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
566 #undef TARGET_SETUP_INCOMING_VARARGS
567 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
568 #undef TARGET_STRICT_ARGUMENT_NAMING
569 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
570 #undef TARGET_MUST_PASS_IN_STACK
571 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
572 #undef TARGET_GET_RAW_RESULT_MODE
573 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
574 #undef TARGET_GET_RAW_ARG_MODE
575 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
576
577 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
578 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
579
580 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
581 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ia64_dwarf_handle_frame_unspec
582 #undef TARGET_ASM_UNWIND_EMIT
583 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
584 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
585 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
588
589 #undef TARGET_DEBUG_UNWIND_INFO
590 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
591 #undef TARGET_EXCEPT_UNWIND_INFO
592 #define TARGET_EXCEPT_UNWIND_INFO ia64_except_unwind_info
593
594 #undef TARGET_SCALAR_MODE_SUPPORTED_P
595 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
596 #undef TARGET_VECTOR_MODE_SUPPORTED_P
597 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
598
599 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
600 in an order different from the specified program order. */
601 #undef TARGET_RELAXED_ORDERING
602 #define TARGET_RELAXED_ORDERING true
603
604 #undef TARGET_DEFAULT_TARGET_FLAGS
605 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
606 #undef TARGET_HANDLE_OPTION
607 #define TARGET_HANDLE_OPTION ia64_handle_option
608
609 #undef TARGET_CANNOT_FORCE_CONST_MEM
610 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
611
612 #undef TARGET_MANGLE_TYPE
613 #define TARGET_MANGLE_TYPE ia64_mangle_type
614
615 #undef TARGET_INVALID_CONVERSION
616 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
617 #undef TARGET_INVALID_UNARY_OP
618 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
619 #undef TARGET_INVALID_BINARY_OP
620 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
621
622 #undef TARGET_C_MODE_FOR_SUFFIX
623 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
624
625 #undef TARGET_CAN_ELIMINATE
626 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
627
628 #undef TARGET_TRAMPOLINE_INIT
629 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
630
631 #undef TARGET_INVALID_WITHIN_DOLOOP
632 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
633
634 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
635 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
636
637 #undef TARGET_PREFERRED_RELOAD_CLASS
638 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
639
640 struct gcc_target targetm = TARGET_INITIALIZER;
641 \f
642 typedef enum
643 {
644 ADDR_AREA_NORMAL, /* normal address area */
645 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
646 }
647 ia64_addr_area;
648
649 static GTY(()) tree small_ident1;
650 static GTY(()) tree small_ident2;
651
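/* Lazily create the identifiers recognized by the "model" attribute.  */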
652 static void
653 init_idents (void)
654 {
655 if (small_ident1 == 0)
656 {
657 small_ident1 = get_identifier ("small");
658 small_ident2 = get_identifier ("__small__");
659 }
660 }
661
662 /* Retrieve the address area that has been chosen for the given decl. */
663
664 static ia64_addr_area
665 ia64_get_addr_area (tree decl)
666 {
667 tree model_attr;
668
669 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
670 if (model_attr)
671 {
672 tree id;
673
674 init_idents ();
675 id = TREE_VALUE (TREE_VALUE (model_attr));
676 if (id == small_ident1 || id == small_ident2)
677 return ADDR_AREA_SMALL;
678 }
679 return ADDR_AREA_NORMAL;
680 }
681
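/* Handle a "model" attribute; arguments as in struct attribute_spec.handler.
   The only recognized model is "small" (also spelled "__small__"), e.g.
   'int x __attribute__ ((model (small)));', which places X in the small
   address area reachable with addl.  */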
682 static tree
683 ia64_handle_model_attribute (tree *node, tree name, tree args,
684 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
685 {
686 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
687 ia64_addr_area area;
688 tree arg, decl = *node;
689
690 init_idents ();
691 arg = TREE_VALUE (args);
692 if (arg == small_ident1 || arg == small_ident2)
693 {
694 addr_area = ADDR_AREA_SMALL;
695 }
696 else
697 {
698 warning (OPT_Wattributes, "invalid argument of %qE attribute",
699 name);
700 *no_add_attrs = true;
701 }
702
703 switch (TREE_CODE (decl))
704 {
705 case VAR_DECL:
706 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
707 == FUNCTION_DECL)
708 && !TREE_STATIC (decl))
709 {
710 error_at (DECL_SOURCE_LOCATION (decl),
711 "an address area attribute cannot be specified for "
712 "local variables");
713 *no_add_attrs = true;
714 }
715 area = ia64_get_addr_area (decl);
716 if (area != ADDR_AREA_NORMAL && addr_area != area)
717 {
718 error ("address area of %q+D conflicts with previous "
719 "declaration", decl);
720 *no_add_attrs = true;
721 }
722 break;
723
724 case FUNCTION_DECL:
725 error_at (DECL_SOURCE_LOCATION (decl),
726 "address area attribute cannot be specified for "
727 "functions");
728 *no_add_attrs = true;
729 break;
730
731 default:
732 warning (OPT_Wattributes, "%qE attribute ignored",
733 name);
734 *no_add_attrs = true;
735 break;
736 }
737
738 return NULL_TREE;
739 }
740
741 /* The section must have global and overlaid attributes. */
742 #define SECTION_VMS_OVERLAY SECTION_MACH_DEP
743
744 /* Part of the low level implementation of DEC Ada pragma Common_Object which
745 enables the shared use of variables stored in overlaid linker areas
746 corresponding to the use of Fortran COMMON. */
747
748 static tree
749 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
750 int flags ATTRIBUTE_UNUSED,
751 bool *no_add_attrs)
752 {
753 tree decl = *node;
754 tree id, val;
755 if (! DECL_P (decl))
756 abort ();
757
758 DECL_COMMON (decl) = 1;
759 id = TREE_VALUE (args);
760 if (TREE_CODE (id) == IDENTIFIER_NODE)
761 val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
762 else if (TREE_CODE (id) == STRING_CST)
763 val = id;
764 else
765 {
766 warning (OPT_Wattributes,
767 "%qE attribute requires a string constant argument", name);
768 *no_add_attrs = true;
769 return NULL_TREE;
770 }
771 DECL_SECTION_NAME (decl) = val;
772 return NULL_TREE;
773 }
774
775 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
776
777 void
778 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
779 unsigned HOST_WIDE_INT size,
780 unsigned int align)
781 {
782 tree attr = DECL_ATTRIBUTES (decl);
783
784 /* Since the common_object attribute sets DECL_SECTION_NAME, check that
785 before looking up the attribute. */
786 if (DECL_SECTION_NAME (decl) && attr)
787 attr = lookup_attribute ("common_object", attr);
788 else
789 attr = NULL_TREE;
790
791 if (!attr)
792 {
793 /* Code from elfos.h. */
794 fprintf (file, "%s", COMMON_ASM_OP);
795 assemble_name (file, name);
796 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
797 size, align / BITS_PER_UNIT);
798 }
799 else
800 {
801 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
802 ASM_OUTPUT_LABEL (file, name);
803 ASM_OUTPUT_SKIP (file, size ? size : 1);
804 }
805 }
806
807 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
808
809 void
810 ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
811 tree decl)
812 {
813 if (!(flags & SECTION_VMS_OVERLAY))
814 {
815 default_elf_asm_named_section (name, flags, decl);
816 return;
817 }
818 if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
819 abort ();
820
821 if (flags & SECTION_DECLARED)
822 {
823 fprintf (asm_out_file, "\t.section\t%s\n", name);
824 return;
825 }
826
827 fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
828 }
829
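/* Record the address area chosen for DECL in the SYMBOL_REF_FLAGS of
   SYMBOL.  */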
830 static void
831 ia64_encode_addr_area (tree decl, rtx symbol)
832 {
833 int flags;
834
835 flags = SYMBOL_REF_FLAGS (symbol);
836 switch (ia64_get_addr_area (decl))
837 {
838 case ADDR_AREA_NORMAL: break;
839 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
840 default: gcc_unreachable ();
841 }
842 SYMBOL_REF_FLAGS (symbol) = flags;
843 }
844
845 static void
846 ia64_encode_section_info (tree decl, rtx rtl, int first)
847 {
848 default_encode_section_info (decl, rtl, first);
849
850 /* Careful not to prod global register variables. */
851 if (TREE_CODE (decl) == VAR_DECL
852 && GET_CODE (DECL_RTL (decl)) == MEM
853 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
854 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
855 ia64_encode_addr_area (decl, XEXP (rtl, 0));
856 }
857 \f
858 /* Return 1 if the operands of a move are ok. */
859
860 int
861 ia64_move_ok (rtx dst, rtx src)
862 {
863 /* If we're under init_recog_no_volatile, we'll not be able to use
864 memory_operand. So check the code directly and don't worry about
865 the validity of the underlying address, which should have been
866 checked elsewhere anyway. */
867 if (GET_CODE (dst) != MEM)
868 return 1;
869 if (GET_CODE (src) == MEM)
870 return 0;
871 if (register_operand (src, VOIDmode))
872 return 1;
873
874 /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */
875 if (INTEGRAL_MODE_P (GET_MODE (dst)))
876 return src == const0_rtx;
877 else
878 return satisfies_constraint_G (src);
879 }
880
881 /* Return 1 if the operands are ok for a floating point load pair. */
882
883 int
884 ia64_load_pair_ok (rtx dst, rtx src)
885 {
886 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
887 return 0;
888 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
889 return 0;
890 switch (GET_CODE (XEXP (src, 0)))
891 {
892 case REG:
893 case POST_INC:
894 break;
895 case POST_DEC:
896 return 0;
897 case POST_MODIFY:
898 {
899 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
900
901 if (GET_CODE (adjust) != CONST_INT
902 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
903 return 0;
904 }
905 break;
906 default:
907 abort ();
908 }
909 return 1;
910 }
911
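/* Return nonzero when exactly one of OP1 and OP2 is a base register.  */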
912 int
913 addp4_optimize_ok (rtx op1, rtx op2)
914 {
915 return (basereg_operand (op1, GET_MODE(op1)) !=
916 basereg_operand (op2, GET_MODE(op2)));
917 }
918
919 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
920 Return the length of the field, or <= 0 on failure. */
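/* For example, ROP = 0xff0 with RSHIFT = 4 describes a contiguous 8-bit
   field, so the result is 8.  */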
921
922 int
923 ia64_depz_field_mask (rtx rop, rtx rshift)
924 {
925 unsigned HOST_WIDE_INT op = INTVAL (rop);
926 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
927
928 /* Get rid of the zero bits we're shifting in. */
929 op >>= shift;
930
931 /* We must now have a solid block of 1's at bit 0. */
932 return exact_log2 (op + 1);
933 }
934
935 /* Return the TLS model to use for ADDR. */
936
937 static enum tls_model
938 tls_symbolic_operand_type (rtx addr)
939 {
940 enum tls_model tls_kind = TLS_MODEL_NONE;
941
942 if (GET_CODE (addr) == CONST)
943 {
944 if (GET_CODE (XEXP (addr, 0)) == PLUS
945 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
946 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
947 }
948 else if (GET_CODE (addr) == SYMBOL_REF)
949 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
950
951 return tls_kind;
952 }
953
954 /* Return true if X is a constant that is valid for some immediate
955 field in an instruction. */
956
957 bool
958 ia64_legitimate_constant_p (rtx x)
959 {
960 switch (GET_CODE (x))
961 {
962 case CONST_INT:
963 case LABEL_REF:
964 return true;
965
966 case CONST_DOUBLE:
967 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
968 || GET_MODE (x) == DFmode)
969 return true;
970 return satisfies_constraint_G (x);
971
972 case CONST:
973 case SYMBOL_REF:
974 /* ??? Short term workaround for PR 28490. We must make the code here
975 match the code in ia64_expand_move and move_operand, even though they
976 are both technically wrong. */
977 if (tls_symbolic_operand_type (x) == 0)
978 {
979 HOST_WIDE_INT addend = 0;
980 rtx op = x;
981
982 if (GET_CODE (op) == CONST
983 && GET_CODE (XEXP (op, 0)) == PLUS
984 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
985 {
986 addend = INTVAL (XEXP (XEXP (op, 0), 1));
987 op = XEXP (XEXP (op, 0), 0);
988 }
989
990 if (any_offset_symbol_operand (op, GET_MODE (op))
991 || function_operand (op, GET_MODE (op)))
992 return true;
993 if (aligned_offset_symbol_operand (op, GET_MODE (op)))
994 return (addend & 0x3fff) == 0;
995 return false;
996 }
997 return false;
998
999 case CONST_VECTOR:
1000 {
1001 enum machine_mode mode = GET_MODE (x);
1002
1003 if (mode == V2SFmode)
1004 return satisfies_constraint_Y (x);
1005
1006 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1007 && GET_MODE_SIZE (mode) <= 8);
1008 }
1009
1010 default:
1011 return false;
1012 }
1013 }
1014
1015 /* Don't allow TLS addresses to get spilled to memory. */
1016
1017 static bool
1018 ia64_cannot_force_const_mem (rtx x)
1019 {
1020 if (GET_MODE (x) == RFmode)
1021 return true;
1022 return tls_symbolic_operand_type (x) != 0;
1023 }
1024
1025 /* Expand a symbolic constant load. */
1026
1027 bool
1028 ia64_expand_load_address (rtx dest, rtx src)
1029 {
1030 gcc_assert (GET_CODE (dest) == REG);
1031
1032 /* ILP32 mode still loads 64 bits of data from the GOT. This avoids
1033 having to pointer-extend the value afterward. Other forms of address
1034 computation below are also more natural to compute as 64-bit quantities.
1035 If we've been given an SImode destination register, change it. */
1036 if (GET_MODE (dest) != Pmode)
1037 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1038 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1039
1040 if (TARGET_NO_PIC)
1041 return false;
1042 if (small_addr_symbolic_operand (src, VOIDmode))
1043 return false;
1044
1045 if (TARGET_AUTO_PIC)
1046 emit_insn (gen_load_gprel64 (dest, src));
1047 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1048 emit_insn (gen_load_fptr (dest, src));
1049 else if (sdata_symbolic_operand (src, VOIDmode))
1050 emit_insn (gen_load_gprel (dest, src));
1051 else
1052 {
1053 HOST_WIDE_INT addend = 0;
1054 rtx tmp;
1055
1056 /* We did split constant offsets in ia64_expand_move, and we did try
1057 to keep them split in move_operand, but we also allowed reload to
1058 rematerialize arbitrary constants rather than spill the value to
1059 the stack and reload it. So we have to be prepared here to split
1060 them apart again. */
1061 if (GET_CODE (src) == CONST)
1062 {
1063 HOST_WIDE_INT hi, lo;
1064
1065 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1066 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1067 hi = hi - lo;
1068
1069 if (lo != 0)
1070 {
1071 addend = lo;
1072 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
1073 }
1074 }
1075
1076 tmp = gen_rtx_HIGH (Pmode, src);
1077 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1078 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1079
1080 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
1081 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1082
1083 if (addend)
1084 {
1085 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1086 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1087 }
1088 }
1089
1090 return true;
1091 }
1092
1093 static GTY(()) rtx gen_tls_tga;
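/* Return the libfunc for __tls_get_addr, creating it on first use.  */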
1094 static rtx
1095 gen_tls_get_addr (void)
1096 {
1097 if (!gen_tls_tga)
1098 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1099 return gen_tls_tga;
1100 }
1101
1102 static GTY(()) rtx thread_pointer_rtx;
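/* Return the thread pointer, register r13, as an rtx (created lazily).  */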
1103 static rtx
1104 gen_thread_pointer (void)
1105 {
1106 if (!thread_pointer_rtx)
1107 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1108 return thread_pointer_rtx;
1109 }
1110
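/* Expand a load of the address of the TLS symbol ORIG_OP1 (equal to OP1
   plus ADDEND) into OP0 according to model TLS_KIND.  Return the register
   holding the result, or NULL_RTX if the value was left in OP0 itself.  */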
1111 static rtx
1112 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1113 rtx orig_op1, HOST_WIDE_INT addend)
1114 {
1115 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1116 rtx orig_op0 = op0;
1117 HOST_WIDE_INT addend_lo, addend_hi;
1118
1119 switch (tls_kind)
1120 {
1121 case TLS_MODEL_GLOBAL_DYNAMIC:
1122 start_sequence ();
1123
1124 tga_op1 = gen_reg_rtx (Pmode);
1125 emit_insn (gen_load_dtpmod (tga_op1, op1));
1126
1127 tga_op2 = gen_reg_rtx (Pmode);
1128 emit_insn (gen_load_dtprel (tga_op2, op1));
1129
1130 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1131 LCT_CONST, Pmode, 2, tga_op1,
1132 Pmode, tga_op2, Pmode);
1133
1134 insns = get_insns ();
1135 end_sequence ();
1136
1137 if (GET_MODE (op0) != Pmode)
1138 op0 = tga_ret;
1139 emit_libcall_block (insns, op0, tga_ret, op1);
1140 break;
1141
1142 case TLS_MODEL_LOCAL_DYNAMIC:
1143 /* ??? This isn't the completely proper way to do local-dynamic.
1144 If the call to __tls_get_addr is used only by a single symbol,
1145 then we should (somehow) move the dtprel to the second arg
1146 to avoid the extra add. */
1147 start_sequence ();
1148
1149 tga_op1 = gen_reg_rtx (Pmode);
1150 emit_insn (gen_load_dtpmod (tga_op1, op1));
1151
1152 tga_op2 = const0_rtx;
1153
1154 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1155 LCT_CONST, Pmode, 2, tga_op1,
1156 Pmode, tga_op2, Pmode);
1157
1158 insns = get_insns ();
1159 end_sequence ();
1160
1161 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1162 UNSPEC_LD_BASE);
1163 tmp = gen_reg_rtx (Pmode);
1164 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1165
1166 if (!register_operand (op0, Pmode))
1167 op0 = gen_reg_rtx (Pmode);
1168 if (TARGET_TLS64)
1169 {
1170 emit_insn (gen_load_dtprel (op0, op1));
1171 emit_insn (gen_adddi3 (op0, tmp, op0));
1172 }
1173 else
1174 emit_insn (gen_add_dtprel (op0, op1, tmp));
1175 break;
1176
1177 case TLS_MODEL_INITIAL_EXEC:
1178 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1179 addend_hi = addend - addend_lo;
1180
1181 op1 = plus_constant (op1, addend_hi);
1182 addend = addend_lo;
1183
1184 tmp = gen_reg_rtx (Pmode);
1185 emit_insn (gen_load_tprel (tmp, op1));
1186
1187 if (!register_operand (op0, Pmode))
1188 op0 = gen_reg_rtx (Pmode);
1189 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1190 break;
1191
1192 case TLS_MODEL_LOCAL_EXEC:
1193 if (!register_operand (op0, Pmode))
1194 op0 = gen_reg_rtx (Pmode);
1195
1196 op1 = orig_op1;
1197 addend = 0;
1198 if (TARGET_TLS64)
1199 {
1200 emit_insn (gen_load_tprel (op0, op1));
1201 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1202 }
1203 else
1204 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1205 break;
1206
1207 default:
1208 gcc_unreachable ();
1209 }
1210
1211 if (addend)
1212 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1213 orig_op0, 1, OPTAB_DIRECT);
1214 if (orig_op0 == op0)
1215 return NULL_RTX;
1216 if (GET_MODE (orig_op0) == Pmode)
1217 return op0;
1218 return gen_lowpart (GET_MODE (orig_op0), op0);
1219 }
1220
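/* Prepare a move from OP1 into OP0 during rtl expansion, handling symbolic
   and TLS source operands.  Return the (possibly rewritten) source operand,
   or NULL_RTX if the move has already been emitted in full.  */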
1221 rtx
1222 ia64_expand_move (rtx op0, rtx op1)
1223 {
1224 enum machine_mode mode = GET_MODE (op0);
1225
1226 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1227 op1 = force_reg (mode, op1);
1228
1229 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1230 {
1231 HOST_WIDE_INT addend = 0;
1232 enum tls_model tls_kind;
1233 rtx sym = op1;
1234
1235 if (GET_CODE (op1) == CONST
1236 && GET_CODE (XEXP (op1, 0)) == PLUS
1237 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1238 {
1239 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1240 sym = XEXP (XEXP (op1, 0), 0);
1241 }
1242
1243 tls_kind = tls_symbolic_operand_type (sym);
1244 if (tls_kind)
1245 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1246
1247 if (any_offset_symbol_operand (sym, mode))
1248 addend = 0;
1249 else if (aligned_offset_symbol_operand (sym, mode))
1250 {
1251 HOST_WIDE_INT addend_lo, addend_hi;
1252
1253 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1254 addend_hi = addend - addend_lo;
1255
1256 if (addend_lo != 0)
1257 {
1258 op1 = plus_constant (sym, addend_hi);
1259 addend = addend_lo;
1260 }
1261 else
1262 addend = 0;
1263 }
1264 else
1265 op1 = sym;
1266
1267 if (reload_completed)
1268 {
1269 /* We really should have taken care of this offset earlier. */
1270 gcc_assert (addend == 0);
1271 if (ia64_expand_load_address (op0, op1))
1272 return NULL_RTX;
1273 }
1274
1275 if (addend)
1276 {
1277 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1278
1279 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1280
1281 op1 = expand_simple_binop (mode, PLUS, subtarget,
1282 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1283 if (op0 == op1)
1284 return NULL_RTX;
1285 }
1286 }
1287
1288 return op1;
1289 }
1290
1291 /* Split a move from OP1 to OP0 conditional on COND. */
1292
1293 void
1294 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1295 {
1296 rtx insn, first = get_last_insn ();
1297
1298 emit_move_insn (op0, op1);
1299
1300 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1301 if (INSN_P (insn))
1302 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1303 PATTERN (insn));
1304 }
1305
1306 /* Split a post-reload TImode or TFmode reference into two DImode
1307 components. This is made extra difficult by the fact that we do
1308 not get any scratch registers to work with, because reload cannot
1309 be prevented from giving us a scratch that overlaps the register
1310 pair involved. So instead, when addressing memory, we tweak the
1311 pointer register up and back down with POST_INCs. Or up and not
1312 back down when we can get away with it.
1313
1314 REVERSED is true when the loads must be done in reversed order
1315 (high word first) for correctness. DEAD is true when the pointer
1316 dies with the second insn we generate and therefore the second
1317 address must not carry a postmodify.
1318
1319 May return an insn which is to be emitted after the moves. */
1320
1321 static rtx
1322 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1323 {
1324 rtx fixup = 0;
1325
1326 switch (GET_CODE (in))
1327 {
1328 case REG:
1329 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1330 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1331 break;
1332
1333 case CONST_INT:
1334 case CONST_DOUBLE:
1335 /* Cannot occur reversed. */
1336 gcc_assert (!reversed);
1337
1338 if (GET_MODE (in) != TFmode)
1339 split_double (in, &out[0], &out[1]);
1340 else
1341 /* split_double does not understand how to split a TFmode
1342 quantity into a pair of DImode constants. */
1343 {
1344 REAL_VALUE_TYPE r;
1345 unsigned HOST_WIDE_INT p[2];
1346 long l[4]; /* TFmode is 128 bits */
1347
1348 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1349 real_to_target (l, &r, TFmode);
1350
1351 if (FLOAT_WORDS_BIG_ENDIAN)
1352 {
1353 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1354 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1355 }
1356 else
1357 {
1358 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1359 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1360 }
1361 out[0] = GEN_INT (p[0]);
1362 out[1] = GEN_INT (p[1]);
1363 }
1364 break;
1365
1366 case MEM:
1367 {
1368 rtx base = XEXP (in, 0);
1369 rtx offset;
1370
1371 switch (GET_CODE (base))
1372 {
1373 case REG:
1374 if (!reversed)
1375 {
1376 out[0] = adjust_automodify_address
1377 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1378 out[1] = adjust_automodify_address
1379 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1380 }
1381 else
1382 {
1383 /* Reversal requires a pre-increment, which can only
1384 be done as a separate insn. */
1385 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1386 out[0] = adjust_automodify_address
1387 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1388 out[1] = adjust_address (in, DImode, 0);
1389 }
1390 break;
1391
1392 case POST_INC:
1393 gcc_assert (!reversed && !dead);
1394
1395 /* Just do the increment in two steps. */
1396 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1397 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1398 break;
1399
1400 case POST_DEC:
1401 gcc_assert (!reversed && !dead);
1402
1403 /* Add 8, subtract 24. */
1404 base = XEXP (base, 0);
1405 out[0] = adjust_automodify_address
1406 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1407 out[1] = adjust_automodify_address
1408 (in, DImode,
1409 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1410 8);
1411 break;
1412
1413 case POST_MODIFY:
1414 gcc_assert (!reversed && !dead);
1415
1416 /* Extract and adjust the modification. This case is
1417 trickier than the others, because we might have an
1418 index register, or we might have a combined offset that
1419 doesn't fit a signed 9-bit displacement field. We can
1420 assume the incoming expression is already legitimate. */
1421 offset = XEXP (base, 1);
1422 base = XEXP (base, 0);
1423
1424 out[0] = adjust_automodify_address
1425 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1426
1427 if (GET_CODE (XEXP (offset, 1)) == REG)
1428 {
1429 /* Can't adjust the postmodify to match. Emit the
1430 original, then a separate addition insn. */
1431 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1432 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1433 }
1434 else
1435 {
1436 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1437 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1438 {
1439 /* Again the postmodify cannot be made to match,
1440 but in this case it's more efficient to get rid
1441 of the postmodify entirely and fix up with an
1442 add insn. */
1443 out[1] = adjust_automodify_address (in, DImode, base, 8);
1444 fixup = gen_adddi3
1445 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1446 }
1447 else
1448 {
1449 /* Combined offset still fits in the displacement field.
1450 (We cannot overflow it at the high end.) */
1451 out[1] = adjust_automodify_address
1452 (in, DImode, gen_rtx_POST_MODIFY
1453 (Pmode, base, gen_rtx_PLUS
1454 (Pmode, base,
1455 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1456 8);
1457 }
1458 }
1459 break;
1460
1461 default:
1462 gcc_unreachable ();
1463 }
1464 break;
1465 }
1466
1467 default:
1468 gcc_unreachable ();
1469 }
1470
1471 return fixup;
1472 }
1473
1474 /* Split a TImode or TFmode move instruction after reload.
1475 This is used by *movtf_internal and *movti_internal. */
1476 void
1477 ia64_split_tmode_move (rtx operands[])
1478 {
1479 rtx in[2], out[2], insn;
1480 rtx fixup[2];
1481 bool dead = false;
1482 bool reversed = false;
1483
1484 /* It is possible for reload to decide to overwrite a pointer with
1485 the value it points to. In that case we have to do the loads in
1486 the appropriate order so that the pointer is not destroyed too
1487 early. Also we must not generate a postmodify for that second
1488 load, or rws_access_regno will die. */
1489 if (GET_CODE (operands[1]) == MEM
1490 && reg_overlap_mentioned_p (operands[0], operands[1]))
1491 {
1492 rtx base = XEXP (operands[1], 0);
1493 while (GET_CODE (base) != REG)
1494 base = XEXP (base, 0);
1495
1496 if (REGNO (base) == REGNO (operands[0]))
1497 reversed = true;
1498 dead = true;
1499 }
1500 /* Another reason to do the moves in reversed order is if the first
1501 element of the target register pair is also the second element of
1502 the source register pair. */
1503 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1504 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1505 reversed = true;
1506
1507 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1508 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1509
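/* If EXP is an auto-modified MEM, attach a REG_INC note for the modified
   pointer register to INSN so that later passes see the side effect.  */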
1510 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1511 if (GET_CODE (EXP) == MEM \
1512 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1513 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1514 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1515 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1516
1517 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1518 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1519 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1520
1521 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1522 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1523 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1524
1525 if (fixup[0])
1526 emit_insn (fixup[0]);
1527 if (fixup[1])
1528 emit_insn (fixup[1]);
1529
1530 #undef MAYBE_ADD_REG_INC_NOTE
1531 }
1532
1533 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1534 through memory plus an extra GR scratch register. Except that you can
1535 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1536 SECONDARY_RELOAD_CLASS, but not both.
1537
1538 We got into problems in the first place by allowing a construct like
1539 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1540 This solution attempts to prevent this situation from occurring. When
1541 we see something like the above, we spill the inner register to memory. */
1542
1543 static rtx
1544 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1545 {
1546 if (GET_CODE (in) == SUBREG
1547 && GET_MODE (SUBREG_REG (in)) == TImode
1548 && GET_CODE (SUBREG_REG (in)) == REG)
1549 {
1550 rtx memt = assign_stack_temp (TImode, 16, 0);
1551 emit_move_insn (memt, SUBREG_REG (in));
1552 return adjust_address (memt, mode, 0);
1553 }
1554 else if (force && GET_CODE (in) == REG)
1555 {
1556 rtx memx = assign_stack_temp (mode, 16, 0);
1557 emit_move_insn (memx, in);
1558 return memx;
1559 }
1560 else
1561 return in;
1562 }
1563
1564 /* Expand the movxf or movrf pattern (MODE says which) with the given
1565 OPERANDS, returning true if the pattern should then invoke
1566 DONE. */
1567
1568 bool
1569 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1570 {
1571 rtx op0 = operands[0];
1572
1573 if (GET_CODE (op0) == SUBREG)
1574 op0 = SUBREG_REG (op0);
1575
1576 /* We must support XFmode loads into general registers for stdarg/vararg,
1577 unprototyped calls, and a rare case where a long double is passed as
1578 an argument after a float HFA fills the FP registers. We split them into
1579 DImode loads for convenience. We also need to support XFmode stores
1580 for the last case. This case does not happen for stdarg/vararg routines,
1581 because we do a block store to memory of unnamed arguments. */
1582
1583 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1584 {
1585 rtx out[2];
1586
1587 /* We're hoping to transform everything that deals with XFmode
1588 quantities and GR registers early in the compiler. */
1589 gcc_assert (can_create_pseudo_p ());
1590
1591 /* Struct to register can just use TImode instead. */
1592 if ((GET_CODE (operands[1]) == SUBREG
1593 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1594 || (GET_CODE (operands[1]) == REG
1595 && GR_REGNO_P (REGNO (operands[1]))))
1596 {
1597 rtx op1 = operands[1];
1598
1599 if (GET_CODE (op1) == SUBREG)
1600 op1 = SUBREG_REG (op1);
1601 else
1602 op1 = gen_rtx_REG (TImode, REGNO (op1));
1603
1604 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1605 return true;
1606 }
1607
1608 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1609 {
1610 /* Don't word-swap when reading in the constant. */
1611 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1612 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1613 0, mode));
1614 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1615 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1616 0, mode));
1617 return true;
1618 }
1619
1620 /* If the quantity is in a register not known to be GR, spill it. */
1621 if (register_operand (operands[1], mode))
1622 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1623
1624 gcc_assert (GET_CODE (operands[1]) == MEM);
1625
1626 /* Don't word-swap when reading in the value. */
1627 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1628 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1629
1630 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1631 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1632 return true;
1633 }
1634
1635 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1636 {
1637 /* We're hoping to transform everything that deals with XFmode
1638 quantities and GR registers early in the compiler. */
1639 gcc_assert (can_create_pseudo_p ());
1640
1641 /* Op0 can't be a GR_REG here, as that case is handled above.
1642 If op0 is a register, then we spill op1, so that we now have a
1643 MEM operand. This requires creating an XFmode subreg of a TImode reg
1644 to force the spill. */
1645 if (register_operand (operands[0], mode))
1646 {
1647 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1648 op1 = gen_rtx_SUBREG (mode, op1, 0);
1649 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1650 }
1651
1652 else
1653 {
1654 rtx in[2];
1655
1656 gcc_assert (GET_CODE (operands[0]) == MEM);
1657
1658 /* Don't word-swap when writing out the value. */
1659 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1660 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1661
1662 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1663 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1664 return true;
1665 }
1666 }
1667
1668 if (!reload_in_progress && !reload_completed)
1669 {
1670 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1671
1672 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1673 {
1674 rtx memt, memx, in = operands[1];
1675 if (CONSTANT_P (in))
1676 in = validize_mem (force_const_mem (mode, in));
1677 if (GET_CODE (in) == MEM)
1678 memt = adjust_address (in, TImode, 0);
1679 else
1680 {
1681 memt = assign_stack_temp (TImode, 16, 0);
1682 memx = adjust_address (memt, mode, 0);
1683 emit_move_insn (memx, in);
1684 }
1685 emit_move_insn (op0, memt);
1686 return true;
1687 }
1688
1689 if (!ia64_move_ok (operands[0], operands[1]))
1690 operands[1] = force_reg (mode, operands[1]);
1691 }
1692
1693 return false;
1694 }
1695
1696 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1697 with the expression that holds the compare result (in VOIDmode). */
1698
1699 static GTY(()) rtx cmptf_libfunc;
1700
1701 void
1702 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1703 {
1704 enum rtx_code code = GET_CODE (*expr);
1705 rtx cmp;
1706
1707 /* If we have a BImode input, then we already have a compare result, and
1708 do not need to emit another comparison. */
1709 if (GET_MODE (*op0) == BImode)
1710 {
1711 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1712 cmp = *op0;
1713 }
1714 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1715 magic number as its third argument that indicates what to do.
1716 The return value is an integer to be compared against zero. */
1717 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1718 {
1719 enum qfcmp_magic {
1720 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1721 QCMP_UNORD = 2,
1722 QCMP_EQ = 4,
1723 QCMP_LT = 8,
1724 QCMP_GT = 16
1725 };
1726 int magic;
1727 enum rtx_code ncode;
1728 rtx ret, insns;
1729
1730 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1731 switch (code)
1732 {
1733 /* 1 = equal, 0 = not equal. Equality operators do
1734 not raise FP_INVALID when given an SNaN operand. */
1735 case EQ: magic = QCMP_EQ; ncode = NE; break;
1736 case NE: magic = QCMP_EQ; ncode = EQ; break;
1737 /* isunordered() from C99. */
1738 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1739 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1740 /* Relational operators raise FP_INVALID when given
1741 an SNaN operand. */
1742 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1743 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1744 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1745 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1746 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1747 Expanders for buneq etc. would have to be added to ia64.md
1748 for this to be useful. */
1749 default: gcc_unreachable ();
1750 }
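      /* Worked example: for LE the magic value is
         QCMP_LT | QCMP_EQ | QCMP_INV == 13, so the expansion below calls
         _U_Qfcmp (*op0, *op1, 13) and then tests the DImode result
         against zero with NE.  */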
1751
1752 start_sequence ();
1753
1754 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1755 *op0, TFmode, *op1, TFmode,
1756 GEN_INT (magic), DImode);
1757 cmp = gen_reg_rtx (BImode);
1758 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1759 gen_rtx_fmt_ee (ncode, BImode,
1760 ret, const0_rtx)));
1761
1762 insns = get_insns ();
1763 end_sequence ();
1764
1765 emit_libcall_block (insns, cmp, cmp,
1766 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1767 code = NE;
1768 }
1769 else
1770 {
1771 cmp = gen_reg_rtx (BImode);
1772 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1773 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1774 code = NE;
1775 }
1776
1777 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1778 *op0 = cmp;
1779 *op1 = const0_rtx;
1780 }
1781
1782 /* Generate an integral vector comparison. Return true if the condition has
1783 been reversed, and so the sense of the comparison should be inverted. */
1784
1785 static bool
1786 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1787 rtx dest, rtx op0, rtx op1)
1788 {
1789 bool negate = false;
1790 rtx x;
1791
1792 /* Canonicalize the comparison to EQ, GT, GTU. */
1793 switch (code)
1794 {
1795 case EQ:
1796 case GT:
1797 case GTU:
1798 break;
1799
1800 case NE:
1801 case LE:
1802 case LEU:
1803 code = reverse_condition (code);
1804 negate = true;
1805 break;
1806
1807 case GE:
1808 case GEU:
1809 code = reverse_condition (code);
1810 negate = true;
1811 /* FALLTHRU */
1812
1813 case LT:
1814 case LTU:
1815 code = swap_condition (code);
1816 x = op0, op0 = op1, op1 = x;
1817 break;
1818
1819 default:
1820 gcc_unreachable ();
1821 }
1822
1823 /* Unsigned parallel compare is not supported by the hardware. Play some
1824 tricks to turn this into a signed comparison against 0. */
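  /* For V8QImode and V4HImode, A >u B is realized below as (A -us B) != 0,
     i.e. an EQ test against zero with the negate flag flipped, where -us is
     the saturating subtraction: the difference saturates to zero exactly
     when A <= B.  For V2SImode we instead flip the sign bit of both
     operands (by subtracting 0x80000000) and use an ordinary signed GT.  */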
1825 if (code == GTU)
1826 {
1827 switch (mode)
1828 {
1829 case V2SImode:
1830 {
1831 rtx t1, t2, mask;
1832
1833 /* Subtract (-(INT_MAX) - 1) from both operands to make
1834 them signed. */
1835 mask = GEN_INT (0x80000000);
1836 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1837 mask = force_reg (mode, mask);
1838 t1 = gen_reg_rtx (mode);
1839 emit_insn (gen_subv2si3 (t1, op0, mask));
1840 t2 = gen_reg_rtx (mode);
1841 emit_insn (gen_subv2si3 (t2, op1, mask));
1842 op0 = t1;
1843 op1 = t2;
1844 code = GT;
1845 }
1846 break;
1847
1848 case V8QImode:
1849 case V4HImode:
1850 /* Perform a parallel unsigned saturating subtraction. */
1851 x = gen_reg_rtx (mode);
1852 emit_insn (gen_rtx_SET (VOIDmode, x,
1853 gen_rtx_US_MINUS (mode, op0, op1)));
1854
1855 code = EQ;
1856 op0 = x;
1857 op1 = CONST0_RTX (mode);
1858 negate = !negate;
1859 break;
1860
1861 default:
1862 gcc_unreachable ();
1863 }
1864 }
1865
1866 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1867 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1868
1869 return negate;
1870 }
1871
1872 /* Emit an integral vector conditional move. */
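/* The expansion relies on each element of the compare result being either
   all-ones or all-zeros, so the general case below is simply
   DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE); the constant-zero special
   cases just drop the half that is not needed.  */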
1873
1874 void
1875 ia64_expand_vecint_cmov (rtx operands[])
1876 {
1877 enum machine_mode mode = GET_MODE (operands[0]);
1878 enum rtx_code code = GET_CODE (operands[3]);
1879 bool negate;
1880 rtx cmp, x, ot, of;
1881
1882 cmp = gen_reg_rtx (mode);
1883 negate = ia64_expand_vecint_compare (code, mode, cmp,
1884 operands[4], operands[5]);
1885
1886 ot = operands[1+negate];
1887 of = operands[2-negate];
1888
1889 if (ot == CONST0_RTX (mode))
1890 {
1891 if (of == CONST0_RTX (mode))
1892 {
1893 emit_move_insn (operands[0], ot);
1894 return;
1895 }
1896
1897 x = gen_rtx_NOT (mode, cmp);
1898 x = gen_rtx_AND (mode, x, of);
1899 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1900 }
1901 else if (of == CONST0_RTX (mode))
1902 {
1903 x = gen_rtx_AND (mode, cmp, ot);
1904 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1905 }
1906 else
1907 {
1908 rtx t, f;
1909
1910 t = gen_reg_rtx (mode);
1911 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1912 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1913
1914 f = gen_reg_rtx (mode);
1915 x = gen_rtx_NOT (mode, cmp);
1916 x = gen_rtx_AND (mode, x, operands[2-negate]);
1917 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1918
1919 x = gen_rtx_IOR (mode, t, f);
1920 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1921 }
1922 }
1923
1924 /* Emit an integral vector min or max operation. Return true if all done. */
1925
1926 bool
1927 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1928 rtx operands[])
1929 {
1930 rtx xops[6];
1931
1932 /* These four combinations are supported directly. */
1933 if (mode == V8QImode && (code == UMIN || code == UMAX))
1934 return false;
1935 if (mode == V4HImode && (code == SMIN || code == SMAX))
1936 return false;
1937
1938 /* This combination can be implemented with only saturating subtraction. */
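  /* This relies on the identity umax (A, B) == (A -us B) + B: the
     saturating difference is A - B when A > B and zero otherwise, so for
     example umax (9, 5) == 4 + 5 and umax (5, 9) == 0 + 9.  */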
1939 if (mode == V4HImode && code == UMAX)
1940 {
1941 rtx x, tmp = gen_reg_rtx (mode);
1942
1943 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1944 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1945
1946 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1947 return true;
1948 }
1949
1950 /* Everything else implemented via vector comparisons. */
1951 xops[0] = operands[0];
1952 xops[4] = xops[1] = operands[1];
1953 xops[5] = xops[2] = operands[2];
1954
1955 switch (code)
1956 {
1957 case UMIN:
1958 code = LTU;
1959 break;
1960 case UMAX:
1961 code = GTU;
1962 break;
1963 case SMIN:
1964 code = LT;
1965 break;
1966 case SMAX:
1967 code = GT;
1968 break;
1969 default:
1970 gcc_unreachable ();
1971 }
1972 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1973
1974 ia64_expand_vecint_cmov (xops);
1975 return true;
1976 }
1977
1978 /* The vectors LO and HI each contain N halves of a double-wide vector.
1979 Reassemble either the first N/2 or the second N/2 elements. */
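/* The low (high) interleave pairs element I of one input with element I of
   the other, for I in the first (second) half of the vectors.  When HI is
   the sign or zero vector produced by ia64_unpack_sign, each narrow element
   of LO therefore sits next to its extension bits, and OUT read in the
   wider mode holds the widened elements.  */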
1980
1981 void
1982 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
1983 {
1984 enum machine_mode mode = GET_MODE (lo);
1985 rtx (*gen) (rtx, rtx, rtx);
1986 rtx x;
1987
1988 switch (mode)
1989 {
1990 case V8QImode:
1991 gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
1992 break;
1993 case V4HImode:
1994 gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
1995 break;
1996 default:
1997 gcc_unreachable ();
1998 }
1999
2000 x = gen_lowpart (mode, out);
2001 if (TARGET_BIG_ENDIAN)
2002 x = gen (x, hi, lo);
2003 else
2004 x = gen (x, lo, hi);
2005 emit_insn (x);
2006 }
2007
2008 /* Return a vector of the sign-extension of VEC. */
2009
2010 static rtx
2011 ia64_unpack_sign (rtx vec, bool unsignedp)
2012 {
2013 enum machine_mode mode = GET_MODE (vec);
2014 rtx zero = CONST0_RTX (mode);
2015
2016 if (unsignedp)
2017 return zero;
2018 else
2019 {
2020 rtx sign = gen_reg_rtx (mode);
2021 bool neg;
2022
2023 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2024 gcc_assert (!neg);
2025
2026 return sign;
2027 }
2028 }
2029
2030 /* Emit an integral vector unpack operation. */
2031
2032 void
2033 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2034 {
2035 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2036 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2037 }
2038
2039 /* Emit an integral vector widening sum operation. */
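/* Each wide lane of the result accumulates the corresponding lane of
   OPERANDS[2] plus one widened element from the low half and one from the
   high half of OPERANDS[1]; summing the lanes therefore yields the sum of
   OPERANDS[2] plus the sum of all narrow elements, which is all the
   widening-sum reduction requires.  */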
2040
2041 void
2042 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2043 {
2044 enum machine_mode wmode;
2045 rtx l, h, t, sign;
2046
2047 sign = ia64_unpack_sign (operands[1], unsignedp);
2048
2049 wmode = GET_MODE (operands[0]);
2050 l = gen_reg_rtx (wmode);
2051 h = gen_reg_rtx (wmode);
2052
2053 ia64_unpack_assemble (l, operands[1], sign, false);
2054 ia64_unpack_assemble (h, operands[1], sign, true);
2055
2056 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2057 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2058 if (t != operands[0])
2059 emit_move_insn (operands[0], t);
2060 }
2061
2062 /* Emit a signed or unsigned V8QI dot product operation. */
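/* Both operands are widened to V4HImode (low and high halves separately),
   pmpy2_even/pmpy2_odd then form four V2SImode vectors of lane products,
   and those partial products are summed together with OPERANDS[3] to give
   the accumulated dot product.  */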
2063
2064 void
2065 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2066 {
2067 rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
2068 rtx p1, p2, p3, p4, s1, s2, s3;
2069
2070 op1 = operands[1];
2071 op2 = operands[2];
2072 sn1 = ia64_unpack_sign (op1, unsignedp);
2073 sn2 = ia64_unpack_sign (op2, unsignedp);
2074
2075 l1 = gen_reg_rtx (V4HImode);
2076 l2 = gen_reg_rtx (V4HImode);
2077 h1 = gen_reg_rtx (V4HImode);
2078 h2 = gen_reg_rtx (V4HImode);
2079 ia64_unpack_assemble (l1, op1, sn1, false);
2080 ia64_unpack_assemble (l2, op2, sn2, false);
2081 ia64_unpack_assemble (h1, op1, sn1, true);
2082 ia64_unpack_assemble (h2, op2, sn2, true);
2083
2084 p1 = gen_reg_rtx (V2SImode);
2085 p2 = gen_reg_rtx (V2SImode);
2086 p3 = gen_reg_rtx (V2SImode);
2087 p4 = gen_reg_rtx (V2SImode);
2088 emit_insn (gen_pmpy2_even (p1, l1, l2));
2089 emit_insn (gen_pmpy2_even (p2, h1, h2));
2090 emit_insn (gen_pmpy2_odd (p3, l1, l2));
2091 emit_insn (gen_pmpy2_odd (p4, h1, h2));
2092
2093 s1 = gen_reg_rtx (V2SImode);
2094 s2 = gen_reg_rtx (V2SImode);
2095 s3 = gen_reg_rtx (V2SImode);
2096 emit_insn (gen_addv2si3 (s1, p1, p2));
2097 emit_insn (gen_addv2si3 (s2, p3, p4));
2098 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2099 emit_insn (gen_addv2si3 (operands[0], s2, s3));
2100 }
2101
2102 /* Emit the appropriate sequence for a call. */
2103
2104 void
2105 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2106 int sibcall_p)
2107 {
2108 rtx insn, b0;
2109
2110 addr = XEXP (addr, 0);
2111 addr = convert_memory_address (DImode, addr);
2112 b0 = gen_rtx_REG (DImode, R_BR (0));
2113
2114 /* ??? Should do this for functions known to bind local too. */
2115 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2116 {
2117 if (sibcall_p)
2118 insn = gen_sibcall_nogp (addr);
2119 else if (! retval)
2120 insn = gen_call_nogp (addr, b0);
2121 else
2122 insn = gen_call_value_nogp (retval, addr, b0);
2123 insn = emit_call_insn (insn);
2124 }
2125 else
2126 {
2127 if (sibcall_p)
2128 insn = gen_sibcall_gp (addr);
2129 else if (! retval)
2130 insn = gen_call_gp (addr, b0);
2131 else
2132 insn = gen_call_value_gp (retval, addr, b0);
2133 insn = emit_call_insn (insn);
2134
2135 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2136 }
2137
2138 if (sibcall_p)
2139 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2140
2141 if (TARGET_ABI_OPEN_VMS)
2142 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2143 gen_rtx_REG (DImode, GR_REG (25)));
2144 }
2145
2146 static void
2147 reg_emitted (enum ia64_frame_regs r)
2148 {
2149 if (emitted_frame_related_regs[r] == 0)
2150 emitted_frame_related_regs[r] = current_frame_info.r[r];
2151 else
2152 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2153 }
2154
2155 static int
2156 get_reg (enum ia64_frame_regs r)
2157 {
2158 reg_emitted (r);
2159 return current_frame_info.r[r];
2160 }
2161
2162 static bool
2163 is_emitted (int regno)
2164 {
2165 unsigned int r;
2166
2167 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2168 if (emitted_frame_related_regs[r] == regno)
2169 return true;
2170 return false;
2171 }
2172
2173 void
2174 ia64_reload_gp (void)
2175 {
2176 rtx tmp;
2177
2178 if (current_frame_info.r[reg_save_gp])
2179 {
2180 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2181 }
2182 else
2183 {
2184 HOST_WIDE_INT offset;
2185 rtx offset_r;
2186
2187 offset = (current_frame_info.spill_cfa_off
2188 + current_frame_info.spill_size);
2189 if (frame_pointer_needed)
2190 {
2191 tmp = hard_frame_pointer_rtx;
2192 offset = -offset;
2193 }
2194 else
2195 {
2196 tmp = stack_pointer_rtx;
2197 offset = current_frame_info.total_size - offset;
2198 }
2199
2200 offset_r = GEN_INT (offset);
2201 if (satisfies_constraint_I (offset_r))
2202 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2203 else
2204 {
2205 emit_move_insn (pic_offset_table_rtx, offset_r);
2206 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2207 pic_offset_table_rtx, tmp));
2208 }
2209
2210 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2211 }
2212
2213 emit_move_insn (pic_offset_table_rtx, tmp);
2214 }
2215
2216 void
2217 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2218 rtx scratch_b, int noreturn_p, int sibcall_p)
2219 {
2220 rtx insn;
2221 bool is_desc = false;
2222
2223 /* If we find we're calling through a register, then we're actually
2224 calling through a descriptor, so load up the values. */
2225 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2226 {
2227 rtx tmp;
2228 bool addr_dead_p;
2229
2230 /* ??? We are currently constrained to *not* use peep2, because
2231 we can legitimately change the global lifetime of the GP
2232 (in the form of killing it where it was previously live). This is
2233 because a call through a descriptor doesn't use the previous
2234 value of the GP, while a direct call does, and we do not
2235 commit to either form until the split here.
2236
2237 That said, this means that we lack precise life info for
2238 whether ADDR is dead after this call. This is not terribly
2239 important, since we can fix things up essentially for free
2240 with the POST_DEC below, but it's nice to not use it when we
2241 can immediately tell it's not necessary. */
2242 addr_dead_p = ((noreturn_p || sibcall_p
2243 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2244 REGNO (addr)))
2245 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2246
2247 /* Load the code address into scratch_b. */
2248 tmp = gen_rtx_POST_INC (Pmode, addr);
2249 tmp = gen_rtx_MEM (Pmode, tmp);
2250 emit_move_insn (scratch_r, tmp);
2251 emit_move_insn (scratch_b, scratch_r);
2252
2253 /* Load the GP address. If ADDR is not dead here, then we must
2254 revert the change made above via the POST_INCREMENT. */
2255 if (!addr_dead_p)
2256 tmp = gen_rtx_POST_DEC (Pmode, addr);
2257 else
2258 tmp = addr;
2259 tmp = gen_rtx_MEM (Pmode, tmp);
2260 emit_move_insn (pic_offset_table_rtx, tmp);
2261
2262 is_desc = true;
2263 addr = scratch_b;
2264 }
2265
2266 if (sibcall_p)
2267 insn = gen_sibcall_nogp (addr);
2268 else if (retval)
2269 insn = gen_call_value_nogp (retval, addr, retaddr);
2270 else
2271 insn = gen_call_nogp (addr, retaddr);
2272 emit_call_insn (insn);
2273
2274 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2275 ia64_reload_gp ();
2276 }
2277
2278 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2279
2280 This differs from the generic code in that we know about the zero-extending
2281 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2282 also know that ld.acq+cmpxchg.rel equals a full barrier.
2283
2284 The loop we want to generate looks like
2285
2286 cmp_reg = mem;
2287 label:
2288 old_reg = cmp_reg;
2289 new_reg = cmp_reg op val;
2290 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2291 if (cmp_reg != old_reg)
2292 goto label;
2293
2294 Note that we only do the plain load from memory once. Subsequent
2295 iterations use the value loaded by the compare-and-swap pattern. */
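/* When the operation is an SImode or DImode PLUS or MINUS whose VAL is one
   of the few immediates accepted by fetchadd_operand, no loop is needed at
   all: the special case below emits a memory barrier followed by a single
   fetchadd.acq insn.  */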
2296
2297 void
2298 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2299 rtx old_dst, rtx new_dst)
2300 {
2301 enum machine_mode mode = GET_MODE (mem);
2302 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2303 enum insn_code icode;
2304
2305 /* Special case for using fetchadd. */
2306 if ((mode == SImode || mode == DImode)
2307 && (code == PLUS || code == MINUS)
2308 && fetchadd_operand (val, mode))
2309 {
2310 if (code == MINUS)
2311 val = GEN_INT (-INTVAL (val));
2312
2313 if (!old_dst)
2314 old_dst = gen_reg_rtx (mode);
2315
2316 emit_insn (gen_memory_barrier ());
2317
2318 if (mode == SImode)
2319 icode = CODE_FOR_fetchadd_acq_si;
2320 else
2321 icode = CODE_FOR_fetchadd_acq_di;
2322 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2323
2324 if (new_dst)
2325 {
2326 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2327 true, OPTAB_WIDEN);
2328 if (new_reg != new_dst)
2329 emit_move_insn (new_dst, new_reg);
2330 }
2331 return;
2332 }
2333
2334 /* Because of the volatile mem read, we get an ld.acq, which is the
2335 front half of the full barrier. The end half is the cmpxchg.rel. */
2336 gcc_assert (MEM_VOLATILE_P (mem));
2337
2338 old_reg = gen_reg_rtx (DImode);
2339 cmp_reg = gen_reg_rtx (DImode);
2340 label = gen_label_rtx ();
2341
2342 if (mode != DImode)
2343 {
2344 val = simplify_gen_subreg (DImode, val, mode, 0);
2345 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2346 }
2347 else
2348 emit_move_insn (cmp_reg, mem);
2349
2350 emit_label (label);
2351
2352 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2353 emit_move_insn (old_reg, cmp_reg);
2354 emit_move_insn (ar_ccv, cmp_reg);
2355
2356 if (old_dst)
2357 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2358
2359 new_reg = cmp_reg;
2360 if (code == NOT)
2361 {
2362 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2363 true, OPTAB_DIRECT);
2364 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2365 }
2366 else
2367 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2368 true, OPTAB_DIRECT);
2369
2370 if (mode != DImode)
2371 new_reg = gen_lowpart (mode, new_reg);
2372 if (new_dst)
2373 emit_move_insn (new_dst, new_reg);
2374
2375 switch (mode)
2376 {
2377 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2378 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2379 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2380 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2381 default:
2382 gcc_unreachable ();
2383 }
2384
2385 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2386
2387 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2388 }
2389 \f
2390 /* Begin the assembly file. */
2391
2392 static void
2393 ia64_file_start (void)
2394 {
2395 /* Variable tracking should be run after all optimizations which change order
2396 of insns. It also needs a valid CFG. This can't be done in
2397 ia64_option_override, because flag_var_tracking is finalized after
2398 that. */
2399 ia64_flag_var_tracking = flag_var_tracking;
2400 flag_var_tracking = 0;
2401
2402 default_file_start ();
2403 emit_safe_across_calls ();
2404 }
2405
2406 void
2407 emit_safe_across_calls (void)
2408 {
2409 unsigned int rs, re;
2410 int out_state;
2411
2412 rs = 1;
2413 out_state = 0;
2414 while (1)
2415 {
2416 while (rs < 64 && call_used_regs[PR_REG (rs)])
2417 rs++;
2418 if (rs >= 64)
2419 break;
2420 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2421 continue;
2422 if (out_state == 0)
2423 {
2424 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2425 out_state = 1;
2426 }
2427 else
2428 fputc (',', asm_out_file);
2429 if (re == rs + 1)
2430 fprintf (asm_out_file, "p%u", rs);
2431 else
2432 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2433 rs = re + 1;
2434 }
2435 if (out_state)
2436 fputc ('\n', asm_out_file);
2437 }
2438
2439 /* Globalize a declaration. */
2440
2441 static void
2442 ia64_globalize_decl_name (FILE * stream, tree decl)
2443 {
2444 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2445 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2446 if (version_attr)
2447 {
2448 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2449 const char *p = TREE_STRING_POINTER (v);
2450 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2451 }
2452 targetm.asm_out.globalize_label (stream, name);
2453 if (TREE_CODE (decl) == FUNCTION_DECL)
2454 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2455 }
2456
2457 /* Helper function for ia64_compute_frame_size: find an appropriate general
2458 register to spill some special register to. SPECIAL_SPILL_MASK contains
2459 bits in GR0 to GR31 that have already been allocated by this routine.
2460 TRY_LOCALS is true if we should attempt to locate a local regnum. */
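/* The search order below is: a register already committed to by emitted
   code; then, in a leaf function, an otherwise unused call-clobbered GR;
   then, if TRY_LOCALS, a fresh local register; and finally 0, meaning the
   value must instead be spilled to the stack.  */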
2461
2462 static int
2463 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2464 {
2465 int regno;
2466
2467 if (emitted_frame_related_regs[r] != 0)
2468 {
2469 regno = emitted_frame_related_regs[r];
2470 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2471 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2472 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2473 else if (current_function_is_leaf
2474 && regno >= GR_REG (1) && regno <= GR_REG (31))
2475 current_frame_info.gr_used_mask |= 1 << regno;
2476
2477 return regno;
2478 }
2479
2480 /* If this is a leaf function, first try an otherwise unused
2481 call-clobbered register. */
2482 if (current_function_is_leaf)
2483 {
2484 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2485 if (! df_regs_ever_live_p (regno)
2486 && call_used_regs[regno]
2487 && ! fixed_regs[regno]
2488 && ! global_regs[regno]
2489 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2490 && ! is_emitted (regno))
2491 {
2492 current_frame_info.gr_used_mask |= 1 << regno;
2493 return regno;
2494 }
2495 }
2496
2497 if (try_locals)
2498 {
2499 regno = current_frame_info.n_local_regs;
2500 /* If there is a frame pointer, then we can't use loc79, because
2501 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2502 reg_name switching code in ia64_expand_prologue. */
2503 while (regno < (80 - frame_pointer_needed))
2504 if (! is_emitted (LOC_REG (regno++)))
2505 {
2506 current_frame_info.n_local_regs = regno;
2507 return LOC_REG (regno - 1);
2508 }
2509 }
2510
2511 /* Failed to find a general register to spill to. Must use stack. */
2512 return 0;
2513 }
2514
2515 /* In order to make for nice schedules, we try to allocate every temporary
2516 to a different register. We must of course stay away from call-saved,
2517 fixed, and global registers. We must also stay away from registers
2518 allocated in current_frame_info.gr_used_mask, since those include regs
2519 used all through the prologue.
2520
2521 Any register allocated here must be used immediately. The idea is to
2522 aid scheduling, not to solve data flow problems. */
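/* The round-robin scan below starts just past the register handed out most
   recently (last_scratch_gr_reg, reset to 15 at the start of
   ia64_expand_prologue), so consecutive requests get distinct registers
   whenever enough scratch GRs are available.  */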
2523
2524 static int last_scratch_gr_reg;
2525
2526 static int
2527 next_scratch_gr_reg (void)
2528 {
2529 int i, regno;
2530
2531 for (i = 0; i < 32; ++i)
2532 {
2533 regno = (last_scratch_gr_reg + i + 1) & 31;
2534 if (call_used_regs[regno]
2535 && ! fixed_regs[regno]
2536 && ! global_regs[regno]
2537 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2538 {
2539 last_scratch_gr_reg = regno;
2540 return regno;
2541 }
2542 }
2543
2544 /* There must be _something_ available. */
2545 gcc_unreachable ();
2546 }
2547
2548 /* Helper function for ia64_compute_frame_size, called through
2549 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2550
2551 static void
2552 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2553 {
2554 unsigned int regno = REGNO (reg);
2555 if (regno < 32)
2556 {
2557 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2558 for (i = 0; i < n; ++i)
2559 current_frame_info.gr_used_mask |= 1 << (regno + i);
2560 }
2561 }
2562
2563
2564 /* Compute the frame layout for the current function and record it in
2565 current_frame_info. SIZE is the number of bytes of space needed for
2566 local variables. */
2567
2568 static void
2569 ia64_compute_frame_size (HOST_WIDE_INT size)
2570 {
2571 HOST_WIDE_INT total_size;
2572 HOST_WIDE_INT spill_size = 0;
2573 HOST_WIDE_INT extra_spill_size = 0;
2574 HOST_WIDE_INT pretend_args_size;
2575 HARD_REG_SET mask;
2576 int n_spilled = 0;
2577 int spilled_gr_p = 0;
2578 int spilled_fr_p = 0;
2579 unsigned int regno;
2580 int min_regno;
2581 int max_regno;
2582 int i;
2583
2584 if (current_frame_info.initialized)
2585 return;
2586
2587 memset (&current_frame_info, 0, sizeof current_frame_info);
2588 CLEAR_HARD_REG_SET (mask);
2589
2590 /* Don't allocate scratches to the return register. */
2591 diddle_return_value (mark_reg_gr_used_mask, NULL);
2592
2593 /* Don't allocate scratches to the EH scratch registers. */
2594 if (cfun->machine->ia64_eh_epilogue_sp)
2595 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2596 if (cfun->machine->ia64_eh_epilogue_bsp)
2597 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2598
2599 /* Find the size of the register stack frame. We have only 80 local
2600 registers, because we reserve 8 for the inputs and 8 for the
2601 outputs. */
2602
2603 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2604 since we'll be adjusting that down later. */
2605 regno = LOC_REG (78) + ! frame_pointer_needed;
2606 for (; regno >= LOC_REG (0); regno--)
2607 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2608 break;
2609 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2610
2611 /* For functions marked with the syscall_linkage attribute, we must mark
2612 all eight input registers as in use, so that locals aren't visible to
2613 the caller. */
2614
2615 if (cfun->machine->n_varargs > 0
2616 || lookup_attribute ("syscall_linkage",
2617 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2618 current_frame_info.n_input_regs = 8;
2619 else
2620 {
2621 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2622 if (df_regs_ever_live_p (regno))
2623 break;
2624 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2625 }
2626
2627 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2628 if (df_regs_ever_live_p (regno))
2629 break;
2630 i = regno - OUT_REG (0) + 1;
2631
2632 #ifndef PROFILE_HOOK
2633 /* When -p profiling, we need one output register for the mcount argument.
2634 Likewise for -a profiling for the bb_init_func argument. For -ax
2635 profiling, we need two output registers for the two bb_init_trace_func
2636 arguments. */
2637 if (crtl->profile)
2638 i = MAX (i, 1);
2639 #endif
2640 current_frame_info.n_output_regs = i;
2641
2642 /* ??? No rotating register support yet. */
2643 current_frame_info.n_rotate_regs = 0;
2644
2645 /* Discover which registers need spilling, and how much room that
2646 will take. Begin with floating point and general registers,
2647 which will always wind up on the stack. */
2648
2649 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2650 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2651 {
2652 SET_HARD_REG_BIT (mask, regno);
2653 spill_size += 16;
2654 n_spilled += 1;
2655 spilled_fr_p = 1;
2656 }
2657
2658 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2659 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2660 {
2661 SET_HARD_REG_BIT (mask, regno);
2662 spill_size += 8;
2663 n_spilled += 1;
2664 spilled_gr_p = 1;
2665 }
2666
2667 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2668 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2669 {
2670 SET_HARD_REG_BIT (mask, regno);
2671 spill_size += 8;
2672 n_spilled += 1;
2673 }
2674
2675 /* Now come all special registers that might get saved in other
2676 general registers. */
2677
2678 if (frame_pointer_needed)
2679 {
2680 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2681 /* If we did not get a register, then we take LOC79. This is guaranteed
2682 to be free, even if regs_ever_live is already set, because this is
2683 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2684 as we don't count loc79 above. */
2685 if (current_frame_info.r[reg_fp] == 0)
2686 {
2687 current_frame_info.r[reg_fp] = LOC_REG (79);
2688 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2689 }
2690 }
2691
2692 if (! current_function_is_leaf)
2693 {
2694 /* Emit a save of BR0 if we call other functions. Do this even
2695 if this function doesn't return, as EH depends on this to be
2696 able to unwind the stack. */
2697 SET_HARD_REG_BIT (mask, BR_REG (0));
2698
2699 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2700 if (current_frame_info.r[reg_save_b0] == 0)
2701 {
2702 extra_spill_size += 8;
2703 n_spilled += 1;
2704 }
2705
2706 /* Similarly for ar.pfs. */
2707 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2708 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2709 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2710 {
2711 extra_spill_size += 8;
2712 n_spilled += 1;
2713 }
2714
2715 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2716 registers are clobbered, so we fall back to the stack. */
2717 current_frame_info.r[reg_save_gp]
2718 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2719 if (current_frame_info.r[reg_save_gp] == 0)
2720 {
2721 SET_HARD_REG_BIT (mask, GR_REG (1));
2722 spill_size += 8;
2723 n_spilled += 1;
2724 }
2725 }
2726 else
2727 {
2728 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2729 {
2730 SET_HARD_REG_BIT (mask, BR_REG (0));
2731 extra_spill_size += 8;
2732 n_spilled += 1;
2733 }
2734
2735 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2736 {
2737 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2738 current_frame_info.r[reg_save_ar_pfs]
2739 = find_gr_spill (reg_save_ar_pfs, 1);
2740 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2741 {
2742 extra_spill_size += 8;
2743 n_spilled += 1;
2744 }
2745 }
2746 }
2747
2748 /* Unwind descriptor hackery: things are most efficient if we allocate
2749 consecutive GR save registers for RP, PFS, FP in that order. However,
2750 it is absolutely critical that FP get the only hard register that's
2751 guaranteed to be free, so we allocated it first. If all three did
2752 happen to be allocated hard regs, and are consecutive, rearrange them
2753 into the preferred order now.
2754
2755 If we have already emitted code for any of those registers,
2756 then it's already too late to change. */
2757 min_regno = MIN (current_frame_info.r[reg_fp],
2758 MIN (current_frame_info.r[reg_save_b0],
2759 current_frame_info.r[reg_save_ar_pfs]));
2760 max_regno = MAX (current_frame_info.r[reg_fp],
2761 MAX (current_frame_info.r[reg_save_b0],
2762 current_frame_info.r[reg_save_ar_pfs]));
2763 if (min_regno > 0
2764 && min_regno + 2 == max_regno
2765 && (current_frame_info.r[reg_fp] == min_regno + 1
2766 || current_frame_info.r[reg_save_b0] == min_regno + 1
2767 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2768 && (emitted_frame_related_regs[reg_save_b0] == 0
2769 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2770 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2771 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2772 && (emitted_frame_related_regs[reg_fp] == 0
2773 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2774 {
2775 current_frame_info.r[reg_save_b0] = min_regno;
2776 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2777 current_frame_info.r[reg_fp] = min_regno + 2;
2778 }
2779
2780 /* See if we need to store the predicate register block. */
2781 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2782 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2783 break;
2784 if (regno <= PR_REG (63))
2785 {
2786 SET_HARD_REG_BIT (mask, PR_REG (0));
2787 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2788 if (current_frame_info.r[reg_save_pr] == 0)
2789 {
2790 extra_spill_size += 8;
2791 n_spilled += 1;
2792 }
2793
2794 /* ??? Mark them all as used so that register renaming and such
2795 are free to use them. */
2796 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2797 df_set_regs_ever_live (regno, true);
2798 }
2799
2800 /* If we're forced to use st8.spill, we're forced to save and restore
2801 ar.unat as well. The check for existing liveness allows inline asm
2802 to touch ar.unat. */
2803 if (spilled_gr_p || cfun->machine->n_varargs
2804 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2805 {
2806 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2807 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2808 current_frame_info.r[reg_save_ar_unat]
2809 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2810 if (current_frame_info.r[reg_save_ar_unat] == 0)
2811 {
2812 extra_spill_size += 8;
2813 n_spilled += 1;
2814 }
2815 }
2816
2817 if (df_regs_ever_live_p (AR_LC_REGNUM))
2818 {
2819 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2820 current_frame_info.r[reg_save_ar_lc]
2821 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2822 if (current_frame_info.r[reg_save_ar_lc] == 0)
2823 {
2824 extra_spill_size += 8;
2825 n_spilled += 1;
2826 }
2827 }
2828
2829 /* If we have an odd number of words of pretend arguments written to
2830 the stack, then the FR save area will be unaligned. We round the
2831 size of this area up to keep things 16 byte aligned. */
2832 if (spilled_fr_p)
2833 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2834 else
2835 pretend_args_size = crtl->args.pretend_args_size;
2836
2837 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2838 + crtl->outgoing_args_size);
2839 total_size = IA64_STACK_ALIGN (total_size);
2840
2841 /* We always use the 16-byte scratch area provided by the caller, but
2842 if we are a leaf function, there's no one to which we need to provide
2843 a scratch area. */
2844 if (current_function_is_leaf)
2845 total_size = MAX (0, total_size - 16);
2846
2847 current_frame_info.total_size = total_size;
2848 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2849 current_frame_info.spill_size = spill_size;
2850 current_frame_info.extra_spill_size = extra_spill_size;
2851 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2852 current_frame_info.n_spilled = n_spilled;
2853 current_frame_info.initialized = reload_completed;
2854 }
2855
2856 /* Worker function for TARGET_CAN_ELIMINATE. */
2857
2858 bool
2859 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2860 {
2861 return (to == BR_REG (0) ? current_function_is_leaf : true);
2862 }
2863
2864 /* Compute the initial difference between the specified pair of registers. */
2865
2866 HOST_WIDE_INT
2867 ia64_initial_elimination_offset (int from, int to)
2868 {
2869 HOST_WIDE_INT offset;
2870
2871 ia64_compute_frame_size (get_frame_size ());
2872 switch (from)
2873 {
2874 case FRAME_POINTER_REGNUM:
2875 switch (to)
2876 {
2877 case HARD_FRAME_POINTER_REGNUM:
2878 if (current_function_is_leaf)
2879 offset = -current_frame_info.total_size;
2880 else
2881 offset = -(current_frame_info.total_size
2882 - crtl->outgoing_args_size - 16);
2883 break;
2884
2885 case STACK_POINTER_REGNUM:
2886 if (current_function_is_leaf)
2887 offset = 0;
2888 else
2889 offset = 16 + crtl->outgoing_args_size;
2890 break;
2891
2892 default:
2893 gcc_unreachable ();
2894 }
2895 break;
2896
2897 case ARG_POINTER_REGNUM:
2898 /* Arguments start above the 16 byte save area, unless stdarg
2899 in which case we store through the 16 byte save area. */
2900 switch (to)
2901 {
2902 case HARD_FRAME_POINTER_REGNUM:
2903 offset = 16 - crtl->args.pretend_args_size;
2904 break;
2905
2906 case STACK_POINTER_REGNUM:
2907 offset = (current_frame_info.total_size
2908 + 16 - crtl->args.pretend_args_size);
2909 break;
2910
2911 default:
2912 gcc_unreachable ();
2913 }
2914 break;
2915
2916 default:
2917 gcc_unreachable ();
2918 }
2919
2920 return offset;
2921 }
2922
2923 /* If there are more than a trivial number of register spills, we use
2924 two interleaved iterators so that we can get two memory references
2925 per insn group.
2926
2927 In order to simplify things in the prologue and epilogue expanders,
2928 we use helper functions to fix up the memory references after the
2929 fact with the appropriate offsets to a POST_MODIFY memory mode.
2930 The following data structure tracks the state of the two iterators
2931 while insns are being emitted. */
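/* For example, with two iterators and saves at descending CFA offsets 32,
   24, 16 and 8, iterator 0 covers offsets 32 and 16 while iterator 1
   covers 24 and 8; each iterator then steps by 16 via a POST_MODIFY, so
   adjacent saves use different address registers and can issue in the
   same insn group.  */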
2932
2933 struct spill_fill_data
2934 {
2935 rtx init_after; /* point at which to emit initializations */
2936 rtx init_reg[2]; /* initial base register */
2937 rtx iter_reg[2]; /* the iterator registers */
2938 rtx *prev_addr[2]; /* address of last memory use */
2939 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2940 HOST_WIDE_INT prev_off[2]; /* last offset */
2941 int n_iter; /* number of iterators in use */
2942 int next_iter; /* next iterator to use */
2943 unsigned int save_gr_used_mask;
2944 };
2945
2946 static struct spill_fill_data spill_fill_data;
2947
2948 static void
2949 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2950 {
2951 int i;
2952
2953 spill_fill_data.init_after = get_last_insn ();
2954 spill_fill_data.init_reg[0] = init_reg;
2955 spill_fill_data.init_reg[1] = init_reg;
2956 spill_fill_data.prev_addr[0] = NULL;
2957 spill_fill_data.prev_addr[1] = NULL;
2958 spill_fill_data.prev_insn[0] = NULL;
2959 spill_fill_data.prev_insn[1] = NULL;
2960 spill_fill_data.prev_off[0] = cfa_off;
2961 spill_fill_data.prev_off[1] = cfa_off;
2962 spill_fill_data.next_iter = 0;
2963 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2964
2965 spill_fill_data.n_iter = 1 + (n_spills > 2);
2966 for (i = 0; i < spill_fill_data.n_iter; ++i)
2967 {
2968 int regno = next_scratch_gr_reg ();
2969 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2970 current_frame_info.gr_used_mask |= 1 << regno;
2971 }
2972 }
2973
2974 static void
2975 finish_spill_pointers (void)
2976 {
2977 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2978 }
2979
2980 static rtx
2981 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2982 {
2983 int iter = spill_fill_data.next_iter;
2984 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2985 rtx disp_rtx = GEN_INT (disp);
2986 rtx mem;
2987
2988 if (spill_fill_data.prev_addr[iter])
2989 {
2990 if (satisfies_constraint_N (disp_rtx))
2991 {
2992 *spill_fill_data.prev_addr[iter]
2993 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2994 gen_rtx_PLUS (DImode,
2995 spill_fill_data.iter_reg[iter],
2996 disp_rtx));
2997 add_reg_note (spill_fill_data.prev_insn[iter],
2998 REG_INC, spill_fill_data.iter_reg[iter]);
2999 }
3000 else
3001 {
3002 /* ??? Could use register post_modify for loads. */
3003 if (!satisfies_constraint_I (disp_rtx))
3004 {
3005 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3006 emit_move_insn (tmp, disp_rtx);
3007 disp_rtx = tmp;
3008 }
3009 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3010 spill_fill_data.iter_reg[iter], disp_rtx));
3011 }
3012 }
3013 /* Micro-optimization: if we've created a frame pointer, it's at
3014 CFA 0, which may allow the real iterator to be initialized lower,
3015 slightly increasing parallelism. Also, if there are few saves
3016 it may eliminate the iterator entirely. */
3017 else if (disp == 0
3018 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3019 && frame_pointer_needed)
3020 {
3021 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3022 set_mem_alias_set (mem, get_varargs_alias_set ());
3023 return mem;
3024 }
3025 else
3026 {
3027 rtx seq, insn;
3028
3029 if (disp == 0)
3030 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3031 spill_fill_data.init_reg[iter]);
3032 else
3033 {
3034 start_sequence ();
3035
3036 if (!satisfies_constraint_I (disp_rtx))
3037 {
3038 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3039 emit_move_insn (tmp, disp_rtx);
3040 disp_rtx = tmp;
3041 }
3042
3043 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3044 spill_fill_data.init_reg[iter],
3045 disp_rtx));
3046
3047 seq = get_insns ();
3048 end_sequence ();
3049 }
3050
3051 /* Be careful about being the first insn in a sequence. */
3052 if (spill_fill_data.init_after)
3053 insn = emit_insn_after (seq, spill_fill_data.init_after);
3054 else
3055 {
3056 rtx first = get_insns ();
3057 if (first)
3058 insn = emit_insn_before (seq, first);
3059 else
3060 insn = emit_insn (seq);
3061 }
3062 spill_fill_data.init_after = insn;
3063 }
3064
3065 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3066
3067 /* ??? Not all of the spills are for varargs, but some of them are.
3068 The rest of the spills belong in an alias set of their own. But
3069 it doesn't actually hurt to include them here. */
3070 set_mem_alias_set (mem, get_varargs_alias_set ());
3071
3072 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3073 spill_fill_data.prev_off[iter] = cfa_off;
3074
3075 if (++iter >= spill_fill_data.n_iter)
3076 iter = 0;
3077 spill_fill_data.next_iter = iter;
3078
3079 return mem;
3080 }
3081
3082 static void
3083 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3084 rtx frame_reg)
3085 {
3086 int iter = spill_fill_data.next_iter;
3087 rtx mem, insn;
3088
3089 mem = spill_restore_mem (reg, cfa_off);
3090 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3091 spill_fill_data.prev_insn[iter] = insn;
3092
3093 if (frame_reg)
3094 {
3095 rtx base;
3096 HOST_WIDE_INT off;
3097
3098 RTX_FRAME_RELATED_P (insn) = 1;
3099
3100 /* Don't even pretend that the unwind code can intuit its way
3101 through a pair of interleaved post_modify iterators. Just
3102 provide the correct answer. */
3103
3104 if (frame_pointer_needed)
3105 {
3106 base = hard_frame_pointer_rtx;
3107 off = - cfa_off;
3108 }
3109 else
3110 {
3111 base = stack_pointer_rtx;
3112 off = current_frame_info.total_size - cfa_off;
3113 }
3114
3115 add_reg_note (insn, REG_CFA_OFFSET,
3116 gen_rtx_SET (VOIDmode,
3117 gen_rtx_MEM (GET_MODE (reg),
3118 plus_constant (base, off)),
3119 frame_reg));
3120 }
3121 }
3122
3123 static void
3124 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3125 {
3126 int iter = spill_fill_data.next_iter;
3127 rtx insn;
3128
3129 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3130 GEN_INT (cfa_off)));
3131 spill_fill_data.prev_insn[iter] = insn;
3132 }
3133
3134 /* Wrapper functions that discard the CONST_INT spill offset. These
3135 exist so that we can give gr_spill/gr_fill the offset they need and
3136 use a consistent function interface. */
3137
3138 static rtx
3139 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3140 {
3141 return gen_movdi (dest, src);
3142 }
3143
3144 static rtx
3145 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3146 {
3147 return gen_fr_spill (dest, src);
3148 }
3149
3150 static rtx
3151 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3152 {
3153 return gen_fr_restore (dest, src);
3154 }
3155
3156 /* Called after register allocation to add any instructions needed for the
3157 prologue. Using a prologue insn is favored over putting all of the
3158 instructions in output_function_prologue(), since it allows the scheduler
3159 to intermix instructions with the saves of the call-saved registers. In
3160 some cases, it might be necessary to emit a barrier instruction as the last
3161 insn to prevent such scheduling.
3162
3163 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3164 so that the debug info generation code can handle them properly.
3165
3166 The register save area is laid out like so:
3167 cfa+16
3168 [ varargs spill area ]
3169 [ fr register spill area ]
3170 [ br register spill area ]
3171 [ ar register spill area ]
3172 [ pr register spill area ]
3173 [ gr register spill area ] */
3174
3175 /* ??? We get inefficient code when the frame size is larger than can fit in an
3176 adds instruction. */
3177
3178 void
3179 ia64_expand_prologue (void)
3180 {
3181 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3182 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3183 rtx reg, alt_reg;
3184
3185 ia64_compute_frame_size (get_frame_size ());
3186 last_scratch_gr_reg = 15;
3187
3188 if (flag_stack_usage)
3189 current_function_static_stack_size = current_frame_info.total_size;
3190
3191 if (dump_file)
3192 {
3193 fprintf (dump_file, "ia64 frame related registers "
3194 "recorded in current_frame_info.r[]:\n");
3195 #define PRINTREG(a) if (current_frame_info.r[a]) \
3196 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3197 PRINTREG(reg_fp);
3198 PRINTREG(reg_save_b0);
3199 PRINTREG(reg_save_pr);
3200 PRINTREG(reg_save_ar_pfs);
3201 PRINTREG(reg_save_ar_unat);
3202 PRINTREG(reg_save_ar_lc);
3203 PRINTREG(reg_save_gp);
3204 #undef PRINTREG
3205 }
3206
3207 /* If there is no epilogue, then we don't need some prologue insns.
3208 We need to avoid emitting the dead prologue insns, because flow
3209 will complain about them. */
3210 if (optimize)
3211 {
3212 edge e;
3213 edge_iterator ei;
3214
3215 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3216 if ((e->flags & EDGE_FAKE) == 0
3217 && (e->flags & EDGE_FALLTHRU) != 0)
3218 break;
3219 epilogue_p = (e != NULL);
3220 }
3221 else
3222 epilogue_p = 1;
3223
3224 /* Set the local, input, and output register names. We need to do this
3225 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3226 half. If we use in/loc/out register names, then we get assembler errors
3227 in crtn.S because there is no alloc insn or regstk directive in there. */
3228 if (! TARGET_REG_NAMES)
3229 {
3230 int inputs = current_frame_info.n_input_regs;
3231 int locals = current_frame_info.n_local_regs;
3232 int outputs = current_frame_info.n_output_regs;
3233
3234 for (i = 0; i < inputs; i++)
3235 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3236 for (i = 0; i < locals; i++)
3237 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3238 for (i = 0; i < outputs; i++)
3239 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3240 }
3241
3242 /* Set the frame pointer register name. The regnum is logically loc79,
3243 but of course we'll not have allocated that many locals. Rather than
3244 worrying about renumbering the existing rtxs, we adjust the name. */
3245 /* ??? This code means that we can never use one local register when
3246 there is a frame pointer. loc79 gets wasted in this case, as it is
3247 renamed to a register that will never be used. See also the try_locals
3248 code in find_gr_spill. */
3249 if (current_frame_info.r[reg_fp])
3250 {
3251 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3252 reg_names[HARD_FRAME_POINTER_REGNUM]
3253 = reg_names[current_frame_info.r[reg_fp]];
3254 reg_names[current_frame_info.r[reg_fp]] = tmp;
3255 }
3256
3257 /* We don't need an alloc instruction if we've used no outputs or locals. */
3258 if (current_frame_info.n_local_regs == 0
3259 && current_frame_info.n_output_regs == 0
3260 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3261 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3262 {
3263 /* If there is no alloc, but there are input registers used, then we
3264 need a .regstk directive. */
3265 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3266 ar_pfs_save_reg = NULL_RTX;
3267 }
3268 else
3269 {
3270 current_frame_info.need_regstk = 0;
3271
3272 if (current_frame_info.r[reg_save_ar_pfs])
3273 {
3274 regno = current_frame_info.r[reg_save_ar_pfs];
3275 reg_emitted (reg_save_ar_pfs);
3276 }
3277 else
3278 regno = next_scratch_gr_reg ();
3279 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3280
3281 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3282 GEN_INT (current_frame_info.n_input_regs),
3283 GEN_INT (current_frame_info.n_local_regs),
3284 GEN_INT (current_frame_info.n_output_regs),
3285 GEN_INT (current_frame_info.n_rotate_regs)));
3286 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3287 }
3288
3289 /* Set up frame pointer, stack pointer, and spill iterators. */
3290
3291 n_varargs = cfun->machine->n_varargs;
3292 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3293 stack_pointer_rtx, 0);
3294
3295 if (frame_pointer_needed)
3296 {
3297 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3298 RTX_FRAME_RELATED_P (insn) = 1;
3299
3300 /* Force the unwind info to recognize this as defining a new CFA,
3301 rather than some temp register setup. */
3302 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3303 }
3304
3305 if (current_frame_info.total_size != 0)
3306 {
3307 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3308 rtx offset;
3309
3310 if (satisfies_constraint_I (frame_size_rtx))
3311 offset = frame_size_rtx;
3312 else
3313 {
3314 regno = next_scratch_gr_reg ();
3315 offset = gen_rtx_REG (DImode, regno);
3316 emit_move_insn (offset, frame_size_rtx);
3317 }
3318
3319 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3320 stack_pointer_rtx, offset));
3321
3322 if (! frame_pointer_needed)
3323 {
3324 RTX_FRAME_RELATED_P (insn) = 1;
3325 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3326 gen_rtx_SET (VOIDmode,
3327 stack_pointer_rtx,
3328 gen_rtx_PLUS (DImode,
3329 stack_pointer_rtx,
3330 frame_size_rtx)));
3331 }
3332
3333 /* ??? At this point we must generate a magic insn that appears to
3334 modify the stack pointer, the frame pointer, and all spill
3335 iterators. This would allow the most scheduling freedom. For
3336 now, just hard stop. */
3337 emit_insn (gen_blockage ());
3338 }
3339
3340 /* Must copy out ar.unat before doing any integer spills. */
3341 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3342 {
3343 if (current_frame_info.r[reg_save_ar_unat])
3344 {
3345 ar_unat_save_reg
3346 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3347 reg_emitted (reg_save_ar_unat);
3348 }
3349 else
3350 {
3351 alt_regno = next_scratch_gr_reg ();
3352 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3353 current_frame_info.gr_used_mask |= 1 << alt_regno;
3354 }
3355
3356 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3357 insn = emit_move_insn (ar_unat_save_reg, reg);
3358 if (current_frame_info.r[reg_save_ar_unat])
3359 {
3360 RTX_FRAME_RELATED_P (insn) = 1;
3361 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3362 }
3363
3364 /* Even if we're not going to generate an epilogue, we still
3365 need to save the register so that EH works. */
3366 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3367 emit_insn (gen_prologue_use (ar_unat_save_reg));
3368 }
3369 else
3370 ar_unat_save_reg = NULL_RTX;
3371
3372 /* Spill all varargs registers. Do this before spilling any GR registers,
3373 since we want the UNAT bits for the GR registers to override the UNAT
3374 bits from varargs, which we don't care about. */
3375
3376 cfa_off = -16;
3377 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3378 {
3379 reg = gen_rtx_REG (DImode, regno);
3380 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3381 }
3382
3383 /* Locate the bottom of the register save area. */
3384 cfa_off = (current_frame_info.spill_cfa_off
3385 + current_frame_info.spill_size
3386 + current_frame_info.extra_spill_size);
3387
3388 /* Save the predicate register block either in a register or in memory. */
3389 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3390 {
3391 reg = gen_rtx_REG (DImode, PR_REG (0));
3392 if (current_frame_info.r[reg_save_pr] != 0)
3393 {
3394 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3395 reg_emitted (reg_save_pr);
3396 insn = emit_move_insn (alt_reg, reg);
3397
3398 /* ??? Denote pr spill/fill by a DImode move that modifies all
3399 64 hard registers. */
3400 RTX_FRAME_RELATED_P (insn) = 1;
3401 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3402
3403 /* Even if we're not going to generate an epilogue, we still
3404 need to save the register so that EH works. */
3405 if (! epilogue_p)
3406 emit_insn (gen_prologue_use (alt_reg));
3407 }
3408 else
3409 {
3410 alt_regno = next_scratch_gr_reg ();
3411 alt_reg = gen_rtx_REG (DImode, alt_regno);
3412 insn = emit_move_insn (alt_reg, reg);
3413 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3414 cfa_off -= 8;
3415 }
3416 }
3417
3418 /* Handle AR regs in numerical order. All of them get special handling. */
3419 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3420 && current_frame_info.r[reg_save_ar_unat] == 0)
3421 {
3422 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3423 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3424 cfa_off -= 8;
3425 }
3426
3427 /* The alloc insn already copied ar.pfs into a general register. The
3428 only thing we have to do now is copy that register to a stack slot
3429 if we'd not allocated a local register for the job. */
3430 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3431 && current_frame_info.r[reg_save_ar_pfs] == 0)
3432 {
3433 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3434 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3435 cfa_off -= 8;
3436 }
3437
3438 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3439 {
3440 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3441 if (current_frame_info.r[reg_save_ar_lc] != 0)
3442 {
3443 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3444 reg_emitted (reg_save_ar_lc);
3445 insn = emit_move_insn (alt_reg, reg);
3446 RTX_FRAME_RELATED_P (insn) = 1;
3447 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3448
3449 /* Even if we're not going to generate an epilogue, we still
3450 need to save the register so that EH works. */
3451 if (! epilogue_p)
3452 emit_insn (gen_prologue_use (alt_reg));
3453 }
3454 else
3455 {
3456 alt_regno = next_scratch_gr_reg ();
3457 alt_reg = gen_rtx_REG (DImode, alt_regno);
3458 emit_move_insn (alt_reg, reg);
3459 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3460 cfa_off -= 8;
3461 }
3462 }
3463
3464 /* Save the return pointer. */
3465 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3466 {
3467 reg = gen_rtx_REG (DImode, BR_REG (0));
3468 if (current_frame_info.r[reg_save_b0] != 0)
3469 {
3470 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3471 reg_emitted (reg_save_b0);
3472 insn = emit_move_insn (alt_reg, reg);
3473 RTX_FRAME_RELATED_P (insn) = 1;
3474 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3475
3476 /* Even if we're not going to generate an epilogue, we still
3477 need to save the register so that EH works. */
3478 if (! epilogue_p)
3479 emit_insn (gen_prologue_use (alt_reg));
3480 }
3481 else
3482 {
3483 alt_regno = next_scratch_gr_reg ();
3484 alt_reg = gen_rtx_REG (DImode, alt_regno);
3485 emit_move_insn (alt_reg, reg);
3486 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3487 cfa_off -= 8;
3488 }
3489 }
3490
3491 if (current_frame_info.r[reg_save_gp])
3492 {
3493 reg_emitted (reg_save_gp);
3494 insn = emit_move_insn (gen_rtx_REG (DImode,
3495 current_frame_info.r[reg_save_gp]),
3496 pic_offset_table_rtx);
3497 }
3498
3499 /* We should now be at the base of the gr/br/fr spill area. */
3500 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3501 + current_frame_info.spill_size));
3502
3503 /* Spill all general registers. */
3504 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3505 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3506 {
3507 reg = gen_rtx_REG (DImode, regno);
3508 do_spill (gen_gr_spill, reg, cfa_off, reg);
3509 cfa_off -= 8;
3510 }
3511
3512 /* Spill the rest of the BR registers. */
3513 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3514 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3515 {
3516 alt_regno = next_scratch_gr_reg ();
3517 alt_reg = gen_rtx_REG (DImode, alt_regno);
3518 reg = gen_rtx_REG (DImode, regno);
3519 emit_move_insn (alt_reg, reg);
3520 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3521 cfa_off -= 8;
3522 }
3523
3524 /* Align the frame and spill all FR registers. */
3525 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3526 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3527 {
3528 gcc_assert (!(cfa_off & 15));
3529 reg = gen_rtx_REG (XFmode, regno);
3530 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3531 cfa_off -= 16;
3532 }
3533
3534 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3535
3536 finish_spill_pointers ();
3537 }
3538
3539 /* Output the textual info surrounding the prologue. */
3540
3541 void
3542 ia64_start_function (FILE *file, const char *fnname,
3543 tree decl ATTRIBUTE_UNUSED)
3544 {
3545 #if VMS_DEBUGGING_INFO
3546 if (vms_debug_main
3547 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3548 {
3549 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3550 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3551 dwarf2out_vms_debug_main_pointer ();
3552 vms_debug_main = 0;
3553 }
3554 #endif
3555
3556 fputs ("\t.proc ", file);
3557 assemble_name (file, fnname);
3558 fputc ('\n', file);
3559 ASM_OUTPUT_LABEL (file, fnname);
3560 }
3561
3562 /* Called after register allocation to add any instructions needed for the
3563 epilogue. Using an epilogue insn is preferred over putting all of the
3564 instructions in output_function_prologue(), since it allows the scheduler
3565 to intermix instructions with the saves of the caller saved registers. In
3566 some cases, it might be necessary to emit a barrier instruction as the last
3567 insn to prevent such scheduling. */
3568
3569 void
3570 ia64_expand_epilogue (int sibcall_p)
3571 {
3572 rtx insn, reg, alt_reg, ar_unat_save_reg;
3573 int regno, alt_regno, cfa_off;
3574
3575 ia64_compute_frame_size (get_frame_size ());
3576
3577 /* If there is a frame pointer, then we use it instead of the stack
3578 pointer, so that the stack pointer does not need to be valid when
3579 the epilogue starts. See EXIT_IGNORE_STACK. */
3580 if (frame_pointer_needed)
3581 setup_spill_pointers (current_frame_info.n_spilled,
3582 hard_frame_pointer_rtx, 0);
3583 else
3584 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3585 current_frame_info.total_size);
3586
3587 if (current_frame_info.total_size != 0)
3588 {
3589 /* ??? At this point we must generate a magic insn that appears to
3590 modify the spill iterators and the frame pointer. This would
3591 allow the most scheduling freedom. For now, just hard stop. */
3592 emit_insn (gen_blockage ());
3593 }
3594
3595 /* Locate the bottom of the register save area. */
3596 cfa_off = (current_frame_info.spill_cfa_off
3597 + current_frame_info.spill_size
3598 + current_frame_info.extra_spill_size);
3599
3600 /* Restore the predicate registers. */
3601 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3602 {
3603 if (current_frame_info.r[reg_save_pr] != 0)
3604 {
3605 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3606 reg_emitted (reg_save_pr);
3607 }
3608 else
3609 {
3610 alt_regno = next_scratch_gr_reg ();
3611 alt_reg = gen_rtx_REG (DImode, alt_regno);
3612 do_restore (gen_movdi_x, alt_reg, cfa_off);
3613 cfa_off -= 8;
3614 }
3615 reg = gen_rtx_REG (DImode, PR_REG (0));
3616 emit_move_insn (reg, alt_reg);
3617 }
3618
3619 /* Restore the application registers. */
3620
3621 /* Load the saved unat from the stack, but do not restore it until
3622 after the GRs have been restored. */
3623 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3624 {
3625 if (current_frame_info.r[reg_save_ar_unat] != 0)
3626 {
3627 ar_unat_save_reg
3628 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3629 reg_emitted (reg_save_ar_unat);
3630 }
3631 else
3632 {
3633 alt_regno = next_scratch_gr_reg ();
3634 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3635 current_frame_info.gr_used_mask |= 1 << alt_regno;
3636 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3637 cfa_off -= 8;
3638 }
3639 }
3640 else
3641 ar_unat_save_reg = NULL_RTX;
3642
3643 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3644 {
3645 reg_emitted (reg_save_ar_pfs);
3646 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3647 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3648 emit_move_insn (reg, alt_reg);
3649 }
3650 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3651 {
3652 alt_regno = next_scratch_gr_reg ();
3653 alt_reg = gen_rtx_REG (DImode, alt_regno);
3654 do_restore (gen_movdi_x, alt_reg, cfa_off);
3655 cfa_off -= 8;
3656 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3657 emit_move_insn (reg, alt_reg);
3658 }
3659
3660 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3661 {
3662 if (current_frame_info.r[reg_save_ar_lc] != 0)
3663 {
3664 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3665 reg_emitted (reg_save_ar_lc);
3666 }
3667 else
3668 {
3669 alt_regno = next_scratch_gr_reg ();
3670 alt_reg = gen_rtx_REG (DImode, alt_regno);
3671 do_restore (gen_movdi_x, alt_reg, cfa_off);
3672 cfa_off -= 8;
3673 }
3674 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3675 emit_move_insn (reg, alt_reg);
3676 }
3677
3678 /* Restore the return pointer. */
3679 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3680 {
3681 if (current_frame_info.r[reg_save_b0] != 0)
3682 {
3683 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3684 reg_emitted (reg_save_b0);
3685 }
3686 else
3687 {
3688 alt_regno = next_scratch_gr_reg ();
3689 alt_reg = gen_rtx_REG (DImode, alt_regno);
3690 do_restore (gen_movdi_x, alt_reg, cfa_off);
3691 cfa_off -= 8;
3692 }
3693 reg = gen_rtx_REG (DImode, BR_REG (0));
3694 emit_move_insn (reg, alt_reg);
3695 }
3696
3697 /* We should now be at the base of the gr/br/fr spill area. */
3698 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3699 + current_frame_info.spill_size));
3700
3701 /* The GP may be stored on the stack in the prologue, but it's
3702 never restored in the epilogue. Skip the stack slot. */
3703 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3704 cfa_off -= 8;
3705
3706 /* Restore all general registers. */
3707 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3708 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3709 {
3710 reg = gen_rtx_REG (DImode, regno);
3711 do_restore (gen_gr_restore, reg, cfa_off);
3712 cfa_off -= 8;
3713 }
3714
3715 /* Restore the branch registers. */
3716 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3717 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3718 {
3719 alt_regno = next_scratch_gr_reg ();
3720 alt_reg = gen_rtx_REG (DImode, alt_regno);
3721 do_restore (gen_movdi_x, alt_reg, cfa_off);
3722 cfa_off -= 8;
3723 reg = gen_rtx_REG (DImode, regno);
3724 emit_move_insn (reg, alt_reg);
3725 }
3726
3727 /* Restore floating point registers. */
3728 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3729 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3730 {
3731 gcc_assert (!(cfa_off & 15));
3732 reg = gen_rtx_REG (XFmode, regno);
3733 do_restore (gen_fr_restore_x, reg, cfa_off);
3734 cfa_off -= 16;
3735 }
3736
3737 /* Restore ar.unat for real. */
3738 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3739 {
3740 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3741 emit_move_insn (reg, ar_unat_save_reg);
3742 }
3743
3744 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3745
3746 finish_spill_pointers ();
3747
3748 if (current_frame_info.total_size
3749 || cfun->machine->ia64_eh_epilogue_sp
3750 || frame_pointer_needed)
3751 {
3752 /* ??? At this point we must generate a magic insn that appears to
3753 modify the spill iterators, the stack pointer, and the frame
3754 pointer. This would allow the most scheduling freedom. For now,
3755 just hard stop. */
3756 emit_insn (gen_blockage ());
3757 }
3758
3759 if (cfun->machine->ia64_eh_epilogue_sp)
3760 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3761 else if (frame_pointer_needed)
3762 {
3763 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3764 RTX_FRAME_RELATED_P (insn) = 1;
3765 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
3766 }
3767 else if (current_frame_info.total_size)
3768 {
3769 rtx offset, frame_size_rtx;
3770
3771 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3772 if (satisfies_constraint_I (frame_size_rtx))
3773 offset = frame_size_rtx;
3774 else
3775 {
3776 regno = next_scratch_gr_reg ();
3777 offset = gen_rtx_REG (DImode, regno);
3778 emit_move_insn (offset, frame_size_rtx);
3779 }
3780
3781 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3782 offset));
3783
3784 RTX_FRAME_RELATED_P (insn) = 1;
3785 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3786 gen_rtx_SET (VOIDmode,
3787 stack_pointer_rtx,
3788 gen_rtx_PLUS (DImode,
3789 stack_pointer_rtx,
3790 frame_size_rtx)));
3791 }
3792
3793 if (cfun->machine->ia64_eh_epilogue_bsp)
3794 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3795
3796 if (! sibcall_p)
3797 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3798 else
3799 {
3800 int fp = GR_REG (2);
3801 /* We need a throw-away register here; r0 and r1 are reserved,
3802 so r2 is the first available call-clobbered register. If
3803 there was a frame_pointer register, we may have swapped the
3804 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3805 sure we're using the string "r2" when emitting the register
3806 name for the assembler. */
3807 if (current_frame_info.r[reg_fp]
3808 && current_frame_info.r[reg_fp] == GR_REG (2))
3809 fp = HARD_FRAME_POINTER_REGNUM;
3810
3811 /* We must emit an alloc to force the input registers to become output
3812 registers. Otherwise, if the callee tries to pass its parameters
3813 through to another call without an intervening alloc, then these
3814 values get lost. */
3815 /* ??? We don't need to preserve all input registers. We only need to
3816 preserve those input registers used as arguments to the sibling call.
3817 It is unclear how to compute that number here. */
3818 if (current_frame_info.n_input_regs != 0)
3819 {
3820 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3821 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3822 const0_rtx, const0_rtx,
3823 n_inputs, const0_rtx));
3824 RTX_FRAME_RELATED_P (insn) = 1;
3825 }
3826 }
3827 }
3828
3829 /* Return 1 if br.ret can do all the work required to return from a
3830 function. */
3831
3832 int
3833 ia64_direct_return (void)
3834 {
3835 if (reload_completed && ! frame_pointer_needed)
3836 {
3837 ia64_compute_frame_size (get_frame_size ());
3838
3839 return (current_frame_info.total_size == 0
3840 && current_frame_info.n_spilled == 0
3841 && current_frame_info.r[reg_save_b0] == 0
3842 && current_frame_info.r[reg_save_pr] == 0
3843 && current_frame_info.r[reg_save_ar_pfs] == 0
3844 && current_frame_info.r[reg_save_ar_unat] == 0
3845 && current_frame_info.r[reg_save_ar_lc] == 0);
3846 }
3847 return 0;
3848 }
3849
3850 /* Return the magic cookie that we use to hold the return address
3851 during early compilation. */
3852
3853 rtx
3854 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3855 {
3856 if (count != 0)
3857 return NULL;
3858 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3859 }
3860
3861 /* Split this value after reload, now that we know where the return
3862 address is saved. */
3863
3864 void
3865 ia64_split_return_addr_rtx (rtx dest)
3866 {
3867 rtx src;
3868
3869 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3870 {
3871 if (current_frame_info.r[reg_save_b0] != 0)
3872 {
3873 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3874 reg_emitted (reg_save_b0);
3875 }
3876 else
3877 {
3878 HOST_WIDE_INT off;
3879 unsigned int regno;
3880 rtx off_r;
3881
3882 /* Compute offset from CFA for BR0. */
3883 /* ??? Must be kept in sync with ia64_expand_prologue. */
3884 off = (current_frame_info.spill_cfa_off
3885 + current_frame_info.spill_size);
3886 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3887 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3888 off -= 8;
3889
3890 /* Convert CFA offset to a register based offset. */
3891 if (frame_pointer_needed)
3892 src = hard_frame_pointer_rtx;
3893 else
3894 {
3895 src = stack_pointer_rtx;
3896 off += current_frame_info.total_size;
3897 }
3898
3899 /* Load address into scratch register. */
3900 off_r = GEN_INT (off);
3901 if (satisfies_constraint_I (off_r))
3902 emit_insn (gen_adddi3 (dest, src, off_r));
3903 else
3904 {
3905 emit_move_insn (dest, off_r);
3906 emit_insn (gen_adddi3 (dest, src, dest));
3907 }
3908
3909 src = gen_rtx_MEM (Pmode, dest);
3910 }
3911 }
3912 else
3913 src = gen_rtx_REG (DImode, BR_REG (0));
3914
3915 emit_move_insn (dest, src);
3916 }
3917
3918 int
3919 ia64_hard_regno_rename_ok (int from, int to)
3920 {
3921 /* Don't clobber any of the registers we reserved for the prologue. */
3922 unsigned int r;
3923
3924 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3925 if (to == current_frame_info.r[r]
3926 || from == current_frame_info.r[r]
3927 || to == emitted_frame_related_regs[r]
3928 || from == emitted_frame_related_regs[r])
3929 return 0;
3930
3931 /* Don't use output registers outside the register frame. */
3932 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3933 return 0;
3934
3935 /* Retain even/oddness on predicate register pairs. */
3936 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3937 return (from & 1) == (to & 1);
3938
3939 return 1;
3940 }
3941
3942 /* Target hook for assembling integer objects. Handle word-sized
3943 aligned objects and detect the cases when @fptr is needed. */
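/* For example, a pointer-sized initializer that refers to a function
   symbol is emitted as "data8 @fptr(sym)" (data4 when POINTER_SIZE is
   32, with the .ua variants for unaligned data), so that the linker
   creates an official function descriptor for the target.  */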
3944
3945 static bool
3946 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3947 {
3948 if (size == POINTER_SIZE / BITS_PER_UNIT
3949 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3950 && GET_CODE (x) == SYMBOL_REF
3951 && SYMBOL_REF_FUNCTION_P (x))
3952 {
3953 static const char * const directive[2][2] = {
3954 /* 64-bit pointer */ /* 32-bit pointer */
3955 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3956 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3957 };
3958 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3959 output_addr_const (asm_out_file, x);
3960 fputs (")\n", asm_out_file);
3961 return true;
3962 }
3963 return default_assemble_integer (x, size, aligned_p);
3964 }
3965
3966 /* Emit the function prologue. */
3967
3968 static void
3969 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3970 {
3971 int mask, grsave, grsave_prev;
3972
3973 if (current_frame_info.need_regstk)
3974 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3975 current_frame_info.n_input_regs,
3976 current_frame_info.n_local_regs,
3977 current_frame_info.n_output_regs,
3978 current_frame_info.n_rotate_regs);
3979
3980 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
3981 return;
3982
3983 /* Emit the .prologue directive. */
3984
3985 mask = 0;
3986 grsave = grsave_prev = 0;
3987 if (current_frame_info.r[reg_save_b0] != 0)
3988 {
3989 mask |= 8;
3990 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3991 }
3992 if (current_frame_info.r[reg_save_ar_pfs] != 0
3993 && (grsave_prev == 0
3994 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3995 {
3996 mask |= 4;
3997 if (grsave_prev == 0)
3998 grsave = current_frame_info.r[reg_save_ar_pfs];
3999 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4000 }
4001 if (current_frame_info.r[reg_fp] != 0
4002 && (grsave_prev == 0
4003 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4004 {
4005 mask |= 2;
4006 if (grsave_prev == 0)
4007 grsave = HARD_FRAME_POINTER_REGNUM;
4008 grsave_prev = current_frame_info.r[reg_fp];
4009 }
4010 if (current_frame_info.r[reg_save_pr] != 0
4011 && (grsave_prev == 0
4012 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4013 {
4014 mask |= 1;
4015 if (grsave_prev == 0)
4016 grsave = current_frame_info.r[reg_save_pr];
4017 }
4018
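/* For example, when b0 and ar.pfs are saved in consecutive general
   registers and neither the frame pointer nor the predicates qualify,
   mask ends up as 8|4 == 12 and grsave names the register that holds
   b0.  */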
4019 if (mask && TARGET_GNU_AS)
4020 fprintf (file, "\t.prologue %d, %d\n", mask,
4021 ia64_dbx_register_number (grsave));
4022 else
4023 fputs ("\t.prologue\n", file);
4024
4025 /* Emit a .spill directive, if necessary, to relocate the base of
4026 the register spill area. */
4027 if (current_frame_info.spill_cfa_off != -16)
4028 fprintf (file, "\t.spill %ld\n",
4029 (long) (current_frame_info.spill_cfa_off
4030 + current_frame_info.spill_size));
4031 }
4032
4033 /* Emit the .body directive at the scheduled end of the prologue. */
4034
4035 static void
4036 ia64_output_function_end_prologue (FILE *file)
4037 {
4038 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4039 return;
4040
4041 fputs ("\t.body\n", file);
4042 }
4043
4044 /* Emit the function epilogue. */
4045
4046 static void
4047 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4048 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4049 {
4050 int i;
4051
4052 if (current_frame_info.r[reg_fp])
4053 {
4054 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4055 reg_names[HARD_FRAME_POINTER_REGNUM]
4056 = reg_names[current_frame_info.r[reg_fp]];
4057 reg_names[current_frame_info.r[reg_fp]] = tmp;
4058 reg_emitted (reg_fp);
4059 }
4060 if (! TARGET_REG_NAMES)
4061 {
4062 for (i = 0; i < current_frame_info.n_input_regs; i++)
4063 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4064 for (i = 0; i < current_frame_info.n_local_regs; i++)
4065 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4066 for (i = 0; i < current_frame_info.n_output_regs; i++)
4067 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4068 }
4069
4070 current_frame_info.initialized = 0;
4071 }
4072
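/* Map REGNO to the register number used for it in debug info.  For
   example, with two input registers and three locals, in0 and in1 map
   to 32 and 33, loc0 through loc2 to 34 through 36, and out0 to 37.  */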
4073 int
4074 ia64_dbx_register_number (int regno)
4075 {
4076 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4077 from its home at loc79 to something inside the register frame. We
4078 must perform the same renumbering here for the debug info. */
4079 if (current_frame_info.r[reg_fp])
4080 {
4081 if (regno == HARD_FRAME_POINTER_REGNUM)
4082 regno = current_frame_info.r[reg_fp];
4083 else if (regno == current_frame_info.r[reg_fp])
4084 regno = HARD_FRAME_POINTER_REGNUM;
4085 }
4086
4087 if (IN_REGNO_P (regno))
4088 return 32 + regno - IN_REG (0);
4089 else if (LOC_REGNO_P (regno))
4090 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4091 else if (OUT_REGNO_P (regno))
4092 return (32 + current_frame_info.n_input_regs
4093 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4094 else
4095 return regno;
4096 }
4097
4098 /* Implement TARGET_TRAMPOLINE_INIT.
4099
4100 The trampoline should set the static chain pointer to value placed
4101 into the trampoline and should branch to the specified routine.
4102 To make the normal indirect-subroutine calling convention work,
4103 the trampoline must look like a function descriptor; the first
4104 word being the target address and the second being the target's
4105 global pointer.
4106
4107 We abuse the concept of a global pointer by arranging for it
4108 to point to the data we need to load. The complete trampoline
4109 has the following form:
4110
4111 +-------------------+ \
4112 TRAMP: | __ia64_trampoline | |
4113 +-------------------+ > fake function descriptor
4114 | TRAMP+16 | |
4115 +-------------------+ /
4116 | target descriptor |
4117 +-------------------+
4118 | static link |
4119 +-------------------+
4120 */
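/* Concretely, ia64_trampoline_init below stores four 8-byte words at
   offsets 0, 8, 16 and 24 of the trampoline block: the address of
   __ia64_trampoline, the value TRAMP+16, the address of the target's
   function descriptor, and the static chain value.  */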
4121
4122 static void
4123 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4124 {
4125 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4126 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4127
4128 /* The Intel assembler requires that the global __ia64_trampoline symbol
4129 be declared explicitly. */
4130 if (!TARGET_GNU_AS)
4131 {
4132 static bool declared_ia64_trampoline = false;
4133
4134 if (!declared_ia64_trampoline)
4135 {
4136 declared_ia64_trampoline = true;
4137 (*targetm.asm_out.globalize_label) (asm_out_file,
4138 "__ia64_trampoline");
4139 }
4140 }
4141
4142 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4143 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4144 fnaddr = convert_memory_address (Pmode, fnaddr);
4145 static_chain = convert_memory_address (Pmode, static_chain);
4146
4147 /* Load up our iterator. */
4148 addr_reg = copy_to_reg (addr);
4149 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4150
4151 /* The first two words are the fake descriptor:
4152 __ia64_trampoline, ADDR+16. */
4153 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4154 if (TARGET_ABI_OPEN_VMS)
4155 {
4156 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4157 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4158 relocation against function symbols to make it identical to the
4159 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4160 strict ELF and dereference to get the bare code address. */
4161 rtx reg = gen_reg_rtx (Pmode);
4162 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4163 emit_move_insn (reg, tramp);
4164 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4165 tramp = reg;
4166 }
4167 emit_move_insn (m_tramp, tramp);
4168 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4169 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4170
4171 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4172 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4173 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4174
4175 /* The third word is the target descriptor. */
4176 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4177 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4178 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4179
4180 /* The fourth word is the static chain. */
4181 emit_move_insn (m_tramp, static_chain);
4182 }
4183 \f
4184 /* Do any needed setup for a variadic function. CUM has not been updated
4185 for the last named argument, which has type TYPE and mode MODE.
4186
4187 We generate the actual spill instructions during prologue generation. */
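/* For example, if the named arguments occupy three of the eight
   argument slots, the remaining five are reserved: *pretend_size
   becomes 5 * UNITS_PER_WORD (40 bytes) and five incoming registers
   are spilled by the prologue.  */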
4188
4189 static void
4190 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4191 tree type, int * pretend_size,
4192 int second_time ATTRIBUTE_UNUSED)
4193 {
4194 CUMULATIVE_ARGS next_cum = *cum;
4195
4196 /* Skip the current argument. */
4197 ia64_function_arg_advance (&next_cum, mode, type, 1);
4198
4199 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4200 {
4201 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4202 *pretend_size = n * UNITS_PER_WORD;
4203 cfun->machine->n_varargs = n;
4204 }
4205 }
4206
4207 /* Check whether TYPE is a homogeneous floating point aggregate. If
4208 it is, return the mode of the floating point type that appears
4209 in all leaves. If it is not, return VOIDmode.
4210
4211 An aggregate is a homogeneous floating point aggregate if all
4212 fields/elements in it have the same floating point type (e.g.,
4213 SFmode). 128-bit quad-precision floats are excluded.
4214
4215 Variable sized aggregates should never arrive here, since we should
4216 have already decided to pass them by reference. Top-level zero-sized
4217 aggregates are excluded because our parallels crash the middle-end. */
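/* For example, struct { float x, y, z; } yields SFmode, a struct
   containing an array of doubles yields DFmode, and struct { float x;
   double y; } yields VOIDmode because the leaf types differ.  */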
4218
4219 static enum machine_mode
4220 hfa_element_mode (const_tree type, bool nested)
4221 {
4222 enum machine_mode element_mode = VOIDmode;
4223 enum machine_mode mode;
4224 enum tree_code code = TREE_CODE (type);
4225 int know_element_mode = 0;
4226 tree t;
4227
4228 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4229 return VOIDmode;
4230
4231 switch (code)
4232 {
4233 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4234 case BOOLEAN_TYPE: case POINTER_TYPE:
4235 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4236 case LANG_TYPE: case FUNCTION_TYPE:
4237 return VOIDmode;
4238
4239 /* Fortran complex types are supposed to be HFAs, so we need to handle
4240 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4241 types though. */
4242 case COMPLEX_TYPE:
4243 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4244 && TYPE_MODE (type) != TCmode)
4245 return GET_MODE_INNER (TYPE_MODE (type));
4246 else
4247 return VOIDmode;
4248
4249 case REAL_TYPE:
4250 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4251 mode if this is contained within an aggregate. */
4252 if (nested && TYPE_MODE (type) != TFmode)
4253 return TYPE_MODE (type);
4254 else
4255 return VOIDmode;
4256
4257 case ARRAY_TYPE:
4258 return hfa_element_mode (TREE_TYPE (type), 1);
4259
4260 case RECORD_TYPE:
4261 case UNION_TYPE:
4262 case QUAL_UNION_TYPE:
4263 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4264 {
4265 if (TREE_CODE (t) != FIELD_DECL)
4266 continue;
4267
4268 mode = hfa_element_mode (TREE_TYPE (t), 1);
4269 if (know_element_mode)
4270 {
4271 if (mode != element_mode)
4272 return VOIDmode;
4273 }
4274 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4275 return VOIDmode;
4276 else
4277 {
4278 know_element_mode = 1;
4279 element_mode = mode;
4280 }
4281 }
4282 return element_mode;
4283
4284 default:
4285 /* If we reach here, we probably have some front-end specific type
4286 that the backend doesn't know about. This can happen via the
4287 aggregate_value_p call in init_function_start. All we can do is
4288 ignore unknown tree types. */
4289 return VOIDmode;
4290 }
4291
4292 return VOIDmode;
4293 }
4294
4295 /* Return the number of words required to hold a quantity of TYPE and MODE
4296 when passed as an argument. */
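/* For example, with UNITS_PER_WORD == 8, a 12-byte BLKmode aggregate
   occupies two argument words while a DImode scalar occupies one.  */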
4297 static int
4298 ia64_function_arg_words (const_tree type, enum machine_mode mode)
4299 {
4300 int words;
4301
4302 if (mode == BLKmode)
4303 words = int_size_in_bytes (type);
4304 else
4305 words = GET_MODE_SIZE (mode);
4306
4307 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4308 }
4309
4310 /* Return the number of registers that should be skipped so the current
4311 argument (described by TYPE and WORDS) will be properly aligned.
4312
4313 Integer and float arguments larger than 8 bytes start at the next
4314 even boundary. Aggregates larger than 8 bytes start at the next
4315 even boundary if the aggregate has 16 byte alignment. Note that
4316 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4317 but are still to be aligned in registers.
4318
4319 ??? The ABI does not specify how to handle aggregates with
4320 alignment from 9 to 15 bytes, or greater than 16. We handle them
4321 all as if they had 16 byte alignment. Such aggregates can occur
4322 only if gcc extensions are used. */
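/* For example, if an odd number of argument words is already in use
   and a 16-byte-aligned aggregate arrives, one slot is skipped so that
   the aggregate starts on an even slot; nothing is ever skipped on
   VMS.  */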
4323 static int
4324 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4325 const_tree type, int words)
4326 {
4327 /* No registers are skipped on VMS. */
4328 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4329 return 0;
4330
4331 if (type
4332 && TREE_CODE (type) != INTEGER_TYPE
4333 && TREE_CODE (type) != REAL_TYPE)
4334 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4335 else
4336 return words > 1;
4337 }
4338
4339 /* Return rtx for register where argument is passed, or zero if it is passed
4340 on the stack. */
4341 /* ??? 128-bit quad-precision floats are always passed in general
4342 registers. */
4343
4344 static rtx
4345 ia64_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
4346 const_tree type, bool named, bool incoming)
4347 {
4348 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4349 int words = ia64_function_arg_words (type, mode);
4350 int offset = ia64_function_arg_offset (cum, type, words);
4351 enum machine_mode hfa_mode = VOIDmode;
4352
4353 /* For OpenVMS, emit the instruction setting up the argument register here,
4354 when we know it will be emitted together with the other argument-setup
4355 insns. This is not the conceptually best place to do this, but it is
4356 the easiest as we have convenient access to cumulative args info. */
4357
4358 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4359 && named == 1)
4360 {
4361 unsigned HOST_WIDE_INT regval = cum->words;
4362 int i;
4363
4364 for (i = 0; i < 8; i++)
4365 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4366
4367 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4368 GEN_INT (regval));
4369 }
4370
4371 /* If all argument slots are used, then it must go on the stack. */
4372 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4373 return 0;
4374
4375 /* Check for and handle homogeneous FP aggregates. */
4376 if (type)
4377 hfa_mode = hfa_element_mode (type, 0);
4378
4379 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4380 and unprototyped hfas are passed specially. */
4381 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4382 {
4383 rtx loc[16];
4384 int i = 0;
4385 int fp_regs = cum->fp_regs;
4386 int int_regs = cum->words + offset;
4387 int hfa_size = GET_MODE_SIZE (hfa_mode);
4388 int byte_size;
4389 int args_byte_size;
4390
4391 /* If prototyped, pass it in FR regs then GR regs.
4392 If not prototyped, pass it in both FR and GR regs.
4393
4394 If this is an SFmode aggregate, then it is possible to run out of
4395 FR regs while GR regs are still left. In that case, we pass the
4396 remaining part in the GR regs. */
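/* For example, a named, prototyped aggregate of three SFmode floats
   is spread across three consecutive FP argument registers; if the FP
   registers run out part way through an SFmode aggregate, the second
   loop below places the remainder in GR slots.  */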
4397
4398 /* Fill the FP regs. We do this always. We stop if we reach the end
4399 of the argument, the last FP register, or the last argument slot. */
4400
4401 byte_size = ((mode == BLKmode)
4402 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4403 args_byte_size = int_regs * UNITS_PER_WORD;
4404 offset = 0;
4405 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4406 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4407 {
4408 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4409 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4410 + fp_regs)),
4411 GEN_INT (offset));
4412 offset += hfa_size;
4413 args_byte_size += hfa_size;
4414 fp_regs++;
4415 }
4416
4417 /* If no prototype, then the whole thing must go in GR regs. */
4418 if (! cum->prototype)
4419 offset = 0;
4420 /* If this is an SFmode aggregate, then we might have some left over
4421 that needs to go in GR regs. */
4422 else if (byte_size != offset)
4423 int_regs += offset / UNITS_PER_WORD;
4424
4425 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4426
4427 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4428 {
4429 enum machine_mode gr_mode = DImode;
4430 unsigned int gr_size;
4431
4432 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4433 then this goes in a GR reg left adjusted/little endian, right
4434 adjusted/big endian. */
4435 /* ??? Currently this is handled wrong, because 4-byte hunks are
4436 always right adjusted/little endian. */
4437 if (offset & 0x4)
4438 gr_mode = SImode;
4439 /* If we have an even 4 byte hunk because the aggregate is a
4440 multiple of 4 bytes in size, then this goes in a GR reg right
4441 adjusted/little endian. */
4442 else if (byte_size - offset == 4)
4443 gr_mode = SImode;
4444
4445 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4446 gen_rtx_REG (gr_mode, (basereg
4447 + int_regs)),
4448 GEN_INT (offset));
4449
4450 gr_size = GET_MODE_SIZE (gr_mode);
4451 offset += gr_size;
4452 if (gr_size == UNITS_PER_WORD
4453 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4454 int_regs++;
4455 else if (gr_size > UNITS_PER_WORD)
4456 int_regs += gr_size / UNITS_PER_WORD;
4457 }
4458 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4459 }
4460
4461 /* On OpenVMS a variable argument is passed either in Rn or Fn. */
4462 else if (TARGET_ABI_OPEN_VMS && named == 0)
4463 {
4464 if (FLOAT_MODE_P (mode))
4465 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4466 else
4467 return gen_rtx_REG (mode, basereg + cum->words);
4468 }
4469
4470 /* Integral and aggregates go in general registers. If we have run out of
4471 FR registers, then FP values must also go in general registers. This can
4472 happen when we have a SFmode HFA. */
4473 else if (mode == TFmode || mode == TCmode
4474 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4475 {
4476 int byte_size = ((mode == BLKmode)
4477 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4478 if (BYTES_BIG_ENDIAN
4479 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4480 && byte_size < UNITS_PER_WORD
4481 && byte_size > 0)
4482 {
4483 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4484 gen_rtx_REG (DImode,
4485 (basereg + cum->words
4486 + offset)),
4487 const0_rtx);
4488 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4489 }
4490 else
4491 return gen_rtx_REG (mode, basereg + cum->words + offset);
4492
4493 }
4494
4495 /* If there is a prototype, then FP values go in a FR register when
4496 named, and in a GR register when unnamed. */
4497 else if (cum->prototype)
4498 {
4499 if (named)
4500 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4501 /* In big-endian mode, an anonymous SFmode value must be represented
4502 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4503 the value into the high half of the general register. */
4504 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4505 return gen_rtx_PARALLEL (mode,
4506 gen_rtvec (1,
4507 gen_rtx_EXPR_LIST (VOIDmode,
4508 gen_rtx_REG (DImode, basereg + cum->words + offset),
4509 const0_rtx)));
4510 else
4511 return gen_rtx_REG (mode, basereg + cum->words + offset);
4512 }
4513 /* If there is no prototype, then FP values go in both FR and GR
4514 registers. */
4515 else
4516 {
4517 /* See comment above. */
4518 enum machine_mode inner_mode =
4519 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4520
4521 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4522 gen_rtx_REG (mode, (FR_ARG_FIRST
4523 + cum->fp_regs)),
4524 const0_rtx);
4525 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4526 gen_rtx_REG (inner_mode,
4527 (basereg + cum->words
4528 + offset)),
4529 const0_rtx);
4530
4531 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4532 }
4533 }
4534
4535 /* Implement TARGET_FUNCTION_ARG target hook. */
4536
4537 static rtx
4538 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4539 const_tree type, bool named)
4540 {
4541 return ia64_function_arg_1 (cum, mode, type, named, false);
4542 }
4543
4544 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4545
4546 static rtx
4547 ia64_function_incoming_arg (CUMULATIVE_ARGS *cum,
4548 enum machine_mode mode,
4549 const_tree type, bool named)
4550 {
4551 return ia64_function_arg_1 (cum, mode, type, named, true);
4552 }
4553
4554 /* Return number of bytes, at the beginning of the argument, that must be
4555 put in registers. 0 if the argument is entirely in registers or entirely
4556 in memory. */
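/* For example, with six argument words already in use, a 24-byte
   aggregate has its first 16 bytes passed in the two remaining
   register slots and its last 8 bytes on the stack, so 16 is
   returned.  */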
4557
4558 static int
4559 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4560 tree type, bool named ATTRIBUTE_UNUSED)
4561 {
4562 int words = ia64_function_arg_words (type, mode);
4563 int offset = ia64_function_arg_offset (cum, type, words);
4564
4565 /* If all argument slots are used, then it must go on the stack. */
4566 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4567 return 0;
4568
4569 /* It doesn't matter whether the argument goes in FR or GR regs. If
4570 it fits within the 8 argument slots, then it goes entirely in
4571 registers. If it extends past the last argument slot, then the rest
4572 goes on the stack. */
4573
4574 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4575 return 0;
4576
4577 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4578 }
4579
4580 /* Return ivms_arg_type based on machine_mode. */
4581
4582 static enum ivms_arg_type
4583 ia64_arg_type (enum machine_mode mode)
4584 {
4585 switch (mode)
4586 {
4587 case SFmode:
4588 return FS;
4589 case DFmode:
4590 return FT;
4591 default:
4592 return I64;
4593 }
4594 }
4595
4596 /* Update CUM to point after this argument. This is patterned after
4597 ia64_function_arg. */
4598
4599 static void
4600 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4601 const_tree type, bool named)
4602 {
4603 int words = ia64_function_arg_words (type, mode);
4604 int offset = ia64_function_arg_offset (cum, type, words);
4605 enum machine_mode hfa_mode = VOIDmode;
4606
4607 /* If all arg slots are already full, then there is nothing to do. */
4608 if (cum->words >= MAX_ARGUMENT_SLOTS)
4609 {
4610 cum->words += words + offset;
4611 return;
4612 }
4613
4614 cum->atypes[cum->words] = ia64_arg_type (mode);
4615 cum->words += words + offset;
4616
4617 /* Check for and handle homogeneous FP aggregates. */
4618 if (type)
4619 hfa_mode = hfa_element_mode (type, 0);
4620
4621 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4622 and unprototyped hfas are passed specially. */
4623 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4624 {
4625 int fp_regs = cum->fp_regs;
4626 /* This is the original value of cum->words + offset. */
4627 int int_regs = cum->words - words;
4628 int hfa_size = GET_MODE_SIZE (hfa_mode);
4629 int byte_size;
4630 int args_byte_size;
4631
4632 /* If prototyped, pass it in FR regs then GR regs.
4633 If not prototyped, pass it in both FR and GR regs.
4634
4635 If this is an SFmode aggregate, then it is possible to run out of
4636 FR regs while GR regs are still left. In that case, we pass the
4637 remaining part in the GR regs. */
4638
4639 /* Fill the FP regs. We do this always. We stop if we reach the end
4640 of the argument, the last FP register, or the last argument slot. */
4641
4642 byte_size = ((mode == BLKmode)
4643 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4644 args_byte_size = int_regs * UNITS_PER_WORD;
4645 offset = 0;
4646 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4647 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4648 {
4649 offset += hfa_size;
4650 args_byte_size += hfa_size;
4651 fp_regs++;
4652 }
4653
4654 cum->fp_regs = fp_regs;
4655 }
4656
4657 /* On OpenVMS a variable argument is passed either in Rn or Fn. */
4658 else if (TARGET_ABI_OPEN_VMS && named == 0)
4659 {
4660 cum->int_regs = cum->words;
4661 cum->fp_regs = cum->words;
4662 }
4663
4664 /* Integral and aggregates go in general registers. So do TFmode FP values.
4665 If we have run out of FR registers, then other FP values must also go in
4666 general registers. This can happen when we have a SFmode HFA. */
4667 else if (mode == TFmode || mode == TCmode
4668 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4669 cum->int_regs = cum->words;
4670
4671 /* If there is a prototype, then FP values go in a FR register when
4672 named, and in a GR register when unnamed. */
4673 else if (cum->prototype)
4674 {
4675 if (! named)
4676 cum->int_regs = cum->words;
4677 else
4678 /* ??? Complex types should not reach here. */
4679 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4680 }
4681 /* If there is no prototype, then FP values go in both FR and GR
4682 registers. */
4683 else
4684 {
4685 /* ??? Complex types should not reach here. */
4686 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4687 cum->int_regs = cum->words;
4688 }
4689 }
4690
4691 /* Arguments with alignment larger than 8 bytes start at the next even
4692 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
4693 even though their normal alignment is 8 bytes. See ia64_function_arg. */
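/* PARM_BOUNDARY is 64 bits here, so the doubled boundary is 128 bits;
   e.g. a 16-byte-aligned aggregate, or a TFmode argument on ILP32
   HP-UX, starts on a 128-bit boundary.  */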
4694
4695 static unsigned int
4696 ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
4697 {
4698 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4699 return PARM_BOUNDARY * 2;
4700
4701 if (type)
4702 {
4703 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4704 return PARM_BOUNDARY * 2;
4705 else
4706 return PARM_BOUNDARY;
4707 }
4708
4709 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4710 return PARM_BOUNDARY * 2;
4711 else
4712 return PARM_BOUNDARY;
4713 }
4714
4715 /* True if it is OK to do sibling call optimization for the specified
4716 call expression EXP. DECL will be the called function, or NULL if
4717 this is an indirect call. */
4718 static bool
4719 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4720 {
4721 /* We can't perform a sibcall if the current function has the syscall_linkage
4722 attribute. */
4723 if (lookup_attribute ("syscall_linkage",
4724 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4725 return false;
4726
4727 /* We must always return with our current GP. This means we can
4728 only sibcall to functions defined in the current module unless
4729 TARGET_CONST_GP is set to true. */
4730 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4731 }
4732 \f
4733
4734 /* Implement va_arg. */
4735
4736 static tree
4737 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4738 gimple_seq *post_p)
4739 {
4740 /* Variable sized types are passed by reference. */
4741 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4742 {
4743 tree ptrtype = build_pointer_type (type);
4744 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4745 return build_va_arg_indirect_ref (addr);
4746 }
4747
4748 /* Aggregate arguments with alignment larger than 8 bytes start at
4749 the next even boundary. Integer and floating point arguments
4750 do so if they are larger than 8 bytes, whether or not they are
4751 also aligned larger than 8 bytes. */
4752 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4753 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4754 {
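/* Round the pointer in VALIST up to the next 2 * UNITS_PER_WORD
   (16-byte) boundary. */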
4755 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4756 size_int (2 * UNITS_PER_WORD - 1));
4757 t = fold_convert (sizetype, t);
4758 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4759 size_int (-2 * UNITS_PER_WORD));
4760 t = fold_convert (TREE_TYPE (valist), t);
4761 gimplify_assign (unshare_expr (valist), t, pre_p);
4762 }
4763
4764 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4765 }
4766 \f
4767 /* Return 1 if the function return value is returned in memory. Return 0 if it is
4768 in a register. */
4769
4770 static bool
4771 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4772 {
4773 enum machine_mode mode;
4774 enum machine_mode hfa_mode;
4775 HOST_WIDE_INT byte_size;
4776
4777 mode = TYPE_MODE (valtype);
4778 byte_size = GET_MODE_SIZE (mode);
4779 if (mode == BLKmode)
4780 {
4781 byte_size = int_size_in_bytes (valtype);
4782 if (byte_size < 0)
4783 return true;
4784 }
4785
4786 /* HFAs with up to 8 elements are returned in the FP argument registers. */
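/* For example, an aggregate of four doubles fits and is returned in
   FP registers, while an aggregate of ten floats exceeds the eight
   available slots and is returned in memory.  */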
4787
4788 hfa_mode = hfa_element_mode (valtype, 0);
4789 if (hfa_mode != VOIDmode)
4790 {
4791 int hfa_size = GET_MODE_SIZE (hfa_mode);
4792
4793 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4794 return true;
4795 else
4796 return false;
4797 }
4798 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4799 return true;
4800 else
4801 return false;
4802 }
4803
4804 /* Return rtx for register that holds the function return value. */
4805
4806 static rtx
4807 ia64_function_value (const_tree valtype,
4808 const_tree fn_decl_or_type,
4809 bool outgoing ATTRIBUTE_UNUSED)
4810 {
4811 enum machine_mode mode;
4812 enum machine_mode hfa_mode;
4813 int unsignedp;
4814 const_tree func = fn_decl_or_type;
4815
4816 if (fn_decl_or_type
4817 && !DECL_P (fn_decl_or_type))
4818 func = NULL;
4819
4820 mode = TYPE_MODE (valtype);
4821 hfa_mode = hfa_element_mode (valtype, 0);
4822
4823 if (hfa_mode != VOIDmode)
4824 {
4825 rtx loc[8];
4826 int i;
4827 int hfa_size;
4828 int byte_size;
4829 int offset;
4830
4831 hfa_size = GET_MODE_SIZE (hfa_mode);
4832 byte_size = ((mode == BLKmode)
4833 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4834 offset = 0;
4835 for (i = 0; offset < byte_size; i++)
4836 {
4837 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4838 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4839 GEN_INT (offset));
4840 offset += hfa_size;
4841 }
4842 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4843 }
4844 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4845 return gen_rtx_REG (mode, FR_ARG_FIRST);
4846 else
4847 {
4848 bool need_parallel = false;
4849
4850 /* In big-endian mode, we need to manage the layout of aggregates
4851 in the registers so that we get the bits properly aligned in
4852 the highpart of the registers. */
4853 if (BYTES_BIG_ENDIAN
4854 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4855 need_parallel = true;
4856
4857 /* Something like struct S { long double x; char a[0] } is not an
4858 HFA structure, and therefore doesn't go in fp registers. But
4859 the middle-end will give it XFmode anyway, and XFmode values
4860 don't normally fit in integer registers. So we need to smuggle
4861 the value inside a parallel. */
4862 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4863 need_parallel = true;
4864
4865 if (need_parallel)
4866 {
4867 rtx loc[8];
4868 int offset;
4869 int bytesize;
4870 int i;
4871
4872 offset = 0;
4873 bytesize = int_size_in_bytes (valtype);
4874 /* An empty PARALLEL is invalid here, but the return value
4875 doesn't matter for empty structs. */
4876 if (bytesize == 0)
4877 return gen_rtx_REG (mode, GR_RET_FIRST);
4878 for (i = 0; offset < bytesize; i++)
4879 {
4880 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4881 gen_rtx_REG (DImode,
4882 GR_RET_FIRST + i),
4883 GEN_INT (offset));
4884 offset += UNITS_PER_WORD;
4885 }
4886 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4887 }
4888
4889 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4890 func ? TREE_TYPE (func) : NULL_TREE,
4891 true);
4892
4893 return gen_rtx_REG (mode, GR_RET_FIRST);
4894 }
4895 }
4896
4897 /* Worker function for TARGET_LIBCALL_VALUE. */
4898
4899 static rtx
4900 ia64_libcall_value (enum machine_mode mode,
4901 const_rtx fun ATTRIBUTE_UNUSED)
4902 {
4903 return gen_rtx_REG (mode,
4904 (((GET_MODE_CLASS (mode) == MODE_FLOAT
4905 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4906 && (mode) != TFmode)
4907 ? FR_RET_FIRST : GR_RET_FIRST));
4908 }
4909
4910 /* Worker function for FUNCTION_VALUE_REGNO_P. */
4911
4912 static bool
4913 ia64_function_value_regno_p (const unsigned int regno)
4914 {
4915 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
4916 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
4917 }
4918
4919 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4920 We need to emit DTP-relative relocations. */
4921
4922 static void
4923 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4924 {
4925 gcc_assert (size == 4 || size == 8);
4926 if (size == 4)
4927 fputs ("\tdata4.ua\t@dtprel(", file);
4928 else
4929 fputs ("\tdata8.ua\t@dtprel(", file);
4930 output_addr_const (file, x);
4931 fputs (")", file);
4932 }
4933
4934 /* Print a memory address as an operand to reference that memory location. */
4935
4936 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4937 also call this from ia64_print_operand for memory addresses. */
4938
4939 void
4940 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4941 rtx address ATTRIBUTE_UNUSED)
4942 {
4943 }
4944
4945 /* Print an operand to an assembler instruction.
4946 C Swap and print a comparison operator.
4947 D Print an FP comparison operator.
4948 E Print 32 - constant, for SImode shifts as extract.
4949 e Print 64 - constant, for DImode rotates.
4950 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4951 a floating point register emitted normally.
4952 G A floating point constant.
4953 I Invert a predicate register by adding 1.
4954 J Select the proper predicate register for a condition.
4955 j Select the inverse predicate register for a condition.
4956 O Append .acq for volatile load.
4957 P Postincrement of a MEM.
4958 Q Append .rel for volatile store.
4959 R Print .s .d or nothing for a single, double or no truncation.
4960 S Shift amount for shladd instruction.
4961 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4962 for Intel assembler.
4963 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4964 for Intel assembler.
4965 X A pair of floating point registers.
4966 r Print register name, or constant 0 as r0. HP compatibility for
4967 Linux kernel.
4968 v Print vector constant value as an 8-byte integer value. */
4969
4970 void
4971 ia64_print_operand (FILE * file, rtx x, int code)
4972 {
4973 const char *str;
4974
4975 switch (code)
4976 {
4977 case 0:
4978 /* Handled below. */
4979 break;
4980
4981 case 'C':
4982 {
4983 enum rtx_code c = swap_condition (GET_CODE (x));
4984 fputs (GET_RTX_NAME (c), file);
4985 return;
4986 }
4987
4988 case 'D':
4989 switch (GET_CODE (x))
4990 {
4991 case NE:
4992 str = "neq";
4993 break;
4994 case UNORDERED:
4995 str = "unord";
4996 break;
4997 case ORDERED:
4998 str = "ord";
4999 break;
5000 case UNLT:
5001 str = "nge";
5002 break;
5003 case UNLE:
5004 str = "ngt";
5005 break;
5006 case UNGT:
5007 str = "nle";
5008 break;
5009 case UNGE:
5010 str = "nlt";
5011 break;
5012 default:
5013 str = GET_RTX_NAME (GET_CODE (x));
5014 break;
5015 }
5016 fputs (str, file);
5017 return;
5018
5019 case 'E':
5020 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5021 return;
5022
5023 case 'e':
5024 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5025 return;
5026
5027 case 'F':
5028 if (x == CONST0_RTX (GET_MODE (x)))
5029 str = reg_names [FR_REG (0)];
5030 else if (x == CONST1_RTX (GET_MODE (x)))
5031 str = reg_names [FR_REG (1)];
5032 else
5033 {
5034 gcc_assert (GET_CODE (x) == REG);
5035 str = reg_names [REGNO (x)];
5036 }
5037 fputs (str, file);
5038 return;
5039
5040 case 'G':
5041 {
5042 long val[4];
5043 REAL_VALUE_TYPE rv;
5044 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5045 real_to_target (val, &rv, GET_MODE (x));
5046 if (GET_MODE (x) == SFmode)
5047 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5048 else if (GET_MODE (x) == DFmode)
5049 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5050 & 0xffffffff,
5051 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5052 & 0xffffffff);
5053 else
5054 output_operand_lossage ("invalid %%G mode");
5055 }
5056 return;
5057
5058 case 'I':
5059 fputs (reg_names [REGNO (x) + 1], file);
5060 return;
5061
5062 case 'J':
5063 case 'j':
5064 {
5065 unsigned int regno = REGNO (XEXP (x, 0));
5066 if (GET_CODE (x) == EQ)
5067 regno += 1;
5068 if (code == 'j')
5069 regno ^= 1;
5070 fputs (reg_names [regno], file);
5071 }
5072 return;
5073
5074 case 'O':
5075 if (MEM_VOLATILE_P (x))
5076 fputs(".acq", file);
5077 return;
5078
5079 case 'P':
5080 {
5081 HOST_WIDE_INT value;
5082
5083 switch (GET_CODE (XEXP (x, 0)))
5084 {
5085 default:
5086 return;
5087
5088 case POST_MODIFY:
5089 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5090 if (GET_CODE (x) == CONST_INT)
5091 value = INTVAL (x);
5092 else
5093 {
5094 gcc_assert (GET_CODE (x) == REG);
5095 fprintf (file, ", %s", reg_names[REGNO (x)]);
5096 return;
5097 }
5098 break;
5099
5100 case POST_INC:
5101 value = GET_MODE_SIZE (GET_MODE (x));
5102 break;
5103
5104 case POST_DEC:
5105 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5106 break;
5107 }
5108
5109 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5110 return;
5111 }
5112
5113 case 'Q':
5114 if (MEM_VOLATILE_P (x))
5115 fputs(".rel", file);
5116 return;
5117
5118 case 'R':
5119 if (x == CONST0_RTX (GET_MODE (x)))
5120 fputs(".s", file);
5121 else if (x == CONST1_RTX (GET_MODE (x)))
5122 fputs(".d", file);
5123 else if (x == CONST2_RTX (GET_MODE (x)))
5124 ;
5125 else
5126 output_operand_lossage ("invalid %%R value");
5127 return;
5128
5129 case 'S':
5130 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5131 return;
5132
5133 case 'T':
5134 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5135 {
5136 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5137 return;
5138 }
5139 break;
5140
5141 case 'U':
5142 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5143 {
5144 const char *prefix = "0x";
5145 if (INTVAL (x) & 0x80000000)
5146 {
5147 fprintf (file, "0xffffffff");
5148 prefix = "";
5149 }
5150 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5151 return;
5152 }
5153 break;
5154
5155 case 'X':
5156 {
5157 unsigned int regno = REGNO (x);
5158 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5159 }
5160 return;
5161
5162 case 'r':
5163 /* If this operand is the constant zero, write it as register zero.
5164 Any register, zero, or CONST_INT value is OK here. */
5165 if (GET_CODE (x) == REG)
5166 fputs (reg_names[REGNO (x)], file);
5167 else if (x == CONST0_RTX (GET_MODE (x)))
5168 fputs ("r0", file);
5169 else if (GET_CODE (x) == CONST_INT)
5170 output_addr_const (file, x);
5171 else
5172 output_operand_lossage ("invalid %%r value");
5173 return;
5174
5175 case 'v':
5176 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5177 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5178 break;
5179
5180 case '+':
5181 {
5182 const char *which;
5183
5184 /* For conditional branches, returns or calls, substitute
5185 sptk, dptk, dpnt, or spnt for %s. */
5186 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5187 if (x)
5188 {
5189 int pred_val = INTVAL (XEXP (x, 0));
5190
5191 /* Guess top and bottom 10% statically predicted. */
5192 if (pred_val < REG_BR_PROB_BASE / 50
5193 && br_prob_note_reliable_p (x))
5194 which = ".spnt";
5195 else if (pred_val < REG_BR_PROB_BASE / 2)
5196 which = ".dpnt";
5197 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5198 || !br_prob_note_reliable_p (x))
5199 which = ".dptk";
5200 else
5201 which = ".sptk";
5202 }
5203 else if (GET_CODE (current_output_insn) == CALL_INSN)
5204 which = ".sptk";
5205 else
5206 which = ".dptk";
5207
5208 fputs (which, file);
5209 return;
5210 }
5211
5212 case ',':
5213 x = current_insn_predicate;
5214 if (x)
5215 {
5216 unsigned int regno = REGNO (XEXP (x, 0));
5217 if (GET_CODE (x) == EQ)
5218 regno += 1;
5219 fprintf (file, "(%s) ", reg_names [regno]);
5220 }
5221 return;
5222
5223 default:
5224 output_operand_lossage ("ia64_print_operand: unknown code");
5225 return;
5226 }
5227
5228 switch (GET_CODE (x))
5229 {
5230 /* This happens for the spill/restore instructions. */
5231 case POST_INC:
5232 case POST_DEC:
5233 case POST_MODIFY:
5234 x = XEXP (x, 0);
5235 /* ... fall through ... */
5236
5237 case REG:
5238 fputs (reg_names [REGNO (x)], file);
5239 break;
5240
5241 case MEM:
5242 {
5243 rtx addr = XEXP (x, 0);
5244 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5245 addr = XEXP (addr, 0);
5246 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5247 break;
5248 }
5249
5250 default:
5251 output_addr_const (file, x);
5252 break;
5253 }
5254
5255 return;
5256 }
5257 \f
5258 /* Compute a (partial) cost for rtx X. Return true if the complete
5259 cost has been computed, and false if subexpressions should be
5260 scanned. In either case, *TOTAL contains the cost result. */
5261 /* ??? This is incomplete. */
5262
5263 static bool
5264 ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5265 bool speed ATTRIBUTE_UNUSED)
5266 {
5267 switch (code)
5268 {
5269 case CONST_INT:
5270 switch (outer_code)
5271 {
5272 case SET:
5273 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5274 return true;
5275 case PLUS:
5276 if (satisfies_constraint_I (x))
5277 *total = 0;
5278 else if (satisfies_constraint_J (x))
5279 *total = 1;
5280 else
5281 *total = COSTS_N_INSNS (1);
5282 return true;
5283 default:
5284 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5285 *total = 0;
5286 else
5287 *total = COSTS_N_INSNS (1);
5288 return true;
5289 }
5290
5291 case CONST_DOUBLE:
5292 *total = COSTS_N_INSNS (1);
5293 return true;
5294
5295 case CONST:
5296 case SYMBOL_REF:
5297 case LABEL_REF:
5298 *total = COSTS_N_INSNS (3);
5299 return true;
5300
5301 case FMA:
5302 *total = COSTS_N_INSNS (4);
5303 return true;
5304
5305 case MULT:
5306 /* For multiplies wider than HImode, we have to go to the FPU,
5307 which normally involves copies. Plus there's the latency
5308 of the multiply itself, and the latency of the instructions to
5309 transfer integer regs to FP regs. */
5310 if (FLOAT_MODE_P (GET_MODE (x)))
5311 *total = COSTS_N_INSNS (4);
5312 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5313 *total = COSTS_N_INSNS (10);
5314 else
5315 *total = COSTS_N_INSNS (2);
5316 return true;
5317
5318 case PLUS:
5319 case MINUS:
5320 if (FLOAT_MODE_P (GET_MODE (x)))
5321 {
5322 *total = COSTS_N_INSNS (4);
5323 return true;
5324 }
5325 /* FALLTHRU */
5326
5327 case ASHIFT:
5328 case ASHIFTRT:
5329 case LSHIFTRT:
5330 *total = COSTS_N_INSNS (1);
5331 return true;
5332
5333 case DIV:
5334 case UDIV:
5335 case MOD:
5336 case UMOD:
5337 /* We make divide expensive, so that divide-by-constant will be
5338 optimized to a multiply. */
5339 *total = COSTS_N_INSNS (60);
5340 return true;
5341
5342 default:
5343 return false;
5344 }
5345 }
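
/* A hedged illustration of the DIV/MOD costing above: with integer division
   priced at COSTS_N_INSNS (60), the middle end will normally expand a
   division by a constant such as x / 10 into a multiply-high and shift
   sequence rather than a real divide, because that sequence is far cheaper
   under this cost model.  */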
5346
5347 /* Calculate the cost of moving data from a register in class FROM to
5348 one in class TO, using MODE. */
5349
5350 static int
5351 ia64_register_move_cost (enum machine_mode mode, reg_class_t from_i,
5352 reg_class_t to_i)
5353 {
5354 enum reg_class from = (enum reg_class) from_i;
5355 enum reg_class to = (enum reg_class) to_i;
5356
5357 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5358 if (to == ADDL_REGS)
5359 to = GR_REGS;
5360 if (from == ADDL_REGS)
5361 from = GR_REGS;
5362
5363 /* All costs are symmetric, so reduce cases by putting the
5364 lower number class as the destination. */
5365 if (from < to)
5366 {
5367 enum reg_class tmp = to;
5368 to = from, from = tmp;
5369 }
5370
5371 /* Moving between FR and GR in XFmode must be more expensive than 2,
5372 so that we get secondary memory reloads. Between FR_REGS,
5373 we have to make this at least as expensive as memory_move_cost
5374 to avoid spectacularly poor register class preferencing. */
5375 if (mode == XFmode || mode == RFmode)
5376 {
5377 if (to != GR_REGS || from != GR_REGS)
5378 return memory_move_cost (mode, to, false);
5379 else
5380 return 3;
5381 }
5382
5383 switch (to)
5384 {
5385 case PR_REGS:
5386 /* Moving between PR registers takes two insns. */
5387 if (from == PR_REGS)
5388 return 3;
5389 /* Moving between PR and anything but GR is impossible. */
5390 if (from != GR_REGS)
5391 return memory_move_cost (mode, to, false);
5392 break;
5393
5394 case BR_REGS:
5395 /* Moving between BR and anything but GR is impossible. */
5396 if (from != GR_REGS && from != GR_AND_BR_REGS)
5397 return memory_move_cost (mode, to, false);
5398 break;
5399
5400 case AR_I_REGS:
5401 case AR_M_REGS:
5402 /* Moving between AR and anything but GR is impossible. */
5403 if (from != GR_REGS)
5404 return memory_move_cost (mode, to, false);
5405 break;
5406
5407 case GR_REGS:
5408 case FR_REGS:
5409 case FP_REGS:
5410 case GR_AND_FR_REGS:
5411 case GR_AND_BR_REGS:
5412 case ALL_REGS:
5413 break;
5414
5415 default:
5416 gcc_unreachable ();
5417 }
5418
5419 return 2;
5420 }
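
/* Rough illustration of the table above (not exhaustive): for an XFmode or
   RFmode value, any move involving a non-GR class is priced at
   memory_move_cost, which steers reload toward a secondary memory reload
   (spill to a stack slot and refill) instead of a direct FR<->GR copy,
   while a GR<->GR move of such a value costs only 3.  */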
5421
5422 /* Calculate the cost of moving data of MODE from a register to or from
5423 memory. */
5424
5425 static int
5426 ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5427 reg_class_t rclass,
5428 bool in ATTRIBUTE_UNUSED)
5429 {
5430 if (rclass == GENERAL_REGS
5431 || rclass == FR_REGS
5432 || rclass == FP_REGS
5433 || rclass == GR_AND_FR_REGS)
5434 return 4;
5435 else
5436 return 10;
5437 }
5438
5439 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5440 on RCLASS to use when copying X into that class. */
5441
5442 static reg_class_t
5443 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5444 {
5445 switch (rclass)
5446 {
5447 case FR_REGS:
5448 case FP_REGS:
5449 /* Don't allow volatile mem reloads into floating point registers.
5450 This is defined to force reload to choose the r/m case instead
5451 of the f/f case when reloading (set (reg fX) (mem/v)). */
5452 if (MEM_P (x) && MEM_VOLATILE_P (x))
5453 return NO_REGS;
5454
5455 /* Force all unrecognized constants into the constant pool. */
5456 if (CONSTANT_P (x))
5457 return NO_REGS;
5458 break;
5459
5460 case AR_M_REGS:
5461 case AR_I_REGS:
5462 if (!OBJECT_P (x))
5463 return NO_REGS;
5464 break;
5465
5466 default:
5467 break;
5468 }
5469
5470 return rclass;
5471 }
5472
5473 /* This function returns the register class required for a secondary
5474 register when copying between one of the registers in RCLASS, and X,
5475 using MODE. A return value of NO_REGS means that no secondary register
5476 is required. */
5477
5478 enum reg_class
5479 ia64_secondary_reload_class (enum reg_class rclass,
5480 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5481 {
5482 int regno = -1;
5483
5484 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5485 regno = true_regnum (x);
5486
5487 switch (rclass)
5488 {
5489 case BR_REGS:
5490 case AR_M_REGS:
5491 case AR_I_REGS:
5492 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5493 interaction. We end up with two pseudos with overlapping lifetimes
5494 both of which are equiv to the same constant, and both which need
5495 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5496 changes depending on the path length, which means the qty_first_reg
5497 check in make_regs_eqv can give different answers at different times.
5498 At some point I'll probably need a reload_indi pattern to handle
5499 this.
5500
5501 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5502 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5503 non-general registers for good measure. */
5504 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5505 return GR_REGS;
5506
5507 /* This is needed if a pseudo used as a call_operand gets spilled to a
5508 stack slot. */
5509 if (GET_CODE (x) == MEM)
5510 return GR_REGS;
5511 break;
5512
5513 case FR_REGS:
5514 case FP_REGS:
5515 /* Need to go through general registers to get to other class regs. */
5516 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5517 return GR_REGS;
5518
5519 /* This can happen when a paradoxical subreg is an operand to the
5520 muldi3 pattern. */
5521 /* ??? This shouldn't be necessary after instruction scheduling is
5522 enabled, because paradoxical subregs are not accepted by
5523 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5524 stop the paradoxical subreg stupidity in the *_operand functions
5525 in recog.c. */
5526 if (GET_CODE (x) == MEM
5527 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5528 || GET_MODE (x) == QImode))
5529 return GR_REGS;
5530
5531 /* This can happen because of the ior/and/etc patterns that accept FP
5532 registers as operands. If the third operand is a constant, then it
5533 needs to be reloaded into a FP register. */
5534 if (GET_CODE (x) == CONST_INT)
5535 return GR_REGS;
5536
5537 /* This can happen because of register elimination in a muldi3 insn.
5538 E.g. `26107 * (unsigned long)&u'. */
5539 if (GET_CODE (x) == PLUS)
5540 return GR_REGS;
5541 break;
5542
5543 case PR_REGS:
5544 /* ??? This happens if we cse/gcse a BImode value across a call,
5545 and the function has a nonlocal goto. This is because global
5546 does not allocate call crossing pseudos to hard registers when
5547 crtl->has_nonlocal_goto is true. This is relatively
5548 common for C++ programs that use exceptions. To reproduce,
5549 return NO_REGS and compile libstdc++. */
5550 if (GET_CODE (x) == MEM)
5551 return GR_REGS;
5552
5553 /* This can happen when we take a BImode subreg of a DImode value,
5554 and that DImode value winds up in some non-GR register. */
5555 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5556 return GR_REGS;
5557 break;
5558
5559 default:
5560 break;
5561 }
5562
5563 return NO_REGS;
5564 }
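
/* One illustrative case of the rules above: to satisfy an FR_REGS reload of
   a constant operand (e.g. for one of the ior/and patterns mentioned in the
   comments), GR_REGS is returned as the secondary class, so the constant is
   first materialized in a general register and only then transferred to the
   FP register (e.g. with a setf instruction).  */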
5565
5566 \f
5567 /* Implement targetm.unspec_may_trap_p hook. */
5568 static int
5569 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5570 {
5571 if (GET_CODE (x) == UNSPEC)
5572 {
5573 switch (XINT (x, 1))
5574 {
5575 case UNSPEC_LDA:
5576 case UNSPEC_LDS:
5577 case UNSPEC_LDSA:
5578 case UNSPEC_LDCCLR:
5579 case UNSPEC_CHKACLR:
5580 case UNSPEC_CHKS:
5581 /* These unspecs are just wrappers. */
5582 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5583 }
5584 }
5585
5586 return default_unspec_may_trap_p (x, flags);
5587 }
5588
5589 \f
5590 /* Parse the -mfixed-range= option string. */
5591
5592 static void
5593 fix_range (const char *const_str)
5594 {
5595 int i, first, last;
5596 char *str, *dash, *comma;
5597
5598 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5599 REG2 are either register names or register numbers. The effect
5600 of this option is to mark the registers in the range from REG1 to
5601 REG2 as ``fixed'' so they won't be used by the compiler. This is
5602 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5603
5604 i = strlen (const_str);
5605 str = (char *) alloca (i + 1);
5606 memcpy (str, const_str, i + 1);
5607
5608 while (1)
5609 {
5610 dash = strchr (str, '-');
5611 if (!dash)
5612 {
5613 warning (0, "value of -mfixed-range must have form REG1-REG2");
5614 return;
5615 }
5616 *dash = '\0';
5617
5618 comma = strchr (dash + 1, ',');
5619 if (comma)
5620 *comma = '\0';
5621
5622 first = decode_reg_name (str);
5623 if (first < 0)
5624 {
5625 warning (0, "unknown register name: %s", str);
5626 return;
5627 }
5628
5629 last = decode_reg_name (dash + 1);
5630 if (last < 0)
5631 {
5632 warning (0, "unknown register name: %s", dash + 1);
5633 return;
5634 }
5635
5636 *dash = '-';
5637
5638 if (first > last)
5639 {
5640 warning (0, "%s-%s is an empty range", str, dash + 1);
5641 return;
5642 }
5643
5644 for (i = first; i <= last; ++i)
5645 fixed_regs[i] = call_used_regs[i] = 1;
5646
5647 if (!comma)
5648 break;
5649
5650 *comma = ',';
5651 str = comma + 1;
5652 }
5653 }
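
/* Example of the accepted -mfixed-range syntax (values illustrative only):

     -mfixed-range=f32-f127
     -mfixed-range=f32-f127,f12-f15

   Each comma-separated REG1-REG2 piece is handled by one iteration of the
   loop above, and every register in the range becomes fixed and call-used.  */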
5654
5655 /* Implement TARGET_HANDLE_OPTION. */
5656
5657 static bool
5658 ia64_handle_option (size_t code, const char *arg, int value)
5659 {
5660 switch (code)
5661 {
5662 case OPT_mfixed_range_:
5663 fix_range (arg);
5664 return true;
5665
5666 case OPT_mtls_size_:
5667 if (value != 14 && value != 22 && value != 64)
5668 error ("bad value %<%s%> for -mtls-size= switch", arg);
5669 return true;
5670
5671 case OPT_mtune_:
5672 {
5673 static struct pta
5674 {
5675 const char *name; /* processor name or nickname. */
5676 enum processor_type processor;
5677 }
5678 const processor_alias_table[] =
5679 {
5680 {"itanium2", PROCESSOR_ITANIUM2},
5681 {"mckinley", PROCESSOR_ITANIUM2},
5682 };
5683 int const pta_size = ARRAY_SIZE (processor_alias_table);
5684 int i;
5685
5686 for (i = 0; i < pta_size; i++)
5687 if (!strcmp (arg, processor_alias_table[i].name))
5688 {
5689 ia64_tune = processor_alias_table[i].processor;
5690 break;
5691 }
5692 if (i == pta_size)
5693 error ("bad value %<%s%> for -mtune= switch", arg);
5694 return true;
5695 }
5696
5697 default:
5698 return true;
5699 }
5700 }
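
/* For example, both -mtune=itanium2 and -mtune=mckinley map to
   PROCESSOR_ITANIUM2 through the alias table above; any other -mtune name
   is rejected with the "bad value" error, and -mtls-size= accepts only
   14, 22 and 64.  */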
5701
5702 /* Implement TARGET_OPTION_OVERRIDE. */
5703
5704 static void
5705 ia64_option_override (void)
5706 {
5707 if (TARGET_AUTO_PIC)
5708 target_flags |= MASK_CONST_GP;
5709
5710 /* Numerous experiments show that IRA-based loop pressure
5711 calculation works better for RTL loop invariant motion on targets
5712 with enough (>= 32) registers. It is an expensive optimization,
5713 so it is enabled only when optimizing for peak performance. */
5714 if (optimize >= 3)
5715 flag_ira_loop_pressure = 1;
5716
5717
5718 ia64_section_threshold = (global_options_set.x_g_switch_value
5719 ? g_switch_value
5720 : IA64_DEFAULT_GVALUE);
5721
5722 init_machine_status = ia64_init_machine_status;
5723
5724 if (align_functions <= 0)
5725 align_functions = 64;
5726 if (align_loops <= 0)
5727 align_loops = 32;
5728 if (TARGET_ABI_OPEN_VMS)
5729 flag_no_common = 1;
5730
5731 ia64_override_options_after_change();
5732 }
5733
5734 /* Implement targetm.override_options_after_change. */
5735
5736 static void
5737 ia64_override_options_after_change (void)
5738 {
5739 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5740 flag_schedule_insns_after_reload = 0;
5741
5742 if (optimize >= 3
5743 && !global_options_set.x_flag_selective_scheduling
5744 && !global_options_set.x_flag_selective_scheduling2)
5745 {
5746 flag_selective_scheduling2 = 1;
5747 flag_sel_sched_pipelining = 1;
5748 }
5749 if (mflag_sched_control_spec == 2)
5750 {
5751 /* Control speculation is on by default for the selective scheduler,
5752 but not for the Haifa scheduler. */
5753 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5754 }
5755 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5756 {
5757 /* FIXME: remove this once breaking autoinc insns apart is
5758 implemented as a transformation. */
5759 flag_auto_inc_dec = 0;
5760 }
5761 }
5762
5763 /* Initialize the record of emitted frame related registers. */
5764
5765 void ia64_init_expanders (void)
5766 {
5767 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5768 }
5769
5770 static struct machine_function *
5771 ia64_init_machine_status (void)
5772 {
5773 return ggc_alloc_cleared_machine_function ();
5774 }
5775 \f
5776 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5777 static enum attr_type ia64_safe_type (rtx);
5778
5779 static enum attr_itanium_class
5780 ia64_safe_itanium_class (rtx insn)
5781 {
5782 if (recog_memoized (insn) >= 0)
5783 return get_attr_itanium_class (insn);
5784 else if (DEBUG_INSN_P (insn))
5785 return ITANIUM_CLASS_IGNORE;
5786 else
5787 return ITANIUM_CLASS_UNKNOWN;
5788 }
5789
5790 static enum attr_type
5791 ia64_safe_type (rtx insn)
5792 {
5793 if (recog_memoized (insn) >= 0)
5794 return get_attr_type (insn);
5795 else
5796 return TYPE_UNKNOWN;
5797 }
5798 \f
5799 /* The following collection of routines emit instruction group stop bits as
5800 necessary to avoid dependencies. */
5801
5802 /* Need to track some additional registers as far as serialization is
5803 concerned so we can properly handle br.call and br.ret. We could
5804 make these registers visible to gcc, but since these registers are
5805 never explicitly used in gcc generated code, it seems wasteful to
5806 do so (plus it would make the call and return patterns needlessly
5807 complex). */
5808 #define REG_RP (BR_REG (0))
5809 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5810 /* This is used for volatile asms which may require a stop bit immediately
5811 before and after them. */
5812 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5813 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5814 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5815
5816 /* For each register, we keep track of how it has been written in the
5817 current instruction group.
5818
5819 If a register is written unconditionally (no qualifying predicate),
5820 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5821
5822 If a register is written if its qualifying predicate P is true, we
5823 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5824 may be written again by the complement of P (P^1) and when this happens,
5825 WRITE_COUNT gets set to 2.
5826
5827 The result of this is that whenever an insn attempts to write a register
5828 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5829
5830 If a predicate register is written by a floating-point insn, we set
5831 WRITTEN_BY_FP to true.
5832
5833 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5834 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5835
5836 #if GCC_VERSION >= 4000
5837 #define RWS_FIELD_TYPE __extension__ unsigned short
5838 #else
5839 #define RWS_FIELD_TYPE unsigned int
5840 #endif
5841 struct reg_write_state
5842 {
5843 RWS_FIELD_TYPE write_count : 2;
5844 RWS_FIELD_TYPE first_pred : 10;
5845 RWS_FIELD_TYPE written_by_fp : 1;
5846 RWS_FIELD_TYPE written_by_and : 1;
5847 RWS_FIELD_TYPE written_by_or : 1;
5848 };
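
/* A worked example of the bookkeeping above (register number arbitrary):
   after an unconditional
       mov r14 = 1
   WRITE_COUNT for r14 is 2, so a second write of r14 in the same
   instruction group makes rws_access_regno report that a barrier is
   needed, and a stop bit (";;") is emitted between the two insns.  */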
5849
5850 /* Cumulative info for the current instruction group. */
5851 struct reg_write_state rws_sum[NUM_REGS];
5852 #ifdef ENABLE_CHECKING
5853 /* Bitmap indicating whether a register has been written in the current insn. */
5854 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5855 / HOST_BITS_PER_WIDEST_FAST_INT];
5856
5857 static inline void
5858 rws_insn_set (int regno)
5859 {
5860 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5861 SET_HARD_REG_BIT (rws_insn, regno);
5862 }
5863
5864 static inline int
5865 rws_insn_test (int regno)
5866 {
5867 return TEST_HARD_REG_BIT (rws_insn, regno);
5868 }
5869 #else
5870 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5871 unsigned char rws_insn[2];
5872
5873 static inline void
5874 rws_insn_set (int regno)
5875 {
5876 if (regno == REG_AR_CFM)
5877 rws_insn[0] = 1;
5878 else if (regno == REG_VOLATILE)
5879 rws_insn[1] = 1;
5880 }
5881
5882 static inline int
5883 rws_insn_test (int regno)
5884 {
5885 if (regno == REG_AR_CFM)
5886 return rws_insn[0];
5887 if (regno == REG_VOLATILE)
5888 return rws_insn[1];
5889 return 0;
5890 }
5891 #endif
5892
5893 /* Indicates whether this is the first instruction after a stop bit,
5894 in which case we don't need another stop bit. Without this,
5895 ia64_variable_issue will die when scheduling an alloc. */
5896 static int first_instruction;
5897
5898 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5899 RTL for one instruction. */
5900 struct reg_flags
5901 {
5902 unsigned int is_write : 1; /* Is register being written? */
5903 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5904 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5905 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5906 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5907 unsigned int is_sibcall : 1; /* Is this a sibling call (rather than a normal call)? */
5908 };
5909
5910 static void rws_update (int, struct reg_flags, int);
5911 static int rws_access_regno (int, struct reg_flags, int);
5912 static int rws_access_reg (rtx, struct reg_flags, int);
5913 static void update_set_flags (rtx, struct reg_flags *);
5914 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5915 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5916 static void init_insn_group_barriers (void);
5917 static int group_barrier_needed (rtx);
5918 static int safe_group_barrier_needed (rtx);
5919 static int in_safe_group_barrier;
5920
5921 /* Update *RWS for REGNO, which is being written by the current instruction,
5922 with predicate PRED, and associated register flags in FLAGS. */
5923
5924 static void
5925 rws_update (int regno, struct reg_flags flags, int pred)
5926 {
5927 if (pred)
5928 rws_sum[regno].write_count++;
5929 else
5930 rws_sum[regno].write_count = 2;
5931 rws_sum[regno].written_by_fp |= flags.is_fp;
5932 /* ??? Not tracking and/or across differing predicates. */
5933 rws_sum[regno].written_by_and = flags.is_and;
5934 rws_sum[regno].written_by_or = flags.is_or;
5935 rws_sum[regno].first_pred = pred;
5936 }
5937
5938 /* Handle an access to register REGNO of type FLAGS using predicate register
5939 PRED. Update rws_sum array. Return 1 if this access creates
5940 a dependency with an earlier instruction in the same group. */
5941
5942 static int
5943 rws_access_regno (int regno, struct reg_flags flags, int pred)
5944 {
5945 int need_barrier = 0;
5946
5947 gcc_assert (regno < NUM_REGS);
5948
5949 if (! PR_REGNO_P (regno))
5950 flags.is_and = flags.is_or = 0;
5951
5952 if (flags.is_write)
5953 {
5954 int write_count;
5955
5956 rws_insn_set (regno);
5957 write_count = rws_sum[regno].write_count;
5958
5959 switch (write_count)
5960 {
5961 case 0:
5962 /* The register has not been written yet. */
5963 if (!in_safe_group_barrier)
5964 rws_update (regno, flags, pred);
5965 break;
5966
5967 case 1:
5968 /* The register has been written via a predicate. Treat
5969 it like an unconditional write and do not try to check
5970 for complementary pred reg in earlier write. */
5971 if (flags.is_and && rws_sum[regno].written_by_and)
5972 ;
5973 else if (flags.is_or && rws_sum[regno].written_by_or)
5974 ;
5975 else
5976 need_barrier = 1;
5977 if (!in_safe_group_barrier)
5978 rws_update (regno, flags, pred);
5979 break;
5980
5981 case 2:
5982 /* The register has been unconditionally written already. We
5983 need a barrier. */
5984 if (flags.is_and && rws_sum[regno].written_by_and)
5985 ;
5986 else if (flags.is_or && rws_sum[regno].written_by_or)
5987 ;
5988 else
5989 need_barrier = 1;
5990 if (!in_safe_group_barrier)
5991 {
5992 rws_sum[regno].written_by_and = flags.is_and;
5993 rws_sum[regno].written_by_or = flags.is_or;
5994 }
5995 break;
5996
5997 default:
5998 gcc_unreachable ();
5999 }
6000 }
6001 else
6002 {
6003 if (flags.is_branch)
6004 {
6005 /* Branches have several RAW exceptions that allow us to avoid
6006 barriers. */
6007
6008 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6009 /* RAW dependencies on branch regs are permissible as long
6010 as the writer is a non-branch instruction. Since we
6011 never generate code that uses a branch register written
6012 by a branch instruction, handling this case is
6013 easy. */
6014 return 0;
6015
6016 if (REGNO_REG_CLASS (regno) == PR_REGS
6017 && ! rws_sum[regno].written_by_fp)
6018 /* The predicates of a branch are available within the
6019 same insn group as long as the predicate was written by
6020 something other than a floating-point instruction. */
6021 return 0;
6022 }
6023
6024 if (flags.is_and && rws_sum[regno].written_by_and)
6025 return 0;
6026 if (flags.is_or && rws_sum[regno].written_by_or)
6027 return 0;
6028
6029 switch (rws_sum[regno].write_count)
6030 {
6031 case 0:
6032 /* The register has not been written yet. */
6033 break;
6034
6035 case 1:
6036 /* The register has been written via a predicate, assume we
6037 need a barrier (don't check for complementary regs). */
6038 need_barrier = 1;
6039 break;
6040
6041 case 2:
6042 /* The register has been unconditionally written already. We
6043 need a barrier. */
6044 need_barrier = 1;
6045 break;
6046
6047 default:
6048 gcc_unreachable ();
6049 }
6050 }
6051
6052 return need_barrier;
6053 }
6054
6055 static int
6056 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6057 {
6058 int regno = REGNO (reg);
6059 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6060
6061 if (n == 1)
6062 return rws_access_regno (regno, flags, pred);
6063 else
6064 {
6065 int need_barrier = 0;
6066 while (--n >= 0)
6067 need_barrier |= rws_access_regno (regno + n, flags, pred);
6068 return need_barrier;
6069 }
6070 }
6071
6072 /* Examine X, which is a SET rtx, and update the register-access flags
6073 stored in *PFLAGS accordingly. */
6074
6075 static void
6076 update_set_flags (rtx x, struct reg_flags *pflags)
6077 {
6078 rtx src = SET_SRC (x);
6079
6080 switch (GET_CODE (src))
6081 {
6082 case CALL:
6083 return;
6084
6085 case IF_THEN_ELSE:
6086 /* There are four cases here:
6087 (1) The destination is (pc), in which case this is a branch,
6088 nothing here applies.
6089 (2) The destination is ar.lc, in which case this is a
6090 doloop_end_internal,
6091 (3) The destination is an fp register, in which case this is
6092 an fselect instruction.
6093 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6094 this is a check load.
6095 In all cases, nothing we do in this function applies. */
6096 return;
6097
6098 default:
6099 if (COMPARISON_P (src)
6100 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6101 /* Set pflags->is_fp to 1 so that we know we're dealing
6102 with a floating point comparison when processing the
6103 destination of the SET. */
6104 pflags->is_fp = 1;
6105
6106 /* Discover if this is a parallel comparison. We only handle
6107 and.orcm and or.andcm at present, since we must retain a
6108 strict inverse on the predicate pair. */
6109 else if (GET_CODE (src) == AND)
6110 pflags->is_and = 1;
6111 else if (GET_CODE (src) == IOR)
6112 pflags->is_or = 1;
6113
6114 break;
6115 }
6116 }
6117
6118 /* Subroutine of rtx_needs_barrier; this function determines whether the
6119 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6120 are as in rtx_needs_barrier; the condition of any enclosing COND_EXEC
6121 has already been folded into PRED by the caller. */
6122
6123 static int
6124 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6125 {
6126 int need_barrier = 0;
6127 rtx dst;
6128 rtx src = SET_SRC (x);
6129
6130 if (GET_CODE (src) == CALL)
6131 /* We don't need to worry about the result registers that
6132 get written by subroutine call. */
6133 return rtx_needs_barrier (src, flags, pred);
6134 else if (SET_DEST (x) == pc_rtx)
6135 {
6136 /* X is a conditional branch. */
6137 /* ??? This seems redundant, as the caller sets this bit for
6138 all JUMP_INSNs. */
6139 if (!ia64_spec_check_src_p (src))
6140 flags.is_branch = 1;
6141 return rtx_needs_barrier (src, flags, pred);
6142 }
6143
6144 if (ia64_spec_check_src_p (src))
6145 /* Avoid checking one register twice (in condition
6146 and in 'then' section) for ldc pattern. */
6147 {
6148 gcc_assert (REG_P (XEXP (src, 2)));
6149 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6150
6151 /* We process MEM below. */
6152 src = XEXP (src, 1);
6153 }
6154
6155 need_barrier |= rtx_needs_barrier (src, flags, pred);
6156
6157 dst = SET_DEST (x);
6158 if (GET_CODE (dst) == ZERO_EXTRACT)
6159 {
6160 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6161 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6162 }
6163 return need_barrier;
6164 }
6165
6166 /* Handle an access to rtx X of type FLAGS using predicate register
6167 PRED. Return 1 if this access creates a dependency with an earlier
6168 instruction in the same group. */
6169
6170 static int
6171 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6172 {
6173 int i, j;
6174 int is_complemented = 0;
6175 int need_barrier = 0;
6176 const char *format_ptr;
6177 struct reg_flags new_flags;
6178 rtx cond;
6179
6180 if (! x)
6181 return 0;
6182
6183 new_flags = flags;
6184
6185 switch (GET_CODE (x))
6186 {
6187 case SET:
6188 update_set_flags (x, &new_flags);
6189 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6190 if (GET_CODE (SET_SRC (x)) != CALL)
6191 {
6192 new_flags.is_write = 1;
6193 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6194 }
6195 break;
6196
6197 case CALL:
6198 new_flags.is_write = 0;
6199 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6200
6201 /* Avoid multiple register writes, in case this is a pattern with
6202 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6203 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6204 {
6205 new_flags.is_write = 1;
6206 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6207 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6208 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6209 }
6210 break;
6211
6212 case COND_EXEC:
6213 /* X is a predicated instruction. */
6214
6215 cond = COND_EXEC_TEST (x);
6216 gcc_assert (!pred);
6217 need_barrier = rtx_needs_barrier (cond, flags, 0);
6218
6219 if (GET_CODE (cond) == EQ)
6220 is_complemented = 1;
6221 cond = XEXP (cond, 0);
6222 gcc_assert (GET_CODE (cond) == REG
6223 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6224 pred = REGNO (cond);
6225 if (is_complemented)
6226 ++pred;
6227
6228 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6229 return need_barrier;
6230
6231 case CLOBBER:
6232 case USE:
6233 /* Clobber & use are for earlier compiler-phases only. */
6234 break;
6235
6236 case ASM_OPERANDS:
6237 case ASM_INPUT:
6238 /* We always emit stop bits for traditional asms. We emit stop bits
6239 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6240 if (GET_CODE (x) != ASM_OPERANDS
6241 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6242 {
6243 /* Avoid writing the register multiple times if we have multiple
6244 asm outputs. This avoids a failure in rws_access_reg. */
6245 if (! rws_insn_test (REG_VOLATILE))
6246 {
6247 new_flags.is_write = 1;
6248 rws_access_regno (REG_VOLATILE, new_flags, pred);
6249 }
6250 return 1;
6251 }
6252
6253 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6254 We cannot just fall through here since then we would be confused
6255 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
6256 traditional asms, unlike their normal usage. */
6257
6258 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6259 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6260 need_barrier = 1;
6261 break;
6262
6263 case PARALLEL:
6264 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6265 {
6266 rtx pat = XVECEXP (x, 0, i);
6267 switch (GET_CODE (pat))
6268 {
6269 case SET:
6270 update_set_flags (pat, &new_flags);
6271 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6272 break;
6273
6274 case USE:
6275 case CALL:
6276 case ASM_OPERANDS:
6277 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6278 break;
6279
6280 case CLOBBER:
6281 if (REG_P (XEXP (pat, 0))
6282 && extract_asm_operands (x) != NULL_RTX
6283 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6284 {
6285 new_flags.is_write = 1;
6286 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6287 new_flags, pred);
6288 new_flags = flags;
6289 }
6290 break;
6291
6292 case RETURN:
6293 break;
6294
6295 default:
6296 gcc_unreachable ();
6297 }
6298 }
6299 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6300 {
6301 rtx pat = XVECEXP (x, 0, i);
6302 if (GET_CODE (pat) == SET)
6303 {
6304 if (GET_CODE (SET_SRC (pat)) != CALL)
6305 {
6306 new_flags.is_write = 1;
6307 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6308 pred);
6309 }
6310 }
6311 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6312 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6313 }
6314 break;
6315
6316 case SUBREG:
6317 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6318 break;
6319 case REG:
6320 if (REGNO (x) == AR_UNAT_REGNUM)
6321 {
6322 for (i = 0; i < 64; ++i)
6323 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6324 }
6325 else
6326 need_barrier = rws_access_reg (x, flags, pred);
6327 break;
6328
6329 case MEM:
6330 /* Find the regs used in memory address computation. */
6331 new_flags.is_write = 0;
6332 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6333 break;
6334
6335 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6336 case SYMBOL_REF: case LABEL_REF: case CONST:
6337 break;
6338
6339 /* Operators with side-effects. */
6340 case POST_INC: case POST_DEC:
6341 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6342
6343 new_flags.is_write = 0;
6344 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6345 new_flags.is_write = 1;
6346 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6347 break;
6348
6349 case POST_MODIFY:
6350 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6351
6352 new_flags.is_write = 0;
6353 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6354 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6355 new_flags.is_write = 1;
6356 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6357 break;
6358
6359 /* Handle common unary and binary ops for efficiency. */
6360 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6361 case MOD: case UDIV: case UMOD: case AND: case IOR:
6362 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6363 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6364 case NE: case EQ: case GE: case GT: case LE:
6365 case LT: case GEU: case GTU: case LEU: case LTU:
6366 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6367 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6368 break;
6369
6370 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6371 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6372 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6373 case SQRT: case FFS: case POPCOUNT:
6374 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6375 break;
6376
6377 case VEC_SELECT:
6378 /* VEC_SELECT's second argument is a PARALLEL with integers that
6379 describe the elements selected. On ia64, those integers are
6380 always constants. Avoid walking the PARALLEL so that we don't
6381 get confused with "normal" parallels and then die. */
6382 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6383 break;
6384
6385 case UNSPEC:
6386 switch (XINT (x, 1))
6387 {
6388 case UNSPEC_LTOFF_DTPMOD:
6389 case UNSPEC_LTOFF_DTPREL:
6390 case UNSPEC_DTPREL:
6391 case UNSPEC_LTOFF_TPREL:
6392 case UNSPEC_TPREL:
6393 case UNSPEC_PRED_REL_MUTEX:
6394 case UNSPEC_PIC_CALL:
6395 case UNSPEC_MF:
6396 case UNSPEC_FETCHADD_ACQ:
6397 case UNSPEC_BSP_VALUE:
6398 case UNSPEC_FLUSHRS:
6399 case UNSPEC_BUNDLE_SELECTOR:
6400 break;
6401
6402 case UNSPEC_GR_SPILL:
6403 case UNSPEC_GR_RESTORE:
6404 {
6405 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6406 HOST_WIDE_INT bit = (offset >> 3) & 63;
6407
6408 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6409 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6410 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6411 new_flags, pred);
6412 break;
6413 }
6414
6415 case UNSPEC_FR_SPILL:
6416 case UNSPEC_FR_RESTORE:
6417 case UNSPEC_GETF_EXP:
6418 case UNSPEC_SETF_EXP:
6419 case UNSPEC_ADDP4:
6420 case UNSPEC_FR_SQRT_RECIP_APPROX:
6421 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6422 case UNSPEC_LDA:
6423 case UNSPEC_LDS:
6424 case UNSPEC_LDS_A:
6425 case UNSPEC_LDSA:
6426 case UNSPEC_CHKACLR:
6427 case UNSPEC_CHKS:
6428 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6429 break;
6430
6431 case UNSPEC_FR_RECIP_APPROX:
6432 case UNSPEC_SHRP:
6433 case UNSPEC_COPYSIGN:
6434 case UNSPEC_FR_RECIP_APPROX_RES:
6435 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6436 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6437 break;
6438
6439 case UNSPEC_CMPXCHG_ACQ:
6440 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6441 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6442 break;
6443
6444 default:
6445 gcc_unreachable ();
6446 }
6447 break;
6448
6449 case UNSPEC_VOLATILE:
6450 switch (XINT (x, 1))
6451 {
6452 case UNSPECV_ALLOC:
6453 /* Alloc must always be the first instruction of a group.
6454 We force this by always returning true. */
6455 /* ??? We might get better scheduling if we explicitly check for
6456 input/local/output register dependencies, and modify the
6457 scheduler so that alloc is always reordered to the start of
6458 the current group. We could then eliminate all of the
6459 first_instruction code. */
6460 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6461
6462 new_flags.is_write = 1;
6463 rws_access_regno (REG_AR_CFM, new_flags, pred);
6464 return 1;
6465
6466 case UNSPECV_SET_BSP:
6467 need_barrier = 1;
6468 break;
6469
6470 case UNSPECV_BLOCKAGE:
6471 case UNSPECV_INSN_GROUP_BARRIER:
6472 case UNSPECV_BREAK:
6473 case UNSPECV_PSAC_ALL:
6474 case UNSPECV_PSAC_NORMAL:
6475 return 0;
6476
6477 default:
6478 gcc_unreachable ();
6479 }
6480 break;
6481
6482 case RETURN:
6483 new_flags.is_write = 0;
6484 need_barrier = rws_access_regno (REG_RP, flags, pred);
6485 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6486
6487 new_flags.is_write = 1;
6488 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6489 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6490 break;
6491
6492 default:
6493 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6494 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6495 switch (format_ptr[i])
6496 {
6497 case '0': /* unused field */
6498 case 'i': /* integer */
6499 case 'n': /* note */
6500 case 'w': /* wide integer */
6501 case 's': /* pointer to string */
6502 case 'S': /* optional pointer to string */
6503 break;
6504
6505 case 'e':
6506 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6507 need_barrier = 1;
6508 break;
6509
6510 case 'E':
6511 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6512 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6513 need_barrier = 1;
6514 break;
6515
6516 default:
6517 gcc_unreachable ();
6518 }
6519 break;
6520 }
6521 return need_barrier;
6522 }
6523
6524 /* Clear out the state for group_barrier_needed at the start of a
6525 sequence of insns. */
6526
6527 static void
6528 init_insn_group_barriers (void)
6529 {
6530 memset (rws_sum, 0, sizeof (rws_sum));
6531 first_instruction = 1;
6532 }
6533
6534 /* Given the current state, determine whether a group barrier (a stop bit) is
6535 necessary before INSN. Return nonzero if so. This modifies the state to
6536 include the effects of INSN as a side-effect. */
6537
6538 static int
6539 group_barrier_needed (rtx insn)
6540 {
6541 rtx pat;
6542 int need_barrier = 0;
6543 struct reg_flags flags;
6544
6545 memset (&flags, 0, sizeof (flags));
6546 switch (GET_CODE (insn))
6547 {
6548 case NOTE:
6549 case DEBUG_INSN:
6550 break;
6551
6552 case BARRIER:
6553 /* A barrier doesn't imply an instruction group boundary. */
6554 break;
6555
6556 case CODE_LABEL:
6557 memset (rws_insn, 0, sizeof (rws_insn));
6558 return 1;
6559
6560 case CALL_INSN:
6561 flags.is_branch = 1;
6562 flags.is_sibcall = SIBLING_CALL_P (insn);
6563 memset (rws_insn, 0, sizeof (rws_insn));
6564
6565 /* Don't bundle a call following another call. */
6566 if ((pat = prev_active_insn (insn))
6567 && GET_CODE (pat) == CALL_INSN)
6568 {
6569 need_barrier = 1;
6570 break;
6571 }
6572
6573 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6574 break;
6575
6576 case JUMP_INSN:
6577 if (!ia64_spec_check_p (insn))
6578 flags.is_branch = 1;
6579
6580 /* Don't bundle a jump following a call. */
6581 if ((pat = prev_active_insn (insn))
6582 && GET_CODE (pat) == CALL_INSN)
6583 {
6584 need_barrier = 1;
6585 break;
6586 }
6587 /* FALLTHRU */
6588
6589 case INSN:
6590 if (GET_CODE (PATTERN (insn)) == USE
6591 || GET_CODE (PATTERN (insn)) == CLOBBER)
6592 /* Don't care about USE and CLOBBER "insns"---those are used to
6593 indicate to the optimizer that it shouldn't get rid of
6594 certain operations. */
6595 break;
6596
6597 pat = PATTERN (insn);
6598
6599 /* Ug. Hack hacks hacked elsewhere. */
6600 switch (recog_memoized (insn))
6601 {
6602 /* We play dependency tricks with the epilogue in order
6603 to get proper schedules. Undo this for dv analysis. */
6604 case CODE_FOR_epilogue_deallocate_stack:
6605 case CODE_FOR_prologue_allocate_stack:
6606 pat = XVECEXP (pat, 0, 0);
6607 break;
6608
6609 /* The pattern we use for br.cloop confuses the code above.
6610 The second element of the vector is representative. */
6611 case CODE_FOR_doloop_end_internal:
6612 pat = XVECEXP (pat, 0, 1);
6613 break;
6614
6615 /* Doesn't generate code. */
6616 case CODE_FOR_pred_rel_mutex:
6617 case CODE_FOR_prologue_use:
6618 return 0;
6619
6620 default:
6621 break;
6622 }
6623
6624 memset (rws_insn, 0, sizeof (rws_insn));
6625 need_barrier = rtx_needs_barrier (pat, flags, 0);
6626
6627 /* Check to see if the previous instruction was a volatile
6628 asm. */
6629 if (! need_barrier)
6630 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6631
6632 break;
6633
6634 default:
6635 gcc_unreachable ();
6636 }
6637
6638 if (first_instruction && INSN_P (insn)
6639 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6640 && GET_CODE (PATTERN (insn)) != USE
6641 && GET_CODE (PATTERN (insn)) != CLOBBER)
6642 {
6643 need_barrier = 0;
6644 first_instruction = 0;
6645 }
6646
6647 return need_barrier;
6648 }
6649
6650 /* Like group_barrier_needed, but do not clobber the current state. */
6651
6652 static int
6653 safe_group_barrier_needed (rtx insn)
6654 {
6655 int saved_first_instruction;
6656 int t;
6657
6658 saved_first_instruction = first_instruction;
6659 in_safe_group_barrier = 1;
6660
6661 t = group_barrier_needed (insn);
6662
6663 first_instruction = saved_first_instruction;
6664 in_safe_group_barrier = 0;
6665
6666 return t;
6667 }
6668
6669 /* Scan the current function and insert stop bits as necessary to
6670 eliminate dependencies. This function assumes that a final
6671 instruction scheduling pass has been run which has already
6672 inserted most of the necessary stop bits. This function only
6673 inserts new ones at basic block boundaries, since these are
6674 invisible to the scheduler. */
6675
6676 static void
6677 emit_insn_group_barriers (FILE *dump)
6678 {
6679 rtx insn;
6680 rtx last_label = 0;
6681 int insns_since_last_label = 0;
6682
6683 init_insn_group_barriers ();
6684
6685 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6686 {
6687 if (GET_CODE (insn) == CODE_LABEL)
6688 {
6689 if (insns_since_last_label)
6690 last_label = insn;
6691 insns_since_last_label = 0;
6692 }
6693 else if (GET_CODE (insn) == NOTE
6694 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6695 {
6696 if (insns_since_last_label)
6697 last_label = insn;
6698 insns_since_last_label = 0;
6699 }
6700 else if (GET_CODE (insn) == INSN
6701 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6702 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6703 {
6704 init_insn_group_barriers ();
6705 last_label = 0;
6706 }
6707 else if (NONDEBUG_INSN_P (insn))
6708 {
6709 insns_since_last_label = 1;
6710
6711 if (group_barrier_needed (insn))
6712 {
6713 if (last_label)
6714 {
6715 if (dump)
6716 fprintf (dump, "Emitting stop before label %d\n",
6717 INSN_UID (last_label));
6718 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6719 insn = last_label;
6720
6721 init_insn_group_barriers ();
6722 last_label = 0;
6723 }
6724 }
6725 }
6726 }
6727 }
6728
6729 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6730 This function has to emit all necessary group barriers. */
6731
6732 static void
6733 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6734 {
6735 rtx insn;
6736
6737 init_insn_group_barriers ();
6738
6739 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6740 {
6741 if (GET_CODE (insn) == BARRIER)
6742 {
6743 rtx last = prev_active_insn (insn);
6744
6745 if (! last)
6746 continue;
6747 if (GET_CODE (last) == JUMP_INSN
6748 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6749 last = prev_active_insn (last);
6750 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6751 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6752
6753 init_insn_group_barriers ();
6754 }
6755 else if (NONDEBUG_INSN_P (insn))
6756 {
6757 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6758 init_insn_group_barriers ();
6759 else if (group_barrier_needed (insn))
6760 {
6761 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6762 init_insn_group_barriers ();
6763 group_barrier_needed (insn);
6764 }
6765 }
6766 }
6767 }
6768
6769 \f
6770
6771 /* Instruction scheduling support. */
6772
6773 #define NR_BUNDLES 10
6774
6775 /* A list of names of all available bundles. */
6776
6777 static const char *bundle_name [NR_BUNDLES] =
6778 {
6779 ".mii",
6780 ".mmi",
6781 ".mfi",
6782 ".mmf",
6783 #if NR_BUNDLES == 10
6784 ".bbb",
6785 ".mbb",
6786 #endif
6787 ".mib",
6788 ".mmb",
6789 ".mfb",
6790 ".mlx"
6791 };
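
/* For reference, each template above names the unit types of the three
   slots of a bundle in the emitted assembly; an ".mii" bundle might look
   like this (illustrative instructions only):

     { .mii
	 ld8 r14 = [r32]
	 add r15 = r16, r33
	 nop.i 0 ;;
     }
   */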
6792
6793 /* Nonzero if we should insert stop bits into the schedule. */
6794
6795 int ia64_final_schedule = 0;
6796
6797 /* Codes of the corresponding queried units: */
6798
6799 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6800 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6801
6802 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6803 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6804
6805 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6806
6807 /* The following variable value is an insn group barrier. */
6808
6809 static rtx dfa_stop_insn;
6810
6811 /* The following variable value is the last issued insn. */
6812
6813 static rtx last_scheduled_insn;
6814
6815 /* The following variable value is a pointer to a DFA state used as a
6816 temporary variable. */
6817
6818 static state_t temp_dfa_state = NULL;
6819
6820 /* The following variable value is the DFA state after issuing the last
6821 insn. */
6822
6823 static state_t prev_cycle_state = NULL;
6824
6825 /* The following array element values are TRUE if the corresponding
6826 insn requires stop bits to be added before it. */
6827
6828 static char *stops_p = NULL;
6829
6830 /* The following variable is used to set up the above-mentioned array. */
6831
6832 static int stop_before_p = 0;
6833
6834 /* The following variable value is the length of the arrays `clocks' and
6835 `add_cycles'. */
6836
6837 static int clocks_length;
6838
6839 /* The following variable value is number of data speculations in progress. */
6840 static int pending_data_specs = 0;
6841
6842 /* Number of memory references on the current and three future processor cycles. */
6843 static char mem_ops_in_group[4];
6844
6845 /* The number of the current processor cycle (from the scheduler's point of view). */
6846 static int current_cycle;
6847
6848 static rtx ia64_single_set (rtx);
6849 static void ia64_emit_insn_before (rtx, rtx);
6850
6851 /* Map a bundle number to its pseudo-op. */
6852
6853 const char *
6854 get_bundle_name (int b)
6855 {
6856 return bundle_name[b];
6857 }
6858
6859
6860 /* Return the maximum number of instructions a cpu can issue. */
6861
6862 static int
6863 ia64_issue_rate (void)
6864 {
6865 return 6;
6866 }
6867
6868 /* Helper function - like single_set, but look inside COND_EXEC. */
6869
6870 static rtx
6871 ia64_single_set (rtx insn)
6872 {
6873 rtx x = PATTERN (insn), ret;
6874 if (GET_CODE (x) == COND_EXEC)
6875 x = COND_EXEC_CODE (x);
6876 if (GET_CODE (x) == SET)
6877 return x;
6878
6879 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6880 Although they are not classical single sets, the second set is there just
6881 to protect it from moving past FP-relative stack accesses. */
6882 switch (recog_memoized (insn))
6883 {
6884 case CODE_FOR_prologue_allocate_stack:
6885 case CODE_FOR_epilogue_deallocate_stack:
6886 ret = XVECEXP (x, 0, 0);
6887 break;
6888
6889 default:
6890 ret = single_set_2 (insn, x);
6891 break;
6892 }
6893
6894 return ret;
6895 }
6896
6897 /* Adjust the cost of a scheduling dependency.
6898 Return the new cost of the DEP_TYPE dependency of INSN on DEP_INSN.
6899 COST is the current cost, DW is dependency weakness. */
6900 static int
6901 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6902 {
6903 enum reg_note dep_type = (enum reg_note) dep_type1;
6904 enum attr_itanium_class dep_class;
6905 enum attr_itanium_class insn_class;
6906
6907 insn_class = ia64_safe_itanium_class (insn);
6908 dep_class = ia64_safe_itanium_class (dep_insn);
6909
6910 /* Treat true memory dependencies separately. Ignore apparent true
6911 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6912 if (dep_type == REG_DEP_TRUE
6913 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6914 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6915 return 0;
6916
6917 if (dw == MIN_DEP_WEAK)
6918 /* Store and load are likely to alias, use higher cost to avoid stall. */
6919 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6920 else if (dw > MIN_DEP_WEAK)
6921 {
6922 /* Store and load are less likely to alias. */
6923 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6924 /* Assume there will be no cache conflict for floating-point data.
6925 For integer data, L1 conflict penalty is huge (17 cycles), so we
6926 never assume it will not cause a conflict. */
6927 return 0;
6928 else
6929 return cost;
6930 }
6931
6932 if (dep_type != REG_DEP_OUTPUT)
6933 return cost;
6934
6935 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6936 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6937 return 0;
6938
6939 return cost;
6940 }
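
/* Illustration of the adjustments above: a store followed by a load that the
   dependence analysis considers likely to alias (DW == MIN_DEP_WEAK) has its
   cost raised to PARAM_SCHED_MEM_TRUE_DEP_COST so the scheduler keeps the
   pair apart, while an output dependence in which either insn is a store is
   given cost 0.  */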
6941
6942 /* Like emit_insn_before, but skip cycle_display notes.
6943 ??? When cycle display notes are implemented, update this. */
6944
6945 static void
6946 ia64_emit_insn_before (rtx insn, rtx before)
6947 {
6948 emit_insn_before (insn, before);
6949 }
6950
6951 /* The following function marks insns that produce addresses for load
6952 and store insns. Such insns will be placed into M slots because that
6953 decreases the latency for Itanium1 (see function
6954 `ia64_produce_address_p' and the DFA descriptions). */
6955
6956 static void
6957 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6958 {
6959 rtx insn, next, next_tail;
6960
6961 /* Before reload, which_alternative is not set, which means that
6962 ia64_safe_itanium_class will produce wrong results for (at least)
6963 move instructions. */
6964 if (!reload_completed)
6965 return;
6966
6967 next_tail = NEXT_INSN (tail);
6968 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6969 if (INSN_P (insn))
6970 insn->call = 0;
6971 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6972 if (INSN_P (insn)
6973 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6974 {
6975 sd_iterator_def sd_it;
6976 dep_t dep;
6977 bool has_mem_op_consumer_p = false;
6978
6979 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6980 {
6981 enum attr_itanium_class c;
6982
6983 if (DEP_TYPE (dep) != REG_DEP_TRUE)
6984 continue;
6985
6986 next = DEP_CON (dep);
6987 c = ia64_safe_itanium_class (next);
6988 if ((c == ITANIUM_CLASS_ST
6989 || c == ITANIUM_CLASS_STF)
6990 && ia64_st_address_bypass_p (insn, next))
6991 {
6992 has_mem_op_consumer_p = true;
6993 break;
6994 }
6995 else if ((c == ITANIUM_CLASS_LD
6996 || c == ITANIUM_CLASS_FLD
6997 || c == ITANIUM_CLASS_FLDP)
6998 && ia64_ld_address_bypass_p (insn, next))
6999 {
7000 has_mem_op_consumer_p = true;
7001 break;
7002 }
7003 }
7004
7005 insn->call = has_mem_op_consumer_p;
7006 }
7007 }
7008
7009 /* We're beginning a new block. Initialize data structures as necessary. */
7010
7011 static void
7012 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7013 int sched_verbose ATTRIBUTE_UNUSED,
7014 int max_ready ATTRIBUTE_UNUSED)
7015 {
7016 #ifdef ENABLE_CHECKING
7017 rtx insn;
7018
7019 if (!sel_sched_p () && reload_completed)
7020 for (insn = NEXT_INSN (current_sched_info->prev_head);
7021 insn != current_sched_info->next_tail;
7022 insn = NEXT_INSN (insn))
7023 gcc_assert (!SCHED_GROUP_P (insn));
7024 #endif
7025 last_scheduled_insn = NULL_RTX;
7026 init_insn_group_barriers ();
7027
7028 current_cycle = 0;
7029 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7030 }
7031
7032 /* We're beginning a scheduling pass. Check assertion. */
7033
7034 static void
7035 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7036 int sched_verbose ATTRIBUTE_UNUSED,
7037 int max_ready ATTRIBUTE_UNUSED)
7038 {
7039 gcc_assert (pending_data_specs == 0);
7040 }
7041
7042 /* Scheduling pass is now finished. Free/reset static variable. */
7043 static void
7044 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7045 int sched_verbose ATTRIBUTE_UNUSED)
7046 {
7047 gcc_assert (pending_data_specs == 0);
7048 }
7049
7050 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7051 speculation check), FALSE otherwise. */
7052 static bool
7053 is_load_p (rtx insn)
7054 {
7055 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7056
7057 return
7058 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7059 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7060 }
7061
7062 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
7063 array, taking into account the 3-cycle cache-reference postponement for
7064 stores (Intel Itanium 2 Reference Manual for Software Development and
7065 Optimization, 6.7.3.1). */
7066 static void
7067 record_memory_reference (rtx insn)
7068 {
7069 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7070
7071 switch (insn_class) {
7072 case ITANIUM_CLASS_FLD:
7073 case ITANIUM_CLASS_LD:
7074 mem_ops_in_group[current_cycle % 4]++;
7075 break;
7076 case ITANIUM_CLASS_STF:
7077 case ITANIUM_CLASS_ST:
7078 mem_ops_in_group[(current_cycle + 3) % 4]++;
7079 break;
7080 default:;
7081 }
7082 }
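
/* Sketch of the accounting above (cycle numbers assumed): a load issued on
   cycle N is counted in mem_ops_in_group[N % 4] immediately, whereas a store
   issued on cycle N is charged to slot (N + 3) % 4, reflecting the delayed
   cache reference described in the comment before this function.  */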
7083
7084 /* We are about to begin issuing insns for this clock cycle.
7085 Override the default sort algorithm to better slot instructions. */
7086
7087 static int
7088 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
7089 int *pn_ready, int clock_var,
7090 int reorder_type)
7091 {
7092 int n_asms;
7093 int n_ready = *pn_ready;
7094 rtx *e_ready = ready + n_ready;
7095 rtx *insnp;
7096
7097 if (sched_verbose)
7098 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7099
7100 if (reorder_type == 0)
7101 {
7102 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7103 n_asms = 0;
7104 for (insnp = ready; insnp < e_ready; insnp++)
7105 if (insnp < e_ready)
7106 {
7107 rtx insn = *insnp;
7108 enum attr_type t = ia64_safe_type (insn);
7109 if (t == TYPE_UNKNOWN)
7110 {
7111 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7112 || asm_noperands (PATTERN (insn)) >= 0)
7113 {
7114 rtx lowest = ready[n_asms];
7115 ready[n_asms] = insn;
7116 *insnp = lowest;
7117 n_asms++;
7118 }
7119 else
7120 {
7121 rtx highest = ready[n_ready - 1];
7122 ready[n_ready - 1] = insn;
7123 *insnp = highest;
7124 return 1;
7125 }
7126 }
7127 }
7128
7129 if (n_asms < n_ready)
7130 {
7131 /* Some normal insns to process. Skip the asms. */
7132 ready += n_asms;
7133 n_ready -= n_asms;
7134 }
7135 else if (n_ready > 0)
7136 return 1;
7137 }
7138
7139 if (ia64_final_schedule)
7140 {
7141 int deleted = 0;
7142 int nr_need_stop = 0;
7143
7144 for (insnp = ready; insnp < e_ready; insnp++)
7145 if (safe_group_barrier_needed (*insnp))
7146 nr_need_stop++;
7147
7148 if (reorder_type == 1 && n_ready == nr_need_stop)
7149 return 0;
7150 if (reorder_type == 0)
7151 return 1;
7152 insnp = e_ready;
7153 /* Move down everything that needs a stop bit, preserving
7154 relative order. */
7155 while (insnp-- > ready + deleted)
7156 while (insnp >= ready + deleted)
7157 {
7158 rtx insn = *insnp;
7159 if (! safe_group_barrier_needed (insn))
7160 break;
7161 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7162 *ready = insn;
7163 deleted++;
7164 }
7165 n_ready -= deleted;
7166 ready += deleted;
7167 }
7168
7169 current_cycle = clock_var;
7170 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7171 {
7172 int moved = 0;
7173
7174 insnp = e_ready;
7175 /* Move down loads/stores, preserving relative order. */
7176 while (insnp-- > ready + moved)
7177 while (insnp >= ready + moved)
7178 {
7179 rtx insn = *insnp;
7180 if (! is_load_p (insn))
7181 break;
7182 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7183 *ready = insn;
7184 moved++;
7185 }
7186 n_ready -= moved;
7187 ready += moved;
7188 }
7189
7190 return 1;
7191 }
7192
7193 /* We are about to begin issuing insns for this clock cycle. Override
7194 the default sort algorithm to better slot instructions. */
7195
7196 static int
7197 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7198 int clock_var)
7199 {
7200 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7201 pn_ready, clock_var, 0);
7202 }
7203
7204 /* Like ia64_sched_reorder, but called after issuing each insn.
7205 Override the default sort algorithm to better slot instructions. */
7206
7207 static int
7208 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7209 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7210 int *pn_ready, int clock_var)
7211 {
7212 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7213 clock_var, 1);
7214 }
7215
7216 /* We are about to issue INSN. Return the number of insns left on the
7217 ready queue that can be issued this cycle. */
7218
7219 static int
7220 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7221 int sched_verbose ATTRIBUTE_UNUSED,
7222 rtx insn ATTRIBUTE_UNUSED,
7223 int can_issue_more ATTRIBUTE_UNUSED)
7224 {
7225 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7226 /* Modulo scheduling does not extend h_i_d when emitting
7227 new instructions. Don't use h_i_d if we don't have to. */
7228 {
7229 if (DONE_SPEC (insn) & BEGIN_DATA)
7230 pending_data_specs++;
7231 if (CHECK_SPEC (insn) & BEGIN_DATA)
7232 pending_data_specs--;
7233 }
7234
7235 if (DEBUG_INSN_P (insn))
7236 return 1;
7237
7238 last_scheduled_insn = insn;
7239 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7240 if (reload_completed)
7241 {
7242 int needed = group_barrier_needed (insn);
7243
7244 gcc_assert (!needed);
7245 if (GET_CODE (insn) == CALL_INSN)
7246 init_insn_group_barriers ();
7247 stops_p [INSN_UID (insn)] = stop_before_p;
7248 stop_before_p = 0;
7249
7250 record_memory_reference (insn);
7251 }
7252 return 1;
7253 }
7254
7255 /* We are choosing insn from the ready queue. Return nonzero if INSN
7256 can be chosen. */
7257
7258 static int
7259 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7260 {
7261 gcc_assert (insn && INSN_P (insn));
7262 return ((!reload_completed
7263 || !safe_group_barrier_needed (insn))
7264 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7265 && (!mflag_sched_mem_insns_hard_limit
7266 || !is_load_p (insn)
7267 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7268 }
7269
7270 /* We are choosing insn from the ready queue. Return nonzero if INSN
7271 can be chosen. */
7272
7273 static bool
7274 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7275 {
7276 gcc_assert (insn && INSN_P (insn));
7277 /* The ALAT has 32 entries. Since we perform conservative data speculation,
7278 we keep the ALAT no more than half full. */
7279 return (pending_data_specs < 16
7280 || !(TODO_SPEC (insn) & BEGIN_DATA));
7281 }
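
/* Worked example of the guard above: the ALAT has 32 entries and we
   allow at most half of them to be occupied by in-flight data
   speculation.  With pending_data_specs == 16 (sixteen ld.a/ld.sa
   already issued but not yet checked), a 17th insn whose TODO_SPEC
   includes BEGIN_DATA is rejected here; it becomes eligible again
   once ia64_variable_issue sees a corresponding check and decrements
   pending_data_specs.  */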
7282
7283 /* The following variable value is pseudo-insn used by the DFA insn
7284 scheduler to change the DFA state when the simulated clock is
7285 increased. */
7286
7287 static rtx dfa_pre_cycle_insn;
7288
7289 /* Returns 1 when a meaningful insn was scheduled between the last group
7290 barrier and LAST. */
7291 static int
7292 scheduled_good_insn (rtx last)
7293 {
7294 if (last && recog_memoized (last) >= 0)
7295 return 1;
7296
7297 for ( ;
7298 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7299 && !stops_p[INSN_UID (last)];
7300 last = PREV_INSN (last))
7301 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7302 the ebb we're scheduling. */
7303 if (INSN_P (last) && recog_memoized (last) >= 0)
7304 return 1;
7305
7306 return 0;
7307 }
7308
7309 /* We are about to begin issuing INSN. Return nonzero if we cannot
7310 issue it on the given cycle CLOCK; clear *SORT_P if the ready queue
7311 should not be sorted on the next clock start. */
7312
7313 static int
7314 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7315 int clock, int *sort_p)
7316 {
7317 gcc_assert (insn && INSN_P (insn));
7318
7319 if (DEBUG_INSN_P (insn))
7320 return 0;
7321
7322 /* When a group barrier is needed for insn, last_scheduled_insn
7323 should be set. */
7324 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7325 || last_scheduled_insn);
7326
7327 if ((reload_completed
7328 && (safe_group_barrier_needed (insn)
7329 || (mflag_sched_stop_bits_after_every_cycle
7330 && last_clock != clock
7331 && last_scheduled_insn
7332 && scheduled_good_insn (last_scheduled_insn))))
7333 || (last_scheduled_insn
7334 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7335 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7336 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7337 {
7338 init_insn_group_barriers ();
7339
7340 if (verbose && dump)
7341 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7342 last_clock == clock ? " + cycle advance" : "");
7343
7344 stop_before_p = 1;
7345 current_cycle = clock;
7346 mem_ops_in_group[current_cycle % 4] = 0;
7347
7348 if (last_clock == clock)
7349 {
7350 state_transition (curr_state, dfa_stop_insn);
7351 if (TARGET_EARLY_STOP_BITS)
7352 *sort_p = (last_scheduled_insn == NULL_RTX
7353 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7354 else
7355 *sort_p = 0;
7356 return 1;
7357 }
7358
7359 if (last_scheduled_insn)
7360 {
7361 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7362 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7363 state_reset (curr_state);
7364 else
7365 {
7366 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7367 state_transition (curr_state, dfa_stop_insn);
7368 state_transition (curr_state, dfa_pre_cycle_insn);
7369 state_transition (curr_state, NULL);
7370 }
7371 }
7372 }
7373 return 0;
7374 }
7375
7376 /* Implement targetm.sched.h_i_d_extended hook.
7377 Extend internal data structures. */
7378 static void
7379 ia64_h_i_d_extended (void)
7380 {
7381 if (stops_p != NULL)
7382 {
7383 int new_clocks_length = get_max_uid () * 3 / 2;
7384 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7385 clocks_length = new_clocks_length;
7386 }
7387 }
7388 \f
7389
7390 /* This structure describes the data used by the backend to guide scheduling.
7391 When the current scheduling point is switched, this data should be saved
7392 and restored later, if the scheduler returns to this point. */
7393 struct _ia64_sched_context
7394 {
7395 state_t prev_cycle_state;
7396 rtx last_scheduled_insn;
7397 struct reg_write_state rws_sum[NUM_REGS];
7398 struct reg_write_state rws_insn[NUM_REGS];
7399 int first_instruction;
7400 int pending_data_specs;
7401 int current_cycle;
7402 char mem_ops_in_group[4];
7403 };
7404 typedef struct _ia64_sched_context *ia64_sched_context_t;
7405
7406 /* Allocates a scheduling context. */
7407 static void *
7408 ia64_alloc_sched_context (void)
7409 {
7410 return xmalloc (sizeof (struct _ia64_sched_context));
7411 }
7412
7413 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7414 the global context otherwise. */
7415 static void
7416 ia64_init_sched_context (void *_sc, bool clean_p)
7417 {
7418 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7419
7420 sc->prev_cycle_state = xmalloc (dfa_state_size);
7421 if (clean_p)
7422 {
7423 state_reset (sc->prev_cycle_state);
7424 sc->last_scheduled_insn = NULL_RTX;
7425 memset (sc->rws_sum, 0, sizeof (rws_sum));
7426 memset (sc->rws_insn, 0, sizeof (rws_insn));
7427 sc->first_instruction = 1;
7428 sc->pending_data_specs = 0;
7429 sc->current_cycle = 0;
7430 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7431 }
7432 else
7433 {
7434 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7435 sc->last_scheduled_insn = last_scheduled_insn;
7436 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7437 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7438 sc->first_instruction = first_instruction;
7439 sc->pending_data_specs = pending_data_specs;
7440 sc->current_cycle = current_cycle;
7441 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7442 }
7443 }
7444
7445 /* Sets the global scheduling context to the one pointed to by _SC. */
7446 static void
7447 ia64_set_sched_context (void *_sc)
7448 {
7449 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7450
7451 gcc_assert (sc != NULL);
7452
7453 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7454 last_scheduled_insn = sc->last_scheduled_insn;
7455 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7456 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7457 first_instruction = sc->first_instruction;
7458 pending_data_specs = sc->pending_data_specs;
7459 current_cycle = sc->current_cycle;
7460 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7461 }
7462
7463 /* Clears the data in the _SC scheduling context. */
7464 static void
7465 ia64_clear_sched_context (void *_sc)
7466 {
7467 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7468
7469 free (sc->prev_cycle_state);
7470 sc->prev_cycle_state = NULL;
7471 }
7472
7473 /* Frees the _SC scheduling context. */
7474 static void
7475 ia64_free_sched_context (void *_sc)
7476 {
7477 gcc_assert (_sc != NULL);
7478
7479 free (_sc);
7480 }
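
/* Illustrative only (not compiled): the intended lifecycle of a
   scheduling context through the five routines above.  The caller
   shown here is hypothetical; in reality the selective scheduler
   drives these routines through the targetm.sched hook table.  */
#if 0
static void
sched_context_lifecycle_sketch (void)
{
  void *ctx = ia64_alloc_sched_context ();

  /* Snapshot the current global scheduling state (clean_p == false).  */
  ia64_init_sched_context (ctx, false);

  /* ... schedule at some other point, changing the globals ...  */

  /* Come back: restore the snapshot, then release it.  */
  ia64_set_sched_context (ctx);
  ia64_clear_sched_context (ctx);
  ia64_free_sched_context (ctx);
}
#endif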
7481
7482 typedef rtx (* gen_func_t) (rtx, rtx);
7483
7484 /* Return a function that will generate a load of mode MODE_NO
7485 with speculation types TS. */
7486 static gen_func_t
7487 get_spec_load_gen_function (ds_t ts, int mode_no)
7488 {
7489 static gen_func_t gen_ld_[] = {
7490 gen_movbi,
7491 gen_movqi_internal,
7492 gen_movhi_internal,
7493 gen_movsi_internal,
7494 gen_movdi_internal,
7495 gen_movsf_internal,
7496 gen_movdf_internal,
7497 gen_movxf_internal,
7498 gen_movti_internal,
7499 gen_zero_extendqidi2,
7500 gen_zero_extendhidi2,
7501 gen_zero_extendsidi2,
7502 };
7503
7504 static gen_func_t gen_ld_a[] = {
7505 gen_movbi_advanced,
7506 gen_movqi_advanced,
7507 gen_movhi_advanced,
7508 gen_movsi_advanced,
7509 gen_movdi_advanced,
7510 gen_movsf_advanced,
7511 gen_movdf_advanced,
7512 gen_movxf_advanced,
7513 gen_movti_advanced,
7514 gen_zero_extendqidi2_advanced,
7515 gen_zero_extendhidi2_advanced,
7516 gen_zero_extendsidi2_advanced,
7517 };
7518 static gen_func_t gen_ld_s[] = {
7519 gen_movbi_speculative,
7520 gen_movqi_speculative,
7521 gen_movhi_speculative,
7522 gen_movsi_speculative,
7523 gen_movdi_speculative,
7524 gen_movsf_speculative,
7525 gen_movdf_speculative,
7526 gen_movxf_speculative,
7527 gen_movti_speculative,
7528 gen_zero_extendqidi2_speculative,
7529 gen_zero_extendhidi2_speculative,
7530 gen_zero_extendsidi2_speculative,
7531 };
7532 static gen_func_t gen_ld_sa[] = {
7533 gen_movbi_speculative_advanced,
7534 gen_movqi_speculative_advanced,
7535 gen_movhi_speculative_advanced,
7536 gen_movsi_speculative_advanced,
7537 gen_movdi_speculative_advanced,
7538 gen_movsf_speculative_advanced,
7539 gen_movdf_speculative_advanced,
7540 gen_movxf_speculative_advanced,
7541 gen_movti_speculative_advanced,
7542 gen_zero_extendqidi2_speculative_advanced,
7543 gen_zero_extendhidi2_speculative_advanced,
7544 gen_zero_extendsidi2_speculative_advanced,
7545 };
7546 static gen_func_t gen_ld_s_a[] = {
7547 gen_movbi_speculative_a,
7548 gen_movqi_speculative_a,
7549 gen_movhi_speculative_a,
7550 gen_movsi_speculative_a,
7551 gen_movdi_speculative_a,
7552 gen_movsf_speculative_a,
7553 gen_movdf_speculative_a,
7554 gen_movxf_speculative_a,
7555 gen_movti_speculative_a,
7556 gen_zero_extendqidi2_speculative_a,
7557 gen_zero_extendhidi2_speculative_a,
7558 gen_zero_extendsidi2_speculative_a,
7559 };
7560
7561 gen_func_t *gen_ld;
7562
7563 if (ts & BEGIN_DATA)
7564 {
7565 if (ts & BEGIN_CONTROL)
7566 gen_ld = gen_ld_sa;
7567 else
7568 gen_ld = gen_ld_a;
7569 }
7570 else if (ts & BEGIN_CONTROL)
7571 {
7572 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7573 || ia64_needs_block_p (ts))
7574 gen_ld = gen_ld_s;
7575 else
7576 gen_ld = gen_ld_s_a;
7577 }
7578 else if (ts == 0)
7579 gen_ld = gen_ld_;
7580 else
7581 gcc_unreachable ();
7582
7583 return gen_ld[mode_no];
7584 }
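
/* Illustrative only (not compiled): selecting and using one of the
   generators above.  For a purely data-speculative DImode load
   (TS == BEGIN_DATA, MODE_NO == 4 as computed by ia64_mode_to_int
   below) the table lookup yields gen_movdi_advanced, which should
   expand to an ld8.a at the assembly level.  DEST and SRC are
   hypothetical operands of the original load.  */
#if 0
static void
spec_load_gen_sketch (rtx dest, rtx src)
{
  gen_func_t gen = get_spec_load_gen_function (BEGIN_DATA, 4);

  emit_insn (gen (dest, src));
}
#endif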
7585
7586 /* Constants that help map 'enum machine_mode' to int. */
7587 enum SPEC_MODES
7588 {
7589 SPEC_MODE_INVALID = -1,
7590 SPEC_MODE_FIRST = 0,
7591 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7592 SPEC_MODE_FOR_EXTEND_LAST = 3,
7593 SPEC_MODE_LAST = 8
7594 };
7595
7596 enum
7597 {
7598 /* Offset to reach ZERO_EXTEND patterns. */
7599 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7600 };
7601
7602 /* Return index of the MODE. */
7603 static int
7604 ia64_mode_to_int (enum machine_mode mode)
7605 {
7606 switch (mode)
7607 {
7608 case BImode: return 0; /* SPEC_MODE_FIRST */
7609 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7610 case HImode: return 2;
7611 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7612 case DImode: return 4;
7613 case SFmode: return 5;
7614 case DFmode: return 6;
7615 case XFmode: return 7;
7616 case TImode:
7617 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7618 mentioned in itanium[12].md. Predicate fp_register_operand also
7619 needs to be defined. Bottom line: better disable for now. */
7620 return SPEC_MODE_INVALID;
7621 default: return SPEC_MODE_INVALID;
7622 }
7623 }
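
/* Worked example of the index scheme: a zero-extending QImode load has
   mode index 1 (QImode above); SPEC_GEN_EXTEND_OFFSET is
   SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 == 8 - 1 + 1 == 8,
   so get_mode_no_for_insn returns 1 + 8 == 9, which is the slot of
   gen_zero_extendqidi2 in the generator tables above.  */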
7624
7625 /* Provide information about speculation capabilities. */
7626 static void
7627 ia64_set_sched_flags (spec_info_t spec_info)
7628 {
7629 unsigned int *flags = &(current_sched_info->flags);
7630
7631 if (*flags & SCHED_RGN
7632 || *flags & SCHED_EBB
7633 || *flags & SEL_SCHED)
7634 {
7635 int mask = 0;
7636
7637 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7638 || (mflag_sched_ar_data_spec && reload_completed))
7639 {
7640 mask |= BEGIN_DATA;
7641
7642 if (!sel_sched_p ()
7643 && ((mflag_sched_br_in_data_spec && !reload_completed)
7644 || (mflag_sched_ar_in_data_spec && reload_completed)))
7645 mask |= BE_IN_DATA;
7646 }
7647
7648 if (mflag_sched_control_spec
7649 && (!sel_sched_p ()
7650 || reload_completed))
7651 {
7652 mask |= BEGIN_CONTROL;
7653
7654 if (!sel_sched_p () && mflag_sched_in_control_spec)
7655 mask |= BE_IN_CONTROL;
7656 }
7657
7658 spec_info->mask = mask;
7659
7660 if (mask)
7661 {
7662 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7663
7664 if (mask & BE_IN_SPEC)
7665 *flags |= NEW_BBS;
7666
7667 spec_info->flags = 0;
7668
7669 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7670 spec_info->flags |= PREFER_NON_DATA_SPEC;
7671
7672 if (mask & CONTROL_SPEC)
7673 {
7674 if (mflag_sched_prefer_non_control_spec_insns)
7675 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7676
7677 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7678 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7679 }
7680
7681 if (sched_verbose >= 1)
7682 spec_info->dump = sched_dump;
7683 else
7684 spec_info->dump = 0;
7685
7686 if (mflag_sched_count_spec_in_critical_path)
7687 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7688 }
7689 }
7690 else
7691 spec_info->mask = 0;
7692 }
7693
7694 /* If INSN is an appropriate load, return its mode index
7695 (see ia64_mode_to_int above). Return -1 otherwise. */
7696 static int
7697 get_mode_no_for_insn (rtx insn)
7698 {
7699 rtx reg, mem, mode_rtx;
7700 int mode_no;
7701 bool extend_p;
7702
7703 extract_insn_cached (insn);
7704
7705 /* We use WHICH_ALTERNATIVE only after reload. This will
7706 guarantee that reload won't touch a speculative insn. */
7707
7708 if (recog_data.n_operands != 2)
7709 return -1;
7710
7711 reg = recog_data.operand[0];
7712 mem = recog_data.operand[1];
7713
7714 /* We should use MEM's mode since REG's mode in the presence of
7715 ZERO_EXTEND will always be DImode. */
7716 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7717 /* Process non-speculative ld. */
7718 {
7719 if (!reload_completed)
7720 {
7721 /* Do not speculate into regs like ar.lc. */
7722 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7723 return -1;
7724
7725 if (!MEM_P (mem))
7726 return -1;
7727
7728 {
7729 rtx mem_reg = XEXP (mem, 0);
7730
7731 if (!REG_P (mem_reg))
7732 return -1;
7733 }
7734
7735 mode_rtx = mem;
7736 }
7737 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7738 {
7739 gcc_assert (REG_P (reg) && MEM_P (mem));
7740 mode_rtx = mem;
7741 }
7742 else
7743 return -1;
7744 }
7745 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7746 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7747 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7748 /* Process speculative ld or ld.c. */
7749 {
7750 gcc_assert (REG_P (reg) && MEM_P (mem));
7751 mode_rtx = mem;
7752 }
7753 else
7754 {
7755 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7756
7757 if (attr_class == ITANIUM_CLASS_CHK_A
7758 || attr_class == ITANIUM_CLASS_CHK_S_I
7759 || attr_class == ITANIUM_CLASS_CHK_S_F)
7760 /* Process chk. */
7761 mode_rtx = reg;
7762 else
7763 return -1;
7764 }
7765
7766 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7767
7768 if (mode_no == SPEC_MODE_INVALID)
7769 return -1;
7770
7771 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7772
7773 if (extend_p)
7774 {
7775 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7776 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7777 return -1;
7778
7779 mode_no += SPEC_GEN_EXTEND_OFFSET;
7780 }
7781
7782 return mode_no;
7783 }
7784
7785 /* If X is an unspec part of a speculative load, return its code.
7786 Return -1 otherwise. */
7787 static int
7788 get_spec_unspec_code (const_rtx x)
7789 {
7790 if (GET_CODE (x) != UNSPEC)
7791 return -1;
7792
7793 {
7794 int code;
7795
7796 code = XINT (x, 1);
7797
7798 switch (code)
7799 {
7800 case UNSPEC_LDA:
7801 case UNSPEC_LDS:
7802 case UNSPEC_LDS_A:
7803 case UNSPEC_LDSA:
7804 return code;
7805
7806 default:
7807 return -1;
7808 }
7809 }
7810 }
7811
7812 /* Implement skip_rtx_p hook. */
7813 static bool
7814 ia64_skip_rtx_p (const_rtx x)
7815 {
7816 return get_spec_unspec_code (x) != -1;
7817 }
7818
7819 /* If INSN is a speculative load, return its UNSPEC code.
7820 Return -1 otherwise. */
7821 static int
7822 get_insn_spec_code (const_rtx insn)
7823 {
7824 rtx pat, reg, mem;
7825
7826 pat = PATTERN (insn);
7827
7828 if (GET_CODE (pat) == COND_EXEC)
7829 pat = COND_EXEC_CODE (pat);
7830
7831 if (GET_CODE (pat) != SET)
7832 return -1;
7833
7834 reg = SET_DEST (pat);
7835 if (!REG_P (reg))
7836 return -1;
7837
7838 mem = SET_SRC (pat);
7839 if (GET_CODE (mem) == ZERO_EXTEND)
7840 mem = XEXP (mem, 0);
7841
7842 return get_spec_unspec_code (mem);
7843 }
7844
7845 /* If INSN is a speculative load, return a ds with the speculation types.
7846 Otherwise [if INSN is a normal instruction] return 0. */
7847 static ds_t
7848 ia64_get_insn_spec_ds (rtx insn)
7849 {
7850 int code = get_insn_spec_code (insn);
7851
7852 switch (code)
7853 {
7854 case UNSPEC_LDA:
7855 return BEGIN_DATA;
7856
7857 case UNSPEC_LDS:
7858 case UNSPEC_LDS_A:
7859 return BEGIN_CONTROL;
7860
7861 case UNSPEC_LDSA:
7862 return BEGIN_DATA | BEGIN_CONTROL;
7863
7864 default:
7865 return 0;
7866 }
7867 }
7868
7869 /* If INSN is a speculative load return a ds with the speculation types that
7870 will be checked.
7871 Otherwise [if INSN is a normal instruction] return 0. */
7872 static ds_t
7873 ia64_get_insn_checked_ds (rtx insn)
7874 {
7875 int code = get_insn_spec_code (insn);
7876
7877 switch (code)
7878 {
7879 case UNSPEC_LDA:
7880 return BEGIN_DATA | BEGIN_CONTROL;
7881
7882 case UNSPEC_LDS:
7883 return BEGIN_CONTROL;
7884
7885 case UNSPEC_LDS_A:
7886 case UNSPEC_LDSA:
7887 return BEGIN_DATA | BEGIN_CONTROL;
7888
7889 default:
7890 return 0;
7891 }
7892 }
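
/* Summary of the two mappings above, per speculative load kind:

   UNSPEC_LDA   (ld.a)   produces BEGIN_DATA,
                         checked for BEGIN_DATA | BEGIN_CONTROL
   UNSPEC_LDS   (ld.s)   produces BEGIN_CONTROL,
                         checked for BEGIN_CONTROL
   UNSPEC_LDS_A          produces BEGIN_CONTROL,
                         checked for BEGIN_DATA | BEGIN_CONTROL
   UNSPEC_LDSA  (ld.sa)  produces BEGIN_DATA | BEGIN_CONTROL,
                         checked for BEGIN_DATA | BEGIN_CONTROL  */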
7893
7894 /* Return a speculative load pattern for INSN with speculation types TS
7895 and machine mode index MODE_NO (as computed by get_mode_no_for_insn).
7896 Any COND_EXEC predicate on the original pattern is preserved in the
7897 new pattern. */
7898 static rtx
7899 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7900 {
7901 rtx pat, new_pat;
7902 gen_func_t gen_load;
7903
7904 gen_load = get_spec_load_gen_function (ts, mode_no);
7905
7906 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7907 copy_rtx (recog_data.operand[1]));
7908
7909 pat = PATTERN (insn);
7910 if (GET_CODE (pat) == COND_EXEC)
7911 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7912 new_pat);
7913
7914 return new_pat;
7915 }
7916
7917 static bool
7918 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7919 ds_t ds ATTRIBUTE_UNUSED)
7920 {
7921 return false;
7922 }
7923
7924 /* Implement targetm.sched.speculate_insn hook.
7925 Check if the INSN can be TS speculative.
7926 If 'no' - return -1.
7927 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7928 If current pattern of the INSN already provides TS speculation,
7929 return 0. */
7930 static int
7931 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7932 {
7933 int mode_no;
7934 int res;
7935
7936 gcc_assert (!(ts & ~SPECULATIVE));
7937
7938 if (ia64_spec_check_p (insn))
7939 return -1;
7940
7941 if ((ts & BE_IN_SPEC)
7942 && !insn_can_be_in_speculative_p (insn, ts))
7943 return -1;
7944
7945 mode_no = get_mode_no_for_insn (insn);
7946
7947 if (mode_no != SPEC_MODE_INVALID)
7948 {
7949 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7950 res = 0;
7951 else
7952 {
7953 res = 1;
7954 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7955 }
7956 }
7957 else
7958 res = -1;
7959
7960 return res;
7961 }
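
/* Illustrative only (not compiled): how a hypothetical caller would
   consume the -1/0/1 contract of ia64_speculate_insn described above.
   The schedulers reach this function through the
   targetm.sched.speculate_insn hook.  */
#if 0
static void
speculate_insn_sketch (rtx insn, ds_t ts)
{
  rtx new_pat;
  int res = ia64_speculate_insn (insn, ts, &new_pat);

  if (res > 0)
    /* INSN can be made TS-speculative: install the new pattern.  */
    validate_change (insn, &PATTERN (insn), new_pat, 0);
  else if (res == 0)
    ; /* INSN already provides TS speculation -- nothing to do.  */
  else
    ; /* res < 0: INSN cannot be speculated this way.  */
}
#endif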
7962
7963 /* Return a function that will generate a check for speculation TS with mode
7964 MODE_NO.
7965 If simple check is needed, pass true for SIMPLE_CHECK_P.
7966 If clearing check is needed, pass true for CLEARING_CHECK_P. */
7967 static gen_func_t
7968 get_spec_check_gen_function (ds_t ts, int mode_no,
7969 bool simple_check_p, bool clearing_check_p)
7970 {
7971 static gen_func_t gen_ld_c_clr[] = {
7972 gen_movbi_clr,
7973 gen_movqi_clr,
7974 gen_movhi_clr,
7975 gen_movsi_clr,
7976 gen_movdi_clr,
7977 gen_movsf_clr,
7978 gen_movdf_clr,
7979 gen_movxf_clr,
7980 gen_movti_clr,
7981 gen_zero_extendqidi2_clr,
7982 gen_zero_extendhidi2_clr,
7983 gen_zero_extendsidi2_clr,
7984 };
7985 static gen_func_t gen_ld_c_nc[] = {
7986 gen_movbi_nc,
7987 gen_movqi_nc,
7988 gen_movhi_nc,
7989 gen_movsi_nc,
7990 gen_movdi_nc,
7991 gen_movsf_nc,
7992 gen_movdf_nc,
7993 gen_movxf_nc,
7994 gen_movti_nc,
7995 gen_zero_extendqidi2_nc,
7996 gen_zero_extendhidi2_nc,
7997 gen_zero_extendsidi2_nc,
7998 };
7999 static gen_func_t gen_chk_a_clr[] = {
8000 gen_advanced_load_check_clr_bi,
8001 gen_advanced_load_check_clr_qi,
8002 gen_advanced_load_check_clr_hi,
8003 gen_advanced_load_check_clr_si,
8004 gen_advanced_load_check_clr_di,
8005 gen_advanced_load_check_clr_sf,
8006 gen_advanced_load_check_clr_df,
8007 gen_advanced_load_check_clr_xf,
8008 gen_advanced_load_check_clr_ti,
8009 gen_advanced_load_check_clr_di,
8010 gen_advanced_load_check_clr_di,
8011 gen_advanced_load_check_clr_di,
8012 };
8013 static gen_func_t gen_chk_a_nc[] = {
8014 gen_advanced_load_check_nc_bi,
8015 gen_advanced_load_check_nc_qi,
8016 gen_advanced_load_check_nc_hi,
8017 gen_advanced_load_check_nc_si,
8018 gen_advanced_load_check_nc_di,
8019 gen_advanced_load_check_nc_sf,
8020 gen_advanced_load_check_nc_df,
8021 gen_advanced_load_check_nc_xf,
8022 gen_advanced_load_check_nc_ti,
8023 gen_advanced_load_check_nc_di,
8024 gen_advanced_load_check_nc_di,
8025 gen_advanced_load_check_nc_di,
8026 };
8027 static gen_func_t gen_chk_s[] = {
8028 gen_speculation_check_bi,
8029 gen_speculation_check_qi,
8030 gen_speculation_check_hi,
8031 gen_speculation_check_si,
8032 gen_speculation_check_di,
8033 gen_speculation_check_sf,
8034 gen_speculation_check_df,
8035 gen_speculation_check_xf,
8036 gen_speculation_check_ti,
8037 gen_speculation_check_di,
8038 gen_speculation_check_di,
8039 gen_speculation_check_di,
8040 };
8041
8042 gen_func_t *gen_check;
8043
8044 if (ts & BEGIN_DATA)
8045 {
8046 /* We don't need recovery here because, even if this is an ld.sa,
8047 the ALAT entry will be allocated only if the NAT bit is zero.
8048 So it is enough to use ld.c here. */
8049
8050 if (simple_check_p)
8051 {
8052 gcc_assert (mflag_sched_spec_ldc);
8053
8054 if (clearing_check_p)
8055 gen_check = gen_ld_c_clr;
8056 else
8057 gen_check = gen_ld_c_nc;
8058 }
8059 else
8060 {
8061 if (clearing_check_p)
8062 gen_check = gen_chk_a_clr;
8063 else
8064 gen_check = gen_chk_a_nc;
8065 }
8066 }
8067 else if (ts & BEGIN_CONTROL)
8068 {
8069 if (simple_check_p)
8070 /* We might want to use ld.sa -> ld.c instead of
8071 ld.s -> chk.s. */
8072 {
8073 gcc_assert (!ia64_needs_block_p (ts));
8074
8075 if (clearing_check_p)
8076 gen_check = gen_ld_c_clr;
8077 else
8078 gen_check = gen_ld_c_nc;
8079 }
8080 else
8081 {
8082 gen_check = gen_chk_s;
8083 }
8084 }
8085 else
8086 gcc_unreachable ();
8087
8088 gcc_assert (mode_no >= 0);
8089 return gen_check[mode_no];
8090 }
8091
8092 /* Return nonzero if a speculation of type TS needs a branchy recovery check. */
8093 static bool
8094 ia64_needs_block_p (ds_t ts)
8095 {
8096 if (ts & BEGIN_DATA)
8097 return !mflag_sched_spec_ldc;
8098
8099 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8100
8101 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8102 }
8103
8104 /* Generate a recovery check for INSN with speculation types DS.
8105 If LABEL is nonnull, generate a branchy recovery check.
8106 Otherwise, generate a simple check. */
8107 static rtx
8108 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
8109 {
8110 rtx op1, pat, check_pat;
8111 gen_func_t gen_check;
8112 int mode_no;
8113
8114 mode_no = get_mode_no_for_insn (insn);
8115 gcc_assert (mode_no >= 0);
8116
8117 if (label)
8118 op1 = label;
8119 else
8120 {
8121 gcc_assert (!ia64_needs_block_p (ds));
8122 op1 = copy_rtx (recog_data.operand[1]);
8123 }
8124
8125 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8126 true);
8127
8128 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8129
8130 pat = PATTERN (insn);
8131 if (GET_CODE (pat) == COND_EXEC)
8132 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8133 check_pat);
8134
8135 return check_pat;
8136 }
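
/* Illustrative only (not compiled): generating a simple (non-branchy)
   check for a data-speculative load, assuming mflag_sched_spec_ldc is
   set.  Passing LABEL == NULL_RTX requests the simple ld.c form;
   passing a label instead would request the branchy chk.a recovery
   sequence.  INSN is a hypothetical data-speculative load.  */
#if 0
static void
gen_spec_check_sketch (rtx insn)
{
  rtx check = ia64_gen_spec_check (insn, NULL_RTX, BEGIN_DATA);

  emit_insn_before (check, insn);
}
#endif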
8137
8138 /* Return nonzero if X is a speculation recovery check. */
8139 static int
8140 ia64_spec_check_p (rtx x)
8141 {
8142 x = PATTERN (x);
8143 if (GET_CODE (x) == COND_EXEC)
8144 x = COND_EXEC_CODE (x);
8145 if (GET_CODE (x) == SET)
8146 return ia64_spec_check_src_p (SET_SRC (x));
8147 return 0;
8148 }
8149
8150 /* Return the check's UNSPEC code if SRC belongs to a speculation recovery check, zero otherwise. */
8151 static int
8152 ia64_spec_check_src_p (rtx src)
8153 {
8154 if (GET_CODE (src) == IF_THEN_ELSE)
8155 {
8156 rtx t;
8157
8158 t = XEXP (src, 0);
8159 if (GET_CODE (t) == NE)
8160 {
8161 t = XEXP (t, 0);
8162
8163 if (GET_CODE (t) == UNSPEC)
8164 {
8165 int code;
8166
8167 code = XINT (t, 1);
8168
8169 if (code == UNSPEC_LDCCLR
8170 || code == UNSPEC_LDCNC
8171 || code == UNSPEC_CHKACLR
8172 || code == UNSPEC_CHKANC
8173 || code == UNSPEC_CHKS)
8174 {
8175 gcc_assert (code != 0);
8176 return code;
8177 }
8178 }
8179 }
8180 }
8181 return 0;
8182 }
8183 \f
8184
8185 /* The following page contains abstract data `bundle states' which are
8186 used for bundling insns (inserting nops and template generation). */
8187
8188 /* The following describes state of insn bundling. */
8189
8190 struct bundle_state
8191 {
8192 /* Unique bundle state number, used to identify states in the
8193 debugging output. */
8194 int unique_num;
8195 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
8196 /* number of nops before and after the insn */
8197 short before_nops_num, after_nops_num;
8198 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8199 insn) */
8200 int cost; /* cost of the state in cycles */
8201 int accumulated_insns_num; /* number of all previous insns including
8202 nops; an L insn is counted as 2 insns */
8203 int branch_deviation; /* deviation of previous branches from 3rd slots */
8204 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8205 struct bundle_state *next; /* next state with the same insn_num */
8206 struct bundle_state *originator; /* originator (previous insn state) */
8207 /* All bundle states are in the following chain. */
8208 struct bundle_state *allocated_states_chain;
8209 /* The DFA State after issuing the insn and the nops. */
8210 state_t dfa_state;
8211 };
8212
8213 /* The following maps an insn number to the corresponding bundle state. */
8214
8215 static struct bundle_state **index_to_bundle_states;
8216
8217 /* The unique number of next bundle state. */
8218
8219 static int bundle_states_num;
8220
8221 /* All allocated bundle states are in the following chain. */
8222
8223 static struct bundle_state *allocated_bundle_states_chain;
8224
8225 /* All allocated but not used bundle states are in the following
8226 chain. */
8227
8228 static struct bundle_state *free_bundle_state_chain;
8229
8230
8231 /* The following function returns a free bundle state. */
8232
8233 static struct bundle_state *
8234 get_free_bundle_state (void)
8235 {
8236 struct bundle_state *result;
8237
8238 if (free_bundle_state_chain != NULL)
8239 {
8240 result = free_bundle_state_chain;
8241 free_bundle_state_chain = result->next;
8242 }
8243 else
8244 {
8245 result = XNEW (struct bundle_state);
8246 result->dfa_state = xmalloc (dfa_state_size);
8247 result->allocated_states_chain = allocated_bundle_states_chain;
8248 allocated_bundle_states_chain = result;
8249 }
8250 result->unique_num = bundle_states_num++;
8251 return result;
8252
8253 }
8254
8255 /* The following function frees given bundle state. */
8256
8257 static void
8258 free_bundle_state (struct bundle_state *state)
8259 {
8260 state->next = free_bundle_state_chain;
8261 free_bundle_state_chain = state;
8262 }
8263
8264 /* Start work with abstract data `bundle states'. */
8265
8266 static void
8267 initiate_bundle_states (void)
8268 {
8269 bundle_states_num = 0;
8270 free_bundle_state_chain = NULL;
8271 allocated_bundle_states_chain = NULL;
8272 }
8273
8274 /* Finish work with abstract data `bundle states'. */
8275
8276 static void
8277 finish_bundle_states (void)
8278 {
8279 struct bundle_state *curr_state, *next_state;
8280
8281 for (curr_state = allocated_bundle_states_chain;
8282 curr_state != NULL;
8283 curr_state = next_state)
8284 {
8285 next_state = curr_state->allocated_states_chain;
8286 free (curr_state->dfa_state);
8287 free (curr_state);
8288 }
8289 }
8290
8291 /* Hash table of the bundle states. The key is dfa_state and insn_num
8292 of the bundle states. */
8293
8294 static htab_t bundle_state_table;
8295
8296 /* The function returns hash of BUNDLE_STATE. */
8297
8298 static unsigned
8299 bundle_state_hash (const void *bundle_state)
8300 {
8301 const struct bundle_state *const state
8302 = (const struct bundle_state *) bundle_state;
8303 unsigned result, i;
8304
8305 for (result = i = 0; i < dfa_state_size; i++)
8306 result += (((unsigned char *) state->dfa_state) [i]
8307 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8308 return result + state->insn_num;
8309 }
8310
8311 /* The function returns nonzero if the bundle state keys are equal. */
8312
8313 static int
8314 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8315 {
8316 const struct bundle_state *const state1
8317 = (const struct bundle_state *) bundle_state_1;
8318 const struct bundle_state *const state2
8319 = (const struct bundle_state *) bundle_state_2;
8320
8321 return (state1->insn_num == state2->insn_num
8322 && memcmp (state1->dfa_state, state2->dfa_state,
8323 dfa_state_size) == 0);
8324 }
8325
8326 /* The function inserts BUNDLE_STATE into the hash table and returns
8327 nonzero if it has been inserted. The table keeps only the best
8328 bundle state for a given key. */
8329
8330 static int
8331 insert_bundle_state (struct bundle_state *bundle_state)
8332 {
8333 void **entry_ptr;
8334
8335 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8336 if (*entry_ptr == NULL)
8337 {
8338 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8339 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8340 *entry_ptr = (void *) bundle_state;
8341 return TRUE;
8342 }
8343 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8344 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8345 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8346 > bundle_state->accumulated_insns_num
8347 || (((struct bundle_state *)
8348 *entry_ptr)->accumulated_insns_num
8349 == bundle_state->accumulated_insns_num
8350 && (((struct bundle_state *)
8351 *entry_ptr)->branch_deviation
8352 > bundle_state->branch_deviation
8353 || (((struct bundle_state *)
8354 *entry_ptr)->branch_deviation
8355 == bundle_state->branch_deviation
8356 && ((struct bundle_state *)
8357 *entry_ptr)->middle_bundle_stops
8358 > bundle_state->middle_bundle_stops))))))
8359
8360 {
8361 struct bundle_state temp;
8362
8363 temp = *(struct bundle_state *) *entry_ptr;
8364 *(struct bundle_state *) *entry_ptr = *bundle_state;
8365 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8366 *bundle_state = temp;
8367 }
8368 return FALSE;
8369 }
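
/* The nested condition above is just a lexicographic "is better than"
   test on (cost, accumulated_insns_num, branch_deviation,
   middle_bundle_stops), smaller being better.  An equivalent helper,
   for illustration only (not compiled):  */
#if 0
static int
bundle_state_better_p (const struct bundle_state *a,
                       const struct bundle_state *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  if (a->branch_deviation != b->branch_deviation)
    return a->branch_deviation < b->branch_deviation;
  return a->middle_bundle_stops < b->middle_bundle_stops;
}
#endif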
8370
8371 /* Start work with the hash table. */
8372
8373 static void
8374 initiate_bundle_state_table (void)
8375 {
8376 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8377 (htab_del) 0);
8378 }
8379
8380 /* Finish work with the hash table. */
8381
8382 static void
8383 finish_bundle_state_table (void)
8384 {
8385 htab_delete (bundle_state_table);
8386 }
8387
8388 \f
8389
8390 /* The following variable is an insn `nop' used to check bundle states
8391 with different numbers of inserted nops. */
8392
8393 static rtx ia64_nop;
8394
8395 /* The following function tries to issue NOPS_NUM nops for the current
8396 state without advancing the processor cycle. If it fails, the
8397 function returns FALSE and frees the current state. */
8398
8399 static int
8400 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8401 {
8402 int i;
8403
8404 for (i = 0; i < nops_num; i++)
8405 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8406 {
8407 free_bundle_state (curr_state);
8408 return FALSE;
8409 }
8410 return TRUE;
8411 }
8412
8413 /* The following function tries to issue INSN for the current
8414 state without advancing the processor cycle. If it fails, the
8415 function returns FALSE and frees the current state. */
8416
8417 static int
8418 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8419 {
8420 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8421 {
8422 free_bundle_state (curr_state);
8423 return FALSE;
8424 }
8425 return TRUE;
8426 }
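
/* Both helpers above rely on the convention used throughout this file
   that state_transition returns a negative value when the insn (or
   nop) fits into the current simulated cycle, and a nonnegative value
   when the automaton would have to advance a cycle first; the
   nonnegative case is treated as failure here.  */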
8427
8428 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8429 starting with ORIGINATOR without advancing the processor cycle. If
8430 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8431 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
8432 bundle. If successful, the function creates a new bundle state and
8433 inserts it into the hash table and into `index_to_bundle_states'. */
8434
8435 static void
8436 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8437 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8438 {
8439 struct bundle_state *curr_state;
8440
8441 curr_state = get_free_bundle_state ();
8442 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8443 curr_state->insn = insn;
8444 curr_state->insn_num = originator->insn_num + 1;
8445 curr_state->cost = originator->cost;
8446 curr_state->originator = originator;
8447 curr_state->before_nops_num = before_nops_num;
8448 curr_state->after_nops_num = 0;
8449 curr_state->accumulated_insns_num
8450 = originator->accumulated_insns_num + before_nops_num;
8451 curr_state->branch_deviation = originator->branch_deviation;
8452 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8453 gcc_assert (insn);
8454 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8455 {
8456 gcc_assert (GET_MODE (insn) != TImode);
8457 if (!try_issue_nops (curr_state, before_nops_num))
8458 return;
8459 if (!try_issue_insn (curr_state, insn))
8460 return;
8461 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8462 if (curr_state->accumulated_insns_num % 3 != 0)
8463 curr_state->middle_bundle_stops++;
8464 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8465 && curr_state->accumulated_insns_num % 3 != 0)
8466 {
8467 free_bundle_state (curr_state);
8468 return;
8469 }
8470 }
8471 else if (GET_MODE (insn) != TImode)
8472 {
8473 if (!try_issue_nops (curr_state, before_nops_num))
8474 return;
8475 if (!try_issue_insn (curr_state, insn))
8476 return;
8477 curr_state->accumulated_insns_num++;
8478 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8479 && asm_noperands (PATTERN (insn)) < 0);
8480
8481 if (ia64_safe_type (insn) == TYPE_L)
8482 curr_state->accumulated_insns_num++;
8483 }
8484 else
8485 {
8486 /* If this is an insn that must be first in a group, then don't allow
8487 nops to be emitted before it. Currently, alloc is the only such
8488 supported instruction. */
8489 /* ??? The bundling automatons should handle this for us, but they do
8490 not yet have support for the first_insn attribute. */
8491 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8492 {
8493 free_bundle_state (curr_state);
8494 return;
8495 }
8496
8497 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8498 state_transition (curr_state->dfa_state, NULL);
8499 curr_state->cost++;
8500 if (!try_issue_nops (curr_state, before_nops_num))
8501 return;
8502 if (!try_issue_insn (curr_state, insn))
8503 return;
8504 curr_state->accumulated_insns_num++;
8505 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8506 || asm_noperands (PATTERN (insn)) >= 0)
8507 {
8508 /* Finish bundle containing asm insn. */
8509 curr_state->after_nops_num
8510 = 3 - curr_state->accumulated_insns_num % 3;
8511 curr_state->accumulated_insns_num
8512 += 3 - curr_state->accumulated_insns_num % 3;
8513 }
8514 else if (ia64_safe_type (insn) == TYPE_L)
8515 curr_state->accumulated_insns_num++;
8516 }
8517 if (ia64_safe_type (insn) == TYPE_B)
8518 curr_state->branch_deviation
8519 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8520 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8521 {
8522 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8523 {
8524 state_t dfa_state;
8525 struct bundle_state *curr_state1;
8526 struct bundle_state *allocated_states_chain;
8527
8528 curr_state1 = get_free_bundle_state ();
8529 dfa_state = curr_state1->dfa_state;
8530 allocated_states_chain = curr_state1->allocated_states_chain;
8531 *curr_state1 = *curr_state;
8532 curr_state1->dfa_state = dfa_state;
8533 curr_state1->allocated_states_chain = allocated_states_chain;
8534 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8535 dfa_state_size);
8536 curr_state = curr_state1;
8537 }
8538 if (!try_issue_nops (curr_state,
8539 3 - curr_state->accumulated_insns_num % 3))
8540 return;
8541 curr_state->after_nops_num
8542 = 3 - curr_state->accumulated_insns_num % 3;
8543 curr_state->accumulated_insns_num
8544 += 3 - curr_state->accumulated_insns_num % 3;
8545 }
8546 if (!insert_bundle_state (curr_state))
8547 free_bundle_state (curr_state);
8548 return;
8549 }
8550
8551 /* The following function returns the position in the two-bundle
8552 window for the given STATE. */
8553
8554 static int
8555 get_max_pos (state_t state)
8556 {
8557 if (cpu_unit_reservation_p (state, pos_6))
8558 return 6;
8559 else if (cpu_unit_reservation_p (state, pos_5))
8560 return 5;
8561 else if (cpu_unit_reservation_p (state, pos_4))
8562 return 4;
8563 else if (cpu_unit_reservation_p (state, pos_3))
8564 return 3;
8565 else if (cpu_unit_reservation_p (state, pos_2))
8566 return 2;
8567 else if (cpu_unit_reservation_p (state, pos_1))
8568 return 1;
8569 else
8570 return 0;
8571 }
8572
8573 /* The function returns the code of a possible template for the given
8574 position and state. The function should be called only with the two
8575 values of position equal to 3 or 6. We avoid generating F NOPs by
8576 putting templates containing F insns at the end of the template
8577 search, because of an undocumented anomaly in McKinley-derived cores
8578 that can cause stalls if an F-unit insn (including a NOP) is issued
8579 within a six-cycle window after reading certain application registers
8580 (such as ar.bsp). Furthermore, power considerations also argue
8581 against the use of F-unit instructions unless they're really needed. */
8582
8583 static int
8584 get_template (state_t state, int pos)
8585 {
8586 switch (pos)
8587 {
8588 case 3:
8589 if (cpu_unit_reservation_p (state, _0mmi_))
8590 return 1;
8591 else if (cpu_unit_reservation_p (state, _0mii_))
8592 return 0;
8593 else if (cpu_unit_reservation_p (state, _0mmb_))
8594 return 7;
8595 else if (cpu_unit_reservation_p (state, _0mib_))
8596 return 6;
8597 else if (cpu_unit_reservation_p (state, _0mbb_))
8598 return 5;
8599 else if (cpu_unit_reservation_p (state, _0bbb_))
8600 return 4;
8601 else if (cpu_unit_reservation_p (state, _0mmf_))
8602 return 3;
8603 else if (cpu_unit_reservation_p (state, _0mfi_))
8604 return 2;
8605 else if (cpu_unit_reservation_p (state, _0mfb_))
8606 return 8;
8607 else if (cpu_unit_reservation_p (state, _0mlx_))
8608 return 9;
8609 else
8610 gcc_unreachable ();
8611 case 6:
8612 if (cpu_unit_reservation_p (state, _1mmi_))
8613 return 1;
8614 else if (cpu_unit_reservation_p (state, _1mii_))
8615 return 0;
8616 else if (cpu_unit_reservation_p (state, _1mmb_))
8617 return 7;
8618 else if (cpu_unit_reservation_p (state, _1mib_))
8619 return 6;
8620 else if (cpu_unit_reservation_p (state, _1mbb_))
8621 return 5;
8622 else if (cpu_unit_reservation_p (state, _1bbb_))
8623 return 4;
8624 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8625 return 3;
8626 else if (cpu_unit_reservation_p (state, _1mfi_))
8627 return 2;
8628 else if (cpu_unit_reservation_p (state, _1mfb_))
8629 return 8;
8630 else if (cpu_unit_reservation_p (state, _1mlx_))
8631 return 9;
8632 else
8633 gcc_unreachable ();
8634 default:
8635 gcc_unreachable ();
8636 }
8637 }
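
/* Reading the unit names above, the codes returned by get_template
   correspond to the following bundle templates:
   0 = .mii, 1 = .mmi, 2 = .mfi, 3 = .mmf, 4 = .bbb,
   5 = .mbb, 6 = .mib, 7 = .mmb, 8 = .mfb, 9 = .mlx.
   These codes are the operand later passed to gen_bundle_selector by
   ia64_add_bundle_selector_before below.  */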
8638
8639 /* True when INSN is important for bundling. */
8640 static bool
8641 important_for_bundling_p (rtx insn)
8642 {
8643 return (INSN_P (insn)
8644 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8645 && GET_CODE (PATTERN (insn)) != USE
8646 && GET_CODE (PATTERN (insn)) != CLOBBER);
8647 }
8648
8649 /* The following function returns the first insn important for insn
8650 bundling, starting at INSN and stopping before TAIL. */
8651
8652 static rtx
8653 get_next_important_insn (rtx insn, rtx tail)
8654 {
8655 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8656 if (important_for_bundling_p (insn))
8657 return insn;
8658 return NULL_RTX;
8659 }
8660
8661 /* Add a bundle selector TEMPLATE0 before INSN. */
8662
8663 static void
8664 ia64_add_bundle_selector_before (int template0, rtx insn)
8665 {
8666 rtx b = gen_bundle_selector (GEN_INT (template0));
8667
8668 ia64_emit_insn_before (b, insn);
8669 #if NR_BUNDLES == 10
8670 if ((template0 == 4 || template0 == 5)
8671 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8672 {
8673 int i;
8674 rtx note = NULL_RTX;
8675
8676 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8677 first or second slot. If it is and has a REG_EH_REGION note, copy
8678 the note to the following nops, as br.call sets rp to the address
8679 of the following bundle and therefore an EH region end must be on
8680 a bundle boundary. */
8681 insn = PREV_INSN (insn);
8682 for (i = 0; i < 3; i++)
8683 {
8684 do
8685 insn = next_active_insn (insn);
8686 while (GET_CODE (insn) == INSN
8687 && get_attr_empty (insn) == EMPTY_YES);
8688 if (GET_CODE (insn) == CALL_INSN)
8689 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8690 else if (note)
8691 {
8692 int code;
8693
8694 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8695 || code == CODE_FOR_nop_b);
8696 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8697 note = NULL_RTX;
8698 else
8699 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8700 }
8701 }
8702 }
8703 #endif
8704 }
8705
8706 /* The following function does insn bundling. Bundling means
8707 inserting templates and nop insns to fit insn groups into permitted
8708 templates. Instruction scheduling uses an NDFA (non-deterministic
8709 finite automaton) encoding information about the templates and the
8710 inserted nops. Nondeterminism of the automaton permits following
8711 all possible insn sequences very quickly.
8712
8713 Unfortunately it is not possible to get information about inserted
8714 nop insns and used templates from the automaton states. The
8715 automaton only says that we can issue an insn, possibly inserting
8716 some nops before it and using some template. Therefore insn
8717 bundling in this function is implemented by using a DFA
8718 (deterministic finite automaton). We follow all possible insn
8719 sequences by inserting 0-2 nops (that is what the NDFA describes
8720 for insn scheduling) before/after each insn being bundled. We know
8721 the start of a simulated processor cycle from insn scheduling (an
8722 insn starting a new cycle has TImode).
8723
8724 A simple implementation of insn bundling would create an enormous
8725 number of possible insn sequences satisfying the information about
8726 new cycle ticks taken from the insn scheduling. To make the
8727 algorithm practical we use dynamic programming. Each decision
8728 (about inserting nops and implicitly about previous decisions) is
8729 described by structure bundle_state (see above). If we generate
8730 the same bundle state (the key is the automaton state after issuing
8731 the insn and the nops for it), we reuse the already generated one.
8732 As a consequence we reject some decisions which cannot improve the
8733 solution and reduce the memory used by the algorithm.
8734
8735 When we reach the end of the EBB (extended basic block), we choose
8736 the best sequence and then, moving back through the EBB, insert
8737 templates for the best alternative. The templates are taken by
8738 querying the automaton state for each insn in the chosen bundle
8739 states.
8740
8741 So the algorithm makes two (forward and backward) passes through the EBB. */
8742
8743 static void
8744 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8745 {
8746 struct bundle_state *curr_state, *next_state, *best_state;
8747 rtx insn, next_insn;
8748 int insn_num;
8749 int i, bundle_end_p, only_bundle_end_p, asm_p;
8750 int pos = 0, max_pos, template0, template1;
8751 rtx b;
8752 rtx nop;
8753 enum attr_type type;
8754
8755 insn_num = 0;
8756 /* Count insns in the EBB. */
8757 for (insn = NEXT_INSN (prev_head_insn);
8758 insn && insn != tail;
8759 insn = NEXT_INSN (insn))
8760 if (INSN_P (insn))
8761 insn_num++;
8762 if (insn_num == 0)
8763 return;
8764 bundling_p = 1;
8765 dfa_clean_insn_cache ();
8766 initiate_bundle_state_table ();
8767 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8768 /* First (forward) pass -- generation of bundle states. */
8769 curr_state = get_free_bundle_state ();
8770 curr_state->insn = NULL;
8771 curr_state->before_nops_num = 0;
8772 curr_state->after_nops_num = 0;
8773 curr_state->insn_num = 0;
8774 curr_state->cost = 0;
8775 curr_state->accumulated_insns_num = 0;
8776 curr_state->branch_deviation = 0;
8777 curr_state->middle_bundle_stops = 0;
8778 curr_state->next = NULL;
8779 curr_state->originator = NULL;
8780 state_reset (curr_state->dfa_state);
8781 index_to_bundle_states [0] = curr_state;
8782 insn_num = 0;
8783 /* Shift the cycle mark if it was put on an insn which could be ignored. */
8784 for (insn = NEXT_INSN (prev_head_insn);
8785 insn != tail;
8786 insn = NEXT_INSN (insn))
8787 if (INSN_P (insn)
8788 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8789 || GET_CODE (PATTERN (insn)) == USE
8790 || GET_CODE (PATTERN (insn)) == CLOBBER)
8791 && GET_MODE (insn) == TImode)
8792 {
8793 PUT_MODE (insn, VOIDmode);
8794 for (next_insn = NEXT_INSN (insn);
8795 next_insn != tail;
8796 next_insn = NEXT_INSN (next_insn))
8797 if (INSN_P (next_insn)
8798 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8799 && GET_CODE (PATTERN (next_insn)) != USE
8800 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8801 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8802 {
8803 PUT_MODE (next_insn, TImode);
8804 break;
8805 }
8806 }
8807 /* Forward pass: generation of bundle states. */
8808 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8809 insn != NULL_RTX;
8810 insn = next_insn)
8811 {
8812 gcc_assert (INSN_P (insn)
8813 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8814 && GET_CODE (PATTERN (insn)) != USE
8815 && GET_CODE (PATTERN (insn)) != CLOBBER);
8816 type = ia64_safe_type (insn);
8817 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8818 insn_num++;
8819 index_to_bundle_states [insn_num] = NULL;
8820 for (curr_state = index_to_bundle_states [insn_num - 1];
8821 curr_state != NULL;
8822 curr_state = next_state)
8823 {
8824 pos = curr_state->accumulated_insns_num % 3;
8825 next_state = curr_state->next;
8826 /* We must fill up the current bundle in order to start a
8827 subsequent asm insn in a new bundle. An asm insn is always
8828 placed in a separate bundle. */
8829 only_bundle_end_p
8830 = (next_insn != NULL_RTX
8831 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8832 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8833 /* We may fill up the current bundle if it is the cycle end
8834 without a group barrier. */
8835 bundle_end_p
8836 = (only_bundle_end_p || next_insn == NULL_RTX
8837 || (GET_MODE (next_insn) == TImode
8838 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8839 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8840 || type == TYPE_S)
8841 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8842 only_bundle_end_p);
8843 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8844 only_bundle_end_p);
8845 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8846 only_bundle_end_p);
8847 }
8848 gcc_assert (index_to_bundle_states [insn_num]);
8849 for (curr_state = index_to_bundle_states [insn_num];
8850 curr_state != NULL;
8851 curr_state = curr_state->next)
8852 if (verbose >= 2 && dump)
8853 {
8854 /* This structure is taken from the generated code of the
8855 pipeline hazard recognizer (see file insn-attrtab.c).
8856 Please don't forget to change the structure if a new
8857 automaton is added to the .md file. */
8858 struct DFA_chip
8859 {
8860 unsigned short one_automaton_state;
8861 unsigned short oneb_automaton_state;
8862 unsigned short two_automaton_state;
8863 unsigned short twob_automaton_state;
8864 };
8865
8866 fprintf
8867 (dump,
8868 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8869 curr_state->unique_num,
8870 (curr_state->originator == NULL
8871 ? -1 : curr_state->originator->unique_num),
8872 curr_state->cost,
8873 curr_state->before_nops_num, curr_state->after_nops_num,
8874 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8875 curr_state->middle_bundle_stops,
8876 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8877 INSN_UID (insn));
8878 }
8879 }
8880
8881 /* We should find a solution because the 2nd insn scheduling has
8882 found one. */
8883 gcc_assert (index_to_bundle_states [insn_num]);
8884 /* Find a state corresponding to the best insn sequence. */
8885 best_state = NULL;
8886 for (curr_state = index_to_bundle_states [insn_num];
8887 curr_state != NULL;
8888 curr_state = curr_state->next)
8889 /* We only look at states whose last bundle is completely filled.
8890 Among those, we prefer insn sequences first with minimal cost,
8891 then with a minimal number of inserted nops, and finally with
8892 branch insns placed in the 3rd slots. */
8893 if (curr_state->accumulated_insns_num % 3 == 0
8894 && (best_state == NULL || best_state->cost > curr_state->cost
8895 || (best_state->cost == curr_state->cost
8896 && (curr_state->accumulated_insns_num
8897 < best_state->accumulated_insns_num
8898 || (curr_state->accumulated_insns_num
8899 == best_state->accumulated_insns_num
8900 && (curr_state->branch_deviation
8901 < best_state->branch_deviation
8902 || (curr_state->branch_deviation
8903 == best_state->branch_deviation
8904 && curr_state->middle_bundle_stops
8905 < best_state->middle_bundle_stops)))))))
8906 best_state = curr_state;
8907 /* Second (backward) pass: adding nops and templates. */
8908 gcc_assert (best_state);
8909 insn_num = best_state->before_nops_num;
8910 template0 = template1 = -1;
8911 for (curr_state = best_state;
8912 curr_state->originator != NULL;
8913 curr_state = curr_state->originator)
8914 {
8915 insn = curr_state->insn;
8916 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8917 || asm_noperands (PATTERN (insn)) >= 0);
8918 insn_num++;
8919 if (verbose >= 2 && dump)
8920 {
8921 struct DFA_chip
8922 {
8923 unsigned short one_automaton_state;
8924 unsigned short oneb_automaton_state;
8925 unsigned short two_automaton_state;
8926 unsigned short twob_automaton_state;
8927 };
8928
8929 fprintf
8930 (dump,
8931 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8932 curr_state->unique_num,
8933 (curr_state->originator == NULL
8934 ? -1 : curr_state->originator->unique_num),
8935 curr_state->cost,
8936 curr_state->before_nops_num, curr_state->after_nops_num,
8937 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8938 curr_state->middle_bundle_stops,
8939 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8940 INSN_UID (insn));
8941 }
8942 /* Find the position in the current bundle window. The window can
8943 contain at most two bundles. A two-bundle window means that
8944 the processor will make two bundle rotations. */
8945 max_pos = get_max_pos (curr_state->dfa_state);
8946 if (max_pos == 6
8947 /* The following (negative template number) means that the
8948 processor did one bundle rotation. */
8949 || (max_pos == 3 && template0 < 0))
8950 {
8951 /* We are at the end of the window -- find template(s) for
8952 its bundle(s). */
8953 pos = max_pos;
8954 if (max_pos == 3)
8955 template0 = get_template (curr_state->dfa_state, 3);
8956 else
8957 {
8958 template1 = get_template (curr_state->dfa_state, 3);
8959 template0 = get_template (curr_state->dfa_state, 6);
8960 }
8961 }
8962 if (max_pos > 3 && template1 < 0)
8963 /* This may happen when we have a stop inside a bundle. */
8964 {
8965 gcc_assert (pos <= 3);
8966 template1 = get_template (curr_state->dfa_state, 3);
8967 pos += 3;
8968 }
8969 if (!asm_p)
8970 /* Emit nops after the current insn. */
8971 for (i = 0; i < curr_state->after_nops_num; i++)
8972 {
8973 nop = gen_nop ();
8974 emit_insn_after (nop, insn);
8975 pos--;
8976 gcc_assert (pos >= 0);
8977 if (pos % 3 == 0)
8978 {
8979 /* We are at the start of a bundle: emit the template
8980 (it should be defined). */
8981 gcc_assert (template0 >= 0);
8982 ia64_add_bundle_selector_before (template0, nop);
8983 /* If we have two bundle window, we make one bundle
8984 rotation. Otherwise template0 will be undefined
8985 (negative value). */
8986 template0 = template1;
8987 template1 = -1;
8988 }
8989 }
8990 /* Move the position backward in the window. A group barrier has
8991 no slot; an asm insn takes a whole bundle. */
8992 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8993 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8994 && asm_noperands (PATTERN (insn)) < 0)
8995 pos--;
8996 /* Long insn takes 2 slots. */
8997 if (ia64_safe_type (insn) == TYPE_L)
8998 pos--;
8999 gcc_assert (pos >= 0);
9000 if (pos % 3 == 0
9001 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9002 && GET_CODE (PATTERN (insn)) != ASM_INPUT
9003 && asm_noperands (PATTERN (insn)) < 0)
9004 {
9005 /* The current insn is at the bundle start: emit the
9006 template. */
9007 gcc_assert (template0 >= 0);
9008 ia64_add_bundle_selector_before (template0, insn);
9009 b = PREV_INSN (insn);
9010 insn = b;
9011 /* See comment above in analogous place for emitting nops
9012 after the insn. */
9013 template0 = template1;
9014 template1 = -1;
9015 }
9016 /* Emit nops before the current insn. */
9017 for (i = 0; i < curr_state->before_nops_num; i++)
9018 {
9019 nop = gen_nop ();
9020 ia64_emit_insn_before (nop, insn);
9021 nop = PREV_INSN (insn);
9022 insn = nop;
9023 pos--;
9024 gcc_assert (pos >= 0);
9025 if (pos % 3 == 0)
9026 {
9027 /* See comment above in analogous place for emitting nops
9028 after the insn. */
9029 gcc_assert (template0 >= 0);
9030 ia64_add_bundle_selector_before (template0, insn);
9031 b = PREV_INSN (insn);
9032 insn = b;
9033 template0 = template1;
9034 template1 = -1;
9035 }
9036 }
9037 }
9038
9039 #ifdef ENABLE_CHECKING
9040 {
9041 /* Check that middle_bundle_stops was calculated correctly. */
9042 int num = best_state->middle_bundle_stops;
9043 bool start_bundle = true, end_bundle = false;
9044
9045 for (insn = NEXT_INSN (prev_head_insn);
9046 insn && insn != tail;
9047 insn = NEXT_INSN (insn))
9048 {
9049 if (!INSN_P (insn))
9050 continue;
9051 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9052 start_bundle = true;
9053 else
9054 {
9055 rtx next_insn;
9056
9057 for (next_insn = NEXT_INSN (insn);
9058 next_insn && next_insn != tail;
9059 next_insn = NEXT_INSN (next_insn))
9060 if (INSN_P (next_insn)
9061 && (ia64_safe_itanium_class (next_insn)
9062 != ITANIUM_CLASS_IGNORE
9063 || recog_memoized (next_insn)
9064 == CODE_FOR_bundle_selector)
9065 && GET_CODE (PATTERN (next_insn)) != USE
9066 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9067 break;
9068
9069 end_bundle = next_insn == NULL_RTX
9070 || next_insn == tail
9071 || (INSN_P (next_insn)
9072 && recog_memoized (next_insn)
9073 == CODE_FOR_bundle_selector);
9074 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9075 && !start_bundle && !end_bundle
9076 && next_insn
9077 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
9078 && asm_noperands (PATTERN (next_insn)) < 0)
9079 num--;
9080
9081 start_bundle = false;
9082 }
9083 }
9084
9085 gcc_assert (num == 0);
9086 }
9087 #endif
9088
9089 free (index_to_bundle_states);
9090 finish_bundle_state_table ();
9091 bundling_p = 0;
9092 dfa_clean_insn_cache ();
9093 }
9094
9095 /* The following function is called at the end of scheduling a BB or
9096 an EBB. After reload, it inserts stop bits and does insn bundling. */
9097
9098 static void
9099 ia64_sched_finish (FILE *dump, int sched_verbose)
9100 {
9101 if (sched_verbose)
9102 fprintf (dump, "// Finishing schedule.\n");
9103 if (!reload_completed)
9104 return;
9105 if (reload_completed)
9106 {
9107 final_emit_insn_group_barriers (dump);
9108 bundling (dump, sched_verbose, current_sched_info->prev_head,
9109 current_sched_info->next_tail);
9110 if (sched_verbose && dump)
9111 fprintf (dump, "// finishing %d-%d\n",
9112 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9113 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9114
9115 return;
9116 }
9117 }
9118
9119 /* The following function inserts stop bits in scheduled BB or EBB. */
9120
9121 static void
9122 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9123 {
9124 rtx insn;
9125 int need_barrier_p = 0;
9126 int seen_good_insn = 0;
9127
9128 init_insn_group_barriers ();
9129
9130 for (insn = NEXT_INSN (current_sched_info->prev_head);
9131 insn != current_sched_info->next_tail;
9132 insn = NEXT_INSN (insn))
9133 {
9134 if (GET_CODE (insn) == BARRIER)
9135 {
9136 rtx last = prev_active_insn (insn);
9137
9138 if (! last)
9139 continue;
9140 if (GET_CODE (last) == JUMP_INSN
9141 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9142 last = prev_active_insn (last);
9143 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9144 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9145
9146 init_insn_group_barriers ();
9147 seen_good_insn = 0;
9148 need_barrier_p = 0;
9149 }
9150 else if (NONDEBUG_INSN_P (insn))
9151 {
9152 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9153 {
9154 init_insn_group_barriers ();
9155 seen_good_insn = 0;
9156 need_barrier_p = 0;
9157 }
9158 else if (need_barrier_p || group_barrier_needed (insn)
9159 || (mflag_sched_stop_bits_after_every_cycle
9160 && GET_MODE (insn) == TImode
9161 && seen_good_insn))
9162 {
9163 if (TARGET_EARLY_STOP_BITS)
9164 {
9165 rtx last;
9166
9167 for (last = insn;
9168 last != current_sched_info->prev_head;
9169 last = PREV_INSN (last))
9170 if (INSN_P (last) && GET_MODE (last) == TImode
9171 && stops_p [INSN_UID (last)])
9172 break;
9173 if (last == current_sched_info->prev_head)
9174 last = insn;
9175 last = prev_active_insn (last);
9176 if (last
9177 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9178 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9179 last);
9180 init_insn_group_barriers ();
9181 for (last = NEXT_INSN (last);
9182 last != insn;
9183 last = NEXT_INSN (last))
9184 if (INSN_P (last))
9185 {
9186 group_barrier_needed (last);
9187 if (recog_memoized (last) >= 0
9188 && important_for_bundling_p (last))
9189 seen_good_insn = 1;
9190 }
9191 }
9192 else
9193 {
9194 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9195 insn);
9196 init_insn_group_barriers ();
9197 seen_good_insn = 0;
9198 }
9199 group_barrier_needed (insn);
9200 if (recog_memoized (insn) >= 0
9201 && important_for_bundling_p (insn))
9202 seen_good_insn = 1;
9203 }
9204 else if (recog_memoized (insn) >= 0
9205 && important_for_bundling_p (insn))
9206 seen_good_insn = 1;
9207 need_barrier_p = (GET_CODE (insn) == CALL_INSN
9208 || GET_CODE (PATTERN (insn)) == ASM_INPUT
9209 || asm_noperands (PATTERN (insn)) >= 0);
9210 }
9211 }
9212 }
9213
9214 \f
9215
9216 /* The following function returns the lookahead depth used by the
9217 first-cycle multipass DFA insn scheduler. */
9218
9219 static int
9220 ia64_first_cycle_multipass_dfa_lookahead (void)
9221 {
9222 return (reload_completed ? 6 : 4);
9223 }
9224
9225 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
9226
9227 static void
9228 ia64_init_dfa_pre_cycle_insn (void)
9229 {
9230 if (temp_dfa_state == NULL)
9231 {
9232 dfa_state_size = state_size ();
9233 temp_dfa_state = xmalloc (dfa_state_size);
9234 prev_cycle_state = xmalloc (dfa_state_size);
9235 }
9236 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9237 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9238 recog_memoized (dfa_pre_cycle_insn);
9239 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9240 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9241 recog_memoized (dfa_stop_insn);
9242 }
9243
9244 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9245 used by the DFA insn scheduler. */
9246
9247 static rtx
9248 ia64_dfa_pre_cycle_insn (void)
9249 {
9250 return dfa_pre_cycle_insn;
9251 }
9252
9253 /* The following function returns TRUE if PRODUCER (of type ilog or
9254 ld) produces the address for CONSUMER (of type st or stf). */
9255
9256 int
9257 ia64_st_address_bypass_p (rtx producer, rtx consumer)
9258 {
9259 rtx dest, reg, mem;
9260
9261 gcc_assert (producer && consumer);
9262 dest = ia64_single_set (producer);
9263 gcc_assert (dest);
9264 reg = SET_DEST (dest);
9265 gcc_assert (reg);
9266 if (GET_CODE (reg) == SUBREG)
9267 reg = SUBREG_REG (reg);
9268 gcc_assert (GET_CODE (reg) == REG);
9269
9270 dest = ia64_single_set (consumer);
9271 gcc_assert (dest);
9272 mem = SET_DEST (dest);
9273 gcc_assert (mem && GET_CODE (mem) == MEM);
9274 return reg_mentioned_p (reg, mem);
9275 }
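
/* For illustration, a hedged sketch of the insn pair this bypass is
   meant to match (the register numbers are hypothetical):

       producer:  (set (reg:DI r14) (plus:DI (reg:DI r32) (reg:DI r33)))
       consumer:  (set (mem:DI (reg:DI r14)) (reg:DI r35))

   The producer's destination register (r14) is mentioned in the
   consumer's memory address, so the function returns nonzero.  */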
9276
9277 /* The following function returns TRUE if PRODUCER (of type ilog or
9278 ld) produces the address for CONSUMER (of type ld or fld). */
9279
9280 int
9281 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9282 {
9283 rtx dest, src, reg, mem;
9284
9285 gcc_assert (producer && consumer);
9286 dest = ia64_single_set (producer);
9287 gcc_assert (dest);
9288 reg = SET_DEST (dest);
9289 gcc_assert (reg);
9290 if (GET_CODE (reg) == SUBREG)
9291 reg = SUBREG_REG (reg);
9292 gcc_assert (GET_CODE (reg) == REG);
9293
9294 src = ia64_single_set (consumer);
9295 gcc_assert (src);
9296 mem = SET_SRC (src);
9297 gcc_assert (mem);
9298
9299 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9300 mem = XVECEXP (mem, 0, 0);
9301 else if (GET_CODE (mem) == IF_THEN_ELSE)
9302 /* ??? Is this bypass necessary for ld.c? */
9303 {
9304 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9305 mem = XEXP (mem, 1);
9306 }
9307
9308 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9309 mem = XEXP (mem, 0);
9310
9311 if (GET_CODE (mem) == UNSPEC)
9312 {
9313 int c = XINT (mem, 1);
9314
9315 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9316 || c == UNSPEC_LDSA);
9317 mem = XVECEXP (mem, 0, 0);
9318 }
9319
9320 /* Note that LO_SUM is used for GOT loads. */
9321 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9322
9323 return reg_mentioned_p (reg, mem);
9324 }
9325
9326 /* The following function returns TRUE if INSN produces the address for a
9327 load/store insn. We place such insns into an M slot because that
9328 decreases their latency. */
9329
9330 int
9331 ia64_produce_address_p (rtx insn)
9332 {
9333 return insn->call;
9334 }
9335
9336 \f
9337 /* Emit pseudo-ops for the assembler to describe predicate relations.
9338 At present this assumes that we only consider predicate pairs to
9339 be mutex, and that the assembler can deduce proper values from
9340 straight-line code. */
9341
9342 static void
9343 emit_predicate_relation_info (void)
9344 {
9345 basic_block bb;
9346
9347 FOR_EACH_BB_REVERSE (bb)
9348 {
9349 int r;
9350 rtx head = BB_HEAD (bb);
9351
9352 /* We only need such notes at code labels. */
9353 if (GET_CODE (head) != CODE_LABEL)
9354 continue;
9355 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9356 head = NEXT_INSN (head);
9357
9358 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9359 grabbing the entire block of predicate registers. */
9360 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9361 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9362 {
9363 rtx p = gen_rtx_REG (BImode, r);
9364 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9365 if (head == BB_END (bb))
9366 BB_END (bb) = n;
9367 head = n;
9368 }
9369 }
9370
9371 /* Look for conditional calls that do not return, and protect predicate
9372 relations around them. Otherwise the assembler will assume the call
9373 returns, and complain about uses of call-clobbered predicates after
9374 the call. */
9375 FOR_EACH_BB_REVERSE (bb)
9376 {
9377 rtx insn = BB_HEAD (bb);
9378
9379 while (1)
9380 {
9381 if (GET_CODE (insn) == CALL_INSN
9382 && GET_CODE (PATTERN (insn)) == COND_EXEC
9383 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9384 {
9385 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9386 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9387 if (BB_HEAD (bb) == insn)
9388 BB_HEAD (bb) = b;
9389 if (BB_END (bb) == insn)
9390 BB_END (bb) = a;
9391 }
9392
9393 if (insn == BB_END (bb))
9394 break;
9395 insn = NEXT_INSN (insn);
9396 }
9397 }
9398 }
9399
9400 /* Perform machine dependent operations on the rtl chain INSNS. */
9401
9402 static void
9403 ia64_reorg (void)
9404 {
9405 /* We are freeing block_for_insn in the toplev to keep compatibility
9406 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9407 compute_bb_for_insn ();
9408
9409 /* If optimizing, we'll have split before scheduling. */
9410 if (optimize == 0)
9411 split_all_insns ();
9412
9413 if (optimize && ia64_flag_schedule_insns2
9414 && dbg_cnt (ia64_sched2))
9415 {
9416 timevar_push (TV_SCHED2);
9417 ia64_final_schedule = 1;
9418
9419 initiate_bundle_states ();
9420 ia64_nop = make_insn_raw (gen_nop ());
9421 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9422 recog_memoized (ia64_nop);
9423 clocks_length = get_max_uid () + 1;
9424 stops_p = XCNEWVEC (char, clocks_length);
9425
9426 if (ia64_tune == PROCESSOR_ITANIUM2)
9427 {
9428 pos_1 = get_cpu_unit_code ("2_1");
9429 pos_2 = get_cpu_unit_code ("2_2");
9430 pos_3 = get_cpu_unit_code ("2_3");
9431 pos_4 = get_cpu_unit_code ("2_4");
9432 pos_5 = get_cpu_unit_code ("2_5");
9433 pos_6 = get_cpu_unit_code ("2_6");
9434 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9435 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9436 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9437 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9438 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9439 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9440 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9441 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9442 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9443 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9444 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9445 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9446 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9447 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9448 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9449 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9450 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9451 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9452 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9453 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9454 }
9455 else
9456 {
9457 pos_1 = get_cpu_unit_code ("1_1");
9458 pos_2 = get_cpu_unit_code ("1_2");
9459 pos_3 = get_cpu_unit_code ("1_3");
9460 pos_4 = get_cpu_unit_code ("1_4");
9461 pos_5 = get_cpu_unit_code ("1_5");
9462 pos_6 = get_cpu_unit_code ("1_6");
9463 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9464 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9465 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9466 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9467 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9468 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9469 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9470 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9471 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9472 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9473 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9474 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9475 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9476 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9477 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9478 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9479 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9480 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9481 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9482 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9483 }
9484
9485 if (flag_selective_scheduling2
9486 && !maybe_skip_selective_scheduling ())
9487 run_selective_scheduling ();
9488 else
9489 schedule_ebbs ();
9490
9491 /* Redo alignment computation, as it might have gone wrong. */
9492 compute_alignments ();
9493
9494 /* We cannot reuse this one because it has been corrupted by the
9495 evil glat. */
9496 finish_bundle_states ();
9497 free (stops_p);
9498 stops_p = NULL;
9499 emit_insn_group_barriers (dump_file);
9500
9501 ia64_final_schedule = 0;
9502 timevar_pop (TV_SCHED2);
9503 }
9504 else
9505 emit_all_insn_group_barriers (dump_file);
9506
9507 df_analyze ();
9508
9509 /* A call must not be the last instruction in a function, so that the
9510 return address is still within the function, so that unwinding works
9511 properly. Note that IA-64 differs from dwarf2 on this point. */
9512 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9513 {
9514 rtx insn;
9515 int saw_stop = 0;
9516
9517 insn = get_last_insn ();
9518 if (! INSN_P (insn))
9519 insn = prev_active_insn (insn);
9520 if (insn)
9521 {
9522 /* Skip over insns that expand to nothing. */
9523 while (GET_CODE (insn) == INSN
9524 && get_attr_empty (insn) == EMPTY_YES)
9525 {
9526 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9527 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9528 saw_stop = 1;
9529 insn = prev_active_insn (insn);
9530 }
9531 if (GET_CODE (insn) == CALL_INSN)
9532 {
9533 if (! saw_stop)
9534 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9535 emit_insn (gen_break_f ());
9536 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9537 }
9538 }
9539 }
9540
9541 emit_predicate_relation_info ();
9542
9543 if (ia64_flag_var_tracking)
9544 {
9545 timevar_push (TV_VAR_TRACKING);
9546 variable_tracking_main ();
9547 timevar_pop (TV_VAR_TRACKING);
9548 }
9549 df_finish_pass (false);
9550 }
9551 \f
9552 /* Return true if REGNO is used by the epilogue. */
9553
9554 int
9555 ia64_epilogue_uses (int regno)
9556 {
9557 switch (regno)
9558 {
9559 case R_GR (1):
9560 /* With a call to a function in another module, we will write a new
9561 value to "gp". After returning from such a call, we need to make
9562 sure the function restores the original gp-value, even if the
9563 function itself does not use the gp anymore. */
9564 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9565
9566 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9567 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9568 /* For functions defined with the syscall_linkage attribute, all
9569 input registers are marked as live at all function exits. This
9570 prevents the register allocator from using the input registers,
9571 which in turn makes it possible to restart a system call after
9572 an interrupt without having to save/restore the input registers.
9573 This also prevents kernel data from leaking to application code. */
9574 return lookup_attribute ("syscall_linkage",
9575 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9576
9577 case R_BR (0):
9578 /* Conditional return patterns can't represent the use of `b0' as
9579 the return address, so we force the value live this way. */
9580 return 1;
9581
9582 case AR_PFS_REGNUM:
9583 /* Likewise for ar.pfs, which is used by br.ret. */
9584 return 1;
9585
9586 default:
9587 return 0;
9588 }
9589 }
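
/* A hedged sketch of how the syscall_linkage case above is triggered
   from C source (the function name and signature are hypothetical):

       long my_syscall_stub (long a0, long a1)
         __attribute__ ((syscall_linkage));

   For such a function, IN_REG (0) .. IN_REG (7) are reported as used
   by the epilogue, so they remain live at every function exit.  */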
9590
9591 /* Return true if REGNO is used by the frame unwinder. */
9592
9593 int
9594 ia64_eh_uses (int regno)
9595 {
9596 unsigned int r;
9597
9598 if (! reload_completed)
9599 return 0;
9600
9601 if (regno == 0)
9602 return 0;
9603
9604 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9605 if (regno == current_frame_info.r[r]
9606 || regno == emitted_frame_related_regs[r])
9607 return 1;
9608
9609 return 0;
9610 }
9611 \f
9612 /* Return true if this goes in small data/bss. */
9613
9614 /* ??? We could also support our own long data here. Generating movl/add/ld8
9615 instead of addl,ld8/ld8 makes the code bigger, but should make the
9616 code faster because there is one less load. This also includes incomplete
9617 types which can't go in sdata/sbss. */
9618
9619 static bool
9620 ia64_in_small_data_p (const_tree exp)
9621 {
9622 if (TARGET_NO_SDATA)
9623 return false;
9624
9625 /* We want to merge strings, so we never consider them small data. */
9626 if (TREE_CODE (exp) == STRING_CST)
9627 return false;
9628
9629 /* Functions are never small data. */
9630 if (TREE_CODE (exp) == FUNCTION_DECL)
9631 return false;
9632
9633 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9634 {
9635 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9636
9637 if (strcmp (section, ".sdata") == 0
9638 || strncmp (section, ".sdata.", 7) == 0
9639 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9640 || strcmp (section, ".sbss") == 0
9641 || strncmp (section, ".sbss.", 6) == 0
9642 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9643 return true;
9644 }
9645 else
9646 {
9647 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9648
9649 /* If this is an incomplete type with size 0, then we can't put it
9650 in sdata because it might be too big when completed. */
9651 if (size > 0 && size <= ia64_section_threshold)
9652 return true;
9653 }
9654
9655 return false;
9656 }
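
/* A small hedged example of the checks above (the variable names are
   hypothetical): with the default ia64_section_threshold,

       int counter;            small enough, treated as small data
       char buf[4096];         too large, not small data

   and an object placed explicitly with
   __attribute__ ((section (".sdata"))) is treated as small data by the
   section-name comparison above.  */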
9657 \f
9658 /* Output assembly directives for prologue regions. */
9659
9660 /* True if the current basic block is the last one in the function. */
9661
9662 static bool last_block;
9663
9664 /* True if we need a copy_state command at the start of the next block. */
9665
9666 static bool need_copy_state;
9667
9668 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9669 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9670 #endif
9671
9672 /* Emit a debugging label after a call-frame-related insn. We'd
9673 rather output the label right away, but we'd have to output it
9674 after, not before, the instruction, and the instruction has not
9675 been output yet. So we emit the label after the insn, delete it to
9676 avoid introducing basic blocks, and mark it as preserved, such that
9677 it is still output, given that it is referenced in debug info. */
9678
9679 static const char *
9680 ia64_emit_deleted_label_after_insn (rtx insn)
9681 {
9682 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9683 rtx lb = gen_label_rtx ();
9684 rtx label_insn = emit_label_after (lb, insn);
9685
9686 LABEL_PRESERVE_P (lb) = 1;
9687
9688 delete_insn (label_insn);
9689
9690 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9691
9692 return xstrdup (label);
9693 }
9694
9695 /* Define the CFA after INSN with the steady-state definition. */
9696
9697 static void
9698 ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
9699 {
9700 rtx fp = frame_pointer_needed
9701 ? hard_frame_pointer_rtx
9702 : stack_pointer_rtx;
9703 const char *label = ia64_emit_deleted_label_after_insn (insn);
9704
9705 if (!frame)
9706 return;
9707
9708 dwarf2out_def_cfa
9709 (label, REGNO (fp),
9710 ia64_initial_elimination_offset
9711 (REGNO (arg_pointer_rtx), REGNO (fp))
9712 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9713 }
9714
9715 /* All we need to do here is avoid a crash in the generic dwarf2
9716 processing. The real CFA definition is set up above. */
9717
9718 static void
9719 ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label),
9720 rtx ARG_UNUSED (pattern),
9721 int index)
9722 {
9723 gcc_assert (index == UNSPECV_ALLOC);
9724 }
9725
9726 /* The generic dwarf2 frame debug info generator does not define a
9727 separate region for the very end of the epilogue, so refrain from
9728 doing so in the IA64-specific code as well. */
9729
9730 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
9731
9732 /* The function emits unwind directives for the start of an epilogue. */
9733
9734 static void
9735 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9736 {
9737 /* If this isn't the last block of the function, then we need to label the
9738 current state, and copy it back in at the start of the next block. */
9739
9740 if (!last_block)
9741 {
9742 if (unwind)
9743 fprintf (asm_out_file, "\t.label_state %d\n",
9744 ++cfun->machine->state_num);
9745 need_copy_state = true;
9746 }
9747
9748 if (unwind)
9749 fprintf (asm_out_file, "\t.restore sp\n");
9750 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9751 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9752 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9753 }
9754
9755 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9756
9757 static void
9758 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9759 bool unwind, bool frame)
9760 {
9761 rtx dest = SET_DEST (pat);
9762 rtx src = SET_SRC (pat);
9763
9764 if (dest == stack_pointer_rtx)
9765 {
9766 if (GET_CODE (src) == PLUS)
9767 {
9768 rtx op0 = XEXP (src, 0);
9769 rtx op1 = XEXP (src, 1);
9770
9771 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9772
9773 if (INTVAL (op1) < 0)
9774 {
9775 gcc_assert (!frame_pointer_needed);
9776 if (unwind)
9777 fprintf (asm_out_file,
9778 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9779 -INTVAL (op1));
9780 ia64_dwarf2out_def_steady_cfa (insn, frame);
9781 }
9782 else
9783 process_epilogue (asm_out_file, insn, unwind, frame);
9784 }
9785 else
9786 {
9787 gcc_assert (src == hard_frame_pointer_rtx);
9788 process_epilogue (asm_out_file, insn, unwind, frame);
9789 }
9790 }
9791 else if (dest == hard_frame_pointer_rtx)
9792 {
9793 gcc_assert (src == stack_pointer_rtx);
9794 gcc_assert (frame_pointer_needed);
9795
9796 if (unwind)
9797 fprintf (asm_out_file, "\t.vframe r%d\n",
9798 ia64_dbx_register_number (REGNO (dest)));
9799 ia64_dwarf2out_def_steady_cfa (insn, frame);
9800 }
9801 else
9802 gcc_unreachable ();
9803 }
9804
9805 /* This function processes a SET pattern for REG_CFA_REGISTER. */
9806
9807 static void
9808 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
9809 {
9810 rtx dest = SET_DEST (pat);
9811 rtx src = SET_SRC (pat);
9812
9813 int dest_regno = REGNO (dest);
9814 int src_regno = REGNO (src);
9815
9816 switch (src_regno)
9817 {
9818 case BR_REG (0):
9819 /* Saving return address pointer. */
9820 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9821 if (unwind)
9822 fprintf (asm_out_file, "\t.save rp, r%d\n",
9823 ia64_dbx_register_number (dest_regno));
9824 break;
9825
9826 case PR_REG (0):
9827 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9828 if (unwind)
9829 fprintf (asm_out_file, "\t.save pr, r%d\n",
9830 ia64_dbx_register_number (dest_regno));
9831 break;
9832
9833 case AR_UNAT_REGNUM:
9834 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9835 if (unwind)
9836 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9837 ia64_dbx_register_number (dest_regno));
9838 break;
9839
9840 case AR_LC_REGNUM:
9841 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9842 if (unwind)
9843 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9844 ia64_dbx_register_number (dest_regno));
9845 break;
9846
9847 default:
9848 /* Everything else should indicate being stored to memory. */
9849 gcc_unreachable ();
9850 }
9851 }
9852
9853 /* This function processes a SET pattern for REG_CFA_OFFSET. */
9854
9855 static void
9856 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
9857 {
9858 rtx dest = SET_DEST (pat);
9859 rtx src = SET_SRC (pat);
9860 int src_regno = REGNO (src);
9861 const char *saveop;
9862 HOST_WIDE_INT off;
9863 rtx base;
9864
9865 gcc_assert (MEM_P (dest));
9866 if (GET_CODE (XEXP (dest, 0)) == REG)
9867 {
9868 base = XEXP (dest, 0);
9869 off = 0;
9870 }
9871 else
9872 {
9873 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9874 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9875 base = XEXP (XEXP (dest, 0), 0);
9876 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9877 }
9878
9879 if (base == hard_frame_pointer_rtx)
9880 {
9881 saveop = ".savepsp";
9882 off = - off;
9883 }
9884 else
9885 {
9886 gcc_assert (base == stack_pointer_rtx);
9887 saveop = ".savesp";
9888 }
9889
9890 src_regno = REGNO (src);
9891 switch (src_regno)
9892 {
9893 case BR_REG (0):
9894 gcc_assert (!current_frame_info.r[reg_save_b0]);
9895 if (unwind)
9896 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
9897 saveop, off);
9898 break;
9899
9900 case PR_REG (0):
9901 gcc_assert (!current_frame_info.r[reg_save_pr]);
9902 if (unwind)
9903 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
9904 saveop, off);
9905 break;
9906
9907 case AR_LC_REGNUM:
9908 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9909 if (unwind)
9910 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
9911 saveop, off);
9912 break;
9913
9914 case AR_PFS_REGNUM:
9915 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9916 if (unwind)
9917 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
9918 saveop, off);
9919 break;
9920
9921 case AR_UNAT_REGNUM:
9922 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9923 if (unwind)
9924 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
9925 saveop, off);
9926 break;
9927
9928 case GR_REG (4):
9929 case GR_REG (5):
9930 case GR_REG (6):
9931 case GR_REG (7):
9932 if (unwind)
9933 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9934 1 << (src_regno - GR_REG (4)));
9935 break;
9936
9937 case BR_REG (1):
9938 case BR_REG (2):
9939 case BR_REG (3):
9940 case BR_REG (4):
9941 case BR_REG (5):
9942 if (unwind)
9943 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9944 1 << (src_regno - BR_REG (1)));
9945 break;
9946
9947 case FR_REG (2):
9948 case FR_REG (3):
9949 case FR_REG (4):
9950 case FR_REG (5):
9951 if (unwind)
9952 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9953 1 << (src_regno - FR_REG (2)));
9954 break;
9955
9956 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9957 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9958 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9959 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9960 if (unwind)
9961 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9962 1 << (src_regno - FR_REG (12)));
9963 break;
9964
9965 default:
9966 /* ??? For some reason we mark other general registers, even those
9967 we can't represent in the unwind info. Ignore them. */
9968 break;
9969 }
9970 }
9971
9972 /* This function looks at a single insn and emits any directives
9973 required to unwind this insn. */
9974
9975 static void
9976 ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
9977 {
9978 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
9979 bool frame = dwarf2out_do_frame ();
9980 rtx note, pat;
9981 bool handled_one;
9982
9983 if (!unwind && !frame)
9984 return;
9985
9986 if (NOTE_INSN_BASIC_BLOCK_P (insn))
9987 {
9988 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9989
9990 /* Restore unwind state from immediately before the epilogue. */
9991 if (need_copy_state)
9992 {
9993 if (unwind)
9994 {
9995 fprintf (asm_out_file, "\t.body\n");
9996 fprintf (asm_out_file, "\t.copy_state %d\n",
9997 cfun->machine->state_num);
9998 }
9999 if (IA64_CHANGE_CFA_IN_EPILOGUE)
10000 ia64_dwarf2out_def_steady_cfa (insn, frame);
10001 need_copy_state = false;
10002 }
10003 }
10004
10005 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
10006 return;
10007
10008 /* Look for the ALLOC insn. */
10009 if (INSN_CODE (insn) == CODE_FOR_alloc)
10010 {
10011 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10012 int dest_regno = REGNO (dest);
10013
10014 /* If this is the final destination for ar.pfs, then this must
10015 be the alloc in the prologue. */
10016 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10017 {
10018 if (unwind)
10019 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10020 ia64_dbx_register_number (dest_regno));
10021 }
10022 else
10023 {
10024 /* This must be an alloc before a sibcall. We must drop the
10025 old frame info. The easiest way to drop the old frame
10026 info is to ensure we had a ".restore sp" directive
10027 followed by a new prologue. If the procedure doesn't
10028 have a memory-stack frame, we'll issue a dummy ".restore
10029 sp" now. */
10030 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10031 /* If we haven't done process_epilogue () yet, do it now. */
10032 process_epilogue (asm_out_file, insn, unwind, frame);
10033 if (unwind)
10034 fprintf (asm_out_file, "\t.prologue\n");
10035 }
10036 return;
10037 }
10038
10039 handled_one = false;
10040 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10041 switch (REG_NOTE_KIND (note))
10042 {
10043 case REG_CFA_ADJUST_CFA:
10044 pat = XEXP (note, 0);
10045 if (pat == NULL)
10046 pat = PATTERN (insn);
10047 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10048 handled_one = true;
10049 break;
10050
10051 case REG_CFA_OFFSET:
10052 pat = XEXP (note, 0);
10053 if (pat == NULL)
10054 pat = PATTERN (insn);
10055 process_cfa_offset (asm_out_file, pat, unwind);
10056 handled_one = true;
10057 break;
10058
10059 case REG_CFA_REGISTER:
10060 pat = XEXP (note, 0);
10061 if (pat == NULL)
10062 pat = PATTERN (insn);
10063 process_cfa_register (asm_out_file, pat, unwind);
10064 handled_one = true;
10065 break;
10066
10067 case REG_FRAME_RELATED_EXPR:
10068 case REG_CFA_DEF_CFA:
10069 case REG_CFA_EXPRESSION:
10070 case REG_CFA_RESTORE:
10071 case REG_CFA_SET_VDRAP:
10072 /* Not used in the ia64 port. */
10073 gcc_unreachable ();
10074
10075 default:
10076 /* Not a frame-related note. */
10077 break;
10078 }
10079
10080 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10081 explicit action to take. No guessing required. */
10082 gcc_assert (handled_one);
10083 }
10084
10085 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10086
10087 static void
10088 ia64_asm_emit_except_personality (rtx personality)
10089 {
10090 fputs ("\t.personality\t", asm_out_file);
10091 output_addr_const (asm_out_file, personality);
10092 fputc ('\n', asm_out_file);
10093 }
10094
10095 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10096
10097 static void
10098 ia64_asm_init_sections (void)
10099 {
10100 exception_section = get_unnamed_section (0, output_section_asm_op,
10101 "\t.handlerdata");
10102 }
10103
10104 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10105
10106 static enum unwind_info_type
10107 ia64_debug_unwind_info (void)
10108 {
10109 return UI_TARGET;
10110 }
10111
10112 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
10113
10114 static enum unwind_info_type
10115 ia64_except_unwind_info (struct gcc_options *opts)
10116 {
10117 /* Honor the --enable-sjlj-exceptions configure switch. */
10118 #ifdef CONFIG_UNWIND_EXCEPTIONS
10119 if (CONFIG_UNWIND_EXCEPTIONS)
10120 return UI_SJLJ;
10121 #endif
10122
10123 /* For simplicity elsewhere in this file, indicate that all unwind
10124 info is disabled if we're not emitting unwind tables. */
10125 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
10126 return UI_NONE;
10127
10128 return UI_TARGET;
10129 }
10130 \f
10131 enum ia64_builtins
10132 {
10133 IA64_BUILTIN_BSP,
10134 IA64_BUILTIN_COPYSIGNQ,
10135 IA64_BUILTIN_FABSQ,
10136 IA64_BUILTIN_FLUSHRS,
10137 IA64_BUILTIN_INFQ,
10138 IA64_BUILTIN_HUGE_VALQ,
10139 IA64_BUILTIN_max
10140 };
10141
10142 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10143
10144 void
10145 ia64_init_builtins (void)
10146 {
10147 tree fpreg_type;
10148 tree float80_type;
10149 tree decl;
10150
10151 /* The __fpreg type. */
10152 fpreg_type = make_node (REAL_TYPE);
10153 TYPE_PRECISION (fpreg_type) = 82;
10154 layout_type (fpreg_type);
10155 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10156
10157 /* The __float80 type. */
10158 float80_type = make_node (REAL_TYPE);
10159 TYPE_PRECISION (float80_type) = 80;
10160 layout_type (float80_type);
10161 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10162
10163 /* The __float128 type. */
10164 if (!TARGET_HPUX)
10165 {
10166 tree ftype;
10167 tree float128_type = make_node (REAL_TYPE);
10168
10169 TYPE_PRECISION (float128_type) = 128;
10170 layout_type (float128_type);
10171 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10172
10173 /* TFmode support builtins. */
10174 ftype = build_function_type (float128_type, void_list_node);
10175 decl = add_builtin_function ("__builtin_infq", ftype,
10176 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10177 NULL, NULL_TREE);
10178 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10179
10180 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10181 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10182 NULL, NULL_TREE);
10183 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10184
10185 ftype = build_function_type_list (float128_type,
10186 float128_type,
10187 NULL_TREE);
10188 decl = add_builtin_function ("__builtin_fabsq", ftype,
10189 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10190 "__fabstf2", NULL_TREE);
10191 TREE_READONLY (decl) = 1;
10192 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10193
10194 ftype = build_function_type_list (float128_type,
10195 float128_type,
10196 float128_type,
10197 NULL_TREE);
10198 decl = add_builtin_function ("__builtin_copysignq", ftype,
10199 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10200 "__copysigntf3", NULL_TREE);
10201 TREE_READONLY (decl) = 1;
10202 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10203 }
10204 else
10205 /* Under HPUX, this is a synonym for "long double". */
10206 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10207 "__float128");
10208
10209 /* Fwrite on VMS is non-standard. */
10210 if (TARGET_ABI_OPEN_VMS)
10211 {
10212 implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
10213 implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
10214 }
10215
10216 #define def_builtin(name, type, code) \
10217 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10218 NULL, NULL_TREE)
10219
10220 decl = def_builtin ("__builtin_ia64_bsp",
10221 build_function_type (ptr_type_node, void_list_node),
10222 IA64_BUILTIN_BSP);
10223 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10224
10225 decl = def_builtin ("__builtin_ia64_flushrs",
10226 build_function_type (void_type_node, void_list_node),
10227 IA64_BUILTIN_FLUSHRS);
10228 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10229
10230 #undef def_builtin
10231
10232 if (TARGET_HPUX)
10233 {
10234 if (built_in_decls [BUILT_IN_FINITE])
10235 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
10236 "_Isfinite");
10237 if (built_in_decls [BUILT_IN_FINITEF])
10238 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
10239 "_Isfinitef");
10240 if (built_in_decls [BUILT_IN_FINITEL])
10241 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
10242 "_Isfinitef128");
10243 }
10244 }
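
/* Illustrative, hedged user-level usage of the builtins registered
   above (the variable names are hypothetical):

       void *bsp = __builtin_ia64_bsp ();     current backing-store pointer
       __builtin_ia64_flushrs ();             flush the register stack
       __float128 inf = __builtin_infq ();    TFmode infinity

   The TFmode builtins are only created when !TARGET_HPUX, as above.  */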
10245
10246 rtx
10247 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10248 enum machine_mode mode ATTRIBUTE_UNUSED,
10249 int ignore ATTRIBUTE_UNUSED)
10250 {
10251 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10252 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10253
10254 switch (fcode)
10255 {
10256 case IA64_BUILTIN_BSP:
10257 if (! target || ! register_operand (target, DImode))
10258 target = gen_reg_rtx (DImode);
10259 emit_insn (gen_bsp_value (target));
10260 #ifdef POINTERS_EXTEND_UNSIGNED
10261 target = convert_memory_address (ptr_mode, target);
10262 #endif
10263 return target;
10264
10265 case IA64_BUILTIN_FLUSHRS:
10266 emit_insn (gen_flushrs ());
10267 return const0_rtx;
10268
10269 case IA64_BUILTIN_INFQ:
10270 case IA64_BUILTIN_HUGE_VALQ:
10271 {
10272 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10273 REAL_VALUE_TYPE inf;
10274 rtx tmp;
10275
10276 real_inf (&inf);
10277 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10278
10279 tmp = validize_mem (force_const_mem (target_mode, tmp));
10280
10281 if (target == 0)
10282 target = gen_reg_rtx (target_mode);
10283
10284 emit_move_insn (target, tmp);
10285 return target;
10286 }
10287
10288 case IA64_BUILTIN_FABSQ:
10289 case IA64_BUILTIN_COPYSIGNQ:
10290 return expand_call (exp, target, ignore);
10291
10292 default:
10293 gcc_unreachable ();
10294 }
10295
10296 return NULL_RTX;
10297 }
10298
10299 /* Return the ia64 builtin for CODE. */
10300
10301 static tree
10302 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10303 {
10304 if (code >= IA64_BUILTIN_max)
10305 return error_mark_node;
10306
10307 return ia64_builtins[code];
10308 }
10309
10310 /* On HP-UX IA64, aggregate parameters are passed stored in the
10311 most significant bits of the stack slot. */
10312
10313 enum direction
10314 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
10315 {
10316 /* Exception to normal case for structures/unions/etc. */
10317
10318 if (type && AGGREGATE_TYPE_P (type)
10319 && int_size_in_bytes (type) < UNITS_PER_WORD)
10320 return upward;
10321
10322 /* Fall back to the default. */
10323 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10324 }
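
/* A hedged example of the rule above (the struct is hypothetical):

       struct s { char a; char b; };   2 bytes, smaller than UNITS_PER_WORD

   On HP-UX such an aggregate is padded upward, i.e. placed in the most
   significant bits of its stack slot, instead of using the padding
   returned by DEFAULT_FUNCTION_ARG_PADDING.  */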
10325
10326 /* Emit text to declare externally defined variables and functions, because
10327 the Intel assembler does not support undefined externals. */
10328
10329 void
10330 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10331 {
10332 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10333 set in order to avoid putting out names that are never really
10334 used. */
10335 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10336 {
10337 /* maybe_assemble_visibility will return 1 if the assembler
10338 visibility directive is output. */
10339 int need_visibility = ((*targetm.binds_local_p) (decl)
10340 && maybe_assemble_visibility (decl));
10341
10342 #ifdef DO_CRTL_NAMES
10343 DO_CRTL_NAMES;
10344 #endif
10345
10346 /* GNU as does not need anything here, but the HP linker does
10347 need something for external functions. */
10348 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10349 && TREE_CODE (decl) == FUNCTION_DECL)
10350 (*targetm.asm_out.globalize_decl_name) (file, decl);
10351 else if (need_visibility && !TARGET_GNU_AS)
10352 (*targetm.asm_out.globalize_label) (file, name);
10353 }
10354 }
10355
10356 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10357 modes of word_mode and larger. Rename the TFmode libfuncs using the
10358 HPUX conventions. __divtf3 is used for XFmode; we need to keep it for
10359 backward compatibility. */
10360
10361 static void
10362 ia64_init_libfuncs (void)
10363 {
10364 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10365 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10366 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10367 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10368
10369 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10370 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10371 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10372 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10373 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10374
10375 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10376 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10377 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10378 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10379 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10380 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10381
10382 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10383 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10384 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10385 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10386 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10387
10388 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10389 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10390 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10391 /* HP-UX 11.23 libc does not have a function for unsigned
10392 SImode-to-TFmode conversion. */
10393 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10394 }
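
/* Illustrative effect of the optab renaming above (a hedged sketch;
   the C code is hypothetical):

       __float128 f (__float128 a, __float128 b) { return a * b; }

   compiles to a call to _U_Qfmpy rather than the default __multf3, and
   a float to __float128 conversion calls _U_Qfcnvff_sgl_to_quad.  */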
10395
10396 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10397
10398 static void
10399 ia64_hpux_init_libfuncs (void)
10400 {
10401 ia64_init_libfuncs ();
10402
10403 /* The HP SI millicode division and mod functions expect DI arguments.
10404 By turning them off completely we avoid using both libgcc and the
10405 non-standard millicode routines and use the HP DI millicode routines
10406 instead. */
10407
10408 set_optab_libfunc (sdiv_optab, SImode, 0);
10409 set_optab_libfunc (udiv_optab, SImode, 0);
10410 set_optab_libfunc (smod_optab, SImode, 0);
10411 set_optab_libfunc (umod_optab, SImode, 0);
10412
10413 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10414 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10415 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10416 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10417
10418 /* HP-UX libc has TF min/max/abs routines in it. */
10419 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10420 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10421 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10422
10423 /* ia64_expand_compare uses this. */
10424 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10425
10426 /* These should never be used. */
10427 set_optab_libfunc (eq_optab, TFmode, 0);
10428 set_optab_libfunc (ne_optab, TFmode, 0);
10429 set_optab_libfunc (gt_optab, TFmode, 0);
10430 set_optab_libfunc (ge_optab, TFmode, 0);
10431 set_optab_libfunc (lt_optab, TFmode, 0);
10432 set_optab_libfunc (le_optab, TFmode, 0);
10433 }
10434
10435 /* Rename the division and modulus functions in VMS. */
10436
10437 static void
10438 ia64_vms_init_libfuncs (void)
10439 {
10440 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10441 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10442 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10443 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10444 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10445 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10446 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10447 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10448 abort_libfunc = init_one_libfunc ("decc$abort");
10449 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10450 #ifdef MEM_LIBFUNCS_INIT
10451 MEM_LIBFUNCS_INIT;
10452 #endif
10453 }
10454
10455 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10456 the HPUX conventions. */
10457
10458 static void
10459 ia64_sysv4_init_libfuncs (void)
10460 {
10461 ia64_init_libfuncs ();
10462
10463 /* These functions are not part of the HPUX TFmode interface. We
10464 use them instead of _U_Qfcmp, which doesn't work the way we
10465 expect. */
10466 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10467 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10468 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10469 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10470 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10471 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10472
10473 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10474 glibc doesn't have them. */
10475 }
10476
10477 /* Use soft-fp. */
10478
10479 static void
10480 ia64_soft_fp_init_libfuncs (void)
10481 {
10482 }
10483
10484 static bool
10485 ia64_vms_valid_pointer_mode (enum machine_mode mode)
10486 {
10487 return (mode == SImode || mode == DImode);
10488 }
10489 \f
10490 /* For HPUX, it is illegal to have relocations in shared segments. */
10491
10492 static int
10493 ia64_hpux_reloc_rw_mask (void)
10494 {
10495 return 3;
10496 }
10497
10498 /* For others, relax this so that relocations to local data go in
10499 read-only segments, but we still cannot allow global relocations
10500 in read-only segments. */
10501
10502 static int
10503 ia64_reloc_rw_mask (void)
10504 {
10505 return flag_pic ? 3 : 2;
10506 }
10507
10508 /* Return the section to use for X. The only special thing we do here
10509 is to honor small data. */
10510
10511 static section *
10512 ia64_select_rtx_section (enum machine_mode mode, rtx x,
10513 unsigned HOST_WIDE_INT align)
10514 {
10515 if (GET_MODE_SIZE (mode) > 0
10516 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10517 && !TARGET_NO_SDATA)
10518 return sdata_section;
10519 else
10520 return default_elf_select_rtx_section (mode, x, align);
10521 }
10522
10523 static unsigned int
10524 ia64_section_type_flags (tree decl, const char *name, int reloc)
10525 {
10526 unsigned int flags = 0;
10527
10528 if (strcmp (name, ".sdata") == 0
10529 || strncmp (name, ".sdata.", 7) == 0
10530 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10531 || strncmp (name, ".sdata2.", 8) == 0
10532 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10533 || strcmp (name, ".sbss") == 0
10534 || strncmp (name, ".sbss.", 6) == 0
10535 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10536 flags = SECTION_SMALL;
10537
10538 #if TARGET_ABI_OPEN_VMS
10539 if (decl && DECL_ATTRIBUTES (decl)
10540 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10541 flags |= SECTION_VMS_OVERLAY;
10542 #endif
10543
10544 flags |= default_section_type_flags (decl, name, reloc);
10545 return flags;
10546 }
10547
10548 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10549 structure type and that the address of that type should be passed
10550 in out0, rather than in r8. */
10551
10552 static bool
10553 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10554 {
10555 tree ret_type = TREE_TYPE (fntype);
10556
10557 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10558 as the structure return address parameter, if the return value
10559 type has a non-trivial copy constructor or destructor. It is not
10560 clear if this same convention should be used for other
10561 programming languages. Until G++ 3.4, we incorrectly used r8 for
10562 these return values. */
10563 return (abi_version_at_least (2)
10564 && ret_type
10565 && TYPE_MODE (ret_type) == BLKmode
10566 && TREE_ADDRESSABLE (ret_type)
10567 && strcmp (lang_hooks.name, "GNU C++") == 0);
10568 }
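
/* A hedged C++ illustration of the convention described above (the
   class names are hypothetical): for

       struct plain { int a[4]; };             trivially copyable
       struct handle { ~handle (); int fd; };  non-trivial destructor

   a function returning 'plain' in memory receives the return-slot
   address in r8, while one returning 'handle' receives it in out0,
   per the Itanium C++ ABI check above.  */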
10569
10570 /* Output the assembler code for a thunk function. THUNK_DECL is the
10571 declaration for the thunk function itself, FUNCTION is the decl for
10572 the target function. DELTA is an immediate constant offset to be
10573 added to THIS. If VCALL_OFFSET is nonzero, the word at
10574 *(*this + vcall_offset) should be added to THIS. */
10575
10576 static void
10577 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10578 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10579 tree function)
10580 {
10581 rtx this_rtx, insn, funexp;
10582 unsigned int this_parmno;
10583 unsigned int this_regno;
10584 rtx delta_rtx;
10585
10586 reload_completed = 1;
10587 epilogue_completed = 1;
10588
10589 /* Set things up as ia64_expand_prologue might. */
10590 last_scratch_gr_reg = 15;
10591
10592 memset (&current_frame_info, 0, sizeof (current_frame_info));
10593 current_frame_info.spill_cfa_off = -16;
10594 current_frame_info.n_input_regs = 1;
10595 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10596
10597 /* Mark the end of the (empty) prologue. */
10598 emit_note (NOTE_INSN_PROLOGUE_END);
10599
10600 /* Figure out whether "this" will be the first parameter (the
10601 typical case) or the second parameter (as happens when the
10602 virtual function returns certain class objects). */
10603 this_parmno
10604 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10605 ? 1 : 0);
10606 this_regno = IN_REG (this_parmno);
10607 if (!TARGET_REG_NAMES)
10608 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10609
10610 this_rtx = gen_rtx_REG (Pmode, this_regno);
10611
10612 /* Apply the constant offset, if required. */
10613 delta_rtx = GEN_INT (delta);
10614 if (TARGET_ILP32)
10615 {
10616 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10617 REG_POINTER (tmp) = 1;
10618 if (delta && satisfies_constraint_I (delta_rtx))
10619 {
10620 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10621 delta = 0;
10622 }
10623 else
10624 emit_insn (gen_ptr_extend (this_rtx, tmp));
10625 }
10626 if (delta)
10627 {
10628 if (!satisfies_constraint_I (delta_rtx))
10629 {
10630 rtx tmp = gen_rtx_REG (Pmode, 2);
10631 emit_move_insn (tmp, delta_rtx);
10632 delta_rtx = tmp;
10633 }
10634 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10635 }
10636
10637 /* Apply the offset from the vtable, if required. */
10638 if (vcall_offset)
10639 {
10640 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10641 rtx tmp = gen_rtx_REG (Pmode, 2);
10642
10643 if (TARGET_ILP32)
10644 {
10645 rtx t = gen_rtx_REG (ptr_mode, 2);
10646 REG_POINTER (t) = 1;
10647 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10648 if (satisfies_constraint_I (vcall_offset_rtx))
10649 {
10650 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10651 vcall_offset = 0;
10652 }
10653 else
10654 emit_insn (gen_ptr_extend (tmp, t));
10655 }
10656 else
10657 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10658
10659 if (vcall_offset)
10660 {
10661 if (!satisfies_constraint_J (vcall_offset_rtx))
10662 {
10663 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10664 emit_move_insn (tmp2, vcall_offset_rtx);
10665 vcall_offset_rtx = tmp2;
10666 }
10667 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10668 }
10669
10670 if (TARGET_ILP32)
10671 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10672 else
10673 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10674
10675 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10676 }
10677
10678 /* Generate a tail call to the target function. */
10679 if (! TREE_USED (function))
10680 {
10681 assemble_external (function);
10682 TREE_USED (function) = 1;
10683 }
10684 funexp = XEXP (DECL_RTL (function), 0);
10685 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10686 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10687 insn = get_last_insn ();
10688 SIBLING_CALL_P (insn) = 1;
10689
10690 /* Code generation for calls relies on splitting. */
10691 reload_completed = 1;
10692 epilogue_completed = 1;
10693 try_split (PATTERN (insn), insn, 0);
10694
10695 emit_barrier ();
10696
10697 /* Run just enough of rest_of_compilation to get the insns emitted.
10698 There's not really enough bulk here to make other passes such as
10699 instruction scheduling worth while. Note that use_thunk calls
10700 assemble_start_function and assemble_end_function. */
10701
10702 insn_locators_alloc ();
10703 emit_all_insn_group_barriers (NULL);
10704 insn = get_insns ();
10705 shorten_branches (insn);
10706 final_start_function (insn, file, 1);
10707 final (insn, file, 1);
10708 final_end_function ();
10709
10710 reload_completed = 0;
10711 epilogue_completed = 0;
10712 }
10713
10714 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10715
10716 static rtx
10717 ia64_struct_value_rtx (tree fntype,
10718 int incoming ATTRIBUTE_UNUSED)
10719 {
10720 if (TARGET_ABI_OPEN_VMS
10721 || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10722 return NULL_RTX;
10723 return gen_rtx_REG (Pmode, GR_REG (8));
10724 }
10725
10726 static bool
10727 ia64_scalar_mode_supported_p (enum machine_mode mode)
10728 {
10729 switch (mode)
10730 {
10731 case QImode:
10732 case HImode:
10733 case SImode:
10734 case DImode:
10735 case TImode:
10736 return true;
10737
10738 case SFmode:
10739 case DFmode:
10740 case XFmode:
10741 case RFmode:
10742 return true;
10743
10744 case TFmode:
10745 return true;
10746
10747 default:
10748 return false;
10749 }
10750 }
10751
10752 static bool
10753 ia64_vector_mode_supported_p (enum machine_mode mode)
10754 {
10755 switch (mode)
10756 {
10757 case V8QImode:
10758 case V4HImode:
10759 case V2SImode:
10760 return true;
10761
10762 case V2SFmode:
10763 return true;
10764
10765 default:
10766 return false;
10767 }
10768 }
10769
10770 /* Implement the FUNCTION_PROFILER macro. */
10771
10772 void
10773 ia64_output_function_profiler (FILE *file, int labelno)
10774 {
10775 bool indirect_call;
10776
10777 /* If the function needs a static chain and the static chain
10778 register is r15, we use an indirect call so as to bypass
10779 the PLT stub in case the executable is dynamically linked,
10780 because the stub clobbers r15 as per 5.3.6 of the psABI.
10781 We don't need to do that in non-canonical PIC mode. */
10782
10783 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10784 {
10785 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10786 indirect_call = true;
10787 }
10788 else
10789 indirect_call = false;
10790
10791 if (TARGET_GNU_AS)
10792 fputs ("\t.prologue 4, r40\n", file);
10793 else
10794 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10795 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10796
10797 if (NO_PROFILE_COUNTERS)
10798 fputs ("\tmov out3 = r0\n", file);
10799 else
10800 {
10801 char buf[20];
10802 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10803
10804 if (TARGET_AUTO_PIC)
10805 fputs ("\tmovl out3 = @gprel(", file);
10806 else
10807 fputs ("\taddl out3 = @ltoff(", file);
10808 assemble_name (file, buf);
10809 if (TARGET_AUTO_PIC)
10810 fputs (")\n", file);
10811 else
10812 fputs ("), r1\n", file);
10813 }
10814
10815 if (indirect_call)
10816 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10817 fputs ("\t;;\n", file);
10818
10819 fputs ("\t.save rp, r42\n", file);
10820 fputs ("\tmov out2 = b0\n", file);
10821 if (indirect_call)
10822 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10823 fputs ("\t.body\n", file);
10824 fputs ("\tmov out1 = r1\n", file);
10825 if (indirect_call)
10826 {
10827 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10828 fputs ("\tmov b6 = r16\n", file);
10829 fputs ("\tld8 r1 = [r14]\n", file);
10830 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10831 }
10832 else
10833 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10834 }
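
/* For reference, a sketch of the sequence emitted above in the simple
   case (GNU as, no profile counters, no indirect call):

       .prologue 4, r40
       alloc out0 = ar.pfs, 8, 0, 4, 0
       mov out3 = r0
       ;;
       .save rp, r42
       mov out2 = b0
       .body
       mov out1 = r1
       br.call.sptk.many b0 = _mcount
       ;;
*/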
10835
10836 static GTY(()) rtx mcount_func_rtx;
10837 static rtx
10838 gen_mcount_func_rtx (void)
10839 {
10840 if (!mcount_func_rtx)
10841 mcount_func_rtx = init_one_libfunc ("_mcount");
10842 return mcount_func_rtx;
10843 }
10844
10845 void
10846 ia64_profile_hook (int labelno)
10847 {
10848 rtx label, ip;
10849
10850 if (NO_PROFILE_COUNTERS)
10851 label = const0_rtx;
10852 else
10853 {
10854 char buf[30];
10855 const char *label_name;
10856 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10857 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10858 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10859 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10860 }
10861 ip = gen_reg_rtx (Pmode);
10862 emit_insn (gen_ip_value (ip));
10863 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10864 VOIDmode, 3,
10865 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10866 ip, Pmode,
10867 label, Pmode);
10868 }
10869
10870 /* Return the mangling of TYPE if it is an extended fundamental type. */
10871
10872 static const char *
10873 ia64_mangle_type (const_tree type)
10874 {
10875 type = TYPE_MAIN_VARIANT (type);
10876
10877 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10878 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10879 return NULL;
10880
10881 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10882 mangled as "e". */
10883 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10884 return "g";
10885 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10886 an extended mangling. Elsewhere, "e" is available since long
10887 double is 80 bits. */
10888 if (TYPE_MODE (type) == XFmode)
10889 return TARGET_HPUX ? "u9__float80" : "e";
10890 if (TYPE_MODE (type) == RFmode)
10891 return "u7__fpreg";
10892 return NULL;
10893 }
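
/* A hedged illustration of the manglings returned above (the function
   name is hypothetical), using the Itanium C++ ABI scheme:

       void f (__float80);    "_Z1fe" on Linux, "_Z1fu9__float80" on HP-UX
       void f (__fpreg);      "_Z1fu7__fpreg"
       void f (__float128);   "_Z1fg" where __float128 is not long double
*/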
10894
10895 /* Return the diagnostic message string if conversion from FROMTYPE to
10896 TOTYPE is not allowed, NULL otherwise. */
10897 static const char *
10898 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10899 {
10900 /* Reject nontrivial conversion to or from __fpreg. */
10901 if (TYPE_MODE (fromtype) == RFmode
10902 && TYPE_MODE (totype) != RFmode
10903 && TYPE_MODE (totype) != VOIDmode)
10904 return N_("invalid conversion from %<__fpreg%>");
10905 if (TYPE_MODE (totype) == RFmode
10906 && TYPE_MODE (fromtype) != RFmode)
10907 return N_("invalid conversion to %<__fpreg%>");
10908 return NULL;
10909 }
10910
10911 /* Return the diagnostic message string if the unary operation OP is
10912 not permitted on TYPE, NULL otherwise. */
10913 static const char *
10914 ia64_invalid_unary_op (int op, const_tree type)
10915 {
10916 /* Reject operations on __fpreg other than unary + or &. */
10917 if (TYPE_MODE (type) == RFmode
10918 && op != CONVERT_EXPR
10919 && op != ADDR_EXPR)
10920 return N_("invalid operation on %<__fpreg%>");
10921 return NULL;
10922 }
10923
10924 /* Return the diagnostic message string if the binary operation OP is
10925 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10926 static const char *
10927 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10928 {
10929 /* Reject operations on __fpreg. */
10930 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10931 return N_("invalid operation on %<__fpreg%>");
10932 return NULL;
10933 }
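/* Taken together, these hooks allow only trivial uses of __fpreg in
   user code.  Illustrative (hypothetical) examples:
     __fpreg f;
     +f; &f;         unary plus and address-of are accepted
     (double) f;     rejected by ia64_invalid_conversion
     -f; f + f;      rejected by the unary and binary hooks above  */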
10934
10935 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
10936 static void
10937 ia64_option_default_params (void)
10938 {
10939 /* Let the scheduler form additional regions. */
10940 set_default_param_value (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS, 2);
10941
10942 /* Set the default values for cache-related parameters. */
10943 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6);
10944 set_default_param_value (PARAM_L1_CACHE_LINE_SIZE, 32);
10945
10946 set_default_param_value (PARAM_SCHED_MEM_TRUE_DEP_COST, 4);
10947 }
10948
10949 /* HP-UX version_id attribute.
10950 For object foo, if the version_id is set to 1234 put out an alias
10951 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
10952 other than an alias statement because it is an illegal symbol name. */
10953
10954 static tree
10955 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10956 tree name ATTRIBUTE_UNUSED,
10957 tree args,
10958 int flags ATTRIBUTE_UNUSED,
10959 bool *no_add_attrs)
10960 {
10961 tree arg = TREE_VALUE (args);
10962
10963 if (TREE_CODE (arg) != STRING_CST)
10964 {
10965 error ("version attribute is not a string");
10966 *no_add_attrs = true;
10967 return NULL_TREE;
10968 }
10969 return NULL_TREE;
10970 }
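/* A hypothetical use of the attribute described above:
     extern int foo __attribute__ ((version_id ("1234")));
   which requests emission of '.alias foo "foo{1234}"'.  */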
10971
10972 /* Target hook for c_mode_for_suffix. */
10973
10974 static enum machine_mode
10975 ia64_c_mode_for_suffix (char suffix)
10976 {
10977 if (suffix == 'q')
10978 return TFmode;
10979 if (suffix == 'w')
10980 return XFmode;
10981
10982 return VOIDmode;
10983 }
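/* Thus, assuming the front end accepts these suffixes, a literal such as
   1.0q has TFmode (__float128) and 1.0w has XFmode (__float80).  */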
10984
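/* Implement TARGET_PROMOTE_FUNCTION_MODE.  On OpenVMS, widen small
   integer arguments and return values to DImode as required by the HP
   OpenVMS calling standard quoted below; otherwise defer to the default
   promotion.  */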
10985 static enum machine_mode
10986 ia64_promote_function_mode (const_tree type,
10987 enum machine_mode mode,
10988 int *punsignedp,
10989 const_tree funtype,
10990 int for_return)
10991 {
10992 /* Special processing required for OpenVMS ... */
10993
10994 if (!TARGET_ABI_OPEN_VMS)
10995 return default_promote_function_mode (type, mode, punsignedp, funtype,
10996 for_return);
10997
10998 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10999 HP OpenVMS I64 Version 8.2EFT,
11000 chapter 4 "OpenVMS I64 Conventions"
11001 section 4.7 "Procedure Linkage"
11002 subsection 4.7.5.2, "Normal Register Parameters"
11003
11004 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
11005 values passed in registers are zero-filled; signed integral values as
11006 well as unsigned 32-bit integral values are sign-extended to 64 bits.
11007 For all other types passed in the general registers, unused bits are
11008 undefined." */
11009
11010 if (!AGGREGATE_TYPE_P (type)
11011 && GET_MODE_CLASS (mode) == MODE_INT
11012 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
11013 {
11014 if (mode == SImode)
11015 *punsignedp = 0;
11016 return DImode;
11017 }
11018 else
11019 return promote_mode (type, mode, punsignedp);
11020 }
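/* For example, under the VMS rules above an "unsigned short" argument is
   widened to DImode and zero-extended, while 32-bit integer arguments,
   signed or unsigned, are widened and sign-extended (*punsignedp is
   forced to 0 for SImode).  */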
11021
11022 static GTY(()) rtx ia64_dconst_0_5_rtx;
11023
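/* Return a DFmode CONST_DOUBLE rtx for 0.5, created lazily and cached in
   GC memory.  */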
11024 rtx
11025 ia64_dconst_0_5 (void)
11026 {
11027 if (! ia64_dconst_0_5_rtx)
11028 {
11029 REAL_VALUE_TYPE rv;
11030 real_from_string (&rv, "0.5");
11031 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11032 }
11033 return ia64_dconst_0_5_rtx;
11034 }
11035
11036 static GTY(()) rtx ia64_dconst_0_375_rtx;
11037
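/* Likewise, return a DFmode CONST_DOUBLE rtx for 0.375.  */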
11038 rtx
11039 ia64_dconst_0_375 (void)
11040 {
11041 if (! ia64_dconst_0_375_rtx)
11042 {
11043 REAL_VALUE_TYPE rv;
11044 real_from_string (&rv, "0.375");
11045 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11046 }
11047 return ia64_dconst_0_375_rtx;
11048 }
11049
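/* Return the raw mode in which register REGNO is saved or passed:
   floating-point registers always use XFmode; everything else uses the
   default.  */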
11050 static enum machine_mode
11051 ia64_get_reg_raw_mode (int regno)
11052 {
11053 if (FR_REGNO_P (regno))
11054 return XFmode;
11055 return default_get_reg_raw_mode (regno);
11056 }
11057
11058 /* Always default to .text section until HP-UX linker is fixed. */
11059
11060 ATTRIBUTE_UNUSED static section *
11061 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11062 enum node_frequency freq ATTRIBUTE_UNUSED,
11063 bool startup ATTRIBUTE_UNUSED,
11064 bool exit ATTRIBUTE_UNUSED)
11065 {
11066 return NULL;
11067 }
11068
11069 #include "gt-ia64.h"