/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2019 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "cfghooks.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "libfuncs.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "gimplify.h"
#include "intl.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "opts.h"
#include "dumpfile.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
\f
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
				       const_tree, bool);
static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *);
static void ia64_output_function_epilogue (FILE *);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, machine_mode, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (scalar_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_fold_builtin (tree, int, tree *, bool);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static fixed_size_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					   const vec_perm_indices &);

static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
static bool ia64_modes_tieable_p (machine_mode, machine_mode);
static bool ia64_can_change_mode_class (machine_mode, machine_mode,
					reg_class_t);

#define MAX_VECT_LEN 8

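/* Descriptive note (added): one constant vector permutation being expanded;
   holds the target and source operands, the selected element indices in
   PERM, the vector mode and element count, and flags for the single-source
   case and for dry runs (TESTING_P).  */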
struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);

\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "syscall_linkage", 0, 0, false, true, true, false, NULL, NULL },
  { "model",	       1, 1, true, false, false, false,
    ia64_handle_model_attribute, NULL },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false, false,
    ia64_vms_common_object_attribute, NULL },
#endif
  { "version_id",      1, 1, true, false, false, false,
    ia64_handle_version_id_attribute, NULL },
  { NULL,	       0, 0, false, false, false, false, NULL, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ia64_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef  TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}

typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      const char *name;

      if (TREE_CODE (id) == IDENTIFIER_NODE)
        name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
        name = TREE_STRING_POINTER (id);
      else
        abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
	   size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

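/* Descriptive comment (added): return nonzero if exactly one of OP1 and OP2
   is accepted by basereg_operand, i.e. the two operands differ in whether
   they are base registers.  */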
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
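/* Worked example (added for illustration): ROP = 0x7f8 with RSHIFT = 3
   shifts down to 0xff, and exact_log2 (0xff + 1) gives a field length of 8;
   a non-contiguous mask such as 0x5 gives exact_log2 (0x6) == -1,
   i.e. failure.  */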

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static inline bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
	 symbols:
	  - @gprel does not require dynamic linker
	  - and does not use .sdata section
	 https://gcc.gnu.org/bugzilla/60465 */
      emit_insn (gen_load_gprel64 (dest, src));
    }
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (dest, tmp));
	}
    }

  return true;
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (mode, sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx_insn *insn, *first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
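/* For illustration (added sketch, derived from the REG case below): for a
   !REVERSED TImode load from (mem (reg r)), the two halves become

     out[0] = (mem:DI (post_inc r))	-- bytes 0..7, then r += 8
     out[1] = (mem:DI (post_dec r))	-- bytes 8..15, then r -= 8

   so the pointer ends up where it started; when DEAD is true the second
   access omits the post-decrement.  */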
3f622353 1419
f57fc998 1420static rtx
2ffe0e02 1421ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
3f622353 1422{
2ffe0e02
ZW
1423 rtx fixup = 0;
1424
3f622353
RH
1425 switch (GET_CODE (in))
1426 {
1427 case REG:
2ffe0e02
ZW
1428 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1429 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1430 break;
3f622353
RH
1431
1432 case CONST_INT:
1433 case CONST_DOUBLE:
2ffe0e02 1434 /* Cannot occur reversed. */
e820471b 1435 gcc_assert (!reversed);
2ffe0e02 1436
f57fc998
ZW
1437 if (GET_MODE (in) != TFmode)
1438 split_double (in, &out[0], &out[1]);
1439 else
1440 /* split_double does not understand how to split a TFmode
1441 quantity into a pair of DImode constants. */
1442 {
f57fc998
ZW
1443 unsigned HOST_WIDE_INT p[2];
1444 long l[4]; /* TFmode is 128 bits */
1445
34a72c33 1446 real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
f57fc998
ZW
1447
1448 if (FLOAT_WORDS_BIG_ENDIAN)
1449 {
1450 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1451 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1452 }
1453 else
1454 {
9eb578c8
L
1455 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1456 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
f57fc998
ZW
1457 }
1458 out[0] = GEN_INT (p[0]);
1459 out[1] = GEN_INT (p[1]);
1460 }
2ffe0e02
ZW
1461 break;
1462
1463 case MEM:
1464 {
1465 rtx base = XEXP (in, 0);
1466 rtx offset;
1467
1468 switch (GET_CODE (base))
1469 {
1470 case REG:
1471 if (!reversed)
1472 {
1473 out[0] = adjust_automodify_address
1474 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1475 out[1] = adjust_automodify_address
1476 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1477 }
1478 else
1479 {
1480 /* Reversal requires a pre-increment, which can only
1481 be done as a separate insn. */
1482 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1483 out[0] = adjust_automodify_address
1484 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1485 out[1] = adjust_address (in, DImode, 0);
1486 }
1487 break;
1488
1489 case POST_INC:
e820471b
NS
1490 gcc_assert (!reversed && !dead);
1491
2ffe0e02
ZW
1492 /* Just do the increment in two steps. */
1493 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1494 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1495 break;
1496
1497 case POST_DEC:
e820471b
NS
1498 gcc_assert (!reversed && !dead);
1499
2ffe0e02
ZW
1500 /* Add 8, subtract 24. */
1501 base = XEXP (base, 0);
1502 out[0] = adjust_automodify_address
1503 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1504 out[1] = adjust_automodify_address
1505 (in, DImode,
0a81f074
RS
1506 gen_rtx_POST_MODIFY (Pmode, base,
1507 plus_constant (Pmode, base, -24)),
2ffe0e02
ZW
1508 8);
1509 break;
1510
1511 case POST_MODIFY:
e820471b
NS
1512 gcc_assert (!reversed && !dead);
1513
2ffe0e02
ZW
1514 /* Extract and adjust the modification. This case is
1515 trickier than the others, because we might have an
1516 index register, or we might have a combined offset that
1517 doesn't fit a signed 9-bit displacement field. We can
1518 assume the incoming expression is already legitimate. */
1519 offset = XEXP (base, 1);
1520 base = XEXP (base, 0);
1521
1522 out[0] = adjust_automodify_address
1523 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1524
1525 if (GET_CODE (XEXP (offset, 1)) == REG)
1526 {
1527 /* Can't adjust the postmodify to match. Emit the
1528 original, then a separate addition insn. */
1529 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1530 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1531 }
1532 else
1533 {
1534 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1535 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1536 {
1537 /* Again the postmodify cannot be made to match,
1538 but in this case it's more efficient to get rid
1539 of the postmodify entirely and fix up with an
1540 add insn. */
1541 out[1] = adjust_automodify_address (in, DImode, base, 8);
1542 fixup = gen_adddi3
1543 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1544 }
1545 else
1546 {
1547 /* Combined offset still fits in the displacement field.
1548 (We cannot overflow it at the high end.) */
1549 out[1] = adjust_automodify_address
1550 (in, DImode, gen_rtx_POST_MODIFY
1551 (Pmode, base, gen_rtx_PLUS
1552 (Pmode, base,
1553 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1554 8);
1555 }
1556 }
1557 break;
1558
1559 default:
e820471b 1560 gcc_unreachable ();
1561 }
1562 break;
1563 }
1564
1565 default:
e820471b 1566 gcc_unreachable ();
3f622353 1567 }
1568
1569 return fixup;
1570}
1571
1572/* Split a TImode or TFmode move instruction after reload.
1573 This is used by *movtf_internal and *movti_internal. */
1574void
1575ia64_split_tmode_move (rtx operands[])
1576{
1577 rtx in[2], out[2], insn;
1578 rtx fixup[2];
1579 bool dead = false;
1580 bool reversed = false;
1581
1582 /* It is possible for reload to decide to overwrite a pointer with
1583 the value it points to. In that case we have to do the loads in
1584 the appropriate order so that the pointer is not destroyed too
1585 early. Also we must not generate a postmodify for that second
1586 load, or rws_access_regno will die. And we must not generate a
1587 postmodify for the second load if the destination register
1588 overlaps with the base register. */
1589 if (GET_CODE (operands[1]) == MEM
1590 && reg_overlap_mentioned_p (operands[0], operands[1]))
f57fc998 1591 {
1592 rtx base = XEXP (operands[1], 0);
1593 while (GET_CODE (base) != REG)
1594 base = XEXP (base, 0);
f57fc998 1595
2ffe0e02 1596 if (REGNO (base) == REGNO (operands[0]))
6d3f673c 1597 reversed = true;
2430d1e2 1598
1599 if (refers_to_regno_p (REGNO (operands[0]),
1600 REGNO (operands[0])+2,
1601 base, 0))
2430d1e2 1602 dead = true;
1603 }
1604 /* Another reason to do the moves in reversed order is if the first
1605 element of the target register pair is also the second element of
1606 the source register pair. */
1607 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1608 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1609 reversed = true;
1610
1611 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1612 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1613
1614#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1615 if (GET_CODE (EXP) == MEM \
1616 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1617 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1618 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
bbbbb16a 1619 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
2ffe0e02 1620
f7df4a84 1621 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1622 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1623 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1624
f7df4a84 1625 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1626 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1627 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1628
1629 if (fixup[0])
1630 emit_insn (fixup[0]);
1631 if (fixup[1])
1632 emit_insn (fixup[1]);
1633
1634#undef MAYBE_ADD_REG_INC_NOTE
1635}
1636
02befdf4 1637/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
3f622353 1638 through memory plus an extra GR scratch register. Except that you can
1639 either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
1640 from SECONDARY_RELOAD_CLASS, but not both.
1641
1642 We got into problems in the first place by allowing a construct like
02befdf4 1643 (subreg:XF (reg:TI)), which we got from a union containing a long double.
f5143c46 1644 This solution attempts to prevent this situation from occurring. When
1645 we see something like the above, we spill the inner register to memory. */
1646
4de67c26 1647static rtx
ef4bddc2 1648spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1649{
1650 if (GET_CODE (in) == SUBREG
1651 && GET_MODE (SUBREG_REG (in)) == TImode
1652 && GET_CODE (SUBREG_REG (in)) == REG)
1653 {
9474e8ab 1654 rtx memt = assign_stack_temp (TImode, 16);
68d22aa5 1655 emit_move_insn (memt, SUBREG_REG (in));
4de67c26 1656 return adjust_address (memt, mode, 0);
1657 }
1658 else if (force && GET_CODE (in) == REG)
1659 {
9474e8ab 1660 rtx memx = assign_stack_temp (mode, 16);
1661 emit_move_insn (memx, in);
1662 return memx;
3f622353 1663 }
1664 else
1665 return in;
1666}
f2f90c63 1667
1668/* Expand the movxf or movrf pattern (MODE says which) with the given
1669 OPERANDS, returning true if the pattern should then invoke
1670 DONE. */
1671
1672bool
ef4bddc2 1673ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1674{
1675 rtx op0 = operands[0];
1676
1677 if (GET_CODE (op0) == SUBREG)
1678 op0 = SUBREG_REG (op0);
1679
1680 /* We must support XFmode loads into general registers for stdarg/vararg,
1681 unprototyped calls, and a rare case where a long double is passed as
1682 an argument after a float HFA fills the FP registers. We split them into
1683 DImode loads for convenience. We also need to support XFmode stores
1684 for the last case. This case does not happen for stdarg/vararg routines,
1685 because we do a block store to memory of unnamed arguments. */
1686
1687 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1688 {
1689 rtx out[2];
1690
1691 /* We're hoping to transform everything that deals with XFmode
1692 quantities and GR registers early in the compiler. */
b3a13419 1693 gcc_assert (can_create_pseudo_p ());
1694
1695 /* Struct to register can just use TImode instead. */
1696 if ((GET_CODE (operands[1]) == SUBREG
1697 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1698 || (GET_CODE (operands[1]) == REG
1699 && GR_REGNO_P (REGNO (operands[1]))))
1700 {
1701 rtx op1 = operands[1];
1702
1703 if (GET_CODE (op1) == SUBREG)
1704 op1 = SUBREG_REG (op1);
1705 else
1706 op1 = gen_rtx_REG (TImode, REGNO (op1));
1707
1708 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1709 return true;
1710 }
1711
1712 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1713 {
ae4d3291 1714 /* Don't word-swap when reading in the constant. */
4de67c26 1715 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1716 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1717 0, mode));
4de67c26 1718 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1719 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1720 0, mode));
1721 return true;
1722 }
1723
1724 /* If the quantity is in a register not known to be GR, spill it. */
1725 if (register_operand (operands[1], mode))
1726 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1727
1728 gcc_assert (GET_CODE (operands[1]) == MEM);
1729
1730 /* Don't word-swap when reading in the value. */
1731 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1732 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1733
1734 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1735 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1736 return true;
1737 }
1738
1739 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1740 {
1741 /* We're hoping to transform everything that deals with XFmode
1742 quantities and GR registers early in the compiler. */
b3a13419 1743 gcc_assert (can_create_pseudo_p ());
1744
1745 /* Op0 can't be a GR_REG here, as that case is handled above.
1746 If op0 is a register, then we spill op1, so that we now have a
1747 MEM operand. This requires creating an XFmode subreg of a TImode reg
1748 to force the spill. */
1749 if (register_operand (operands[0], mode))
1750 {
1751 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1752 op1 = gen_rtx_SUBREG (mode, op1, 0);
1753 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1754 }
1755
1756 else
1757 {
1758 rtx in[2];
1759
1760 gcc_assert (GET_CODE (operands[0]) == MEM);
1761
1762 /* Don't word-swap when writing out the value. */
1763 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1764 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1765
1766 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1767 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1768 return true;
1769 }
1770 }
1771
1772 if (!reload_in_progress && !reload_completed)
1773 {
1774 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1775
1776 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1777 {
1778 rtx memt, memx, in = operands[1];
1779 if (CONSTANT_P (in))
1780 in = validize_mem (force_const_mem (mode, in));
1781 if (GET_CODE (in) == MEM)
1782 memt = adjust_address (in, TImode, 0);
1783 else
1784 {
9474e8ab 1785 memt = assign_stack_temp (TImode, 16);
1786 memx = adjust_address (memt, mode, 0);
1787 emit_move_insn (memx, in);
1788 }
1789 emit_move_insn (op0, memt);
1790 return true;
1791 }
1792
1793 if (!ia64_move_ok (operands[0], operands[1]))
1794 operands[1] = force_reg (mode, operands[1]);
1795 }
1796
1797 return false;
1798}
1799
1800/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1801 with the expression that holds the compare result (in VOIDmode). */
f2f90c63 1802
1803static GTY(()) rtx cmptf_libfunc;
1804
1805void
1806ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
f2f90c63 1807{
f90b7a5a 1808 enum rtx_code code = GET_CODE (*expr);
1809 rtx cmp;
1810
1811 /* If we have a BImode input, then we already have a compare result, and
1812 do not need to emit another comparison. */
f90b7a5a 1813 if (GET_MODE (*op0) == BImode)
f2f90c63 1814 {
1815 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1816 cmp = *op0;
f2f90c63 1817 }
1818 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1819 magic number as its third argument that indicates what to do.
1820 The return value is an integer to be compared against zero. */
f90b7a5a 1821 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1822 {
1823 enum qfcmp_magic {
8fc53a5f 1824 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1825 QCMP_UNORD = 2,
1826 QCMP_EQ = 4,
1827 QCMP_LT = 8,
1828 QCMP_GT = 16
1829 };
1830 int magic;
24ea7948 1831 enum rtx_code ncode;
9b2ea071 1832 rtx ret;
e820471b 1833
f90b7a5a 1834 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1835 switch (code)
1836 {
1837 /* 1 = equal, 0 = not equal. Equality operators do
8fc53a5f 1838 not raise FP_INVALID when given a NaN operand. */
1839 case EQ: magic = QCMP_EQ; ncode = NE; break;
1840 case NE: magic = QCMP_EQ; ncode = EQ; break;
1841 /* isunordered() from C99. */
1842 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
b1346fa3 1843 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
24ea7948 1844 /* Relational operators raise FP_INVALID when given
8fc53a5f 1845 a NaN operand. */
1846 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1847 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1848 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1849 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1850 /* Unordered relational operators do not raise FP_INVALID
1851 when given a NaN operand. */
1852 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1853 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1854 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1855 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1856 /* Not supported. */
1857 case UNEQ:
1858 case LTGT:
e820471b 1859 default: gcc_unreachable ();
1860 }
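      /* Illustrative example (added): (le:BI a b) becomes a call
	 _U_Qfcmp (a, b, QCMP_LT|QCMP_EQ|QCMP_INV), and the BImode result
	 is then formed by testing that return value with NE against zero.  */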
1861
1862 start_sequence ();
1863
db69559b 1864 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
f90b7a5a 1865 *op0, TFmode, *op1, TFmode,
1866 GEN_INT (magic), DImode);
1867 cmp = gen_reg_rtx (BImode);
1868 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1869 ret, const0_rtx)));
24ea7948 1870
9b2ea071 1871 rtx_insn *insns = get_insns ();
1872 end_sequence ();
1873
1874 emit_libcall_block (insns, cmp, cmp,
f90b7a5a 1875 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1876 code = NE;
1877 }
1878 else
1879 {
1880 cmp = gen_reg_rtx (BImode);
f7df4a84 1881 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1882 code = NE;
1883 }
1884
1885 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1886 *op0 = cmp;
1887 *op1 = const0_rtx;
f2f90c63 1888}
2ed4af6f 1889
e934ca47
RH
1890/* Generate an integral vector comparison. Return true if the condition has
1891 been reversed, and so the sense of the comparison should be inverted. */
f61134e8
RH
1892
1893static bool
ef4bddc2 1894ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
f61134e8
RH
1895 rtx dest, rtx op0, rtx op1)
1896{
1897 bool negate = false;
1898 rtx x;
1899
e934ca47 1900 /* Canonicalize the comparison to EQ, GT, GTU. */
f61134e8
RH
1901 switch (code)
1902 {
1903 case EQ:
1904 case GT:
e934ca47 1905 case GTU:
f61134e8
RH
1906 break;
1907
1908 case NE:
f61134e8 1909 case LE:
e934ca47
RH
1910 case LEU:
1911 code = reverse_condition (code);
f61134e8
RH
1912 negate = true;
1913 break;
1914
1915 case GE:
e934ca47
RH
1916 case GEU:
1917 code = reverse_condition (code);
f61134e8
RH
1918 negate = true;
1919 /* FALLTHRU */
1920
1921 case LT:
f61134e8 1922 case LTU:
e934ca47
RH
1923 code = swap_condition (code);
1924 x = op0, op0 = op1, op1 = x;
1925 break;
f61134e8 1926
e934ca47
RH
1927 default:
1928 gcc_unreachable ();
1929 }
f61134e8 1930
e934ca47 1931 /* Unsigned parallel compare is not supported by the hardware. Play some
6283ba26 1932 tricks to turn this into a signed comparison against 0. */
e934ca47
RH
1933 if (code == GTU)
1934 {
1935 switch (mode)
1936 {
4e10a5a7 1937 case E_V2SImode:
f61134e8 1938 {
e934ca47
RH
1939 rtx t1, t2, mask;
1940
9540f5ef
SE
1941 /* Subtract (-(INT MAX) - 1) from both operands to make
1942 them signed. */
6a8b00eb 1943 mask = gen_int_mode (0x80000000, SImode);
59d06c05 1944 mask = gen_const_vec_duplicate (V2SImode, mask);
9540f5ef
SE
1945 mask = force_reg (mode, mask);
1946 t1 = gen_reg_rtx (mode);
1947 emit_insn (gen_subv2si3 (t1, op0, mask));
1948 t2 = gen_reg_rtx (mode);
1949 emit_insn (gen_subv2si3 (t2, op1, mask));
1950 op0 = t1;
1951 op1 = t2;
6283ba26 1952 code = GT;
f61134e8 1953 }
e934ca47
RH
1954 break;
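	    /* Illustrative note (added): on 32-bit lanes, x >u y exactly when
	       (int) (x - 0x80000000) > (int) (y - 0x80000000), so biasing
	       both operands by INT_MIN lets the signed parallel compare
	       implement the unsigned one.  */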
1955
4e10a5a7
RS
1956 case E_V8QImode:
1957 case E_V4HImode:
e934ca47
RH
1958 /* Perform a parallel unsigned saturating subtraction. */
1959 x = gen_reg_rtx (mode);
f7df4a84 1960 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
6283ba26
RH
1961
1962 code = EQ;
1963 op0 = x;
1964 op1 = CONST0_RTX (mode);
1965 negate = !negate;
e934ca47
RH
1966 break;
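	  /* Illustrative note (added): for unsigned elements, a >u b exactly
	     when the saturating difference a - b is nonzero, so we test the
	     US_MINUS result for equality with zero and flip NEGATE.  */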
1967
1968 default:
1969 gcc_unreachable ();
1970 }
f61134e8
RH
1971 }
1972
1973 x = gen_rtx_fmt_ee (code, mode, op0, op1);
f7df4a84 1974 emit_insn (gen_rtx_SET (dest, x));
f61134e8
RH
1975
1976 return negate;
1977}
1978
f61134e8
RH
1979/* Emit an integral vector conditional move. */
1980
1981void
1982ia64_expand_vecint_cmov (rtx operands[])
1983{
ef4bddc2 1984 machine_mode mode = GET_MODE (operands[0]);
f61134e8
RH
1985 enum rtx_code code = GET_CODE (operands[3]);
1986 bool negate;
1987 rtx cmp, x, ot, of;
1988
f61134e8
RH
1989 cmp = gen_reg_rtx (mode);
1990 negate = ia64_expand_vecint_compare (code, mode, cmp,
1991 operands[4], operands[5]);
1992
1993 ot = operands[1+negate];
1994 of = operands[2-negate];
1995
1996 if (ot == CONST0_RTX (mode))
1997 {
1998 if (of == CONST0_RTX (mode))
1999 {
2000 emit_move_insn (operands[0], ot);
2001 return;
2002 }
2003
2004 x = gen_rtx_NOT (mode, cmp);
2005 x = gen_rtx_AND (mode, x, of);
f7df4a84 2006 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
2007 }
2008 else if (of == CONST0_RTX (mode))
2009 {
2010 x = gen_rtx_AND (mode, cmp, ot);
f7df4a84 2011 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
2012 }
2013 else
2014 {
2015 rtx t, f;
2016
2017 t = gen_reg_rtx (mode);
2018 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
f7df4a84 2019 emit_insn (gen_rtx_SET (t, x));
f61134e8
RH
2020
2021 f = gen_reg_rtx (mode);
2022 x = gen_rtx_NOT (mode, cmp);
2023 x = gen_rtx_AND (mode, x, operands[2-negate]);
f7df4a84 2024 emit_insn (gen_rtx_SET (f, x));
f61134e8
RH
2025
2026 x = gen_rtx_IOR (mode, t, f);
f7df4a84 2027 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
2028 }
2029}
2030
2031/* Emit an integral vector min or max operation. Return true if all done. */
2032
2033bool
ef4bddc2 2034ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
f61134e8
RH
2035 rtx operands[])
2036{
cabddb23 2037 rtx xops[6];
f61134e8
RH
2038
2039 /* These four combinations are supported directly. */
2040 if (mode == V8QImode && (code == UMIN || code == UMAX))
2041 return false;
2042 if (mode == V4HImode && (code == SMIN || code == SMAX))
2043 return false;
2044
93b4080b
RH
2045 /* This combination can be implemented with only saturating subtraction. */
2046 if (mode == V4HImode && code == UMAX)
2047 {
2048 rtx x, tmp = gen_reg_rtx (mode);
2049
2050 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
f7df4a84 2051 emit_insn (gen_rtx_SET (tmp, x));
93b4080b
RH
2052
2053 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2054 return true;
2055 }
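  /* Illustrative note (added): for unsigned halfwords,
     max (a, b) == sat (a - b) + b, since the saturating difference is 0
     when a <= b and a - b otherwise.  */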
2056
f61134e8
RH
 2057 /* Everything else is implemented via vector comparisons. */
2058 xops[0] = operands[0];
2059 xops[4] = xops[1] = operands[1];
2060 xops[5] = xops[2] = operands[2];
2061
2062 switch (code)
2063 {
2064 case UMIN:
2065 code = LTU;
2066 break;
2067 case UMAX:
2068 code = GTU;
2069 break;
2070 case SMIN:
2071 code = LT;
2072 break;
2073 case SMAX:
2074 code = GT;
2075 break;
2076 default:
e820471b 2077 gcc_unreachable ();
f61134e8
RH
2078 }
2079 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2080
2081 ia64_expand_vecint_cmov (xops);
2082 return true;
2083}
2084
55eaaa5b
RH
2085/* The vectors LO and HI each contain N halves of a double-wide vector.
2086 Reassemble either the first N/2 or the second N/2 elements. */
604e3ff3
RH
2087
2088void
55eaaa5b 2089ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
604e3ff3 2090{
ef4bddc2 2091 machine_mode vmode = GET_MODE (lo);
e6431744
RH
2092 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2093 struct expand_vec_perm_d d;
2094 bool ok;
604e3ff3 2095
e6431744
RH
2096 d.target = gen_lowpart (vmode, out);
2097 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2098 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2099 d.vmode = vmode;
2100 d.nelt = nelt;
2101 d.one_operand_p = false;
2102 d.testing_p = false;
2103
2104 high = (highp ? nelt / 2 : 0);
2105 for (i = 0; i < nelt / 2; ++i)
604e3ff3 2106 {
e6431744
RH
2107 d.perm[i * 2] = i + high;
2108 d.perm[i * 2 + 1] = i + high + nelt;
604e3ff3
RH
2109 }
2110
e6431744
RH
2111 ok = ia64_expand_vec_perm_const_1 (&d);
2112 gcc_assert (ok);
604e3ff3
RH
2113}
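/* Illustrative example (added): for V8QImode with HIGHP false on a
   little-endian target, the permutation built above is
   { 0, 8, 1, 9, 2, 10, 3, 11 }, i.e. an interleave of the low halves
   of LO and HI.  */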
2114
55eaaa5b 2115/* Return a vector of the sign-extension of VEC. */
e898620c 2116
55eaaa5b
RH
2117static rtx
2118ia64_unpack_sign (rtx vec, bool unsignedp)
e898620c 2119{
ef4bddc2 2120 machine_mode mode = GET_MODE (vec);
55eaaa5b 2121 rtx zero = CONST0_RTX (mode);
e898620c 2122
e898620c 2123 if (unsignedp)
55eaaa5b 2124 return zero;
e898620c
RH
2125 else
2126 {
55eaaa5b 2127 rtx sign = gen_reg_rtx (mode);
e898620c
RH
2128 bool neg;
2129
55eaaa5b 2130 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
e898620c 2131 gcc_assert (!neg);
55eaaa5b
RH
2132
2133 return sign;
e898620c 2134 }
55eaaa5b 2135}
e898620c 2136
55eaaa5b 2137/* Emit an integral vector unpack operation. */
e898620c 2138
55eaaa5b
RH
2139void
2140ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2141{
2142 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2143 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
e898620c
RH
2144}
2145
55eaaa5b
RH
 2146/* Emit an integral vector widening sum operation. */
2147
604e3ff3 2148void
55eaaa5b 2149ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
604e3ff3 2150{
ef4bddc2 2151 machine_mode wmode;
55eaaa5b 2152 rtx l, h, t, sign;
604e3ff3 2153
55eaaa5b
RH
2154 sign = ia64_unpack_sign (operands[1], unsignedp);
2155
2156 wmode = GET_MODE (operands[0]);
2157 l = gen_reg_rtx (wmode);
2158 h = gen_reg_rtx (wmode);
604e3ff3 2159
55eaaa5b
RH
2160 ia64_unpack_assemble (l, operands[1], sign, false);
2161 ia64_unpack_assemble (h, operands[1], sign, true);
604e3ff3 2162
55eaaa5b
RH
2163 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2164 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2165 if (t != operands[0])
2166 emit_move_insn (operands[0], t);
604e3ff3
RH
2167}
2168
2ed4af6f
RH
2169/* Emit the appropriate sequence for a call. */
2170
2171void
9c808aad
AJ
2172ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2173 int sibcall_p)
2ed4af6f 2174{
599aedd9 2175 rtx insn, b0;
2ed4af6f
RH
2176
2177 addr = XEXP (addr, 0);
c8083186 2178 addr = convert_memory_address (DImode, addr);
2ed4af6f 2179 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 2180
599aedd9 2181 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
2182 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2183 {
2184 if (sibcall_p)
599aedd9 2185 insn = gen_sibcall_nogp (addr);
2ed4af6f 2186 else if (! retval)
599aedd9 2187 insn = gen_call_nogp (addr, b0);
2ed4af6f 2188 else
599aedd9
RH
2189 insn = gen_call_value_nogp (retval, addr, b0);
2190 insn = emit_call_insn (insn);
2ed4af6f 2191 }
2ed4af6f 2192 else
599aedd9
RH
2193 {
2194 if (sibcall_p)
2195 insn = gen_sibcall_gp (addr);
2196 else if (! retval)
2197 insn = gen_call_gp (addr, b0);
2198 else
2199 insn = gen_call_value_gp (retval, addr, b0);
2200 insn = emit_call_insn (insn);
2ed4af6f 2201
599aedd9
RH
2202 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2203 }
6dad5a56 2204
599aedd9 2205 if (sibcall_p)
4e14f1f9 2206 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
f2972bf8
DR
2207
2208 if (TARGET_ABI_OPEN_VMS)
2209 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2210 gen_rtx_REG (DImode, GR_REG (25)));
599aedd9
RH
2211}
2212
6fb5fa3c
DB
2213static void
2214reg_emitted (enum ia64_frame_regs r)
2215{
2216 if (emitted_frame_related_regs[r] == 0)
2217 emitted_frame_related_regs[r] = current_frame_info.r[r];
2218 else
2219 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2220}
2221
2222static int
2223get_reg (enum ia64_frame_regs r)
2224{
2225 reg_emitted (r);
2226 return current_frame_info.r[r];
2227}
2228
2229static bool
2230is_emitted (int regno)
2231{
09639a83 2232 unsigned int r;
6fb5fa3c
DB
2233
2234 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2235 if (emitted_frame_related_regs[r] == regno)
2236 return true;
2237 return false;
2238}
2239
599aedd9 2240void
9c808aad 2241ia64_reload_gp (void)
599aedd9
RH
2242{
2243 rtx tmp;
2244
6fb5fa3c
DB
2245 if (current_frame_info.r[reg_save_gp])
2246 {
2247 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2248 }
2ed4af6f 2249 else
599aedd9
RH
2250 {
2251 HOST_WIDE_INT offset;
13f70342 2252 rtx offset_r;
599aedd9
RH
2253
2254 offset = (current_frame_info.spill_cfa_off
2255 + current_frame_info.spill_size);
2256 if (frame_pointer_needed)
2257 {
2258 tmp = hard_frame_pointer_rtx;
2259 offset = -offset;
2260 }
2261 else
2262 {
2263 tmp = stack_pointer_rtx;
2264 offset = current_frame_info.total_size - offset;
2265 }
2266
13f70342
RH
2267 offset_r = GEN_INT (offset);
2268 if (satisfies_constraint_I (offset_r))
2269 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
599aedd9
RH
2270 else
2271 {
13f70342 2272 emit_move_insn (pic_offset_table_rtx, offset_r);
599aedd9
RH
2273 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2274 pic_offset_table_rtx, tmp));
2275 }
2276
2277 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2278 }
2279
2280 emit_move_insn (pic_offset_table_rtx, tmp);
2281}
2282
2283void
9c808aad
AJ
2284ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2285 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
2286{
2287 rtx insn;
2288 bool is_desc = false;
2289
2290 /* If we find we're calling through a register, then we're actually
2291 calling through a descriptor, so load up the values. */
4e14f1f9 2292 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
2293 {
2294 rtx tmp;
2295 bool addr_dead_p;
2296
2297 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 2298 we can legitimately change the global lifetime of the GP
9c808aad 2299 (in the form of killing where previously live). This is
599aedd9
RH
2300 because a call through a descriptor doesn't use the previous
2301 value of the GP, while a direct call does, and we do not
2302 commit to either form until the split here.
2303
2304 That said, this means that we lack precise life info for
2305 whether ADDR is dead after this call. This is not terribly
2306 important, since we can fix things up essentially for free
2307 with the POST_DEC below, but it's nice to not use it when we
2308 can immediately tell it's not necessary. */
2309 addr_dead_p = ((noreturn_p || sibcall_p
2310 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2311 REGNO (addr)))
2312 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2313
2314 /* Load the code address into scratch_b. */
2315 tmp = gen_rtx_POST_INC (Pmode, addr);
2316 tmp = gen_rtx_MEM (Pmode, tmp);
2317 emit_move_insn (scratch_r, tmp);
2318 emit_move_insn (scratch_b, scratch_r);
2319
2320 /* Load the GP address. If ADDR is not dead here, then we must
2321 revert the change made above via the POST_INCREMENT. */
2322 if (!addr_dead_p)
2323 tmp = gen_rtx_POST_DEC (Pmode, addr);
2324 else
2325 tmp = addr;
2326 tmp = gen_rtx_MEM (Pmode, tmp);
2327 emit_move_insn (pic_offset_table_rtx, tmp);
2328
2329 is_desc = true;
2330 addr = scratch_b;
2331 }
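      /* Illustrative note (added): the descriptor is a pair of doublewords,
	 the code entry point at [addr] and the callee's gp at [addr + 8];
	 the post-increment above reads the entry point and leaves ADDR
	 pointing at the gp word.  */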
2ed4af6f 2332
6dad5a56 2333 if (sibcall_p)
599aedd9
RH
2334 insn = gen_sibcall_nogp (addr);
2335 else if (retval)
2336 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 2337 else
599aedd9 2338 insn = gen_call_nogp (addr, retaddr);
6dad5a56 2339 emit_call_insn (insn);
2ed4af6f 2340
599aedd9
RH
2341 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2342 ia64_reload_gp ();
2ed4af6f 2343}
16df4ee6
RH
2344
2345/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2346
2347 This differs from the generic code in that we know about the zero-extending
2348 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2349 also know that ld.acq+cmpxchg.rel equals a full barrier.
2350
2351 The loop we want to generate looks like
2352
2353 cmp_reg = mem;
2354 label:
2355 old_reg = cmp_reg;
2356 new_reg = cmp_reg op val;
2357 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2358 if (cmp_reg != old_reg)
2359 goto label;
2360
2361 Note that we only do the plain load from memory once. Subsequent
2362 iterations use the value loaded by the compare-and-swap pattern. */
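/* Illustrative note (added): the compare-and-swap above is IA-64's cmpxchg,
   which compares the memory location against the ar.ccv application
   register and stores the new value only on a match; hence the explicit
   move of the expected value into ar.ccv below.  */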
2363
2364void
2365ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
28875d67 2366 rtx old_dst, rtx new_dst, enum memmodel model)
16df4ee6 2367{
ef4bddc2 2368 machine_mode mode = GET_MODE (mem);
16df4ee6
RH
2369 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2370 enum insn_code icode;
2371
2372 /* Special case for using fetchadd. */
dca13767
JJ
2373 if ((mode == SImode || mode == DImode)
2374 && (code == PLUS || code == MINUS)
2375 && fetchadd_operand (val, mode))
16df4ee6 2376 {
dca13767
JJ
2377 if (code == MINUS)
2378 val = GEN_INT (-INTVAL (val));
2379
16df4ee6
RH
2380 if (!old_dst)
2381 old_dst = gen_reg_rtx (mode);
2382
28875d67
RH
2383 switch (model)
2384 {
2385 case MEMMODEL_ACQ_REL:
2386 case MEMMODEL_SEQ_CST:
46b35980 2387 case MEMMODEL_SYNC_SEQ_CST:
28875d67
RH
2388 emit_insn (gen_memory_barrier ());
2389 /* FALLTHRU */
2390 case MEMMODEL_RELAXED:
2391 case MEMMODEL_ACQUIRE:
46b35980 2392 case MEMMODEL_SYNC_ACQUIRE:
28875d67
RH
2393 case MEMMODEL_CONSUME:
2394 if (mode == SImode)
2395 icode = CODE_FOR_fetchadd_acq_si;
2396 else
2397 icode = CODE_FOR_fetchadd_acq_di;
2398 break;
2399 case MEMMODEL_RELEASE:
46b35980 2400 case MEMMODEL_SYNC_RELEASE:
28875d67
RH
2401 if (mode == SImode)
2402 icode = CODE_FOR_fetchadd_rel_si;
2403 else
2404 icode = CODE_FOR_fetchadd_rel_di;
2405 break;
2406
2407 default:
2408 gcc_unreachable ();
2409 }
16df4ee6 2410
16df4ee6
RH
2411 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2412
2413 if (new_dst)
2414 {
2415 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2416 true, OPTAB_WIDEN);
2417 if (new_reg != new_dst)
2418 emit_move_insn (new_dst, new_reg);
2419 }
2420 return;
2421 }
2422
2423 /* Because of the volatile mem read, we get an ld.acq, which is the
28875d67
RH
2424 front half of the full barrier. The end half is the cmpxchg.rel.
2425 For relaxed and release memory models, we don't need this. But we
2426 also don't bother trying to prevent it either. */
46b35980 2427 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
28875d67 2428 || MEM_VOLATILE_P (mem));
16df4ee6
RH
2429
2430 old_reg = gen_reg_rtx (DImode);
2431 cmp_reg = gen_reg_rtx (DImode);
2432 label = gen_label_rtx ();
2433
2434 if (mode != DImode)
2435 {
2436 val = simplify_gen_subreg (DImode, val, mode, 0);
2437 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2438 }
2439 else
2440 emit_move_insn (cmp_reg, mem);
2441
2442 emit_label (label);
2443
2444 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2445 emit_move_insn (old_reg, cmp_reg);
2446 emit_move_insn (ar_ccv, cmp_reg);
2447
2448 if (old_dst)
2449 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2450
2451 new_reg = cmp_reg;
2452 if (code == NOT)
2453 {
974920dc
UB
2454 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2455 true, OPTAB_DIRECT);
2456 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
16df4ee6 2457 }
974920dc
UB
2458 else
2459 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2460 true, OPTAB_DIRECT);
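  /* Illustrative note (added): the NOT case computes new = ~(old & val),
     i.e. the NAND-style update used by the fetch-and-nand builtins.  */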
16df4ee6
RH
2461
2462 if (mode != DImode)
2463 new_reg = gen_lowpart (mode, new_reg);
2464 if (new_dst)
2465 emit_move_insn (new_dst, new_reg);
2466
28875d67 2467 switch (model)
16df4ee6 2468 {
28875d67
RH
2469 case MEMMODEL_RELAXED:
2470 case MEMMODEL_ACQUIRE:
46b35980 2471 case MEMMODEL_SYNC_ACQUIRE:
28875d67
RH
2472 case MEMMODEL_CONSUME:
2473 switch (mode)
2474 {
4e10a5a7
RS
2475 case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2476 case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2477 case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2478 case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
28875d67
RH
2479 default:
2480 gcc_unreachable ();
2481 }
2482 break;
2483
2484 case MEMMODEL_RELEASE:
46b35980 2485 case MEMMODEL_SYNC_RELEASE:
28875d67
RH
2486 case MEMMODEL_ACQ_REL:
2487 case MEMMODEL_SEQ_CST:
46b35980 2488 case MEMMODEL_SYNC_SEQ_CST:
28875d67
RH
2489 switch (mode)
2490 {
4e10a5a7
RS
2491 case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2492 case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2493 case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2494 case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
28875d67
RH
2495 default:
2496 gcc_unreachable ();
2497 }
2498 break;
2499
16df4ee6
RH
2500 default:
2501 gcc_unreachable ();
2502 }
2503
2504 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2505
6819a463 2506 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
16df4ee6 2507}
809d4ef1 2508\f
3b572406
RH
2509/* Begin the assembly file. */
2510
1bc7c5b6 2511static void
9c808aad 2512ia64_file_start (void)
1bc7c5b6
ZW
2513{
2514 default_file_start ();
2515 emit_safe_across_calls ();
2516}
2517
3b572406 2518void
9c808aad 2519emit_safe_across_calls (void)
3b572406
RH
2520{
2521 unsigned int rs, re;
2522 int out_state;
2523
2524 rs = 1;
2525 out_state = 0;
2526 while (1)
2527 {
2528 while (rs < 64 && call_used_regs[PR_REG (rs)])
2529 rs++;
2530 if (rs >= 64)
2531 break;
2532 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2533 continue;
2534 if (out_state == 0)
2535 {
1bc7c5b6 2536 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
2537 out_state = 1;
2538 }
2539 else
1bc7c5b6 2540 fputc (',', asm_out_file);
3b572406 2541 if (re == rs + 1)
1bc7c5b6 2542 fprintf (asm_out_file, "p%u", rs);
3b572406 2543 else
1bc7c5b6 2544 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
2545 rs = re + 1;
2546 }
2547 if (out_state)
1bc7c5b6 2548 fputc ('\n', asm_out_file);
3b572406
RH
2549}
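/* Illustrative example (added): with the usual IA-64 call-used predicate
   set this emits a directive along the lines of
	.pred.safe_across_calls p1-p5,p16-p63
   where the exact ranges depend on call_used_regs.  */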
2550
812b587e
SE
2551/* Globalize a declaration. */
2552
2553static void
2554ia64_globalize_decl_name (FILE * stream, tree decl)
2555{
2556 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2557 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2558 if (version_attr)
2559 {
2560 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2561 const char *p = TREE_STRING_POINTER (v);
2562 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2563 }
2564 targetm.asm_out.globalize_label (stream, name);
2565 if (TREE_CODE (decl) == FUNCTION_DECL)
2566 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2567}
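/* Illustrative example (added): a function foo (hypothetical name) declared
   with __attribute__((version_id ("20040821"))) would produce
	.alias foo#, "foo{20040821}"
   ahead of the usual globalize and .type directives.  */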
2568
97e242b0
RH
2569/* Helper function for ia64_compute_frame_size: find an appropriate general
2570 register to spill some special register to. SPECIAL_SPILL_MASK contains
2571 bits in GR0 to GR31 that have already been allocated by this routine.
2572 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 2573
97e242b0 2574static int
6fb5fa3c 2575find_gr_spill (enum ia64_frame_regs r, int try_locals)
97e242b0
RH
2576{
2577 int regno;
2578
6fb5fa3c
DB
2579 if (emitted_frame_related_regs[r] != 0)
2580 {
2581 regno = emitted_frame_related_regs[r];
2951f79b
JJ
2582 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2583 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
6fb5fa3c 2584 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
416ff32e 2585 else if (crtl->is_leaf
6fb5fa3c
DB
2586 && regno >= GR_REG (1) && regno <= GR_REG (31))
2587 current_frame_info.gr_used_mask |= 1 << regno;
2588
2589 return regno;
2590 }
2591
97e242b0
RH
2592 /* If this is a leaf function, first try an otherwise unused
2593 call-clobbered register. */
416ff32e 2594 if (crtl->is_leaf)
97e242b0
RH
2595 {
2596 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2597 if (! df_regs_ever_live_p (regno)
97e242b0
RH
2598 && call_used_regs[regno]
2599 && ! fixed_regs[regno]
2600 && ! global_regs[regno]
6fb5fa3c
DB
2601 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2602 && ! is_emitted (regno))
97e242b0
RH
2603 {
2604 current_frame_info.gr_used_mask |= 1 << regno;
2605 return regno;
2606 }
2607 }
2608
2609 if (try_locals)
2610 {
2611 regno = current_frame_info.n_local_regs;
9502c558
JW
2612 /* If there is a frame pointer, then we can't use loc79, because
2613 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2614 reg_name switching code in ia64_expand_prologue. */
2951f79b
JJ
2615 while (regno < (80 - frame_pointer_needed))
2616 if (! is_emitted (LOC_REG (regno++)))
2617 {
2618 current_frame_info.n_local_regs = regno;
2619 return LOC_REG (regno - 1);
2620 }
97e242b0
RH
2621 }
2622
2623 /* Failed to find a general register to spill to. Must use stack. */
2624 return 0;
2625}
2626
2627/* In order to make for nice schedules, we try to allocate every temporary
2628 to a different register. We must of course stay away from call-saved,
2629 fixed, and global registers. We must also stay away from registers
2630 allocated in current_frame_info.gr_used_mask, since those include regs
2631 used all through the prologue.
2632
2633 Any register allocated here must be used immediately. The idea is to
2634 aid scheduling, not to solve data flow problems. */
2635
2636static int last_scratch_gr_reg;
2637
2638static int
9c808aad 2639next_scratch_gr_reg (void)
97e242b0
RH
2640{
2641 int i, regno;
2642
2643 for (i = 0; i < 32; ++i)
2644 {
2645 regno = (last_scratch_gr_reg + i + 1) & 31;
2646 if (call_used_regs[regno]
2647 && ! fixed_regs[regno]
2648 && ! global_regs[regno]
2649 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2650 {
2651 last_scratch_gr_reg = regno;
2652 return regno;
2653 }
2654 }
2655
2656 /* There must be _something_ available. */
e820471b 2657 gcc_unreachable ();
97e242b0
RH
2658}
2659
2660/* Helper function for ia64_compute_frame_size, called through
2661 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2662
2663static void
9c808aad 2664mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2665{
97e242b0
RH
2666 unsigned int regno = REGNO (reg);
2667 if (regno < 32)
f95e79cc 2668 {
462a99aa 2669 unsigned int i, n = REG_NREGS (reg);
f95e79cc
RH
2670 for (i = 0; i < n; ++i)
2671 current_frame_info.gr_used_mask |= 1 << (regno + i);
2672 }
c65ebc55
JW
2673}
2674
6fb5fa3c 2675
c65ebc55
JW
2676/* Returns the number of bytes offset between the frame pointer and the stack
2677 pointer for the current function. SIZE is the number of bytes of space
2678 needed for local variables. */
97e242b0
RH
2679
2680static void
9c808aad 2681ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2682{
97e242b0
RH
2683 HOST_WIDE_INT total_size;
2684 HOST_WIDE_INT spill_size = 0;
2685 HOST_WIDE_INT extra_spill_size = 0;
2686 HOST_WIDE_INT pretend_args_size;
c65ebc55 2687 HARD_REG_SET mask;
97e242b0
RH
2688 int n_spilled = 0;
2689 int spilled_gr_p = 0;
2690 int spilled_fr_p = 0;
2691 unsigned int regno;
2951f79b
JJ
2692 int min_regno;
2693 int max_regno;
97e242b0 2694 int i;
c65ebc55 2695
97e242b0
RH
2696 if (current_frame_info.initialized)
2697 return;
294dac80 2698
97e242b0 2699 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2700 CLEAR_HARD_REG_SET (mask);
2701
97e242b0
RH
2702 /* Don't allocate scratches to the return register. */
2703 diddle_return_value (mark_reg_gr_used_mask, NULL);
2704
2705 /* Don't allocate scratches to the EH scratch registers. */
2706 if (cfun->machine->ia64_eh_epilogue_sp)
2707 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2708 if (cfun->machine->ia64_eh_epilogue_bsp)
2709 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2710
7b84aac0 2711 /* Static stack checking uses r2 and r3. */
9c1b56c4
JL
2712 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
2713 || flag_stack_clash_protection)
7b84aac0
EB
2714 current_frame_info.gr_used_mask |= 0xc;
2715
97e242b0
RH
2716 /* Find the size of the register stack frame. We have only 80 local
2717 registers, because we reserve 8 for the inputs and 8 for the
2718 outputs. */
2719
2720 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2721 since we'll be adjusting that down later. */
2722 regno = LOC_REG (78) + ! frame_pointer_needed;
2723 for (; regno >= LOC_REG (0); regno--)
6fb5fa3c 2724 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
97e242b0
RH
2725 break;
2726 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2727
3f67ac08
DM
2728 /* For functions marked with the syscall_linkage attribute, we must mark
2729 all eight input registers as in use, so that locals aren't visible to
2730 the caller. */
2731
2732 if (cfun->machine->n_varargs > 0
2733 || lookup_attribute ("syscall_linkage",
2734 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2735 current_frame_info.n_input_regs = 8;
2736 else
2737 {
2738 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
6fb5fa3c 2739 if (df_regs_ever_live_p (regno))
97e242b0
RH
2740 break;
2741 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2742 }
2743
2744 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
6fb5fa3c 2745 if (df_regs_ever_live_p (regno))
97e242b0
RH
2746 break;
2747 i = regno - OUT_REG (0) + 1;
2748
d26afa4f 2749#ifndef PROFILE_HOOK
97e242b0 2750 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2751 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2752 profiling, we need two output registers for the two bb_init_trace_func
2753 arguments. */
e3b5732b 2754 if (crtl->profile)
97e242b0 2755 i = MAX (i, 1);
d26afa4f 2756#endif
97e242b0
RH
2757 current_frame_info.n_output_regs = i;
2758
2759 /* ??? No rotating register support yet. */
2760 current_frame_info.n_rotate_regs = 0;
2761
2762 /* Discover which registers need spilling, and how much room that
9c808aad 2763 will take. Begin with floating point and general registers,
97e242b0
RH
2764 which will always wind up on the stack. */
2765
2766 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
6fb5fa3c 2767 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2768 {
2769 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2770 spill_size += 16;
2771 n_spilled += 1;
2772 spilled_fr_p = 1;
c65ebc55
JW
2773 }
2774
97e242b0 2775 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2776 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2777 {
2778 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2779 spill_size += 8;
2780 n_spilled += 1;
2781 spilled_gr_p = 1;
c65ebc55
JW
2782 }
2783
97e242b0 2784 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
6fb5fa3c 2785 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2786 {
2787 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2788 spill_size += 8;
2789 n_spilled += 1;
c65ebc55
JW
2790 }
2791
97e242b0
RH
2792 /* Now come all special registers that might get saved in other
2793 general registers. */
9c808aad 2794
97e242b0
RH
2795 if (frame_pointer_needed)
2796 {
6fb5fa3c 2797 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
0c35f902
JW
2798 /* If we did not get a register, then we take LOC79. This is guaranteed
2799 to be free, even if regs_ever_live is already set, because this is
2800 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2801 as we don't count loc79 above. */
6fb5fa3c 2802 if (current_frame_info.r[reg_fp] == 0)
0c35f902 2803 {
6fb5fa3c
DB
2804 current_frame_info.r[reg_fp] = LOC_REG (79);
2805 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
0c35f902 2806 }
97e242b0
RH
2807 }
2808
416ff32e 2809 if (! crtl->is_leaf)
c65ebc55 2810 {
97e242b0
RH
2811 /* Emit a save of BR0 if we call other functions. Do this even
2812 if this function doesn't return, as EH depends on this to be
2813 able to unwind the stack. */
2814 SET_HARD_REG_BIT (mask, BR_REG (0));
2815
6fb5fa3c
DB
2816 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2817 if (current_frame_info.r[reg_save_b0] == 0)
97e242b0 2818 {
ae1e2d4c 2819 extra_spill_size += 8;
97e242b0
RH
2820 n_spilled += 1;
2821 }
2822
2823 /* Similarly for ar.pfs. */
2824 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2825 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2826 if (current_frame_info.r[reg_save_ar_pfs] == 0)
97e242b0
RH
2827 {
2828 extra_spill_size += 8;
2829 n_spilled += 1;
2830 }
599aedd9
RH
2831
2832 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2833 registers are clobbered, so we fall back to the stack. */
6fb5fa3c 2834 current_frame_info.r[reg_save_gp]
e3b5732b 2835 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
6fb5fa3c 2836 if (current_frame_info.r[reg_save_gp] == 0)
599aedd9
RH
2837 {
2838 SET_HARD_REG_BIT (mask, GR_REG (1));
2839 spill_size += 8;
2840 n_spilled += 1;
2841 }
c65ebc55
JW
2842 }
2843 else
97e242b0 2844 {
6fb5fa3c 2845 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
97e242b0
RH
2846 {
2847 SET_HARD_REG_BIT (mask, BR_REG (0));
ae1e2d4c 2848 extra_spill_size += 8;
97e242b0
RH
2849 n_spilled += 1;
2850 }
f5bdba44 2851
6fb5fa3c 2852 if (df_regs_ever_live_p (AR_PFS_REGNUM))
f5bdba44
RH
2853 {
2854 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2855 current_frame_info.r[reg_save_ar_pfs]
2856 = find_gr_spill (reg_save_ar_pfs, 1);
2857 if (current_frame_info.r[reg_save_ar_pfs] == 0)
f5bdba44
RH
2858 {
2859 extra_spill_size += 8;
2860 n_spilled += 1;
2861 }
2862 }
97e242b0 2863 }
c65ebc55 2864
97e242b0
RH
2865 /* Unwind descriptor hackery: things are most efficient if we allocate
2866 consecutive GR save registers for RP, PFS, FP in that order. However,
2867 it is absolutely critical that FP get the only hard register that's
2868 guaranteed to be free, so we allocated it first. If all three did
2869 happen to be allocated hard regs, and are consecutive, rearrange them
6fb5fa3c
DB
2870 into the preferred order now.
2871
2872 If we have already emitted code for any of those registers,
2873 then it's already too late to change. */
2951f79b
JJ
2874 min_regno = MIN (current_frame_info.r[reg_fp],
2875 MIN (current_frame_info.r[reg_save_b0],
2876 current_frame_info.r[reg_save_ar_pfs]));
2877 max_regno = MAX (current_frame_info.r[reg_fp],
2878 MAX (current_frame_info.r[reg_save_b0],
2879 current_frame_info.r[reg_save_ar_pfs]));
2880 if (min_regno > 0
2881 && min_regno + 2 == max_regno
2882 && (current_frame_info.r[reg_fp] == min_regno + 1
2883 || current_frame_info.r[reg_save_b0] == min_regno + 1
2884 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2885 && (emitted_frame_related_regs[reg_save_b0] == 0
2886 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2887 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2888 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2889 && (emitted_frame_related_regs[reg_fp] == 0
2890 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
5527bf14 2891 {
2951f79b
JJ
2892 current_frame_info.r[reg_save_b0] = min_regno;
2893 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2894 current_frame_info.r[reg_fp] = min_regno + 2;
5527bf14
RH
2895 }
2896
97e242b0
RH
2897 /* See if we need to store the predicate register block. */
2898 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2899 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
97e242b0
RH
2900 break;
2901 if (regno <= PR_REG (63))
c65ebc55 2902 {
97e242b0 2903 SET_HARD_REG_BIT (mask, PR_REG (0));
6fb5fa3c
DB
2904 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2905 if (current_frame_info.r[reg_save_pr] == 0)
97e242b0
RH
2906 {
2907 extra_spill_size += 8;
2908 n_spilled += 1;
2909 }
2910
2911 /* ??? Mark them all as used so that register renaming and such
2912 are free to use them. */
2913 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2914 df_set_regs_ever_live (regno, true);
c65ebc55
JW
2915 }
2916
97e242b0 2917 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2918 ar.unat as well. The check for existing liveness allows inline asm
2919 to touch ar.unat. */
2920 if (spilled_gr_p || cfun->machine->n_varargs
6fb5fa3c 2921 || df_regs_ever_live_p (AR_UNAT_REGNUM))
97e242b0 2922 {
6fb5fa3c 2923 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
97e242b0 2924 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
6fb5fa3c
DB
2925 current_frame_info.r[reg_save_ar_unat]
2926 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2927 if (current_frame_info.r[reg_save_ar_unat] == 0)
97e242b0
RH
2928 {
2929 extra_spill_size += 8;
2930 n_spilled += 1;
2931 }
2932 }
2933
6fb5fa3c 2934 if (df_regs_ever_live_p (AR_LC_REGNUM))
97e242b0
RH
2935 {
2936 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
6fb5fa3c
DB
2937 current_frame_info.r[reg_save_ar_lc]
2938 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2939 if (current_frame_info.r[reg_save_ar_lc] == 0)
97e242b0
RH
2940 {
2941 extra_spill_size += 8;
2942 n_spilled += 1;
2943 }
2944 }
2945
2946 /* If we have an odd number of words of pretend arguments written to
2947 the stack, then the FR save area will be unaligned. We round the
2948 size of this area up to keep things 16 byte aligned. */
2949 if (spilled_fr_p)
38173d38 2950 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
97e242b0 2951 else
38173d38 2952 pretend_args_size = crtl->args.pretend_args_size;
97e242b0
RH
2953
2954 total_size = (spill_size + extra_spill_size + size + pretend_args_size
38173d38 2955 + crtl->outgoing_args_size);
97e242b0
RH
2956 total_size = IA64_STACK_ALIGN (total_size);
2957
2958 /* We always use the 16-byte scratch area provided by the caller, but
2959 if we are a leaf function, there's no one to which we need to provide
44bd7f65
EB
2960 a scratch area. However, if the function allocates dynamic stack space,
2961 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2962 so we need to cope. */
2963 if (crtl->is_leaf && !cfun->calls_alloca)
97e242b0
RH
2964 total_size = MAX (0, total_size - 16);
2965
c65ebc55 2966 current_frame_info.total_size = total_size;
97e242b0
RH
2967 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2968 current_frame_info.spill_size = spill_size;
2969 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2970 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2971 current_frame_info.n_spilled = n_spilled;
c65ebc55 2972 current_frame_info.initialized = reload_completed;
97e242b0
RH
2973}
2974
7b5cbb57
AS
2975/* Worker function for TARGET_CAN_ELIMINATE. */
2976
2977bool
2978ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2979{
416ff32e 2980 return (to == BR_REG (0) ? crtl->is_leaf : true);
7b5cbb57
AS
2981}
2982
97e242b0
RH
2983/* Compute the initial difference between the specified pair of registers. */
2984
2985HOST_WIDE_INT
9c808aad 2986ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2987{
2988 HOST_WIDE_INT offset;
2989
2990 ia64_compute_frame_size (get_frame_size ());
2991 switch (from)
2992 {
2993 case FRAME_POINTER_REGNUM:
e820471b 2994 switch (to)
97e242b0 2995 {
e820471b 2996 case HARD_FRAME_POINTER_REGNUM:
44bd7f65
EB
2997 offset = -current_frame_info.total_size;
2998 if (!crtl->is_leaf || cfun->calls_alloca)
2999 offset += 16 + crtl->outgoing_args_size;
e820471b
NS
3000 break;
3001
3002 case STACK_POINTER_REGNUM:
44bd7f65
EB
3003 offset = 0;
3004 if (!crtl->is_leaf || cfun->calls_alloca)
3005 offset += 16 + crtl->outgoing_args_size;
e820471b
NS
3006 break;
3007
3008 default:
3009 gcc_unreachable ();
97e242b0 3010 }
97e242b0 3011 break;
c65ebc55 3012
97e242b0
RH
3013 case ARG_POINTER_REGNUM:
3014 /* Arguments start above the 16 byte save area, unless stdarg
3015 in which case we store through the 16 byte save area. */
e820471b
NS
3016 switch (to)
3017 {
3018 case HARD_FRAME_POINTER_REGNUM:
38173d38 3019 offset = 16 - crtl->args.pretend_args_size;
e820471b
NS
3020 break;
3021
3022 case STACK_POINTER_REGNUM:
3023 offset = (current_frame_info.total_size
38173d38 3024 + 16 - crtl->args.pretend_args_size);
e820471b
NS
3025 break;
3026
3027 default:
3028 gcc_unreachable ();
3029 }
97e242b0
RH
3030 break;
3031
97e242b0 3032 default:
e820471b 3033 gcc_unreachable ();
97e242b0
RH
3034 }
3035
3036 return offset;
c65ebc55
JW
3037}
3038
97e242b0
RH
3039/* If there are more than a trivial number of register spills, we use
3040 two interleaved iterators so that we can get two memory references
3041 per insn group.
3042
3043 In order to simplify things in the prologue and epilogue expanders,
3044 we use helper functions to fix up the memory references after the
3045 fact with the appropriate offsets to a POST_MODIFY memory mode.
3046 The following data structure tracks the state of the two iterators
3047 while insns are being emitted. */
3048
3049struct spill_fill_data
c65ebc55 3050{
dd3d2b35 3051 rtx_insn *init_after; /* point at which to emit initializations */
97e242b0
RH
3052 rtx init_reg[2]; /* initial base register */
3053 rtx iter_reg[2]; /* the iterator registers */
3054 rtx *prev_addr[2]; /* address of last memory use */
dd3d2b35 3055 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
3056 HOST_WIDE_INT prev_off[2]; /* last offset */
3057 int n_iter; /* number of iterators in use */
3058 int next_iter; /* next iterator to use */
3059 unsigned int save_gr_used_mask;
3060};
3061
3062static struct spill_fill_data spill_fill_data;
c65ebc55 3063
97e242b0 3064static void
9c808aad 3065setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
3066{
3067 int i;
3068
3069 spill_fill_data.init_after = get_last_insn ();
3070 spill_fill_data.init_reg[0] = init_reg;
3071 spill_fill_data.init_reg[1] = init_reg;
3072 spill_fill_data.prev_addr[0] = NULL;
3073 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
3074 spill_fill_data.prev_insn[0] = NULL;
3075 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
3076 spill_fill_data.prev_off[0] = cfa_off;
3077 spill_fill_data.prev_off[1] = cfa_off;
3078 spill_fill_data.next_iter = 0;
3079 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3080
3081 spill_fill_data.n_iter = 1 + (n_spills > 2);
3082 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 3083 {
97e242b0
RH
3084 int regno = next_scratch_gr_reg ();
3085 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3086 current_frame_info.gr_used_mask |= 1 << regno;
3087 }
3088}
3089
3090static void
9c808aad 3091finish_spill_pointers (void)
97e242b0
RH
3092{
3093 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3094}
c65ebc55 3095
97e242b0 3096static rtx
9c808aad 3097spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
3098{
3099 int iter = spill_fill_data.next_iter;
3100 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3101 rtx disp_rtx = GEN_INT (disp);
3102 rtx mem;
3103
3104 if (spill_fill_data.prev_addr[iter])
3105 {
13f70342 3106 if (satisfies_constraint_N (disp_rtx))
703cf211
BS
3107 {
3108 *spill_fill_data.prev_addr[iter]
3109 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3110 gen_rtx_PLUS (DImode,
3111 spill_fill_data.iter_reg[iter],
3112 disp_rtx));
bbbbb16a
ILT
3113 add_reg_note (spill_fill_data.prev_insn[iter],
3114 REG_INC, spill_fill_data.iter_reg[iter]);
703cf211 3115 }
c65ebc55
JW
3116 else
3117 {
97e242b0 3118 /* ??? Could use register post_modify for loads. */
13f70342 3119 if (!satisfies_constraint_I (disp_rtx))
97e242b0
RH
3120 {
3121 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3122 emit_move_insn (tmp, disp_rtx);
3123 disp_rtx = tmp;
3124 }
3125 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3126 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 3127 }
97e242b0
RH
3128 }
3129 /* Micro-optimization: if we've created a frame pointer, it's at
3130 CFA 0, which may allow the real iterator to be initialized lower,
3131 slightly increasing parallelism. Also, if there are few saves
3132 it may eliminate the iterator entirely. */
3133 else if (disp == 0
3134 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3135 && frame_pointer_needed)
3136 {
3137 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 3138 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
3139 return mem;
3140 }
3141 else
3142 {
dd3d2b35
DM
3143 rtx seq;
3144 rtx_insn *insn;
809d4ef1 3145
97e242b0
RH
3146 if (disp == 0)
3147 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3148 spill_fill_data.init_reg[iter]);
3149 else
c65ebc55 3150 {
97e242b0
RH
3151 start_sequence ();
3152
13f70342 3153 if (!satisfies_constraint_I (disp_rtx))
c65ebc55 3154 {
97e242b0
RH
3155 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3156 emit_move_insn (tmp, disp_rtx);
3157 disp_rtx = tmp;
c65ebc55 3158 }
97e242b0
RH
3159
3160 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3161 spill_fill_data.init_reg[iter],
3162 disp_rtx));
3163
2f937369 3164 seq = get_insns ();
97e242b0 3165 end_sequence ();
c65ebc55 3166 }
809d4ef1 3167
97e242b0
RH
3168 /* Careful for being the first insn in a sequence. */
3169 if (spill_fill_data.init_after)
892a4e60 3170 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 3171 else
bc08aefe 3172 {
dd3d2b35 3173 rtx_insn *first = get_insns ();
bc08aefe 3174 if (first)
892a4e60 3175 insn = emit_insn_before (seq, first);
bc08aefe 3176 else
892a4e60 3177 insn = emit_insn (seq);
bc08aefe 3178 }
892a4e60 3179 spill_fill_data.init_after = insn;
97e242b0 3180 }
c65ebc55 3181
97e242b0 3182 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 3183
97e242b0
RH
3184 /* ??? Not all of the spills are for varargs, but some of them are.
3185 The rest of the spills belong in an alias set of their own. But
3186 it doesn't actually hurt to include them here. */
ba4828e0 3187 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 3188
97e242b0
RH
3189 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3190 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 3191
97e242b0
RH
3192 if (++iter >= spill_fill_data.n_iter)
3193 iter = 0;
3194 spill_fill_data.next_iter = iter;
c65ebc55 3195
97e242b0
RH
3196 return mem;
3197}
5527bf14 3198
97e242b0 3199static void
9c808aad
AJ
3200do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3201 rtx frame_reg)
97e242b0 3202{
703cf211 3203 int iter = spill_fill_data.next_iter;
dd3d2b35
DM
3204 rtx mem;
3205 rtx_insn *insn;
5527bf14 3206
97e242b0 3207 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 3208 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 3209 spill_fill_data.prev_insn[iter] = insn;
5527bf14 3210
97e242b0
RH
3211 if (frame_reg)
3212 {
3213 rtx base;
3214 HOST_WIDE_INT off;
3215
3216 RTX_FRAME_RELATED_P (insn) = 1;
3217
9c808aad 3218 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
3219 through a pair of interleaved post_modify iterators. Just
3220 provide the correct answer. */
3221
3222 if (frame_pointer_needed)
3223 {
3224 base = hard_frame_pointer_rtx;
3225 off = - cfa_off;
5527bf14 3226 }
97e242b0
RH
3227 else
3228 {
3229 base = stack_pointer_rtx;
3230 off = current_frame_info.total_size - cfa_off;
3231 }
3232
5c255b57 3233 add_reg_note (insn, REG_CFA_OFFSET,
f7df4a84 3234 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
0a81f074
RS
3235 plus_constant (Pmode,
3236 base, off)),
bbbbb16a 3237 frame_reg));
c65ebc55
JW
3238 }
3239}
3240
97e242b0 3241static void
9c808aad 3242do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 3243{
703cf211 3244 int iter = spill_fill_data.next_iter;
dd3d2b35 3245 rtx_insn *insn;
703cf211
BS
3246
3247 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3248 GEN_INT (cfa_off)));
3249 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
3250}
3251
870f9ec0
RH
3252/* Wrapper functions that discard the CONST_INT spill offset. These
3253 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 3254 use a consistent function interface. */
870f9ec0
RH
3255
3256static rtx
9c808aad 3257gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3258{
3259 return gen_movdi (dest, src);
3260}
3261
3262static rtx
9c808aad 3263gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3264{
3265 return gen_fr_spill (dest, src);
3266}
3267
3268static rtx
9c808aad 3269gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3270{
3271 return gen_fr_restore (dest, src);
3272}
c65ebc55 3273
7b84aac0
EB
3274#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3275
3276/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3277#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
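/* Worked example, added for illustration: the RSE interleaves one
   NaT-collection word for (roughly) every 63 stacked registers it flushes,
   which appears to be what the N/63 term accounts for; the trailing +1 is
   slack.  For the maximal 96 stacked registers this gives
   BACKING_STORE_SIZE (96) = (96 + 96/63 + 1) * 8 = (96 + 1 + 1) * 8 = 784
   bytes of backing store.  */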
3278
3279/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
0dca9cd8
EB
3280 inclusive. These are offsets from the current stack pointer. BS_SIZE
3281 is the size of the backing store. ??? This clobbers r2 and r3. */
7b84aac0
EB
3282
3283static void
0dca9cd8
EB
3284ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3285 int bs_size)
7b84aac0 3286{
7b84aac0
EB
3287 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3288 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
0dca9cd8
EB
3289 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3290
3291 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3292 of the Register Stack Engine. We also need to probe it after checking
3293 that the 2 stacks don't overlap. */
3294 emit_insn (gen_bsp_value (r3));
3295 emit_move_insn (r2, GEN_INT (-(first + size)));
3296
3297 /* Compare current value of BSP and SP registers. */
f7df4a84
RS
3298 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3299 r3, stack_pointer_rtx)));
0dca9cd8
EB
3300
3301 /* Compute the address of the probe for the Backing Store (which grows
3302 towards higher addresses). We probe only at the first offset of
3303 the next page because some OSes (e.g. Linux/ia64) only extend the
3304 backing store when this specific address is hit (but generate a SEGV
3305 at other addresses). Page size is the worst case (4KB). The reserve
3306 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3307 Also compute the address of the last probe for the memory stack
3308 (which grows towards lower addresses). */
f7df4a84
RS
3309 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3310 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
0dca9cd8
EB
3311
3312 /* Compare them and raise SEGV if the former has topped the latter. */
3313 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3314 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
f7df4a84
RS
3315 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3316 r3, r2))));
3317 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
0dca9cd8
EB
3318 const0_rtx),
3319 const0_rtx));
3320 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3321 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3322 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3323 GEN_INT (11))));
7b84aac0
EB
3324
3325 /* Probe the Backing Store if necessary. */
3326 if (bs_size > 0)
3327 emit_stack_probe (r3);
3328
3329 /* Probe the memory stack if necessary. */
3330 if (size == 0)
3331 ;
3332
3333 /* See if we have a constant small number of probes to generate. If so,
3334 that's the easy case. */
3335 else if (size <= PROBE_INTERVAL)
3336 emit_stack_probe (r2);
3337
73866e0d 3338 /* The run-time loop is made up of 9 insns in the generic case while this
7b84aac0
EB
3339 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
3340 else if (size <= 4 * PROBE_INTERVAL)
3341 {
3342 HOST_WIDE_INT i;
3343
3344 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
f7df4a84 3345 emit_insn (gen_rtx_SET (r2,
7b84aac0
EB
3346 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3347 emit_stack_probe (r2);
3348
3349 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3350 it exceeds SIZE. If only two probes are needed, this will not
3351 generate any code. Then probe at FIRST + SIZE. */
3352 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3353 {
f7df4a84 3354 emit_insn (gen_rtx_SET (r2,
f65e3801 3355 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
7b84aac0
EB
3356 emit_stack_probe (r2);
3357 }
3358
f7df4a84 3359 emit_insn (gen_rtx_SET (r2,
f65e3801 3360 plus_constant (Pmode, r2,
7b84aac0
EB
3361 (i - PROBE_INTERVAL) - size)));
3362 emit_stack_probe (r2);
3363 }
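  /* Worked example, added for illustration: assuming the default 4 KB probe
     interval, a 12 KB range covers n = 3 intervals, so this unrolled form
     costs roughly 5 + 2*(3-2) = 7 insns against the 9-insn run-time loop
     mentioned above.  */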
3364
3365 /* Otherwise, do the same as above, but in a loop. Note that we must be
3366 extra careful with variables wrapping around because we might be at
3367 the very top (or the very bottom) of the address space and we have
3368 to be able to handle this case properly; in particular, we use an
3369 equality test for the loop condition. */
3370 else
3371 {
3372 HOST_WIDE_INT rounded_size;
3373
3374 emit_move_insn (r2, GEN_INT (-first));
3375
3376
3377 /* Step 1: round SIZE to the previous multiple of the interval. */
3378
3379 rounded_size = size & -PROBE_INTERVAL;
3380
3381
3382 /* Step 2: compute initial and final value of the loop counter. */
3383
3384 /* TEST_ADDR = SP + FIRST. */
f7df4a84 3385 emit_insn (gen_rtx_SET (r2,
7b84aac0
EB
3386 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3387
3388 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3389 if (rounded_size > (1 << 21))
3390 {
3391 emit_move_insn (r3, GEN_INT (-rounded_size));
f7df4a84 3392 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
7b84aac0
EB
3393 }
3394 else
f7df4a84
RS
3395 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3396 GEN_INT (-rounded_size))));
7b84aac0
EB
3397
3398
3399 /* Step 3: the loop
3400
73866e0d 3401 do
7b84aac0
EB
3402 {
3403 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3404 probe at TEST_ADDR
3405 }
73866e0d 3406 while (TEST_ADDR != LAST_ADDR)
7b84aac0
EB
3407
3408 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3409 until it is equal to ROUNDED_SIZE. */
3410
3411 emit_insn (gen_probe_stack_range (r2, r2, r3));
3412
3413
3414 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3415 that SIZE is equal to ROUNDED_SIZE. */
3416
3417 /* TEMP = SIZE - ROUNDED_SIZE. */
3418 if (size != rounded_size)
3419 {
f7df4a84
RS
3420 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3421 rounded_size - size)));
7b84aac0
EB
3422 emit_stack_probe (r2);
3423 }
3424 }
3425
3426 /* Make sure nothing is scheduled before we are done. */
3427 emit_insn (gen_blockage ());
3428}
3429
3430/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3431 absolute addresses. */
3432
3433const char *
3434output_probe_stack_range (rtx reg1, rtx reg2)
3435{
3436 static int labelno = 0;
73866e0d 3437 char loop_lab[32];
7b84aac0
EB
3438 rtx xops[3];
3439
73866e0d 3440 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7b84aac0 3441
73866e0d 3442 /* Loop. */
7b84aac0
EB
3443 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3444
7b84aac0 3445 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
73866e0d 3446 xops[0] = reg1;
7b84aac0
EB
3447 xops[1] = GEN_INT (-PROBE_INTERVAL);
3448 output_asm_insn ("addl %0 = %1, %0", xops);
3449 fputs ("\t;;\n", asm_out_file);
3450
73866e0d 3451 /* Probe at TEST_ADDR. */
7b84aac0 3452 output_asm_insn ("probe.w.fault %0, 0", xops);
73866e0d
EB
3453
3454 /* Test if TEST_ADDR == LAST_ADDR. */
3455 xops[1] = reg2;
3456 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3457 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3458
3459 /* Branch. */
3460 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
7b84aac0
EB
3461 assemble_name_raw (asm_out_file, loop_lab);
3462 fputc ('\n', asm_out_file);
3463
7b84aac0
EB
3464 return "";
3465}
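/* Added for illustration only: assuming REG1 is r2, REG2 is r3 and a 4 KB
   probe interval, the sequence printed above comes out roughly as

	.LPSRL0:
		addl r2 = -4096, r2
		;;
		probe.w.fault r2, 0
		cmp.eq p6, p7 = r2, r3
		(p7) br.cond.dpnt .LPSRL0

   i.e. the address register steps down one page at a time, each new page is
   touched with a faulting probe, and the loop exits once the last address
   has been reached.  */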
3466
c65ebc55
JW
3467/* Called after register allocation to add any instructions needed for the
3468 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 3469 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
3470 to intermix instructions with the saves of the caller saved registers. In
3471 some cases, it might be necessary to emit a barrier instruction as the last
3472 insn to prevent such scheduling.
3473
3474 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
3475 so that the debug info generation code can handle them properly.
3476
073a8998 3477 The register save area is laid out like so:
97e242b0
RH
3478 cfa+16
3479 [ varargs spill area ]
3480 [ fr register spill area ]
3481 [ br register spill area ]
3482 [ ar register spill area ]
3483 [ pr register spill area ]
3484 [ gr register spill area ] */
c65ebc55
JW
3485
3486/* ??? Get inefficient code when the frame size is larger than can fit in an
3487 adds instruction. */
3488
c65ebc55 3489void
9c808aad 3490ia64_expand_prologue (void)
c65ebc55 3491{
dd3d2b35
DM
3492 rtx_insn *insn;
3493 rtx ar_pfs_save_reg, ar_unat_save_reg;
97e242b0
RH
3494 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3495 rtx reg, alt_reg;
3496
3497 ia64_compute_frame_size (get_frame_size ());
3498 last_scratch_gr_reg = 15;
3499
a11e0df4 3500 if (flag_stack_usage_info)
d3c12306
EB
3501 current_function_static_stack_size = current_frame_info.total_size;
3502
9c1b56c4
JL
3503 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
3504 || flag_stack_clash_protection)
0dca9cd8
EB
3505 {
3506 HOST_WIDE_INT size = current_frame_info.total_size;
3507 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3508 + current_frame_info.n_local_regs);
3509
3510 if (crtl->is_leaf && !cfun->calls_alloca)
3511 {
8c1dd970
JL
3512 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
3513 ia64_emit_probe_stack_range (get_stack_check_protect (),
3514 size - get_stack_check_protect (),
0dca9cd8 3515 bs_size);
8c1dd970
JL
3516 else if (size + bs_size > get_stack_check_protect ())
3517 ia64_emit_probe_stack_range (get_stack_check_protect (),
3518 0, bs_size);
0dca9cd8
EB
3519 }
3520 else if (size + bs_size > 0)
8c1dd970 3521 ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
0dca9cd8 3522 }
7b84aac0 3523
6fb5fa3c
DB
3524 if (dump_file)
3525 {
3526 fprintf (dump_file, "ia64 frame related registers "
3527 "recorded in current_frame_info.r[]:\n");
3528#define PRINTREG(a) if (current_frame_info.r[a]) \
3529 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3530 PRINTREG(reg_fp);
3531 PRINTREG(reg_save_b0);
3532 PRINTREG(reg_save_pr);
3533 PRINTREG(reg_save_ar_pfs);
3534 PRINTREG(reg_save_ar_unat);
3535 PRINTREG(reg_save_ar_lc);
3536 PRINTREG(reg_save_gp);
3537#undef PRINTREG
3538 }
3539
97e242b0
RH
3540 /* If there is no epilogue, then we don't need some prologue insns.
3541 We need to avoid emitting the dead prologue insns, because flow
3542 will complain about them. */
c65ebc55
JW
3543 if (optimize)
3544 {
97e242b0 3545 edge e;
9924d7d8 3546 edge_iterator ei;
97e242b0 3547
fefa31b5 3548 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
c65ebc55
JW
3549 if ((e->flags & EDGE_FAKE) == 0
3550 && (e->flags & EDGE_FALLTHRU) != 0)
3551 break;
3552 epilogue_p = (e != NULL);
3553 }
3554 else
3555 epilogue_p = 1;
3556
97e242b0
RH
3557 /* Set the local, input, and output register names. We need to do this
3558 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3559 half. If we use in/loc/out register names, then we get assembler errors
3560 in crtn.S because there is no alloc insn or regstk directive in there. */
3561 if (! TARGET_REG_NAMES)
3562 {
3563 int inputs = current_frame_info.n_input_regs;
3564 int locals = current_frame_info.n_local_regs;
3565 int outputs = current_frame_info.n_output_regs;
3566
3567 for (i = 0; i < inputs; i++)
3568 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3569 for (i = 0; i < locals; i++)
3570 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3571 for (i = 0; i < outputs; i++)
3572 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3573 }
c65ebc55 3574
97e242b0
RH
3575 /* Set the frame pointer register name. The regnum is logically loc79,
3576 but of course we'll not have allocated that many locals. Rather than
3577 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
3578 /* ??? This code means that we can never use one local register when
3579 there is a frame pointer. loc79 gets wasted in this case, as it is
3580 renamed to a register that will never be used. See also the try_locals
3581 code in find_gr_spill. */
6fb5fa3c 3582 if (current_frame_info.r[reg_fp])
97e242b0
RH
3583 {
3584 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3585 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3586 = reg_names[current_frame_info.r[reg_fp]];
3587 reg_names[current_frame_info.r[reg_fp]] = tmp;
97e242b0 3588 }
c65ebc55 3589
97e242b0
RH
3590 /* We don't need an alloc instruction if we've used no outputs or locals. */
3591 if (current_frame_info.n_local_regs == 0
2ed4af6f 3592 && current_frame_info.n_output_regs == 0
38173d38 3593 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
f5bdba44 3594 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
3595 {
3596 /* If there is no alloc, but there are input registers used, then we
3597 need a .regstk directive. */
3598 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3599 ar_pfs_save_reg = NULL_RTX;
3600 }
3601 else
3602 {
3603 current_frame_info.need_regstk = 0;
c65ebc55 3604
6fb5fa3c
DB
3605 if (current_frame_info.r[reg_save_ar_pfs])
3606 {
3607 regno = current_frame_info.r[reg_save_ar_pfs];
3608 reg_emitted (reg_save_ar_pfs);
3609 }
97e242b0
RH
3610 else
3611 regno = next_scratch_gr_reg ();
3612 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3613
9c808aad 3614 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
3615 GEN_INT (current_frame_info.n_input_regs),
3616 GEN_INT (current_frame_info.n_local_regs),
3617 GEN_INT (current_frame_info.n_output_regs),
3618 GEN_INT (current_frame_info.n_rotate_regs)));
9f2ff8e5
RH
3619 if (current_frame_info.r[reg_save_ar_pfs])
3620 {
3621 RTX_FRAME_RELATED_P (insn) = 1;
3622 add_reg_note (insn, REG_CFA_REGISTER,
f7df4a84 3623 gen_rtx_SET (ar_pfs_save_reg,
9f2ff8e5
RH
3624 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3625 }
97e242b0 3626 }
c65ebc55 3627
97e242b0 3628 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 3629
26a110f5 3630 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
3631 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3632 stack_pointer_rtx, 0);
c65ebc55 3633
97e242b0
RH
3634 if (frame_pointer_needed)
3635 {
3636 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3637 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57
RH
3638
3639 /* Force the unwind info to recognize this as defining a new CFA,
3640 rather than some temp register setup. */
3641 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
97e242b0 3642 }
c65ebc55 3643
97e242b0
RH
3644 if (current_frame_info.total_size != 0)
3645 {
3646 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3647 rtx offset;
c65ebc55 3648
13f70342 3649 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3650 offset = frame_size_rtx;
3651 else
3652 {
3653 regno = next_scratch_gr_reg ();
9c808aad 3654 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
3655 emit_move_insn (offset, frame_size_rtx);
3656 }
c65ebc55 3657
97e242b0
RH
3658 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3659 stack_pointer_rtx, offset));
c65ebc55 3660
97e242b0
RH
3661 if (! frame_pointer_needed)
3662 {
3663 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3664 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 3665 gen_rtx_SET (stack_pointer_rtx,
5c255b57
RH
3666 gen_rtx_PLUS (DImode,
3667 stack_pointer_rtx,
3668 frame_size_rtx)));
97e242b0 3669 }
c65ebc55 3670
97e242b0
RH
3671 /* ??? At this point we must generate a magic insn that appears to
3672 modify the stack pointer, the frame pointer, and all spill
3673 iterators. This would allow the most scheduling freedom. For
3674 now, just hard stop. */
3675 emit_insn (gen_blockage ());
3676 }
c65ebc55 3677
97e242b0
RH
3678 /* Must copy out ar.unat before doing any integer spills. */
3679 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 3680 {
6fb5fa3c
DB
3681 if (current_frame_info.r[reg_save_ar_unat])
3682 {
3683 ar_unat_save_reg
3684 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3685 reg_emitted (reg_save_ar_unat);
3686 }
97e242b0 3687 else
c65ebc55 3688 {
97e242b0
RH
3689 alt_regno = next_scratch_gr_reg ();
3690 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3691 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 3692 }
c65ebc55 3693
97e242b0
RH
3694 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3695 insn = emit_move_insn (ar_unat_save_reg, reg);
5c255b57
RH
3696 if (current_frame_info.r[reg_save_ar_unat])
3697 {
3698 RTX_FRAME_RELATED_P (insn) = 1;
3699 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3700 }
97e242b0
RH
3701
3702 /* Even if we're not going to generate an epilogue, we still
3703 need to save the register so that EH works. */
6fb5fa3c 3704 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
d0e82870 3705 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
3706 }
3707 else
97e242b0
RH
3708 ar_unat_save_reg = NULL_RTX;
3709
3710 /* Spill all varargs registers. Do this before spilling any GR registers,
3711 since we want the UNAT bits for the GR registers to override the UNAT
3712 bits from varargs, which we don't care about. */
c65ebc55 3713
97e242b0
RH
3714 cfa_off = -16;
3715 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 3716 {
97e242b0 3717 reg = gen_rtx_REG (DImode, regno);
870f9ec0 3718 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 3719 }
c65ebc55 3720
97e242b0
RH
3721 /* Locate the bottom of the register save area. */
3722 cfa_off = (current_frame_info.spill_cfa_off
3723 + current_frame_info.spill_size
3724 + current_frame_info.extra_spill_size);
c65ebc55 3725
97e242b0
RH
3726 /* Save the predicate register block either in a register or in memory. */
3727 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3728 {
3729 reg = gen_rtx_REG (DImode, PR_REG (0));
6fb5fa3c 3730 if (current_frame_info.r[reg_save_pr] != 0)
1ff5b671 3731 {
6fb5fa3c
DB
3732 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3733 reg_emitted (reg_save_pr);
97e242b0 3734 insn = emit_move_insn (alt_reg, reg);
1ff5b671 3735
97e242b0
RH
3736 /* ??? Denote pr spill/fill by a DImode move that modifies all
3737 64 hard registers. */
1ff5b671 3738 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3739 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
46327bc5 3740
97e242b0
RH
3741 /* Even if we're not going to generate an epilogue, we still
3742 need to save the register so that EH works. */
3743 if (! epilogue_p)
d0e82870 3744 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
3745 }
3746 else
97e242b0
RH
3747 {
3748 alt_regno = next_scratch_gr_reg ();
3749 alt_reg = gen_rtx_REG (DImode, alt_regno);
3750 insn = emit_move_insn (alt_reg, reg);
870f9ec0 3751 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3752 cfa_off -= 8;
3753 }
c65ebc55
JW
3754 }
3755
97e242b0
RH
3756 /* Handle AR regs in numerical order. All of them get special handling. */
3757 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6fb5fa3c 3758 && current_frame_info.r[reg_save_ar_unat] == 0)
c65ebc55 3759 {
97e242b0 3760 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 3761 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 3762 cfa_off -= 8;
c65ebc55 3763 }
97e242b0
RH
3764
3765 /* The alloc insn already copied ar.pfs into a general register. The
3766 only thing we have to do now is copy that register to a stack slot
3767 if we'd not allocated a local register for the job. */
f5bdba44 3768 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
6fb5fa3c 3769 && current_frame_info.r[reg_save_ar_pfs] == 0)
c65ebc55 3770 {
97e242b0 3771 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 3772 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
3773 cfa_off -= 8;
3774 }
3775
3776 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3777 {
3778 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
6fb5fa3c 3779 if (current_frame_info.r[reg_save_ar_lc] != 0)
97e242b0 3780 {
6fb5fa3c
DB
3781 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3782 reg_emitted (reg_save_ar_lc);
97e242b0
RH
3783 insn = emit_move_insn (alt_reg, reg);
3784 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3785 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
97e242b0
RH
3786
3787 /* Even if we're not going to generate an epilogue, we still
3788 need to save the register so that EH works. */
3789 if (! epilogue_p)
d0e82870 3790 emit_insn (gen_prologue_use (alt_reg));
97e242b0 3791 }
c65ebc55
JW
3792 else
3793 {
97e242b0
RH
3794 alt_regno = next_scratch_gr_reg ();
3795 alt_reg = gen_rtx_REG (DImode, alt_regno);
3796 emit_move_insn (alt_reg, reg);
870f9ec0 3797 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3798 cfa_off -= 8;
3799 }
3800 }
3801
ae1e2d4c
AS
3802 /* Save the return pointer. */
3803 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3804 {
3805 reg = gen_rtx_REG (DImode, BR_REG (0));
6fb5fa3c 3806 if (current_frame_info.r[reg_save_b0] != 0)
ae1e2d4c 3807 {
6fb5fa3c
DB
3808 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3809 reg_emitted (reg_save_b0);
ae1e2d4c
AS
3810 insn = emit_move_insn (alt_reg, reg);
3811 RTX_FRAME_RELATED_P (insn) = 1;
f7df4a84 3812 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
ae1e2d4c
AS
3813
3814 /* Even if we're not going to generate an epilogue, we still
3815 need to save the register so that EH works. */
3816 if (! epilogue_p)
3817 emit_insn (gen_prologue_use (alt_reg));
3818 }
3819 else
3820 {
3821 alt_regno = next_scratch_gr_reg ();
3822 alt_reg = gen_rtx_REG (DImode, alt_regno);
3823 emit_move_insn (alt_reg, reg);
3824 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3825 cfa_off -= 8;
3826 }
3827 }
3828
6fb5fa3c 3829 if (current_frame_info.r[reg_save_gp])
599aedd9 3830 {
6fb5fa3c 3831 reg_emitted (reg_save_gp);
599aedd9 3832 insn = emit_move_insn (gen_rtx_REG (DImode,
6fb5fa3c 3833 current_frame_info.r[reg_save_gp]),
599aedd9 3834 pic_offset_table_rtx);
599aedd9
RH
3835 }
3836
97e242b0 3837 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3838 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3839 + current_frame_info.spill_size));
97e242b0
RH
3840
3841 /* Spill all general registers. */
3842 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3843 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3844 {
3845 reg = gen_rtx_REG (DImode, regno);
3846 do_spill (gen_gr_spill, reg, cfa_off, reg);
3847 cfa_off -= 8;
3848 }
3849
97e242b0
RH
3850 /* Spill the rest of the BR registers. */
3851 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3852 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3853 {
3854 alt_regno = next_scratch_gr_reg ();
3855 alt_reg = gen_rtx_REG (DImode, alt_regno);
3856 reg = gen_rtx_REG (DImode, regno);
3857 emit_move_insn (alt_reg, reg);
870f9ec0 3858 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3859 cfa_off -= 8;
3860 }
3861
3862 /* Align the frame and spill all FR registers. */
3863 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3864 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3865 {
e820471b 3866 gcc_assert (!(cfa_off & 15));
02befdf4 3867 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3868 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
3869 cfa_off -= 16;
3870 }
3871
e820471b 3872 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3873
3874 finish_spill_pointers ();
c65ebc55
JW
3875}
3876
8e7745dc
DR
3877/* Output the textual info surrounding the prologue. */
3878
3879void
3880ia64_start_function (FILE *file, const char *fnname,
3881 tree decl ATTRIBUTE_UNUSED)
3882{
4b12e93d
TG
3883#if TARGET_ABI_OPEN_VMS
3884 vms_start_function (fnname);
8e7745dc
DR
3885#endif
3886
3887 fputs ("\t.proc ", file);
3888 assemble_name (file, fnname);
3889 fputc ('\n', file);
3890 ASM_OUTPUT_LABEL (file, fnname);
3891}
3892
c65ebc55 3893/* Called after register allocation to add any instructions needed for the
5519a4f9 3894 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 3895 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
3896 to intermix instructions with the saves of the caller saved registers. In
3897 some cases, it might be necessary to emit a barrier instruction as the last
3898 insn to prevent such scheduling. */
3899
3900void
9c808aad 3901ia64_expand_epilogue (int sibcall_p)
c65ebc55 3902{
dd3d2b35
DM
3903 rtx_insn *insn;
3904 rtx reg, alt_reg, ar_unat_save_reg;
97e242b0
RH
3905 int regno, alt_regno, cfa_off;
3906
3907 ia64_compute_frame_size (get_frame_size ());
3908
3909 /* If there is a frame pointer, then we use it instead of the stack
3910 pointer, so that the stack pointer does not need to be valid when
3911 the epilogue starts. See EXIT_IGNORE_STACK. */
3912 if (frame_pointer_needed)
3913 setup_spill_pointers (current_frame_info.n_spilled,
3914 hard_frame_pointer_rtx, 0);
3915 else
9c808aad 3916 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
3917 current_frame_info.total_size);
3918
3919 if (current_frame_info.total_size != 0)
3920 {
3921 /* ??? At this point we must generate a magic insn that appears to
3922 modify the spill iterators and the frame pointer. This would
3923 allow the most scheduling freedom. For now, just hard stop. */
3924 emit_insn (gen_blockage ());
3925 }
3926
3927 /* Locate the bottom of the register save area. */
3928 cfa_off = (current_frame_info.spill_cfa_off
3929 + current_frame_info.spill_size
3930 + current_frame_info.extra_spill_size);
3931
3932 /* Restore the predicate registers. */
3933 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3934 {
6fb5fa3c
DB
3935 if (current_frame_info.r[reg_save_pr] != 0)
3936 {
3937 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3938 reg_emitted (reg_save_pr);
3939 }
97e242b0
RH
3940 else
3941 {
3942 alt_regno = next_scratch_gr_reg ();
3943 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3944 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3945 cfa_off -= 8;
3946 }
3947 reg = gen_rtx_REG (DImode, PR_REG (0));
3948 emit_move_insn (reg, alt_reg);
3949 }
3950
3951 /* Restore the application registers. */
3952
3953 /* Load the saved unat from the stack, but do not restore it until
3954 after the GRs have been restored. */
3955 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3956 {
6fb5fa3c
DB
3957 if (current_frame_info.r[reg_save_ar_unat] != 0)
3958 {
3959 ar_unat_save_reg
3960 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3961 reg_emitted (reg_save_ar_unat);
3962 }
97e242b0
RH
3963 else
3964 {
3965 alt_regno = next_scratch_gr_reg ();
3966 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3967 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3968 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3969 cfa_off -= 8;
3970 }
3971 }
3972 else
3973 ar_unat_save_reg = NULL_RTX;
9c808aad 3974
6fb5fa3c 3975 if (current_frame_info.r[reg_save_ar_pfs] != 0)
97e242b0 3976 {
6fb5fa3c
DB
3977 reg_emitted (reg_save_ar_pfs);
3978 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
97e242b0
RH
3979 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3980 emit_move_insn (reg, alt_reg);
3981 }
4e14f1f9 3982 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3983 {
97e242b0
RH
3984 alt_regno = next_scratch_gr_reg ();
3985 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3986 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3987 cfa_off -= 8;
3988 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3989 emit_move_insn (reg, alt_reg);
3990 }
3991
3992 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3993 {
6fb5fa3c
DB
3994 if (current_frame_info.r[reg_save_ar_lc] != 0)
3995 {
3996 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3997 reg_emitted (reg_save_ar_lc);
3998 }
97e242b0
RH
3999 else
4000 {
4001 alt_regno = next_scratch_gr_reg ();
4002 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 4003 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
4004 cfa_off -= 8;
4005 }
4006 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4007 emit_move_insn (reg, alt_reg);
4008 }
4009
ae1e2d4c
AS
4010 /* Restore the return pointer. */
4011 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4012 {
6fb5fa3c
DB
4013 if (current_frame_info.r[reg_save_b0] != 0)
4014 {
4015 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4016 reg_emitted (reg_save_b0);
4017 }
ae1e2d4c
AS
4018 else
4019 {
4020 alt_regno = next_scratch_gr_reg ();
4021 alt_reg = gen_rtx_REG (DImode, alt_regno);
4022 do_restore (gen_movdi_x, alt_reg, cfa_off);
4023 cfa_off -= 8;
4024 }
4025 reg = gen_rtx_REG (DImode, BR_REG (0));
4026 emit_move_insn (reg, alt_reg);
4027 }
4028
97e242b0 4029 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
4030 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4031 + current_frame_info.spill_size));
97e242b0 4032
599aedd9
RH
4033 /* The GP may be stored on the stack in the prologue, but it's
4034 never restored in the epilogue. Skip the stack slot. */
4035 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4036 cfa_off -= 8;
4037
97e242b0 4038 /* Restore all general registers. */
599aedd9 4039 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 4040 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 4041 {
97e242b0
RH
4042 reg = gen_rtx_REG (DImode, regno);
4043 do_restore (gen_gr_restore, reg, cfa_off);
4044 cfa_off -= 8;
0c96007e 4045 }
9c808aad 4046
ae1e2d4c 4047 /* Restore the branch registers. */
97e242b0
RH
4048 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4049 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 4050 {
97e242b0
RH
4051 alt_regno = next_scratch_gr_reg ();
4052 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 4053 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
4054 cfa_off -= 8;
4055 reg = gen_rtx_REG (DImode, regno);
4056 emit_move_insn (reg, alt_reg);
4057 }
c65ebc55 4058
97e242b0
RH
4059 /* Restore floating point registers. */
4060 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4061 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4062 {
e820471b 4063 gcc_assert (!(cfa_off & 15));
02befdf4 4064 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 4065 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 4066 cfa_off -= 16;
0c96007e 4067 }
97e242b0
RH
4068
4069 /* Restore ar.unat for real. */
4070 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4071 {
4072 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4073 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
4074 }
4075
e820471b 4076 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
4077
4078 finish_spill_pointers ();
c65ebc55 4079
c93646bd
JJ
4080 if (current_frame_info.total_size
4081 || cfun->machine->ia64_eh_epilogue_sp
4082 || frame_pointer_needed)
97e242b0
RH
4083 {
4084 /* ??? At this point we must generate a magic insn that appears to
4085 modify the spill iterators, the stack pointer, and the frame
4086 pointer. This would allow the most scheduling freedom. For now,
4087 just hard stop. */
4088 emit_insn (gen_blockage ());
4089 }
c65ebc55 4090
97e242b0
RH
4091 if (cfun->machine->ia64_eh_epilogue_sp)
4092 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4093 else if (frame_pointer_needed)
4094 {
4095 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4096 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 4097 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
97e242b0
RH
4098 }
4099 else if (current_frame_info.total_size)
0c96007e 4100 {
97e242b0
RH
4101 rtx offset, frame_size_rtx;
4102
4103 frame_size_rtx = GEN_INT (current_frame_info.total_size);
13f70342 4104 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
4105 offset = frame_size_rtx;
4106 else
4107 {
4108 regno = next_scratch_gr_reg ();
4109 offset = gen_rtx_REG (DImode, regno);
4110 emit_move_insn (offset, frame_size_rtx);
4111 }
4112
4113 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4114 offset));
4115
4116 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 4117 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 4118 gen_rtx_SET (stack_pointer_rtx,
5c255b57
RH
4119 gen_rtx_PLUS (DImode,
4120 stack_pointer_rtx,
4121 frame_size_rtx)));
0c96007e 4122 }
97e242b0
RH
4123
4124 if (cfun->machine->ia64_eh_epilogue_bsp)
4125 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 4126
2ed4af6f
RH
4127 if (! sibcall_p)
4128 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 4129 else
8206fc89
AM
4130 {
4131 int fp = GR_REG (2);
5c255b57
RH
4132 /* We need a throwaway register here; r0 and r1 are reserved,
4133 so r2 is the first available call-clobbered register. If
4134 there was a frame_pointer register, we may have swapped the
4135 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4136 sure we're using the string "r2" when emitting the register
4137 name for the assembler. */
6fb5fa3c
DB
4138 if (current_frame_info.r[reg_fp]
4139 && current_frame_info.r[reg_fp] == GR_REG (2))
8206fc89
AM
4140 fp = HARD_FRAME_POINTER_REGNUM;
4141
4142 /* We must emit an alloc to force the input registers to become output
4143 registers. Otherwise, if the callee tries to pass its parameters
4144 through to another call without an intervening alloc, then these
4145 values get lost. */
4146 /* ??? We don't need to preserve all input registers. We only need to
4147 preserve those input registers used as arguments to the sibling call.
4148 It is unclear how to compute that number here. */
4149 if (current_frame_info.n_input_regs != 0)
a8f5224e
DM
4150 {
4151 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
c2b40eba 4152
a8f5224e
DM
4153 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4154 const0_rtx, const0_rtx,
4155 n_inputs, const0_rtx));
4156 RTX_FRAME_RELATED_P (insn) = 1;
c2b40eba
RH
4157
4158 /* ??? We need to mark the alloc as frame-related so that it gets
4159 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4160 But there's nothing dwarf2 related to be done wrt the register
4161 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4162 the empty parallel means dwarf2out will not see anything. */
4163 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4164 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
a8f5224e 4165 }
8206fc89 4166 }
c65ebc55
JW
4167}
4168
97e242b0
RH
4169/* Return 1 if br.ret can do all the work required to return from a
4170 function. */
4171
4172int
9c808aad 4173ia64_direct_return (void)
97e242b0
RH
4174{
4175 if (reload_completed && ! frame_pointer_needed)
4176 {
4177 ia64_compute_frame_size (get_frame_size ());
4178
4179 return (current_frame_info.total_size == 0
4180 && current_frame_info.n_spilled == 0
6fb5fa3c
DB
4181 && current_frame_info.r[reg_save_b0] == 0
4182 && current_frame_info.r[reg_save_pr] == 0
4183 && current_frame_info.r[reg_save_ar_pfs] == 0
4184 && current_frame_info.r[reg_save_ar_unat] == 0
4185 && current_frame_info.r[reg_save_ar_lc] == 0);
97e242b0
RH
4186 }
4187 return 0;
4188}
4189
af1e5518
RH
4190/* Return the magic cookie that we use to hold the return address
4191 during early compilation. */
4192
4193rtx
9c808aad 4194ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
4195{
4196 if (count != 0)
4197 return NULL;
4198 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4199}
4200
4201/* Split this value after reload, now that we know where the return
4202 address is saved. */
4203
4204void
9c808aad 4205ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
4206{
4207 rtx src;
4208
4209 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4210 {
6fb5fa3c
DB
4211 if (current_frame_info.r[reg_save_b0] != 0)
4212 {
4213 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4214 reg_emitted (reg_save_b0);
4215 }
af1e5518
RH
4216 else
4217 {
4218 HOST_WIDE_INT off;
4219 unsigned int regno;
13f70342 4220 rtx off_r;
af1e5518
RH
4221
4222 /* Compute offset from CFA for BR0. */
4223 /* ??? Must be kept in sync with ia64_expand_prologue. */
4224 off = (current_frame_info.spill_cfa_off
4225 + current_frame_info.spill_size);
4226 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4227 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4228 off -= 8;
4229
4230 /* Convert CFA offset to a register based offset. */
4231 if (frame_pointer_needed)
4232 src = hard_frame_pointer_rtx;
4233 else
4234 {
4235 src = stack_pointer_rtx;
4236 off += current_frame_info.total_size;
4237 }
4238
4239 /* Load address into scratch register. */
13f70342
RH
4240 off_r = GEN_INT (off);
4241 if (satisfies_constraint_I (off_r))
4242 emit_insn (gen_adddi3 (dest, src, off_r));
af1e5518
RH
4243 else
4244 {
13f70342 4245 emit_move_insn (dest, off_r);
af1e5518
RH
4246 emit_insn (gen_adddi3 (dest, src, dest));
4247 }
4248
4249 src = gen_rtx_MEM (Pmode, dest);
4250 }
4251 }
4252 else
4253 src = gen_rtx_REG (DImode, BR_REG (0));
4254
4255 emit_move_insn (dest, src);
4256}
4257
10c9f189 4258int
9c808aad 4259ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
4260{
4261 /* Don't clobber any of the registers we reserved for the prologue. */
09639a83 4262 unsigned int r;
10c9f189 4263
6fb5fa3c
DB
4264 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4265 if (to == current_frame_info.r[r]
4266 || from == current_frame_info.r[r]
4267 || to == emitted_frame_related_regs[r]
4268 || from == emitted_frame_related_regs[r])
4269 return 0;
2130b7fb 4270
10c9f189
RH
4271 /* Don't use output registers outside the register frame. */
4272 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4273 return 0;
4274
4275 /* Retain even/oddness on predicate register pairs. */
4276 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4277 return (from & 1) == (to & 1);
4278
4279 return 1;
4280}
4281
c43f4279
RS
4282/* Implement TARGET_HARD_REGNO_NREGS.
4283
4284 ??? We say that BImode PR values require two registers. This allows us to
4285 easily store the normal and inverted values. We use CCImode to indicate
4286 a single predicate register. */
4287
4288static unsigned int
4289ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4290{
4291 if (regno == PR_REG (0) && mode == DImode)
4292 return 64;
4293 if (PR_REGNO_P (regno) && (mode) == BImode)
4294 return 2;
4295 if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4296 return 1;
4297 if (FR_REGNO_P (regno) && mode == XFmode)
4298 return 1;
4299 if (FR_REGNO_P (regno) && mode == RFmode)
4300 return 1;
4301 if (FR_REGNO_P (regno) && mode == XCmode)
4302 return 2;
4303 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4304}
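/* Illustrative consequences of the rules above (added commentary):
   ia64_hard_regno_nregs (PR_REG (1), BImode) is 2 because both the normal
   and the inverted predicate value are kept, while
   ia64_hard_regno_nregs (FR_REG (2), XFmode) is 1 since one FP register
   holds a whole extended-precision value.  */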
4305
f939c3e6
RS
4306/* Implement TARGET_HARD_REGNO_MODE_OK. */
4307
4308static bool
4309ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4310{
4311 if (FR_REGNO_P (regno))
4312 return (GET_MODE_CLASS (mode) != MODE_CC
4313 && mode != BImode
4314 && mode != TFmode);
4315
4316 if (PR_REGNO_P (regno))
4317 return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4318
4319 if (GR_REGNO_P (regno))
4320 return mode != XFmode && mode != XCmode && mode != RFmode;
4321
4322 if (AR_REGNO_P (regno))
4323 return mode == DImode;
4324
4325 if (BR_REGNO_P (regno))
4326 return mode == DImode;
4327
4328 return false;
4329}
4330
99e1629f
RS
4331/* Implement TARGET_MODES_TIEABLE_P.
4332
4333 Don't tie integer and FP modes, as that causes us to get integer registers
4334 allocated for FP instructions. XFmode only supported in FP registers so
4335 we can't tie it with any other modes. */
4336
4337static bool
4338ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4339{
4340 return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4341 && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4342 == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4343 && (mode1 == BImode) == (mode2 == BImode));
4344}
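/* Illustrative consequences (added commentary): DFmode and SFmode may be
   tied (same class, neither is in the XF/XC/RF group nor BImode); DImode
   and SFmode may not (different classes); and XFmode may not be tied even
   with DFmode, since only FP registers can hold XFmode.  */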
4345
301d03af
RS
4346/* Target hook for assembling integer objects. Handle word-sized
4347 aligned objects and detect the cases when @fptr is needed. */
4348
4349static bool
9c808aad 4350ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 4351{
b6a41a62 4352 if (size == POINTER_SIZE / BITS_PER_UNIT
301d03af
RS
4353 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4354 && GET_CODE (x) == SYMBOL_REF
1cdbd630 4355 && SYMBOL_REF_FUNCTION_P (x))
301d03af 4356 {
1b79dc38
DM
4357 static const char * const directive[2][2] = {
4358 /* 64-bit pointer */ /* 32-bit pointer */
4359 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4360 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4361 };
4362 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
301d03af
RS
4363 output_addr_const (asm_out_file, x);
4364 fputs (")\n", asm_out_file);
4365 return true;
4366 }
4367 return default_assemble_integer (x, size, aligned_p);
4368}
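/* Example of the assembly this produces (illustrative, assuming a function
   `foo' and the default 64-bit, non-no-pic configuration): an aligned
   pointer-to-function emits

	data8	@fptr(foo)

   while ordinary data pointers fall through to default_assemble_integer.  */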
4369
c65ebc55
JW
4370/* Emit the function prologue. */
4371
08c148a8 4372static void
42776416 4373ia64_output_function_prologue (FILE *file)
c65ebc55 4374{
97e242b0
RH
4375 int mask, grsave, grsave_prev;
4376
4377 if (current_frame_info.need_regstk)
4378 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4379 current_frame_info.n_input_regs,
4380 current_frame_info.n_local_regs,
4381 current_frame_info.n_output_regs,
4382 current_frame_info.n_rotate_regs);
c65ebc55 4383
d5fabb58 4384 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
0c96007e
AM
4385 return;
4386
97e242b0 4387 /* Emit the .prologue directive. */
809d4ef1 4388
97e242b0
RH
4389 mask = 0;
4390 grsave = grsave_prev = 0;
6fb5fa3c 4391 if (current_frame_info.r[reg_save_b0] != 0)
0c96007e 4392 {
97e242b0 4393 mask |= 8;
6fb5fa3c 4394 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
97e242b0 4395 }
6fb5fa3c 4396 if (current_frame_info.r[reg_save_ar_pfs] != 0
97e242b0 4397 && (grsave_prev == 0
6fb5fa3c 4398 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
97e242b0
RH
4399 {
4400 mask |= 4;
4401 if (grsave_prev == 0)
6fb5fa3c
DB
4402 grsave = current_frame_info.r[reg_save_ar_pfs];
4403 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
0c96007e 4404 }
6fb5fa3c 4405 if (current_frame_info.r[reg_fp] != 0
97e242b0 4406 && (grsave_prev == 0
6fb5fa3c 4407 || current_frame_info.r[reg_fp] == grsave_prev + 1))
97e242b0
RH
4408 {
4409 mask |= 2;
4410 if (grsave_prev == 0)
4411 grsave = HARD_FRAME_POINTER_REGNUM;
6fb5fa3c 4412 grsave_prev = current_frame_info.r[reg_fp];
97e242b0 4413 }
6fb5fa3c 4414 if (current_frame_info.r[reg_save_pr] != 0
97e242b0 4415 && (grsave_prev == 0
6fb5fa3c 4416 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
97e242b0
RH
4417 {
4418 mask |= 1;
4419 if (grsave_prev == 0)
6fb5fa3c 4420 grsave = current_frame_info.r[reg_save_pr];
97e242b0
RH
4421 }
4422
738e7b39 4423 if (mask && TARGET_GNU_AS)
97e242b0
RH
4424 fprintf (file, "\t.prologue %d, %d\n", mask,
4425 ia64_dbx_register_number (grsave));
4426 else
4427 fputs ("\t.prologue\n", file);
4428
4429 /* Emit a .spill directive, if necessary, to relocate the base of
4430 the register spill area. */
4431 if (current_frame_info.spill_cfa_off != -16)
4432 fprintf (file, "\t.spill %ld\n",
4433 (long) (current_frame_info.spill_cfa_off
4434 + current_frame_info.spill_size));
c65ebc55
JW
4435}
4436
0186257f
JW
4437/* Emit the .body directive at the scheduled end of the prologue. */
4438
b4c25db2 4439static void
9c808aad 4440ia64_output_function_end_prologue (FILE *file)
0186257f 4441{
d5fabb58 4442 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
0186257f
JW
4443 return;
4444
4445 fputs ("\t.body\n", file);
4446}
4447
c65ebc55
JW
4448/* Emit the function epilogue. */
4449
08c148a8 4450static void
42776416 4451ia64_output_function_epilogue (FILE *)
c65ebc55 4452{
8a959ea5
RH
4453 int i;
4454
6fb5fa3c 4455 if (current_frame_info.r[reg_fp])
97e242b0
RH
4456 {
4457 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4458 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
4459 = reg_names[current_frame_info.r[reg_fp]];
4460 reg_names[current_frame_info.r[reg_fp]] = tmp;
4461 reg_emitted (reg_fp);
97e242b0
RH
4462 }
4463 if (! TARGET_REG_NAMES)
4464 {
97e242b0
RH
4465 for (i = 0; i < current_frame_info.n_input_regs; i++)
4466 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4467 for (i = 0; i < current_frame_info.n_local_regs; i++)
4468 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4469 for (i = 0; i < current_frame_info.n_output_regs; i++)
4470 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4471 }
8a959ea5 4472
97e242b0
RH
4473 current_frame_info.initialized = 0;
4474}
c65ebc55
JW
4475
4476int
9c808aad 4477ia64_dbx_register_number (int regno)
c65ebc55 4478{
97e242b0
RH
4479 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4480 from its home at loc79 to something inside the register frame. We
4481 must perform the same renumbering here for the debug info. */
6fb5fa3c 4482 if (current_frame_info.r[reg_fp])
97e242b0
RH
4483 {
4484 if (regno == HARD_FRAME_POINTER_REGNUM)
6fb5fa3c
DB
4485 regno = current_frame_info.r[reg_fp];
4486 else if (regno == current_frame_info.r[reg_fp])
97e242b0
RH
4487 regno = HARD_FRAME_POINTER_REGNUM;
4488 }
4489
4490 if (IN_REGNO_P (regno))
4491 return 32 + regno - IN_REG (0);
4492 else if (LOC_REGNO_P (regno))
4493 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4494 else if (OUT_REGNO_P (regno))
4495 return (32 + current_frame_info.n_input_regs
4496 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4497 else
4498 return regno;
c65ebc55
JW
4499}
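/* Worked example (illustrative): for a frame with 2 input and 3 local
   registers, in1 (IN_REG (1)) maps to debug register 33, loc0 (LOC_REG (0))
   to 32 + 2 = 34, and out0 (OUT_REG (0)) to 32 + 2 + 3 = 37.  */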
4500
2a1211e5
RH
4501/* Implement TARGET_TRAMPOLINE_INIT.
4502
4503 The trampoline should set the static chain pointer to value placed
4504 into the trampoline and should branch to the specified routine.
4505 To make the normal indirect-subroutine calling convention work,
4506 the trampoline must look like a function descriptor; the first
4507 word being the target address and the second being the target's
4508 global pointer.
4509
4510 We abuse the concept of a global pointer by arranging for it
4511 to point to the data we need to load. The complete trampoline
4512 has the following form:
4513
4514 +-------------------+ \
4515 TRAMP: | __ia64_trampoline | |
4516 +-------------------+ > fake function descriptor
4517 | TRAMP+16 | |
4518 +-------------------+ /
4519 | target descriptor |
4520 +-------------------+
4521 | static link |
4522 +-------------------+
4523*/
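/* Added commentary (not in the original source): an indirect call through
   TRAMP thus loads the fake descriptor, branches to __ia64_trampoline with
   gp left pointing at TRAMP+16, and the stub can then pick up the real
   target descriptor and the static link from that address before
   transferring control.  */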
4524
4525static void
4526ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
97e242b0 4527{
2a1211e5
RH
4528 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4529 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
97e242b0 4530
738e7b39
RK
4531 /* The Intel assembler requires that the global __ia64_trampoline symbol
4532 be declared explicitly */
4533 if (!TARGET_GNU_AS)
4534 {
4535 static bool declared_ia64_trampoline = false;
4536
4537 if (!declared_ia64_trampoline)
4538 {
4539 declared_ia64_trampoline = true;
b6a41a62
RK
4540 (*targetm.asm_out.globalize_label) (asm_out_file,
4541 "__ia64_trampoline");
738e7b39
RK
4542 }
4543 }
4544
5e89a381 4545 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2a1211e5 4546 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
5e89a381
SE
4547 fnaddr = convert_memory_address (Pmode, fnaddr);
4548 static_chain = convert_memory_address (Pmode, static_chain);
4549
97e242b0 4550 /* Load up our iterator. */
2a1211e5
RH
4551 addr_reg = copy_to_reg (addr);
4552 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
97e242b0
RH
4553
4554 /* The first two words are the fake descriptor:
4555 __ia64_trampoline, ADDR+16. */
f2972bf8
DR
4556 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4557 if (TARGET_ABI_OPEN_VMS)
4558 {
4559 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4560 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4561 relocation against function symbols to make it identical to the
4562 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4563 strict ELF and dereference to get the bare code address. */
4564 rtx reg = gen_reg_rtx (Pmode);
4565 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4566 emit_move_insn (reg, tramp);
4567 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4568 tramp = reg;
4569 }
2a1211e5 4570 emit_move_insn (m_tramp, tramp);
97e242b0 4571 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4572 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0 4573
0a81f074 4574 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
97e242b0 4575 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4576 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4577
4578 /* The third word is the target descriptor. */
2a1211e5 4579 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
97e242b0 4580 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4581 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4582
4583 /* The fourth word is the static chain. */
2a1211e5 4584 emit_move_insn (m_tramp, static_chain);
97e242b0 4585}
c65ebc55
JW
4586\f
4587/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
4588 for the last named argument which has type TYPE and mode MODE.
4589
4590 We generate the actual spill instructions during prologue generation. */
4591
351a758b 4592static void
ef4bddc2 4593ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
351a758b 4594 tree type, int * pretend_size,
9c808aad 4595 int second_time ATTRIBUTE_UNUSED)
c65ebc55 4596{
d5cc9181 4597 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
351a758b 4598
6c535c69 4599 /* Skip the current argument. */
d5cc9181 4600 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
c65ebc55 4601
351a758b 4602 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 4603 {
351a758b 4604 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
4605 *pretend_size = n * UNITS_PER_WORD;
4606 cfun->machine->n_varargs = n;
4607 }
c65ebc55
JW
4608}
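/* Worked example (illustrative): for `int f (const char *fmt, ...)' only
   one of the MAX_ARGUMENT_SLOTS (8) slots is named, so next_cum.words is 1
   after the advance above, n = 8 - 1 = 7 registers still need spilling, and
   *pretend_size becomes 7 * 8 = 56 bytes.  */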
4609
4610/* Check whether TYPE is a homogeneous floating point aggregate. If
4611 it is, return the mode of the floating point type that appears
4612 in all leaves. If it is not, return VOIDmode.
4613
4614 An aggregate is a homogeneous floating point aggregate if all
4615 fields/elements in it have the same floating point type (e.g.,
3d6a9acd
RH
4616 SFmode). 128-bit quad-precision floats are excluded.
4617
4618 Variable sized aggregates should never arrive here, since we should
4619 have already decided to pass them by reference. Top-level zero-sized
4620 aggregates are excluded because our parallels crash the middle-end. */
c65ebc55 4621
ef4bddc2 4622static machine_mode
586de218 4623hfa_element_mode (const_tree type, bool nested)
c65ebc55 4624{
ef4bddc2
RS
4625 machine_mode element_mode = VOIDmode;
4626 machine_mode mode;
c65ebc55
JW
4627 enum tree_code code = TREE_CODE (type);
4628 int know_element_mode = 0;
4629 tree t;
4630
3d6a9acd
RH
4631 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4632 return VOIDmode;
4633
c65ebc55
JW
4634 switch (code)
4635 {
4636 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
0cc8f5c5 4637 case BOOLEAN_TYPE: case POINTER_TYPE:
c65ebc55 4638 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
5662a50d 4639 case LANG_TYPE: case FUNCTION_TYPE:
c65ebc55
JW
4640 return VOIDmode;
4641
4642 /* Fortran complex types are supposed to be HFAs, so we need to handle
4643 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4644 types though. */
4645 case COMPLEX_TYPE:
16448fd4 4646 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
4647 && TYPE_MODE (type) != TCmode)
4648 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
4649 else
4650 return VOIDmode;
4651
4652 case REAL_TYPE:
4653 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4654 mode if this is contained within an aggregate. */
02befdf4 4655 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
4656 return TYPE_MODE (type);
4657 else
4658 return VOIDmode;
4659
4660 case ARRAY_TYPE:
46399021 4661 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
4662
4663 case RECORD_TYPE:
4664 case UNION_TYPE:
4665 case QUAL_UNION_TYPE:
910ad8de 4666 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
c65ebc55
JW
4667 {
4668 if (TREE_CODE (t) != FIELD_DECL)
4669 continue;
4670
4671 mode = hfa_element_mode (TREE_TYPE (t), 1);
4672 if (know_element_mode)
4673 {
4674 if (mode != element_mode)
4675 return VOIDmode;
4676 }
4677 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4678 return VOIDmode;
4679 else
4680 {
4681 know_element_mode = 1;
4682 element_mode = mode;
4683 }
4684 }
4685 return element_mode;
4686
4687 default:
4688 /* If we reach here, we probably have some front-end specific type
4689 that the backend doesn't know about. This can happen via the
4690 aggregate_value_p call in init_function_start. All we can do is
4691 ignore unknown tree types. */
4692 return VOIDmode;
4693 }
4694
4695 return VOIDmode;
4696}
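/* Illustrative examples (added): a struct of two `double' fields yields
   DFmode and an array of `float' yields SFmode, so both are HFAs; a struct
   mixing `double' with `float', or one containing an integer member, yields
   VOIDmode and is passed under the normal rules.  */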
4697
f57fc998
ZW
4698/* Return the number of words required to hold a quantity of TYPE and MODE
4699 when passed as an argument. */
4700static int
ef4bddc2 4701ia64_function_arg_words (const_tree type, machine_mode mode)
f57fc998
ZW
4702{
4703 int words;
4704
4705 if (mode == BLKmode)
4706 words = int_size_in_bytes (type);
4707 else
4708 words = GET_MODE_SIZE (mode);
4709
4710 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4711}
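
/* Worked example (added for illustration): with UNITS_PER_WORD == 8 on
   IA-64, a 20-byte BLKmode aggregate occupies (20 + 8 - 1) / 8 == 3
   argument slots, while an 8-byte scalar such as DImode occupies
   (8 + 7) / 8 == 1 slot.  */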
4712
4713/* Return the number of registers that should be skipped so the current
4714 argument (described by TYPE and WORDS) will be properly aligned.
4715
4716 Integer and float arguments larger than 8 bytes start at the next
4717 even boundary. Aggregates larger than 8 bytes start at the next
4718 even boundary if the aggregate has 16 byte alignment. Note that
4719 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4720 but are still to be aligned in registers.
4721
4722 ??? The ABI does not specify how to handle aggregates with
4723 alignment from 9 to 15 bytes, or greater than 16. We handle them
4724 all as if they had 16 byte alignment. Such aggregates can occur
4725 only if gcc extensions are used. */
4726static int
ffa88471
SE
4727ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4728 const_tree type, int words)
f57fc998 4729{
f2972bf8
DR
4730 /* No registers are skipped on VMS. */
4731 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
f57fc998
ZW
4732 return 0;
4733
4734 if (type
4735 && TREE_CODE (type) != INTEGER_TYPE
4736 && TREE_CODE (type) != REAL_TYPE)
4737 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4738 else
4739 return words > 1;
4740}
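
/* Worked example (added for illustration): if three DImode arguments have
   already been assigned, cum->words == 3 is odd.  A following __int128
   (TImode, two words wide) must start on an even slot, so 1 is returned
   and one argument slot is skipped; a following int returns 0, since a
   one-word argument never needs realignment.  */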
4741
c65ebc55
JW
4742/* Return rtx for register where argument is passed, or zero if it is passed
4743 on the stack. */
c65ebc55
JW
4744/* ??? 128-bit quad-precision floats are always passed in general
4745 registers. */
4746
ffa88471 4747static rtx
ef4bddc2 4748ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
ffa88471 4749 const_tree type, bool named, bool incoming)
c65ebc55 4750{
d5cc9181
JR
4751 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4752
c65ebc55 4753 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
4754 int words = ia64_function_arg_words (type, mode);
4755 int offset = ia64_function_arg_offset (cum, type, words);
ef4bddc2 4756 machine_mode hfa_mode = VOIDmode;
c65ebc55 4757
f2972bf8
DR
4758 /* For OPEN VMS, emit the instruction setting up the argument register here,
 4759     when we know it will be emitted together with the other argument setup
4760 insns. This is not the conceptually best place to do this, but this is
4761 the easiest as we have convenient access to cumulative args info. */
4762
4763 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4764 && named == 1)
4765 {
4766 unsigned HOST_WIDE_INT regval = cum->words;
4767 int i;
4768
4769 for (i = 0; i < 8; i++)
4770 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4771
4772 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4773 GEN_INT (regval));
4774 }
4775
c65ebc55
JW
4776 /* If all argument slots are used, then it must go on the stack. */
4777 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4778 return 0;
4779
472b8fdc
TG
4780 /* On OpenVMS argument is either in Rn or Fn. */
4781 if (TARGET_ABI_OPEN_VMS)
4782 {
4783 if (FLOAT_MODE_P (mode))
4784 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4785 else
4786 return gen_rtx_REG (mode, basereg + cum->words);
4787 }
4788
c65ebc55
JW
4789 /* Check for and handle homogeneous FP aggregates. */
4790 if (type)
4791 hfa_mode = hfa_element_mode (type, 0);
4792
4793 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4794 and unprototyped hfas are passed specially. */
4795 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4796 {
4797 rtx loc[16];
4798 int i = 0;
4799 int fp_regs = cum->fp_regs;
4800 int int_regs = cum->words + offset;
4801 int hfa_size = GET_MODE_SIZE (hfa_mode);
4802 int byte_size;
4803 int args_byte_size;
4804
4805 /* If prototyped, pass it in FR regs then GR regs.
4806 If not prototyped, pass it in both FR and GR regs.
4807
4808 If this is an SFmode aggregate, then it is possible to run out of
4809 FR regs while GR regs are still left. In that case, we pass the
4810 remaining part in the GR regs. */
4811
4812 /* Fill the FP regs. We do this always. We stop if we reach the end
4813 of the argument, the last FP register, or the last argument slot. */
4814
4815 byte_size = ((mode == BLKmode)
4816 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4817 args_byte_size = int_regs * UNITS_PER_WORD;
4818 offset = 0;
4819 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4820 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4821 {
4822 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4823 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4824 + fp_regs)),
4825 GEN_INT (offset));
c65ebc55
JW
4826 offset += hfa_size;
4827 args_byte_size += hfa_size;
4828 fp_regs++;
4829 }
4830
4831 /* If no prototype, then the whole thing must go in GR regs. */
4832 if (! cum->prototype)
4833 offset = 0;
4834 /* If this is an SFmode aggregate, then we might have some left over
4835 that needs to go in GR regs. */
4836 else if (byte_size != offset)
4837 int_regs += offset / UNITS_PER_WORD;
4838
4839 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4840
4841 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4842 {
ef4bddc2 4843 machine_mode gr_mode = DImode;
826b47cc 4844 unsigned int gr_size;
c65ebc55
JW
4845
4846 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4847 then this goes in a GR reg left adjusted/little endian, right
4848 adjusted/big endian. */
4849 /* ??? Currently this is handled wrong, because 4-byte hunks are
4850 always right adjusted/little endian. */
4851 if (offset & 0x4)
4852 gr_mode = SImode;
4853 /* If we have an even 4 byte hunk because the aggregate is a
4854 multiple of 4 bytes in size, then this goes in a GR reg right
4855 adjusted/little endian. */
4856 else if (byte_size - offset == 4)
4857 gr_mode = SImode;
4858
4859 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4860 gen_rtx_REG (gr_mode, (basereg
4861 + int_regs)),
4862 GEN_INT (offset));
826b47cc
ZW
4863
4864 gr_size = GET_MODE_SIZE (gr_mode);
4865 offset += gr_size;
4866 if (gr_size == UNITS_PER_WORD
4867 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4868 int_regs++;
4869 else if (gr_size > UNITS_PER_WORD)
4870 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55 4871 }
9dec91d4 4872 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4873 }
f2972bf8 4874
c65ebc55
JW
4875 /* Integral and aggregates go in general registers. If we have run out of
4876 FR registers, then FP values must also go in general registers. This can
4877 happen when we have a SFmode HFA. */
02befdf4
ZW
4878 else if (mode == TFmode || mode == TCmode
4879 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
4880 {
4881 int byte_size = ((mode == BLKmode)
4882 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4883 if (BYTES_BIG_ENDIAN
4884 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4885 && byte_size < UNITS_PER_WORD
4886 && byte_size > 0)
4887 {
4888 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4889 gen_rtx_REG (DImode,
4890 (basereg + cum->words
4891 + offset)),
4892 const0_rtx);
4893 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4894 }
4895 else
4896 return gen_rtx_REG (mode, basereg + cum->words + offset);
4897
4898 }
c65ebc55
JW
4899
4900 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4901 named, and in a GR register when unnamed. */
c65ebc55
JW
4902 else if (cum->prototype)
4903 {
f9c887ac 4904 if (named)
c65ebc55 4905 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
4906 /* In big-endian mode, an anonymous SFmode value must be represented
4907 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4908 the value into the high half of the general register. */
4909 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4910 return gen_rtx_PARALLEL (mode,
4911 gen_rtvec (1,
4912 gen_rtx_EXPR_LIST (VOIDmode,
4913 gen_rtx_REG (DImode, basereg + cum->words + offset),
4914 const0_rtx)));
4915 else
4916 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
4917 }
4918 /* If there is no prototype, then FP values go in both FR and GR
4919 registers. */
4920 else
4921 {
f9c887ac 4922 /* See comment above. */
ef4bddc2 4923 machine_mode inner_mode =
f9c887ac
ZW
4924 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4925
c65ebc55
JW
4926 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4927 gen_rtx_REG (mode, (FR_ARG_FIRST
4928 + cum->fp_regs)),
4929 const0_rtx);
4930 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 4931 gen_rtx_REG (inner_mode,
c65ebc55
JW
4932 (basereg + cum->words
4933 + offset)),
4934 const0_rtx);
809d4ef1 4935
c65ebc55
JW
4936 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4937 }
4938}
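
/* Worked example (added for illustration): a prototyped call passing a
   named struct { float a, b, c, d; } as its first argument sees a 16-byte
   SFmode HFA, so the code above returns a PARALLEL of four SFmode
   registers FR_ARG_FIRST .. FR_ARG_FIRST + 3 at byte offsets 0, 4, 8 and
   12; no part of the aggregate spills into the GR argument registers.  */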
4939
ffa88471
SE
 4940/* Implement TARGET_FUNCTION_ARG target hook.  */
4941
4942static rtx
ef4bddc2 4943ia64_function_arg (cumulative_args_t cum, machine_mode mode,
ffa88471
SE
4944 const_tree type, bool named)
4945{
4946 return ia64_function_arg_1 (cum, mode, type, named, false);
4947}
4948
 4949/* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4950
4951static rtx
d5cc9181 4952ia64_function_incoming_arg (cumulative_args_t cum,
ef4bddc2 4953 machine_mode mode,
ffa88471
SE
4954 const_tree type, bool named)
4955{
4956 return ia64_function_arg_1 (cum, mode, type, named, true);
4957}
4958
78a52f11 4959/* Return number of bytes, at the beginning of the argument, that must be
c65ebc55
JW
 4960   put in registers.  0 if the argument is entirely in registers or entirely
4961 in memory. */
4962
78a52f11 4963static int
ef4bddc2 4964ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
78a52f11 4965 tree type, bool named ATTRIBUTE_UNUSED)
c65ebc55 4966{
d5cc9181
JR
4967 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4968
f57fc998
ZW
4969 int words = ia64_function_arg_words (type, mode);
4970 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4971
4972 /* If all argument slots are used, then it must go on the stack. */
4973 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4974 return 0;
4975
4976 /* It doesn't matter whether the argument goes in FR or GR regs. If
4977 it fits within the 8 argument slots, then it goes entirely in
4978 registers. If it extends past the last argument slot, then the rest
4979 goes on the stack. */
4980
4981 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4982 return 0;
4983
78a52f11 4984 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
c65ebc55
JW
4985}
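
/* Worked example (added for illustration): with MAX_ARGUMENT_SLOTS == 8
   and UNITS_PER_WORD == 8, a three-word aggregate arriving when
   cum->words == 6 (and no alignment skip) does not fit entirely:
   (8 - 6) * 8 == 16 bytes go in registers and the remaining 8 bytes are
   passed on the stack.  */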
4986
f2972bf8
DR
4987/* Return ivms_arg_type based on machine_mode. */
4988
4989static enum ivms_arg_type
ef4bddc2 4990ia64_arg_type (machine_mode mode)
f2972bf8
DR
4991{
4992 switch (mode)
4993 {
4e10a5a7 4994 case E_SFmode:
f2972bf8 4995 return FS;
4e10a5a7 4996 case E_DFmode:
f2972bf8
DR
4997 return FT;
4998 default:
4999 return I64;
5000 }
5001}
5002
c65ebc55
JW
5003/* Update CUM to point after this argument. This is patterned after
5004 ia64_function_arg. */
5005
ffa88471 5006static void
ef4bddc2 5007ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
ffa88471 5008 const_tree type, bool named)
c65ebc55 5009{
d5cc9181 5010 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
f57fc998
ZW
5011 int words = ia64_function_arg_words (type, mode);
5012 int offset = ia64_function_arg_offset (cum, type, words);
ef4bddc2 5013 machine_mode hfa_mode = VOIDmode;
c65ebc55
JW
5014
5015 /* If all arg slots are already full, then there is nothing to do. */
5016 if (cum->words >= MAX_ARGUMENT_SLOTS)
f2972bf8
DR
5017 {
5018 cum->words += words + offset;
5019 return;
5020 }
c65ebc55 5021
f2972bf8 5022 cum->atypes[cum->words] = ia64_arg_type (mode);
c65ebc55
JW
5023 cum->words += words + offset;
5024
472b8fdc
TG
5025 /* On OpenVMS argument is either in Rn or Fn. */
5026 if (TARGET_ABI_OPEN_VMS)
5027 {
5028 cum->int_regs = cum->words;
5029 cum->fp_regs = cum->words;
5030 return;
5031 }
5032
c65ebc55
JW
5033 /* Check for and handle homogeneous FP aggregates. */
5034 if (type)
5035 hfa_mode = hfa_element_mode (type, 0);
5036
5037 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
5038 and unprototyped hfas are passed specially. */
5039 if (hfa_mode != VOIDmode && (! cum->prototype || named))
5040 {
5041 int fp_regs = cum->fp_regs;
5042 /* This is the original value of cum->words + offset. */
5043 int int_regs = cum->words - words;
5044 int hfa_size = GET_MODE_SIZE (hfa_mode);
5045 int byte_size;
5046 int args_byte_size;
5047
5048 /* If prototyped, pass it in FR regs then GR regs.
5049 If not prototyped, pass it in both FR and GR regs.
5050
5051 If this is an SFmode aggregate, then it is possible to run out of
5052 FR regs while GR regs are still left. In that case, we pass the
5053 remaining part in the GR regs. */
5054
5055 /* Fill the FP regs. We do this always. We stop if we reach the end
5056 of the argument, the last FP register, or the last argument slot. */
5057
5058 byte_size = ((mode == BLKmode)
5059 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
5060 args_byte_size = int_regs * UNITS_PER_WORD;
5061 offset = 0;
5062 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5063 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5064 {
c65ebc55
JW
5065 offset += hfa_size;
5066 args_byte_size += hfa_size;
5067 fp_regs++;
5068 }
5069
5070 cum->fp_regs = fp_regs;
5071 }
5072
d13256a3
SE
5073 /* Integral and aggregates go in general registers. So do TFmode FP values.
5074 If we have run out of FR registers, then other FP values must also go in
5075 general registers. This can happen when we have a SFmode HFA. */
5076 else if (mode == TFmode || mode == TCmode
5077 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
648fe28b 5078 cum->int_regs = cum->words;
c65ebc55
JW
5079
5080 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 5081 named, and in a GR register when unnamed. */
c65ebc55
JW
5082 else if (cum->prototype)
5083 {
5084 if (! named)
648fe28b 5085 cum->int_regs = cum->words;
c65ebc55
JW
5086 else
5087 /* ??? Complex types should not reach here. */
5088 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5089 }
5090 /* If there is no prototype, then FP values go in both FR and GR
5091 registers. */
5092 else
9c808aad 5093 {
648fe28b
RH
5094 /* ??? Complex types should not reach here. */
5095 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5096 cum->int_regs = cum->words;
5097 }
c65ebc55 5098}
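
/* Worked example (added for illustration): continuing the four-float HFA
   example from ia64_function_arg_1, advancing past that argument adds its
   two 8-byte words to cum->words and bumps cum->fp_regs by four, one FR
   register per SFmode element.  */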
51dcde6f 5099
d13256a3 5100/* Arguments with alignment larger than 8 bytes start at the next even
93348822 5101   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
d13256a3
SE
5102 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5103
c2ed6cf8 5104static unsigned int
ef4bddc2 5105ia64_function_arg_boundary (machine_mode mode, const_tree type)
d13256a3 5106{
d13256a3
SE
5107 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5108 return PARM_BOUNDARY * 2;
5109
5110 if (type)
5111 {
5112 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5113 return PARM_BOUNDARY * 2;
5114 else
5115 return PARM_BOUNDARY;
5116 }
5117
5118 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5119 return PARM_BOUNDARY * 2;
5120 else
5121 return PARM_BOUNDARY;
5122}
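
/* Worked example (added for illustration): with PARM_BOUNDARY == 64 on
   IA-64, a long gets 64-bit argument alignment, an aggregate whose
   TYPE_ALIGN is 128 gets 128-bit alignment, and TFmode gets 128-bit
   alignment on ILP32 HP-UX even though its normal alignment there is
   only 64 bits.  */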
5123
599aedd9
RH
5124/* True if it is OK to do sibling call optimization for the specified
5125 call expression EXP. DECL will be the called function, or NULL if
5126 this is an indirect call. */
5127static bool
9c808aad 5128ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 5129{
097f3d48
JW
5130 /* We can't perform a sibcall if the current function has the syscall_linkage
5131 attribute. */
5132 if (lookup_attribute ("syscall_linkage",
5133 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5134 return false;
5135
b23ba0b8 5136 /* We must always return with our current GP. This means we can
c208436c
SE
5137 only sibcall to functions defined in the current module unless
5138 TARGET_CONST_GP is set to true. */
5139 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
599aedd9 5140}
c65ebc55 5141\f
c65ebc55
JW
5142
5143/* Implement va_arg. */
5144
23a60a04 5145static tree
726a989a
RB
5146ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5147 gimple_seq *post_p)
cd3ce9b4 5148{
cd3ce9b4 5149 /* Variable sized types are passed by reference. */
08b0dc1b 5150 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
cd3ce9b4 5151 {
23a60a04
JM
5152 tree ptrtype = build_pointer_type (type);
5153 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
c2433d7d 5154 return build_va_arg_indirect_ref (addr);
cd3ce9b4
JM
5155 }
5156
5157 /* Aggregate arguments with alignment larger than 8 bytes start at
5158 the next even boundary. Integer and floating point arguments
5159 do so if they are larger than 8 bytes, whether or not they are
5160 also aligned larger than 8 bytes. */
5161 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5162 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5163 {
5d49b6a7 5164 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
47a25a46 5165 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5d49b6a7 5166 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
726a989a 5167 gimplify_assign (unshare_expr (valist), t, pre_p);
cd3ce9b4
JM
5168 }
5169
23a60a04 5170 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 5171}
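
/* Worked example (added for illustration): for a type that must start on
   an even slot, the statements above compute
   valist = (valist + 2 * UNITS_PER_WORD - 1) & -(2 * UNITS_PER_WORD),
   i.e. (valist + 15) & -16 with 8-byte words, so a va_list value of
   0x1008 is rounded up to 0x1010 before the object is read.  */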
c65ebc55
JW
5172\f
 5173/* Return 1 if the function return value is returned in memory.  Return 0 if it is
5174 in a register. */
5175
351a758b 5176static bool
586de218 5177ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
c65ebc55 5178{
ef4bddc2
RS
5179 machine_mode mode;
5180 machine_mode hfa_mode;
487b97e0 5181 HOST_WIDE_INT byte_size;
c65ebc55
JW
5182
5183 mode = TYPE_MODE (valtype);
487b97e0
RH
5184 byte_size = GET_MODE_SIZE (mode);
5185 if (mode == BLKmode)
5186 {
5187 byte_size = int_size_in_bytes (valtype);
5188 if (byte_size < 0)
351a758b 5189 return true;
487b97e0 5190 }
c65ebc55
JW
5191
5192 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5193
5194 hfa_mode = hfa_element_mode (valtype, 0);
5195 if (hfa_mode != VOIDmode)
5196 {
5197 int hfa_size = GET_MODE_SIZE (hfa_mode);
5198
c65ebc55 5199 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 5200 return true;
c65ebc55 5201 else
351a758b 5202 return false;
c65ebc55 5203 }
c65ebc55 5204 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 5205 return true;
c65ebc55 5206 else
351a758b 5207 return false;
c65ebc55
JW
5208}
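
/* Worked examples (added for illustration, assuming MAX_INT_RETURN_SLOTS
   is 4 as defined for this target): a struct of ten doubles is an HFA of
   ten elements, which exceeds MAX_ARGUMENT_SLOTS, so it is returned in
   memory; a 32-byte aggregate of integers still fits the four 8-byte
   integer return slots and is returned in registers, while a 40-byte one
   is returned in memory.  */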
5209
5210/* Return rtx for register that holds the function return value. */
5211
ba90d838
AS
5212static rtx
5213ia64_function_value (const_tree valtype,
5214 const_tree fn_decl_or_type,
5215 bool outgoing ATTRIBUTE_UNUSED)
c65ebc55 5216{
ef4bddc2
RS
5217 machine_mode mode;
5218 machine_mode hfa_mode;
f2972bf8 5219 int unsignedp;
ba90d838 5220 const_tree func = fn_decl_or_type;
c65ebc55 5221
ba90d838
AS
5222 if (fn_decl_or_type
5223 && !DECL_P (fn_decl_or_type))
5224 func = NULL;
5225
c65ebc55
JW
5226 mode = TYPE_MODE (valtype);
5227 hfa_mode = hfa_element_mode (valtype, 0);
5228
5229 if (hfa_mode != VOIDmode)
5230 {
5231 rtx loc[8];
5232 int i;
5233 int hfa_size;
5234 int byte_size;
5235 int offset;
5236
5237 hfa_size = GET_MODE_SIZE (hfa_mode);
5238 byte_size = ((mode == BLKmode)
5239 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5240 offset = 0;
5241 for (i = 0; offset < byte_size; i++)
5242 {
5243 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5244 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5245 GEN_INT (offset));
c65ebc55
JW
5246 offset += hfa_size;
5247 }
9dec91d4 5248 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 5249 }
f57fc998 5250 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
5251 return gen_rtx_REG (mode, FR_ARG_FIRST);
5252 else
3870df96 5253 {
8c5cacfd
RH
5254 bool need_parallel = false;
5255
5256 /* In big-endian mode, we need to manage the layout of aggregates
5257 in the registers so that we get the bits properly aligned in
5258 the highpart of the registers. */
3870df96
SE
5259 if (BYTES_BIG_ENDIAN
5260 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
8c5cacfd
RH
5261 need_parallel = true;
5262
5263 /* Something like struct S { long double x; char a[0] } is not an
5264 HFA structure, and therefore doesn't go in fp registers. But
5265 the middle-end will give it XFmode anyway, and XFmode values
5266 don't normally fit in integer registers. So we need to smuggle
5267 the value inside a parallel. */
4de67c26 5268 else if (mode == XFmode || mode == XCmode || mode == RFmode)
8c5cacfd
RH
5269 need_parallel = true;
5270
5271 if (need_parallel)
3870df96
SE
5272 {
5273 rtx loc[8];
5274 int offset;
5275 int bytesize;
5276 int i;
5277
5278 offset = 0;
5279 bytesize = int_size_in_bytes (valtype);
543144ed
JM
5280 /* An empty PARALLEL is invalid here, but the return value
5281 doesn't matter for empty structs. */
5282 if (bytesize == 0)
5283 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96
SE
5284 for (i = 0; offset < bytesize; i++)
5285 {
5286 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5287 gen_rtx_REG (DImode,
5288 GR_RET_FIRST + i),
5289 GEN_INT (offset));
5290 offset += UNITS_PER_WORD;
5291 }
5292 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5293 }
8c5cacfd 5294
8ee95727
TG
5295 mode = promote_function_mode (valtype, mode, &unsignedp,
5296 func ? TREE_TYPE (func) : NULL_TREE,
5297 true);
f2972bf8 5298
8c5cacfd 5299 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96 5300 }
c65ebc55
JW
5301}
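
/* Worked example (added for illustration): a function returning
   struct { double a, b, c; } returns a DFmode HFA, so the loop above
   builds a PARALLEL of three DFmode registers FR_ARG_FIRST ..
   FR_ARG_FIRST + 2 (the FP return registers, starting at f8 in the IA-64
   ABI) at byte offsets 0, 8 and 16.  */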
5302
ba90d838
AS
5303/* Worker function for TARGET_LIBCALL_VALUE. */
5304
5305static rtx
ef4bddc2 5306ia64_libcall_value (machine_mode mode,
ba90d838
AS
5307 const_rtx fun ATTRIBUTE_UNUSED)
5308{
5309 return gen_rtx_REG (mode,
5310 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5311 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5312 && (mode) != TFmode)
5313 ? FR_RET_FIRST : GR_RET_FIRST));
5314}
5315
5316/* Worker function for FUNCTION_VALUE_REGNO_P. */
5317
5318static bool
5319ia64_function_value_regno_p (const unsigned int regno)
5320{
5321 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5322 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5323}
5324
fdbe66f2 5325/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6b2300b3
JJ
5326 We need to emit DTP-relative relocations. */
5327
fdbe66f2 5328static void
9c808aad 5329ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3 5330{
6f3113ed
SE
5331 gcc_assert (size == 4 || size == 8);
5332 if (size == 4)
5333 fputs ("\tdata4.ua\t@dtprel(", file);
5334 else
5335 fputs ("\tdata8.ua\t@dtprel(", file);
6b2300b3
JJ
5336 output_addr_const (file, x);
5337 fputs (")", file);
5338}
5339
c65ebc55
JW
5340/* Print a memory address as an operand to reference that memory location. */
5341
5342/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5343 also call this from ia64_print_operand for memory addresses. */
5344
5e50b799 5345static void
9c808aad 5346ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
cc8ca59e 5347 machine_mode /*mode*/,
9c808aad 5348 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
5349{
5350}
5351
3569057d 5352/* Print an operand to an assembler instruction.
c65ebc55
JW
5353 C Swap and print a comparison operator.
5354 D Print an FP comparison operator.
5355 E Print 32 - constant, for SImode shifts as extract.
66db6b45 5356 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
5357 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5358 a floating point register emitted normally.
735b94a7 5359 G A floating point constant.
c65ebc55 5360 I Invert a predicate register by adding 1.
e5bde68a 5361 J Select the proper predicate register for a condition.
6b6c1201 5362 j Select the inverse predicate register for a condition.
c65ebc55
JW
5363 O Append .acq for volatile load.
5364 P Postincrement of a MEM.
5365 Q Append .rel for volatile store.
4883241c 5366 R Print .s .d or nothing for a single, double or no truncation.
c65ebc55
JW
5367 S Shift amount for shladd instruction.
5368 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5369 for Intel assembler.
5370 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5371 for Intel assembler.
a71aef0b 5372 X A pair of floating point registers.
c65ebc55 5373 r Print register name, or constant 0 as r0. HP compatibility for
f61134e8
RH
5374 Linux kernel.
5375 v Print vector constant value as an 8-byte integer value. */
5376
5e50b799 5377static void
9c808aad 5378ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 5379{
e57b9d65
RH
5380 const char *str;
5381
c65ebc55
JW
5382 switch (code)
5383 {
c65ebc55
JW
5384 case 0:
5385 /* Handled below. */
5386 break;
809d4ef1 5387
c65ebc55
JW
5388 case 'C':
5389 {
5390 enum rtx_code c = swap_condition (GET_CODE (x));
5391 fputs (GET_RTX_NAME (c), file);
5392 return;
5393 }
5394
5395 case 'D':
e57b9d65
RH
5396 switch (GET_CODE (x))
5397 {
5398 case NE:
5399 str = "neq";
5400 break;
5401 case UNORDERED:
5402 str = "unord";
5403 break;
5404 case ORDERED:
5405 str = "ord";
5406 break;
86ad1da0
SE
5407 case UNLT:
5408 str = "nge";
5409 break;
5410 case UNLE:
5411 str = "ngt";
5412 break;
5413 case UNGT:
5414 str = "nle";
5415 break;
5416 case UNGE:
5417 str = "nlt";
5418 break;
8fc53a5f
EB
5419 case UNEQ:
5420 case LTGT:
5421 gcc_unreachable ();
e57b9d65
RH
5422 default:
5423 str = GET_RTX_NAME (GET_CODE (x));
5424 break;
5425 }
5426 fputs (str, file);
c65ebc55
JW
5427 return;
5428
5429 case 'E':
5430 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5431 return;
5432
66db6b45
RH
5433 case 'e':
5434 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5435 return;
5436
c65ebc55
JW
5437 case 'F':
5438 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 5439 str = reg_names [FR_REG (0)];
c65ebc55 5440 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 5441 str = reg_names [FR_REG (1)];
c65ebc55 5442 else
e820471b
NS
5443 {
5444 gcc_assert (GET_CODE (x) == REG);
5445 str = reg_names [REGNO (x)];
5446 }
e57b9d65 5447 fputs (str, file);
c65ebc55
JW
5448 return;
5449
735b94a7
SE
5450 case 'G':
5451 {
5452 long val[4];
34a72c33 5453 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
735b94a7
SE
5454 if (GET_MODE (x) == SFmode)
5455 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5456 else if (GET_MODE (x) == DFmode)
5457 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5458 & 0xffffffff,
5459 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5460 & 0xffffffff);
5461 else
5462 output_operand_lossage ("invalid %%G mode");
5463 }
5464 return;
5465
c65ebc55
JW
5466 case 'I':
5467 fputs (reg_names [REGNO (x) + 1], file);
5468 return;
5469
e5bde68a 5470 case 'J':
6b6c1201
RH
5471 case 'j':
5472 {
5473 unsigned int regno = REGNO (XEXP (x, 0));
5474 if (GET_CODE (x) == EQ)
5475 regno += 1;
5476 if (code == 'j')
5477 regno ^= 1;
5478 fputs (reg_names [regno], file);
5479 }
e5bde68a
RH
5480 return;
5481
c65ebc55
JW
5482 case 'O':
5483 if (MEM_VOLATILE_P (x))
5484 fputs(".acq", file);
5485 return;
5486
5487 case 'P':
5488 {
4b983fdc 5489 HOST_WIDE_INT value;
c65ebc55 5490
4b983fdc
RH
5491 switch (GET_CODE (XEXP (x, 0)))
5492 {
5493 default:
5494 return;
5495
5496 case POST_MODIFY:
5497 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5498 if (GET_CODE (x) == CONST_INT)
08012cda 5499 value = INTVAL (x);
e820471b 5500 else
4b983fdc 5501 {
e820471b 5502 gcc_assert (GET_CODE (x) == REG);
08012cda 5503 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
5504 return;
5505 }
4b983fdc 5506 break;
c65ebc55 5507
4b983fdc
RH
5508 case POST_INC:
5509 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 5510 break;
c65ebc55 5511
4b983fdc 5512 case POST_DEC:
08012cda 5513 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
5514 break;
5515 }
809d4ef1 5516
4a0a75dd 5517 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
5518 return;
5519 }
5520
5521 case 'Q':
5522 if (MEM_VOLATILE_P (x))
5523 fputs(".rel", file);
5524 return;
5525
4883241c
SE
5526 case 'R':
5527 if (x == CONST0_RTX (GET_MODE (x)))
5528 fputs(".s", file);
5529 else if (x == CONST1_RTX (GET_MODE (x)))
5530 fputs(".d", file);
5531 else if (x == CONST2_RTX (GET_MODE (x)))
5532 ;
5533 else
5534 output_operand_lossage ("invalid %%R value");
5535 return;
5536
c65ebc55 5537 case 'S':
809d4ef1 5538 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
5539 return;
5540
5541 case 'T':
5542 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5543 {
809d4ef1 5544 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5545 return;
5546 }
5547 break;
5548
5549 case 'U':
5550 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5551 {
3b572406 5552 const char *prefix = "0x";
c65ebc55
JW
5553 if (INTVAL (x) & 0x80000000)
5554 {
5555 fprintf (file, "0xffffffff");
5556 prefix = "";
5557 }
809d4ef1 5558 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5559 return;
5560 }
5561 break;
809d4ef1 5562
a71aef0b
JB
5563 case 'X':
5564 {
5565 unsigned int regno = REGNO (x);
5566 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5567 }
5568 return;
5569
c65ebc55 5570 case 'r':
18a3c539
JW
5571 /* If this operand is the constant zero, write it as register zero.
5572 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
5573 if (GET_CODE (x) == REG)
5574 fputs (reg_names[REGNO (x)], file);
5575 else if (x == CONST0_RTX (GET_MODE (x)))
5576 fputs ("r0", file);
18a3c539
JW
5577 else if (GET_CODE (x) == CONST_INT)
5578 output_addr_const (file, x);
c65ebc55
JW
5579 else
5580 output_operand_lossage ("invalid %%r value");
5581 return;
5582
f61134e8
RH
5583 case 'v':
5584 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5585 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5586 break;
5587
85548039
RH
5588 case '+':
5589 {
5590 const char *which;
9c808aad 5591
85548039
RH
5592 /* For conditional branches, returns or calls, substitute
5593 sptk, dptk, dpnt, or spnt for %s. */
5594 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5595 if (x)
5596 {
5fa396ad
JH
5597 int pred_val = profile_probability::from_reg_br_prob_note
5598 (XINT (x, 0)).to_reg_br_prob_base ();
85548039
RH
5599
5600 /* Guess top and bottom 10% statically predicted. */
2c9e13f3
JH
5601 if (pred_val < REG_BR_PROB_BASE / 50
5602 && br_prob_note_reliable_p (x))
85548039
RH
5603 which = ".spnt";
5604 else if (pred_val < REG_BR_PROB_BASE / 2)
5605 which = ".dpnt";
2c9e13f3
JH
5606 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5607 || !br_prob_note_reliable_p (x))
85548039
RH
5608 which = ".dptk";
5609 else
5610 which = ".sptk";
5611 }
b64925dc 5612 else if (CALL_P (current_output_insn))
85548039
RH
5613 which = ".sptk";
5614 else
5615 which = ".dptk";
5616
5617 fputs (which, file);
5618 return;
5619 }
5620
6f8aa100
RH
5621 case ',':
5622 x = current_insn_predicate;
5623 if (x)
5624 {
5625 unsigned int regno = REGNO (XEXP (x, 0));
5626 if (GET_CODE (x) == EQ)
5627 regno += 1;
6f8aa100
RH
5628 fprintf (file, "(%s) ", reg_names [regno]);
5629 }
5630 return;
5631
c65ebc55
JW
5632 default:
5633 output_operand_lossage ("ia64_print_operand: unknown code");
5634 return;
5635 }
5636
5637 switch (GET_CODE (x))
5638 {
5639 /* This happens for the spill/restore instructions. */
5640 case POST_INC:
4b983fdc
RH
5641 case POST_DEC:
5642 case POST_MODIFY:
c65ebc55 5643 x = XEXP (x, 0);
4c74215c 5644 /* fall through */
c65ebc55
JW
5645
5646 case REG:
5647 fputs (reg_names [REGNO (x)], file);
5648 break;
5649
5650 case MEM:
5651 {
5652 rtx addr = XEXP (x, 0);
ec8e098d 5653 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
5654 addr = XEXP (addr, 0);
5655 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5656 break;
5657 }
809d4ef1 5658
c65ebc55
JW
5659 default:
5660 output_addr_const (file, x);
5661 break;
5662 }
5663
5664 return;
5665}
5e50b799
AS
5666
5667/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5668
5669static bool
5670ia64_print_operand_punct_valid_p (unsigned char code)
5671{
5672 return (code == '+' || code == ',');
5673}
c65ebc55 5674\f
3c50106f
RH
5675/* Compute a (partial) cost for rtx X. Return true if the complete
5676 cost has been computed, and false if subexpressions should be
5677 scanned. In either case, *TOTAL contains the cost result. */
5678/* ??? This is incomplete. */
5679
5680static bool
e548c9df
AM
5681ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5682 int opno ATTRIBUTE_UNUSED,
68f932c4 5683 int *total, bool speed ATTRIBUTE_UNUSED)
3c50106f 5684{
e548c9df
AM
5685 int code = GET_CODE (x);
5686
3c50106f
RH
5687 switch (code)
5688 {
5689 case CONST_INT:
5690 switch (outer_code)
5691 {
5692 case SET:
13f70342 5693 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
3c50106f
RH
5694 return true;
5695 case PLUS:
13f70342 5696 if (satisfies_constraint_I (x))
3c50106f 5697 *total = 0;
13f70342 5698 else if (satisfies_constraint_J (x))
3c50106f
RH
5699 *total = 1;
5700 else
5701 *total = COSTS_N_INSNS (1);
5702 return true;
5703 default:
13f70342 5704 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
3c50106f
RH
5705 *total = 0;
5706 else
5707 *total = COSTS_N_INSNS (1);
5708 return true;
5709 }
5710
5711 case CONST_DOUBLE:
5712 *total = COSTS_N_INSNS (1);
5713 return true;
5714
5715 case CONST:
5716 case SYMBOL_REF:
5717 case LABEL_REF:
5718 *total = COSTS_N_INSNS (3);
5719 return true;
5720
f19f1e5e
RH
5721 case FMA:
5722 *total = COSTS_N_INSNS (4);
5723 return true;
5724
3c50106f
RH
5725 case MULT:
5726 /* For multiplies wider than HImode, we have to go to the FPU,
5727 which normally involves copies. Plus there's the latency
5728 of the multiply itself, and the latency of the instructions to
5729 transfer integer regs to FP regs. */
e548c9df 5730 if (FLOAT_MODE_P (mode))
f19f1e5e 5731 *total = COSTS_N_INSNS (4);
e548c9df 5732 else if (GET_MODE_SIZE (mode) > 2)
3c50106f
RH
5733 *total = COSTS_N_INSNS (10);
5734 else
5735 *total = COSTS_N_INSNS (2);
5736 return true;
5737
5738 case PLUS:
5739 case MINUS:
e548c9df 5740 if (FLOAT_MODE_P (mode))
f19f1e5e
RH
5741 {
5742 *total = COSTS_N_INSNS (4);
5743 return true;
5744 }
5745 /* FALLTHRU */
5746
3c50106f
RH
5747 case ASHIFT:
5748 case ASHIFTRT:
5749 case LSHIFTRT:
5750 *total = COSTS_N_INSNS (1);
5751 return true;
5752
5753 case DIV:
5754 case UDIV:
5755 case MOD:
5756 case UMOD:
5757 /* We make divide expensive, so that divide-by-constant will be
5758 optimized to a multiply. */
5759 *total = COSTS_N_INSNS (60);
5760 return true;
5761
5762 default:
5763 return false;
5764 }
5765}
5766
9e4f94de 5767/* Calculate the cost of moving data from a register in class FROM to
7109d286 5768 one in class TO, using MODE. */
5527bf14 5769
de8f4b07 5770static int
ef4bddc2 5771ia64_register_move_cost (machine_mode mode, reg_class_t from,
6f76a878 5772 reg_class_t to)
a87cf97e 5773{
7109d286
RH
5774 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5775 if (to == ADDL_REGS)
5776 to = GR_REGS;
5777 if (from == ADDL_REGS)
5778 from = GR_REGS;
5779
5780 /* All costs are symmetric, so reduce cases by putting the
5781 lower number class as the destination. */
5782 if (from < to)
5783 {
6f76a878 5784 reg_class_t tmp = to;
7109d286
RH
5785 to = from, from = tmp;
5786 }
5787
02befdf4 5788 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286 5789 so that we get secondary memory reloads. Between FR_REGS,
69e18c09 5790 we have to make this at least as expensive as memory_move_cost
7109d286 5791 to avoid spectacularly poor register class preferencing. */
4de67c26 5792 if (mode == XFmode || mode == RFmode)
7109d286
RH
5793 {
5794 if (to != GR_REGS || from != GR_REGS)
69e18c09 5795 return memory_move_cost (mode, to, false);
7109d286
RH
5796 else
5797 return 3;
5798 }
5799
5800 switch (to)
5801 {
5802 case PR_REGS:
5803 /* Moving between PR registers takes two insns. */
5804 if (from == PR_REGS)
5805 return 3;
5806 /* Moving between PR and anything but GR is impossible. */
5807 if (from != GR_REGS)
69e18c09 5808 return memory_move_cost (mode, to, false);
7109d286
RH
5809 break;
5810
5811 case BR_REGS:
5812 /* Moving between BR and anything but GR is impossible. */
5813 if (from != GR_REGS && from != GR_AND_BR_REGS)
69e18c09 5814 return memory_move_cost (mode, to, false);
7109d286
RH
5815 break;
5816
5817 case AR_I_REGS:
5818 case AR_M_REGS:
5819 /* Moving between AR and anything but GR is impossible. */
5820 if (from != GR_REGS)
69e18c09 5821 return memory_move_cost (mode, to, false);
7109d286
RH
5822 break;
5823
5824 case GR_REGS:
5825 case FR_REGS:
a71aef0b 5826 case FP_REGS:
7109d286
RH
5827 case GR_AND_FR_REGS:
5828 case GR_AND_BR_REGS:
5829 case ALL_REGS:
5830 break;
5831
5832 default:
e820471b 5833 gcc_unreachable ();
7109d286 5834 }
3f622353 5835
5527bf14
RH
5836 return 2;
5837}
c65ebc55 5838
69e18c09
AS
5839/* Calculate the cost of moving data of MODE from a register to or from
5840 memory. */
5841
5842static int
ef4bddc2 5843ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
69e18c09
AS
5844 reg_class_t rclass,
5845 bool in ATTRIBUTE_UNUSED)
5846{
5847 if (rclass == GENERAL_REGS
5848 || rclass == FR_REGS
5849 || rclass == FP_REGS
5850 || rclass == GR_AND_FR_REGS)
5851 return 4;
5852 else
5853 return 10;
5854}
5855
ab177ad5
AS
5856/* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5857 on RCLASS to use when copying X into that class. */
f61134e8 5858
ab177ad5
AS
5859static reg_class_t
5860ia64_preferred_reload_class (rtx x, reg_class_t rclass)
f61134e8 5861{
0a2aaacc 5862 switch (rclass)
f61134e8
RH
5863 {
5864 case FR_REGS:
a71aef0b 5865 case FP_REGS:
f61134e8
RH
5866 /* Don't allow volatile mem reloads into floating point registers.
5867 This is defined to force reload to choose the r/m case instead
5868 of the f/f case when reloading (set (reg fX) (mem/v)). */
5869 if (MEM_P (x) && MEM_VOLATILE_P (x))
5870 return NO_REGS;
5871
5872 /* Force all unrecognized constants into the constant pool. */
5873 if (CONSTANT_P (x))
5874 return NO_REGS;
5875 break;
5876
5877 case AR_M_REGS:
5878 case AR_I_REGS:
5879 if (!OBJECT_P (x))
5880 return NO_REGS;
5881 break;
5882
5883 default:
5884 break;
5885 }
5886
0a2aaacc 5887 return rclass;
f61134e8
RH
5888}
5889
c65ebc55 5890/* This function returns the register class required for a secondary
0a2aaacc 5891 register when copying between one of the registers in RCLASS, and X,
c65ebc55
JW
5892 using MODE. A return value of NO_REGS means that no secondary register
5893 is required. */
5894
5895enum reg_class
0a2aaacc 5896ia64_secondary_reload_class (enum reg_class rclass,
ef4bddc2 5897 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
5898{
5899 int regno = -1;
5900
5901 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5902 regno = true_regnum (x);
5903
0a2aaacc 5904 switch (rclass)
97e242b0
RH
5905 {
5906 case BR_REGS:
7109d286
RH
5907 case AR_M_REGS:
5908 case AR_I_REGS:
5909 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5910 interaction. We end up with two pseudos with overlapping lifetimes
5911 both of which are equiv to the same constant, and both which need
5912 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5913 changes depending on the path length, which means the qty_first_reg
5914 check in make_regs_eqv can give different answers at different times.
5915 At some point I'll probably need a reload_indi pattern to handle
5916 this.
5917
5918 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5919 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5920 non-general registers for good measure. */
5921 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
5922 return GR_REGS;
5923
5924 /* This is needed if a pseudo used as a call_operand gets spilled to a
5925 stack slot. */
5926 if (GET_CODE (x) == MEM)
5927 return GR_REGS;
5928 break;
5929
5930 case FR_REGS:
a71aef0b 5931 case FP_REGS:
c51e6d85 5932 /* Need to go through general registers to get to other class regs. */
7109d286
RH
5933 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5934 return GR_REGS;
9c808aad 5935
97e242b0
RH
5936 /* This can happen when a paradoxical subreg is an operand to the
5937 muldi3 pattern. */
5938 /* ??? This shouldn't be necessary after instruction scheduling is
5939 enabled, because paradoxical subregs are not accepted by
5940 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5941 stop the paradoxical subreg stupidity in the *_operand functions
5942 in recog.c. */
5943 if (GET_CODE (x) == MEM
5944 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5945 || GET_MODE (x) == QImode))
5946 return GR_REGS;
5947
5948 /* This can happen because of the ior/and/etc patterns that accept FP
5949 registers as operands. If the third operand is a constant, then it
5950 needs to be reloaded into a FP register. */
5951 if (GET_CODE (x) == CONST_INT)
5952 return GR_REGS;
5953
5954 /* This can happen because of register elimination in a muldi3 insn.
5955 E.g. `26107 * (unsigned long)&u'. */
5956 if (GET_CODE (x) == PLUS)
5957 return GR_REGS;
5958 break;
5959
5960 case PR_REGS:
f2f90c63 5961 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
5962 and the function has a nonlocal goto. This is because global
5963 does not allocate call crossing pseudos to hard registers when
e3b5732b 5964 crtl->has_nonlocal_goto is true. This is relatively
97e242b0
RH
5965 common for C++ programs that use exceptions. To reproduce,
5966 return NO_REGS and compile libstdc++. */
5967 if (GET_CODE (x) == MEM)
5968 return GR_REGS;
f2f90c63
RH
5969
5970 /* This can happen when we take a BImode subreg of a DImode value,
5971 and that DImode value winds up in some non-GR register. */
5972 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5973 return GR_REGS;
97e242b0
RH
5974 break;
5975
5976 default:
5977 break;
5978 }
c65ebc55
JW
5979
5980 return NO_REGS;
5981}
5982
215b063c
PB
5983\f
5984/* Implement targetm.unspec_may_trap_p hook. */
5985static int
5986ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5987{
c84a808e
EB
5988 switch (XINT (x, 1))
5989 {
5990 case UNSPEC_LDA:
5991 case UNSPEC_LDS:
5992 case UNSPEC_LDSA:
5993 case UNSPEC_LDCCLR:
5994 case UNSPEC_CHKACLR:
5995 case UNSPEC_CHKS:
5996 /* These unspecs are just wrappers. */
5997 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
215b063c
PB
5998 }
5999
6000 return default_unspec_may_trap_p (x, flags);
6001}
6002
c65ebc55
JW
6003\f
6004/* Parse the -mfixed-range= option string. */
6005
6006static void
9c808aad 6007fix_range (const char *const_str)
c65ebc55
JW
6008{
6009 int i, first, last;
3b572406 6010 char *str, *dash, *comma;
c65ebc55
JW
6011
6012 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
6013 REG2 are either register names or register numbers. The effect
6014 of this option is to mark the registers in the range from REG1 to
6015 REG2 as ``fixed'' so they won't be used by the compiler. This is
6016 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
6017
3b572406
RH
6018 i = strlen (const_str);
6019 str = (char *) alloca (i + 1);
6020 memcpy (str, const_str, i + 1);
6021
c65ebc55
JW
6022 while (1)
6023 {
6024 dash = strchr (str, '-');
6025 if (!dash)
6026 {
d4ee4d25 6027 warning (0, "value of -mfixed-range must have form REG1-REG2");
c65ebc55
JW
6028 return;
6029 }
6030 *dash = '\0';
6031
6032 comma = strchr (dash + 1, ',');
6033 if (comma)
6034 *comma = '\0';
6035
6036 first = decode_reg_name (str);
6037 if (first < 0)
6038 {
d4ee4d25 6039 warning (0, "unknown register name: %s", str);
c65ebc55
JW
6040 return;
6041 }
6042
6043 last = decode_reg_name (dash + 1);
6044 if (last < 0)
6045 {
d4ee4d25 6046 warning (0, "unknown register name: %s", dash + 1);
c65ebc55
JW
6047 return;
6048 }
6049
6050 *dash = '-';
6051
6052 if (first > last)
6053 {
d4ee4d25 6054 warning (0, "%s-%s is an empty range", str, dash + 1);
c65ebc55
JW
6055 return;
6056 }
6057
6058 for (i = first; i <= last; ++i)
6059 fixed_regs[i] = call_used_regs[i] = 1;
6060
6061 if (!comma)
6062 break;
6063
6064 *comma = ',';
6065 str = comma + 1;
6066 }
6067}
6068
930572b9 6069/* Implement TARGET_OPTION_OVERRIDE. */
c65ebc55 6070
930572b9
AS
6071static void
6072ia64_option_override (void)
c65ebc55 6073{
e6cc0c98
JM
6074 unsigned int i;
6075 cl_deferred_option *opt;
9771b263
DN
6076 vec<cl_deferred_option> *v
6077 = (vec<cl_deferred_option> *) ia64_deferred_options;
e6cc0c98 6078
9771b263
DN
6079 if (v)
6080 FOR_EACH_VEC_ELT (*v, i, opt)
6081 {
6082 switch (opt->opt_index)
6083 {
6084 case OPT_mfixed_range_:
6085 fix_range (opt->arg);
6086 break;
e6cc0c98 6087
9771b263
DN
6088 default:
6089 gcc_unreachable ();
6090 }
6091 }
e6cc0c98 6092
59da9a7d
JW
6093 if (TARGET_AUTO_PIC)
6094 target_flags |= MASK_CONST_GP;
6095
7e1e7d4c
VM
 6096  /* Numerous experiments show that IRA-based loop pressure
6097 calculation works better for RTL loop invariant motion on targets
6098 with enough (>= 32) registers. It is an expensive optimization.
6099 So it is on only for peak performance. */
6100 if (optimize >= 3)
6101 flag_ira_loop_pressure = 1;
6102
6103
fa37ed29
JM
6104 ia64_section_threshold = (global_options_set.x_g_switch_value
6105 ? g_switch_value
6106 : IA64_DEFAULT_GVALUE);
2b7e2984
SE
6107
6108 init_machine_status = ia64_init_machine_status;
6109
c518c102
ML
6110 if (flag_align_functions && !str_align_functions)
6111 str_align_functions = "64";
6112 if (flag_align_loops && !str_align_loops)
6113 str_align_loops = "32";
2b7e2984
SE
6114 if (TARGET_ABI_OPEN_VMS)
6115 flag_no_common = 1;
6116
6117 ia64_override_options_after_change();
6118}
6119
6120/* Implement targetm.override_options_after_change. */
6121
6122static void
6123ia64_override_options_after_change (void)
6124{
388092d5 6125 if (optimize >= 3
d4d24ba4
JM
6126 && !global_options_set.x_flag_selective_scheduling
6127 && !global_options_set.x_flag_selective_scheduling2)
388092d5
AB
6128 {
6129 flag_selective_scheduling2 = 1;
6130 flag_sel_sched_pipelining = 1;
6131 }
6132 if (mflag_sched_control_spec == 2)
6133 {
6134 /* Control speculation is on by default for the selective scheduler,
6135 but not for the Haifa scheduler. */
6136 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6137 }
6138 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6139 {
6140 /* FIXME: remove this when we'd implement breaking autoinsns as
6141 a transformation. */
6142 flag_auto_inc_dec = 0;
6143 }
c65ebc55 6144}
dbdd120f 6145
6fb5fa3c
DB
6146/* Initialize the record of emitted frame related registers. */
6147
6148void ia64_init_expanders (void)
6149{
6150 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6151}
6152
dbdd120f
RH
6153static struct machine_function *
6154ia64_init_machine_status (void)
6155{
766090c2 6156 return ggc_cleared_alloc<machine_function> ();
dbdd120f 6157}
c65ebc55 6158\f
647d790d
DM
6159static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6160static enum attr_type ia64_safe_type (rtx_insn *);
2130b7fb 6161
2130b7fb 6162static enum attr_itanium_class
647d790d 6163ia64_safe_itanium_class (rtx_insn *insn)
2130b7fb
BS
6164{
6165 if (recog_memoized (insn) >= 0)
6166 return get_attr_itanium_class (insn);
b5b8b0ac
AO
6167 else if (DEBUG_INSN_P (insn))
6168 return ITANIUM_CLASS_IGNORE;
2130b7fb
BS
6169 else
6170 return ITANIUM_CLASS_UNKNOWN;
6171}
6172
6173static enum attr_type
647d790d 6174ia64_safe_type (rtx_insn *insn)
2130b7fb
BS
6175{
6176 if (recog_memoized (insn) >= 0)
6177 return get_attr_type (insn);
6178 else
6179 return TYPE_UNKNOWN;
6180}
6181\f
c65ebc55
JW
6182/* The following collection of routines emit instruction group stop bits as
6183 necessary to avoid dependencies. */
6184
6185/* Need to track some additional registers as far as serialization is
6186 concerned so we can properly handle br.call and br.ret. We could
6187 make these registers visible to gcc, but since these registers are
6188 never explicitly used in gcc generated code, it seems wasteful to
6189 do so (plus it would make the call and return patterns needlessly
6190 complex). */
c65ebc55 6191#define REG_RP (BR_REG (0))
c65ebc55 6192#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
6193/* This is used for volatile asms which may require a stop bit immediately
6194 before and after them. */
5527bf14 6195#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
6196#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6197#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 6198
f2f90c63
RH
6199/* For each register, we keep track of how it has been written in the
6200 current instruction group.
6201
6202 If a register is written unconditionally (no qualifying predicate),
6203 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6204
6205 If a register is written if its qualifying predicate P is true, we
6206 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6207 may be written again by the complement of P (P^1) and when this happens,
6208 WRITE_COUNT gets set to 2.
6209
6210 The result of this is that whenever an insn attempts to write a register
e03f5d43 6211 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
6212
6213 If a predicate register is written by a floating-point insn, we set
6214 WRITTEN_BY_FP to true.
6215
6216 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6217 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6218
444a356a
JJ
6219#if GCC_VERSION >= 4000
6220#define RWS_FIELD_TYPE __extension__ unsigned short
6221#else
6222#define RWS_FIELD_TYPE unsigned int
6223#endif
c65ebc55
JW
6224struct reg_write_state
6225{
444a356a
JJ
6226 RWS_FIELD_TYPE write_count : 2;
6227 RWS_FIELD_TYPE first_pred : 10;
6228 RWS_FIELD_TYPE written_by_fp : 1;
6229 RWS_FIELD_TYPE written_by_and : 1;
6230 RWS_FIELD_TYPE written_by_or : 1;
c65ebc55
JW
6231};
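
/* Illustrative scenario (added, not from the original source): within one
   instruction group, a first predicated write such as "(p6) mov r14 = r15"
   records write_count == 1 for r14 and remembers p6 in first_pred.  A
   second write of r14 in the same group, even under the complementary
   predicate p7, raises write_count to 2 and is conservatively treated by
   rws_access_regno below like an unconditional write, so a stop bit is
   inserted between the two writes.  */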
6232
6233/* Cumulative info for the current instruction group. */
6234struct reg_write_state rws_sum[NUM_REGS];
e28c2052 6235#if CHECKING_P
444a356a
JJ
6236/* Bitmap whether a register has been written in the current insn. */
6237HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6238 / HOST_BITS_PER_WIDEST_FAST_INT];
6239
6240static inline void
6241rws_insn_set (int regno)
6242{
6243 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6244 SET_HARD_REG_BIT (rws_insn, regno);
6245}
6246
6247static inline int
6248rws_insn_test (int regno)
6249{
6250 return TEST_HARD_REG_BIT (rws_insn, regno);
6251}
6252#else
6253/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6254unsigned char rws_insn[2];
6255
6256static inline void
6257rws_insn_set (int regno)
6258{
6259 if (regno == REG_AR_CFM)
6260 rws_insn[0] = 1;
6261 else if (regno == REG_VOLATILE)
6262 rws_insn[1] = 1;
6263}
6264
6265static inline int
6266rws_insn_test (int regno)
6267{
6268 if (regno == REG_AR_CFM)
6269 return rws_insn[0];
6270 if (regno == REG_VOLATILE)
6271 return rws_insn[1];
6272 return 0;
6273}
6274#endif
c65ebc55 6275
25250265 6276/* Indicates whether this is the first instruction after a stop bit,
e820471b
NS
6277 in which case we don't need another stop bit. Without this,
6278 ia64_variable_issue will die when scheduling an alloc. */
25250265
JW
6279static int first_instruction;
6280
c65ebc55
JW
6281/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6282 RTL for one instruction. */
6283struct reg_flags
6284{
6285 unsigned int is_write : 1; /* Is register being written? */
6286 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6287 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
6288 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6289 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 6290 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
6291};
6292
444a356a 6293static void rws_update (int, struct reg_flags, int);
9c808aad
AJ
6294static int rws_access_regno (int, struct reg_flags, int);
6295static int rws_access_reg (rtx, struct reg_flags, int);
c1bc6ca8
JW
6296static void update_set_flags (rtx, struct reg_flags *);
6297static int set_src_needs_barrier (rtx, struct reg_flags, int);
9c808aad
AJ
6298static int rtx_needs_barrier (rtx, struct reg_flags, int);
6299static void init_insn_group_barriers (void);
647d790d
DM
6300static int group_barrier_needed (rtx_insn *);
6301static int safe_group_barrier_needed (rtx_insn *);
444a356a 6302static int in_safe_group_barrier;
3b572406 6303
c65ebc55
JW
6304/* Update *RWS for REGNO, which is being written by the current instruction,
6305 with predicate PRED, and associated register flags in FLAGS. */
6306
6307static void
444a356a 6308rws_update (int regno, struct reg_flags flags, int pred)
c65ebc55 6309{
3e7c7805 6310 if (pred)
444a356a 6311 rws_sum[regno].write_count++;
3e7c7805 6312 else
444a356a
JJ
6313 rws_sum[regno].write_count = 2;
6314 rws_sum[regno].written_by_fp |= flags.is_fp;
f2f90c63 6315 /* ??? Not tracking and/or across differing predicates. */
444a356a
JJ
6316 rws_sum[regno].written_by_and = flags.is_and;
6317 rws_sum[regno].written_by_or = flags.is_or;
6318 rws_sum[regno].first_pred = pred;
c65ebc55
JW
6319}
6320
6321/* Handle an access to register REGNO of type FLAGS using predicate register
444a356a 6322 PRED. Update rws_sum array. Return 1 if this access creates
c65ebc55
JW
6323 a dependency with an earlier instruction in the same group. */
6324
6325static int
9c808aad 6326rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
6327{
6328 int need_barrier = 0;
c65ebc55 6329
e820471b 6330 gcc_assert (regno < NUM_REGS);
c65ebc55 6331
f2f90c63
RH
6332 if (! PR_REGNO_P (regno))
6333 flags.is_and = flags.is_or = 0;
6334
c65ebc55
JW
6335 if (flags.is_write)
6336 {
12c2c7aa
JW
6337 int write_count;
6338
444a356a 6339 rws_insn_set (regno);
12c2c7aa 6340 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
6341
6342 switch (write_count)
c65ebc55
JW
6343 {
6344 case 0:
6345 /* The register has not been written yet. */
444a356a
JJ
6346 if (!in_safe_group_barrier)
6347 rws_update (regno, flags, pred);
c65ebc55
JW
6348 break;
6349
6350 case 1:
89774469
SE
6351 /* The register has been written via a predicate. Treat
 6352	     it like an unconditional write and do not try to check
6353 for complementary pred reg in earlier write. */
f2f90c63 6354 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 6355 ;
f2f90c63
RH
6356 else if (flags.is_or && rws_sum[regno].written_by_or)
6357 ;
89774469 6358 else
c65ebc55 6359 need_barrier = 1;
444a356a
JJ
6360 if (!in_safe_group_barrier)
6361 rws_update (regno, flags, pred);
c65ebc55
JW
6362 break;
6363
6364 case 2:
6365 /* The register has been unconditionally written already. We
6366 need a barrier. */
f2f90c63
RH
6367 if (flags.is_and && rws_sum[regno].written_by_and)
6368 ;
6369 else if (flags.is_or && rws_sum[regno].written_by_or)
6370 ;
6371 else
6372 need_barrier = 1;
444a356a
JJ
6373 if (!in_safe_group_barrier)
6374 {
6375 rws_sum[regno].written_by_and = flags.is_and;
6376 rws_sum[regno].written_by_or = flags.is_or;
6377 }
c65ebc55
JW
6378 break;
6379
6380 default:
e820471b 6381 gcc_unreachable ();
c65ebc55
JW
6382 }
6383 }
6384 else
6385 {
6386 if (flags.is_branch)
6387 {
6388 /* Branches have several RAW exceptions that allow to avoid
6389 barriers. */
6390
5527bf14 6391 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
6392 /* RAW dependencies on branch regs are permissible as long
6393 as the writer is a non-branch instruction. Since we
6394 never generate code that uses a branch register written
6395 by a branch instruction, handling this case is
6396 easy. */
5527bf14 6397 return 0;
c65ebc55
JW
6398
6399 if (REGNO_REG_CLASS (regno) == PR_REGS
6400 && ! rws_sum[regno].written_by_fp)
6401 /* The predicates of a branch are available within the
6402 same insn group as long as the predicate was written by
ed168e45 6403 something other than a floating-point instruction. */
c65ebc55
JW
6404 return 0;
6405 }
6406
f2f90c63
RH
6407 if (flags.is_and && rws_sum[regno].written_by_and)
6408 return 0;
6409 if (flags.is_or && rws_sum[regno].written_by_or)
6410 return 0;
6411
c65ebc55
JW
6412 switch (rws_sum[regno].write_count)
6413 {
6414 case 0:
6415 /* The register has not been written yet. */
6416 break;
6417
6418 case 1:
89774469
SE
6419 /* The register has been written via a predicate, assume we
6420 need a barrier (don't check for complementary regs). */
6421 need_barrier = 1;
c65ebc55
JW
6422 break;
6423
6424 case 2:
6425 /* The register has been unconditionally written already. We
6426 need a barrier. */
6427 need_barrier = 1;
6428 break;
6429
6430 default:
e820471b 6431 gcc_unreachable ();
c65ebc55
JW
6432 }
6433 }
6434
6435 return need_barrier;
6436}
6437
97e242b0 6438static int
9c808aad 6439rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
6440{
6441 int regno = REGNO (reg);
462a99aa 6442 int n = REG_NREGS (reg);
97e242b0
RH
6443
6444 if (n == 1)
6445 return rws_access_regno (regno, flags, pred);
6446 else
6447 {
6448 int need_barrier = 0;
6449 while (--n >= 0)
6450 need_barrier |= rws_access_regno (regno + n, flags, pred);
6451 return need_barrier;
6452 }
6453}
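
/* Illustration (not from the original sources; registers and predicates are
   hypothetical).  Within one instruction group:

     (p6) mov r4 = r5        // predicated write: write_count becomes 1
          mov r4 = r6        // second write to r4: rws_access_regno
                             // reports a dependency, so a stop bit (;;)
                             // must be emitted before this insn

   A read of r4 later in the same group is likewise reported as needing a
   barrier, since write_count is nonzero.  */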
6454
112333d3
BS
6455/* Examine X, which is a SET rtx, and update the flags, the predicate, and
6456 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
6457
6458static void
c1bc6ca8 6459update_set_flags (rtx x, struct reg_flags *pflags)
112333d3
BS
6460{
6461 rtx src = SET_SRC (x);
6462
112333d3
BS
6463 switch (GET_CODE (src))
6464 {
6465 case CALL:
6466 return;
6467
6468 case IF_THEN_ELSE:
048d0d36 6469 /* There are four cases here:
c8d3810f
RH
6470 (1) The destination is (pc), in which case this is a branch,
6471 nothing here applies.
6472 (2) The destination is ar.lc, in which case this is a
 6473		 doloop_end_internal.
6474 (3) The destination is an fp register, in which case this is
6475 an fselect instruction.
048d0d36
MK
6476 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6477 this is a check load.
c8d3810f
RH
6478 In all cases, nothing we do in this function applies. */
6479 return;
112333d3
BS
6480
6481 default:
ec8e098d 6482 if (COMPARISON_P (src)
c8d3810f 6483 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
112333d3
BS
6484 /* Set pflags->is_fp to 1 so that we know we're dealing
6485 with a floating point comparison when processing the
6486 destination of the SET. */
6487 pflags->is_fp = 1;
6488
6489 /* Discover if this is a parallel comparison. We only handle
6490 and.orcm and or.andcm at present, since we must retain a
6491 strict inverse on the predicate pair. */
6492 else if (GET_CODE (src) == AND)
6493 pflags->is_and = 1;
6494 else if (GET_CODE (src) == IOR)
6495 pflags->is_or = 1;
6496
6497 break;
6498 }
6499}
6500
6501/* Subroutine of rtx_needs_barrier; this function determines whether the
6502 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6503 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6504 for this insn. */
9c808aad 6505
112333d3 6506static int
c1bc6ca8 6507set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
112333d3
BS
6508{
6509 int need_barrier = 0;
6510 rtx dst;
6511 rtx src = SET_SRC (x);
6512
6513 if (GET_CODE (src) == CALL)
6514 /* We don't need to worry about the result registers that
6515 get written by subroutine call. */
6516 return rtx_needs_barrier (src, flags, pred);
6517 else if (SET_DEST (x) == pc_rtx)
6518 {
6519 /* X is a conditional branch. */
6520 /* ??? This seems redundant, as the caller sets this bit for
6521 all JUMP_INSNs. */
048d0d36
MK
6522 if (!ia64_spec_check_src_p (src))
6523 flags.is_branch = 1;
112333d3
BS
6524 return rtx_needs_barrier (src, flags, pred);
6525 }
6526
048d0d36
MK
6527 if (ia64_spec_check_src_p (src))
6528 /* Avoid checking one register twice (in condition
6529 and in 'then' section) for ldc pattern. */
6530 {
6531 gcc_assert (REG_P (XEXP (src, 2)));
6532 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6533
6534 /* We process MEM below. */
6535 src = XEXP (src, 1);
6536 }
6537
6538 need_barrier |= rtx_needs_barrier (src, flags, pred);
112333d3 6539
112333d3
BS
6540 dst = SET_DEST (x);
6541 if (GET_CODE (dst) == ZERO_EXTRACT)
6542 {
6543 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6544 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
112333d3
BS
6545 }
6546 return need_barrier;
6547}
6548
b38ba463
ZW
6549/* Handle an access to rtx X of type FLAGS using predicate register
6550 PRED. Return 1 if this access creates a dependency with an earlier
6551 instruction in the same group. */
c65ebc55
JW
6552
6553static int
9c808aad 6554rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
6555{
6556 int i, j;
6557 int is_complemented = 0;
6558 int need_barrier = 0;
6559 const char *format_ptr;
6560 struct reg_flags new_flags;
c1bc6ca8 6561 rtx cond;
c65ebc55
JW
6562
6563 if (! x)
6564 return 0;
6565
6566 new_flags = flags;
6567
6568 switch (GET_CODE (x))
6569 {
9c808aad 6570 case SET:
c1bc6ca8
JW
6571 update_set_flags (x, &new_flags);
6572 need_barrier = set_src_needs_barrier (x, new_flags, pred);
112333d3 6573 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 6574 {
112333d3
BS
6575 new_flags.is_write = 1;
6576 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 6577 }
c65ebc55
JW
6578 break;
6579
6580 case CALL:
6581 new_flags.is_write = 0;
97e242b0 6582 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
6583
6584 /* Avoid multiple register writes, in case this is a pattern with
e820471b 6585 multiple CALL rtx. This avoids a failure in rws_access_reg. */
444a356a 6586 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
c65ebc55
JW
6587 {
6588 new_flags.is_write = 1;
97e242b0
RH
6589 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6590 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6591 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6592 }
6593 break;
6594
e5bde68a
RH
6595 case COND_EXEC:
6596 /* X is a predicated instruction. */
6597
6598 cond = COND_EXEC_TEST (x);
e820471b 6599 gcc_assert (!pred);
e5bde68a
RH
6600 need_barrier = rtx_needs_barrier (cond, flags, 0);
6601
6602 if (GET_CODE (cond) == EQ)
6603 is_complemented = 1;
6604 cond = XEXP (cond, 0);
e820471b 6605 gcc_assert (GET_CODE (cond) == REG
c1bc6ca8 6606 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
e5bde68a
RH
6607 pred = REGNO (cond);
6608 if (is_complemented)
6609 ++pred;
6610
6611 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6612 return need_barrier;
6613
c65ebc55 6614 case CLOBBER:
c65ebc55 6615 case USE:
c65ebc55
JW
6616 /* Clobber & use are for earlier compiler-phases only. */
6617 break;
6618
6619 case ASM_OPERANDS:
6620 case ASM_INPUT:
6621 /* We always emit stop bits for traditional asms. We emit stop bits
6622 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6623 if (GET_CODE (x) != ASM_OPERANDS
6624 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6625 {
6626 /* Avoid writing the register multiple times if we have multiple
e820471b 6627 asm outputs. This avoids a failure in rws_access_reg. */
444a356a 6628 if (! rws_insn_test (REG_VOLATILE))
c65ebc55
JW
6629 {
6630 new_flags.is_write = 1;
97e242b0 6631 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
6632 }
6633 return 1;
6634 }
6635
6636 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
1e5f1716 6637 We cannot just fall through here since then we would be confused
c65ebc55
JW
 6638	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
 6639	 a traditional asm, unlike its normal usage.  */
6640
6641 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6642 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6643 need_barrier = 1;
6644 break;
6645
6646 case PARALLEL:
6647 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
6648 {
6649 rtx pat = XVECEXP (x, 0, i);
051d8245 6650 switch (GET_CODE (pat))
112333d3 6651 {
051d8245 6652 case SET:
c1bc6ca8
JW
6653 update_set_flags (pat, &new_flags);
6654 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
051d8245
RH
6655 break;
6656
6657 case USE:
6658 case CALL:
6659 case ASM_OPERANDS:
93671519 6660 case ASM_INPUT:
051d8245
RH
6661 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6662 break;
6663
6664 case CLOBBER:
628162ea
JJ
6665 if (REG_P (XEXP (pat, 0))
6666 && extract_asm_operands (x) != NULL_RTX
6667 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6668 {
6669 new_flags.is_write = 1;
6670 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6671 new_flags, pred);
6672 new_flags = flags;
6673 }
6674 break;
6675
051d8245
RH
6676 case RETURN:
6677 break;
6678
6679 default:
6680 gcc_unreachable ();
112333d3 6681 }
112333d3
BS
6682 }
6683 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6684 {
6685 rtx pat = XVECEXP (x, 0, i);
6686 if (GET_CODE (pat) == SET)
6687 {
6688 if (GET_CODE (SET_SRC (pat)) != CALL)
6689 {
6690 new_flags.is_write = 1;
6691 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6692 pred);
6693 }
6694 }
339cb12e 6695 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
6696 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6697 }
c65ebc55
JW
6698 break;
6699
6700 case SUBREG:
077bc924
JM
6701 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6702 break;
c65ebc55 6703 case REG:
870f9ec0
RH
6704 if (REGNO (x) == AR_UNAT_REGNUM)
6705 {
6706 for (i = 0; i < 64; ++i)
6707 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6708 }
6709 else
6710 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
6711 break;
6712
6713 case MEM:
6714 /* Find the regs used in memory address computation. */
6715 new_flags.is_write = 0;
6716 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6717 break;
6718
051d8245 6719 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
c65ebc55
JW
6720 case SYMBOL_REF: case LABEL_REF: case CONST:
6721 break;
6722
6723 /* Operators with side-effects. */
6724 case POST_INC: case POST_DEC:
e820471b 6725 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
c65ebc55
JW
6726
6727 new_flags.is_write = 0;
97e242b0 6728 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 6729 new_flags.is_write = 1;
97e242b0 6730 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6731 break;
6732
6733 case POST_MODIFY:
e820471b 6734 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
4b983fdc
RH
6735
6736 new_flags.is_write = 0;
97e242b0 6737 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6738 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6739 new_flags.is_write = 1;
97e242b0 6740 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
6741 break;
6742
6743 /* Handle common unary and binary ops for efficiency. */
6744 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6745 case MOD: case UDIV: case UMOD: case AND: case IOR:
6746 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6747 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6748 case NE: case EQ: case GE: case GT: case LE:
6749 case LT: case GEU: case GTU: case LEU: case LTU:
6750 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6751 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6752 break;
6753
6754 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6755 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6756 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 6757 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
6758 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6759 break;
6760
051d8245
RH
6761 case VEC_SELECT:
6762 /* VEC_SELECT's second argument is a PARALLEL with integers that
6763 describe the elements selected. On ia64, those integers are
6764 always constants. Avoid walking the PARALLEL so that we don't
e820471b 6765 get confused with "normal" parallels and then die. */
051d8245
RH
6766 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6767 break;
6768
c65ebc55
JW
6769 case UNSPEC:
6770 switch (XINT (x, 1))
6771 {
7b6e506e
RH
6772 case UNSPEC_LTOFF_DTPMOD:
6773 case UNSPEC_LTOFF_DTPREL:
6774 case UNSPEC_DTPREL:
6775 case UNSPEC_LTOFF_TPREL:
6776 case UNSPEC_TPREL:
6777 case UNSPEC_PRED_REL_MUTEX:
6778 case UNSPEC_PIC_CALL:
6779 case UNSPEC_MF:
6780 case UNSPEC_FETCHADD_ACQ:
28875d67 6781 case UNSPEC_FETCHADD_REL:
7b6e506e
RH
6782 case UNSPEC_BSP_VALUE:
6783 case UNSPEC_FLUSHRS:
6784 case UNSPEC_BUNDLE_SELECTOR:
6785 break;
6786
086c0f96
RH
6787 case UNSPEC_GR_SPILL:
6788 case UNSPEC_GR_RESTORE:
870f9ec0
RH
6789 {
6790 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6791 HOST_WIDE_INT bit = (offset >> 3) & 63;
6792
6793 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
83338d15 6794 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
870f9ec0
RH
6795 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6796 new_flags, pred);
6797 break;
6798 }
9c808aad 6799
086c0f96
RH
6800 case UNSPEC_FR_SPILL:
6801 case UNSPEC_FR_RESTORE:
c407570a 6802 case UNSPEC_GETF_EXP:
b38ba463 6803 case UNSPEC_SETF_EXP:
086c0f96 6804 case UNSPEC_ADDP4:
b38ba463 6805 case UNSPEC_FR_SQRT_RECIP_APPROX:
07acc7b3 6806 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
048d0d36
MK
6807 case UNSPEC_LDA:
6808 case UNSPEC_LDS:
388092d5 6809 case UNSPEC_LDS_A:
048d0d36
MK
6810 case UNSPEC_LDSA:
6811 case UNSPEC_CHKACLR:
6812 case UNSPEC_CHKS:
6dd12198
SE
6813 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6814 break;
6815
086c0f96 6816 case UNSPEC_FR_RECIP_APPROX:
f526a3c8 6817 case UNSPEC_SHRP:
046625fa 6818 case UNSPEC_COPYSIGN:
1def9c3f 6819 case UNSPEC_FR_RECIP_APPROX_RES:
655f2eb9
RH
6820 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6821 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6822 break;
6823
086c0f96 6824 case UNSPEC_CMPXCHG_ACQ:
28875d67 6825 case UNSPEC_CMPXCHG_REL:
0551c32d
RH
6826 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6827 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6828 break;
6829
c65ebc55 6830 default:
e820471b 6831 gcc_unreachable ();
c65ebc55
JW
6832 }
6833 break;
6834
6835 case UNSPEC_VOLATILE:
6836 switch (XINT (x, 1))
6837 {
086c0f96 6838 case UNSPECV_ALLOC:
25250265
JW
6839 /* Alloc must always be the first instruction of a group.
6840 We force this by always returning true. */
6841 /* ??? We might get better scheduling if we explicitly check for
6842 input/local/output register dependencies, and modify the
6843 scheduler so that alloc is always reordered to the start of
6844 the current group. We could then eliminate all of the
6845 first_instruction code. */
6846 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6847
6848 new_flags.is_write = 1;
25250265
JW
6849 rws_access_regno (REG_AR_CFM, new_flags, pred);
6850 return 1;
c65ebc55 6851
086c0f96 6852 case UNSPECV_SET_BSP:
7b84aac0 6853 case UNSPECV_PROBE_STACK_RANGE:
3b572406
RH
6854 need_barrier = 1;
6855 break;
6856
086c0f96
RH
6857 case UNSPECV_BLOCKAGE:
6858 case UNSPECV_INSN_GROUP_BARRIER:
6859 case UNSPECV_BREAK:
6860 case UNSPECV_PSAC_ALL:
6861 case UNSPECV_PSAC_NORMAL:
3b572406 6862 return 0;
0c96007e 6863
7b84aac0
EB
6864 case UNSPECV_PROBE_STACK_ADDRESS:
6865 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6866 break;
6867
c65ebc55 6868 default:
e820471b 6869 gcc_unreachable ();
c65ebc55
JW
6870 }
6871 break;
6872
6873 case RETURN:
6874 new_flags.is_write = 0;
97e242b0
RH
6875 need_barrier = rws_access_regno (REG_RP, flags, pred);
6876 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6877
6878 new_flags.is_write = 1;
97e242b0
RH
6879 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6880 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6881 break;
6882
6883 default:
6884 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6885 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6886 switch (format_ptr[i])
6887 {
6888 case '0': /* unused field */
6889 case 'i': /* integer */
6890 case 'n': /* note */
6891 case 'w': /* wide integer */
6892 case 's': /* pointer to string */
6893 case 'S': /* optional pointer to string */
6894 break;
6895
6896 case 'e':
6897 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6898 need_barrier = 1;
6899 break;
6900
6901 case 'E':
6902 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6903 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6904 need_barrier = 1;
6905 break;
6906
6907 default:
e820471b 6908 gcc_unreachable ();
c65ebc55 6909 }
2ed4af6f 6910 break;
c65ebc55
JW
6911 }
6912 return need_barrier;
6913}
6914
c1bc6ca8 6915/* Clear out the state for group_barrier_needed at the start of a
2130b7fb
BS
6916 sequence of insns. */
6917
6918static void
9c808aad 6919init_insn_group_barriers (void)
2130b7fb
BS
6920{
6921 memset (rws_sum, 0, sizeof (rws_sum));
25250265 6922 first_instruction = 1;
2130b7fb
BS
6923}
6924
c1bc6ca8
JW
6925/* Given the current state, determine whether a group barrier (a stop bit) is
6926 necessary before INSN. Return nonzero if so. This modifies the state to
6927 include the effects of INSN as a side-effect. */
2130b7fb
BS
6928
6929static int
647d790d 6930group_barrier_needed (rtx_insn *insn)
2130b7fb
BS
6931{
6932 rtx pat;
6933 int need_barrier = 0;
6934 struct reg_flags flags;
6935
6936 memset (&flags, 0, sizeof (flags));
6937 switch (GET_CODE (insn))
6938 {
6939 case NOTE:
b5b8b0ac 6940 case DEBUG_INSN:
2130b7fb
BS
6941 break;
6942
6943 case BARRIER:
6944 /* A barrier doesn't imply an instruction group boundary. */
6945 break;
6946
6947 case CODE_LABEL:
6948 memset (rws_insn, 0, sizeof (rws_insn));
6949 return 1;
6950
6951 case CALL_INSN:
6952 flags.is_branch = 1;
6953 flags.is_sibcall = SIBLING_CALL_P (insn);
6954 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
6955
6956 /* Don't bundle a call following another call. */
b64925dc 6957 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
f12f25a7
RH
6958 {
6959 need_barrier = 1;
6960 break;
6961 }
6962
2130b7fb
BS
6963 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6964 break;
6965
6966 case JUMP_INSN:
048d0d36
MK
6967 if (!ia64_spec_check_p (insn))
6968 flags.is_branch = 1;
f12f25a7
RH
6969
6970 /* Don't bundle a jump following a call. */
b64925dc 6971 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
f12f25a7
RH
6972 {
6973 need_barrier = 1;
6974 break;
6975 }
5efb1046 6976 /* FALLTHRU */
2130b7fb
BS
6977
6978 case INSN:
6979 if (GET_CODE (PATTERN (insn)) == USE
6980 || GET_CODE (PATTERN (insn)) == CLOBBER)
6981 /* Don't care about USE and CLOBBER "insns"---those are used to
6982 indicate to the optimizer that it shouldn't get rid of
6983 certain operations. */
6984 break;
6985
6986 pat = PATTERN (insn);
6987
6988 /* Ug. Hack hacks hacked elsewhere. */
6989 switch (recog_memoized (insn))
6990 {
6991 /* We play dependency tricks with the epilogue in order
6992 to get proper schedules. Undo this for dv analysis. */
6993 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 6994 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
6995 pat = XVECEXP (pat, 0, 0);
6996 break;
6997
6998 /* The pattern we use for br.cloop confuses the code above.
6999 The second element of the vector is representative. */
7000 case CODE_FOR_doloop_end_internal:
7001 pat = XVECEXP (pat, 0, 1);
7002 break;
7003
7004 /* Doesn't generate code. */
7005 case CODE_FOR_pred_rel_mutex:
d0e82870 7006 case CODE_FOR_prologue_use:
2130b7fb
BS
7007 return 0;
7008
7009 default:
7010 break;
7011 }
7012
7013 memset (rws_insn, 0, sizeof (rws_insn));
7014 need_barrier = rtx_needs_barrier (pat, flags, 0);
7015
7016 /* Check to see if the previous instruction was a volatile
7017 asm. */
7018 if (! need_barrier)
7019 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
388092d5 7020
2130b7fb
BS
7021 break;
7022
7023 default:
e820471b 7024 gcc_unreachable ();
2130b7fb 7025 }
25250265 7026
7b84aac0 7027 if (first_instruction && important_for_bundling_p (insn))
25250265
JW
7028 {
7029 need_barrier = 0;
7030 first_instruction = 0;
7031 }
7032
2130b7fb
BS
7033 return need_barrier;
7034}
7035
c1bc6ca8 7036/* Like group_barrier_needed, but do not clobber the current state. */
2130b7fb
BS
7037
7038static int
647d790d 7039safe_group_barrier_needed (rtx_insn *insn)
2130b7fb 7040{
25250265 7041 int saved_first_instruction;
2130b7fb 7042 int t;
25250265 7043
25250265 7044 saved_first_instruction = first_instruction;
444a356a 7045 in_safe_group_barrier = 1;
25250265 7046
c1bc6ca8 7047 t = group_barrier_needed (insn);
25250265 7048
25250265 7049 first_instruction = saved_first_instruction;
444a356a 7050 in_safe_group_barrier = 0;
25250265 7051
2130b7fb
BS
7052 return t;
7053}
7054
18dbd950
RS
7055/* Scan the current function and insert stop bits as necessary to
7056 eliminate dependencies. This function assumes that a final
7057 instruction scheduling pass has been run which has already
7058 inserted most of the necessary stop bits. This function only
7059 inserts new ones at basic block boundaries, since these are
7060 invisible to the scheduler. */
2130b7fb
BS
7061
7062static void
9c808aad 7063emit_insn_group_barriers (FILE *dump)
2130b7fb 7064{
dd3d2b35
DM
7065 rtx_insn *insn;
7066 rtx_insn *last_label = 0;
2130b7fb
BS
7067 int insns_since_last_label = 0;
7068
7069 init_insn_group_barriers ();
7070
18dbd950 7071 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb 7072 {
b64925dc 7073 if (LABEL_P (insn))
2130b7fb
BS
7074 {
7075 if (insns_since_last_label)
7076 last_label = insn;
7077 insns_since_last_label = 0;
7078 }
b64925dc 7079 else if (NOTE_P (insn)
a38e7aa5 7080 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
2130b7fb
BS
7081 {
7082 if (insns_since_last_label)
7083 last_label = insn;
7084 insns_since_last_label = 0;
7085 }
b64925dc 7086 else if (NONJUMP_INSN_P (insn)
2130b7fb 7087 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 7088 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
7089 {
7090 init_insn_group_barriers ();
7091 last_label = 0;
7092 }
b5b8b0ac 7093 else if (NONDEBUG_INSN_P (insn))
2130b7fb
BS
7094 {
7095 insns_since_last_label = 1;
7096
c1bc6ca8 7097 if (group_barrier_needed (insn))
2130b7fb
BS
7098 {
7099 if (last_label)
7100 {
7101 if (dump)
7102 fprintf (dump, "Emitting stop before label %d\n",
7103 INSN_UID (last_label));
7104 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7105 insn = last_label;
112333d3
BS
7106
7107 init_insn_group_barriers ();
7108 last_label = 0;
2130b7fb 7109 }
2130b7fb
BS
7110 }
7111 }
7112 }
7113}
f4d578da
BS
7114
7115/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7116 This function has to emit all necessary group barriers. */
7117
7118static void
9c808aad 7119emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da 7120{
dd3d2b35 7121 rtx_insn *insn;
f4d578da
BS
7122
7123 init_insn_group_barriers ();
7124
18dbd950 7125 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 7126 {
b64925dc 7127 if (BARRIER_P (insn))
bd7b9a0f 7128 {
dd3d2b35 7129 rtx_insn *last = prev_active_insn (insn);
bd7b9a0f
RH
7130
7131 if (! last)
7132 continue;
34f0d87a 7133 if (JUMP_TABLE_DATA_P (last))
bd7b9a0f
RH
7134 last = prev_active_insn (last);
7135 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7136 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7137
7138 init_insn_group_barriers ();
7139 }
b5b8b0ac 7140 else if (NONDEBUG_INSN_P (insn))
f4d578da 7141 {
bd7b9a0f
RH
7142 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7143 init_insn_group_barriers ();
c1bc6ca8 7144 else if (group_barrier_needed (insn))
f4d578da
BS
7145 {
7146 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7147 init_insn_group_barriers ();
c1bc6ca8 7148 group_barrier_needed (insn);
f4d578da
BS
7149 }
7150 }
7151 }
7152}
30028c85 7153
2130b7fb 7154\f
2130b7fb 7155
30028c85 7156/* Instruction scheduling support. */
2130b7fb
BS
7157
7158#define NR_BUNDLES 10
7159
30028c85 7160/* A list of names of all available bundles. */
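/* The letters follow the Itanium template naming: m = memory, i = integer,
   f = floating-point, b = branch; ".mlx" pairs a memory slot with a
   long-immediate (L+X) slot.  */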
2130b7fb 7161
30028c85 7162static const char *bundle_name [NR_BUNDLES] =
2130b7fb 7163{
30028c85
VM
7164 ".mii",
7165 ".mmi",
7166 ".mfi",
7167 ".mmf",
2130b7fb 7168#if NR_BUNDLES == 10
30028c85
VM
7169 ".bbb",
7170 ".mbb",
2130b7fb 7171#endif
30028c85
VM
7172 ".mib",
7173 ".mmb",
7174 ".mfb",
7175 ".mlx"
2130b7fb
BS
7176};
7177
30028c85 7178/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 7179
30028c85 7180int ia64_final_schedule = 0;
2130b7fb 7181
35fd3193 7182/* Codes of the corresponding queried units: */
2130b7fb 7183
30028c85
VM
7184static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7185static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 7186
30028c85
VM
7187static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7188static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 7189
30028c85
VM
7190static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7191
7192/* The following variable value is an insn group barrier. */
7193
dd3d2b35 7194static rtx_insn *dfa_stop_insn;
30028c85
VM
7195
7196/* The following variable value is the last issued insn. */
7197
b32d5189 7198static rtx_insn *last_scheduled_insn;
30028c85 7199
30028c85
VM
 7200/* The following variable value is a pointer to a DFA state used as
 7201   a temporary variable.  */
7202
7203static state_t temp_dfa_state = NULL;
7204
7205/* The following variable value is DFA state after issuing the last
7206 insn. */
7207
7208static state_t prev_cycle_state = NULL;
7209
7210/* The following array element values are TRUE if the corresponding
9e4f94de 7211   insn requires stop bits to be added before it.  */
30028c85 7212
048d0d36
MK
7213static char *stops_p = NULL;
7214
30028c85
VM
 7215/* The following variable is used to set up the array mentioned above.  */
7216
7217static int stop_before_p = 0;
7218
 7219/* The following variable value is the length of the arrays `clocks' and
7220 `add_cycles'. */
7221
7222static int clocks_length;
7223
048d0d36
MK
7224/* The following variable value is number of data speculations in progress. */
7225static int pending_data_specs = 0;
7226
388092d5
AB
7227/* Number of memory references on current and three future processor cycles. */
7228static char mem_ops_in_group[4];
7229
7230/* Number of current processor cycle (from scheduler's point of view). */
7231static int current_cycle;
7232
647d790d 7233static rtx ia64_single_set (rtx_insn *);
017fdefe 7234static void ia64_emit_insn_before (rtx, rtx_insn *);
2130b7fb
BS
7235
7236/* Map a bundle number to its pseudo-op. */
7237
7238const char *
9c808aad 7239get_bundle_name (int b)
2130b7fb 7240{
30028c85 7241 return bundle_name[b];
2130b7fb
BS
7242}
7243
2130b7fb
BS
7244
7245/* Return the maximum number of instructions a cpu can issue. */
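/* For Itanium this is 6, i.e. two three-instruction bundles per clock.  */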
7246
c237e94a 7247static int
9c808aad 7248ia64_issue_rate (void)
2130b7fb
BS
7249{
7250 return 6;
7251}
7252
7253/* Helper function - like single_set, but look inside COND_EXEC. */
7254
7255static rtx
647d790d 7256ia64_single_set (rtx_insn *insn)
2130b7fb 7257{
30fa7e33 7258 rtx x = PATTERN (insn), ret;
2130b7fb
BS
7259 if (GET_CODE (x) == COND_EXEC)
7260 x = COND_EXEC_CODE (x);
7261 if (GET_CODE (x) == SET)
7262 return x;
bdbe5b8d
RH
7263
7264 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7265 Although they are not classical single set, the second set is there just
7266 to protect it from moving past FP-relative stack accesses. */
7267 switch (recog_memoized (insn))
30fa7e33 7268 {
bdbe5b8d 7269 case CODE_FOR_prologue_allocate_stack:
9eb8c09f 7270 case CODE_FOR_prologue_allocate_stack_pr:
bdbe5b8d 7271 case CODE_FOR_epilogue_deallocate_stack:
9eb8c09f 7272 case CODE_FOR_epilogue_deallocate_stack_pr:
bdbe5b8d
RH
7273 ret = XVECEXP (x, 0, 0);
7274 break;
7275
7276 default:
7277 ret = single_set_2 (insn, x);
7278 break;
30fa7e33 7279 }
bdbe5b8d 7280
30fa7e33 7281 return ret;
2130b7fb
BS
7282}
7283
388092d5
AB
7284/* Adjust the cost of a scheduling dependency.
 7285   Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
7286 COST is the current cost, DW is dependency weakness. */
c237e94a 7287static int
b505225b
TS
7288ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7289 int cost, dw_t dw)
2130b7fb 7290{
388092d5 7291 enum reg_note dep_type = (enum reg_note) dep_type1;
2130b7fb
BS
7292 enum attr_itanium_class dep_class;
7293 enum attr_itanium_class insn_class;
2130b7fb 7294
2130b7fb 7295 insn_class = ia64_safe_itanium_class (insn);
30028c85 7296 dep_class = ia64_safe_itanium_class (dep_insn);
388092d5
AB
7297
7298 /* Treat true memory dependencies separately. Ignore apparent true
7299 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7300 if (dep_type == REG_DEP_TRUE
7301 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7302 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7303 return 0;
7304
7305 if (dw == MIN_DEP_WEAK)
7306 /* Store and load are likely to alias, use higher cost to avoid stall. */
7307 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7308 else if (dw > MIN_DEP_WEAK)
7309 {
7310 /* Store and load are less likely to alias. */
7311 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7312 /* Assume there will be no cache conflict for floating-point data.
7313 For integer data, L1 conflict penalty is huge (17 cycles), so we
7314 never assume it will not cause a conflict. */
7315 return 0;
7316 else
7317 return cost;
7318 }
7319
7320 if (dep_type != REG_DEP_OUTPUT)
7321 return cost;
7322
30028c85
VM
7323 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7324 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
7325 return 0;
7326
2130b7fb
BS
7327 return cost;
7328}
7329
14d118d6
DM
7330/* Like emit_insn_before, but skip cycle_display notes.
7331 ??? When cycle display notes are implemented, update this. */
7332
7333static void
d8485bdb 7334ia64_emit_insn_before (rtx insn, rtx_insn *before)
14d118d6
DM
7335{
7336 emit_insn_before (insn, before);
7337}
7338
30028c85
VM
 7339/* The following function marks insns that produce addresses for load
 7340   and store insns.  Such insns will be placed into M slots because this
 7341   decreases latency on Itanium 1 (see function
7342 `ia64_produce_address_p' and the DFA descriptions). */
2130b7fb
BS
7343
7344static void
ce1ce33a 7345ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
2130b7fb 7346{
ce1ce33a 7347 rtx_insn *insn, *next, *next_tail;
9c808aad 7348
f12b785d
RH
7349 /* Before reload, which_alternative is not set, which means that
7350 ia64_safe_itanium_class will produce wrong results for (at least)
7351 move instructions. */
7352 if (!reload_completed)
7353 return;
7354
30028c85
VM
7355 next_tail = NEXT_INSN (tail);
7356 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7357 if (INSN_P (insn))
7358 insn->call = 0;
7359 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7360 if (INSN_P (insn)
7361 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7362 {
e2f6ff94
MK
7363 sd_iterator_def sd_it;
7364 dep_t dep;
7365 bool has_mem_op_consumer_p = false;
b198261f 7366
e2f6ff94 7367 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
30028c85 7368 {
a71aef0b
JB
7369 enum attr_itanium_class c;
7370
e2f6ff94 7371 if (DEP_TYPE (dep) != REG_DEP_TRUE)
f12b785d 7372 continue;
b198261f 7373
e2f6ff94 7374 next = DEP_CON (dep);
a71aef0b
JB
7375 c = ia64_safe_itanium_class (next);
7376 if ((c == ITANIUM_CLASS_ST
7377 || c == ITANIUM_CLASS_STF)
30028c85 7378 && ia64_st_address_bypass_p (insn, next))
e2f6ff94
MK
7379 {
7380 has_mem_op_consumer_p = true;
7381 break;
7382 }
a71aef0b
JB
7383 else if ((c == ITANIUM_CLASS_LD
7384 || c == ITANIUM_CLASS_FLD
7385 || c == ITANIUM_CLASS_FLDP)
30028c85 7386 && ia64_ld_address_bypass_p (insn, next))
e2f6ff94
MK
7387 {
7388 has_mem_op_consumer_p = true;
7389 break;
7390 }
30028c85 7391 }
e2f6ff94
MK
7392
7393 insn->call = has_mem_op_consumer_p;
30028c85
VM
7394 }
7395}
2130b7fb 7396
30028c85 7397/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 7398
30028c85 7399static void
9c808aad
AJ
7400ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7401 int sched_verbose ATTRIBUTE_UNUSED,
7402 int max_ready ATTRIBUTE_UNUSED)
30028c85 7403{
e28c2052
MM
7404 if (flag_checking && !sel_sched_p () && reload_completed)
7405 {
7406 for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7407 insn != current_sched_info->next_tail;
7408 insn = NEXT_INSN (insn))
7409 gcc_assert (!SCHED_GROUP_P (insn));
7410 }
b32d5189 7411 last_scheduled_insn = NULL;
30028c85 7412 init_insn_group_barriers ();
388092d5
AB
7413
7414 current_cycle = 0;
7415 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
2130b7fb
BS
7416}
7417
048d0d36
MK
7418/* We're beginning a scheduling pass. Check assertion. */
7419
7420static void
7421ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7422 int sched_verbose ATTRIBUTE_UNUSED,
7423 int max_ready ATTRIBUTE_UNUSED)
7424{
388092d5 7425 gcc_assert (pending_data_specs == 0);
048d0d36
MK
7426}
7427
7428/* Scheduling pass is now finished. Free/reset static variable. */
7429static void
7430ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7431 int sched_verbose ATTRIBUTE_UNUSED)
7432{
388092d5
AB
7433 gcc_assert (pending_data_specs == 0);
7434}
7435
7436/* Return TRUE if INSN is a load (either normal or speculative, but not a
7437 speculation check), FALSE otherwise. */
7438static bool
647d790d 7439is_load_p (rtx_insn *insn)
388092d5
AB
7440{
7441 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7442
7443 return
7444 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7445 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7446}
7447
7448/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
 7449   (taking into account the 3-cycle cache reference postponing for stores: Intel
7450 Itanium 2 Reference Manual for Software Development and Optimization,
7451 6.7.3.1). */
7452static void
647d790d 7453record_memory_reference (rtx_insn *insn)
388092d5
AB
7454{
7455 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7456
7457 switch (insn_class) {
7458 case ITANIUM_CLASS_FLD:
7459 case ITANIUM_CLASS_LD:
7460 mem_ops_in_group[current_cycle % 4]++;
7461 break;
7462 case ITANIUM_CLASS_STF:
7463 case ITANIUM_CLASS_ST:
7464 mem_ops_in_group[(current_cycle + 3) % 4]++;
7465 break;
7466 default:;
7467 }
048d0d36
MK
7468}
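
/* Worked example (illustration only): a load issued on cycle 10 is counted
   in mem_ops_in_group[10 % 4], i.e. slot 2, while a store issued on the same
   cycle is charged to slot (10 + 3) % 4 = 1, i.e. against cycle 13,
   reflecting the delayed cache access of stores described above.  */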
7469
30028c85
VM
 7470/* We are about to begin issuing insns for this clock cycle.
7471 Override the default sort algorithm to better slot instructions. */
2130b7fb 7472
30028c85 7473static int
ce1ce33a 7474ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
388092d5 7475 int *pn_ready, int clock_var,
9c808aad 7476 int reorder_type)
2130b7fb 7477{
30028c85
VM
7478 int n_asms;
7479 int n_ready = *pn_ready;
ce1ce33a
DM
7480 rtx_insn **e_ready = ready + n_ready;
7481 rtx_insn **insnp;
2130b7fb 7482
30028c85
VM
7483 if (sched_verbose)
7484 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 7485
30028c85 7486 if (reorder_type == 0)
2130b7fb 7487 {
30028c85
VM
7488 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7489 n_asms = 0;
7490 for (insnp = ready; insnp < e_ready; insnp++)
7491 if (insnp < e_ready)
7492 {
ce1ce33a 7493 rtx_insn *insn = *insnp;
30028c85
VM
7494 enum attr_type t = ia64_safe_type (insn);
7495 if (t == TYPE_UNKNOWN)
7496 {
7497 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7498 || asm_noperands (PATTERN (insn)) >= 0)
7499 {
ce1ce33a 7500 rtx_insn *lowest = ready[n_asms];
30028c85
VM
7501 ready[n_asms] = insn;
7502 *insnp = lowest;
7503 n_asms++;
7504 }
7505 else
7506 {
ce1ce33a 7507 rtx_insn *highest = ready[n_ready - 1];
30028c85
VM
7508 ready[n_ready - 1] = insn;
7509 *insnp = highest;
7510 return 1;
7511 }
7512 }
7513 }
98d2b17e 7514
30028c85 7515 if (n_asms < n_ready)
98d2b17e 7516 {
30028c85
VM
7517 /* Some normal insns to process. Skip the asms. */
7518 ready += n_asms;
7519 n_ready -= n_asms;
98d2b17e 7520 }
30028c85
VM
7521 else if (n_ready > 0)
7522 return 1;
2130b7fb
BS
7523 }
7524
30028c85 7525 if (ia64_final_schedule)
2130b7fb 7526 {
30028c85
VM
7527 int deleted = 0;
7528 int nr_need_stop = 0;
7529
7530 for (insnp = ready; insnp < e_ready; insnp++)
c1bc6ca8 7531 if (safe_group_barrier_needed (*insnp))
30028c85 7532 nr_need_stop++;
9c808aad 7533
30028c85
VM
7534 if (reorder_type == 1 && n_ready == nr_need_stop)
7535 return 0;
7536 if (reorder_type == 0)
7537 return 1;
7538 insnp = e_ready;
7539 /* Move down everything that needs a stop bit, preserving
7540 relative order. */
7541 while (insnp-- > ready + deleted)
7542 while (insnp >= ready + deleted)
7543 {
ce1ce33a 7544 rtx_insn *insn = *insnp;
c1bc6ca8 7545 if (! safe_group_barrier_needed (insn))
30028c85
VM
7546 break;
7547 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7548 *ready = insn;
7549 deleted++;
7550 }
7551 n_ready -= deleted;
7552 ready += deleted;
2130b7fb 7553 }
2130b7fb 7554
388092d5
AB
7555 current_cycle = clock_var;
7556 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7557 {
7558 int moved = 0;
7559
7560 insnp = e_ready;
7561 /* Move down loads/stores, preserving relative order. */
7562 while (insnp-- > ready + moved)
7563 while (insnp >= ready + moved)
7564 {
ce1ce33a 7565 rtx_insn *insn = *insnp;
388092d5
AB
7566 if (! is_load_p (insn))
7567 break;
7568 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7569 *ready = insn;
7570 moved++;
7571 }
7572 n_ready -= moved;
7573 ready += moved;
7574 }
7575
30028c85 7576 return 1;
2130b7fb 7577}
6b6c1201 7578
30028c85
VM
 7579/* We are about to begin issuing insns for this clock cycle.  Override
7580 the default sort algorithm to better slot instructions. */
c65ebc55 7581
30028c85 7582static int
ce1ce33a
DM
7583ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7584 int *pn_ready, int clock_var)
2130b7fb 7585{
30028c85
VM
7586 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7587 pn_ready, clock_var, 0);
2130b7fb
BS
7588}
7589
30028c85
VM
7590/* Like ia64_sched_reorder, but called after issuing each insn.
7591 Override the default sort algorithm to better slot instructions. */
2130b7fb 7592
30028c85 7593static int
9c808aad 7594ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
ce1ce33a 7595 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
9c808aad 7596 int *pn_ready, int clock_var)
30028c85 7597{
30028c85
VM
7598 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7599 clock_var, 1);
2130b7fb
BS
7600}
7601
30028c85
VM
7602/* We are about to issue INSN. Return the number of insns left on the
7603 ready queue that can be issued this cycle. */
2130b7fb 7604
30028c85 7605static int
9c808aad
AJ
7606ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7607 int sched_verbose ATTRIBUTE_UNUSED,
ac44248e 7608 rtx_insn *insn,
9c808aad 7609 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 7610{
388092d5 7611 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
048d0d36 7612 /* Modulo scheduling does not extend h_i_d when emitting
388092d5 7613     new instructions.  Don't use h_i_d if we don't have to.  */
048d0d36
MK
7614 {
7615 if (DONE_SPEC (insn) & BEGIN_DATA)
7616 pending_data_specs++;
7617 if (CHECK_SPEC (insn) & BEGIN_DATA)
7618 pending_data_specs--;
7619 }
7620
b5b8b0ac
AO
7621 if (DEBUG_INSN_P (insn))
7622 return 1;
7623
30028c85
VM
7624 last_scheduled_insn = insn;
7625 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7626 if (reload_completed)
2130b7fb 7627 {
c1bc6ca8 7628 int needed = group_barrier_needed (insn);
e820471b
NS
7629
7630 gcc_assert (!needed);
b64925dc 7631 if (CALL_P (insn))
30028c85
VM
7632 init_insn_group_barriers ();
7633 stops_p [INSN_UID (insn)] = stop_before_p;
7634 stop_before_p = 0;
388092d5
AB
7635
7636 record_memory_reference (insn);
2130b7fb 7637 }
30028c85
VM
7638 return 1;
7639}
c65ebc55 7640
4960a0cb 7641/* We are choosing an insn from the ready queue.  Return zero if INSN
30028c85 7642 can be chosen. */
c65ebc55 7643
30028c85 7644static int
ac44248e 7645ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30028c85 7646{
388092d5 7647 gcc_assert (insn && INSN_P (insn));
048d0d36 7648
4960a0cb
MK
 7649  /* The size of the ALAT is 32.  Since we perform conservative
 7650     data speculation, we keep the ALAT half empty.  */
31815ed7 7651 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
4960a0cb 7652 return ready_index == 0 ? -1 : 1;
048d0d36 7653
4960a0cb
MK
7654 if (ready_index == 0)
7655 return 0;
7656
7657 if ((!reload_completed
7658 || !safe_group_barrier_needed (insn))
7659 && (!mflag_sched_mem_insns_hard_limit
7660 || !is_load_p (insn)
7661 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7662 return 0;
676cad4d
MK
7663
7664 return 1;
2130b7fb
BS
7665}
7666
30028c85
VM
7667/* The following variable value is pseudo-insn used by the DFA insn
7668 scheduler to change the DFA state when the simulated clock is
7669 increased. */
2130b7fb 7670
dd3d2b35 7671static rtx_insn *dfa_pre_cycle_insn;
2130b7fb 7672
388092d5
AB
7673/* Returns 1 when a meaningful insn was scheduled between the last group
7674 barrier and LAST. */
7675static int
b32d5189 7676scheduled_good_insn (rtx_insn *last)
388092d5
AB
7677{
7678 if (last && recog_memoized (last) >= 0)
7679 return 1;
7680
7681 for ( ;
7682 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7683 && !stops_p[INSN_UID (last)];
7684 last = PREV_INSN (last))
7685 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7686 the ebb we're scheduling. */
7687 if (INSN_P (last) && recog_memoized (last) >= 0)
7688 return 1;
7689
7690 return 0;
7691}
7692
1e5f1716 7693/* We are about to begin issuing INSN.  Return nonzero if we cannot
30028c85
VM
7694 issue it on given cycle CLOCK and return zero if we should not sort
7695 the ready queue on the next clock start. */
2130b7fb
BS
7696
7697static int
ac44248e 7698ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
9c808aad 7699 int clock, int *sort_p)
2130b7fb 7700{
e820471b 7701 gcc_assert (insn && INSN_P (insn));
b5b8b0ac
AO
7702
7703 if (DEBUG_INSN_P (insn))
7704 return 0;
7705
388092d5
AB
7706 /* When a group barrier is needed for insn, last_scheduled_insn
7707 should be set. */
7708 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7709 || last_scheduled_insn);
7710
7711 if ((reload_completed
7712 && (safe_group_barrier_needed (insn)
7713 || (mflag_sched_stop_bits_after_every_cycle
7714 && last_clock != clock
7715 && last_scheduled_insn
7716 && scheduled_good_insn (last_scheduled_insn))))
30028c85 7717 || (last_scheduled_insn
b64925dc 7718 && (CALL_P (last_scheduled_insn)
7b84aac0 7719 || unknown_for_bundling_p (last_scheduled_insn))))
2130b7fb 7720 {
30028c85 7721 init_insn_group_barriers ();
388092d5 7722
30028c85
VM
7723 if (verbose && dump)
7724 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7725 last_clock == clock ? " + cycle advance" : "");
388092d5 7726
30028c85 7727 stop_before_p = 1;
388092d5
AB
7728 current_cycle = clock;
7729 mem_ops_in_group[current_cycle % 4] = 0;
7730
30028c85 7731 if (last_clock == clock)
2130b7fb 7732 {
30028c85
VM
7733 state_transition (curr_state, dfa_stop_insn);
7734 if (TARGET_EARLY_STOP_BITS)
7735 *sort_p = (last_scheduled_insn == NULL_RTX
b64925dc 7736 || ! CALL_P (last_scheduled_insn));
30028c85
VM
7737 else
7738 *sort_p = 0;
7739 return 1;
7740 }
388092d5
AB
7741
7742 if (last_scheduled_insn)
25069b42 7743 {
7b84aac0 7744 if (unknown_for_bundling_p (last_scheduled_insn))
388092d5
AB
7745 state_reset (curr_state);
7746 else
7747 {
7748 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7749 state_transition (curr_state, dfa_stop_insn);
7750 state_transition (curr_state, dfa_pre_cycle_insn);
7751 state_transition (curr_state, NULL);
7752 }
25069b42 7753 }
30028c85 7754 }
30028c85 7755 return 0;
2130b7fb
BS
7756}
7757
048d0d36
MK
7758/* Implement targetm.sched.h_i_d_extended hook.
7759 Extend internal data structures. */
7760static void
7761ia64_h_i_d_extended (void)
7762{
048d0d36
MK
7763 if (stops_p != NULL)
7764 {
388092d5 7765 int new_clocks_length = get_max_uid () * 3 / 2;
5ead67f6 7766 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
048d0d36
MK
7767 clocks_length = new_clocks_length;
7768 }
7769}
388092d5
AB
7770\f
7771
7772/* This structure describes the data used by the backend to guide scheduling.
7773 When the current scheduling point is switched, this data should be saved
7774 and restored later, if the scheduler returns to this point. */
7775struct _ia64_sched_context
7776{
7777 state_t prev_cycle_state;
b32d5189 7778 rtx_insn *last_scheduled_insn;
388092d5
AB
7779 struct reg_write_state rws_sum[NUM_REGS];
7780 struct reg_write_state rws_insn[NUM_REGS];
7781 int first_instruction;
7782 int pending_data_specs;
7783 int current_cycle;
7784 char mem_ops_in_group[4];
7785};
7786typedef struct _ia64_sched_context *ia64_sched_context_t;
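
/* The fields above deliberately mirror the file-scope scheduling state
   (prev_cycle_state, last_scheduled_insn, rws_sum, rws_insn,
   first_instruction, pending_data_specs, current_cycle, mem_ops_in_group),
   so that ia64_init_sched_context and ia64_set_sched_context below can
   snapshot and restore it wholesale.  */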
7787
7788/* Allocates a scheduling context. */
7789static void *
7790ia64_alloc_sched_context (void)
7791{
7792 return xmalloc (sizeof (struct _ia64_sched_context));
7793}
7794
7795/* Initializes the _SC context with clean data, if CLEAN_P, and from
7796 the global context otherwise. */
7797static void
7798ia64_init_sched_context (void *_sc, bool clean_p)
7799{
7800 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7801
7802 sc->prev_cycle_state = xmalloc (dfa_state_size);
7803 if (clean_p)
7804 {
7805 state_reset (sc->prev_cycle_state);
b32d5189 7806 sc->last_scheduled_insn = NULL;
388092d5
AB
7807 memset (sc->rws_sum, 0, sizeof (rws_sum));
7808 memset (sc->rws_insn, 0, sizeof (rws_insn));
7809 sc->first_instruction = 1;
7810 sc->pending_data_specs = 0;
7811 sc->current_cycle = 0;
7812 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7813 }
7814 else
7815 {
7816 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7817 sc->last_scheduled_insn = last_scheduled_insn;
7818 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7819 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7820 sc->first_instruction = first_instruction;
7821 sc->pending_data_specs = pending_data_specs;
7822 sc->current_cycle = current_cycle;
7823 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7824 }
7825}
7826
7827/* Sets the global scheduling context to the one pointed to by _SC. */
7828static void
7829ia64_set_sched_context (void *_sc)
7830{
7831 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7832
7833 gcc_assert (sc != NULL);
7834
7835 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7836 last_scheduled_insn = sc->last_scheduled_insn;
7837 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7838 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7839 first_instruction = sc->first_instruction;
7840 pending_data_specs = sc->pending_data_specs;
7841 current_cycle = sc->current_cycle;
7842 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7843}
7844
7845/* Clears the data in the _SC scheduling context. */
7846static void
7847ia64_clear_sched_context (void *_sc)
7848{
7849 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7850
7851 free (sc->prev_cycle_state);
7852 sc->prev_cycle_state = NULL;
7853}
7854
7855/* Frees the _SC scheduling context. */
7856static void
7857ia64_free_sched_context (void *_sc)
7858{
7859 gcc_assert (_sc != NULL);
7860
7861 free (_sc);
7862}
7863
7864typedef rtx (* gen_func_t) (rtx, rtx);
7865
7866/* Return a function that will generate a load of mode MODE_NO
7867 with speculation types TS. */
7868static gen_func_t
7869get_spec_load_gen_function (ds_t ts, int mode_no)
7870{
7871 static gen_func_t gen_ld_[] = {
7872 gen_movbi,
7873 gen_movqi_internal,
7874 gen_movhi_internal,
7875 gen_movsi_internal,
7876 gen_movdi_internal,
7877 gen_movsf_internal,
7878 gen_movdf_internal,
7879 gen_movxf_internal,
7880 gen_movti_internal,
7881 gen_zero_extendqidi2,
7882 gen_zero_extendhidi2,
7883 gen_zero_extendsidi2,
7884 };
7885
7886 static gen_func_t gen_ld_a[] = {
7887 gen_movbi_advanced,
7888 gen_movqi_advanced,
7889 gen_movhi_advanced,
7890 gen_movsi_advanced,
7891 gen_movdi_advanced,
7892 gen_movsf_advanced,
7893 gen_movdf_advanced,
7894 gen_movxf_advanced,
7895 gen_movti_advanced,
7896 gen_zero_extendqidi2_advanced,
7897 gen_zero_extendhidi2_advanced,
7898 gen_zero_extendsidi2_advanced,
7899 };
7900 static gen_func_t gen_ld_s[] = {
7901 gen_movbi_speculative,
7902 gen_movqi_speculative,
7903 gen_movhi_speculative,
7904 gen_movsi_speculative,
7905 gen_movdi_speculative,
7906 gen_movsf_speculative,
7907 gen_movdf_speculative,
7908 gen_movxf_speculative,
7909 gen_movti_speculative,
7910 gen_zero_extendqidi2_speculative,
7911 gen_zero_extendhidi2_speculative,
7912 gen_zero_extendsidi2_speculative,
7913 };
7914 static gen_func_t gen_ld_sa[] = {
7915 gen_movbi_speculative_advanced,
7916 gen_movqi_speculative_advanced,
7917 gen_movhi_speculative_advanced,
7918 gen_movsi_speculative_advanced,
7919 gen_movdi_speculative_advanced,
7920 gen_movsf_speculative_advanced,
7921 gen_movdf_speculative_advanced,
7922 gen_movxf_speculative_advanced,
7923 gen_movti_speculative_advanced,
7924 gen_zero_extendqidi2_speculative_advanced,
7925 gen_zero_extendhidi2_speculative_advanced,
7926 gen_zero_extendsidi2_speculative_advanced,
7927 };
7928 static gen_func_t gen_ld_s_a[] = {
7929 gen_movbi_speculative_a,
7930 gen_movqi_speculative_a,
7931 gen_movhi_speculative_a,
7932 gen_movsi_speculative_a,
7933 gen_movdi_speculative_a,
7934 gen_movsf_speculative_a,
7935 gen_movdf_speculative_a,
7936 gen_movxf_speculative_a,
7937 gen_movti_speculative_a,
7938 gen_zero_extendqidi2_speculative_a,
7939 gen_zero_extendhidi2_speculative_a,
7940 gen_zero_extendsidi2_speculative_a,
7941 };
7942
7943 gen_func_t *gen_ld;
7944
7945 if (ts & BEGIN_DATA)
7946 {
7947 if (ts & BEGIN_CONTROL)
7948 gen_ld = gen_ld_sa;
7949 else
7950 gen_ld = gen_ld_a;
7951 }
7952 else if (ts & BEGIN_CONTROL)
7953 {
7954 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7955 || ia64_needs_block_p (ts))
7956 gen_ld = gen_ld_s;
7957 else
7958 gen_ld = gen_ld_s_a;
7959 }
7960 else if (ts == 0)
7961 gen_ld = gen_ld_;
7962 else
7963 gcc_unreachable ();
7964
7965 return gen_ld[mode_no];
7966}
048d0d36 7967
ef4bddc2 7968/* Constants that help mapping 'machine_mode' to int. */
048d0d36
MK
7969enum SPEC_MODES
7970 {
7971 SPEC_MODE_INVALID = -1,
7972 SPEC_MODE_FIRST = 0,
7973 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7974 SPEC_MODE_FOR_EXTEND_LAST = 3,
7975 SPEC_MODE_LAST = 8
7976 };
7977
388092d5
AB
7978enum
7979 {
7980 /* Offset to reach ZERO_EXTEND patterns. */
7981 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7982 };
7983
048d0d36
MK
7984/* Return index of the MODE. */
7985static int
ef4bddc2 7986ia64_mode_to_int (machine_mode mode)
048d0d36
MK
7987{
7988 switch (mode)
7989 {
4e10a5a7
RS
7990 case E_BImode: return 0; /* SPEC_MODE_FIRST */
7991 case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7992 case E_HImode: return 2;
7993 case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7994 case E_DImode: return 4;
7995 case E_SFmode: return 5;
7996 case E_DFmode: return 6;
7997 case E_XFmode: return 7;
7998 case E_TImode:
048d0d36
MK
7999 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
8000 mentioned in itanium[12].md. Predicate fp_register_operand also
8001 needs to be defined. Bottom line: better disable for now. */
8002 return SPEC_MODE_INVALID;
8003 default: return SPEC_MODE_INVALID;
8004 }
8005}
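
/* Worked example (illustration only): an SImode load maps to index 3; if the
   insn zero-extends the result to DImode, get_mode_no_for_insn below adds
   SPEC_GEN_EXTEND_OFFSET (8 - 1 + 1 = 8), giving index 11, which selects the
   gen_zero_extendsidi2* entries in the gen_ld_* tables above.  */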
8006
8007/* Provide information about speculation capabilities. */
8008static void
8009ia64_set_sched_flags (spec_info_t spec_info)
8010{
8011 unsigned int *flags = &(current_sched_info->flags);
8012
8013 if (*flags & SCHED_RGN
388092d5
AB
8014 || *flags & SCHED_EBB
8015 || *flags & SEL_SCHED)
048d0d36
MK
8016 {
8017 int mask = 0;
8018
a57aee2a 8019 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
388092d5 8020 || (mflag_sched_ar_data_spec && reload_completed))
048d0d36
MK
8021 {
8022 mask |= BEGIN_DATA;
388092d5
AB
8023
8024 if (!sel_sched_p ()
8025 && ((mflag_sched_br_in_data_spec && !reload_completed)
8026 || (mflag_sched_ar_in_data_spec && reload_completed)))
048d0d36
MK
8027 mask |= BE_IN_DATA;
8028 }
8029
388092d5
AB
8030 if (mflag_sched_control_spec
8031 && (!sel_sched_p ()
8032 || reload_completed))
048d0d36
MK
8033 {
8034 mask |= BEGIN_CONTROL;
8035
388092d5 8036 if (!sel_sched_p () && mflag_sched_in_control_spec)
048d0d36
MK
8037 mask |= BE_IN_CONTROL;
8038 }
8039
7ab5df48
AB
8040 spec_info->mask = mask;
8041
048d0d36
MK
8042 if (mask)
8043 {
6fb5fa3c
DB
8044 *flags |= USE_DEPS_LIST | DO_SPECULATION;
8045
8046 if (mask & BE_IN_SPEC)
8047 *flags |= NEW_BBS;
048d0d36 8048
048d0d36
MK
8049 spec_info->flags = 0;
8050
16d83dd6
MK
8051 if ((mask & CONTROL_SPEC)
8052 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8053 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
388092d5
AB
8054
8055 if (sched_verbose >= 1)
8056 spec_info->dump = sched_dump;
048d0d36
MK
8057 else
8058 spec_info->dump = 0;
8059
8060 if (mflag_sched_count_spec_in_critical_path)
8061 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8062 }
8063 }
cd510f15
AM
8064 else
8065 spec_info->mask = 0;
048d0d36
MK
8066}
8067
388092d5
AB
8068/* If INSN is an appropriate load return its mode.
8069 Return -1 otherwise. */
048d0d36 8070static int
647d790d 8071get_mode_no_for_insn (rtx_insn *insn)
388092d5
AB
8072{
8073 rtx reg, mem, mode_rtx;
8074 int mode_no;
048d0d36 8075 bool extend_p;
048d0d36 8076
388092d5 8077 extract_insn_cached (insn);
048d0d36 8078
388092d5
AB
8079 /* We use WHICH_ALTERNATIVE only after reload. This will
8080 guarantee that reload won't touch a speculative insn. */
f6ec1d11 8081
388092d5 8082 if (recog_data.n_operands != 2)
048d0d36
MK
8083 return -1;
8084
388092d5
AB
8085 reg = recog_data.operand[0];
8086 mem = recog_data.operand[1];
f6ec1d11 8087
388092d5
AB
8088 /* We should use MEM's mode since REG's mode in presence of
8089 ZERO_EXTEND will always be DImode. */
8090 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8091 /* Process non-speculative ld. */
8092 {
8093 if (!reload_completed)
8094 {
8095 /* Do not speculate into regs like ar.lc. */
8096 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8097 return -1;
8098
8099 if (!MEM_P (mem))
8100 return -1;
8101
8102 {
8103 rtx mem_reg = XEXP (mem, 0);
8104
8105 if (!REG_P (mem_reg))
8106 return -1;
8107 }
8108
8109 mode_rtx = mem;
8110 }
8111 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8112 {
8113 gcc_assert (REG_P (reg) && MEM_P (mem));
8114 mode_rtx = mem;
8115 }
8116 else
8117 return -1;
8118 }
8119 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8120 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8121 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8122 /* Process speculative ld or ld.c. */
048d0d36 8123 {
388092d5
AB
8124 gcc_assert (REG_P (reg) && MEM_P (mem));
8125 mode_rtx = mem;
048d0d36
MK
8126 }
8127 else
048d0d36 8128 {
388092d5 8129 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
048d0d36 8130
388092d5
AB
8131 if (attr_class == ITANIUM_CLASS_CHK_A
8132 || attr_class == ITANIUM_CLASS_CHK_S_I
8133 || attr_class == ITANIUM_CLASS_CHK_S_F)
8134 /* Process chk. */
8135 mode_rtx = reg;
8136 else
8137 return -1;
048d0d36 8138 }
f6ec1d11 8139
388092d5 8140 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
f6ec1d11 8141
388092d5 8142 if (mode_no == SPEC_MODE_INVALID)
048d0d36
MK
8143 return -1;
8144
388092d5
AB
8145 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8146
8147 if (extend_p)
8148 {
8149 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8150 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8151 return -1;
f6ec1d11 8152
388092d5
AB
8153 mode_no += SPEC_GEN_EXTEND_OFFSET;
8154 }
048d0d36 8155
388092d5 8156 return mode_no;
048d0d36
MK
8157}
8158
388092d5
AB
8159/* If X is an unspec part of a speculative load, return its code.
8160 Return -1 otherwise. */
8161static int
8162get_spec_unspec_code (const_rtx x)
8163{
8164 if (GET_CODE (x) != UNSPEC)
8165 return -1;
048d0d36 8166
048d0d36 8167 {
388092d5 8168 int code;
048d0d36 8169
388092d5 8170 code = XINT (x, 1);
048d0d36 8171
388092d5
AB
8172 switch (code)
8173 {
8174 case UNSPEC_LDA:
8175 case UNSPEC_LDS:
8176 case UNSPEC_LDS_A:
8177 case UNSPEC_LDSA:
8178 return code;
048d0d36 8179
388092d5
AB
8180 default:
8181 return -1;
8182 }
8183 }
8184}
048d0d36 8185
388092d5
AB
8186/* Implement skip_rtx_p hook. */
8187static bool
8188ia64_skip_rtx_p (const_rtx x)
8189{
8190 return get_spec_unspec_code (x) != -1;
8191}
8192
8193/* If INSN is a speculative load, return its UNSPEC code.
8194 Return -1 otherwise. */
8195static int
8196get_insn_spec_code (const_rtx insn)
8197{
8198 rtx pat, reg, mem;
8199
8200 pat = PATTERN (insn);
8201
8202 if (GET_CODE (pat) == COND_EXEC)
8203 pat = COND_EXEC_CODE (pat);
8204
8205 if (GET_CODE (pat) != SET)
8206 return -1;
8207
8208 reg = SET_DEST (pat);
8209 if (!REG_P (reg))
8210 return -1;
8211
8212 mem = SET_SRC (pat);
8213 if (GET_CODE (mem) == ZERO_EXTEND)
8214 mem = XEXP (mem, 0);
8215
8216 return get_spec_unspec_code (mem);
8217}
8218
8219/* If INSN is a speculative load, return a ds with the speculation types.
8220 Otherwise [if INSN is a normal instruction] return 0. */
8221static ds_t
8222ia64_get_insn_spec_ds (rtx_insn *insn)
8223{
8224 int code = get_insn_spec_code (insn);
8225
8226 switch (code)
8227 {
8228 case UNSPEC_LDA:
8229 return BEGIN_DATA;
8230
8231 case UNSPEC_LDS:
8232 case UNSPEC_LDS_A:
8233 return BEGIN_CONTROL;
8234
8235 case UNSPEC_LDSA:
8236 return BEGIN_DATA | BEGIN_CONTROL;
8237
8238 default:
8239 return 0;
8240 }
8241}
8242
8243/* If INSN is a speculative load return a ds with the speculation types that
8244 will be checked.
8245 Otherwise [if INSN is a normal instruction] return 0. */
8246static ds_t
8247ia64_get_insn_checked_ds (rtx_insn *insn)
8248{
8249 int code = get_insn_spec_code (insn);
8250
8251 switch (code)
8252 {
8253 case UNSPEC_LDA:
8254 return BEGIN_DATA | BEGIN_CONTROL;
8255
8256 case UNSPEC_LDS:
8257 return BEGIN_CONTROL;
8258
8259 case UNSPEC_LDS_A:
8260 case UNSPEC_LDSA:
8261 return BEGIN_DATA | BEGIN_CONTROL;
8262
8263 default:
8264 return 0;
8265 }
8266}
8267
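/* Illustrative sketch, not part of the original file: both hooks above
   describe speculation as a ds_t bit mask, so questions about coverage
   reduce to mask arithmetic.  toy_check_covers_p is a hypothetical helper,
   not a GCC function; it only shows how a checked-ds mask returned by
   ia64_get_insn_checked_ds could be compared against a requested mask.  */
static int
toy_check_covers_p (ds_t checked, ds_t requested)
{
  /* Every requested speculation type must be among the types the check
     instruction actually verifies.  */
  return (requested & ~checked) == 0;
}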
8268/* Return a speculative load pattern for INSN, for speculation type TS
8269 and machine mode number MODE_NO. The new pattern reuses the cached
8270 recog operands of INSN and keeps its COND_EXEC predicate, if any. */
8272static rtx
8273ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8274{
8275 rtx pat, new_pat;
8276 gen_func_t gen_load;
8277
8278 gen_load = get_spec_load_gen_function (ts, mode_no);
8279
8280 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8281 copy_rtx (recog_data.operand[1]));
8282
8283 pat = PATTERN (insn);
8284 if (GET_CODE (pat) == COND_EXEC)
8285 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8286 new_pat);
8287
8288 return new_pat;
8289}
8290
8291static bool
8292insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8293 ds_t ds ATTRIBUTE_UNUSED)
8294{
8295 return false;
8296}
8297
8298/* Implement targetm.sched.speculate_insn hook.
8299 Check if the INSN can be TS speculative.
8300 If 'no' - return -1.
8301 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8302 If current pattern of the INSN already provides TS speculation,
8303 return 0. */
8304static int
8305ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8306{
8307 int mode_no;
8308 int res;
8309
8310 gcc_assert (!(ts & ~SPECULATIVE));
8311
8312 if (ia64_spec_check_p (insn))
8313 return -1;
8314
8315 if ((ts & BE_IN_SPEC)
8316 && !insn_can_be_in_speculative_p (insn, ts))
8317 return -1;
8318
8319 mode_no = get_mode_no_for_insn (insn);
8320
8321 if (mode_no != SPEC_MODE_INVALID)
8322 {
8323 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8324 res = 0;
8325 else
8326 {
8327 res = 1;
8328 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8329 }
8330 }
8331 else
8332 res = -1;
8333
8334 return res;
8335}
8336
8337/* Return a function that will generate a check for speculation TS with mode
8338 MODE_NO.
8339 If simple check is needed, pass true for SIMPLE_CHECK_P.
8340 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8341static gen_func_t
8342get_spec_check_gen_function (ds_t ts, int mode_no,
8343 bool simple_check_p, bool clearing_check_p)
8344{
8345 static gen_func_t gen_ld_c_clr[] = {
8346 gen_movbi_clr,
8347 gen_movqi_clr,
8348 gen_movhi_clr,
8349 gen_movsi_clr,
8350 gen_movdi_clr,
8351 gen_movsf_clr,
8352 gen_movdf_clr,
8353 gen_movxf_clr,
8354 gen_movti_clr,
8355 gen_zero_extendqidi2_clr,
8356 gen_zero_extendhidi2_clr,
8357 gen_zero_extendsidi2_clr,
8358 };
8359 static gen_func_t gen_ld_c_nc[] = {
8360 gen_movbi_nc,
8361 gen_movqi_nc,
8362 gen_movhi_nc,
8363 gen_movsi_nc,
8364 gen_movdi_nc,
8365 gen_movsf_nc,
8366 gen_movdf_nc,
8367 gen_movxf_nc,
8368 gen_movti_nc,
8369 gen_zero_extendqidi2_nc,
8370 gen_zero_extendhidi2_nc,
8371 gen_zero_extendsidi2_nc,
8372 };
8373 static gen_func_t gen_chk_a_clr[] = {
8374 gen_advanced_load_check_clr_bi,
8375 gen_advanced_load_check_clr_qi,
8376 gen_advanced_load_check_clr_hi,
8377 gen_advanced_load_check_clr_si,
8378 gen_advanced_load_check_clr_di,
8379 gen_advanced_load_check_clr_sf,
8380 gen_advanced_load_check_clr_df,
8381 gen_advanced_load_check_clr_xf,
8382 gen_advanced_load_check_clr_ti,
8383 gen_advanced_load_check_clr_di,
8384 gen_advanced_load_check_clr_di,
8385 gen_advanced_load_check_clr_di,
8386 };
8387 static gen_func_t gen_chk_a_nc[] = {
8388 gen_advanced_load_check_nc_bi,
8389 gen_advanced_load_check_nc_qi,
8390 gen_advanced_load_check_nc_hi,
8391 gen_advanced_load_check_nc_si,
8392 gen_advanced_load_check_nc_di,
8393 gen_advanced_load_check_nc_sf,
8394 gen_advanced_load_check_nc_df,
8395 gen_advanced_load_check_nc_xf,
8396 gen_advanced_load_check_nc_ti,
8397 gen_advanced_load_check_nc_di,
8398 gen_advanced_load_check_nc_di,
8399 gen_advanced_load_check_nc_di,
8400 };
8401 static gen_func_t gen_chk_s[] = {
8402 gen_speculation_check_bi,
8403 gen_speculation_check_qi,
8404 gen_speculation_check_hi,
8405 gen_speculation_check_si,
8406 gen_speculation_check_di,
8407 gen_speculation_check_sf,
8408 gen_speculation_check_df,
8409 gen_speculation_check_xf,
8410 gen_speculation_check_ti,
8411 gen_speculation_check_di,
8412 gen_speculation_check_di,
8413 gen_speculation_check_di,
8414 };
8415
8416 gen_func_t *gen_check;
8417
8418 if (ts & BEGIN_DATA)
8419 {
8420 /* We don't need recovery because even if this is ld.sa
8421 ALAT entry will be allocated only if NAT bit is set to zero.
8422 So it is enough to use ld.c here. */
8423
8424 if (simple_check_p)
8425 {
8426 gcc_assert (mflag_sched_spec_ldc);
8427
8428 if (clearing_check_p)
8429 gen_check = gen_ld_c_clr;
8430 else
8431 gen_check = gen_ld_c_nc;
8432 }
8433 else
8434 {
8435 if (clearing_check_p)
8436 gen_check = gen_chk_a_clr;
8437 else
8438 gen_check = gen_chk_a_nc;
8439 }
8440 }
8441 else if (ts & BEGIN_CONTROL)
8442 {
8443 if (simple_check_p)
8444 /* We might want to use ld.sa -> ld.c instead of
8445 ld.s -> chk.s. */
8446 {
8447 gcc_assert (!ia64_needs_block_p (ts));
8448
8449 if (clearing_check_p)
8450 gen_check = gen_ld_c_clr;
8451 else
8452 gen_check = gen_ld_c_nc;
8453 }
8454 else
8455 {
8456 gen_check = gen_chk_s;
8457 }
8458 }
8459 else
8460 gcc_unreachable ();
8461
8462 gcc_assert (mode_no >= 0);
8463 return gen_check[mode_no];
8464}
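/* Illustrative sketch, not part of the original file: the selection above
   follows a common pattern -- pick one of several parallel tables of
   generator functions according to the speculation flags, then index the
   chosen table by the mode number.  The miniature version below uses
   hypothetical toy_* names only.  */
static int toy_gen_plain (int x) { return x; }
static int toy_gen_clearing (int x) { return -x; }

static int
toy_dispatch (int clearing_p, int mode_no, int operand)
{
  /* Two parallel tables; the flag chooses the table, MODE_NO the entry.  */
  static int (* const plain_table[2]) (int) = { toy_gen_plain, toy_gen_plain };
  static int (* const clearing_table[2]) (int)
    = { toy_gen_clearing, toy_gen_clearing };
  int (* const *table) (int) = clearing_p ? clearing_table : plain_table;

  return table[mode_no] (operand);
}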
8465
8466/* Return nonzero if a speculation check for TS must use the branchy (recovery block) form. */
8467static bool
8468ia64_needs_block_p (ds_t ts)
8469{
8470 if (ts & BEGIN_DATA)
8471 return !mflag_sched_spec_ldc;
8472
8473 gcc_assert ((ts & BEGIN_CONTROL) != 0);
048d0d36 8474
388092d5
AB
8475 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8476}
8477
8478/* Generate (or regenerate) a recovery check for INSN. */
8479static rtx
8480ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8481{
8482 rtx op1, pat, check_pat;
8483 gen_func_t gen_check;
8484 int mode_no;
8485
8486 mode_no = get_mode_no_for_insn (insn);
8487 gcc_assert (mode_no >= 0);
8488
8489 if (label)
8490 op1 = label;
8491 else
8492 {
8493 gcc_assert (!ia64_needs_block_p (ds));
8494 op1 = copy_rtx (recog_data.operand[1]);
8495 }
8496
8497 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8498 true);
8499
8500 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8501
8502 pat = PATTERN (insn);
8503 if (GET_CODE (pat) == COND_EXEC)
8504 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8505 check_pat);
8506
8507 return check_pat;
8508}
8509
8510/* Return nonzero, if X is branchy recovery check. */
8511static int
8512ia64_spec_check_p (rtx x)
8513{
8514 x = PATTERN (x);
8515 if (GET_CODE (x) == COND_EXEC)
8516 x = COND_EXEC_CODE (x);
8517 if (GET_CODE (x) == SET)
8518 return ia64_spec_check_src_p (SET_SRC (x));
8519 return 0;
8520}
8521
8522/* Return nonzero, if SRC belongs to recovery check. */
8523static int
8524ia64_spec_check_src_p (rtx src)
8525{
8526 if (GET_CODE (src) == IF_THEN_ELSE)
8527 {
8528 rtx t;
8529
8530 t = XEXP (src, 0);
8531 if (GET_CODE (t) == NE)
8532 {
8533 t = XEXP (t, 0);
8534
8535 if (GET_CODE (t) == UNSPEC)
8536 {
8537 int code;
8538
8539 code = XINT (t, 1);
8540
8541 if (code == UNSPEC_LDCCLR
8542 || code == UNSPEC_LDCNC
8543 || code == UNSPEC_CHKACLR
8544 || code == UNSPEC_CHKANC
8545 || code == UNSPEC_CHKS)
8546 {
8547 gcc_assert (code != 0);
8548 return code;
8549 }
8550 }
8551 }
8552 }
8553 return 0;
8554}
30028c85 8555\f
8556
8557/* The following page contains abstract data `bundle states' which are
8558 used for bundling insns (inserting nops and template generation). */
8559
8560/* The following describes state of insn bundling. */
8561
8562struct bundle_state
8563{
8564 /* Unique bundle state number to identify them in the debugging
8565 output */
8566 int unique_num;
8567 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8568 /* number nops before and after the insn */
8569 short before_nops_num, after_nops_num;
8570 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
8571 insn */
8572 int cost; /* cost of the state in cycles */
8573 int accumulated_insns_num; /* number of all previous insns including
8574 nops. L is considered as 2 insns */
8575 int branch_deviation; /* deviation of previous branches from 3rd slots */
8576 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8577 struct bundle_state *next; /* next state with the same insn_num */
8578 struct bundle_state *originator; /* originator (previous insn state) */
8579 /* All bundle states are in the following chain. */
8580 struct bundle_state *allocated_states_chain;
8581 /* The DFA State after issuing the insn and the nops. */
8582 state_t dfa_state;
8583};
8584
8585/* The following maps an insn number to the corresponding bundle state. */
8586
8587static struct bundle_state **index_to_bundle_states;
8588
8589/* The unique number of the next bundle state. */
8590
8591static int bundle_states_num;
8592
8593/* All allocated bundle states are in the following chain. */
8594
8595static struct bundle_state *allocated_bundle_states_chain;
8596
8597/* All allocated but not used bundle states are in the following
8598 chain. */
8599
8600static struct bundle_state *free_bundle_state_chain;
8601
8602
8603/* The following function returns a free bundle state. */
8604
8605static struct bundle_state *
8606get_free_bundle_state (void)
8607{
8608 struct bundle_state *result;
2130b7fb 8609
8610 if (free_bundle_state_chain != NULL)
8611 {
8612 result = free_bundle_state_chain;
8613 free_bundle_state_chain = result->next;
8614 }
8615 else
8616 {
8617 result = XNEW (struct bundle_state);
8618 result->dfa_state = xmalloc (dfa_state_size);
8619 result->allocated_states_chain = allocated_bundle_states_chain;
8620 allocated_bundle_states_chain = result;
8621 }
8622 result->unique_num = bundle_states_num++;
8623 return result;
8624
8625}
8626
8627/* The following function frees the given bundle state. */
8628
8629static void
8630free_bundle_state (struct bundle_state *state)
8631{
8632 state->next = free_bundle_state_chain;
8633 free_bundle_state_chain = state;
8634}
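/* Illustrative sketch, not part of the original file: get_free_bundle_state
   and free_bundle_state above implement a classic free-list allocator --
   freed objects are pushed onto a singly linked chain and handed out again
   before any new memory is allocated.  A minimal standalone version of the
   same idea, with hypothetical toy_* names: */
struct toy_state { struct toy_state *next_free; int payload; };
static struct toy_state *toy_free_chain;

static struct toy_state *
toy_get_state (void)
{
  struct toy_state *s;

  if (toy_free_chain != NULL)
    {
      /* Reuse the most recently freed object.  */
      s = toy_free_chain;
      toy_free_chain = s->next_free;
    }
  else
    s = XNEW (struct toy_state);
  return s;
}

static void
toy_put_state (struct toy_state *s)
{
  s->next_free = toy_free_chain;
  toy_free_chain = s;
}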
8635
8636/* Start work with abstract data `bundle states'. */
8637
8638static void
8639initiate_bundle_states (void)
8640{
8641 bundle_states_num = 0;
8642 free_bundle_state_chain = NULL;
8643 allocated_bundle_states_chain = NULL;
8644}
8645
30028c85 8646/* Finish work with abstract data `bundle states'. */
8647
8648static void
9c808aad 8649finish_bundle_states (void)
2130b7fb 8650{
8651 struct bundle_state *curr_state, *next_state;
8652
8653 for (curr_state = allocated_bundle_states_chain;
8654 curr_state != NULL;
8655 curr_state = next_state)
2130b7fb 8656 {
8657 next_state = curr_state->allocated_states_chain;
8658 free (curr_state->dfa_state);
8659 free (curr_state);
2130b7fb 8660 }
8661}
8662
3a4f280b 8663/* Hashtable helpers. */
2130b7fb 8664
8d67ee55 8665struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
3a4f280b 8666{
8667 static inline hashval_t hash (const bundle_state *);
8668 static inline bool equal (const bundle_state *, const bundle_state *);
3a4f280b 8669};
2130b7fb 8670
30028c85 8671/* The function returns hash of BUNDLE_STATE. */
2130b7fb 8672
3a4f280b 8673inline hashval_t
67f58944 8674bundle_state_hasher::hash (const bundle_state *state)
8675{
8676 unsigned result, i;
2130b7fb 8677
8678 for (result = i = 0; i < dfa_state_size; i++)
8679 result += (((unsigned char *) state->dfa_state) [i]
8680 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8681 return result + state->insn_num;
8682}
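/* Illustrative sketch, not part of the original file: the hash above mixes
   every byte of the opaque DFA state with a position-dependent shift and
   then folds in the insn number.  The same shape over an arbitrary byte
   buffer, with hypothetical names: */
static unsigned
toy_hash_bytes (const unsigned char *buf, size_t len, unsigned salt)
{
  unsigned result = 0;
  size_t i;

  for (i = 0; i < len; i++)
    result += (unsigned) buf[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + salt;
}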
2130b7fb 8683
30028c85 8684/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 8685
3a4f280b 8686inline bool
8687bundle_state_hasher::equal (const bundle_state *state1,
8688 const bundle_state *state2)
30028c85 8689{
8690 return (state1->insn_num == state2->insn_num
8691 && memcmp (state1->dfa_state, state2->dfa_state,
8692 dfa_state_size) == 0);
8693}
2130b7fb 8694
8695/* Hash table of the bundle states. The key is dfa_state and insn_num
8696 of the bundle states. */
8697
c203e8a7 8698static hash_table<bundle_state_hasher> *bundle_state_table;
3a4f280b 8699
8700/* The function inserts the BUNDLE_STATE into the hash table. The
8701 function returns nonzero if the bundle has been inserted into the
8702 table. The table contains the best bundle state with given key. */
2130b7fb 8703
30028c85 8704static int
9c808aad 8705insert_bundle_state (struct bundle_state *bundle_state)
30028c85 8706{
3a4f280b 8707 struct bundle_state **entry_ptr;
2130b7fb 8708
c203e8a7 8709 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8710 if (*entry_ptr == NULL)
8711 {
8712 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8713 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
3a4f280b 8714 *entry_ptr = bundle_state;
30028c85 8715 return TRUE;
2130b7fb 8716 }
8717 else if (bundle_state->cost < (*entry_ptr)->cost
8718 || (bundle_state->cost == (*entry_ptr)->cost
8719 && ((*entry_ptr)->accumulated_insns_num
30028c85 8720 > bundle_state->accumulated_insns_num
3a4f280b 8721 || ((*entry_ptr)->accumulated_insns_num
30028c85 8722 == bundle_state->accumulated_insns_num
3a4f280b 8723 && ((*entry_ptr)->branch_deviation
388092d5 8724 > bundle_state->branch_deviation
3a4f280b 8725 || ((*entry_ptr)->branch_deviation
388092d5 8726 == bundle_state->branch_deviation
3a4f280b 8727 && (*entry_ptr)->middle_bundle_stops
388092d5 8728 > bundle_state->middle_bundle_stops))))))
9c808aad 8729
2130b7fb 8730 {
8731 struct bundle_state temp;
8732
8733 temp = **entry_ptr;
8734 **entry_ptr = *bundle_state;
8735 (*entry_ptr)->next = temp.next;
30028c85 8736 *bundle_state = temp;
2130b7fb 8737 }
8738 return FALSE;
8739}
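/* Illustrative sketch, not part of the original file: insert_bundle_state
   keeps, for each (dfa_state, insn_num) key, only the best state seen so
   far; a worse newcomer is swapped into the discarded slot so the caller
   can recycle it.  The core decision is a lexicographic comparison; a
   hypothetical stripped-down form of that test: */
static int
toy_state_better_p (int cost1, int insns1, int cost2, int insns2)
{
  /* State 1 wins if it is cheaper, or equally cheap but uses fewer
     (nop-padded) instructions; further tie-breakers such as branch
     deviation would follow the same pattern.  */
  return cost1 < cost2 || (cost1 == cost2 && insns1 < insns2);
}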
2130b7fb 8740
8741/* Start work with the hash table. */
8742
8743static void
9c808aad 8744initiate_bundle_state_table (void)
30028c85 8745{
c203e8a7 8746 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8747}
8748
30028c85 8749/* Finish work with the hash table. */
8750
8751static void
9c808aad 8752finish_bundle_state_table (void)
e4027dab 8753{
8754 delete bundle_state_table;
8755 bundle_state_table = NULL;
8756}
8757
30028c85 8758\f
a0a7b566 8759
8760/* The following variable is an insn `nop' used to check bundle states
8761 with different number of inserted nops. */
a0a7b566 8762
dd3d2b35 8763static rtx_insn *ia64_nop;
a0a7b566 8764
8765/* The following function tries to issue NOPS_NUM nops for the current
8766 state without advancing processor cycle. If it failed, the
8767 function returns FALSE and frees the current state. */
8768
8769static int
9c808aad 8770try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 8771{
30028c85 8772 int i;
a0a7b566 8773
8774 for (i = 0; i < nops_num; i++)
8775 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8776 {
8777 free_bundle_state (curr_state);
8778 return FALSE;
8779 }
8780 return TRUE;
8781}
a0a7b566 8782
8783/* The following function tries to issue INSN for the current
8784 state without advancing processor cycle. If it failed, the
8785 function returns FALSE and frees the current state. */
a0a7b566 8786
30028c85 8787static int
9c808aad 8788try_issue_insn (struct bundle_state *curr_state, rtx insn)
8789{
8790 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8791 {
8792 free_bundle_state (curr_state);
8793 return FALSE;
8794 }
8795 return TRUE;
8796}
a0a7b566 8797
8798/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8799 starting with ORIGINATOR without advancing processor cycle. If
8800 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8801 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
8802 If it was successful, the function creates new bundle state and
8803 insert into the hash table and into `index_to_bundle_states'. */
a0a7b566 8804
30028c85 8805static void
9c808aad 8806issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8807 rtx_insn *insn, int try_bundle_end_p,
8808 int only_bundle_end_p)
8809{
8810 struct bundle_state *curr_state;
8811
8812 curr_state = get_free_bundle_state ();
8813 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8814 curr_state->insn = insn;
8815 curr_state->insn_num = originator->insn_num + 1;
8816 curr_state->cost = originator->cost;
8817 curr_state->originator = originator;
8818 curr_state->before_nops_num = before_nops_num;
8819 curr_state->after_nops_num = 0;
8820 curr_state->accumulated_insns_num
8821 = originator->accumulated_insns_num + before_nops_num;
8822 curr_state->branch_deviation = originator->branch_deviation;
388092d5 8823 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8824 gcc_assert (insn);
8825 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
30028c85 8826 {
e820471b 8827 gcc_assert (GET_MODE (insn) != TImode);
8828 if (!try_issue_nops (curr_state, before_nops_num))
8829 return;
8830 if (!try_issue_insn (curr_state, insn))
8831 return;
8832 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8833 if (curr_state->accumulated_insns_num % 3 != 0)
8834 curr_state->middle_bundle_stops++;
8835 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8836 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 8837 {
8838 free_bundle_state (curr_state);
8839 return;
a0a7b566 8840 }
a0a7b566 8841 }
30028c85 8842 else if (GET_MODE (insn) != TImode)
a0a7b566 8843 {
8844 if (!try_issue_nops (curr_state, before_nops_num))
8845 return;
8846 if (!try_issue_insn (curr_state, insn))
8847 return;
f32360c7 8848 curr_state->accumulated_insns_num++;
7b84aac0 8849 gcc_assert (!unknown_for_bundling_p (insn));
e820471b 8850
8851 if (ia64_safe_type (insn) == TYPE_L)
8852 curr_state->accumulated_insns_num++;
8853 }
8854 else
8855 {
8856 /* If this is an insn that must be first in a group, then don't allow
8857 nops to be emitted before it. Currently, alloc is the only such
8858 supported instruction. */
8859 /* ??? The bundling automatons should handle this for us, but they do
8860 not yet have support for the first_insn attribute. */
8861 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8862 {
8863 free_bundle_state (curr_state);
8864 return;
8865 }
8866
8867 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8868 state_transition (curr_state->dfa_state, NULL);
8869 curr_state->cost++;
8870 if (!try_issue_nops (curr_state, before_nops_num))
8871 return;
8872 if (!try_issue_insn (curr_state, insn))
8873 return;
f32360c7 8874 curr_state->accumulated_insns_num++;
7b84aac0 8875 if (unknown_for_bundling_p (insn))
8876 {
8877 /* Finish bundle containing asm insn. */
8878 curr_state->after_nops_num
8879 = 3 - curr_state->accumulated_insns_num % 3;
8880 curr_state->accumulated_insns_num
8881 += 3 - curr_state->accumulated_insns_num % 3;
8882 }
8883 else if (ia64_safe_type (insn) == TYPE_L)
8884 curr_state->accumulated_insns_num++;
8885 }
8886 if (ia64_safe_type (insn) == TYPE_B)
8887 curr_state->branch_deviation
8888 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8889 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8890 {
f32360c7 8891 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 8892 {
8893 state_t dfa_state;
8894 struct bundle_state *curr_state1;
8895 struct bundle_state *allocated_states_chain;
8896
8897 curr_state1 = get_free_bundle_state ();
8898 dfa_state = curr_state1->dfa_state;
8899 allocated_states_chain = curr_state1->allocated_states_chain;
8900 *curr_state1 = *curr_state;
8901 curr_state1->dfa_state = dfa_state;
8902 curr_state1->allocated_states_chain = allocated_states_chain;
8903 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8904 dfa_state_size);
8905 curr_state = curr_state1;
a0a7b566 8906 }
8907 if (!try_issue_nops (curr_state,
8908 3 - curr_state->accumulated_insns_num % 3))
8909 return;
8910 curr_state->after_nops_num
8911 = 3 - curr_state->accumulated_insns_num % 3;
8912 curr_state->accumulated_insns_num
8913 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 8914 }
8915 if (!insert_bundle_state (curr_state))
8916 free_bundle_state (curr_state);
8917 return;
8918}
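/* Illustrative sketch, not part of the original file: several places above
   use the expression 3 - accumulated % 3 to pad the current bundle with
   nops up to the next bundle boundary (that form is only used when the
   bundle is known to be partially filled).  A hypothetical helper that also
   handles the already-aligned case: */
static int
toy_nops_to_close_bundle (int accumulated_insns_num)
{
  return (3 - accumulated_insns_num % 3) % 3;
}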
e013f3c7 8919
8920/* The following function returns the position in the two-bundle window
8921 for given STATE. */
8922
8923static int
9c808aad 8924get_max_pos (state_t state)
8925{
8926 if (cpu_unit_reservation_p (state, pos_6))
8927 return 6;
8928 else if (cpu_unit_reservation_p (state, pos_5))
8929 return 5;
8930 else if (cpu_unit_reservation_p (state, pos_4))
8931 return 4;
8932 else if (cpu_unit_reservation_p (state, pos_3))
8933 return 3;
8934 else if (cpu_unit_reservation_p (state, pos_2))
8935 return 2;
8936 else if (cpu_unit_reservation_p (state, pos_1))
8937 return 1;
8938 else
8939 return 0;
8940}
8941
8942/* The function returns code of a possible template for given position
8943 and state. The function should be called only with 2 values of
8944 position equal to 3 or 6. We avoid generating F NOPs by putting
8945 templates containing F insns at the end of the template search
8946 because of an undocumented anomaly in McKinley derived cores which can
8947 cause stalls if an F-unit insn (including a NOP) is issued within a
8948 six-cycle window after reading certain application registers (such
8949 as ar.bsp). Furthermore, power-considerations also argue against
8950 the use of F-unit instructions unless they're really needed. */
2130b7fb 8951
c237e94a 8952static int
9c808aad 8953get_template (state_t state, int pos)
2130b7fb 8954{
30028c85 8955 switch (pos)
2130b7fb 8956 {
30028c85 8957 case 3:
96ddf8ef 8958 if (cpu_unit_reservation_p (state, _0mmi_))
30028c85 8959 return 1;
8960 else if (cpu_unit_reservation_p (state, _0mii_))
8961 return 0;
8962 else if (cpu_unit_reservation_p (state, _0mmb_))
8963 return 7;
8964 else if (cpu_unit_reservation_p (state, _0mib_))
8965 return 6;
8966 else if (cpu_unit_reservation_p (state, _0mbb_))
8967 return 5;
8968 else if (cpu_unit_reservation_p (state, _0bbb_))
8969 return 4;
8970 else if (cpu_unit_reservation_p (state, _0mmf_))
8971 return 3;
8972 else if (cpu_unit_reservation_p (state, _0mfi_))
8973 return 2;
8974 else if (cpu_unit_reservation_p (state, _0mfb_))
8975 return 8;
8976 else if (cpu_unit_reservation_p (state, _0mlx_))
8977 return 9;
8978 else
e820471b 8979 gcc_unreachable ();
30028c85 8980 case 6:
96ddf8ef 8981 if (cpu_unit_reservation_p (state, _1mmi_))
30028c85 8982 return 1;
8983 else if (cpu_unit_reservation_p (state, _1mii_))
8984 return 0;
8985 else if (cpu_unit_reservation_p (state, _1mmb_))
8986 return 7;
8987 else if (cpu_unit_reservation_p (state, _1mib_))
8988 return 6;
8989 else if (cpu_unit_reservation_p (state, _1mbb_))
8990 return 5;
8991 else if (cpu_unit_reservation_p (state, _1bbb_))
8992 return 4;
8993 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8994 return 3;
8995 else if (cpu_unit_reservation_p (state, _1mfi_))
8996 return 2;
8997 else if (cpu_unit_reservation_p (state, _1mfb_))
8998 return 8;
8999 else if (cpu_unit_reservation_p (state, _1mlx_))
9000 return 9;
9001 else
e820471b 9002 gcc_unreachable ();
30028c85 9003 default:
e820471b 9004 gcc_unreachable ();
2130b7fb 9005 }
30028c85 9006}
2130b7fb 9007
388092d5 9008/* True when INSN is important for bundling. */
7b84aac0 9009
388092d5 9010static bool
647d790d 9011important_for_bundling_p (rtx_insn *insn)
9012{
9013 return (INSN_P (insn)
9014 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
9015 && GET_CODE (PATTERN (insn)) != USE
9016 && GET_CODE (PATTERN (insn)) != CLOBBER);
9017}
9018
9019/* The following function returns an insn important for insn bundling
9020 followed by INSN and before TAIL. */
a0a7b566 9021
9022static rtx_insn *
9023get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
9024{
9025 for (; insn && insn != tail; insn = NEXT_INSN (insn))
388092d5 9026 if (important_for_bundling_p (insn))
30028c85 9027 return insn;
b32d5189 9028 return NULL;
9029}
9030
9031/* True when INSN is unknown, but important, for bundling. */
9032
9033static bool
647d790d 9034unknown_for_bundling_p (rtx_insn *insn)
9035{
9036 return (INSN_P (insn)
9037 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
9038 && GET_CODE (PATTERN (insn)) != USE
9039 && GET_CODE (PATTERN (insn)) != CLOBBER);
9040}
9041
9042/* Add a bundle selector TEMPLATE0 before INSN. */
9043
9044static void
b32d5189 9045ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
9046{
9047 rtx b = gen_bundle_selector (GEN_INT (template0));
9048
9049 ia64_emit_insn_before (b, insn);
9050#if NR_BUNDLES == 10
9051 if ((template0 == 4 || template0 == 5)
d5fabb58 9052 && ia64_except_unwind_info (&global_options) == UI_TARGET)
9053 {
9054 int i;
9055 rtx note = NULL_RTX;
9056
9057 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
9058 first or second slot. If it is and has REG_EH_NOTE set, copy it
9059 to following nops, as br.call sets rp to the address of following
9060 bundle and therefore an EH region end must be on a bundle
9061 boundary. */
9062 insn = PREV_INSN (insn);
9063 for (i = 0; i < 3; i++)
9064 {
9065 do
9066 insn = next_active_insn (insn);
b64925dc 9067 while (NONJUMP_INSN_P (insn)
4a4cd49c 9068 && get_attr_empty (insn) == EMPTY_YES);
b64925dc 9069 if (CALL_P (insn))
9070 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9071 else if (note)
9072 {
9073 int code;
9074
9075 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9076 || code == CODE_FOR_nop_b);
9077 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9078 note = NULL_RTX;
9079 else
bbbbb16a 9080 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9081 }
9082 }
9083 }
9084#endif
9085}
9086
9087/* The following function does insn bundling. Bundling means
9088 inserting templates and nop insns to fit insn groups into permitted
9089 templates. Instruction scheduling uses an NDFA (non-deterministic
9090 finite automaton) encoding information about the templates and the
9091 inserted nops. Nondeterminism of the automaton permits following
9092 all possible insn sequences very fast.
9093
9094 Unfortunately it is not possible to get information about inserting
9095 nop insns and used templates from the automata states. The
9096 automata only says that we can issue an insn possibly inserting
9097 some nops before it and using some template. Therefore insn
9098 bundling in this function is implemented by using DFA
048d0d36 9099 (deterministic finite automata). We follow all possible insn
9100 sequences by inserting 0-2 nops (that is what the NDFA describe for
9101 insn scheduling) before/after each insn being bundled. We know the
9102 start of simulated processor cycle from insn scheduling (insn
9103 starting a new cycle has TImode).
9104
9105 Simple implementation of insn bundling would create enormous
9106 number of possible insn sequences satisfying information about new
9107 cycle ticks taken from the insn scheduling. To make the algorithm
9108 practical we use dynamic programming. Each decision (about
9109 inserting nops and implicitly about previous decisions) is described
9110 by structure bundle_state (see above). If we generate the same
9111 bundle state (key is automaton state after issuing the insns and
9112 nops for it), we reuse already generated one. As consequence we
1e5f1716 9113 reject some decisions which cannot improve the solution and
9114 reduce memory for the algorithm.
9115
9116 When we reach the end of EBB (extended basic block), we choose the
9117 best sequence and then, moving back in EBB, insert templates for
9118 the best alternative. The templates are taken from querying
9119 automaton state for each insn in chosen bundle states.
9120
9121 So the algorithm makes two (forward and backward) passes through
7400e46b 9122 EBB. */
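/* Illustrative sketch, not part of the original file: the dynamic
   programming described in the comment above can be pictured on a much
   smaller problem.  Suppose each insn occupies one slot of a 3-slot bundle,
   some insns must open a bundle, and we want the minimal number of nops
   inserted before insns.  Keeping one best partial cost per slot position
   -- instead of one per (insn number, DFA state) pair as the real code
   does -- gives the hypothetical toy below.  */
static int
toy_min_nops (const unsigned char *must_open_bundle, int n_insns)
{
  /* best[pos] = fewest nops so far for a prefix whose next free slot is
     POS inside the current bundle; -1 means unreachable.  */
  int best[3] = { 0, -1, -1 };
  int i, pos, answer;

  for (i = 0; i < n_insns; i++)
    {
      int next[3] = { -1, -1, -1 };

      for (pos = 0; pos < 3; pos++)
        {
          int nops;

          if (best[pos] < 0)
            continue;
          for (nops = 0; nops <= 2; nops++)
            {
              int slot = (pos + nops) % 3;
              int cost = best[pos] + nops;

              if (must_open_bundle[i] && slot != 0)
                continue;
              if (next[(slot + 1) % 3] < 0 || cost < next[(slot + 1) % 3])
                next[(slot + 1) % 3] = cost;
            }
        }
      for (pos = 0; pos < 3; pos++)
        best[pos] = next[pos];
    }

  /* Take the cheapest complete solution over all final positions.  */
  answer = -1;
  for (pos = 0; pos < 3; pos++)
    if (best[pos] >= 0 && (answer < 0 || best[pos] < answer))
      answer = best[pos];
  return answer;
}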
a0a7b566 9123
30028c85 9124static void
b32d5189 9125bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9126{
9127 struct bundle_state *curr_state, *next_state, *best_state;
b32d5189 9128 rtx_insn *insn, *next_insn;
30028c85 9129 int insn_num;
f32360c7 9130 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 9131 int pos = 0, max_pos, template0, template1;
b32d5189 9132 rtx_insn *b;
30028c85 9133 enum attr_type type;
2d1b811d 9134
30028c85 9135 insn_num = 0;
c856f536 9136 /* Count insns in the EBB. */
30028c85
VM
9137 for (insn = NEXT_INSN (prev_head_insn);
9138 insn && insn != tail;
9139 insn = NEXT_INSN (insn))
9140 if (INSN_P (insn))
9141 insn_num++;
9142 if (insn_num == 0)
9143 return;
9144 bundling_p = 1;
9145 dfa_clean_insn_cache ();
9146 initiate_bundle_state_table ();
5ead67f6 9147 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
ff482c8d 9148 /* First (forward) pass -- generation of bundle states. */
30028c85
VM
9149 curr_state = get_free_bundle_state ();
9150 curr_state->insn = NULL;
9151 curr_state->before_nops_num = 0;
9152 curr_state->after_nops_num = 0;
9153 curr_state->insn_num = 0;
9154 curr_state->cost = 0;
9155 curr_state->accumulated_insns_num = 0;
9156 curr_state->branch_deviation = 0;
388092d5 9157 curr_state->middle_bundle_stops = 0;
30028c85
VM
9158 curr_state->next = NULL;
9159 curr_state->originator = NULL;
9160 state_reset (curr_state->dfa_state);
9161 index_to_bundle_states [0] = curr_state;
9162 insn_num = 0;
c856f536 9163 /* Shift cycle mark if it is put on insn which could be ignored. */
30028c85
VM
9164 for (insn = NEXT_INSN (prev_head_insn);
9165 insn != tail;
9166 insn = NEXT_INSN (insn))
9167 if (INSN_P (insn)
7b84aac0 9168 && !important_for_bundling_p (insn)
30028c85 9169 && GET_MODE (insn) == TImode)
2130b7fb 9170 {
30028c85
VM
9171 PUT_MODE (insn, VOIDmode);
9172 for (next_insn = NEXT_INSN (insn);
9173 next_insn != tail;
9174 next_insn = NEXT_INSN (next_insn))
7b84aac0 9175 if (important_for_bundling_p (next_insn)
388092d5 9176 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
30028c85
VM
9177 {
9178 PUT_MODE (next_insn, TImode);
9179 break;
9180 }
2130b7fb 9181 }
048d0d36 9182 /* Forward pass: generation of bundle states. */
30028c85
VM
9183 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9184 insn != NULL_RTX;
9185 insn = next_insn)
1ad72cef 9186 {
7b84aac0 9187 gcc_assert (important_for_bundling_p (insn));
f32360c7 9188 type = ia64_safe_type (insn);
30028c85
VM
9189 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9190 insn_num++;
9191 index_to_bundle_states [insn_num] = NULL;
9192 for (curr_state = index_to_bundle_states [insn_num - 1];
9193 curr_state != NULL;
9194 curr_state = next_state)
f83594c4 9195 {
30028c85 9196 pos = curr_state->accumulated_insns_num % 3;
30028c85 9197 next_state = curr_state->next;
c856f536
VM
9198 /* We must fill up the current bundle in order to start a
9199 subsequent asm insn in a new bundle. Asm insn is always
9200 placed in a separate bundle. */
f32360c7
VM
9201 only_bundle_end_p
9202 = (next_insn != NULL_RTX
9203 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7b84aac0 9204 && unknown_for_bundling_p (next_insn));
c856f536
VM
9205 /* We may fill up the current bundle if it is the cycle end
9206 without a group barrier. */
30028c85 9207 bundle_end_p
f32360c7 9208 = (only_bundle_end_p || next_insn == NULL_RTX
30028c85
VM
9209 || (GET_MODE (next_insn) == TImode
9210 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9211 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7400e46b 9212 || type == TYPE_S)
f32360c7
VM
9213 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9214 only_bundle_end_p);
9215 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9216 only_bundle_end_p);
9217 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9218 only_bundle_end_p);
f83594c4 9219 }
e820471b 9220 gcc_assert (index_to_bundle_states [insn_num]);
30028c85
VM
9221 for (curr_state = index_to_bundle_states [insn_num];
9222 curr_state != NULL;
9223 curr_state = curr_state->next)
9224 if (verbose >= 2 && dump)
9225 {
c856f536
VM
9226 /* This structure is taken from generated code of the
9227 pipeline hazard recognizer (see file insn-attrtab.c).
9228 Please don't forget to change the structure if a new
9229 automaton is added to .md file. */
30028c85
VM
9230 struct DFA_chip
9231 {
9232 unsigned short one_automaton_state;
9233 unsigned short oneb_automaton_state;
9234 unsigned short two_automaton_state;
9235 unsigned short twob_automaton_state;
9236 };
9c808aad 9237
30028c85
VM
9238 fprintf
9239 (dump,
388092d5 9240 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
30028c85
VM
9241 curr_state->unique_num,
9242 (curr_state->originator == NULL
9243 ? -1 : curr_state->originator->unique_num),
9244 curr_state->cost,
9245 curr_state->before_nops_num, curr_state->after_nops_num,
9246 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 9247 curr_state->middle_bundle_stops,
7400e46b 9248 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85
VM
9249 INSN_UID (insn));
9250 }
1ad72cef 9251 }
e820471b
NS
9252
9253 /* We should find a solution because the 2nd insn scheduling has
9254 found one. */
9255 gcc_assert (index_to_bundle_states [insn_num]);
c856f536 9256 /* Find a state corresponding to the best insn sequence. */
30028c85
VM
9257 best_state = NULL;
9258 for (curr_state = index_to_bundle_states [insn_num];
9259 curr_state != NULL;
9260 curr_state = curr_state->next)
c856f536
VM
9261 /* We are just looking at the states with fully filled up last
9262 bundle. The first we prefer insn sequences with minimal cost
9263 then with minimal inserted nops and finally with branch insns
9264 placed in the 3rd slots. */
30028c85
VM
9265 if (curr_state->accumulated_insns_num % 3 == 0
9266 && (best_state == NULL || best_state->cost > curr_state->cost
9267 || (best_state->cost == curr_state->cost
9268 && (curr_state->accumulated_insns_num
9269 < best_state->accumulated_insns_num
9270 || (curr_state->accumulated_insns_num
9271 == best_state->accumulated_insns_num
388092d5
AB
9272 && (curr_state->branch_deviation
9273 < best_state->branch_deviation
9274 || (curr_state->branch_deviation
9275 == best_state->branch_deviation
9276 && curr_state->middle_bundle_stops
9277 < best_state->middle_bundle_stops)))))))
30028c85 9278 best_state = curr_state;
c856f536 9279 /* Second (backward) pass: adding nops and templates. */
388092d5 9280 gcc_assert (best_state);
30028c85
VM
9281 insn_num = best_state->before_nops_num;
9282 template0 = template1 = -1;
9283 for (curr_state = best_state;
9284 curr_state->originator != NULL;
9285 curr_state = curr_state->originator)
9286 {
9287 insn = curr_state->insn;
7b84aac0 9288 asm_p = unknown_for_bundling_p (insn);
30028c85
VM
9289 insn_num++;
9290 if (verbose >= 2 && dump)
2130b7fb 9291 {
30028c85
VM
9292 struct DFA_chip
9293 {
9294 unsigned short one_automaton_state;
9295 unsigned short oneb_automaton_state;
9296 unsigned short two_automaton_state;
9297 unsigned short twob_automaton_state;
9298 };
9c808aad 9299
30028c85
VM
9300 fprintf
9301 (dump,
388092d5 9302 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
30028c85
VM
9303 curr_state->unique_num,
9304 (curr_state->originator == NULL
9305 ? -1 : curr_state->originator->unique_num),
9306 curr_state->cost,
9307 curr_state->before_nops_num, curr_state->after_nops_num,
9308 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 9309 curr_state->middle_bundle_stops,
7400e46b 9310 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85 9311 INSN_UID (insn));
2130b7fb 9312 }
c856f536
VM
9313 /* Find the position in the current bundle window. The window can
9314 contain at most two bundles. Two bundle window means that
9315 the processor will make two bundle rotation. */
30028c85 9316 max_pos = get_max_pos (curr_state->dfa_state);
c856f536
VM
9317 if (max_pos == 6
9318 /* The following (negative template number) means that the
9319 processor did one bundle rotation. */
9320 || (max_pos == 3 && template0 < 0))
2130b7fb 9321 {
c856f536
VM
9322 /* We are at the end of the window -- find template(s) for
9323 its bundle(s). */
30028c85
VM
9324 pos = max_pos;
9325 if (max_pos == 3)
9326 template0 = get_template (curr_state->dfa_state, 3);
9327 else
9328 {
9329 template1 = get_template (curr_state->dfa_state, 3);
9330 template0 = get_template (curr_state->dfa_state, 6);
9331 }
9332 }
9333 if (max_pos > 3 && template1 < 0)
c856f536 9334 /* It may happen when we have the stop inside a bundle. */
30028c85 9335 {
e820471b 9336 gcc_assert (pos <= 3);
30028c85
VM
9337 template1 = get_template (curr_state->dfa_state, 3);
9338 pos += 3;
9339 }
f32360c7 9340 if (!asm_p)
c856f536 9341 /* Emit nops after the current insn. */
f32360c7
VM
9342 for (i = 0; i < curr_state->after_nops_num; i++)
9343 {
b32d5189
DM
9344 rtx nop_pat = gen_nop ();
9345 rtx_insn *nop = emit_insn_after (nop_pat, insn);
f32360c7 9346 pos--;
e820471b 9347 gcc_assert (pos >= 0);
f32360c7
VM
9348 if (pos % 3 == 0)
9349 {
c856f536
VM
9350 /* We are at the start of a bundle: emit the template
9351 (it should be defined). */
e820471b 9352 gcc_assert (template0 >= 0);
4a4cd49c 9353 ia64_add_bundle_selector_before (template0, nop);
c856f536
VM
9354 /* If we have two bundle window, we make one bundle
9355 rotation. Otherwise template0 will be undefined
9356 (negative value). */
f32360c7
VM
9357 template0 = template1;
9358 template1 = -1;
9359 }
9360 }
c856f536
VM
9361 /* Move the position backward in the window. Group barrier has
9362 no slot. Asm insn takes all bundle. */
30028c85 9363 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7b84aac0 9364 && !unknown_for_bundling_p (insn))
30028c85 9365 pos--;
c856f536 9366 /* Long insn takes 2 slots. */
30028c85
VM
9367 if (ia64_safe_type (insn) == TYPE_L)
9368 pos--;
e820471b 9369 gcc_assert (pos >= 0);
30028c85
VM
9370 if (pos % 3 == 0
9371 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7b84aac0 9372 && !unknown_for_bundling_p (insn))
30028c85 9373 {
c856f536
VM
9374 /* The current insn is at the bundle start: emit the
9375 template. */
e820471b 9376 gcc_assert (template0 >= 0);
4a4cd49c 9377 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
9378 b = PREV_INSN (insn);
9379 insn = b;
68776c43 9380 /* See comment above in analogous place for emitting nops
c856f536 9381 after the insn. */
30028c85
VM
9382 template0 = template1;
9383 template1 = -1;
9384 }
c856f536 9385 /* Emit nops after the current insn. */
30028c85
VM
9386 for (i = 0; i < curr_state->before_nops_num; i++)
9387 {
b32d5189
DM
9388 rtx nop_pat = gen_nop ();
9389 ia64_emit_insn_before (nop_pat, insn);
9390 rtx_insn *nop = PREV_INSN (insn);
30028c85
VM
9391 insn = nop;
9392 pos--;
e820471b 9393 gcc_assert (pos >= 0);
30028c85
VM
9394 if (pos % 3 == 0)
9395 {
68776c43 9396 /* See comment above in analogous place for emitting nops
c856f536 9397 after the insn. */
e820471b 9398 gcc_assert (template0 >= 0);
4a4cd49c 9399 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
9400 b = PREV_INSN (insn);
9401 insn = b;
9402 template0 = template1;
9403 template1 = -1;
9404 }
2130b7fb
BS
9405 }
9406 }
388092d5 9407
e28c2052
MM
9408 if (flag_checking)
9409 {
9410 /* Assert right calculation of middle_bundle_stops. */
9411 int num = best_state->middle_bundle_stops;
9412 bool start_bundle = true, end_bundle = false;
388092d5 9413
e28c2052
MM
9414 for (insn = NEXT_INSN (prev_head_insn);
9415 insn && insn != tail;
9416 insn = NEXT_INSN (insn))
9417 {
9418 if (!INSN_P (insn))
9419 continue;
9420 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9421 start_bundle = true;
9422 else
9423 {
9424 rtx_insn *next_insn;
9425
9426 for (next_insn = NEXT_INSN (insn);
9427 next_insn && next_insn != tail;
9428 next_insn = NEXT_INSN (next_insn))
9429 if (INSN_P (next_insn)
9430 && (ia64_safe_itanium_class (next_insn)
9431 != ITANIUM_CLASS_IGNORE
9432 || recog_memoized (next_insn)
9433 == CODE_FOR_bundle_selector)
9434 && GET_CODE (PATTERN (next_insn)) != USE
9435 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9436 break;
9437
9438 end_bundle = next_insn == NULL_RTX
9439 || next_insn == tail
9440 || (INSN_P (next_insn)
9441 && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9442 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9443 && !start_bundle && !end_bundle
9444 && next_insn
9445 && !unknown_for_bundling_p (next_insn))
9446 num--;
9447
9448 start_bundle = false;
9449 }
9450 }
388092d5 9451
e28c2052
MM
9452 gcc_assert (num == 0);
9453 }
388092d5 9454
30028c85
VM
9455 free (index_to_bundle_states);
9456 finish_bundle_state_table ();
9457 bundling_p = 0;
9458 dfa_clean_insn_cache ();
2130b7fb 9459}
c65ebc55 9460
30028c85
VM
9461/* The following function is called at the end of scheduling BB or
9462 EBB. After reload, it inserts stop bits and does insn bundling. */
9463
9464static void
9c808aad 9465ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 9466{
30028c85
VM
9467 if (sched_verbose)
9468 fprintf (dump, "// Finishing schedule.\n");
9469 if (!reload_completed)
9470 return;
9471 if (reload_completed)
9472 {
9473 final_emit_insn_group_barriers (dump);
9474 bundling (dump, sched_verbose, current_sched_info->prev_head,
9475 current_sched_info->next_tail);
9476 if (sched_verbose && dump)
9477 fprintf (dump, "// finishing %d-%d\n",
9478 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9479 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 9480
30028c85
VM
9481 return;
9482 }
c237e94a
ZW
9483}
9484
30028c85 9485/* The following function inserts stop bits in scheduled BB or EBB. */
2130b7fb 9486
30028c85 9487static void
9c808aad 9488final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 9489{
dd3d2b35 9490 rtx_insn *insn;
30028c85 9491 int need_barrier_p = 0;
388092d5 9492 int seen_good_insn = 0;
2130b7fb 9493
30028c85 9494 init_insn_group_barriers ();
2130b7fb 9495
30028c85
VM
9496 for (insn = NEXT_INSN (current_sched_info->prev_head);
9497 insn != current_sched_info->next_tail;
9498 insn = NEXT_INSN (insn))
9499 {
b64925dc 9500 if (BARRIER_P (insn))
b395ddbe 9501 {
dd3d2b35 9502 rtx_insn *last = prev_active_insn (insn);
14d118d6 9503
30028c85 9504 if (! last)
b395ddbe 9505 continue;
34f0d87a 9506 if (JUMP_TABLE_DATA_P (last))
30028c85
VM
9507 last = prev_active_insn (last);
9508 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9509 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 9510
30028c85 9511 init_insn_group_barriers ();
388092d5 9512 seen_good_insn = 0;
30028c85 9513 need_barrier_p = 0;
b395ddbe 9514 }
b5b8b0ac 9515 else if (NONDEBUG_INSN_P (insn))
2130b7fb 9516 {
30028c85 9517 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 9518 {
30028c85 9519 init_insn_group_barriers ();
388092d5 9520 seen_good_insn = 0;
30028c85 9521 need_barrier_p = 0;
c65ebc55 9522 }
388092d5
AB
9523 else if (need_barrier_p || group_barrier_needed (insn)
9524 || (mflag_sched_stop_bits_after_every_cycle
9525 && GET_MODE (insn) == TImode
9526 && seen_good_insn))
2130b7fb 9527 {
30028c85
VM
9528 if (TARGET_EARLY_STOP_BITS)
9529 {
dd3d2b35 9530 rtx_insn *last;
9c808aad 9531
30028c85
VM
9532 for (last = insn;
9533 last != current_sched_info->prev_head;
9534 last = PREV_INSN (last))
9535 if (INSN_P (last) && GET_MODE (last) == TImode
9536 && stops_p [INSN_UID (last)])
9537 break;
9538 if (last == current_sched_info->prev_head)
9539 last = insn;
9540 last = prev_active_insn (last);
9541 if (last
9542 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9543 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9544 last);
9545 init_insn_group_barriers ();
9546 for (last = NEXT_INSN (last);
9547 last != insn;
9548 last = NEXT_INSN (last))
9549 if (INSN_P (last))
388092d5
AB
9550 {
9551 group_barrier_needed (last);
9552 if (recog_memoized (last) >= 0
9553 && important_for_bundling_p (last))
9554 seen_good_insn = 1;
9555 }
30028c85
VM
9556 }
9557 else
9558 {
9559 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9560 insn);
9561 init_insn_group_barriers ();
388092d5 9562 seen_good_insn = 0;
30028c85 9563 }
c1bc6ca8 9564 group_barrier_needed (insn);
388092d5
AB
9565 if (recog_memoized (insn) >= 0
9566 && important_for_bundling_p (insn))
9567 seen_good_insn = 1;
2130b7fb 9568 }
388092d5
AB
9569 else if (recog_memoized (insn) >= 0
9570 && important_for_bundling_p (insn))
034288ef 9571 seen_good_insn = 1;
b64925dc 9572 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
c65ebc55 9573 }
2130b7fb 9574 }
30028c85 9575}
2130b7fb 9576
30028c85 9577\f
2130b7fb 9578
a4d05547 9579/* If the following function returns TRUE, we will use the DFA
30028c85 9580 insn scheduler. */
2130b7fb 9581
c237e94a 9582static int
9c808aad 9583ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 9584{
30028c85
VM
9585 return (reload_completed ? 6 : 4);
9586}
2130b7fb 9587
30028c85 9588/* The following function initiates variable `dfa_pre_cycle_insn'. */
2130b7fb 9589
30028c85 9590static void
9c808aad 9591ia64_init_dfa_pre_cycle_insn (void)
30028c85
VM
9592{
9593 if (temp_dfa_state == NULL)
2130b7fb 9594 {
30028c85
VM
9595 dfa_state_size = state_size ();
9596 temp_dfa_state = xmalloc (dfa_state_size);
9597 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 9598 }
30028c85 9599 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
0f82e5c9 9600 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
30028c85
VM
9601 recog_memoized (dfa_pre_cycle_insn);
9602 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
0f82e5c9 9603 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
30028c85
VM
9604 recog_memoized (dfa_stop_insn);
9605}
2130b7fb 9606
30028c85
VM
9607/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9608 used by the DFA insn scheduler. */
2130b7fb 9609
30028c85 9610static rtx
9c808aad 9611ia64_dfa_pre_cycle_insn (void)
30028c85
VM
9612{
9613 return dfa_pre_cycle_insn;
9614}
2130b7fb 9615
30028c85
VM
9616/* The following function returns TRUE if PRODUCER (of type ilog or
9617 ld) produces address for CONSUMER (of type st or stf). */
2130b7fb 9618
30028c85 9619int
647d790d 9620ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
30028c85
VM
9621{
9622 rtx dest, reg, mem;
2130b7fb 9623
e820471b 9624 gcc_assert (producer && consumer);
30028c85 9625 dest = ia64_single_set (producer);
e820471b
NS
9626 gcc_assert (dest);
9627 reg = SET_DEST (dest);
9628 gcc_assert (reg);
30028c85
VM
9629 if (GET_CODE (reg) == SUBREG)
9630 reg = SUBREG_REG (reg);
e820471b
NS
9631 gcc_assert (GET_CODE (reg) == REG);
9632
30028c85 9633 dest = ia64_single_set (consumer);
e820471b
NS
9634 gcc_assert (dest);
9635 mem = SET_DEST (dest);
9636 gcc_assert (mem && GET_CODE (mem) == MEM);
30028c85 9637 return reg_mentioned_p (reg, mem);
2130b7fb
BS
9638}
9639
30028c85
VM
9640/* The following function returns TRUE if PRODUCER (of type ilog or
9641 ld) produces address for CONSUMER (of type ld or fld). */
2130b7fb 9642
30028c85 9643int
647d790d 9644ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
2130b7fb 9645{
30028c85
VM
9646 rtx dest, src, reg, mem;
9647
e820471b 9648 gcc_assert (producer && consumer);
30028c85 9649 dest = ia64_single_set (producer);
e820471b
NS
9650 gcc_assert (dest);
9651 reg = SET_DEST (dest);
9652 gcc_assert (reg);
30028c85
VM
9653 if (GET_CODE (reg) == SUBREG)
9654 reg = SUBREG_REG (reg);
e820471b
NS
9655 gcc_assert (GET_CODE (reg) == REG);
9656
30028c85 9657 src = ia64_single_set (consumer);
e820471b
NS
9658 gcc_assert (src);
9659 mem = SET_SRC (src);
9660 gcc_assert (mem);
048d0d36 9661
30028c85
VM
9662 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9663 mem = XVECEXP (mem, 0, 0);
048d0d36 9664 else if (GET_CODE (mem) == IF_THEN_ELSE)
917f1b7e 9665 /* ??? Is this bypass necessary for ld.c? */
048d0d36
MK
9666 {
9667 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9668 mem = XEXP (mem, 1);
9669 }
9670
30028c85
VM
9671 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9672 mem = XEXP (mem, 0);
ef1ecf87 9673
048d0d36
MK
9674 if (GET_CODE (mem) == UNSPEC)
9675 {
9676 int c = XINT (mem, 1);
9677
388092d5
AB
9678 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9679 || c == UNSPEC_LDSA);
048d0d36
MK
9680 mem = XVECEXP (mem, 0, 0);
9681 }
9682
ef1ecf87 9683 /* Note that LO_SUM is used for GOT loads. */
e820471b 9684 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
ef1ecf87 9685
30028c85
VM
9686 return reg_mentioned_p (reg, mem);
9687}
9688
9689/* The following function returns TRUE if INSN produces address for a
9690 load/store insn. We will place such insns into M slot because it
ff482c8d 9691 decreases its latency time. */
30028c85
VM
9692
9693int
9c808aad 9694ia64_produce_address_p (rtx insn)
30028c85
VM
9695{
9696 return insn->call;
2130b7fb 9697}
30028c85 9698
2130b7fb 9699\f
3b572406
RH
9700/* Emit pseudo-ops for the assembler to describe predicate relations.
9701 At present this assumes that we only consider predicate pairs to
9702 be mutex, and that the assembler can deduce proper values from
9703 straight-line code. */
9704
9705static void
9c808aad 9706emit_predicate_relation_info (void)
3b572406 9707{
e0082a72 9708 basic_block bb;
3b572406 9709
4f42035e 9710 FOR_EACH_BB_REVERSE_FN (bb, cfun)
3b572406 9711 {
3b572406 9712 int r;
dd3d2b35 9713 rtx_insn *head = BB_HEAD (bb);
3b572406
RH
9714
9715 /* We only need such notes at code labels. */
b64925dc 9716 if (! LABEL_P (head))
3b572406 9717 continue;
740aeb38 9718 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
3b572406
RH
9719 head = NEXT_INSN (head);
9720
9f3b8452
RH
9721 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9722 grabbing the entire block of predicate registers. */
9723 for (r = PR_REG (2); r < PR_REG (64); r += 2)
6fb5fa3c 9724 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
3b572406 9725 {
f2f90c63 9726 rtx p = gen_rtx_REG (BImode, r);
dd3d2b35 9727 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111 9728 if (head == BB_END (bb))
1130d5e3 9729 BB_END (bb) = n;
3b572406
RH
9730 head = n;
9731 }
9732 }
ca3920ad
JW
9733
9734 /* Look for conditional calls that do not return, and protect predicate
9735 relations around them. Otherwise the assembler will assume the call
9736 returns, and complain about uses of call-clobbered predicates after
9737 the call. */
4f42035e 9738 FOR_EACH_BB_REVERSE_FN (bb, cfun)
ca3920ad 9739 {
dd3d2b35 9740 rtx_insn *insn = BB_HEAD (bb);
9c808aad 9741
ca3920ad
JW
9742 while (1)
9743 {
b64925dc 9744 if (CALL_P (insn)
ca3920ad
JW
9745 && GET_CODE (PATTERN (insn)) == COND_EXEC
9746 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9747 {
dd3d2b35
DM
9748 rtx_insn *b =
9749 emit_insn_before (gen_safe_across_calls_all (), insn);
9750 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111 9751 if (BB_HEAD (bb) == insn)
1130d5e3 9752 BB_HEAD (bb) = b;
a813c111 9753 if (BB_END (bb) == insn)
1130d5e3 9754 BB_END (bb) = a;
ca3920ad 9755 }
9c808aad 9756
a813c111 9757 if (insn == BB_END (bb))
ca3920ad
JW
9758 break;
9759 insn = NEXT_INSN (insn);
9760 }
9761 }
3b572406
RH
9762}
9763
c65ebc55
JW
9764/* Perform machine dependent operations on the rtl chain INSNS. */
9765
18dbd950 9766static void
9c808aad 9767ia64_reorg (void)
c65ebc55 9768{
1e3881c2
JH
9769 /* We are freeing block_for_insn in the toplev to keep compatibility
9770 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 9771 compute_bb_for_insn ();
a00fe19f
RH
9772
9773 /* If optimizing, we'll have split before scheduling. */
9774 if (optimize == 0)
6fb5fa3c 9775 split_all_insns ();
2130b7fb 9776
2ba42841 9777 if (optimize && flag_schedule_insns_after_reload
388092d5 9778 && dbg_cnt (ia64_sched2))
f4d578da 9779 {
547fdef8 9780 basic_block bb;
eced69b5 9781 timevar_push (TV_SCHED2);
f4d578da 9782 ia64_final_schedule = 1;
30028c85 9783
547fdef8
BS
9784 /* We can't let modulo-sched prevent us from scheduling any bbs,
9785 since we need the final schedule to produce bundle information. */
11cd3bed 9786 FOR_EACH_BB_FN (bb, cfun)
547fdef8
BS
9787 bb->flags &= ~BB_DISABLE_SCHEDULE;
9788
30028c85
VM
9789 initiate_bundle_states ();
9790 ia64_nop = make_insn_raw (gen_nop ());
0f82e5c9 9791 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
30028c85
VM
9792 recog_memoized (ia64_nop);
9793 clocks_length = get_max_uid () + 1;
5ead67f6 9794 stops_p = XCNEWVEC (char, clocks_length);
7400e46b 9795
30028c85
VM
9796 if (ia64_tune == PROCESSOR_ITANIUM2)
9797 {
9798 pos_1 = get_cpu_unit_code ("2_1");
9799 pos_2 = get_cpu_unit_code ("2_2");
9800 pos_3 = get_cpu_unit_code ("2_3");
9801 pos_4 = get_cpu_unit_code ("2_4");
9802 pos_5 = get_cpu_unit_code ("2_5");
9803 pos_6 = get_cpu_unit_code ("2_6");
9804 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9805 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9806 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9807 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9808 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9809 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9810 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9811 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9812 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9813 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9814 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9815 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9816 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9817 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9818 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9819 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9820 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9821 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9822 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9823 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9824 }
9825 else
9826 {
9827 pos_1 = get_cpu_unit_code ("1_1");
9828 pos_2 = get_cpu_unit_code ("1_2");
9829 pos_3 = get_cpu_unit_code ("1_3");
9830 pos_4 = get_cpu_unit_code ("1_4");
9831 pos_5 = get_cpu_unit_code ("1_5");
9832 pos_6 = get_cpu_unit_code ("1_6");
9833 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9834 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9835 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9836 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9837 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9838 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9839 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9840 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9841 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9842 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9843 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9844 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9845 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9846 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9847 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9848 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9849 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9850 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9851 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9852 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9853 }
388092d5
AB
9854
9855 if (flag_selective_scheduling2
9856 && !maybe_skip_selective_scheduling ())
9857 run_selective_scheduling ();
9858 else
9859 schedule_ebbs ();
9860
9861	  /* Redo alignment computation, as it might have gone wrong.  */
9862 compute_alignments ();
9863
6fb5fa3c
DB
9864 /* We cannot reuse this one because it has been corrupted by the
9865 evil glat. */
30028c85 9866 finish_bundle_states ();
30028c85 9867 free (stops_p);
048d0d36 9868 stops_p = NULL;
c263766c 9869 emit_insn_group_barriers (dump_file);
30028c85 9870
f4d578da 9871 ia64_final_schedule = 0;
eced69b5 9872 timevar_pop (TV_SCHED2);
f4d578da
BS
9873 }
9874 else
c263766c 9875 emit_all_insn_group_barriers (dump_file);
f2f90c63 9876
6fb5fa3c
DB
9877 df_analyze ();
9878
f12f25a7
RH
9879 /* A call must not be the last instruction in a function, so that the
9880 return address is still within the function, so that unwinding works
9881 properly. Note that IA-64 differs from dwarf2 on this point. */
d5fabb58 9882 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
f12f25a7 9883 {
dd3d2b35 9884 rtx_insn *insn;
f12f25a7
RH
9885 int saw_stop = 0;
9886
9887 insn = get_last_insn ();
9888 if (! INSN_P (insn))
9889 insn = prev_active_insn (insn);
2ca57608 9890 if (insn)
f12f25a7 9891 {
2ca57608 9892 /* Skip over insns that expand to nothing. */
b64925dc 9893 while (NONJUMP_INSN_P (insn)
2ca57608
L
9894 && get_attr_empty (insn) == EMPTY_YES)
9895 {
9896 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9897 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9898 saw_stop = 1;
9899 insn = prev_active_insn (insn);
9900 }
b64925dc 9901 if (CALL_P (insn))
2ca57608
L
9902 {
9903 if (! saw_stop)
9904 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9905 emit_insn (gen_break_f ());
9906 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9907 }
f12f25a7
RH
9908 }
9909 }
9910
f2f90c63 9911 emit_predicate_relation_info ();
014a1138 9912
2ba42841 9913 if (flag_var_tracking)
014a1138
JZ
9914 {
9915 timevar_push (TV_VAR_TRACKING);
9916 variable_tracking_main ();
9917 timevar_pop (TV_VAR_TRACKING);
9918 }
0d475361 9919 df_finish_pass (false);
c65ebc55
JW
9920}
9921\f
9922/* Return true if REGNO is used by the epilogue. */
9923
9924int
9c808aad 9925ia64_epilogue_uses (int regno)
c65ebc55 9926{
6ca3c22f
RH
9927 switch (regno)
9928 {
9929 case R_GR (1):
b23ba0b8
RH
9930 /* With a call to a function in another module, we will write a new
9931 value to "gp". After returning from such a call, we need to make
9932 sure the function restores the original gp-value, even if the
9933 function itself does not use the gp anymore. */
9934 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
9935
9936 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9937 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9938 /* For functions defined with the syscall_linkage attribute, all
9939 input registers are marked as live at all function exits. This
9940 prevents the register allocator from using the input registers,
9941 which in turn makes it possible to restart a system call after
9942 an interrupt without having to save/restore the input registers.
9943 This also prevents kernel data from leaking to application code. */
9944 return lookup_attribute ("syscall_linkage",
9945 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9946
9947 case R_BR (0):
9948 /* Conditional return patterns can't represent the use of `b0' as
9949 the return address, so we force the value live this way. */
9950 return 1;
6b6c1201 9951
6ca3c22f
RH
9952 case AR_PFS_REGNUM:
9953 /* Likewise for ar.pfs, which is used by br.ret. */
9954 return 1;
5527bf14 9955
6ca3c22f
RH
9956 default:
9957 return 0;
9958 }
c65ebc55 9959}
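/* Added usage sketch, not part of the original ia64.c: the IN_REG case
   above refers to the ia64-specific "syscall_linkage" attribute that this
   port registers.  A hypothetical declaration using it looks like this;
   all eight input registers then stay live at every exit of the function.  */
extern long sys_entry (long nr, long arg0, long arg1)
     __attribute__ ((syscall_linkage));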
15b5aef3
RH
9960
9961/* Return true if REGNO is used by the frame unwinder. */
9962
9963int
9c808aad 9964ia64_eh_uses (int regno)
15b5aef3 9965{
09639a83 9966 unsigned int r;
6fb5fa3c 9967
15b5aef3
RH
9968 if (! reload_completed)
9969 return 0;
9970
6fb5fa3c
DB
9971 if (regno == 0)
9972 return 0;
9973
9974 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9975 if (regno == current_frame_info.r[r]
9976 || regno == emitted_frame_related_regs[r])
9977 return 1;
15b5aef3
RH
9978
9979 return 0;
9980}
c65ebc55 9981\f
1cdbd630 9982/* Return true if this goes in small data/bss. */
c65ebc55
JW
9983
9984/* ??? We could also support our own long data here.  Generating movl/add/ld8
9985 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9986 code faster because there is one less load. This also includes incomplete
9987 types which can't go in sdata/sbss. */
9988
ae46c4e0 9989static bool
3101faab 9990ia64_in_small_data_p (const_tree exp)
ae46c4e0
RH
9991{
9992 if (TARGET_NO_SDATA)
9993 return false;
9994
3907500b
RH
9995 /* We want to merge strings, so we never consider them small data. */
9996 if (TREE_CODE (exp) == STRING_CST)
9997 return false;
9998
4c494a15
ZW
9999 /* Functions are never small data. */
10000 if (TREE_CODE (exp) == FUNCTION_DECL)
10001 return false;
10002
ae46c4e0
RH
10003 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
10004 {
f961457f 10005 const char *section = DECL_SECTION_NAME (exp);
826eb7ed 10006
ae46c4e0 10007 if (strcmp (section, ".sdata") == 0
826eb7ed
JB
10008 || strncmp (section, ".sdata.", 7) == 0
10009 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
10010 || strcmp (section, ".sbss") == 0
10011 || strncmp (section, ".sbss.", 6) == 0
10012 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
ae46c4e0
RH
10013 return true;
10014 }
10015 else
10016 {
10017 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
10018
10019 /* If this is an incomplete type with size 0, then we can't put it
10020 in sdata because it might be too big when completed. */
10021 if (size > 0 && size <= ia64_section_threshold)
10022 return true;
10023 }
10024
10025 return false;
10026}
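/* Added illustration, not part of the original ia64.c: objects at or below
   ia64_section_threshold, or explicitly placed in an .sdata/.sbss-style
   section, are reached gp-relative with the short addl/ld8 sequence the
   comment above mentions.  The declarations are hypothetical.  */
static int hit_count;                                        /* small: eligible for .sbss    */
static char big_table[65536];                                /* too large for small data     */
static int pinned __attribute__ ((section (".sdata"))) = 1;  /* explicitly placed in .sdata  */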
0c96007e 10027\f
ad0fc698
JW
10028/* Output assembly directives for prologue regions. */
10029
10030/* The current basic block number. */
10031
e0082a72 10032static bool last_block;
ad0fc698
JW
10033
10034/* True if we need a copy_state command at the start of the next block. */
10035
e0082a72 10036static bool need_copy_state;
ad0fc698 10037
658f32fd
AO
10038#ifndef MAX_ARTIFICIAL_LABEL_BYTES
10039# define MAX_ARTIFICIAL_LABEL_BYTES 30
10040#endif
10041
ad0fc698
JW
10042/* The function emits unwind directives for the start of an epilogue. */
10043
10044static void
7d3c6cd8
RH
10045process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
10046 bool unwind, bool frame ATTRIBUTE_UNUSED)
ad0fc698
JW
10047{
10048 /* If this isn't the last block of the function, then we need to label the
10049 current state, and copy it back in at the start of the next block. */
10050
e0082a72 10051 if (!last_block)
ad0fc698 10052 {
658f32fd
AO
10053 if (unwind)
10054 fprintf (asm_out_file, "\t.label_state %d\n",
10055 ++cfun->machine->state_num);
e0082a72 10056 need_copy_state = true;
ad0fc698
JW
10057 }
10058
658f32fd
AO
10059 if (unwind)
10060 fprintf (asm_out_file, "\t.restore sp\n");
ad0fc698 10061}
0c96007e 10062
5c255b57 10063/* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
97e242b0 10064
5c255b57
RH
10065static void
10066process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10067 bool unwind, bool frame)
0c96007e 10068{
0c96007e 10069 rtx dest = SET_DEST (pat);
5c255b57 10070 rtx src = SET_SRC (pat);
0c96007e 10071
5c255b57 10072 if (dest == stack_pointer_rtx)
0c96007e
AM
10073 {
10074 if (GET_CODE (src) == PLUS)
5c255b57 10075 {
0c96007e
AM
10076 rtx op0 = XEXP (src, 0);
10077 rtx op1 = XEXP (src, 1);
e820471b
NS
10078
10079 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10080
10081 if (INTVAL (op1) < 0)
658f32fd
AO
10082 {
10083 gcc_assert (!frame_pointer_needed);
10084 if (unwind)
5c255b57 10085 fprintf (asm_out_file,
16998094 10086 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
658f32fd 10087 -INTVAL (op1));
658f32fd 10088 }
0186257f 10089 else
658f32fd 10090 process_epilogue (asm_out_file, insn, unwind, frame);
0c96007e 10091 }
0186257f 10092 else
e820471b 10093 {
5c255b57 10094 gcc_assert (src == hard_frame_pointer_rtx);
658f32fd 10095 process_epilogue (asm_out_file, insn, unwind, frame);
e820471b 10096 }
5c255b57
RH
10097 }
10098 else if (dest == hard_frame_pointer_rtx)
10099 {
10100 gcc_assert (src == stack_pointer_rtx);
10101 gcc_assert (frame_pointer_needed);
0186257f 10102
5c255b57
RH
10103 if (unwind)
10104 fprintf (asm_out_file, "\t.vframe r%d\n",
10105 ia64_dbx_register_number (REGNO (dest)));
0c96007e 10106 }
5c255b57
RH
10107 else
10108 gcc_unreachable ();
10109}
0c96007e 10110
5c255b57 10111/* This function processes a SET pattern for REG_CFA_REGISTER. */
97e242b0 10112
5c255b57
RH
10113static void
10114process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10115{
10116 rtx dest = SET_DEST (pat);
10117 rtx src = SET_SRC (pat);
5c255b57 10118 int dest_regno = REGNO (dest);
5f740973 10119 int src_regno;
97e242b0 10120
5f740973 10121 if (src == pc_rtx)
5c255b57 10122 {
5c255b57 10123 /* Saving return address pointer. */
5c255b57
RH
10124 if (unwind)
10125 fprintf (asm_out_file, "\t.save rp, r%d\n",
10126 ia64_dbx_register_number (dest_regno));
5f740973
RH
10127 return;
10128 }
10129
10130 src_regno = REGNO (src);
97e242b0 10131
5f740973
RH
10132 switch (src_regno)
10133 {
5c255b57
RH
10134 case PR_REG (0):
10135 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10136 if (unwind)
10137 fprintf (asm_out_file, "\t.save pr, r%d\n",
10138 ia64_dbx_register_number (dest_regno));
10139 break;
97e242b0 10140
5c255b57
RH
10141 case AR_UNAT_REGNUM:
10142 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10143 if (unwind)
10144 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10145 ia64_dbx_register_number (dest_regno));
10146 break;
97e242b0 10147
5c255b57
RH
10148 case AR_LC_REGNUM:
10149 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10150 if (unwind)
10151 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10152 ia64_dbx_register_number (dest_regno));
10153 break;
10154
10155 default:
10156 /* Everything else should indicate being stored to memory. */
10157 gcc_unreachable ();
0c96007e 10158 }
5c255b57 10159}
97e242b0 10160
5c255b57 10161/* This function processes a SET pattern for REG_CFA_OFFSET. */
97e242b0 10162
5c255b57
RH
10163static void
10164process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10165{
10166 rtx dest = SET_DEST (pat);
10167 rtx src = SET_SRC (pat);
10168 int src_regno = REGNO (src);
10169 const char *saveop;
10170 HOST_WIDE_INT off;
10171 rtx base;
0c96007e 10172
5c255b57
RH
10173 gcc_assert (MEM_P (dest));
10174 if (GET_CODE (XEXP (dest, 0)) == REG)
10175 {
10176 base = XEXP (dest, 0);
10177 off = 0;
10178 }
10179 else
10180 {
10181 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10182 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10183 base = XEXP (XEXP (dest, 0), 0);
10184 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10185 }
97e242b0 10186
5c255b57
RH
10187 if (base == hard_frame_pointer_rtx)
10188 {
10189 saveop = ".savepsp";
10190 off = - off;
10191 }
10192 else
10193 {
10194 gcc_assert (base == stack_pointer_rtx);
10195 saveop = ".savesp";
10196 }
97e242b0 10197
5c255b57
RH
10198 src_regno = REGNO (src);
10199 switch (src_regno)
10200 {
10201 case BR_REG (0):
10202 gcc_assert (!current_frame_info.r[reg_save_b0]);
10203 if (unwind)
10204 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10205 saveop, off);
10206 break;
97e242b0 10207
5c255b57
RH
10208 case PR_REG (0):
10209 gcc_assert (!current_frame_info.r[reg_save_pr]);
10210 if (unwind)
10211 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10212 saveop, off);
10213 break;
97e242b0 10214
5c255b57
RH
10215 case AR_LC_REGNUM:
10216 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10217 if (unwind)
10218 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10219 saveop, off);
10220 break;
97e242b0 10221
5c255b57
RH
10222 case AR_PFS_REGNUM:
10223 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10224 if (unwind)
10225 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10226 saveop, off);
10227 break;
97e242b0 10228
5c255b57
RH
10229 case AR_UNAT_REGNUM:
10230 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10231 if (unwind)
10232 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10233 saveop, off);
10234 break;
97e242b0 10235
5c255b57
RH
10236 case GR_REG (4):
10237 case GR_REG (5):
10238 case GR_REG (6):
10239 case GR_REG (7):
10240 if (unwind)
10241 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10242 1 << (src_regno - GR_REG (4)));
10243 break;
97e242b0 10244
5c255b57
RH
10245 case BR_REG (1):
10246 case BR_REG (2):
10247 case BR_REG (3):
10248 case BR_REG (4):
10249 case BR_REG (5):
10250 if (unwind)
10251 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10252 1 << (src_regno - BR_REG (1)));
10253 break;
97e242b0 10254
5c255b57
RH
10255 case FR_REG (2):
10256 case FR_REG (3):
10257 case FR_REG (4):
10258 case FR_REG (5):
10259 if (unwind)
10260 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10261 1 << (src_regno - FR_REG (2)));
10262 break;
97e242b0 10263
5c255b57
RH
10264 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10265 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10266 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10267 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10268 if (unwind)
10269 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10270 1 << (src_regno - FR_REG (12)));
10271 break;
97e242b0 10272
5c255b57
RH
10273 default:
10274 /* ??? For some reason we mark other general registers, even those
10275 we can't represent in the unwind info. Ignore them. */
10276 break;
10277 }
0c96007e
AM
10278}
10279
0c96007e
AM
10280/* This function looks at a single insn and emits any directives
10281 required to unwind this insn. */
5c255b57 10282
a68b5e52 10283static void
ac44248e 10284ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
0c96007e 10285{
d5fabb58 10286 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
658f32fd 10287 bool frame = dwarf2out_do_frame ();
5c255b57
RH
10288 rtx note, pat;
10289 bool handled_one;
10290
10291 if (!unwind && !frame)
10292 return;
658f32fd 10293
5c255b57 10294 if (NOTE_INSN_BASIC_BLOCK_P (insn))
0c96007e 10295 {
fefa31b5
DM
10296 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10297 == EXIT_BLOCK_PTR_FOR_FN (cfun);
97e242b0 10298
5c255b57
RH
10299 /* Restore unwind state from immediately before the epilogue. */
10300 if (need_copy_state)
ad0fc698 10301 {
5c255b57 10302 if (unwind)
ad0fc698 10303 {
5c255b57
RH
10304 fprintf (asm_out_file, "\t.body\n");
10305 fprintf (asm_out_file, "\t.copy_state %d\n",
10306 cfun->machine->state_num);
ad0fc698 10307 }
5c255b57 10308 need_copy_state = false;
ad0fc698 10309 }
5c255b57 10310 }
ad0fc698 10311
b64925dc 10312 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
5c255b57
RH
10313 return;
10314
10315 /* Look for the ALLOC insn. */
10316 if (INSN_CODE (insn) == CODE_FOR_alloc)
10317 {
10318 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10319 int dest_regno = REGNO (dest);
ad0fc698 10320
5c255b57
RH
10321 /* If this is the final destination for ar.pfs, then this must
10322 be the alloc in the prologue. */
10323 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10324 {
10325 if (unwind)
10326 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10327 ia64_dbx_register_number (dest_regno));
10328 }
97e242b0 10329 else
5c255b57
RH
10330 {
10331 /* This must be an alloc before a sibcall. We must drop the
10332 old frame info. The easiest way to drop the old frame
10333 info is to ensure we had a ".restore sp" directive
10334 followed by a new prologue. If the procedure doesn't
10335 have a memory-stack frame, we'll issue a dummy ".restore
10336 sp" now. */
10337 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10338	    /* If we haven't done process_epilogue () yet, do it now.  */
10339 process_epilogue (asm_out_file, insn, unwind, frame);
10340 if (unwind)
10341 fprintf (asm_out_file, "\t.prologue\n");
10342 }
10343 return;
10344 }
0c96007e 10345
5c255b57
RH
10346 handled_one = false;
10347 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10348 switch (REG_NOTE_KIND (note))
10349 {
10350 case REG_CFA_ADJUST_CFA:
10351 pat = XEXP (note, 0);
10352 if (pat == NULL)
10353 pat = PATTERN (insn);
10354 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10355 handled_one = true;
10356 break;
809d4ef1 10357
5c255b57
RH
10358 case REG_CFA_OFFSET:
10359 pat = XEXP (note, 0);
10360 if (pat == NULL)
10361 pat = PATTERN (insn);
10362 process_cfa_offset (asm_out_file, pat, unwind);
10363 handled_one = true;
10364 break;
809d4ef1 10365
5c255b57
RH
10366 case REG_CFA_REGISTER:
10367 pat = XEXP (note, 0);
10368 if (pat == NULL)
10369 pat = PATTERN (insn);
10370 process_cfa_register (asm_out_file, pat, unwind);
10371 handled_one = true;
10372 break;
10373
10374 case REG_FRAME_RELATED_EXPR:
10375 case REG_CFA_DEF_CFA:
10376 case REG_CFA_EXPRESSION:
10377 case REG_CFA_RESTORE:
10378 case REG_CFA_SET_VDRAP:
10379 /* Not used in the ia64 port. */
10380 gcc_unreachable ();
10381
10382 default:
10383 /* Not a frame-related note. */
10384 break;
10385 }
10386
10387 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10388 explicit action to take. No guessing required. */
10389 gcc_assert (handled_one);
0c96007e 10390}
c65ebc55 10391
a68b5e52
RH
10392/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10393
10394static void
10395ia64_asm_emit_except_personality (rtx personality)
10396{
10397 fputs ("\t.personality\t", asm_out_file);
10398 output_addr_const (asm_out_file, personality);
10399 fputc ('\n', asm_out_file);
10400}
10401
10402/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10403
10404static void
10405ia64_asm_init_sections (void)
10406{
10407 exception_section = get_unnamed_section (0, output_section_asm_op,
10408 "\t.handlerdata");
10409}
f0a0390e
RH
10410
10411/* Implement TARGET_DEBUG_UNWIND_INFO. */
10412
10413static enum unwind_info_type
10414ia64_debug_unwind_info (void)
10415{
10416 return UI_TARGET;
10417}
0551c32d 10418\f
af795c3c
RH
10419enum ia64_builtins
10420{
10421 IA64_BUILTIN_BSP,
c252db20
L
10422 IA64_BUILTIN_COPYSIGNQ,
10423 IA64_BUILTIN_FABSQ,
10424 IA64_BUILTIN_FLUSHRS,
fcb82ab0 10425 IA64_BUILTIN_INFQ,
b14446e2 10426 IA64_BUILTIN_HUGE_VALQ,
b6ca982f
UB
10427 IA64_BUILTIN_NANQ,
10428 IA64_BUILTIN_NANSQ,
b14446e2 10429 IA64_BUILTIN_max
af795c3c
RH
10430};
10431
b14446e2
SE
10432static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10433
c65ebc55 10434void
9c808aad 10435ia64_init_builtins (void)
c65ebc55 10436{
9649812a 10437 tree fpreg_type;
bf9ab6b6 10438 tree float80_type;
b14446e2 10439 tree decl;
9649812a
MM
10440
10441 /* The __fpreg type. */
10442 fpreg_type = make_node (REAL_TYPE);
4de67c26 10443 TYPE_PRECISION (fpreg_type) = 82;
9649812a
MM
10444 layout_type (fpreg_type);
10445 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10446
10447 /* The __float80 type. */
c65699ef
JM
10448 if (float64x_type_node != NULL_TREE
10449 && TYPE_MODE (float64x_type_node) == XFmode)
10450 float80_type = float64x_type_node;
10451 else
10452 {
10453 float80_type = make_node (REAL_TYPE);
10454 TYPE_PRECISION (float80_type) = 80;
10455 layout_type (float80_type);
10456 }
bf9ab6b6 10457 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
10458
10459 /* The __float128 type. */
02befdf4 10460 if (!TARGET_HPUX)
9649812a 10461 {
b14446e2 10462 tree ftype;
b6ca982f
UB
10463 tree const_string_type
10464 = build_pointer_type (build_qualified_type
10465 (char_type_node, TYPE_QUAL_CONST));
c252db20 10466
c65699ef
JM
10467 (*lang_hooks.types.register_builtin_type) (float128_type_node,
10468 "__float128");
c252db20
L
10469
10470 /* TFmode support builtins. */
c65699ef 10471 ftype = build_function_type_list (float128_type_node, NULL_TREE);
b14446e2
SE
10472 decl = add_builtin_function ("__builtin_infq", ftype,
10473 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10474 NULL, NULL_TREE);
10475 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
c252db20 10476
b14446e2
SE
10477 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10478 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10479 NULL, NULL_TREE);
10480 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
fcb82ab0 10481
c65699ef 10482 ftype = build_function_type_list (float128_type_node,
b6ca982f
UB
10483 const_string_type,
10484 NULL_TREE);
10485 decl = add_builtin_function ("__builtin_nanq", ftype,
10486 IA64_BUILTIN_NANQ, BUILT_IN_MD,
10487 "nanq", NULL_TREE);
10488 TREE_READONLY (decl) = 1;
10489 ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10490
10491 decl = add_builtin_function ("__builtin_nansq", ftype,
10492 IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10493 "nansq", NULL_TREE);
10494 TREE_READONLY (decl) = 1;
10495 ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10496
c65699ef
JM
10497 ftype = build_function_type_list (float128_type_node,
10498 float128_type_node,
c252db20
L
10499 NULL_TREE);
10500 decl = add_builtin_function ("__builtin_fabsq", ftype,
10501 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10502 "__fabstf2", NULL_TREE);
10503 TREE_READONLY (decl) = 1;
b14446e2 10504 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
c252db20 10505
c65699ef
JM
10506 ftype = build_function_type_list (float128_type_node,
10507 float128_type_node,
10508 float128_type_node,
c252db20
L
10509 NULL_TREE);
10510 decl = add_builtin_function ("__builtin_copysignq", ftype,
10511 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10512 "__copysigntf3", NULL_TREE);
10513 TREE_READONLY (decl) = 1;
b14446e2 10514 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
9649812a
MM
10515 }
10516 else
02befdf4 10517 /* Under HPUX, this is a synonym for "long double". */
9649812a
MM
10518 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10519 "__float128");
10520
f2972bf8 10521 /* Fwrite on VMS is non-standard. */
171da07a
RH
10522#if TARGET_ABI_OPEN_VMS
10523 vms_patch_builtins ();
10524#endif
f2972bf8 10525
6e34d3a3 10526#define def_builtin(name, type, code) \
c79efc4d
RÁE
10527 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10528 NULL, NULL_TREE)
0551c32d 10529
b14446e2 10530 decl = def_builtin ("__builtin_ia64_bsp",
c0676219
NF
10531 build_function_type_list (ptr_type_node, NULL_TREE),
10532 IA64_BUILTIN_BSP);
b14446e2 10533 ia64_builtins[IA64_BUILTIN_BSP] = decl;
ce152ef8 10534
b14446e2 10535 decl = def_builtin ("__builtin_ia64_flushrs",
c0676219
NF
10536 build_function_type_list (void_type_node, NULL_TREE),
10537 IA64_BUILTIN_FLUSHRS);
b14446e2 10538 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
ce152ef8 10539
0551c32d 10540#undef def_builtin
7d522000
SE
10541
10542 if (TARGET_HPUX)
10543 {
ccea4a27 10544 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
e79983f4 10545 set_user_assembler_name (decl, "_Isfinite");
ccea4a27 10546 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
e79983f4 10547 set_user_assembler_name (decl, "_Isfinitef");
ccea4a27 10548 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
e79983f4 10549 set_user_assembler_name (decl, "_Isfinitef128");
7d522000 10550 }
c65ebc55
JW
10551}
10552
b6ca982f
UB
10553static tree
10554ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10555 tree *args, bool ignore ATTRIBUTE_UNUSED)
10556{
10557 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10558 {
10559 enum ia64_builtins fn_code = (enum ia64_builtins)
10560 DECL_FUNCTION_CODE (fndecl);
10561 switch (fn_code)
10562 {
10563 case IA64_BUILTIN_NANQ:
10564 case IA64_BUILTIN_NANSQ:
10565 {
10566 tree type = TREE_TYPE (TREE_TYPE (fndecl));
10567 const char *str = c_getstr (*args);
10568 int quiet = fn_code == IA64_BUILTIN_NANQ;
10569 REAL_VALUE_TYPE real;
10570
10571 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10572 return build_real (type, real);
10573 return NULL_TREE;
10574 }
10575
10576 default:
10577 break;
10578 }
10579 }
10580
10581#ifdef SUBTARGET_FOLD_BUILTIN
10582 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10583#endif
10584
10585 return NULL_TREE;
10586}
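/* Added illustration, not part of the original ia64.c: the TFmode builtins
   handled above are available when __float128 is distinct from long double
   (i.e. not on HP-UX); with constant tag strings the NaN builtins fold to
   constants through ia64_fold_builtin.  The helper names are hypothetical.  */
static __float128
tf_quiet_nan (void)
{
  return __builtin_nanq ("");      /* quiet __float128 NaN  */
}

static __float128
tf_infinity (void)
{
  return __builtin_infq ();        /* __float128 +infinity  */
}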
10587
c65ebc55 10588rtx
9c808aad 10589ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
ef4bddc2 10590 machine_mode mode ATTRIBUTE_UNUSED,
9c808aad 10591 int ignore ATTRIBUTE_UNUSED)
c65ebc55 10592{
767fad4c 10593 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
97e242b0 10594 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
c65ebc55
JW
10595
10596 switch (fcode)
10597 {
ce152ef8 10598 case IA64_BUILTIN_BSP:
0551c32d
RH
10599 if (! target || ! register_operand (target, DImode))
10600 target = gen_reg_rtx (DImode);
10601 emit_insn (gen_bsp_value (target));
8419b675
RK
10602#ifdef POINTERS_EXTEND_UNSIGNED
10603 target = convert_memory_address (ptr_mode, target);
10604#endif
0551c32d 10605 return target;
ce152ef8
AM
10606
10607 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
10608 emit_insn (gen_flushrs ());
10609 return const0_rtx;
ce152ef8 10610
c252db20 10611 case IA64_BUILTIN_INFQ:
fcb82ab0 10612 case IA64_BUILTIN_HUGE_VALQ:
c252db20 10613 {
ef4bddc2 10614 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
c252db20
L
10615 REAL_VALUE_TYPE inf;
10616 rtx tmp;
10617
10618 real_inf (&inf);
555affd7 10619 tmp = const_double_from_real_value (inf, target_mode);
c252db20 10620
6aad068a 10621 tmp = validize_mem (force_const_mem (target_mode, tmp));
c252db20
L
10622
10623 if (target == 0)
6aad068a 10624 target = gen_reg_rtx (target_mode);
c252db20
L
10625
10626 emit_move_insn (target, tmp);
10627 return target;
10628 }
10629
b6ca982f
UB
10630 case IA64_BUILTIN_NANQ:
10631 case IA64_BUILTIN_NANSQ:
c252db20
L
10632 case IA64_BUILTIN_FABSQ:
10633 case IA64_BUILTIN_COPYSIGNQ:
10634 return expand_call (exp, target, ignore);
10635
c65ebc55 10636 default:
c252db20 10637 gcc_unreachable ();
c65ebc55
JW
10638 }
10639
0551c32d 10640 return NULL_RTX;
c65ebc55 10641}
0d7839da 10642
b14446e2
SE
10643/* Return the ia64 builtin for CODE. */
10644
10645static tree
10646ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10647{
10648 if (code >= IA64_BUILTIN_max)
10649 return error_mark_node;
10650
10651 return ia64_builtins[code];
10652}
10653
76b0cbf8
RS
10654/* Implement TARGET_FUNCTION_ARG_PADDING.
10655
10656   On HP-UX IA64, aggregate parameters are passed in the
0d7839da
SE
10657 most significant bits of the stack slot. */
10658
76b0cbf8
RS
10659static pad_direction
10660ia64_function_arg_padding (machine_mode mode, const_tree type)
0d7839da 10661{
76b0cbf8
RS
10662 /* Exception to normal case for structures/unions/etc. */
10663 if (TARGET_HPUX
10664 && type
10665 && AGGREGATE_TYPE_P (type)
10666 && int_size_in_bytes (type) < UNITS_PER_WORD)
10667 return PAD_UPWARD;
0d7839da 10668
76b0cbf8
RS
10669 /* Fall back to the default. */
10670 return default_function_arg_padding (mode, type);
0d7839da 10671}
686f3bf0 10672
c47c29c8
L
10673/* Emit text to declare externally defined variables and functions, because
10674 the Intel assembler does not support undefined externals. */
686f3bf0 10675
c47c29c8
L
10676void
10677ia64_asm_output_external (FILE *file, tree decl, const char *name)
686f3bf0 10678{
c47c29c8
L
10679 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10680 set in order to avoid putting out names that are never really
10681 used. */
10682 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
686f3bf0 10683 {
c47c29c8 10684 /* maybe_assemble_visibility will return 1 if the assembler
2e226e66 10685 visibility directive is output. */
c47c29c8
L
10686 int need_visibility = ((*targetm.binds_local_p) (decl)
10687 && maybe_assemble_visibility (decl));
57d4f65c 10688
c47c29c8
L
10689 /* GNU as does not need anything here, but the HP linker does
10690 need something for external functions. */
10691 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10692 && TREE_CODE (decl) == FUNCTION_DECL)
812b587e 10693 (*targetm.asm_out.globalize_decl_name) (file, decl);
c47c29c8
L
10694 else if (need_visibility && !TARGET_GNU_AS)
10695 (*targetm.asm_out.globalize_label) (file, name);
686f3bf0
SE
10696 }
10697}
10698
1f7aa7cd 10699/* Set SImode div/mod functions; init_integral_libfuncs only initializes
6bc709c1
L
10700 modes of word_mode and larger. Rename the TFmode libfuncs using the
10701 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10702 backward compatibility. */
1f7aa7cd
SE
10703
10704static void
10705ia64_init_libfuncs (void)
10706{
10707 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10708 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10709 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10710 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
6bc709c1
L
10711
10712 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10713 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10714 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10715 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10716 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10717
10718 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10719 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10720 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10721 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10722 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10723 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10724
10725 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10726 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
4a73d865 10727 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
6bc709c1
L
10728 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10729 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10730
10731 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10732 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
4a73d865 10733 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
2a3ebe77
JM
10734 /* HP-UX 11.23 libc does not have a function for unsigned
10735 SImode-to-TFmode conversion. */
10736 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
1f7aa7cd
SE
10737}
10738
c15c90bb 10739/* Rename all the TFmode libfuncs using the HPUX conventions. */
738e7b39 10740
c15c90bb
ZW
10741static void
10742ia64_hpux_init_libfuncs (void)
10743{
1f7aa7cd
SE
10744 ia64_init_libfuncs ();
10745
bdbba3c2
SE
10746 /* The HP SI millicode division and mod functions expect DI arguments.
10747 By turning them off completely we avoid using both libgcc and the
10748 non-standard millicode routines and use the HP DI millicode routines
10749 instead. */
10750
10751 set_optab_libfunc (sdiv_optab, SImode, 0);
10752 set_optab_libfunc (udiv_optab, SImode, 0);
10753 set_optab_libfunc (smod_optab, SImode, 0);
10754 set_optab_libfunc (umod_optab, SImode, 0);
10755
10756 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10757 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10758 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10759 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10760
10761 /* HP-UX libc has TF min/max/abs routines in it. */
c15c90bb
ZW
10762 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10763 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10764 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
c15c90bb 10765
24ea7948
ZW
10766 /* ia64_expand_compare uses this. */
10767 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10768
10769 /* These should never be used. */
10770 set_optab_libfunc (eq_optab, TFmode, 0);
10771 set_optab_libfunc (ne_optab, TFmode, 0);
10772 set_optab_libfunc (gt_optab, TFmode, 0);
10773 set_optab_libfunc (ge_optab, TFmode, 0);
10774 set_optab_libfunc (lt_optab, TFmode, 0);
10775 set_optab_libfunc (le_optab, TFmode, 0);
c15c90bb 10776}
738e7b39
RK
10777
10778/* Rename the division and modulus functions in VMS. */
10779
10780static void
10781ia64_vms_init_libfuncs (void)
10782{
10783 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10784 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10785 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10786 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10787 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10788 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10789 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10790 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
f2972bf8
DR
10791#ifdef MEM_LIBFUNCS_INIT
10792 MEM_LIBFUNCS_INIT;
10793#endif
738e7b39 10794}
6bc709c1
L
10795
10796/* Rename the TFmode libfuncs available from soft-fp in glibc using
10797 the HPUX conventions. */
10798
10799static void
10800ia64_sysv4_init_libfuncs (void)
10801{
10802 ia64_init_libfuncs ();
10803
10804 /* These functions are not part of the HPUX TFmode interface. We
10805 use them instead of _U_Qfcmp, which doesn't work the way we
10806 expect. */
10807 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10808 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10809 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10810 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10811 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10812 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10813
10814 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10815 glibc doesn't have them. */
10816}
c252db20
L
10817
10818/* Use soft-fp. */
10819
10820static void
10821ia64_soft_fp_init_libfuncs (void)
10822{
10823}
f2972bf8
DR
10824
10825static bool
095a2d76 10826ia64_vms_valid_pointer_mode (scalar_int_mode mode)
f2972bf8
DR
10827{
10828 return (mode == SImode || mode == DImode);
10829}
ae46c4e0 10830\f
9b580a0b
RH
10831/* For HPUX, it is illegal to have relocations in shared segments. */
10832
10833static int
10834ia64_hpux_reloc_rw_mask (void)
10835{
10836 return 3;
10837}
10838
10839/* For others, relax this so that relocations to local data go in
10840 read-only segments, but we still cannot allow global relocations
10841 in read-only segments. */
10842
10843static int
10844ia64_reloc_rw_mask (void)
10845{
10846 return flag_pic ? 3 : 2;
10847}
10848
d6b5193b
RS
10849/* Return the section to use for X. The only special thing we do here
10850 is to honor small data. */
b64a1b53 10851
d6b5193b 10852static section *
ef4bddc2 10853ia64_select_rtx_section (machine_mode mode, rtx x,
9c808aad 10854 unsigned HOST_WIDE_INT align)
b64a1b53
RH
10855{
10856 if (GET_MODE_SIZE (mode) > 0
1f4a2e84
SE
10857 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10858 && !TARGET_NO_SDATA)
d6b5193b 10859 return sdata_section;
b64a1b53 10860 else
d6b5193b 10861 return default_elf_select_rtx_section (mode, x, align);
b64a1b53
RH
10862}
10863
1e1bd14e 10864static unsigned int
abb8b19a
AM
10865ia64_section_type_flags (tree decl, const char *name, int reloc)
10866{
10867 unsigned int flags = 0;
10868
10869 if (strcmp (name, ".sdata") == 0
10870 || strncmp (name, ".sdata.", 7) == 0
10871 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10872 || strncmp (name, ".sdata2.", 8) == 0
10873 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10874 || strcmp (name, ".sbss") == 0
10875 || strncmp (name, ".sbss.", 6) == 0
10876 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10877 flags = SECTION_SMALL;
10878
9b580a0b 10879 flags |= default_section_type_flags (decl, name, reloc);
abb8b19a 10880 return flags;
1e1bd14e
RH
10881}
10882
57782ad8
MM
10883/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10884 structure type and that the address of that type should be passed
10885 in out0, rather than in r8. */
10886
10887static bool
10888ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10889{
10890 tree ret_type = TREE_TYPE (fntype);
10891
10892 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10893 as the structure return address parameter, if the return value
10894 type has a non-trivial copy constructor or destructor. It is not
10895 clear if this same convention should be used for other
10896 programming languages. Until G++ 3.4, we incorrectly used r8 for
10897 these return values. */
10898 return (abi_version_at_least (2)
10899 && ret_type
10900 && TYPE_MODE (ret_type) == BLKmode
10901 && TREE_ADDRESSABLE (ret_type)
dcc97066 10902 && lang_GNU_CXX ());
57782ad8 10903}
1e1bd14e 10904
5f13cfc6
RH
10905/* Output the assembler code for a thunk function. THUNK_DECL is the
10906 declaration for the thunk function itself, FUNCTION is the decl for
10907 the target function. DELTA is an immediate constant offset to be
272d0bee 10908 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
10909 *(*this + vcall_offset) should be added to THIS. */
10910
c590b625 10911static void
9c808aad
AJ
10912ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10913 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10914 tree function)
483ab821 10915{
dd3d2b35
DM
10916 rtx this_rtx, funexp;
10917 rtx_insn *insn;
57782ad8
MM
10918 unsigned int this_parmno;
10919 unsigned int this_regno;
13f70342 10920 rtx delta_rtx;
5f13cfc6 10921
599aedd9 10922 reload_completed = 1;
fe3ad572 10923 epilogue_completed = 1;
599aedd9 10924
5f13cfc6
RH
10925 /* Set things up as ia64_expand_prologue might. */
10926 last_scratch_gr_reg = 15;
10927
10928 memset (&current_frame_info, 0, sizeof (current_frame_info));
10929 current_frame_info.spill_cfa_off = -16;
10930 current_frame_info.n_input_regs = 1;
10931 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10932
5f13cfc6 10933 /* Mark the end of the (empty) prologue. */
2e040219 10934 emit_note (NOTE_INSN_PROLOGUE_END);
5f13cfc6 10935
57782ad8
MM
10936 /* Figure out whether "this" will be the first parameter (the
10937 typical case) or the second parameter (as happens when the
10938 virtual function returns certain class objects). */
10939 this_parmno
10940 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10941 ? 1 : 0);
10942 this_regno = IN_REG (this_parmno);
10943 if (!TARGET_REG_NAMES)
10944 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10945
0a2aaacc 10946 this_rtx = gen_rtx_REG (Pmode, this_regno);
13f70342
RH
10947
10948 /* Apply the constant offset, if required. */
10949 delta_rtx = GEN_INT (delta);
36c216e5
MM
10950 if (TARGET_ILP32)
10951 {
57782ad8 10952 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
36c216e5 10953 REG_POINTER (tmp) = 1;
13f70342 10954 if (delta && satisfies_constraint_I (delta_rtx))
36c216e5 10955 {
0a2aaacc 10956 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
36c216e5
MM
10957 delta = 0;
10958 }
10959 else
0a2aaacc 10960 emit_insn (gen_ptr_extend (this_rtx, tmp));
36c216e5 10961 }
5f13cfc6
RH
10962 if (delta)
10963 {
13f70342 10964 if (!satisfies_constraint_I (delta_rtx))
5f13cfc6
RH
10965 {
10966 rtx tmp = gen_rtx_REG (Pmode, 2);
10967 emit_move_insn (tmp, delta_rtx);
10968 delta_rtx = tmp;
10969 }
0a2aaacc 10970 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
5f13cfc6
RH
10971 }
10972
10973 /* Apply the offset from the vtable, if required. */
10974 if (vcall_offset)
10975 {
10976 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10977 rtx tmp = gen_rtx_REG (Pmode, 2);
10978
36c216e5
MM
10979 if (TARGET_ILP32)
10980 {
10981 rtx t = gen_rtx_REG (ptr_mode, 2);
10982 REG_POINTER (t) = 1;
0a2aaacc 10983 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
13f70342 10984 if (satisfies_constraint_I (vcall_offset_rtx))
36c216e5 10985 {
13f70342 10986 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
36c216e5
MM
10987 vcall_offset = 0;
10988 }
10989 else
10990 emit_insn (gen_ptr_extend (tmp, t));
10991 }
10992 else
0a2aaacc 10993 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
5f13cfc6 10994
36c216e5 10995 if (vcall_offset)
5f13cfc6 10996 {
13f70342 10997 if (!satisfies_constraint_J (vcall_offset_rtx))
36c216e5
MM
10998 {
10999 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
11000 emit_move_insn (tmp2, vcall_offset_rtx);
11001 vcall_offset_rtx = tmp2;
11002 }
11003 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
5f13cfc6 11004 }
5f13cfc6 11005
36c216e5 11006 if (TARGET_ILP32)
13f70342 11007 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
36c216e5
MM
11008 else
11009 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
5f13cfc6 11010
0a2aaacc 11011 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
5f13cfc6
RH
11012 }
11013
11014 /* Generate a tail call to the target function. */
11015 if (! TREE_USED (function))
11016 {
11017 assemble_external (function);
11018 TREE_USED (function) = 1;
11019 }
11020 funexp = XEXP (DECL_RTL (function), 0);
11021 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11022 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
11023 insn = get_last_insn ();
11024 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
11025
11026 /* Code generation for calls relies on splitting. */
11027 reload_completed = 1;
fe3ad572 11028 epilogue_completed = 1;
599aedd9
RH
11029 try_split (PATTERN (insn), insn, 0);
11030
5f13cfc6
RH
11031 emit_barrier ();
11032
11033 /* Run just enough of rest_of_compilation to get the insns emitted.
11034 There's not really enough bulk here to make other passes such as
11035 instruction scheduling worth while. Note that use_thunk calls
11036 assemble_start_function and assemble_end_function. */
599aedd9 11037
18dbd950 11038 emit_all_insn_group_barriers (NULL);
5f13cfc6 11039 insn = get_insns ();
5f13cfc6
RH
11040 shorten_branches (insn);
11041 final_start_function (insn, file, 1);
c9d691e9 11042 final (insn, file, 1);
5f13cfc6 11043 final_end_function ();
599aedd9
RH
11044
11045 reload_completed = 0;
fe3ad572 11046 epilogue_completed = 0;
483ab821
MM
11047}
11048
351a758b
KH
11049/* Worker function for TARGET_STRUCT_VALUE_RTX. */
11050
11051static rtx
57782ad8 11052ia64_struct_value_rtx (tree fntype,
351a758b
KH
11053 int incoming ATTRIBUTE_UNUSED)
11054{
f2972bf8
DR
11055 if (TARGET_ABI_OPEN_VMS ||
11056 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
57782ad8 11057 return NULL_RTX;
351a758b
KH
11058 return gen_rtx_REG (Pmode, GR_REG (8));
11059}
11060
88ed5ef5 11061static bool
18e2a8b8 11062ia64_scalar_mode_supported_p (scalar_mode mode)
88ed5ef5
SE
11063{
11064 switch (mode)
11065 {
4e10a5a7
RS
11066 case E_QImode:
11067 case E_HImode:
11068 case E_SImode:
11069 case E_DImode:
11070 case E_TImode:
88ed5ef5
SE
11071 return true;
11072
4e10a5a7
RS
11073 case E_SFmode:
11074 case E_DFmode:
11075 case E_XFmode:
11076 case E_RFmode:
88ed5ef5
SE
11077 return true;
11078
4e10a5a7 11079 case E_TFmode:
c252db20 11080 return true;
88ed5ef5
SE
11081
11082 default:
11083 return false;
11084 }
11085}
11086
f61134e8 11087static bool
ef4bddc2 11088ia64_vector_mode_supported_p (machine_mode mode)
f61134e8
RH
11089{
11090 switch (mode)
11091 {
4e10a5a7
RS
11092 case E_V8QImode:
11093 case E_V4HImode:
11094 case E_V2SImode:
f61134e8
RH
11095 return true;
11096
4e10a5a7 11097 case E_V2SFmode:
f61134e8
RH
11098 return true;
11099
11100 default:
11101 return false;
11102 }
11103}
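/* Added illustration, not part of the original ia64.c: the 8-byte vector
   modes accepted above correspond to GCC generic vector types such as these
   hypothetical typedefs; element-wise arithmetic on them can use the IA-64
   parallel multimedia instructions.  */
typedef short ia64_v4hi __attribute__ ((vector_size (8)));
typedef int   ia64_v2si __attribute__ ((vector_size (8)));
typedef float ia64_v2sf __attribute__ ((vector_size (8)));

static ia64_v4hi
add_4x16 (ia64_v4hi a, ia64_v4hi b)
{
  return a + b;                    /* element-wise 16-bit addition */
}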
11104
694a2f6e
EB
11105/* Implement the FUNCTION_PROFILER macro. */
11106
2b4f149b
RH
11107void
11108ia64_output_function_profiler (FILE *file, int labelno)
11109{
694a2f6e
EB
11110 bool indirect_call;
11111
11112 /* If the function needs a static chain and the static chain
11113 register is r15, we use an indirect call so as to bypass
11114 the PLT stub in case the executable is dynamically linked,
11115 because the stub clobbers r15 as per 5.3.6 of the psABI.
11116   We don't need to do that in non-canonical PIC mode.  */
11117
11118 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11119 {
11120 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11121 indirect_call = true;
11122 }
11123 else
11124 indirect_call = false;
11125
2b4f149b
RH
11126 if (TARGET_GNU_AS)
11127 fputs ("\t.prologue 4, r40\n", file);
11128 else
11129 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11130 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
bd8633a3
RH
11131
11132 if (NO_PROFILE_COUNTERS)
694a2f6e 11133 fputs ("\tmov out3 = r0\n", file);
bd8633a3
RH
11134 else
11135 {
11136 char buf[20];
11137 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11138
11139 if (TARGET_AUTO_PIC)
11140 fputs ("\tmovl out3 = @gprel(", file);
11141 else
11142 fputs ("\taddl out3 = @ltoff(", file);
11143 assemble_name (file, buf);
11144 if (TARGET_AUTO_PIC)
694a2f6e 11145 fputs (")\n", file);
bd8633a3 11146 else
694a2f6e 11147 fputs ("), r1\n", file);
bd8633a3
RH
11148 }
11149
694a2f6e
EB
11150 if (indirect_call)
11151 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11152 fputs ("\t;;\n", file);
11153
2b4f149b 11154 fputs ("\t.save rp, r42\n", file);
bd8633a3 11155 fputs ("\tmov out2 = b0\n", file);
694a2f6e
EB
11156 if (indirect_call)
11157 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2b4f149b 11158 fputs ("\t.body\n", file);
2b4f149b 11159 fputs ("\tmov out1 = r1\n", file);
694a2f6e
EB
11160 if (indirect_call)
11161 {
11162 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11163 fputs ("\tmov b6 = r16\n", file);
11164 fputs ("\tld8 r1 = [r14]\n", file);
11165 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11166 }
11167 else
11168 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2b4f149b
RH
11169}
11170
d26afa4f
SE
11171static GTY(()) rtx mcount_func_rtx;
11172static rtx
11173gen_mcount_func_rtx (void)
11174{
11175 if (!mcount_func_rtx)
11176 mcount_func_rtx = init_one_libfunc ("_mcount");
11177 return mcount_func_rtx;
11178}
11179
11180void
11181ia64_profile_hook (int labelno)
11182{
11183 rtx label, ip;
11184
11185 if (NO_PROFILE_COUNTERS)
11186 label = const0_rtx;
11187 else
11188 {
11189 char buf[30];
11190 const char *label_name;
11191 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
55504c7c 11192 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
d26afa4f
SE
11193 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11194 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11195 }
11196 ip = gen_reg_rtx (Pmode);
11197 emit_insn (gen_ip_value (ip));
11198 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
db69559b 11199 VOIDmode,
d26afa4f
SE
11200 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11201 ip, Pmode,
11202 label, Pmode);
11203}
11204
cac24f06
JM
11205/* Return the mangling of TYPE if it is an extended fundamental type. */
11206
11207static const char *
3101faab 11208ia64_mangle_type (const_tree type)
cac24f06 11209{
608063c3
JB
11210 type = TYPE_MAIN_VARIANT (type);
11211
11212 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11213 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11214 return NULL;
11215
cac24f06
JM
11216 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11217 mangled as "e". */
11218 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11219 return "g";
11220 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11221 an extended mangling. Elsewhere, "e" is available since long
11222 double is 80 bits. */
11223 if (TYPE_MODE (type) == XFmode)
11224 return TARGET_HPUX ? "u9__float80" : "e";
4de67c26
JM
11225 if (TYPE_MODE (type) == RFmode)
11226 return "u7__fpreg";
11227 return NULL;
11228}
11229
11230/* Return the diagnostic message string if conversion from FROMTYPE to
11231 TOTYPE is not allowed, NULL otherwise. */
11232static const char *
3101faab 11233ia64_invalid_conversion (const_tree fromtype, const_tree totype)
4de67c26
JM
11234{
11235 /* Reject nontrivial conversion to or from __fpreg. */
11236 if (TYPE_MODE (fromtype) == RFmode
11237 && TYPE_MODE (totype) != RFmode
11238 && TYPE_MODE (totype) != VOIDmode)
11239 return N_("invalid conversion from %<__fpreg%>");
11240 if (TYPE_MODE (totype) == RFmode
11241 && TYPE_MODE (fromtype) != RFmode)
11242 return N_("invalid conversion to %<__fpreg%>");
11243 return NULL;
11244}
11245
11246/* Return the diagnostic message string if the unary operation OP is
11247 not permitted on TYPE, NULL otherwise. */
11248static const char *
3101faab 11249ia64_invalid_unary_op (int op, const_tree type)
4de67c26
JM
11250{
11251 /* Reject operations on __fpreg other than unary + or &. */
11252 if (TYPE_MODE (type) == RFmode
11253 && op != CONVERT_EXPR
11254 && op != ADDR_EXPR)
11255 return N_("invalid operation on %<__fpreg%>");
11256 return NULL;
11257}
11258
11259/* Return the diagnostic message string if the binary operation OP is
11260 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11261static const char *
3101faab 11262ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
4de67c26
JM
11263{
11264 /* Reject operations on __fpreg. */
11265 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11266 return N_("invalid operation on %<__fpreg%>");
cac24f06
JM
11267 return NULL;
11268}
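/* Added illustration, not part of the original ia64.c, of what the three
   hooks above accept and reject for __fpreg; the variables are
   hypothetical.  */
static __fpreg fp_val;
static __fpreg *fp_ptr = &fp_val;  /* unary & is allowed                 */
static __fpreg fp_copy;

static void
fp_touch (void)
{
  fp_copy = +fp_val;               /* unary + (a no-op conversion) is OK */
}
/* Rejected by the hooks above:
     double d = (double) fp_val;     -- invalid conversion from __fpreg
     fp_copy = fp_val + fp_val;      -- invalid operation on __fpreg     */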
11269
812b587e
SE
11270/* HP-UX version_id attribute.
11271   For object foo, if the version_id is set to 1234, put out an alias
11272   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11273 other than an alias statement because it is an illegal symbol name. */
11274
11275static tree
11276ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11277 tree name ATTRIBUTE_UNUSED,
11278 tree args,
11279 int flags ATTRIBUTE_UNUSED,
11280 bool *no_add_attrs)
11281{
11282 tree arg = TREE_VALUE (args);
11283
11284 if (TREE_CODE (arg) != STRING_CST)
11285 {
11286 error("version attribute is not a string");
11287 *no_add_attrs = true;
11288 return NULL_TREE;
11289 }
11290 return NULL_TREE;
11291}
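/* Added illustration, not part of the original ia64.c: on HP-UX the
   attribute handled above makes the compiler emit
   .alias bar "bar{1234}" for the symbol; the declaration is hypothetical.  */
extern int bar __attribute__ ((version_id ("1234")));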
11292
a31fa2e0
SE
11293/* Target hook for c_mode_for_suffix. */
11294
ef4bddc2 11295static machine_mode
a31fa2e0
SE
11296ia64_c_mode_for_suffix (char suffix)
11297{
11298 if (suffix == 'q')
11299 return TFmode;
11300 if (suffix == 'w')
11301 return XFmode;
11302
11303 return VOIDmode;
11304}
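/* Added illustration, not part of the original ia64.c: with the hook above,
   the 'q' suffix yields a TFmode (__float128) constant and the 'w' suffix
   an XFmode (__float80) constant.  */
static const __float128 pi_q = 3.14159265358979323846264338327950288q;
static const __float80  pi_w = 3.14159265358979323846w;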
11305
f3a83111
SE
11306static GTY(()) rtx ia64_dconst_0_5_rtx;
11307
11308rtx
11309ia64_dconst_0_5 (void)
11310{
11311 if (! ia64_dconst_0_5_rtx)
11312 {
11313 REAL_VALUE_TYPE rv;
11314 real_from_string (&rv, "0.5");
11315 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11316 }
11317 return ia64_dconst_0_5_rtx;
11318}
11319
11320static GTY(()) rtx ia64_dconst_0_375_rtx;
11321
11322rtx
11323ia64_dconst_0_375 (void)
11324{
11325 if (! ia64_dconst_0_375_rtx)
11326 {
11327 REAL_VALUE_TYPE rv;
11328 real_from_string (&rv, "0.375");
11329 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11330 }
11331 return ia64_dconst_0_375_rtx;
11332}
11333
ef1d3b57 11334static fixed_size_mode
ffa88471
SE
11335ia64_get_reg_raw_mode (int regno)
11336{
11337 if (FR_REGNO_P (regno))
11338 return XFmode;
11339 return default_get_reg_raw_mode(regno);
11340}
f3a83111 11341
d9886a9e
L
11342/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11343 anymore. */
11344
11345bool
ef4bddc2 11346ia64_member_type_forces_blk (const_tree, machine_mode mode)
d9886a9e
L
11347{
11348 return TARGET_HPUX && mode == TFmode;
11349}
11350
f16d3f39
JH
11351/* Always default to .text section until HP-UX linker is fixed. */
11352
11353ATTRIBUTE_UNUSED static section *
11354ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11355 enum node_frequency freq ATTRIBUTE_UNUSED,
11356 bool startup ATTRIBUTE_UNUSED,
11357 bool exit ATTRIBUTE_UNUSED)
11358{
11359 return NULL;
11360}
e6431744
RH
11361\f
11362/* Construct (set target (vec_select op0 (parallel perm))) and
11363 return true if that's a valid instruction in the active ISA. */
11364
11365static bool
11366expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11367{
11368 rtx rperm[MAX_VECT_LEN], x;
11369 unsigned i;
11370
11371 for (i = 0; i < nelt; ++i)
11372 rperm[i] = GEN_INT (perm[i]);
11373
11374 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11375 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
f7df4a84 11376 x = gen_rtx_SET (target, x);
e6431744 11377
647d790d
DM
11378 rtx_insn *insn = emit_insn (x);
11379 if (recog_memoized (insn) < 0)
e6431744 11380 {
647d790d 11381 remove_insn (insn);
e6431744
RH
11382 return false;
11383 }
11384 return true;
11385}
11386
11387/* Similar, but generate a vec_concat from op0 and op1 as well. */
11388
11389static bool
11390expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11391 const unsigned char *perm, unsigned nelt)
11392{
ef4bddc2 11393 machine_mode v2mode;
e6431744
RH
11394 rtx x;
11395
490d0f6c
RS
11396 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
11397 return false;
e6431744
RH
11398 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11399 return expand_vselect (target, x, perm, nelt);
11400}
11401
11402/* Try to expand a no-op permutation. */
11403
11404static bool
11405expand_vec_perm_identity (struct expand_vec_perm_d *d)
11406{
11407 unsigned i, nelt = d->nelt;
11408
11409 for (i = 0; i < nelt; ++i)
11410 if (d->perm[i] != i)
11411 return false;
11412
11413 if (!d->testing_p)
11414 emit_move_insn (d->target, d->op0);
11415
11416 return true;
11417}
11418
11419/* Try to expand D via a shrp instruction. */
11420
11421static bool
11422expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11423{
11424 unsigned i, nelt = d->nelt, shift, mask;
2d130b31 11425 rtx tmp, hi, lo;
e6431744
RH
11426
11427 /* ??? Don't force V2SFmode into the integer registers. */
11428 if (d->vmode == V2SFmode)
11429 return false;
11430
11431 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11432
11433 shift = d->perm[0];
2d130b31
UB
11434 if (BYTES_BIG_ENDIAN && shift > nelt)
11435 return false;
11436
e6431744
RH
11437 for (i = 1; i < nelt; ++i)
11438 if (d->perm[i] != ((shift + i) & mask))
11439 return false;
11440
11441 if (d->testing_p)
11442 return true;
11443
2d130b31
UB
11444 hi = shift < nelt ? d->op1 : d->op0;
11445 lo = shift < nelt ? d->op0 : d->op1;
11446
11447 shift %= nelt;
11448
e6431744
RH
11449 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11450
11451 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11452 gcc_assert (IN_RANGE (shift, 1, 63));
11453
11454 /* Recall that big-endian elements are numbered starting at the top of
11455 the register. Ideally we'd have a shift-left-pair. But since we
11456      don't, convert to a shift in the other direction.  */
11457 if (BYTES_BIG_ENDIAN)
11458 shift = 64 - shift;
11459
11460 tmp = gen_reg_rtx (DImode);
2d130b31
UB
11461 hi = gen_lowpart (DImode, hi);
11462 lo = gen_lowpart (DImode, lo);
11463 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
e6431744
RH
11464
11465 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11466 return true;
11467}
11468
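/* Editorial worked example (not part of the original source): on a
   little-endian target, a two-operand V8QImode permutation
   { 3, 4, 5, 6, 7, 8, 9, 10 } is a rotation through the 16-byte pair of
   operands, so SHIFT = 3 passes the loop above.  Because SHIFT < NELT we
   get HI = op1 and LO = op0, the bit count becomes 3 * 8 = 24, and the
   emitted

     shrp tmp = op1, op0, 24

   extracts bytes 3..7 of op0 followed by bytes 0..2 of op1, which is
   exactly the requested permutation.  */
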
/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

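/* Editorial note (not part of the original source): the "reversed
   operands" case above catches selections that only match a machine
   pattern with the operands swapped.  For example, a V4HImode request
   { 4, 0, 5, 1 } on (op0, op1) is rewritten to { 0, 4, 1, 5 } on
   (op1, op0); both describe { op1[0], op0[0], op1[1], op0[1] }, but only
   the second form looks like the canonical interleave-low pattern.  */
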
/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case E_V2SImode:
    case E_V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case E_V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case E_V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}

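/* Editorial worked example (not part of the original source): broadcasting
   element 3 of a V8QImode operand on a little-endian target takes the
   E_V8QImode arm above.  The element is pulled out with a zero-extract of
   8 bits at bit position 3 * 8 = 24 into a DImode temporary, and the
   mux1 @brcst form generated by gen_mux1_brcst_qi then replicates that
   low byte into all eight byte positions of the target.  */
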
/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}

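/* Editorial worked example (not part of the original source): assume a
   V4HImode request { 1, 5, 0, 4 } on distinct operands (a, b) that no
   single insn matches directly.  CONTENTS is 0x33, which satisfies the
   "punpck even halves" test, so DREMAP becomes the interleave-low
   { 0, 4, 1, 5 } producing t = { a0, b0, a1, b1 }, and DFINAL becomes the
   one-operand permutation { 2, 3, 0, 1 } of t, i.e. { a1, b1, a0, b0 }.
   That is the requested result, and the final remap is expressible as a
   single V4HImode vec_select.  */
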
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}

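/* Editorial worked example (not part of the original source): for a
   request like { 0, 5, 2, 7 }, PERM2 becomes { 0, 1, 2, 3 } and RMASK
   selects op0 in lanes 0 and 2 (all-ones) and op1 in lanes 1 and 3
   (zero).  The two selects compute t0 = { a0, a1, a2, a3 } and
   t1 = { b0, b1, b2, b3 }, and the final (mask & t0) | (~mask & t1)
   merge yields { a0, b1, a2, b3 }, i.e. the requested permutation in
   five instructions.  */
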
/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
			       rtx op1, const vec_perm_indices &sel)
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  unsigned int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = !target;

  gcc_assert (sel.length () == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      unsigned int ei = sel[i] & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (d.testing_p)
    {
      /* We have to go through the motions and see if we can
	 figure out how to generate the requested permutation.  */
      d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
      d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
      if (!d.one_operand_p)
	d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

      start_sequence ();
      bool ret = ia64_expand_vec_perm_const_1 (&d);
      end_sequence ();

      return ret;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

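/* Editorial note (not part of the original source): WHICH in the hook
   above is a two-bit mask, bit 0 set when SEL references elements of OP0
   (indices < NELT) and bit 1 when it references OP1.  For example, a
   V2SImode SEL of { 3, 2 } gives WHICH == 2 and is rewritten to the
   one-operand reversal { 1, 0 } of OP1.  When TARGET is null the hook
   only tests feasibility: it substitutes scratch virtual registers and
   throws away any RTL generated inside the start_sequence/end_sequence
   pair.  */
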
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

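/* Editorial sketch (not part of the original source): a hypothetical
   caller, with made-up register names, that extracts the even-numbered
   HImode elements of two V4HImode values.  The helper builds
   d.perm = { 0, 2, 4, 6 }, so the result is { a0, a2, b0, b2 }.  */
#if 0
{
  rtx dest = gen_reg_rtx (V4HImode);
  rtx a = gen_reg_rtx (V4HImode);
  rtx b = gen_reg_rtx (V4HImode);

  /* dest = { a[0], a[2], b[0], b[2] }.  */
  ia64_expand_vec_perm_even_odd (dest, a, b, /*odd=*/0);
}
#endif
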
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In BR regs, we can't change the DImode at all.
   In FP regs, we can't change FP values to integer values and vice versa,
   but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */

static bool
ia64_can_change_mode_class (machine_mode from, machine_mode to,
			    reg_class_t rclass)
{
  if (reg_classes_intersect_p (rclass, BR_REGS))
    return from == to;
  if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
    return !reg_classes_intersect_p (rclass, FR_REGS);
  return true;
}

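/* Editorial note (not part of the original source): concretely, the hook
   above lets an FR register value be accessed as DImode or SImode
   interchangeably, but rejects e.g. punning an SFmode FR value as DImode
   (or vice versa), since only one of those modes is scalar floating
   point; and a BR register may only be accessed in the mode it already
   has.  */
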
#include "gt-ia64.h"