/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2017 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "cfghooks.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "libfuncs.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "gimplify.h"
#include "intl.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "opts.h"
#include "dumpfile.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
\f
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
				       const_tree, bool);
static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *);
static void ia64_output_function_epilogue (FILE *);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, machine_mode, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (scalar_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_fold_builtin (tree, int, tree *, bool);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);

static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
static bool ia64_modes_tieable_p (machine_mode, machine_mode);
static bool ia64_can_change_mode_class (machine_mode, machine_mode,
					reg_class_t);

#define MAX_VECT_LEN 8

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);

\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL,              0, 0, false, false, false, NULL, false }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ia64_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}

typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      const char *name;

      if (TREE_CODE (id) == IDENTIFIER_NODE)
	name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
	name = TREE_STRING_POINTER (id);
      else
	abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
	   size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

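/* The function below carried no comment in this dump; this description is
   inferred from its body: return nonzero if exactly one of OP1 and OP2 is a
   base-register operand.  */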
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static inline bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
	 symbols:
	  - @gprel does not require dynamic linker
	  - and does not use .sdata section
	 https://gcc.gnu.org/bugzilla/60465 */
      emit_insn (gen_load_gprel64 (dest, src));
    }
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (dest, tmp));
	}
    }

  return true;
}

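/* Return the libcall symbol for __tls_get_addr, creating and caching it on
   first use.  (Comment added for clarity; not present in the original.)  */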
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

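/* Return the register holding the thread pointer, r13 on IA-64.  (Comment
   added for clarity; not present in the original.)  */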
e2500fed 1197static GTY(()) rtx thread_pointer_rtx;
7b6e506e 1198static rtx
9c808aad 1199gen_thread_pointer (void)
7b6e506e 1200{
e2500fed 1201 if (!thread_pointer_rtx)
389fdba0 1202 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
135ca7b2 1203 return thread_pointer_rtx;
7b6e506e
RH
1204}
1205
21515593 1206static rtx
5e6c8b64 1207ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
b15b83fb 1208 rtx orig_op1, HOST_WIDE_INT addend)
21515593 1209{
dd3d2b35
DM
1210 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1211 rtx_insn *insns;
b15b83fb 1212 rtx orig_op0 = op0;
5e6c8b64
RH
1213 HOST_WIDE_INT addend_lo, addend_hi;
1214
21515593
RH
1215 switch (tls_kind)
1216 {
1217 case TLS_MODEL_GLOBAL_DYNAMIC:
1218 start_sequence ();
1219
1220 tga_op1 = gen_reg_rtx (Pmode);
5e6c8b64 1221 emit_insn (gen_load_dtpmod (tga_op1, op1));
21515593
RH
1222
1223 tga_op2 = gen_reg_rtx (Pmode);
5e6c8b64 1224 emit_insn (gen_load_dtprel (tga_op2, op1));
9c808aad 1225
21515593 1226 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
db69559b
RS
1227 LCT_CONST, Pmode,
1228 tga_op1, Pmode, tga_op2, Pmode);
21515593
RH
1229
1230 insns = get_insns ();
1231 end_sequence ();
1232
0d433a6a
RH
1233 if (GET_MODE (op0) != Pmode)
1234 op0 = tga_ret;
21515593 1235 emit_libcall_block (insns, op0, tga_ret, op1);
0d433a6a 1236 break;
21515593
RH
1237
1238 case TLS_MODEL_LOCAL_DYNAMIC:
1239 /* ??? This isn't the completely proper way to do local-dynamic
1240 If the call to __tls_get_addr is used only by a single symbol,
1241 then we should (somehow) move the dtprel to the second arg
1242 to avoid the extra add. */
1243 start_sequence ();
1244
1245 tga_op1 = gen_reg_rtx (Pmode);
5e6c8b64 1246 emit_insn (gen_load_dtpmod (tga_op1, op1));
21515593
RH
1247
1248 tga_op2 = const0_rtx;
1249
1250 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
db69559b
RS
1251 LCT_CONST, Pmode,
1252 tga_op1, Pmode, tga_op2, Pmode);
21515593
RH
1253
1254 insns = get_insns ();
1255 end_sequence ();
1256
1257 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1258 UNSPEC_LD_BASE);
1259 tmp = gen_reg_rtx (Pmode);
1260 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1261
0d433a6a
RH
1262 if (!register_operand (op0, Pmode))
1263 op0 = gen_reg_rtx (Pmode);
21515593
RH
1264 if (TARGET_TLS64)
1265 {
0d433a6a
RH
1266 emit_insn (gen_load_dtprel (op0, op1));
1267 emit_insn (gen_adddi3 (op0, tmp, op0));
21515593
RH
1268 }
1269 else
5e6c8b64 1270 emit_insn (gen_add_dtprel (op0, op1, tmp));
0d433a6a 1271 break;
21515593
RH
1272
1273 case TLS_MODEL_INITIAL_EXEC:
b15b83fb
JJ
1274 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1275 addend_hi = addend - addend_lo;
1276
0a81f074 1277 op1 = plus_constant (Pmode, op1, addend_hi);
5e6c8b64
RH
1278 addend = addend_lo;
1279
21515593 1280 tmp = gen_reg_rtx (Pmode);
5e6c8b64 1281 emit_insn (gen_load_tprel (tmp, op1));
21515593 1282
0d433a6a
RH
1283 if (!register_operand (op0, Pmode))
1284 op0 = gen_reg_rtx (Pmode);
1285 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1286 break;
21515593
RH
1287
1288 case TLS_MODEL_LOCAL_EXEC:
0d433a6a
RH
1289 if (!register_operand (op0, Pmode))
1290 op0 = gen_reg_rtx (Pmode);
5e6c8b64
RH
1291
1292 op1 = orig_op1;
1293 addend = 0;
21515593
RH
1294 if (TARGET_TLS64)
1295 {
0d433a6a 1296 emit_insn (gen_load_tprel (op0, op1));
5e6c8b64 1297 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
21515593
RH
1298 }
1299 else
5e6c8b64 1300 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
0d433a6a 1301 break;
21515593
RH
1302
1303 default:
e820471b 1304 gcc_unreachable ();
21515593 1305 }
0d433a6a 1306
5e6c8b64
RH
1307 if (addend)
1308 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1309 orig_op0, 1, OPTAB_DIRECT);
0d433a6a
RH
1310 if (orig_op0 == op0)
1311 return NULL_RTX;
1312 if (GET_MODE (orig_op0) == Pmode)
1313 return op0;
1314 return gen_lowpart (GET_MODE (orig_op0), op0);
21515593
RH
1315}
1316
7b6e506e 1317rtx
9c808aad 1318ia64_expand_move (rtx op0, rtx op1)
7b6e506e 1319{
ef4bddc2 1320 machine_mode mode = GET_MODE (op0);
7b6e506e
RH
1321
1322 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1323 op1 = force_reg (mode, op1);
1324
21515593 1325 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
7b6e506e 1326 {
5e6c8b64 1327 HOST_WIDE_INT addend = 0;
7b6e506e 1328 enum tls_model tls_kind;
5e6c8b64
RH
1329 rtx sym = op1;
1330
1331 if (GET_CODE (op1) == CONST
1332 && GET_CODE (XEXP (op1, 0)) == PLUS
1333 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1334 {
1335 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1336 sym = XEXP (XEXP (op1, 0), 0);
1337 }
1338
1339 tls_kind = tls_symbolic_operand_type (sym);
1340 if (tls_kind)
b15b83fb 1341 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
5e6c8b64
RH
1342
1343 if (any_offset_symbol_operand (sym, mode))
1344 addend = 0;
1345 else if (aligned_offset_symbol_operand (sym, mode))
1346 {
1347 HOST_WIDE_INT addend_lo, addend_hi;
1348
1349 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1350 addend_hi = addend - addend_lo;
1351
1352 if (addend_lo != 0)
1353 {
0a81f074 1354 op1 = plus_constant (mode, sym, addend_hi);
5e6c8b64
RH
1355 addend = addend_lo;
1356 }
21e43850
L
1357 else
1358 addend = 0;
5e6c8b64
RH
1359 }
1360 else
1361 op1 = sym;
1362
1363 if (reload_completed)
1364 {
1365 /* We really should have taken care of this offset earlier. */
1366 gcc_assert (addend == 0);
1367 if (ia64_expand_load_address (op0, op1))
1368 return NULL_RTX;
1369 }
21515593 1370
5e6c8b64 1371 if (addend)
7b6e506e 1372 {
b3a13419 1373 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
5e6c8b64 1374
f7df4a84 1375 emit_insn (gen_rtx_SET (subtarget, op1));
5e6c8b64
RH
1376
1377 op1 = expand_simple_binop (mode, PLUS, subtarget,
1378 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1379 if (op0 == op1)
1380 return NULL_RTX;
7b6e506e
RH
1381 }
1382 }
1383
1384 return op1;
1385}
1386
21515593
RH
1387/* Split a move from OP1 to OP0 conditional on COND. */
1388
1389void
9c808aad 1390ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
21515593 1391{
dd3d2b35 1392 rtx_insn *insn, *first = get_last_insn ();
21515593
RH
1393
1394 emit_move_insn (op0, op1);
1395
1396 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1397 if (INSN_P (insn))
1398 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1399 PATTERN (insn));
1400}
1401
f57fc998 1402/* Split a post-reload TImode or TFmode reference into two DImode
2ffe0e02
ZW
1403 components. This is made extra difficult by the fact that we do
1404 not get any scratch registers to work with, because reload cannot
1405 be prevented from giving us a scratch that overlaps the register
1406 pair involved. So instead, when addressing memory, we tweak the
1407 pointer register up and back down with POST_INCs. Or up and not
1408 back down when we can get away with it.
1409
1410 REVERSED is true when the loads must be done in reversed order
1411 (high word first) for correctness. DEAD is true when the pointer
1412 dies with the second insn we generate and therefore the second
1413 address must not carry a postmodify.
1414
1415 May return an insn which is to be emitted after the moves. */
3f622353 1416
f57fc998 1417static rtx
2ffe0e02 1418ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
3f622353 1419{
2ffe0e02
ZW
1420 rtx fixup = 0;
1421
3f622353
RH
1422 switch (GET_CODE (in))
1423 {
1424 case REG:
2ffe0e02
ZW
1425 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1426 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1427 break;
3f622353
RH
1428
1429 case CONST_INT:
1430 case CONST_DOUBLE:
2ffe0e02 1431 /* Cannot occur reversed. */
e820471b 1432 gcc_assert (!reversed);
2ffe0e02 1433
f57fc998
ZW
1434 if (GET_MODE (in) != TFmode)
1435 split_double (in, &out[0], &out[1]);
1436 else
1437 /* split_double does not understand how to split a TFmode
1438 quantity into a pair of DImode constants. */
1439 {
f57fc998
ZW
1440 unsigned HOST_WIDE_INT p[2];
1441 long l[4]; /* TFmode is 128 bits */
1442
34a72c33 1443 real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
f57fc998
ZW
1444
1445 if (FLOAT_WORDS_BIG_ENDIAN)
1446 {
1447 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1448 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1449 }
1450 else
1451 {
9eb578c8
L
1452 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1453 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
f57fc998
ZW
1454 }
1455 out[0] = GEN_INT (p[0]);
1456 out[1] = GEN_INT (p[1]);
1457 }
2ffe0e02
ZW
1458 break;
1459
1460 case MEM:
1461 {
1462 rtx base = XEXP (in, 0);
1463 rtx offset;
1464
1465 switch (GET_CODE (base))
1466 {
1467 case REG:
1468 if (!reversed)
1469 {
1470 out[0] = adjust_automodify_address
1471 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1472 out[1] = adjust_automodify_address
1473 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1474 }
1475 else
1476 {
1477 /* Reversal requires a pre-increment, which can only
1478 be done as a separate insn. */
1479 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1480 out[0] = adjust_automodify_address
1481 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1482 out[1] = adjust_address (in, DImode, 0);
1483 }
1484 break;
1485
1486 case POST_INC:
e820471b
NS
1487 gcc_assert (!reversed && !dead);
1488
2ffe0e02
ZW
1489 /* Just do the increment in two steps. */
1490 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1491 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1492 break;
1493
1494 case POST_DEC:
e820471b
NS
1495 gcc_assert (!reversed && !dead);
1496
2ffe0e02
ZW
1497 /* Add 8, subtract 24. */
1498 base = XEXP (base, 0);
1499 out[0] = adjust_automodify_address
1500 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1501 out[1] = adjust_automodify_address
1502 (in, DImode,
0a81f074
RS
1503 gen_rtx_POST_MODIFY (Pmode, base,
1504 plus_constant (Pmode, base, -24)),
2ffe0e02
ZW
1505 8);
1506 break;
1507
1508 case POST_MODIFY:
e820471b
NS
1509 gcc_assert (!reversed && !dead);
1510
2ffe0e02
ZW
1511 /* Extract and adjust the modification. This case is
1512 trickier than the others, because we might have an
1513 index register, or we might have a combined offset that
1514 doesn't fit a signed 9-bit displacement field. We can
1515 assume the incoming expression is already legitimate. */
1516 offset = XEXP (base, 1);
1517 base = XEXP (base, 0);
1518
1519 out[0] = adjust_automodify_address
1520 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1521
1522 if (GET_CODE (XEXP (offset, 1)) == REG)
1523 {
1524 /* Can't adjust the postmodify to match. Emit the
1525 original, then a separate addition insn. */
1526 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1527 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1528 }
2ffe0e02
ZW
1529 else
1530 {
e820471b
NS
1531 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1532 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1533 {
1534 /* Again the postmodify cannot be made to match,
1535 but in this case it's more efficient to get rid
1536 of the postmodify entirely and fix up with an
1537 add insn. */
1538 out[1] = adjust_automodify_address (in, DImode, base, 8);
1539 fixup = gen_adddi3
1540 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1541 }
1542 else
1543 {
1544 /* Combined offset still fits in the displacement field.
1545 (We cannot overflow it at the high end.) */
1546 out[1] = adjust_automodify_address
1547 (in, DImode, gen_rtx_POST_MODIFY
1548 (Pmode, base, gen_rtx_PLUS
1549 (Pmode, base,
1550 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1551 8);
1552 }
2ffe0e02
ZW
1553 }
1554 break;
1555
1556 default:
e820471b 1557 gcc_unreachable ();
2ffe0e02
ZW
1558 }
1559 break;
1560 }
3f622353
RH
1561
1562 default:
e820471b 1563 gcc_unreachable ();
3f622353 1564 }
2ffe0e02
ZW
1565
1566 return fixup;
3f622353
RH
1567}
1568
f57fc998
ZW
1569/* Split a TImode or TFmode move instruction after reload.
1570 This is used by *movtf_internal and *movti_internal. */
1571void
1572ia64_split_tmode_move (rtx operands[])
1573{
2ffe0e02
ZW
1574 rtx in[2], out[2], insn;
1575 rtx fixup[2];
1576 bool dead = false;
1577 bool reversed = false;
1578
1579 /* It is possible for reload to decide to overwrite a pointer with
1580 the value it points to. In that case we have to do the loads in
1581 the appropriate order so that the pointer is not destroyed too
1582 early. Also we must not generate a postmodify for that second
6d3f673c
KY
1583 load, or rws_access_regno will die. And we must not generate a
1584 postmodify for the second load if the destination register
1585 overlaps with the base register. */
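/* Concretely (an illustrative case): for a move such as
       (set (reg:TI r14) (mem:TI (reg:DI r14)))
   loading the low half first would overwrite the base register before the
   high half is read, so the loads are emitted high word first.  */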
2ffe0e02
ZW
1586 if (GET_CODE (operands[1]) == MEM
1587 && reg_overlap_mentioned_p (operands[0], operands[1]))
f57fc998 1588 {
2ffe0e02
ZW
1589 rtx base = XEXP (operands[1], 0);
1590 while (GET_CODE (base) != REG)
1591 base = XEXP (base, 0);
f57fc998 1592
2ffe0e02 1593 if (REGNO (base) == REGNO (operands[0]))
6d3f673c 1594 reversed = true;
2430d1e2 1595
6d3f673c
KY
1596 if (refers_to_regno_p (REGNO (operands[0]),
1597 REGNO (operands[0])+2,
1598 base, 0))
2430d1e2 1599 dead = true;
2ffe0e02
ZW
1600 }
1601 /* Another reason to do the moves in reversed order is if the first
1602 element of the target register pair is also the second element of
1603 the source register pair. */
1604 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1605 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1606 reversed = true;
1607
1608 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1609 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1610
1611#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1612 if (GET_CODE (EXP) == MEM \
1613 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1614 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1615 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
bbbbb16a 1616 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
2ffe0e02 1617
f7df4a84 1618 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
2ffe0e02
ZW
1619 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1620 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1621
f7df4a84 1622 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
2ffe0e02
ZW
1623 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1624 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1625
1626 if (fixup[0])
1627 emit_insn (fixup[0]);
1628 if (fixup[1])
1629 emit_insn (fixup[1]);
1630
1631#undef MAYBE_ADD_REG_INC_NOTE
f57fc998
ZW
1632}
1633
02befdf4 1634/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
3f622353 1635 through memory plus an extra GR scratch register. Except that you can
f15643d4
RS
1636 either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
1637 from SECONDARY_RELOAD_CLASS, but not both.
3f622353
RH
1638
1639 We got into problems in the first place by allowing a construct like
02befdf4 1640 (subreg:XF (reg:TI)), which we got from a union containing a long double.
f5143c46 1641 This solution attempts to prevent this situation from occurring. When
3f622353
RH
1642 we see something like the above, we spill the inner register to memory. */
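/* For example (hypothetical user code), something along the lines of

       union { long double ld; __int128 ti; } u;
       u.ti = t;
       return u.ld;

   can leave a (subreg:XF (reg:TI ...) 0) in the RTL; the helper below
   rewrites such operands into stack memory instead.  */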
1643
4de67c26 1644static rtx
ef4bddc2 1645spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
3f622353
RH
1646{
1647 if (GET_CODE (in) == SUBREG
1648 && GET_MODE (SUBREG_REG (in)) == TImode
1649 && GET_CODE (SUBREG_REG (in)) == REG)
1650 {
9474e8ab 1651 rtx memt = assign_stack_temp (TImode, 16);
68d22aa5 1652 emit_move_insn (memt, SUBREG_REG (in));
4de67c26 1653 return adjust_address (memt, mode, 0);
3f622353
RH
1654 }
1655 else if (force && GET_CODE (in) == REG)
1656 {
9474e8ab 1657 rtx memx = assign_stack_temp (mode, 16);
68d22aa5
RH
1658 emit_move_insn (memx, in);
1659 return memx;
3f622353 1660 }
3f622353
RH
1661 else
1662 return in;
1663}
f2f90c63 1664
4de67c26
JM
1665/* Expand the movxf or movrf pattern (MODE says which) with the given
1666 OPERANDS, returning true if the pattern should then invoke
1667 DONE. */
1668
1669bool
ef4bddc2 1670ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
4de67c26
JM
1671{
1672 rtx op0 = operands[0];
1673
1674 if (GET_CODE (op0) == SUBREG)
1675 op0 = SUBREG_REG (op0);
1676
1677 /* We must support XFmode loads into general registers for stdarg/vararg,
1678 unprototyped calls, and a rare case where a long double is passed as
1679 an argument after a float HFA fills the FP registers. We split them into
1680 DImode loads for convenience. We also need to support XFmode stores
1681 for the last case. This case does not happen for stdarg/vararg routines,
1682 because we do a block store to memory of unnamed arguments. */
1683
1684 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1685 {
1686 rtx out[2];
1687
1688 /* We're hoping to transform everything that deals with XFmode
1689 quantities and GR registers early in the compiler. */
b3a13419 1690 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1691
1692 /* Struct to register can just use TImode instead. */
1693 if ((GET_CODE (operands[1]) == SUBREG
1694 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1695 || (GET_CODE (operands[1]) == REG
1696 && GR_REGNO_P (REGNO (operands[1]))))
1697 {
1698 rtx op1 = operands[1];
1699
1700 if (GET_CODE (op1) == SUBREG)
1701 op1 = SUBREG_REG (op1);
1702 else
1703 op1 = gen_rtx_REG (TImode, REGNO (op1));
1704
1705 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1706 return true;
1707 }
1708
1709 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1710 {
ae4d3291 1711 /* Don't word-swap when reading in the constant. */
4de67c26 1712 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
ae4d3291
JW
1713 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1714 0, mode));
4de67c26 1715 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
ae4d3291
JW
1716 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1717 0, mode));
4de67c26
JM
1718 return true;
1719 }
1720
1721 /* If the quantity is in a register not known to be GR, spill it. */
1722 if (register_operand (operands[1], mode))
1723 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1724
1725 gcc_assert (GET_CODE (operands[1]) == MEM);
1726
ae4d3291
JW
1727 /* Don't word-swap when reading in the value. */
1728 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1729 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
4de67c26
JM
1730
1731 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1732 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1733 return true;
1734 }
1735
1736 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1737 {
1738 /* We're hoping to transform everything that deals with XFmode
1739 quantities and GR registers early in the compiler. */
b3a13419 1740 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1741
1742 /* Op0 can't be a GR_REG here, as that case is handled above.
1743 If op0 is a register, then we spill op1, so that we now have a
1744 MEM operand. This requires creating an XFmode subreg of a TImode reg
1745 to force the spill. */
1746 if (register_operand (operands[0], mode))
1747 {
1748 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1749 op1 = gen_rtx_SUBREG (mode, op1, 0);
1750 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1751 }
1752
1753 else
1754 {
1755 rtx in[2];
1756
ae4d3291
JW
1757 gcc_assert (GET_CODE (operands[0]) == MEM);
1758
1759 /* Don't word-swap when writing out the value. */
1760 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1761 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
4de67c26
JM
1762
1763 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1764 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1765 return true;
1766 }
1767 }
1768
1769 if (!reload_in_progress && !reload_completed)
1770 {
1771 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1772
1773 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1774 {
1775 rtx memt, memx, in = operands[1];
1776 if (CONSTANT_P (in))
1777 in = validize_mem (force_const_mem (mode, in));
1778 if (GET_CODE (in) == MEM)
1779 memt = adjust_address (in, TImode, 0);
1780 else
1781 {
9474e8ab 1782 memt = assign_stack_temp (TImode, 16);
4de67c26
JM
1783 memx = adjust_address (memt, mode, 0);
1784 emit_move_insn (memx, in);
1785 }
1786 emit_move_insn (op0, memt);
1787 return true;
1788 }
1789
1790 if (!ia64_move_ok (operands[0], operands[1]))
1791 operands[1] = force_reg (mode, operands[1]);
1792 }
1793
1794 return false;
1795}
1796
f90b7a5a
PB
1797/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1798 with the expression that holds the compare result (in VOIDmode). */
f2f90c63 1799
24ea7948
ZW
1800static GTY(()) rtx cmptf_libfunc;
1801
f90b7a5a
PB
1802void
1803ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
f2f90c63 1804{
f90b7a5a 1805 enum rtx_code code = GET_CODE (*expr);
f2f90c63
RH
1806 rtx cmp;
1807
1808 /* If we have a BImode input, then we already have a compare result, and
1809 do not need to emit another comparison. */
f90b7a5a 1810 if (GET_MODE (*op0) == BImode)
f2f90c63 1811 {
f90b7a5a
PB
1812 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1813 cmp = *op0;
f2f90c63 1814 }
24ea7948
ZW
1815 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1816 magic number as its third argument, which indicates what to do.
1817 The return value is an integer to be compared against zero. */
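/* For illustration, with the encoding used in the switch below, a <= b
   is lowered to roughly

       _U_Qfcmp (a, b, QCMP_LT | QCMP_EQ | QCMP_INV) != 0

   i.e. the library routine evaluates the predicate named by the magic
   number and we test its integer result against zero.  */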
f90b7a5a 1818 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
24ea7948
ZW
1819 {
1820 enum qfcmp_magic {
8fc53a5f 1821 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
24ea7948
ZW
1822 QCMP_UNORD = 2,
1823 QCMP_EQ = 4,
1824 QCMP_LT = 8,
1825 QCMP_GT = 16
32e8bb8e
ILT
1826 };
1827 int magic;
24ea7948 1828 enum rtx_code ncode;
9b2ea071 1829 rtx ret;
e820471b 1830
f90b7a5a 1831 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
24ea7948
ZW
1832 switch (code)
1833 {
1834 /* 1 = equal, 0 = not equal. Equality operators do
8fc53a5f 1835 not raise FP_INVALID when given a NaN operand. */
24ea7948
ZW
1836 case EQ: magic = QCMP_EQ; ncode = NE; break;
1837 case NE: magic = QCMP_EQ; ncode = EQ; break;
1838 /* isunordered() from C99. */
1839 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
b1346fa3 1840 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
24ea7948 1841 /* Relational operators raise FP_INVALID when given
8fc53a5f 1842 a NaN operand. */
24ea7948
ZW
1843 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1844 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1845 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1846 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
8fc53a5f
EB
1847 /* Unordered relational operators do not raise FP_INVALID
1848 when given a NaN operand. */
1849 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1850 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1851 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1852 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1853 /* Not supported. */
1854 case UNEQ:
1855 case LTGT:
e820471b 1856 default: gcc_unreachable ();
24ea7948
ZW
1857 }
1858
1859 start_sequence ();
1860
db69559b 1861 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
f90b7a5a 1862 *op0, TFmode, *op1, TFmode,
24ea7948
ZW
1863 GEN_INT (magic), DImode);
1864 cmp = gen_reg_rtx (BImode);
f7df4a84
RS
1865 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1866 ret, const0_rtx)));
24ea7948 1867
9b2ea071 1868 rtx_insn *insns = get_insns ();
24ea7948
ZW
1869 end_sequence ();
1870
1871 emit_libcall_block (insns, cmp, cmp,
f90b7a5a 1872 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
24ea7948
ZW
1873 code = NE;
1874 }
f2f90c63
RH
1875 else
1876 {
1877 cmp = gen_reg_rtx (BImode);
f7df4a84 1878 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
f2f90c63
RH
1879 code = NE;
1880 }
1881
f90b7a5a
PB
1882 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1883 *op0 = cmp;
1884 *op1 = const0_rtx;
f2f90c63 1885}
2ed4af6f 1886
e934ca47
RH
1887/* Generate an integral vector comparison. Return true if the condition has
1888 been reversed, and so the sense of the comparison should be inverted. */
f61134e8
RH
1889
1890static bool
ef4bddc2 1891ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
f61134e8
RH
1892 rtx dest, rtx op0, rtx op1)
1893{
1894 bool negate = false;
1895 rtx x;
1896
e934ca47 1897 /* Canonicalize the comparison to EQ, GT, GTU. */
f61134e8
RH
1898 switch (code)
1899 {
1900 case EQ:
1901 case GT:
e934ca47 1902 case GTU:
f61134e8
RH
1903 break;
1904
1905 case NE:
f61134e8 1906 case LE:
e934ca47
RH
1907 case LEU:
1908 code = reverse_condition (code);
f61134e8
RH
1909 negate = true;
1910 break;
1911
1912 case GE:
e934ca47
RH
1913 case GEU:
1914 code = reverse_condition (code);
f61134e8
RH
1915 negate = true;
1916 /* FALLTHRU */
1917
1918 case LT:
f61134e8 1919 case LTU:
e934ca47
RH
1920 code = swap_condition (code);
1921 x = op0, op0 = op1, op1 = x;
1922 break;
f61134e8 1923
e934ca47
RH
1924 default:
1925 gcc_unreachable ();
1926 }
f61134e8 1927
e934ca47 1928 /* Unsigned parallel compare is not supported by the hardware. Play some
6283ba26 1929 tricks to turn this into a signed comparison or an equality test against zero. */
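/* A sketch of the two tricks used below (not literal output):
   for V2SImode, x >u y iff (x - 0x80000000) >s (y - 0x80000000), so both
   operands are biased by INT_MIN and a signed GT is emitted; for V8QImode
   and V4HImode, x >u y iff the unsigned saturating difference x -us y is
   nonzero, so an equality test against zero is emitted and the sense of
   the result is inverted.  */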
e934ca47
RH
1930 if (code == GTU)
1931 {
1932 switch (mode)
1933 {
4e10a5a7 1934 case E_V2SImode:
f61134e8 1935 {
e934ca47
RH
1936 rtx t1, t2, mask;
1937
9540f5ef
SE
1938 /* Subtract (-(INT MAX) - 1) from both operands to make
1939 them signed. */
6a8b00eb 1940 mask = gen_int_mode (0x80000000, SImode);
59d06c05 1941 mask = gen_const_vec_duplicate (V2SImode, mask);
9540f5ef
SE
1942 mask = force_reg (mode, mask);
1943 t1 = gen_reg_rtx (mode);
1944 emit_insn (gen_subv2si3 (t1, op0, mask));
1945 t2 = gen_reg_rtx (mode);
1946 emit_insn (gen_subv2si3 (t2, op1, mask));
1947 op0 = t1;
1948 op1 = t2;
6283ba26 1949 code = GT;
f61134e8 1950 }
e934ca47
RH
1951 break;
1952
4e10a5a7
RS
1953 case E_V8QImode:
1954 case E_V4HImode:
e934ca47
RH
1955 /* Perform a parallel unsigned saturating subtraction. */
1956 x = gen_reg_rtx (mode);
f7df4a84 1957 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
6283ba26
RH
1958
1959 code = EQ;
1960 op0 = x;
1961 op1 = CONST0_RTX (mode);
1962 negate = !negate;
e934ca47
RH
1963 break;
1964
1965 default:
1966 gcc_unreachable ();
1967 }
f61134e8
RH
1968 }
1969
1970 x = gen_rtx_fmt_ee (code, mode, op0, op1);
f7df4a84 1971 emit_insn (gen_rtx_SET (dest, x));
f61134e8
RH
1972
1973 return negate;
1974}
1975
f61134e8
RH
1976/* Emit an integral vector conditional move. */
1977
1978void
1979ia64_expand_vecint_cmov (rtx operands[])
1980{
ef4bddc2 1981 machine_mode mode = GET_MODE (operands[0]);
f61134e8
RH
1982 enum rtx_code code = GET_CODE (operands[3]);
1983 bool negate;
1984 rtx cmp, x, ot, of;
1985
f61134e8
RH
1986 cmp = gen_reg_rtx (mode);
1987 negate = ia64_expand_vecint_compare (code, mode, cmp,
1988 operands[4], operands[5]);
1989
1990 ot = operands[1+negate];
1991 of = operands[2-negate];
1992
1993 if (ot == CONST0_RTX (mode))
1994 {
1995 if (of == CONST0_RTX (mode))
1996 {
1997 emit_move_insn (operands[0], ot);
1998 return;
1999 }
2000
2001 x = gen_rtx_NOT (mode, cmp);
2002 x = gen_rtx_AND (mode, x, of);
f7df4a84 2003 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
2004 }
2005 else if (of == CONST0_RTX (mode))
2006 {
2007 x = gen_rtx_AND (mode, cmp, ot);
f7df4a84 2008 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
2009 }
2010 else
2011 {
2012 rtx t, f;
2013
2014 t = gen_reg_rtx (mode);
2015 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
f7df4a84 2016 emit_insn (gen_rtx_SET (t, x));
f61134e8
RH
2017
2018 f = gen_reg_rtx (mode);
2019 x = gen_rtx_NOT (mode, cmp);
2020 x = gen_rtx_AND (mode, x, operands[2-negate]);
f7df4a84 2021 emit_insn (gen_rtx_SET (f, x));
f61134e8
RH
2022
2023 x = gen_rtx_IOR (mode, t, f);
f7df4a84 2024 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
2025 }
2026}
2027
2028/* Emit an integral vector min or max operation. Return true if all done. */
2029
2030bool
ef4bddc2 2031ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
f61134e8
RH
2032 rtx operands[])
2033{
cabddb23 2034 rtx xops[6];
f61134e8
RH
2035
2036 /* These four combinations are supported directly. */
2037 if (mode == V8QImode && (code == UMIN || code == UMAX))
2038 return false;
2039 if (mode == V4HImode && (code == SMIN || code == SMAX))
2040 return false;
2041
93b4080b
RH
2042 /* This combination can be implemented with only saturating subtraction. */
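/* The per-element identity relied on here: in unsigned saturating
   arithmetic a -us b equals MAX (a - b, 0), hence (a -us b) + b is
   MAX (a, b).  A scalar sketch of the same identity:

       unsigned short umax16 (unsigned short a, unsigned short b)
       {
         unsigned short d = a > b ? a - b : 0;
         return d + b;
       }
*/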
2043 if (mode == V4HImode && code == UMAX)
2044 {
2045 rtx x, tmp = gen_reg_rtx (mode);
2046
2047 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
f7df4a84 2048 emit_insn (gen_rtx_SET (tmp, x));
93b4080b
RH
2049
2050 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2051 return true;
2052 }
2053
f61134e8
RH
2054 /* Everything else implemented via vector comparisons. */
2055 xops[0] = operands[0];
2056 xops[4] = xops[1] = operands[1];
2057 xops[5] = xops[2] = operands[2];
2058
2059 switch (code)
2060 {
2061 case UMIN:
2062 code = LTU;
2063 break;
2064 case UMAX:
2065 code = GTU;
2066 break;
2067 case SMIN:
2068 code = LT;
2069 break;
2070 case SMAX:
2071 code = GT;
2072 break;
2073 default:
e820471b 2074 gcc_unreachable ();
f61134e8
RH
2075 }
2076 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2077
2078 ia64_expand_vecint_cmov (xops);
2079 return true;
2080}
2081
55eaaa5b
RH
2082/* The vectors LO and HI each contain N halves of a double-wide vector.
2083 Reassemble either the first N/2 or the second N/2 elements. */
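/* Illustration of the permutation built below: for V8QImode (nelt == 8)
   on a little-endian target with highp == false, the selected elements are

       { lo[0], hi[0], lo[1], hi[1], lo[2], hi[2], lo[3], hi[3] }

   i.e. perm = { 0, 8, 1, 9, 2, 10, 3, 11 }; with highp == true the same
   interleave is done on elements 4..7 of each input.  */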
604e3ff3
RH
2084
2085void
55eaaa5b 2086ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
604e3ff3 2087{
ef4bddc2 2088 machine_mode vmode = GET_MODE (lo);
e6431744
RH
2089 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2090 struct expand_vec_perm_d d;
2091 bool ok;
604e3ff3 2092
e6431744
RH
2093 d.target = gen_lowpart (vmode, out);
2094 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2095 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2096 d.vmode = vmode;
2097 d.nelt = nelt;
2098 d.one_operand_p = false;
2099 d.testing_p = false;
2100
2101 high = (highp ? nelt / 2 : 0);
2102 for (i = 0; i < nelt / 2; ++i)
604e3ff3 2103 {
e6431744
RH
2104 d.perm[i * 2] = i + high;
2105 d.perm[i * 2 + 1] = i + high + nelt;
604e3ff3
RH
2106 }
2107
e6431744
RH
2108 ok = ia64_expand_vec_perm_const_1 (&d);
2109 gcc_assert (ok);
604e3ff3
RH
2110}
2111
55eaaa5b 2112/* Return a vector of the sign-extension of VEC. */
e898620c 2113
55eaaa5b
RH
2114static rtx
2115ia64_unpack_sign (rtx vec, bool unsignedp)
e898620c 2116{
ef4bddc2 2117 machine_mode mode = GET_MODE (vec);
55eaaa5b 2118 rtx zero = CONST0_RTX (mode);
e898620c 2119
e898620c 2120 if (unsignedp)
55eaaa5b 2121 return zero;
e898620c
RH
2122 else
2123 {
55eaaa5b 2124 rtx sign = gen_reg_rtx (mode);
e898620c
RH
2125 bool neg;
2126
55eaaa5b 2127 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
e898620c 2128 gcc_assert (!neg);
55eaaa5b
RH
2129
2130 return sign;
e898620c 2131 }
55eaaa5b 2132}
e898620c 2133
55eaaa5b 2134/* Emit an integral vector unpack operation. */
e898620c 2135
55eaaa5b
RH
2136void
2137ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2138{
2139 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2140 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
e898620c
RH
2141}
2142
55eaaa5b
RH
2143/* Emit an integral vector widening sum operation. */
2144
604e3ff3 2145void
55eaaa5b 2146ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
604e3ff3 2147{
ef4bddc2 2148 machine_mode wmode;
55eaaa5b 2149 rtx l, h, t, sign;
604e3ff3 2150
55eaaa5b
RH
2151 sign = ia64_unpack_sign (operands[1], unsignedp);
2152
2153 wmode = GET_MODE (operands[0]);
2154 l = gen_reg_rtx (wmode);
2155 h = gen_reg_rtx (wmode);
604e3ff3 2156
55eaaa5b
RH
2157 ia64_unpack_assemble (l, operands[1], sign, false);
2158 ia64_unpack_assemble (h, operands[1], sign, true);
604e3ff3 2159
55eaaa5b
RH
2160 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2161 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2162 if (t != operands[0])
2163 emit_move_insn (operands[0], t);
604e3ff3
RH
2164}
2165
2ed4af6f
RH
2166/* Emit the appropriate sequence for a call. */
2167
2168void
9c808aad
AJ
2169ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2170 int sibcall_p)
2ed4af6f 2171{
599aedd9 2172 rtx insn, b0;
2ed4af6f
RH
2173
2174 addr = XEXP (addr, 0);
c8083186 2175 addr = convert_memory_address (DImode, addr);
2ed4af6f 2176 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 2177
599aedd9 2178 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
2179 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2180 {
2181 if (sibcall_p)
599aedd9 2182 insn = gen_sibcall_nogp (addr);
2ed4af6f 2183 else if (! retval)
599aedd9 2184 insn = gen_call_nogp (addr, b0);
2ed4af6f 2185 else
599aedd9
RH
2186 insn = gen_call_value_nogp (retval, addr, b0);
2187 insn = emit_call_insn (insn);
2ed4af6f 2188 }
2ed4af6f 2189 else
599aedd9
RH
2190 {
2191 if (sibcall_p)
2192 insn = gen_sibcall_gp (addr);
2193 else if (! retval)
2194 insn = gen_call_gp (addr, b0);
2195 else
2196 insn = gen_call_value_gp (retval, addr, b0);
2197 insn = emit_call_insn (insn);
2ed4af6f 2198
599aedd9
RH
2199 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2200 }
6dad5a56 2201
599aedd9 2202 if (sibcall_p)
4e14f1f9 2203 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
f2972bf8
DR
2204
2205 if (TARGET_ABI_OPEN_VMS)
2206 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2207 gen_rtx_REG (DImode, GR_REG (25)));
599aedd9
RH
2208}
2209
6fb5fa3c
DB
2210static void
2211reg_emitted (enum ia64_frame_regs r)
2212{
2213 if (emitted_frame_related_regs[r] == 0)
2214 emitted_frame_related_regs[r] = current_frame_info.r[r];
2215 else
2216 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2217}
2218
2219static int
2220get_reg (enum ia64_frame_regs r)
2221{
2222 reg_emitted (r);
2223 return current_frame_info.r[r];
2224}
2225
2226static bool
2227is_emitted (int regno)
2228{
09639a83 2229 unsigned int r;
6fb5fa3c
DB
2230
2231 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2232 if (emitted_frame_related_regs[r] == regno)
2233 return true;
2234 return false;
2235}
2236
599aedd9 2237void
9c808aad 2238ia64_reload_gp (void)
599aedd9
RH
2239{
2240 rtx tmp;
2241
6fb5fa3c
DB
2242 if (current_frame_info.r[reg_save_gp])
2243 {
2244 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2245 }
2ed4af6f 2246 else
599aedd9
RH
2247 {
2248 HOST_WIDE_INT offset;
13f70342 2249 rtx offset_r;
599aedd9
RH
2250
2251 offset = (current_frame_info.spill_cfa_off
2252 + current_frame_info.spill_size);
2253 if (frame_pointer_needed)
2254 {
2255 tmp = hard_frame_pointer_rtx;
2256 offset = -offset;
2257 }
2258 else
2259 {
2260 tmp = stack_pointer_rtx;
2261 offset = current_frame_info.total_size - offset;
2262 }
2263
13f70342
RH
2264 offset_r = GEN_INT (offset);
2265 if (satisfies_constraint_I (offset_r))
2266 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
599aedd9
RH
2267 else
2268 {
13f70342 2269 emit_move_insn (pic_offset_table_rtx, offset_r);
599aedd9
RH
2270 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2271 pic_offset_table_rtx, tmp));
2272 }
2273
2274 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2275 }
2276
2277 emit_move_insn (pic_offset_table_rtx, tmp);
2278}
2279
2280void
9c808aad
AJ
2281ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2282 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
2283{
2284 rtx insn;
2285 bool is_desc = false;
2286
2287 /* If we find we're calling through a register, then we're actually
2288 calling through a descriptor, so load up the values. */
4e14f1f9 2289 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
2290 {
2291 rtx tmp;
2292 bool addr_dead_p;
2293
2294 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 2295 we can legitimately change the global lifetime of the GP
9c808aad 2296 (in the form of killing where previously live). This is
599aedd9
RH
2297 because a call through a descriptor doesn't use the previous
2298 value of the GP, while a direct call does, and we do not
2299 commit to either form until the split here.
2300
2301 That said, this means that we lack precise life info for
2302 whether ADDR is dead after this call. This is not terribly
2303 important, since we can fix things up essentially for free
2304 with the POST_DEC below, but it's nice to not use it when we
2305 can immediately tell it's not necessary. */
2306 addr_dead_p = ((noreturn_p || sibcall_p
2307 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2308 REGNO (addr)))
2309 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2310
2311 /* Load the code address into scratch_b. */
2312 tmp = gen_rtx_POST_INC (Pmode, addr);
2313 tmp = gen_rtx_MEM (Pmode, tmp);
2314 emit_move_insn (scratch_r, tmp);
2315 emit_move_insn (scratch_b, scratch_r);
2316
2317 /* Load the GP address. If ADDR is not dead here, then we must
2318 revert the change made above via the POST_INCREMENT. */
2319 if (!addr_dead_p)
2320 tmp = gen_rtx_POST_DEC (Pmode, addr);
2321 else
2322 tmp = addr;
2323 tmp = gen_rtx_MEM (Pmode, tmp);
2324 emit_move_insn (pic_offset_table_rtx, tmp);
2325
2326 is_desc = true;
2327 addr = scratch_b;
2328 }
2ed4af6f 2329
6dad5a56 2330 if (sibcall_p)
599aedd9
RH
2331 insn = gen_sibcall_nogp (addr);
2332 else if (retval)
2333 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 2334 else
599aedd9 2335 insn = gen_call_nogp (addr, retaddr);
6dad5a56 2336 emit_call_insn (insn);
2ed4af6f 2337
599aedd9
RH
2338 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2339 ia64_reload_gp ();
2ed4af6f 2340}
16df4ee6
RH
2341
2342/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2343
2344 This differs from the generic code in that we know about the zero-extending
2345 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2346 also know that ld.acq+cmpxchg.rel equals a full barrier.
2347
2348 The loop we want to generate looks like
2349
2350 cmp_reg = mem;
2351 label:
2352 old_reg = cmp_reg;
2353 new_reg = cmp_reg op val;
2354 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2355 if (cmp_reg != old_reg)
2356 goto label;
2357
2358 Note that we only do the plain load from memory once. Subsequent
2359 iterations use the value loaded by the compare-and-swap pattern. */
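/* A rough sketch (illustrative only, and assuming fetchadd does not apply)
   of what this expands to for a 4-byte PLUS with acquire semantics:

       ld4.acq      cmp = [mem]
   loop:
       mov          old = cmp
       add          new = cmp, val
       mov          ar.ccv = old          // value cmpxchg compares against
       cmpxchg4.acq cmp = [mem], new, ar.ccv
       cmp.ne       p6, p0 = cmp, old
   (p6) br.cond.spnt loop
*/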
2360
2361void
2362ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
28875d67 2363 rtx old_dst, rtx new_dst, enum memmodel model)
16df4ee6 2364{
ef4bddc2 2365 machine_mode mode = GET_MODE (mem);
16df4ee6
RH
2366 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2367 enum insn_code icode;
2368
2369 /* Special case for using fetchadd. */
dca13767
JJ
2370 if ((mode == SImode || mode == DImode)
2371 && (code == PLUS || code == MINUS)
2372 && fetchadd_operand (val, mode))
16df4ee6 2373 {
dca13767
JJ
2374 if (code == MINUS)
2375 val = GEN_INT (-INTVAL (val));
2376
16df4ee6
RH
2377 if (!old_dst)
2378 old_dst = gen_reg_rtx (mode);
2379
28875d67
RH
2380 switch (model)
2381 {
2382 case MEMMODEL_ACQ_REL:
2383 case MEMMODEL_SEQ_CST:
46b35980 2384 case MEMMODEL_SYNC_SEQ_CST:
28875d67
RH
2385 emit_insn (gen_memory_barrier ());
2386 /* FALLTHRU */
2387 case MEMMODEL_RELAXED:
2388 case MEMMODEL_ACQUIRE:
46b35980 2389 case MEMMODEL_SYNC_ACQUIRE:
28875d67
RH
2390 case MEMMODEL_CONSUME:
2391 if (mode == SImode)
2392 icode = CODE_FOR_fetchadd_acq_si;
2393 else
2394 icode = CODE_FOR_fetchadd_acq_di;
2395 break;
2396 case MEMMODEL_RELEASE:
46b35980 2397 case MEMMODEL_SYNC_RELEASE:
28875d67
RH
2398 if (mode == SImode)
2399 icode = CODE_FOR_fetchadd_rel_si;
2400 else
2401 icode = CODE_FOR_fetchadd_rel_di;
2402 break;
2403
2404 default:
2405 gcc_unreachable ();
2406 }
16df4ee6 2407
16df4ee6
RH
2408 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2409
2410 if (new_dst)
2411 {
2412 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2413 true, OPTAB_WIDEN);
2414 if (new_reg != new_dst)
2415 emit_move_insn (new_dst, new_reg);
2416 }
2417 return;
2418 }
2419
2420 /* Because of the volatile mem read, we get an ld.acq, which is the
28875d67
RH
2421 front half of the full barrier. The end half is the cmpxchg.rel.
2422 For relaxed and release memory models, we don't need this. But we
2423 also don't bother trying to prevent it either. */
46b35980 2424 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
28875d67 2425 || MEM_VOLATILE_P (mem));
16df4ee6
RH
2426
2427 old_reg = gen_reg_rtx (DImode);
2428 cmp_reg = gen_reg_rtx (DImode);
2429 label = gen_label_rtx ();
2430
2431 if (mode != DImode)
2432 {
2433 val = simplify_gen_subreg (DImode, val, mode, 0);
2434 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2435 }
2436 else
2437 emit_move_insn (cmp_reg, mem);
2438
2439 emit_label (label);
2440
2441 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2442 emit_move_insn (old_reg, cmp_reg);
2443 emit_move_insn (ar_ccv, cmp_reg);
2444
2445 if (old_dst)
2446 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2447
2448 new_reg = cmp_reg;
2449 if (code == NOT)
2450 {
974920dc
UB
2451 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2452 true, OPTAB_DIRECT);
2453 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
16df4ee6 2454 }
974920dc
UB
2455 else
2456 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2457 true, OPTAB_DIRECT);
16df4ee6
RH
2458
2459 if (mode != DImode)
2460 new_reg = gen_lowpart (mode, new_reg);
2461 if (new_dst)
2462 emit_move_insn (new_dst, new_reg);
2463
28875d67 2464 switch (model)
16df4ee6 2465 {
28875d67
RH
2466 case MEMMODEL_RELAXED:
2467 case MEMMODEL_ACQUIRE:
46b35980 2468 case MEMMODEL_SYNC_ACQUIRE:
28875d67
RH
2469 case MEMMODEL_CONSUME:
2470 switch (mode)
2471 {
4e10a5a7
RS
2472 case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2473 case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2474 case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2475 case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
28875d67
RH
2476 default:
2477 gcc_unreachable ();
2478 }
2479 break;
2480
2481 case MEMMODEL_RELEASE:
46b35980 2482 case MEMMODEL_SYNC_RELEASE:
28875d67
RH
2483 case MEMMODEL_ACQ_REL:
2484 case MEMMODEL_SEQ_CST:
46b35980 2485 case MEMMODEL_SYNC_SEQ_CST:
28875d67
RH
2486 switch (mode)
2487 {
4e10a5a7
RS
2488 case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2489 case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2490 case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2491 case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
28875d67
RH
2492 default:
2493 gcc_unreachable ();
2494 }
2495 break;
2496
16df4ee6
RH
2497 default:
2498 gcc_unreachable ();
2499 }
2500
2501 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2502
6819a463 2503 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
16df4ee6 2504}
809d4ef1 2505\f
3b572406
RH
2506/* Begin the assembly file. */
2507
1bc7c5b6 2508static void
9c808aad 2509ia64_file_start (void)
1bc7c5b6
ZW
2510{
2511 default_file_start ();
2512 emit_safe_across_calls ();
2513}
2514
3b572406 2515void
9c808aad 2516emit_safe_across_calls (void)
3b572406
RH
2517{
2518 unsigned int rs, re;
2519 int out_state;
2520
2521 rs = 1;
2522 out_state = 0;
2523 while (1)
2524 {
2525 while (rs < 64 && call_used_regs[PR_REG (rs)])
2526 rs++;
2527 if (rs >= 64)
2528 break;
2529 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2530 continue;
2531 if (out_state == 0)
2532 {
1bc7c5b6 2533 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
2534 out_state = 1;
2535 }
2536 else
1bc7c5b6 2537 fputc (',', asm_out_file);
3b572406 2538 if (re == rs + 1)
1bc7c5b6 2539 fprintf (asm_out_file, "p%u", rs);
3b572406 2540 else
1bc7c5b6 2541 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
2542 rs = re + 1;
2543 }
2544 if (out_state)
1bc7c5b6 2545 fputc ('\n', asm_out_file);
3b572406
RH
2546}
2547
812b587e
SE
2548/* Globalize a declaration. */
2549
2550static void
2551ia64_globalize_decl_name (FILE * stream, tree decl)
2552{
2553 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2554 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2555 if (version_attr)
2556 {
2557 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2558 const char *p = TREE_STRING_POINTER (v);
2559 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2560 }
2561 targetm.asm_out.globalize_label (stream, name);
2562 if (TREE_CODE (decl) == FUNCTION_DECL)
2563 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2564}
2565
97e242b0
RH
2566/* Helper function for ia64_compute_frame_size: find an appropriate general
2567 register to spill some special register to. SPECIAL_SPILL_MASK contains
2568 bits in GR0 to GR31 that have already been allocated by this routine.
2569 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 2570
97e242b0 2571static int
6fb5fa3c 2572find_gr_spill (enum ia64_frame_regs r, int try_locals)
97e242b0
RH
2573{
2574 int regno;
2575
6fb5fa3c
DB
2576 if (emitted_frame_related_regs[r] != 0)
2577 {
2578 regno = emitted_frame_related_regs[r];
2951f79b
JJ
2579 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2580 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
6fb5fa3c 2581 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
416ff32e 2582 else if (crtl->is_leaf
6fb5fa3c
DB
2583 && regno >= GR_REG (1) && regno <= GR_REG (31))
2584 current_frame_info.gr_used_mask |= 1 << regno;
2585
2586 return regno;
2587 }
2588
97e242b0
RH
2589 /* If this is a leaf function, first try an otherwise unused
2590 call-clobbered register. */
416ff32e 2591 if (crtl->is_leaf)
97e242b0
RH
2592 {
2593 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2594 if (! df_regs_ever_live_p (regno)
97e242b0
RH
2595 && call_used_regs[regno]
2596 && ! fixed_regs[regno]
2597 && ! global_regs[regno]
6fb5fa3c
DB
2598 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2599 && ! is_emitted (regno))
97e242b0
RH
2600 {
2601 current_frame_info.gr_used_mask |= 1 << regno;
2602 return regno;
2603 }
2604 }
2605
2606 if (try_locals)
2607 {
2608 regno = current_frame_info.n_local_regs;
9502c558
JW
2609 /* If there is a frame pointer, then we can't use loc79, because
2610 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2611 reg_name switching code in ia64_expand_prologue. */
2951f79b
JJ
2612 while (regno < (80 - frame_pointer_needed))
2613 if (! is_emitted (LOC_REG (regno++)))
2614 {
2615 current_frame_info.n_local_regs = regno;
2616 return LOC_REG (regno - 1);
2617 }
97e242b0
RH
2618 }
2619
2620 /* Failed to find a general register to spill to. Must use stack. */
2621 return 0;
2622}
2623
2624/* In order to make for nice schedules, we try to allocate every temporary
2625 to a different register. We must of course stay away from call-saved,
2626 fixed, and global registers. We must also stay away from registers
2627 allocated in current_frame_info.gr_used_mask, since those include regs
2628 used all through the prologue.
2629
2630 Any register allocated here must be used immediately. The idea is to
2631 aid scheduling, not to solve data flow problems. */
2632
2633static int last_scratch_gr_reg;
2634
2635static int
9c808aad 2636next_scratch_gr_reg (void)
97e242b0
RH
2637{
2638 int i, regno;
2639
2640 for (i = 0; i < 32; ++i)
2641 {
2642 regno = (last_scratch_gr_reg + i + 1) & 31;
2643 if (call_used_regs[regno]
2644 && ! fixed_regs[regno]
2645 && ! global_regs[regno]
2646 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2647 {
2648 last_scratch_gr_reg = regno;
2649 return regno;
2650 }
2651 }
2652
2653 /* There must be _something_ available. */
e820471b 2654 gcc_unreachable ();
97e242b0
RH
2655}
2656
2657/* Helper function for ia64_compute_frame_size, called through
2658 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2659
2660static void
9c808aad 2661mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2662{
97e242b0
RH
2663 unsigned int regno = REGNO (reg);
2664 if (regno < 32)
f95e79cc 2665 {
462a99aa 2666 unsigned int i, n = REG_NREGS (reg);
f95e79cc
RH
2667 for (i = 0; i < n; ++i)
2668 current_frame_info.gr_used_mask |= 1 << (regno + i);
2669 }
c65ebc55
JW
2670}
2671
6fb5fa3c 2672
c65ebc55
JW
2673/* Returns the number of bytes offset between the frame pointer and the stack
2674 pointer for the current function. SIZE is the number of bytes of space
2675 needed for local variables. */
97e242b0
RH
2676
2677static void
9c808aad 2678ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2679{
97e242b0
RH
2680 HOST_WIDE_INT total_size;
2681 HOST_WIDE_INT spill_size = 0;
2682 HOST_WIDE_INT extra_spill_size = 0;
2683 HOST_WIDE_INT pretend_args_size;
c65ebc55 2684 HARD_REG_SET mask;
97e242b0
RH
2685 int n_spilled = 0;
2686 int spilled_gr_p = 0;
2687 int spilled_fr_p = 0;
2688 unsigned int regno;
2951f79b
JJ
2689 int min_regno;
2690 int max_regno;
97e242b0 2691 int i;
c65ebc55 2692
97e242b0
RH
2693 if (current_frame_info.initialized)
2694 return;
294dac80 2695
97e242b0 2696 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2697 CLEAR_HARD_REG_SET (mask);
2698
97e242b0
RH
2699 /* Don't allocate scratches to the return register. */
2700 diddle_return_value (mark_reg_gr_used_mask, NULL);
2701
2702 /* Don't allocate scratches to the EH scratch registers. */
2703 if (cfun->machine->ia64_eh_epilogue_sp)
2704 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2705 if (cfun->machine->ia64_eh_epilogue_bsp)
2706 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2707
7b84aac0 2708 /* Static stack checking uses r2 and r3. */
9c1b56c4
JL
2709 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
2710 || flag_stack_clash_protection)
7b84aac0
EB
2711 current_frame_info.gr_used_mask |= 0xc;
2712
97e242b0
RH
2713 /* Find the size of the register stack frame. We have only 80 local
2714 registers, because we reserve 8 for the inputs and 8 for the
2715 outputs. */
2716
2717 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2718 since we'll be adjusting that down later. */
2719 regno = LOC_REG (78) + ! frame_pointer_needed;
2720 for (; regno >= LOC_REG (0); regno--)
6fb5fa3c 2721 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
97e242b0
RH
2722 break;
2723 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2724
3f67ac08
DM
2725 /* For functions marked with the syscall_linkage attribute, we must mark
2726 all eight input registers as in use, so that locals aren't visible to
2727 the caller. */
2728
2729 if (cfun->machine->n_varargs > 0
2730 || lookup_attribute ("syscall_linkage",
2731 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2732 current_frame_info.n_input_regs = 8;
2733 else
2734 {
2735 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
6fb5fa3c 2736 if (df_regs_ever_live_p (regno))
97e242b0
RH
2737 break;
2738 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2739 }
2740
2741 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
6fb5fa3c 2742 if (df_regs_ever_live_p (regno))
97e242b0
RH
2743 break;
2744 i = regno - OUT_REG (0) + 1;
2745
d26afa4f 2746#ifndef PROFILE_HOOK
97e242b0 2747 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2748 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2749 profiling, we need two output registers for the two bb_init_trace_func
2750 arguments. */
e3b5732b 2751 if (crtl->profile)
97e242b0 2752 i = MAX (i, 1);
d26afa4f 2753#endif
97e242b0
RH
2754 current_frame_info.n_output_regs = i;
2755
2756 /* ??? No rotating register support yet. */
2757 current_frame_info.n_rotate_regs = 0;
2758
2759 /* Discover which registers need spilling, and how much room that
9c808aad 2760 will take. Begin with floating point and general registers,
97e242b0
RH
2761 which will always wind up on the stack. */
2762
2763 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
6fb5fa3c 2764 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2765 {
2766 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2767 spill_size += 16;
2768 n_spilled += 1;
2769 spilled_fr_p = 1;
c65ebc55
JW
2770 }
2771
97e242b0 2772 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2773 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2774 {
2775 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2776 spill_size += 8;
2777 n_spilled += 1;
2778 spilled_gr_p = 1;
c65ebc55
JW
2779 }
2780
97e242b0 2781 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
6fb5fa3c 2782 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2783 {
2784 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2785 spill_size += 8;
2786 n_spilled += 1;
c65ebc55
JW
2787 }
2788
97e242b0
RH
2789 /* Now come all special registers that might get saved in other
2790 general registers. */
9c808aad 2791
97e242b0
RH
2792 if (frame_pointer_needed)
2793 {
6fb5fa3c 2794 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
0c35f902
JW
2795 /* If we did not get a register, then we take LOC79. This is guaranteed
2796 to be free, even if regs_ever_live is already set, because this is
2797 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2798 as we don't count loc79 above. */
6fb5fa3c 2799 if (current_frame_info.r[reg_fp] == 0)
0c35f902 2800 {
6fb5fa3c
DB
2801 current_frame_info.r[reg_fp] = LOC_REG (79);
2802 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
0c35f902 2803 }
97e242b0
RH
2804 }
2805
416ff32e 2806 if (! crtl->is_leaf)
c65ebc55 2807 {
97e242b0
RH
2808 /* Emit a save of BR0 if we call other functions. Do this even
2809 if this function doesn't return, as EH depends on this to be
2810 able to unwind the stack. */
2811 SET_HARD_REG_BIT (mask, BR_REG (0));
2812
6fb5fa3c
DB
2813 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2814 if (current_frame_info.r[reg_save_b0] == 0)
97e242b0 2815 {
ae1e2d4c 2816 extra_spill_size += 8;
97e242b0
RH
2817 n_spilled += 1;
2818 }
2819
2820 /* Similarly for ar.pfs. */
2821 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2822 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2823 if (current_frame_info.r[reg_save_ar_pfs] == 0)
97e242b0
RH
2824 {
2825 extra_spill_size += 8;
2826 n_spilled += 1;
2827 }
599aedd9
RH
2828
2829 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2830 registers are clobbered, so we fall back to the stack. */
6fb5fa3c 2831 current_frame_info.r[reg_save_gp]
e3b5732b 2832 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
6fb5fa3c 2833 if (current_frame_info.r[reg_save_gp] == 0)
599aedd9
RH
2834 {
2835 SET_HARD_REG_BIT (mask, GR_REG (1));
2836 spill_size += 8;
2837 n_spilled += 1;
2838 }
c65ebc55
JW
2839 }
2840 else
97e242b0 2841 {
6fb5fa3c 2842 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
97e242b0
RH
2843 {
2844 SET_HARD_REG_BIT (mask, BR_REG (0));
ae1e2d4c 2845 extra_spill_size += 8;
97e242b0
RH
2846 n_spilled += 1;
2847 }
f5bdba44 2848
6fb5fa3c 2849 if (df_regs_ever_live_p (AR_PFS_REGNUM))
f5bdba44
RH
2850 {
2851 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2852 current_frame_info.r[reg_save_ar_pfs]
2853 = find_gr_spill (reg_save_ar_pfs, 1);
2854 if (current_frame_info.r[reg_save_ar_pfs] == 0)
f5bdba44
RH
2855 {
2856 extra_spill_size += 8;
2857 n_spilled += 1;
2858 }
2859 }
97e242b0 2860 }
c65ebc55 2861
97e242b0
RH
2862 /* Unwind descriptor hackery: things are most efficient if we allocate
2863 consecutive GR save registers for RP, PFS, FP in that order. However,
2864 it is absolutely critical that FP get the only hard register that's
2865 guaranteed to be free, so we allocated it first. If all three did
2866 happen to be allocated hard regs, and are consecutive, rearrange them
6fb5fa3c
DB
2867 into the preferred order now.
2868
2869 If we have already emitted code for any of those registers,
2870 then it's already too late to change. */
2951f79b
JJ
2871 min_regno = MIN (current_frame_info.r[reg_fp],
2872 MIN (current_frame_info.r[reg_save_b0],
2873 current_frame_info.r[reg_save_ar_pfs]));
2874 max_regno = MAX (current_frame_info.r[reg_fp],
2875 MAX (current_frame_info.r[reg_save_b0],
2876 current_frame_info.r[reg_save_ar_pfs]));
2877 if (min_regno > 0
2878 && min_regno + 2 == max_regno
2879 && (current_frame_info.r[reg_fp] == min_regno + 1
2880 || current_frame_info.r[reg_save_b0] == min_regno + 1
2881 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2882 && (emitted_frame_related_regs[reg_save_b0] == 0
2883 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2884 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2885 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2886 && (emitted_frame_related_regs[reg_fp] == 0
2887 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
5527bf14 2888 {
2951f79b
JJ
2889 current_frame_info.r[reg_save_b0] = min_regno;
2890 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2891 current_frame_info.r[reg_fp] = min_regno + 2;
5527bf14
RH
2892 }
2893
97e242b0
RH
2894 /* See if we need to store the predicate register block. */
2895 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2896 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
97e242b0
RH
2897 break;
2898 if (regno <= PR_REG (63))
c65ebc55 2899 {
97e242b0 2900 SET_HARD_REG_BIT (mask, PR_REG (0));
6fb5fa3c
DB
2901 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2902 if (current_frame_info.r[reg_save_pr] == 0)
97e242b0
RH
2903 {
2904 extra_spill_size += 8;
2905 n_spilled += 1;
2906 }
2907
2908 /* ??? Mark them all as used so that register renaming and such
2909 are free to use them. */
2910 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2911 df_set_regs_ever_live (regno, true);
c65ebc55
JW
2912 }
2913
97e242b0 2914 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2915 ar.unat as well. The check for existing liveness allows inline asm
2916 to touch ar.unat. */
2917 if (spilled_gr_p || cfun->machine->n_varargs
6fb5fa3c 2918 || df_regs_ever_live_p (AR_UNAT_REGNUM))
97e242b0 2919 {
6fb5fa3c 2920 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
97e242b0 2921 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
6fb5fa3c
DB
2922 current_frame_info.r[reg_save_ar_unat]
2923 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2924 if (current_frame_info.r[reg_save_ar_unat] == 0)
97e242b0
RH
2925 {
2926 extra_spill_size += 8;
2927 n_spilled += 1;
2928 }
2929 }
2930
6fb5fa3c 2931 if (df_regs_ever_live_p (AR_LC_REGNUM))
97e242b0
RH
2932 {
2933 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
6fb5fa3c
DB
2934 current_frame_info.r[reg_save_ar_lc]
2935 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2936 if (current_frame_info.r[reg_save_ar_lc] == 0)
97e242b0
RH
2937 {
2938 extra_spill_size += 8;
2939 n_spilled += 1;
2940 }
2941 }
2942
2943 /* If we have an odd number of words of pretend arguments written to
2944 the stack, then the FR save area will be unaligned. We round the
2945 size of this area up to keep things 16 byte aligned. */
2946 if (spilled_fr_p)
38173d38 2947 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
97e242b0 2948 else
38173d38 2949 pretend_args_size = crtl->args.pretend_args_size;
97e242b0
RH
2950
2951 total_size = (spill_size + extra_spill_size + size + pretend_args_size
38173d38 2952 + crtl->outgoing_args_size);
97e242b0
RH
2953 total_size = IA64_STACK_ALIGN (total_size);
2954
2955 /* We always use the 16-byte scratch area provided by the caller, but
2956 if we are a leaf function, there's no one to which we need to provide
44bd7f65
EB
2957 a scratch area. However, if the function allocates dynamic stack space,
2958 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2959 so we need to cope. */
2960 if (crtl->is_leaf && !cfun->calls_alloca)
97e242b0
RH
2961 total_size = MAX (0, total_size - 16);
2962
c65ebc55 2963 current_frame_info.total_size = total_size;
97e242b0
RH
2964 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2965 current_frame_info.spill_size = spill_size;
2966 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2967 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2968 current_frame_info.n_spilled = n_spilled;
c65ebc55 2969 current_frame_info.initialized = reload_completed;
97e242b0
RH
2970}
2971
7b5cbb57
AS
2972/* Worker function for TARGET_CAN_ELIMINATE. */
2973
2974bool
2975ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2976{
416ff32e 2977 return (to == BR_REG (0) ? crtl->is_leaf : true);
7b5cbb57
AS
2978}
2979
97e242b0
RH
2980/* Compute the initial difference between the specified pair of registers. */
2981
2982HOST_WIDE_INT
9c808aad 2983ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2984{
2985 HOST_WIDE_INT offset;
2986
2987 ia64_compute_frame_size (get_frame_size ());
2988 switch (from)
2989 {
2990 case FRAME_POINTER_REGNUM:
e820471b 2991 switch (to)
97e242b0 2992 {
e820471b 2993 case HARD_FRAME_POINTER_REGNUM:
44bd7f65
EB
2994 offset = -current_frame_info.total_size;
2995 if (!crtl->is_leaf || cfun->calls_alloca)
2996 offset += 16 + crtl->outgoing_args_size;
e820471b
NS
2997 break;
2998
2999 case STACK_POINTER_REGNUM:
44bd7f65
EB
3000 offset = 0;
3001 if (!crtl->is_leaf || cfun->calls_alloca)
3002 offset += 16 + crtl->outgoing_args_size;
e820471b
NS
3003 break;
3004
3005 default:
3006 gcc_unreachable ();
97e242b0 3007 }
97e242b0 3008 break;
c65ebc55 3009
97e242b0
RH
3010 case ARG_POINTER_REGNUM:
3011 /* Arguments start above the 16 byte save area, unless stdarg
3012 in which case we store through the 16 byte save area. */
e820471b
NS
3013 switch (to)
3014 {
3015 case HARD_FRAME_POINTER_REGNUM:
38173d38 3016 offset = 16 - crtl->args.pretend_args_size;
e820471b
NS
3017 break;
3018
3019 case STACK_POINTER_REGNUM:
3020 offset = (current_frame_info.total_size
38173d38 3021 + 16 - crtl->args.pretend_args_size);
e820471b
NS
3022 break;
3023
3024 default:
3025 gcc_unreachable ();
3026 }
97e242b0
RH
3027 break;
3028
97e242b0 3029 default:
e820471b 3030 gcc_unreachable ();
97e242b0
RH
3031 }
3032
3033 return offset;
c65ebc55
JW
3034}
3035
97e242b0
RH
3036/* If there are more than a trivial number of register spills, we use
3037 two interleaved iterators so that we can get two memory references
3038 per insn group.
3039
3040 In order to simplify things in the prologue and epilogue expanders,
3041 we use helper functions to fix up the memory references after the
3042 fact with the appropriate offsets to a POST_MODIFY memory mode.
3043 The following data structure tracks the state of the two iterators
3044 while insns are being emitted. */
3045
3046struct spill_fill_data
c65ebc55 3047{
dd3d2b35 3048 rtx_insn *init_after; /* point at which to emit initializations */
97e242b0
RH
3049 rtx init_reg[2]; /* initial base register */
3050 rtx iter_reg[2]; /* the iterator registers */
3051 rtx *prev_addr[2]; /* address of last memory use */
dd3d2b35 3052 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
3053 HOST_WIDE_INT prev_off[2]; /* last offset */
3054 int n_iter; /* number of iterators in use */
3055 int next_iter; /* next iterator to use */
3056 unsigned int save_gr_used_mask;
3057};
3058
3059static struct spill_fill_data spill_fill_data;
c65ebc55 3060
97e242b0 3061static void
9c808aad 3062setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
3063{
3064 int i;
3065
3066 spill_fill_data.init_after = get_last_insn ();
3067 spill_fill_data.init_reg[0] = init_reg;
3068 spill_fill_data.init_reg[1] = init_reg;
3069 spill_fill_data.prev_addr[0] = NULL;
3070 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
3071 spill_fill_data.prev_insn[0] = NULL;
3072 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
3073 spill_fill_data.prev_off[0] = cfa_off;
3074 spill_fill_data.prev_off[1] = cfa_off;
3075 spill_fill_data.next_iter = 0;
3076 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3077
3078 spill_fill_data.n_iter = 1 + (n_spills > 2);
3079 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 3080 {
97e242b0
RH
3081 int regno = next_scratch_gr_reg ();
3082 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3083 current_frame_info.gr_used_mask |= 1 << regno;
3084 }
3085}
3086
3087static void
9c808aad 3088finish_spill_pointers (void)
97e242b0
RH
3089{
3090 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3091}
c65ebc55 3092
97e242b0 3093static rtx
9c808aad 3094spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
3095{
3096 int iter = spill_fill_data.next_iter;
3097 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3098 rtx disp_rtx = GEN_INT (disp);
3099 rtx mem;
3100
3101 if (spill_fill_data.prev_addr[iter])
3102 {
13f70342 3103 if (satisfies_constraint_N (disp_rtx))
703cf211
BS
3104 {
3105 *spill_fill_data.prev_addr[iter]
3106 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3107 gen_rtx_PLUS (DImode,
3108 spill_fill_data.iter_reg[iter],
3109 disp_rtx));
bbbbb16a
ILT
3110 add_reg_note (spill_fill_data.prev_insn[iter],
3111 REG_INC, spill_fill_data.iter_reg[iter]);
703cf211 3112 }
c65ebc55
JW
3113 else
3114 {
97e242b0 3115 /* ??? Could use register post_modify for loads. */
13f70342 3116 if (!satisfies_constraint_I (disp_rtx))
97e242b0
RH
3117 {
3118 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3119 emit_move_insn (tmp, disp_rtx);
3120 disp_rtx = tmp;
3121 }
3122 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3123 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 3124 }
97e242b0
RH
3125 }
3126 /* Micro-optimization: if we've created a frame pointer, it's at
3127 CFA 0, which may allow the real iterator to be initialized lower,
3128 slightly increasing parallelism. Also, if there are few saves
3129 it may eliminate the iterator entirely. */
3130 else if (disp == 0
3131 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3132 && frame_pointer_needed)
3133 {
3134 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 3135 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
3136 return mem;
3137 }
3138 else
3139 {
dd3d2b35
DM
3140 rtx seq;
3141 rtx_insn *insn;
809d4ef1 3142
97e242b0
RH
3143 if (disp == 0)
3144 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3145 spill_fill_data.init_reg[iter]);
3146 else
c65ebc55 3147 {
97e242b0
RH
3148 start_sequence ();
3149
13f70342 3150 if (!satisfies_constraint_I (disp_rtx))
c65ebc55 3151 {
97e242b0
RH
3152 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3153 emit_move_insn (tmp, disp_rtx);
3154 disp_rtx = tmp;
c65ebc55 3155 }
97e242b0
RH
3156
3157 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3158 spill_fill_data.init_reg[iter],
3159 disp_rtx));
3160
2f937369 3161 seq = get_insns ();
97e242b0 3162 end_sequence ();
c65ebc55 3163 }
809d4ef1 3164
97e242b0
RH
 3165 /* Be careful about being the first insn in a sequence. */
3166 if (spill_fill_data.init_after)
892a4e60 3167 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 3168 else
bc08aefe 3169 {
dd3d2b35 3170 rtx_insn *first = get_insns ();
bc08aefe 3171 if (first)
892a4e60 3172 insn = emit_insn_before (seq, first);
bc08aefe 3173 else
892a4e60 3174 insn = emit_insn (seq);
bc08aefe 3175 }
892a4e60 3176 spill_fill_data.init_after = insn;
97e242b0 3177 }
c65ebc55 3178
97e242b0 3179 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 3180
97e242b0
RH
3181 /* ??? Not all of the spills are for varargs, but some of them are.
3182 The rest of the spills belong in an alias set of their own. But
3183 it doesn't actually hurt to include them here. */
ba4828e0 3184 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 3185
97e242b0
RH
3186 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3187 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 3188
97e242b0
RH
3189 if (++iter >= spill_fill_data.n_iter)
3190 iter = 0;
3191 spill_fill_data.next_iter = iter;
c65ebc55 3192
97e242b0
RH
3193 return mem;
3194}
5527bf14 3195
97e242b0 3196static void
9c808aad
AJ
3197do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3198 rtx frame_reg)
97e242b0 3199{
703cf211 3200 int iter = spill_fill_data.next_iter;
dd3d2b35
DM
3201 rtx mem;
3202 rtx_insn *insn;
5527bf14 3203
97e242b0 3204 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 3205 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 3206 spill_fill_data.prev_insn[iter] = insn;
5527bf14 3207
97e242b0
RH
3208 if (frame_reg)
3209 {
3210 rtx base;
3211 HOST_WIDE_INT off;
3212
3213 RTX_FRAME_RELATED_P (insn) = 1;
3214
9c808aad 3215 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
3216 through a pair of interleaved post_modify iterators. Just
3217 provide the correct answer. */
3218
3219 if (frame_pointer_needed)
3220 {
3221 base = hard_frame_pointer_rtx;
3222 off = - cfa_off;
5527bf14 3223 }
97e242b0
RH
3224 else
3225 {
3226 base = stack_pointer_rtx;
3227 off = current_frame_info.total_size - cfa_off;
3228 }
3229
5c255b57 3230 add_reg_note (insn, REG_CFA_OFFSET,
f7df4a84 3231 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
0a81f074
RS
3232 plus_constant (Pmode,
3233 base, off)),
bbbbb16a 3234 frame_reg));
c65ebc55
JW
3235 }
3236}
3237
97e242b0 3238static void
9c808aad 3239do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 3240{
703cf211 3241 int iter = spill_fill_data.next_iter;
dd3d2b35 3242 rtx_insn *insn;
703cf211
BS
3243
3244 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3245 GEN_INT (cfa_off)));
3246 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
3247}
3248
870f9ec0
RH
3249/* Wrapper functions that discards the CONST_INT spill offset. These
3250 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 3251 use a consistent function interface. */
870f9ec0
RH
3252
3253static rtx
9c808aad 3254gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3255{
3256 return gen_movdi (dest, src);
3257}
3258
3259static rtx
9c808aad 3260gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3261{
3262 return gen_fr_spill (dest, src);
3263}
3264
3265static rtx
9c808aad 3266gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3267{
3268 return gen_fr_restore (dest, src);
3269}
c65ebc55 3270
7b84aac0
EB
3271#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3272
3273/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3274#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
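/* Illustrative arithmetic only: for N = 96 stacked registers the formula
   gives (96 + 96/63 + 1) * 8 = (96 + 1 + 1) * 8 = 784 bytes; the N/63
   term presumably accounts for the RSE's NaT collection slots (one per
   63 registers), with the extra +1 as slack for a partial collection.  */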
3275
3276/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
0dca9cd8
EB
3277 inclusive. These are offsets from the current stack pointer. BS_SIZE
3278 is the size of the backing store. ??? This clobbers r2 and r3. */
7b84aac0
EB
3279
3280static void
0dca9cd8
EB
3281ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3282 int bs_size)
7b84aac0 3283{
7b84aac0
EB
3284 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3285 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
0dca9cd8
EB
3286 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3287
3288 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3289 of the Register Stack Engine. We also need to probe it after checking
3290 that the 2 stacks don't overlap. */
3291 emit_insn (gen_bsp_value (r3));
3292 emit_move_insn (r2, GEN_INT (-(first + size)));
3293
3294 /* Compare current value of BSP and SP registers. */
f7df4a84
RS
3295 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3296 r3, stack_pointer_rtx)));
0dca9cd8
EB
3297
3298 /* Compute the address of the probe for the Backing Store (which grows
3299 towards higher addresses). We probe only at the first offset of
 3300 the next page because some OSes (e.g. Linux/ia64) only extend the
 3301 backing store when this specific address is hit (but generate a SEGV
 3302 on other addresses). Page size is the worst case (4KB). The reserve
3303 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3304 Also compute the address of the last probe for the memory stack
3305 (which grows towards lower addresses). */
f7df4a84
RS
3306 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3307 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
0dca9cd8
EB
3308
3309 /* Compare them and raise SEGV if the former has topped the latter. */
3310 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3311 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
f7df4a84
RS
3312 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3313 r3, r2))));
3314 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
0dca9cd8
EB
3315 const0_rtx),
3316 const0_rtx));
3317 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3318 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3319 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3320 GEN_INT (11))));
7b84aac0
EB
3321
3322 /* Probe the Backing Store if necessary. */
3323 if (bs_size > 0)
3324 emit_stack_probe (r3);
3325
3326 /* Probe the memory stack if necessary. */
3327 if (size == 0)
3328 ;
3329
3330 /* See if we have a constant small number of probes to generate. If so,
3331 that's the easy case. */
3332 else if (size <= PROBE_INTERVAL)
3333 emit_stack_probe (r2);
3334
73866e0d 3335 /* The run-time loop is made up of 9 insns in the generic case while this
7b84aac0
EB
3336 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
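 /* Illustrative count: for n = 4 intervals the unrolled form costs
    5 + 2*(4-2) = 9 insns, the same as the run-time loop, which is
    presumably why the cut-off below is 4 * PROBE_INTERVAL.  */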
3337 else if (size <= 4 * PROBE_INTERVAL)
3338 {
3339 HOST_WIDE_INT i;
3340
3341 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
f7df4a84 3342 emit_insn (gen_rtx_SET (r2,
7b84aac0
EB
3343 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3344 emit_stack_probe (r2);
3345
3346 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3347 it exceeds SIZE. If only two probes are needed, this will not
3348 generate any code. Then probe at FIRST + SIZE. */
3349 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3350 {
f7df4a84 3351 emit_insn (gen_rtx_SET (r2,
f65e3801 3352 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
7b84aac0
EB
3353 emit_stack_probe (r2);
3354 }
3355
f7df4a84 3356 emit_insn (gen_rtx_SET (r2,
f65e3801 3357 plus_constant (Pmode, r2,
7b84aac0
EB
3358 (i - PROBE_INTERVAL) - size)));
3359 emit_stack_probe (r2);
3360 }
3361
3362 /* Otherwise, do the same as above, but in a loop. Note that we must be
3363 extra careful with variables wrapping around because we might be at
3364 the very top (or the very bottom) of the address space and we have
3365 to be able to handle this case properly; in particular, we use an
3366 equality test for the loop condition. */
3367 else
3368 {
3369 HOST_WIDE_INT rounded_size;
3370
3371 emit_move_insn (r2, GEN_INT (-first));
3372
3373
3374 /* Step 1: round SIZE to the previous multiple of the interval. */
3375
3376 rounded_size = size & -PROBE_INTERVAL;
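      /* Illustrative only, assuming a 4 KB PROBE_INTERVAL: SIZE = 10000
	 gives rounded_size = 10000 & -4096 = 8192; the remaining 1808
	 bytes are covered by the extra probe emitted in step 4 below.  */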
3377
3378
3379 /* Step 2: compute initial and final value of the loop counter. */
3380
3381 /* TEST_ADDR = SP + FIRST. */
f7df4a84 3382 emit_insn (gen_rtx_SET (r2,
7b84aac0
EB
3383 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3384
3385 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3386 if (rounded_size > (1 << 21))
3387 {
3388 emit_move_insn (r3, GEN_INT (-rounded_size));
f7df4a84 3389 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
7b84aac0
EB
3390 }
3391 else
f7df4a84
RS
3392 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3393 GEN_INT (-rounded_size))));
7b84aac0
EB
3394
3395
3396 /* Step 3: the loop
3397
73866e0d 3398 do
7b84aac0
EB
3399 {
3400 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3401 probe at TEST_ADDR
3402 }
73866e0d 3403 while (TEST_ADDR != LAST_ADDR)
7b84aac0
EB
3404
3405 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3406 until it is equal to ROUNDED_SIZE. */
3407
3408 emit_insn (gen_probe_stack_range (r2, r2, r3));
3409
3410
3411 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3412 that SIZE is equal to ROUNDED_SIZE. */
3413
3414 /* TEMP = SIZE - ROUNDED_SIZE. */
3415 if (size != rounded_size)
3416 {
f7df4a84
RS
3417 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3418 rounded_size - size)));
7b84aac0
EB
3419 emit_stack_probe (r2);
3420 }
3421 }
3422
3423 /* Make sure nothing is scheduled before we are done. */
3424 emit_insn (gen_blockage ());
3425}
3426
3427/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3428 absolute addresses. */
3429
3430const char *
3431output_probe_stack_range (rtx reg1, rtx reg2)
3432{
3433 static int labelno = 0;
73866e0d 3434 char loop_lab[32];
7b84aac0
EB
3435 rtx xops[3];
3436
73866e0d 3437 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7b84aac0 3438
73866e0d 3439 /* Loop. */
7b84aac0
EB
3440 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3441
7b84aac0 3442 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
73866e0d 3443 xops[0] = reg1;
7b84aac0
EB
3444 xops[1] = GEN_INT (-PROBE_INTERVAL);
3445 output_asm_insn ("addl %0 = %1, %0", xops);
3446 fputs ("\t;;\n", asm_out_file);
3447
73866e0d 3448 /* Probe at TEST_ADDR. */
7b84aac0 3449 output_asm_insn ("probe.w.fault %0, 0", xops);
73866e0d
EB
3450
3451 /* Test if TEST_ADDR == LAST_ADDR. */
3452 xops[1] = reg2;
3453 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3454 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3455
3456 /* Branch. */
3457 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
7b84aac0
EB
3458 assemble_name_raw (asm_out_file, loop_lab);
3459 fputc ('\n', asm_out_file);
3460
7b84aac0
EB
3461 return "";
3462}
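/* The emitted loop looks roughly like this (illustrative; it assumes a
   4 KB PROBE_INTERVAL, that r2/r3 were passed as REG1/REG2, and the
   label name is made up):

	.LPSRL0:
	addl r2 = -4096, r2
	;;
	probe.w.fault r2, 0
	cmp.eq p6, p7 = r2, r3
	(p7) br.cond.dpnt .LPSRL0  */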
3463
c65ebc55
JW
3464/* Called after register allocation to add any instructions needed for the
3465 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 3466 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
3467 to intermix instructions with the saves of the caller saved registers. In
3468 some cases, it might be necessary to emit a barrier instruction as the last
3469 insn to prevent such scheduling.
3470
3471 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
3472 so that the debug info generation code can handle them properly.
3473
073a8998 3474 The register save area is laid out like so:
97e242b0
RH
3475 cfa+16
3476 [ varargs spill area ]
3477 [ fr register spill area ]
3478 [ br register spill area ]
3479 [ ar register spill area ]
3480 [ pr register spill area ]
3481 [ gr register spill area ] */
c65ebc55
JW
3482
 3483/* ??? We get inefficient code when the frame size is larger than can fit in an
3484 adds instruction. */
3485
c65ebc55 3486void
9c808aad 3487ia64_expand_prologue (void)
c65ebc55 3488{
dd3d2b35
DM
3489 rtx_insn *insn;
3490 rtx ar_pfs_save_reg, ar_unat_save_reg;
97e242b0
RH
3491 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3492 rtx reg, alt_reg;
3493
3494 ia64_compute_frame_size (get_frame_size ());
3495 last_scratch_gr_reg = 15;
3496
a11e0df4 3497 if (flag_stack_usage_info)
d3c12306
EB
3498 current_function_static_stack_size = current_frame_info.total_size;
3499
9c1b56c4
JL
3500 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
3501 || flag_stack_clash_protection)
0dca9cd8
EB
3502 {
3503 HOST_WIDE_INT size = current_frame_info.total_size;
3504 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3505 + current_frame_info.n_local_regs);
3506
3507 if (crtl->is_leaf && !cfun->calls_alloca)
3508 {
8c1dd970
JL
3509 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
3510 ia64_emit_probe_stack_range (get_stack_check_protect (),
3511 size - get_stack_check_protect (),
0dca9cd8 3512 bs_size);
8c1dd970
JL
3513 else if (size + bs_size > get_stack_check_protect ())
3514 ia64_emit_probe_stack_range (get_stack_check_protect (),
3515 0, bs_size);
0dca9cd8
EB
3516 }
3517 else if (size + bs_size > 0)
8c1dd970 3518 ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
0dca9cd8 3519 }
7b84aac0 3520
6fb5fa3c
DB
3521 if (dump_file)
3522 {
3523 fprintf (dump_file, "ia64 frame related registers "
3524 "recorded in current_frame_info.r[]:\n");
3525#define PRINTREG(a) if (current_frame_info.r[a]) \
3526 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3527 PRINTREG(reg_fp);
3528 PRINTREG(reg_save_b0);
3529 PRINTREG(reg_save_pr);
3530 PRINTREG(reg_save_ar_pfs);
3531 PRINTREG(reg_save_ar_unat);
3532 PRINTREG(reg_save_ar_lc);
3533 PRINTREG(reg_save_gp);
3534#undef PRINTREG
3535 }
3536
97e242b0
RH
3537 /* If there is no epilogue, then we don't need some prologue insns.
3538 We need to avoid emitting the dead prologue insns, because flow
3539 will complain about them. */
c65ebc55
JW
3540 if (optimize)
3541 {
97e242b0 3542 edge e;
9924d7d8 3543 edge_iterator ei;
97e242b0 3544
fefa31b5 3545 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
c65ebc55
JW
3546 if ((e->flags & EDGE_FAKE) == 0
3547 && (e->flags & EDGE_FALLTHRU) != 0)
3548 break;
3549 epilogue_p = (e != NULL);
3550 }
3551 else
3552 epilogue_p = 1;
3553
97e242b0
RH
3554 /* Set the local, input, and output register names. We need to do this
3555 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3556 half. If we use in/loc/out register names, then we get assembler errors
3557 in crtn.S because there is no alloc insn or regstk directive in there. */
3558 if (! TARGET_REG_NAMES)
3559 {
3560 int inputs = current_frame_info.n_input_regs;
3561 int locals = current_frame_info.n_local_regs;
3562 int outputs = current_frame_info.n_output_regs;
3563
3564 for (i = 0; i < inputs; i++)
3565 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3566 for (i = 0; i < locals; i++)
3567 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3568 for (i = 0; i < outputs; i++)
3569 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3570 }
c65ebc55 3571
97e242b0
RH
3572 /* Set the frame pointer register name. The regnum is logically loc79,
3573 but of course we'll not have allocated that many locals. Rather than
3574 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
3575 /* ??? This code means that we can never use one local register when
3576 there is a frame pointer. loc79 gets wasted in this case, as it is
3577 renamed to a register that will never be used. See also the try_locals
3578 code in find_gr_spill. */
6fb5fa3c 3579 if (current_frame_info.r[reg_fp])
97e242b0
RH
3580 {
3581 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3582 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3583 = reg_names[current_frame_info.r[reg_fp]];
3584 reg_names[current_frame_info.r[reg_fp]] = tmp;
97e242b0 3585 }
c65ebc55 3586
97e242b0
RH
3587 /* We don't need an alloc instruction if we've used no outputs or locals. */
3588 if (current_frame_info.n_local_regs == 0
2ed4af6f 3589 && current_frame_info.n_output_regs == 0
38173d38 3590 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
f5bdba44 3591 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
3592 {
3593 /* If there is no alloc, but there are input registers used, then we
3594 need a .regstk directive. */
3595 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3596 ar_pfs_save_reg = NULL_RTX;
3597 }
3598 else
3599 {
3600 current_frame_info.need_regstk = 0;
c65ebc55 3601
6fb5fa3c
DB
3602 if (current_frame_info.r[reg_save_ar_pfs])
3603 {
3604 regno = current_frame_info.r[reg_save_ar_pfs];
3605 reg_emitted (reg_save_ar_pfs);
3606 }
97e242b0
RH
3607 else
3608 regno = next_scratch_gr_reg ();
3609 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3610
9c808aad 3611 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
3612 GEN_INT (current_frame_info.n_input_regs),
3613 GEN_INT (current_frame_info.n_local_regs),
3614 GEN_INT (current_frame_info.n_output_regs),
3615 GEN_INT (current_frame_info.n_rotate_regs)));
9f2ff8e5
RH
3616 if (current_frame_info.r[reg_save_ar_pfs])
3617 {
3618 RTX_FRAME_RELATED_P (insn) = 1;
3619 add_reg_note (insn, REG_CFA_REGISTER,
f7df4a84 3620 gen_rtx_SET (ar_pfs_save_reg,
9f2ff8e5
RH
3621 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3622 }
97e242b0 3623 }
c65ebc55 3624
97e242b0 3625 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 3626
26a110f5 3627 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
3628 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3629 stack_pointer_rtx, 0);
c65ebc55 3630
97e242b0
RH
3631 if (frame_pointer_needed)
3632 {
3633 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3634 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57
RH
3635
3636 /* Force the unwind info to recognize this as defining a new CFA,
3637 rather than some temp register setup. */
3638 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
97e242b0 3639 }
c65ebc55 3640
97e242b0
RH
3641 if (current_frame_info.total_size != 0)
3642 {
3643 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3644 rtx offset;
c65ebc55 3645
13f70342 3646 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3647 offset = frame_size_rtx;
3648 else
3649 {
3650 regno = next_scratch_gr_reg ();
9c808aad 3651 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
3652 emit_move_insn (offset, frame_size_rtx);
3653 }
c65ebc55 3654
97e242b0
RH
3655 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3656 stack_pointer_rtx, offset));
c65ebc55 3657
97e242b0
RH
3658 if (! frame_pointer_needed)
3659 {
3660 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3661 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 3662 gen_rtx_SET (stack_pointer_rtx,
5c255b57
RH
3663 gen_rtx_PLUS (DImode,
3664 stack_pointer_rtx,
3665 frame_size_rtx)));
97e242b0 3666 }
c65ebc55 3667
97e242b0
RH
3668 /* ??? At this point we must generate a magic insn that appears to
3669 modify the stack pointer, the frame pointer, and all spill
3670 iterators. This would allow the most scheduling freedom. For
3671 now, just hard stop. */
3672 emit_insn (gen_blockage ());
3673 }
c65ebc55 3674
97e242b0
RH
3675 /* Must copy out ar.unat before doing any integer spills. */
3676 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 3677 {
6fb5fa3c
DB
3678 if (current_frame_info.r[reg_save_ar_unat])
3679 {
3680 ar_unat_save_reg
3681 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3682 reg_emitted (reg_save_ar_unat);
3683 }
97e242b0 3684 else
c65ebc55 3685 {
97e242b0
RH
3686 alt_regno = next_scratch_gr_reg ();
3687 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3688 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 3689 }
c65ebc55 3690
97e242b0
RH
3691 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3692 insn = emit_move_insn (ar_unat_save_reg, reg);
5c255b57
RH
3693 if (current_frame_info.r[reg_save_ar_unat])
3694 {
3695 RTX_FRAME_RELATED_P (insn) = 1;
3696 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3697 }
97e242b0
RH
3698
3699 /* Even if we're not going to generate an epilogue, we still
3700 need to save the register so that EH works. */
6fb5fa3c 3701 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
d0e82870 3702 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
3703 }
3704 else
97e242b0
RH
3705 ar_unat_save_reg = NULL_RTX;
3706
3707 /* Spill all varargs registers. Do this before spilling any GR registers,
3708 since we want the UNAT bits for the GR registers to override the UNAT
3709 bits from varargs, which we don't care about. */
c65ebc55 3710
97e242b0
RH
3711 cfa_off = -16;
3712 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 3713 {
97e242b0 3714 reg = gen_rtx_REG (DImode, regno);
870f9ec0 3715 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 3716 }
c65ebc55 3717
97e242b0
RH
3718 /* Locate the bottom of the register save area. */
3719 cfa_off = (current_frame_info.spill_cfa_off
3720 + current_frame_info.spill_size
3721 + current_frame_info.extra_spill_size);
c65ebc55 3722
97e242b0
RH
3723 /* Save the predicate register block either in a register or in memory. */
3724 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3725 {
3726 reg = gen_rtx_REG (DImode, PR_REG (0));
6fb5fa3c 3727 if (current_frame_info.r[reg_save_pr] != 0)
1ff5b671 3728 {
6fb5fa3c
DB
3729 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3730 reg_emitted (reg_save_pr);
97e242b0 3731 insn = emit_move_insn (alt_reg, reg);
1ff5b671 3732
97e242b0
RH
3733 /* ??? Denote pr spill/fill by a DImode move that modifies all
3734 64 hard registers. */
1ff5b671 3735 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3736 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
46327bc5 3737
97e242b0
RH
3738 /* Even if we're not going to generate an epilogue, we still
3739 need to save the register so that EH works. */
3740 if (! epilogue_p)
d0e82870 3741 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
3742 }
3743 else
97e242b0
RH
3744 {
3745 alt_regno = next_scratch_gr_reg ();
3746 alt_reg = gen_rtx_REG (DImode, alt_regno);
3747 insn = emit_move_insn (alt_reg, reg);
870f9ec0 3748 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3749 cfa_off -= 8;
3750 }
c65ebc55
JW
3751 }
3752
97e242b0
RH
3753 /* Handle AR regs in numerical order. All of them get special handling. */
3754 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6fb5fa3c 3755 && current_frame_info.r[reg_save_ar_unat] == 0)
c65ebc55 3756 {
97e242b0 3757 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 3758 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 3759 cfa_off -= 8;
c65ebc55 3760 }
97e242b0
RH
3761
3762 /* The alloc insn already copied ar.pfs into a general register. The
3763 only thing we have to do now is copy that register to a stack slot
3764 if we'd not allocated a local register for the job. */
f5bdba44 3765 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
6fb5fa3c 3766 && current_frame_info.r[reg_save_ar_pfs] == 0)
c65ebc55 3767 {
97e242b0 3768 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 3769 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
3770 cfa_off -= 8;
3771 }
3772
3773 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3774 {
3775 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
6fb5fa3c 3776 if (current_frame_info.r[reg_save_ar_lc] != 0)
97e242b0 3777 {
6fb5fa3c
DB
3778 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3779 reg_emitted (reg_save_ar_lc);
97e242b0
RH
3780 insn = emit_move_insn (alt_reg, reg);
3781 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3782 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
97e242b0
RH
3783
3784 /* Even if we're not going to generate an epilogue, we still
3785 need to save the register so that EH works. */
3786 if (! epilogue_p)
d0e82870 3787 emit_insn (gen_prologue_use (alt_reg));
97e242b0 3788 }
c65ebc55
JW
3789 else
3790 {
97e242b0
RH
3791 alt_regno = next_scratch_gr_reg ();
3792 alt_reg = gen_rtx_REG (DImode, alt_regno);
3793 emit_move_insn (alt_reg, reg);
870f9ec0 3794 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3795 cfa_off -= 8;
3796 }
3797 }
3798
ae1e2d4c
AS
3799 /* Save the return pointer. */
3800 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3801 {
3802 reg = gen_rtx_REG (DImode, BR_REG (0));
6fb5fa3c 3803 if (current_frame_info.r[reg_save_b0] != 0)
ae1e2d4c 3804 {
6fb5fa3c
DB
3805 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3806 reg_emitted (reg_save_b0);
ae1e2d4c
AS
3807 insn = emit_move_insn (alt_reg, reg);
3808 RTX_FRAME_RELATED_P (insn) = 1;
f7df4a84 3809 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
ae1e2d4c
AS
3810
3811 /* Even if we're not going to generate an epilogue, we still
3812 need to save the register so that EH works. */
3813 if (! epilogue_p)
3814 emit_insn (gen_prologue_use (alt_reg));
3815 }
3816 else
3817 {
3818 alt_regno = next_scratch_gr_reg ();
3819 alt_reg = gen_rtx_REG (DImode, alt_regno);
3820 emit_move_insn (alt_reg, reg);
3821 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3822 cfa_off -= 8;
3823 }
3824 }
3825
6fb5fa3c 3826 if (current_frame_info.r[reg_save_gp])
599aedd9 3827 {
6fb5fa3c 3828 reg_emitted (reg_save_gp);
599aedd9 3829 insn = emit_move_insn (gen_rtx_REG (DImode,
6fb5fa3c 3830 current_frame_info.r[reg_save_gp]),
599aedd9 3831 pic_offset_table_rtx);
599aedd9
RH
3832 }
3833
97e242b0 3834 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3835 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3836 + current_frame_info.spill_size));
97e242b0
RH
3837
3838 /* Spill all general registers. */
3839 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3840 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3841 {
3842 reg = gen_rtx_REG (DImode, regno);
3843 do_spill (gen_gr_spill, reg, cfa_off, reg);
3844 cfa_off -= 8;
3845 }
3846
97e242b0
RH
3847 /* Spill the rest of the BR registers. */
3848 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3849 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3850 {
3851 alt_regno = next_scratch_gr_reg ();
3852 alt_reg = gen_rtx_REG (DImode, alt_regno);
3853 reg = gen_rtx_REG (DImode, regno);
3854 emit_move_insn (alt_reg, reg);
870f9ec0 3855 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3856 cfa_off -= 8;
3857 }
3858
3859 /* Align the frame and spill all FR registers. */
3860 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3861 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3862 {
e820471b 3863 gcc_assert (!(cfa_off & 15));
02befdf4 3864 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3865 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
3866 cfa_off -= 16;
3867 }
3868
e820471b 3869 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3870
3871 finish_spill_pointers ();
c65ebc55
JW
3872}
3873
8e7745dc
DR
3874/* Output the textual info surrounding the prologue. */
3875
3876void
3877ia64_start_function (FILE *file, const char *fnname,
3878 tree decl ATTRIBUTE_UNUSED)
3879{
4b12e93d
TG
3880#if TARGET_ABI_OPEN_VMS
3881 vms_start_function (fnname);
8e7745dc
DR
3882#endif
3883
3884 fputs ("\t.proc ", file);
3885 assemble_name (file, fnname);
3886 fputc ('\n', file);
3887 ASM_OUTPUT_LABEL (file, fnname);
3888}
3889
c65ebc55 3890/* Called after register allocation to add any instructions needed for the
5519a4f9 3891 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 3892 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
3893 to intermix instructions with the saves of the caller saved registers. In
3894 some cases, it might be necessary to emit a barrier instruction as the last
3895 insn to prevent such scheduling. */
3896
3897void
9c808aad 3898ia64_expand_epilogue (int sibcall_p)
c65ebc55 3899{
dd3d2b35
DM
3900 rtx_insn *insn;
3901 rtx reg, alt_reg, ar_unat_save_reg;
97e242b0
RH
3902 int regno, alt_regno, cfa_off;
3903
3904 ia64_compute_frame_size (get_frame_size ());
3905
3906 /* If there is a frame pointer, then we use it instead of the stack
3907 pointer, so that the stack pointer does not need to be valid when
3908 the epilogue starts. See EXIT_IGNORE_STACK. */
3909 if (frame_pointer_needed)
3910 setup_spill_pointers (current_frame_info.n_spilled,
3911 hard_frame_pointer_rtx, 0);
3912 else
9c808aad 3913 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
3914 current_frame_info.total_size);
3915
3916 if (current_frame_info.total_size != 0)
3917 {
3918 /* ??? At this point we must generate a magic insn that appears to
3919 modify the spill iterators and the frame pointer. This would
3920 allow the most scheduling freedom. For now, just hard stop. */
3921 emit_insn (gen_blockage ());
3922 }
3923
3924 /* Locate the bottom of the register save area. */
3925 cfa_off = (current_frame_info.spill_cfa_off
3926 + current_frame_info.spill_size
3927 + current_frame_info.extra_spill_size);
3928
3929 /* Restore the predicate registers. */
3930 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3931 {
6fb5fa3c
DB
3932 if (current_frame_info.r[reg_save_pr] != 0)
3933 {
3934 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3935 reg_emitted (reg_save_pr);
3936 }
97e242b0
RH
3937 else
3938 {
3939 alt_regno = next_scratch_gr_reg ();
3940 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3941 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3942 cfa_off -= 8;
3943 }
3944 reg = gen_rtx_REG (DImode, PR_REG (0));
3945 emit_move_insn (reg, alt_reg);
3946 }
3947
3948 /* Restore the application registers. */
3949
3950 /* Load the saved unat from the stack, but do not restore it until
3951 after the GRs have been restored. */
3952 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3953 {
6fb5fa3c
DB
3954 if (current_frame_info.r[reg_save_ar_unat] != 0)
3955 {
3956 ar_unat_save_reg
3957 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3958 reg_emitted (reg_save_ar_unat);
3959 }
97e242b0
RH
3960 else
3961 {
3962 alt_regno = next_scratch_gr_reg ();
3963 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3964 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3965 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3966 cfa_off -= 8;
3967 }
3968 }
3969 else
3970 ar_unat_save_reg = NULL_RTX;
9c808aad 3971
6fb5fa3c 3972 if (current_frame_info.r[reg_save_ar_pfs] != 0)
97e242b0 3973 {
6fb5fa3c
DB
3974 reg_emitted (reg_save_ar_pfs);
3975 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
97e242b0
RH
3976 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3977 emit_move_insn (reg, alt_reg);
3978 }
4e14f1f9 3979 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3980 {
97e242b0
RH
3981 alt_regno = next_scratch_gr_reg ();
3982 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3983 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3984 cfa_off -= 8;
3985 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3986 emit_move_insn (reg, alt_reg);
3987 }
3988
3989 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3990 {
6fb5fa3c
DB
3991 if (current_frame_info.r[reg_save_ar_lc] != 0)
3992 {
3993 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3994 reg_emitted (reg_save_ar_lc);
3995 }
97e242b0
RH
3996 else
3997 {
3998 alt_regno = next_scratch_gr_reg ();
3999 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 4000 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
4001 cfa_off -= 8;
4002 }
4003 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4004 emit_move_insn (reg, alt_reg);
4005 }
4006
ae1e2d4c
AS
4007 /* Restore the return pointer. */
4008 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4009 {
6fb5fa3c
DB
4010 if (current_frame_info.r[reg_save_b0] != 0)
4011 {
4012 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4013 reg_emitted (reg_save_b0);
4014 }
ae1e2d4c
AS
4015 else
4016 {
4017 alt_regno = next_scratch_gr_reg ();
4018 alt_reg = gen_rtx_REG (DImode, alt_regno);
4019 do_restore (gen_movdi_x, alt_reg, cfa_off);
4020 cfa_off -= 8;
4021 }
4022 reg = gen_rtx_REG (DImode, BR_REG (0));
4023 emit_move_insn (reg, alt_reg);
4024 }
4025
97e242b0 4026 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
4027 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4028 + current_frame_info.spill_size));
97e242b0 4029
599aedd9
RH
4030 /* The GP may be stored on the stack in the prologue, but it's
4031 never restored in the epilogue. Skip the stack slot. */
4032 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4033 cfa_off -= 8;
4034
97e242b0 4035 /* Restore all general registers. */
599aedd9 4036 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 4037 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 4038 {
97e242b0
RH
4039 reg = gen_rtx_REG (DImode, regno);
4040 do_restore (gen_gr_restore, reg, cfa_off);
4041 cfa_off -= 8;
0c96007e 4042 }
9c808aad 4043
ae1e2d4c 4044 /* Restore the branch registers. */
97e242b0
RH
4045 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4046 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 4047 {
97e242b0
RH
4048 alt_regno = next_scratch_gr_reg ();
4049 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 4050 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
4051 cfa_off -= 8;
4052 reg = gen_rtx_REG (DImode, regno);
4053 emit_move_insn (reg, alt_reg);
4054 }
c65ebc55 4055
97e242b0
RH
4056 /* Restore floating point registers. */
4057 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4058 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4059 {
e820471b 4060 gcc_assert (!(cfa_off & 15));
02befdf4 4061 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 4062 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 4063 cfa_off -= 16;
0c96007e 4064 }
97e242b0
RH
4065
4066 /* Restore ar.unat for real. */
4067 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4068 {
4069 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4070 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
4071 }
4072
e820471b 4073 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
4074
4075 finish_spill_pointers ();
c65ebc55 4076
c93646bd
JJ
4077 if (current_frame_info.total_size
4078 || cfun->machine->ia64_eh_epilogue_sp
4079 || frame_pointer_needed)
97e242b0
RH
4080 {
4081 /* ??? At this point we must generate a magic insn that appears to
4082 modify the spill iterators, the stack pointer, and the frame
4083 pointer. This would allow the most scheduling freedom. For now,
4084 just hard stop. */
4085 emit_insn (gen_blockage ());
4086 }
c65ebc55 4087
97e242b0
RH
4088 if (cfun->machine->ia64_eh_epilogue_sp)
4089 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4090 else if (frame_pointer_needed)
4091 {
4092 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4093 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 4094 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
97e242b0
RH
4095 }
4096 else if (current_frame_info.total_size)
0c96007e 4097 {
97e242b0
RH
4098 rtx offset, frame_size_rtx;
4099
4100 frame_size_rtx = GEN_INT (current_frame_info.total_size);
13f70342 4101 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
4102 offset = frame_size_rtx;
4103 else
4104 {
4105 regno = next_scratch_gr_reg ();
4106 offset = gen_rtx_REG (DImode, regno);
4107 emit_move_insn (offset, frame_size_rtx);
4108 }
4109
4110 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4111 offset));
4112
4113 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 4114 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 4115 gen_rtx_SET (stack_pointer_rtx,
5c255b57
RH
4116 gen_rtx_PLUS (DImode,
4117 stack_pointer_rtx,
4118 frame_size_rtx)));
0c96007e 4119 }
97e242b0
RH
4120
4121 if (cfun->machine->ia64_eh_epilogue_bsp)
4122 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 4123
2ed4af6f
RH
4124 if (! sibcall_p)
4125 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 4126 else
8206fc89
AM
4127 {
4128 int fp = GR_REG (2);
5c255b57
RH
 4129 /* We need a throw-away register here; r0 and r1 are reserved,
 4130 so r2 is the first available call-clobbered register. If
4131 there was a frame_pointer register, we may have swapped the
4132 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4133 sure we're using the string "r2" when emitting the register
4134 name for the assembler. */
6fb5fa3c
DB
4135 if (current_frame_info.r[reg_fp]
4136 && current_frame_info.r[reg_fp] == GR_REG (2))
8206fc89
AM
4137 fp = HARD_FRAME_POINTER_REGNUM;
4138
4139 /* We must emit an alloc to force the input registers to become output
4140 registers. Otherwise, if the callee tries to pass its parameters
4141 through to another call without an intervening alloc, then these
4142 values get lost. */
4143 /* ??? We don't need to preserve all input registers. We only need to
4144 preserve those input registers used as arguments to the sibling call.
4145 It is unclear how to compute that number here. */
4146 if (current_frame_info.n_input_regs != 0)
a8f5224e
DM
4147 {
4148 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
c2b40eba 4149
a8f5224e
DM
4150 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4151 const0_rtx, const0_rtx,
4152 n_inputs, const0_rtx));
4153 RTX_FRAME_RELATED_P (insn) = 1;
c2b40eba
RH
4154
4155 /* ??? We need to mark the alloc as frame-related so that it gets
4156 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4157 But there's nothing dwarf2 related to be done wrt the register
4158 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4159 the empty parallel means dwarf2out will not see anything. */
4160 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4161 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
a8f5224e 4162 }
8206fc89 4163 }
c65ebc55
JW
4164}
4165
97e242b0
RH
4166/* Return 1 if br.ret can do all the work required to return from a
4167 function. */
4168
4169int
9c808aad 4170ia64_direct_return (void)
97e242b0
RH
4171{
4172 if (reload_completed && ! frame_pointer_needed)
4173 {
4174 ia64_compute_frame_size (get_frame_size ());
4175
4176 return (current_frame_info.total_size == 0
4177 && current_frame_info.n_spilled == 0
6fb5fa3c
DB
4178 && current_frame_info.r[reg_save_b0] == 0
4179 && current_frame_info.r[reg_save_pr] == 0
4180 && current_frame_info.r[reg_save_ar_pfs] == 0
4181 && current_frame_info.r[reg_save_ar_unat] == 0
4182 && current_frame_info.r[reg_save_ar_lc] == 0);
97e242b0
RH
4183 }
4184 return 0;
4185}
4186
af1e5518
RH
4187/* Return the magic cookie that we use to hold the return address
4188 during early compilation. */
4189
4190rtx
9c808aad 4191ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
4192{
4193 if (count != 0)
4194 return NULL;
4195 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4196}
4197
4198/* Split this value after reload, now that we know where the return
4199 address is saved. */
4200
4201void
9c808aad 4202ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
4203{
4204 rtx src;
4205
4206 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4207 {
6fb5fa3c
DB
4208 if (current_frame_info.r[reg_save_b0] != 0)
4209 {
4210 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4211 reg_emitted (reg_save_b0);
4212 }
af1e5518
RH
4213 else
4214 {
4215 HOST_WIDE_INT off;
4216 unsigned int regno;
13f70342 4217 rtx off_r;
af1e5518
RH
4218
4219 /* Compute offset from CFA for BR0. */
4220 /* ??? Must be kept in sync with ia64_expand_prologue. */
4221 off = (current_frame_info.spill_cfa_off
4222 + current_frame_info.spill_size);
4223 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4224 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4225 off -= 8;
4226
4227 /* Convert CFA offset to a register based offset. */
4228 if (frame_pointer_needed)
4229 src = hard_frame_pointer_rtx;
4230 else
4231 {
4232 src = stack_pointer_rtx;
4233 off += current_frame_info.total_size;
4234 }
4235
4236 /* Load address into scratch register. */
13f70342
RH
4237 off_r = GEN_INT (off);
4238 if (satisfies_constraint_I (off_r))
4239 emit_insn (gen_adddi3 (dest, src, off_r));
af1e5518
RH
4240 else
4241 {
13f70342 4242 emit_move_insn (dest, off_r);
af1e5518
RH
4243 emit_insn (gen_adddi3 (dest, src, dest));
4244 }
4245
4246 src = gen_rtx_MEM (Pmode, dest);
4247 }
4248 }
4249 else
4250 src = gen_rtx_REG (DImode, BR_REG (0));
4251
4252 emit_move_insn (dest, src);
4253}
4254
10c9f189 4255int
9c808aad 4256ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
4257{
4258 /* Don't clobber any of the registers we reserved for the prologue. */
09639a83 4259 unsigned int r;
10c9f189 4260
6fb5fa3c
DB
4261 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4262 if (to == current_frame_info.r[r]
4263 || from == current_frame_info.r[r]
4264 || to == emitted_frame_related_regs[r]
4265 || from == emitted_frame_related_regs[r])
4266 return 0;
2130b7fb 4267
10c9f189
RH
4268 /* Don't use output registers outside the register frame. */
4269 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4270 return 0;
4271
4272 /* Retain even/oddness on predicate register pairs. */
4273 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4274 return (from & 1) == (to & 1);
4275
4276 return 1;
4277}
4278
c43f4279
RS
4279/* Implement TARGET_HARD_REGNO_NREGS.
4280
4281 ??? We say that BImode PR values require two registers. This allows us to
4282 easily store the normal and inverted values. We use CCImode to indicate
4283 a single predicate register. */
4284
4285static unsigned int
4286ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4287{
4288 if (regno == PR_REG (0) && mode == DImode)
4289 return 64;
4290 if (PR_REGNO_P (regno) && (mode) == BImode)
4291 return 2;
4292 if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4293 return 1;
4294 if (FR_REGNO_P (regno) && mode == XFmode)
4295 return 1;
4296 if (FR_REGNO_P (regno) && mode == RFmode)
4297 return 1;
4298 if (FR_REGNO_P (regno) && mode == XCmode)
4299 return 2;
4300 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4301}
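/* Illustrative examples of the mapping above (not an exhaustive table):
   (PR_REG (0), DImode) -> 64, the whole predicate block moved at once;
   (PR_REG (6), BImode) -> 2, the normal and the inverted value;
   (FR_REG (2), XCmode) -> 2 FP registers, one per XFmode component;
   anything else falls through to CEIL (size, UNITS_PER_WORD).  */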
4302
f939c3e6
RS
4303/* Implement TARGET_HARD_REGNO_MODE_OK. */
4304
4305static bool
4306ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4307{
4308 if (FR_REGNO_P (regno))
4309 return (GET_MODE_CLASS (mode) != MODE_CC
4310 && mode != BImode
4311 && mode != TFmode);
4312
4313 if (PR_REGNO_P (regno))
4314 return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4315
4316 if (GR_REGNO_P (regno))
4317 return mode != XFmode && mode != XCmode && mode != RFmode;
4318
4319 if (AR_REGNO_P (regno))
4320 return mode == DImode;
4321
4322 if (BR_REGNO_P (regno))
4323 return mode == DImode;
4324
4325 return false;
4326}
4327
99e1629f
RS
4328/* Implement TARGET_MODES_TIEABLE_P.
4329
4330 Don't tie integer and FP modes, as that causes us to get integer registers
 4331 allocated for FP instructions. XFmode is only supported in FP registers, so
4332 we can't tie it with any other modes. */
4333
4334static bool
4335ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4336{
4337 return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4338 && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4339 == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4340 && (mode1 == BImode) == (mode2 == BImode));
4341}
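/* For example (following the rule above): SFmode and DFmode tie, since
   both are MODE_FLOAT and neither is in the XF/XC/RF group; XFmode and
   DFmode do not tie; DImode and DFmode do not tie either, being of
   different mode classes.  */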
4342
301d03af
RS
4343/* Target hook for assembling integer objects. Handle word-sized
4344 aligned objects and detect the cases when @fptr is needed. */
4345
4346static bool
9c808aad 4347ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 4348{
b6a41a62 4349 if (size == POINTER_SIZE / BITS_PER_UNIT
301d03af
RS
4350 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4351 && GET_CODE (x) == SYMBOL_REF
1cdbd630 4352 && SYMBOL_REF_FUNCTION_P (x))
301d03af 4353 {
1b79dc38
DM
4354 static const char * const directive[2][2] = {
4355 /* 64-bit pointer */ /* 32-bit pointer */
4356 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4357 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4358 };
4359 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
301d03af
RS
4360 output_addr_const (asm_out_file, x);
4361 fputs (")\n", asm_out_file);
4362 return true;
4363 }
4364 return default_assemble_integer (x, size, aligned_p);
4365}
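/* E.g. an aligned 64-bit pointer to a hypothetical function "foo" is
   emitted as "data8 @fptr(foo)", while an unaligned 32-bit one would use
   "data4.ua @fptr(foo)" (illustrative symbol name).  */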
4366
c65ebc55
JW
4367/* Emit the function prologue. */
4368
08c148a8 4369static void
42776416 4370ia64_output_function_prologue (FILE *file)
c65ebc55 4371{
97e242b0
RH
4372 int mask, grsave, grsave_prev;
4373
4374 if (current_frame_info.need_regstk)
4375 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4376 current_frame_info.n_input_regs,
4377 current_frame_info.n_local_regs,
4378 current_frame_info.n_output_regs,
4379 current_frame_info.n_rotate_regs);
c65ebc55 4380
d5fabb58 4381 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
0c96007e
AM
4382 return;
4383
97e242b0 4384 /* Emit the .prologue directive. */
809d4ef1 4385
97e242b0
RH
4386 mask = 0;
4387 grsave = grsave_prev = 0;
6fb5fa3c 4388 if (current_frame_info.r[reg_save_b0] != 0)
0c96007e 4389 {
97e242b0 4390 mask |= 8;
6fb5fa3c 4391 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
97e242b0 4392 }
6fb5fa3c 4393 if (current_frame_info.r[reg_save_ar_pfs] != 0
97e242b0 4394 && (grsave_prev == 0
6fb5fa3c 4395 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
97e242b0
RH
4396 {
4397 mask |= 4;
4398 if (grsave_prev == 0)
6fb5fa3c
DB
4399 grsave = current_frame_info.r[reg_save_ar_pfs];
4400 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
0c96007e 4401 }
6fb5fa3c 4402 if (current_frame_info.r[reg_fp] != 0
97e242b0 4403 && (grsave_prev == 0
6fb5fa3c 4404 || current_frame_info.r[reg_fp] == grsave_prev + 1))
97e242b0
RH
4405 {
4406 mask |= 2;
4407 if (grsave_prev == 0)
4408 grsave = HARD_FRAME_POINTER_REGNUM;
6fb5fa3c 4409 grsave_prev = current_frame_info.r[reg_fp];
97e242b0 4410 }
6fb5fa3c 4411 if (current_frame_info.r[reg_save_pr] != 0
97e242b0 4412 && (grsave_prev == 0
6fb5fa3c 4413 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
97e242b0
RH
4414 {
4415 mask |= 1;
4416 if (grsave_prev == 0)
6fb5fa3c 4417 grsave = current_frame_info.r[reg_save_pr];
97e242b0
RH
4418 }
4419
738e7b39 4420 if (mask && TARGET_GNU_AS)
97e242b0
RH
4421 fprintf (file, "\t.prologue %d, %d\n", mask,
4422 ia64_dbx_register_number (grsave));
4423 else
4424 fputs ("\t.prologue\n", file);
4425
4426 /* Emit a .spill directive, if necessary, to relocate the base of
4427 the register spill area. */
4428 if (current_frame_info.spill_cfa_off != -16)
4429 fprintf (file, "\t.spill %ld\n",
4430 (long) (current_frame_info.spill_cfa_off
4431 + current_frame_info.spill_size));
c65ebc55
JW
4432}
4433
0186257f
JW
4434/* Emit the .body directive at the scheduled end of the prologue. */
4435
b4c25db2 4436static void
9c808aad 4437ia64_output_function_end_prologue (FILE *file)
0186257f 4438{
d5fabb58 4439 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
0186257f
JW
4440 return;
4441
4442 fputs ("\t.body\n", file);
4443}
4444
c65ebc55
JW
4445/* Emit the function epilogue. */
4446
08c148a8 4447static void
42776416 4448ia64_output_function_epilogue (FILE *)
c65ebc55 4449{
8a959ea5
RH
4450 int i;
4451
6fb5fa3c 4452 if (current_frame_info.r[reg_fp])
97e242b0
RH
4453 {
4454 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4455 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
4456 = reg_names[current_frame_info.r[reg_fp]];
4457 reg_names[current_frame_info.r[reg_fp]] = tmp;
4458 reg_emitted (reg_fp);
97e242b0
RH
4459 }
4460 if (! TARGET_REG_NAMES)
4461 {
97e242b0
RH
4462 for (i = 0; i < current_frame_info.n_input_regs; i++)
4463 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4464 for (i = 0; i < current_frame_info.n_local_regs; i++)
4465 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4466 for (i = 0; i < current_frame_info.n_output_regs; i++)
4467 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4468 }
8a959ea5 4469
97e242b0
RH
4470 current_frame_info.initialized = 0;
4471}
c65ebc55
JW
4472
4473int
9c808aad 4474ia64_dbx_register_number (int regno)
c65ebc55 4475{
97e242b0
RH
4476 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4477 from its home at loc79 to something inside the register frame. We
4478 must perform the same renumbering here for the debug info. */
6fb5fa3c 4479 if (current_frame_info.r[reg_fp])
97e242b0
RH
4480 {
4481 if (regno == HARD_FRAME_POINTER_REGNUM)
6fb5fa3c
DB
4482 regno = current_frame_info.r[reg_fp];
4483 else if (regno == current_frame_info.r[reg_fp])
97e242b0
RH
4484 regno = HARD_FRAME_POINTER_REGNUM;
4485 }
4486
4487 if (IN_REGNO_P (regno))
4488 return 32 + regno - IN_REG (0);
4489 else if (LOC_REGNO_P (regno))
4490 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4491 else if (OUT_REGNO_P (regno))
4492 return (32 + current_frame_info.n_input_regs
4493 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4494 else
4495 return regno;
c65ebc55
JW
4496}
4497
2a1211e5
RH
4498/* Implement TARGET_TRAMPOLINE_INIT.
4499
4500 The trampoline should set the static chain pointer to value placed
4501 into the trampoline and should branch to the specified routine.
4502 To make the normal indirect-subroutine calling convention work,
4503 the trampoline must look like a function descriptor; the first
4504 word being the target address and the second being the target's
4505 global pointer.
4506
4507 We abuse the concept of a global pointer by arranging for it
4508 to point to the data we need to load. The complete trampoline
4509 has the following form:
4510
4511 +-------------------+ \
4512 TRAMP: | __ia64_trampoline | |
4513 +-------------------+ > fake function descriptor
4514 | TRAMP+16 | |
4515 +-------------------+ /
4516 | target descriptor |
4517 +-------------------+
4518 | static link |
4519 +-------------------+
4520*/
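/* Sketch of the intended call sequence (descriptive, not code we emit
   here): the caller treats TRAMP as an ordinary function descriptor, so
   it branches to __ia64_trampoline (provided by libgcc) with gp set to
   TRAMP+16; the stub then loads the real target descriptor and the
   static link from that address and transfers control to the target.  */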
4521
4522static void
4523ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
97e242b0 4524{
2a1211e5
RH
4525 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4526 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
97e242b0 4527
738e7b39
RK
4528 /* The Intel assembler requires that the global __ia64_trampoline symbol
 4529 be declared explicitly. */
4530 if (!TARGET_GNU_AS)
4531 {
4532 static bool declared_ia64_trampoline = false;
4533
4534 if (!declared_ia64_trampoline)
4535 {
4536 declared_ia64_trampoline = true;
b6a41a62
RK
4537 (*targetm.asm_out.globalize_label) (asm_out_file,
4538 "__ia64_trampoline");
738e7b39
RK
4539 }
4540 }
4541
5e89a381 4542 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2a1211e5 4543 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
5e89a381
SE
4544 fnaddr = convert_memory_address (Pmode, fnaddr);
4545 static_chain = convert_memory_address (Pmode, static_chain);
4546
97e242b0 4547 /* Load up our iterator. */
2a1211e5
RH
4548 addr_reg = copy_to_reg (addr);
4549 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
97e242b0
RH
4550
4551 /* The first two words are the fake descriptor:
4552 __ia64_trampoline, ADDR+16. */
f2972bf8
DR
4553 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4554 if (TARGET_ABI_OPEN_VMS)
4555 {
4556 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4557 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4558 relocation against function symbols to make it identical to the
4559 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4560 strict ELF and dereference to get the bare code address. */
4561 rtx reg = gen_reg_rtx (Pmode);
4562 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4563 emit_move_insn (reg, tramp);
4564 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4565 tramp = reg;
4566 }
2a1211e5 4567 emit_move_insn (m_tramp, tramp);
97e242b0 4568 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4569 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0 4570
0a81f074 4571 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
97e242b0 4572 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4573 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4574
4575 /* The third word is the target descriptor. */
2a1211e5 4576 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
97e242b0 4577 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4578 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4579
4580 /* The fourth word is the static chain. */
2a1211e5 4581 emit_move_insn (m_tramp, static_chain);
97e242b0 4582}
c65ebc55
JW
4583\f
4584/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
4585 for the last named argument which has type TYPE and mode MODE.
4586
4587 We generate the actual spill instructions during prologue generation. */
4588
351a758b 4589static void
ef4bddc2 4590ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
351a758b 4591 tree type, int * pretend_size,
9c808aad 4592 int second_time ATTRIBUTE_UNUSED)
c65ebc55 4593{
d5cc9181 4594 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
351a758b 4595
6c535c69 4596 /* Skip the current argument. */
d5cc9181 4597 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
c65ebc55 4598
351a758b 4599 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 4600 {
351a758b 4601 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
4602 *pretend_size = n * UNITS_PER_WORD;
4603 cfun->machine->n_varargs = n;
4604 }
c65ebc55
JW
4605}
4606
4607/* Check whether TYPE is a homogeneous floating point aggregate. If
4608 it is, return the mode of the floating point type that appears
 4609 in all leaves. If it is not, return VOIDmode.
4610
 4611 An aggregate is a homogeneous floating point aggregate if all
 4612 fields/elements in it have the same floating point type (e.g.,
3d6a9acd
RH
4613 SFmode). 128-bit quad-precision floats are excluded.
4614
4615 Variable sized aggregates should never arrive here, since we should
4616 have already decided to pass them by reference. Top-level zero-sized
4617 aggregates are excluded because our parallels crash the middle-end. */
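/* Illustration (hypothetical types): "struct { float x, y, z; }" is an
   HFA with element mode SFmode, and "struct { double d[4]; }" is an HFA
   with element mode DFmode, while "struct { float f; double d; }" mixes
   element modes and therefore yields VOIDmode.  */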
c65ebc55 4618
ef4bddc2 4619static machine_mode
586de218 4620hfa_element_mode (const_tree type, bool nested)
c65ebc55 4621{
ef4bddc2
RS
4622 machine_mode element_mode = VOIDmode;
4623 machine_mode mode;
c65ebc55
JW
4624 enum tree_code code = TREE_CODE (type);
4625 int know_element_mode = 0;
4626 tree t;
4627
3d6a9acd
RH
4628 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4629 return VOIDmode;
4630
c65ebc55
JW
4631 switch (code)
4632 {
4633 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
0cc8f5c5 4634 case BOOLEAN_TYPE: case POINTER_TYPE:
c65ebc55 4635 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
5662a50d 4636 case LANG_TYPE: case FUNCTION_TYPE:
c65ebc55
JW
4637 return VOIDmode;
4638
4639 /* Fortran complex types are supposed to be HFAs, so we need to handle
4640 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4641 types though. */
4642 case COMPLEX_TYPE:
16448fd4 4643 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
4644 && TYPE_MODE (type) != TCmode)
4645 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
4646 else
4647 return VOIDmode;
4648
4649 case REAL_TYPE:
4650 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4651 mode if this is contained within an aggregate. */
02befdf4 4652 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
4653 return TYPE_MODE (type);
4654 else
4655 return VOIDmode;
4656
4657 case ARRAY_TYPE:
46399021 4658 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
4659
4660 case RECORD_TYPE:
4661 case UNION_TYPE:
4662 case QUAL_UNION_TYPE:
910ad8de 4663 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
c65ebc55
JW
4664 {
4665 if (TREE_CODE (t) != FIELD_DECL)
4666 continue;
4667
4668 mode = hfa_element_mode (TREE_TYPE (t), 1);
4669 if (know_element_mode)
4670 {
4671 if (mode != element_mode)
4672 return VOIDmode;
4673 }
4674 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4675 return VOIDmode;
4676 else
4677 {
4678 know_element_mode = 1;
4679 element_mode = mode;
4680 }
4681 }
4682 return element_mode;
4683
4684 default:
4685 /* If we reach here, we probably have some front-end specific type
4686 that the backend doesn't know about. This can happen via the
4687 aggregate_value_p call in init_function_start. All we can do is
4688 ignore unknown tree types. */
4689 return VOIDmode;
4690 }
4691
4692 return VOIDmode;
4693}
4694
f57fc998
ZW
4695/* Return the number of words required to hold a quantity of TYPE and MODE
4696 when passed as an argument. */
4697static int
ef4bddc2 4698ia64_function_arg_words (const_tree type, machine_mode mode)
f57fc998
ZW
4699{
4700 int words;
4701
4702 if (mode == BLKmode)
4703 words = int_size_in_bytes (type);
4704 else
4705 words = GET_MODE_SIZE (mode);
4706
4707 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4708}
4709
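/* Editorial illustration (assumed sizes, not from the original sources): with
   UNITS_PER_WORD == 8, a 24-byte BLKmode structure needs (24 + 7) / 8 = 3
   argument words, while an SImode argument needs (4 + 7) / 8 = 1 word.  */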
4710/* Return the number of registers that should be skipped so the current
4711 argument (described by TYPE and WORDS) will be properly aligned.
4712
4713 Integer and float arguments larger than 8 bytes start at the next
4714 even boundary. Aggregates larger than 8 bytes start at the next
4715 even boundary if the aggregate has 16 byte alignment. Note that
4716 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4717 but are still to be aligned in registers.
4718
4719 ??? The ABI does not specify how to handle aggregates with
4720 alignment from 9 to 15 bytes, or greater than 16. We handle them
4721 all as if they had 16 byte alignment. Such aggregates can occur
4722 only if gcc extensions are used. */
4723static int
ffa88471
SE
4724ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4725 const_tree type, int words)
f57fc998 4726{
f2972bf8
DR
4727 /* No registers are skipped on VMS. */
4728 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
f57fc998
ZW
4729 return 0;
4730
4731 if (type
4732 && TREE_CODE (type) != INTEGER_TYPE
4733 && TREE_CODE (type) != REAL_TYPE)
4734 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4735 else
4736 return words > 1;
4737}
4738
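/* Editorial illustration (hypothetical call): if cum->words is 1 (odd) and
   the next argument is an aggregate with 16-byte alignment, the function
   returns 1 and the argument is bumped to the next even slot; on OpenVMS the
   result is always 0.  */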
c65ebc55
JW
4739/* Return rtx for register where argument is passed, or zero if it is passed
4740 on the stack. */
c65ebc55
JW
4741/* ??? 128-bit quad-precision floats are always passed in general
4742 registers. */
4743
ffa88471 4744static rtx
ef4bddc2 4745ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
ffa88471 4746 const_tree type, bool named, bool incoming)
c65ebc55 4747{
d5cc9181
JR
4748 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4749
c65ebc55 4750 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
4751 int words = ia64_function_arg_words (type, mode);
4752 int offset = ia64_function_arg_offset (cum, type, words);
ef4bddc2 4753 machine_mode hfa_mode = VOIDmode;
c65ebc55 4754
f2972bf8
DR
4755 /* For OPEN VMS, emit the instruction setting up the argument register here,
 4756 when we know it will be emitted together with the other argument-setup
 4757 insns. This is not conceptually the best place to do this, but it is
4758 the easiest as we have convenient access to cumulative args info. */
4759
4760 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4761 && named == 1)
4762 {
4763 unsigned HOST_WIDE_INT regval = cum->words;
4764 int i;
4765
4766 for (i = 0; i < 8; i++)
4767 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4768
4769 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4770 GEN_INT (regval));
4771 }
4772
c65ebc55
JW
4773 /* If all argument slots are used, then it must go on the stack. */
4774 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4775 return 0;
4776
472b8fdc
TG
4777 /* On OpenVMS argument is either in Rn or Fn. */
4778 if (TARGET_ABI_OPEN_VMS)
4779 {
4780 if (FLOAT_MODE_P (mode))
4781 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4782 else
4783 return gen_rtx_REG (mode, basereg + cum->words);
4784 }
4785
c65ebc55
JW
4786 /* Check for and handle homogeneous FP aggregates. */
4787 if (type)
4788 hfa_mode = hfa_element_mode (type, 0);
4789
4790 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4791 and unprototyped hfas are passed specially. */
4792 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4793 {
4794 rtx loc[16];
4795 int i = 0;
4796 int fp_regs = cum->fp_regs;
4797 int int_regs = cum->words + offset;
4798 int hfa_size = GET_MODE_SIZE (hfa_mode);
4799 int byte_size;
4800 int args_byte_size;
4801
4802 /* If prototyped, pass it in FR regs then GR regs.
4803 If not prototyped, pass it in both FR and GR regs.
4804
4805 If this is an SFmode aggregate, then it is possible to run out of
4806 FR regs while GR regs are still left. In that case, we pass the
4807 remaining part in the GR regs. */
4808
4809 /* Fill the FP regs. We do this always. We stop if we reach the end
4810 of the argument, the last FP register, or the last argument slot. */
4811
4812 byte_size = ((mode == BLKmode)
4813 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4814 args_byte_size = int_regs * UNITS_PER_WORD;
4815 offset = 0;
4816 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4817 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4818 {
4819 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4820 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4821 + fp_regs)),
4822 GEN_INT (offset));
c65ebc55
JW
4823 offset += hfa_size;
4824 args_byte_size += hfa_size;
4825 fp_regs++;
4826 }
4827
4828 /* If no prototype, then the whole thing must go in GR regs. */
4829 if (! cum->prototype)
4830 offset = 0;
4831 /* If this is an SFmode aggregate, then we might have some left over
4832 that needs to go in GR regs. */
4833 else if (byte_size != offset)
4834 int_regs += offset / UNITS_PER_WORD;
4835
4836 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4837
4838 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4839 {
ef4bddc2 4840 machine_mode gr_mode = DImode;
826b47cc 4841 unsigned int gr_size;
c65ebc55
JW
4842
4843 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4844 then this goes in a GR reg left adjusted/little endian, right
4845 adjusted/big endian. */
4846 /* ??? Currently this is handled wrong, because 4-byte hunks are
4847 always right adjusted/little endian. */
4848 if (offset & 0x4)
4849 gr_mode = SImode;
4850 /* If we have an even 4 byte hunk because the aggregate is a
4851 multiple of 4 bytes in size, then this goes in a GR reg right
4852 adjusted/little endian. */
4853 else if (byte_size - offset == 4)
4854 gr_mode = SImode;
4855
4856 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4857 gen_rtx_REG (gr_mode, (basereg
4858 + int_regs)),
4859 GEN_INT (offset));
826b47cc
ZW
4860
4861 gr_size = GET_MODE_SIZE (gr_mode);
4862 offset += gr_size;
4863 if (gr_size == UNITS_PER_WORD
4864 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4865 int_regs++;
4866 else if (gr_size > UNITS_PER_WORD)
4867 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55 4868 }
9dec91d4 4869 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4870 }
f2972bf8 4871
c65ebc55
JW
 4872 /* Integral values and aggregates go in general registers. If we have run out
 4873 of FR registers, then FP values must also go in general registers. This can
 4874 happen when we have an SFmode HFA. */
02befdf4
ZW
4875 else if (mode == TFmode || mode == TCmode
4876 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
4877 {
4878 int byte_size = ((mode == BLKmode)
4879 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4880 if (BYTES_BIG_ENDIAN
4881 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4882 && byte_size < UNITS_PER_WORD
4883 && byte_size > 0)
4884 {
4885 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4886 gen_rtx_REG (DImode,
4887 (basereg + cum->words
4888 + offset)),
4889 const0_rtx);
4890 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4891 }
4892 else
4893 return gen_rtx_REG (mode, basereg + cum->words + offset);
4894
4895 }
c65ebc55
JW
4896
 4897 /* If there is a prototype, then FP values go in an FR register when
9e4f94de 4898 named, and in a GR register when unnamed. */
c65ebc55
JW
4899 else if (cum->prototype)
4900 {
f9c887ac 4901 if (named)
c65ebc55 4902 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
4903 /* In big-endian mode, an anonymous SFmode value must be represented
4904 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4905 the value into the high half of the general register. */
4906 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4907 return gen_rtx_PARALLEL (mode,
4908 gen_rtvec (1,
4909 gen_rtx_EXPR_LIST (VOIDmode,
4910 gen_rtx_REG (DImode, basereg + cum->words + offset),
4911 const0_rtx)));
4912 else
4913 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
4914 }
4915 /* If there is no prototype, then FP values go in both FR and GR
4916 registers. */
4917 else
4918 {
f9c887ac 4919 /* See comment above. */
ef4bddc2 4920 machine_mode inner_mode =
f9c887ac
ZW
4921 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4922
c65ebc55
JW
4923 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4924 gen_rtx_REG (mode, (FR_ARG_FIRST
4925 + cum->fp_regs)),
4926 const0_rtx);
4927 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 4928 gen_rtx_REG (inner_mode,
c65ebc55
JW
4929 (basereg + cum->words
4930 + offset)),
4931 const0_rtx);
809d4ef1 4932
c65ebc55
JW
4933 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4934 }
4935}
4936
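/* Editorial sketch of the HFA case above (example values only, not from the
   original sources): a named, prototyped argument of type

       struct vec3 { float x, y, z; };

   starting in slot 0 is returned as a PARALLEL of three (reg:SF fN)
   EXPR_LISTs at byte offsets 0, 4 and 8, consuming three FP argument
   registers; any portion that cannot be placed in FP registers is passed in
   GR registers as DImode or SImode chunks, as the second loop shows.  */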
ffa88471
SE
4937/* Implement TARGET_FUNCTION_ARG target hook. */
4938
4939static rtx
ef4bddc2 4940ia64_function_arg (cumulative_args_t cum, machine_mode mode,
ffa88471
SE
4941 const_tree type, bool named)
4942{
4943 return ia64_function_arg_1 (cum, mode, type, named, false);
4944}
4945
 4946/* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4947
4948static rtx
d5cc9181 4949ia64_function_incoming_arg (cumulative_args_t cum,
ef4bddc2 4950 machine_mode mode,
ffa88471
SE
4951 const_tree type, bool named)
4952{
4953 return ia64_function_arg_1 (cum, mode, type, named, true);
4954}
4955
78a52f11 4956/* Return the number of bytes, at the beginning of the argument, that must be
c65ebc55
JW
 4957 put in registers. 0 if the argument is entirely in registers or entirely
4958 in memory. */
4959
78a52f11 4960static int
ef4bddc2 4961ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
78a52f11 4962 tree type, bool named ATTRIBUTE_UNUSED)
c65ebc55 4963{
d5cc9181
JR
4964 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4965
f57fc998
ZW
4966 int words = ia64_function_arg_words (type, mode);
4967 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4968
4969 /* If all argument slots are used, then it must go on the stack. */
4970 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4971 return 0;
4972
4973 /* It doesn't matter whether the argument goes in FR or GR regs. If
4974 it fits within the 8 argument slots, then it goes entirely in
4975 registers. If it extends past the last argument slot, then the rest
4976 goes on the stack. */
4977
4978 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4979 return 0;
4980
78a52f11 4981 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
c65ebc55
JW
4982}
4983
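/* Editorial illustration (assumed numbers): with cum->words == 6 and a
   four-word argument, only slots 6 and 7 remain in registers, so this
   returns (8 - 6) * 8 = 16 bytes; the remaining bytes of the argument are
   passed on the stack.  */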
f2972bf8
DR
4984/* Return ivms_arg_type based on machine_mode. */
4985
4986static enum ivms_arg_type
ef4bddc2 4987ia64_arg_type (machine_mode mode)
f2972bf8
DR
4988{
4989 switch (mode)
4990 {
4e10a5a7 4991 case E_SFmode:
f2972bf8 4992 return FS;
4e10a5a7 4993 case E_DFmode:
f2972bf8
DR
4994 return FT;
4995 default:
4996 return I64;
4997 }
4998}
4999
c65ebc55
JW
5000/* Update CUM to point after this argument. This is patterned after
5001 ia64_function_arg. */
5002
ffa88471 5003static void
ef4bddc2 5004ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
ffa88471 5005 const_tree type, bool named)
c65ebc55 5006{
d5cc9181 5007 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
f57fc998
ZW
5008 int words = ia64_function_arg_words (type, mode);
5009 int offset = ia64_function_arg_offset (cum, type, words);
ef4bddc2 5010 machine_mode hfa_mode = VOIDmode;
c65ebc55
JW
5011
5012 /* If all arg slots are already full, then there is nothing to do. */
5013 if (cum->words >= MAX_ARGUMENT_SLOTS)
f2972bf8
DR
5014 {
5015 cum->words += words + offset;
5016 return;
5017 }
c65ebc55 5018
f2972bf8 5019 cum->atypes[cum->words] = ia64_arg_type (mode);
c65ebc55
JW
5020 cum->words += words + offset;
5021
472b8fdc
TG
5022 /* On OpenVMS argument is either in Rn or Fn. */
5023 if (TARGET_ABI_OPEN_VMS)
5024 {
5025 cum->int_regs = cum->words;
5026 cum->fp_regs = cum->words;
5027 return;
5028 }
5029
c65ebc55
JW
5030 /* Check for and handle homogeneous FP aggregates. */
5031 if (type)
5032 hfa_mode = hfa_element_mode (type, 0);
5033
5034 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
5035 and unprototyped hfas are passed specially. */
5036 if (hfa_mode != VOIDmode && (! cum->prototype || named))
5037 {
5038 int fp_regs = cum->fp_regs;
5039 /* This is the original value of cum->words + offset. */
5040 int int_regs = cum->words - words;
5041 int hfa_size = GET_MODE_SIZE (hfa_mode);
5042 int byte_size;
5043 int args_byte_size;
5044
5045 /* If prototyped, pass it in FR regs then GR regs.
5046 If not prototyped, pass it in both FR and GR regs.
5047
5048 If this is an SFmode aggregate, then it is possible to run out of
5049 FR regs while GR regs are still left. In that case, we pass the
5050 remaining part in the GR regs. */
5051
5052 /* Fill the FP regs. We do this always. We stop if we reach the end
5053 of the argument, the last FP register, or the last argument slot. */
5054
5055 byte_size = ((mode == BLKmode)
5056 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
5057 args_byte_size = int_regs * UNITS_PER_WORD;
5058 offset = 0;
5059 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5060 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5061 {
c65ebc55
JW
5062 offset += hfa_size;
5063 args_byte_size += hfa_size;
5064 fp_regs++;
5065 }
5066
5067 cum->fp_regs = fp_regs;
5068 }
5069
d13256a3
SE
 5070 /* Integral values and aggregates go in general registers. So do TFmode FP
 5071 values. If we have run out of FR registers, then other FP values must also
 5072 go in general registers. This can happen when we have an SFmode HFA. */
5073 else if (mode == TFmode || mode == TCmode
5074 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
648fe28b 5075 cum->int_regs = cum->words;
c65ebc55
JW
5076
 5077 /* If there is a prototype, then FP values go in an FR register when
9e4f94de 5078 named, and in a GR register when unnamed. */
c65ebc55
JW
5079 else if (cum->prototype)
5080 {
5081 if (! named)
648fe28b 5082 cum->int_regs = cum->words;
c65ebc55
JW
5083 else
5084 /* ??? Complex types should not reach here. */
5085 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5086 }
5087 /* If there is no prototype, then FP values go in both FR and GR
5088 registers. */
5089 else
9c808aad 5090 {
648fe28b
RH
5091 /* ??? Complex types should not reach here. */
5092 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5093 cum->int_regs = cum->words;
5094 }
c65ebc55 5095}
51dcde6f 5096
d13256a3 5097/* Arguments with alignment larger than 8 bytes start at the next even
93348822 5098 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
d13256a3
SE
5099 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5100
c2ed6cf8 5101static unsigned int
ef4bddc2 5102ia64_function_arg_boundary (machine_mode mode, const_tree type)
d13256a3 5103{
d13256a3
SE
5104 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5105 return PARM_BOUNDARY * 2;
5106
5107 if (type)
5108 {
5109 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5110 return PARM_BOUNDARY * 2;
5111 else
5112 return PARM_BOUNDARY;
5113 }
5114
5115 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5116 return PARM_BOUNDARY * 2;
5117 else
5118 return PARM_BOUNDARY;
5119}
5120
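/* Editorial illustration: PARM_BOUNDARY is 64 bits here, so a TFmode
   argument under ILP32 HPUX, a type aligned to more than 64 bits, or a
   typeless mode wider than 64 bits all report a 128-bit boundary; everything
   else stays at 64 bits.  */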
599aedd9
RH
5121/* True if it is OK to do sibling call optimization for the specified
5122 call expression EXP. DECL will be the called function, or NULL if
5123 this is an indirect call. */
5124static bool
9c808aad 5125ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 5126{
097f3d48
JW
5127 /* We can't perform a sibcall if the current function has the syscall_linkage
5128 attribute. */
5129 if (lookup_attribute ("syscall_linkage",
5130 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5131 return false;
5132
b23ba0b8 5133 /* We must always return with our current GP. This means we can
c208436c
SE
5134 only sibcall to functions defined in the current module unless
5135 TARGET_CONST_GP is set to true. */
5136 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
599aedd9 5137}
c65ebc55 5138\f
c65ebc55
JW
5139
5140/* Implement va_arg. */
5141
23a60a04 5142static tree
726a989a
RB
5143ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5144 gimple_seq *post_p)
cd3ce9b4 5145{
cd3ce9b4 5146 /* Variable sized types are passed by reference. */
08b0dc1b 5147 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
cd3ce9b4 5148 {
23a60a04
JM
5149 tree ptrtype = build_pointer_type (type);
5150 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
c2433d7d 5151 return build_va_arg_indirect_ref (addr);
cd3ce9b4
JM
5152 }
5153
5154 /* Aggregate arguments with alignment larger than 8 bytes start at
5155 the next even boundary. Integer and floating point arguments
5156 do so if they are larger than 8 bytes, whether or not they are
5157 also aligned larger than 8 bytes. */
5158 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5159 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5160 {
5d49b6a7 5161 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
47a25a46 5162 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5d49b6a7 5163 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
726a989a 5164 gimplify_assign (unshare_expr (valist), t, pre_p);
cd3ce9b4
JM
5165 }
5166
23a60a04 5167 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 5168}
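/* Editorial note on the rounding above (illustrative only): with
   UNITS_PER_WORD == 8 the emitted computation is effectively
   valist = (valist + 15) & -16, i.e. the va_list pointer is rounded up to
   the next 16-byte boundary before the standard va_arg expansion runs.  */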
c65ebc55
JW
5169\f
5170/* Return 1 if the function return value is returned in memory. Return 0 if it is
5171 in a register. */
5172
351a758b 5173static bool
586de218 5174ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
c65ebc55 5175{
ef4bddc2
RS
5176 machine_mode mode;
5177 machine_mode hfa_mode;
487b97e0 5178 HOST_WIDE_INT byte_size;
c65ebc55
JW
5179
5180 mode = TYPE_MODE (valtype);
487b97e0
RH
5181 byte_size = GET_MODE_SIZE (mode);
5182 if (mode == BLKmode)
5183 {
5184 byte_size = int_size_in_bytes (valtype);
5185 if (byte_size < 0)
351a758b 5186 return true;
487b97e0 5187 }
c65ebc55
JW
5188
5189 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5190
5191 hfa_mode = hfa_element_mode (valtype, 0);
5192 if (hfa_mode != VOIDmode)
5193 {
5194 int hfa_size = GET_MODE_SIZE (hfa_mode);
5195
c65ebc55 5196 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 5197 return true;
c65ebc55 5198 else
351a758b 5199 return false;
c65ebc55 5200 }
c65ebc55 5201 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 5202 return true;
c65ebc55 5203 else
351a758b 5204 return false;
c65ebc55
JW
5205}
5206
5207/* Return rtx for register that holds the function return value. */
5208
ba90d838
AS
5209static rtx
5210ia64_function_value (const_tree valtype,
5211 const_tree fn_decl_or_type,
5212 bool outgoing ATTRIBUTE_UNUSED)
c65ebc55 5213{
ef4bddc2
RS
5214 machine_mode mode;
5215 machine_mode hfa_mode;
f2972bf8 5216 int unsignedp;
ba90d838 5217 const_tree func = fn_decl_or_type;
c65ebc55 5218
ba90d838
AS
5219 if (fn_decl_or_type
5220 && !DECL_P (fn_decl_or_type))
5221 func = NULL;
5222
c65ebc55
JW
5223 mode = TYPE_MODE (valtype);
5224 hfa_mode = hfa_element_mode (valtype, 0);
5225
5226 if (hfa_mode != VOIDmode)
5227 {
5228 rtx loc[8];
5229 int i;
5230 int hfa_size;
5231 int byte_size;
5232 int offset;
5233
5234 hfa_size = GET_MODE_SIZE (hfa_mode);
5235 byte_size = ((mode == BLKmode)
5236 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5237 offset = 0;
5238 for (i = 0; offset < byte_size; i++)
5239 {
5240 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5241 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5242 GEN_INT (offset));
c65ebc55
JW
5243 offset += hfa_size;
5244 }
9dec91d4 5245 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 5246 }
f57fc998 5247 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
5248 return gen_rtx_REG (mode, FR_ARG_FIRST);
5249 else
3870df96 5250 {
8c5cacfd
RH
5251 bool need_parallel = false;
5252
5253 /* In big-endian mode, we need to manage the layout of aggregates
5254 in the registers so that we get the bits properly aligned in
5255 the highpart of the registers. */
3870df96
SE
5256 if (BYTES_BIG_ENDIAN
5257 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
8c5cacfd
RH
5258 need_parallel = true;
5259
5260 /* Something like struct S { long double x; char a[0] } is not an
5261 HFA structure, and therefore doesn't go in fp registers. But
5262 the middle-end will give it XFmode anyway, and XFmode values
5263 don't normally fit in integer registers. So we need to smuggle
5264 the value inside a parallel. */
4de67c26 5265 else if (mode == XFmode || mode == XCmode || mode == RFmode)
8c5cacfd
RH
5266 need_parallel = true;
5267
5268 if (need_parallel)
3870df96
SE
5269 {
5270 rtx loc[8];
5271 int offset;
5272 int bytesize;
5273 int i;
5274
5275 offset = 0;
5276 bytesize = int_size_in_bytes (valtype);
543144ed
JM
5277 /* An empty PARALLEL is invalid here, but the return value
5278 doesn't matter for empty structs. */
5279 if (bytesize == 0)
5280 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96
SE
5281 for (i = 0; offset < bytesize; i++)
5282 {
5283 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5284 gen_rtx_REG (DImode,
5285 GR_RET_FIRST + i),
5286 GEN_INT (offset));
5287 offset += UNITS_PER_WORD;
5288 }
5289 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5290 }
8c5cacfd 5291
8ee95727
TG
5292 mode = promote_function_mode (valtype, mode, &unsignedp,
5293 func ? TREE_TYPE (func) : NULL_TREE,
5294 true);
f2972bf8 5295
8c5cacfd 5296 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96 5297 }
c65ebc55
JW
5298}
5299
ba90d838
AS
5300/* Worker function for TARGET_LIBCALL_VALUE. */
5301
5302static rtx
ef4bddc2 5303ia64_libcall_value (machine_mode mode,
ba90d838
AS
5304 const_rtx fun ATTRIBUTE_UNUSED)
5305{
5306 return gen_rtx_REG (mode,
5307 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5308 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5309 && (mode) != TFmode)
5310 ? FR_RET_FIRST : GR_RET_FIRST));
5311}
5312
5313/* Worker function for FUNCTION_VALUE_REGNO_P. */
5314
5315static bool
5316ia64_function_value_regno_p (const unsigned int regno)
5317{
5318 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5319 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5320}
5321
fdbe66f2 5322/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6b2300b3
JJ
5323 We need to emit DTP-relative relocations. */
5324
fdbe66f2 5325static void
9c808aad 5326ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3 5327{
6f3113ed
SE
5328 gcc_assert (size == 4 || size == 8);
5329 if (size == 4)
5330 fputs ("\tdata4.ua\t@dtprel(", file);
5331 else
5332 fputs ("\tdata8.ua\t@dtprel(", file);
6b2300b3
JJ
5333 output_addr_const (file, x);
5334 fputs (")", file);
5335}
5336
c65ebc55
JW
5337/* Print a memory address as an operand to reference that memory location. */
5338
5339/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5340 also call this from ia64_print_operand for memory addresses. */
5341
5e50b799 5342static void
9c808aad 5343ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
cc8ca59e 5344 machine_mode /*mode*/,
9c808aad 5345 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
5346{
5347}
5348
3569057d 5349/* Print an operand to an assembler instruction.
c65ebc55
JW
5350 C Swap and print a comparison operator.
5351 D Print an FP comparison operator.
5352 E Print 32 - constant, for SImode shifts as extract.
66db6b45 5353 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
5354 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5355 a floating point register emitted normally.
735b94a7 5356 G A floating point constant.
c65ebc55 5357 I Invert a predicate register by adding 1.
e5bde68a 5358 J Select the proper predicate register for a condition.
6b6c1201 5359 j Select the inverse predicate register for a condition.
c65ebc55
JW
5360 O Append .acq for volatile load.
5361 P Postincrement of a MEM.
5362 Q Append .rel for volatile store.
4883241c 5363 R Print .s .d or nothing for a single, double or no truncation.
c65ebc55
JW
5364 S Shift amount for shladd instruction.
5365 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5366 for Intel assembler.
5367 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5368 for Intel assembler.
a71aef0b 5369 X A pair of floating point registers.
c65ebc55 5370 r Print register name, or constant 0 as r0. HP compatibility for
f61134e8
RH
5371 Linux kernel.
5372 v Print vector constant value as an 8-byte integer value. */
5373
5e50b799 5374static void
9c808aad 5375ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 5376{
e57b9d65
RH
5377 const char *str;
5378
c65ebc55
JW
5379 switch (code)
5380 {
c65ebc55
JW
5381 case 0:
5382 /* Handled below. */
5383 break;
809d4ef1 5384
c65ebc55
JW
5385 case 'C':
5386 {
5387 enum rtx_code c = swap_condition (GET_CODE (x));
5388 fputs (GET_RTX_NAME (c), file);
5389 return;
5390 }
5391
5392 case 'D':
e57b9d65
RH
5393 switch (GET_CODE (x))
5394 {
5395 case NE:
5396 str = "neq";
5397 break;
5398 case UNORDERED:
5399 str = "unord";
5400 break;
5401 case ORDERED:
5402 str = "ord";
5403 break;
86ad1da0
SE
5404 case UNLT:
5405 str = "nge";
5406 break;
5407 case UNLE:
5408 str = "ngt";
5409 break;
5410 case UNGT:
5411 str = "nle";
5412 break;
5413 case UNGE:
5414 str = "nlt";
5415 break;
8fc53a5f
EB
5416 case UNEQ:
5417 case LTGT:
5418 gcc_unreachable ();
e57b9d65
RH
5419 default:
5420 str = GET_RTX_NAME (GET_CODE (x));
5421 break;
5422 }
5423 fputs (str, file);
c65ebc55
JW
5424 return;
5425
5426 case 'E':
5427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5428 return;
5429
66db6b45
RH
5430 case 'e':
5431 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5432 return;
5433
c65ebc55
JW
5434 case 'F':
5435 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 5436 str = reg_names [FR_REG (0)];
c65ebc55 5437 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 5438 str = reg_names [FR_REG (1)];
c65ebc55 5439 else
e820471b
NS
5440 {
5441 gcc_assert (GET_CODE (x) == REG);
5442 str = reg_names [REGNO (x)];
5443 }
e57b9d65 5444 fputs (str, file);
c65ebc55
JW
5445 return;
5446
735b94a7
SE
5447 case 'G':
5448 {
5449 long val[4];
34a72c33 5450 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
735b94a7
SE
5451 if (GET_MODE (x) == SFmode)
5452 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5453 else if (GET_MODE (x) == DFmode)
5454 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5455 & 0xffffffff,
5456 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5457 & 0xffffffff);
5458 else
5459 output_operand_lossage ("invalid %%G mode");
5460 }
5461 return;
5462
c65ebc55
JW
5463 case 'I':
5464 fputs (reg_names [REGNO (x) + 1], file);
5465 return;
5466
e5bde68a 5467 case 'J':
6b6c1201
RH
5468 case 'j':
5469 {
5470 unsigned int regno = REGNO (XEXP (x, 0));
5471 if (GET_CODE (x) == EQ)
5472 regno += 1;
5473 if (code == 'j')
5474 regno ^= 1;
5475 fputs (reg_names [regno], file);
5476 }
e5bde68a
RH
5477 return;
5478
c65ebc55
JW
5479 case 'O':
5480 if (MEM_VOLATILE_P (x))
5481 fputs(".acq", file);
5482 return;
5483
5484 case 'P':
5485 {
4b983fdc 5486 HOST_WIDE_INT value;
c65ebc55 5487
4b983fdc
RH
5488 switch (GET_CODE (XEXP (x, 0)))
5489 {
5490 default:
5491 return;
5492
5493 case POST_MODIFY:
5494 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5495 if (GET_CODE (x) == CONST_INT)
08012cda 5496 value = INTVAL (x);
e820471b 5497 else
4b983fdc 5498 {
e820471b 5499 gcc_assert (GET_CODE (x) == REG);
08012cda 5500 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
5501 return;
5502 }
4b983fdc 5503 break;
c65ebc55 5504
4b983fdc
RH
5505 case POST_INC:
5506 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 5507 break;
c65ebc55 5508
4b983fdc 5509 case POST_DEC:
08012cda 5510 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
5511 break;
5512 }
809d4ef1 5513
4a0a75dd 5514 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
5515 return;
5516 }
5517
5518 case 'Q':
5519 if (MEM_VOLATILE_P (x))
5520 fputs(".rel", file);
5521 return;
5522
4883241c
SE
5523 case 'R':
5524 if (x == CONST0_RTX (GET_MODE (x)))
5525 fputs(".s", file);
5526 else if (x == CONST1_RTX (GET_MODE (x)))
5527 fputs(".d", file);
5528 else if (x == CONST2_RTX (GET_MODE (x)))
5529 ;
5530 else
5531 output_operand_lossage ("invalid %%R value");
5532 return;
5533
c65ebc55 5534 case 'S':
809d4ef1 5535 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
5536 return;
5537
5538 case 'T':
5539 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5540 {
809d4ef1 5541 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5542 return;
5543 }
5544 break;
5545
5546 case 'U':
5547 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5548 {
3b572406 5549 const char *prefix = "0x";
c65ebc55
JW
5550 if (INTVAL (x) & 0x80000000)
5551 {
5552 fprintf (file, "0xffffffff");
5553 prefix = "";
5554 }
809d4ef1 5555 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5556 return;
5557 }
5558 break;
809d4ef1 5559
a71aef0b
JB
5560 case 'X':
5561 {
5562 unsigned int regno = REGNO (x);
5563 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5564 }
5565 return;
5566
c65ebc55 5567 case 'r':
18a3c539
JW
5568 /* If this operand is the constant zero, write it as register zero.
5569 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
5570 if (GET_CODE (x) == REG)
5571 fputs (reg_names[REGNO (x)], file);
5572 else if (x == CONST0_RTX (GET_MODE (x)))
5573 fputs ("r0", file);
18a3c539
JW
5574 else if (GET_CODE (x) == CONST_INT)
5575 output_addr_const (file, x);
c65ebc55
JW
5576 else
5577 output_operand_lossage ("invalid %%r value");
5578 return;
5579
f61134e8
RH
5580 case 'v':
5581 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5582 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5583 break;
5584
85548039
RH
5585 case '+':
5586 {
5587 const char *which;
9c808aad 5588
85548039
RH
5589 /* For conditional branches, returns or calls, substitute
5590 sptk, dptk, dpnt, or spnt for %s. */
5591 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5592 if (x)
5593 {
5fa396ad
JH
5594 int pred_val = profile_probability::from_reg_br_prob_note
5595 (XINT (x, 0)).to_reg_br_prob_base ();
85548039
RH
5596
5597 /* Guess top and bottom 10% statically predicted. */
2c9e13f3
JH
5598 if (pred_val < REG_BR_PROB_BASE / 50
5599 && br_prob_note_reliable_p (x))
85548039
RH
5600 which = ".spnt";
5601 else if (pred_val < REG_BR_PROB_BASE / 2)
5602 which = ".dpnt";
2c9e13f3
JH
5603 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5604 || !br_prob_note_reliable_p (x))
85548039
RH
5605 which = ".dptk";
5606 else
5607 which = ".sptk";
5608 }
b64925dc 5609 else if (CALL_P (current_output_insn))
85548039
RH
5610 which = ".sptk";
5611 else
5612 which = ".dptk";
5613
5614 fputs (which, file);
5615 return;
5616 }
5617
6f8aa100
RH
5618 case ',':
5619 x = current_insn_predicate;
5620 if (x)
5621 {
5622 unsigned int regno = REGNO (XEXP (x, 0));
5623 if (GET_CODE (x) == EQ)
5624 regno += 1;
6f8aa100
RH
5625 fprintf (file, "(%s) ", reg_names [regno]);
5626 }
5627 return;
5628
c65ebc55
JW
5629 default:
5630 output_operand_lossage ("ia64_print_operand: unknown code");
5631 return;
5632 }
5633
5634 switch (GET_CODE (x))
5635 {
5636 /* This happens for the spill/restore instructions. */
5637 case POST_INC:
4b983fdc
RH
5638 case POST_DEC:
5639 case POST_MODIFY:
c65ebc55 5640 x = XEXP (x, 0);
4c74215c 5641 /* fall through */
c65ebc55
JW
5642
5643 case REG:
5644 fputs (reg_names [REGNO (x)], file);
5645 break;
5646
5647 case MEM:
5648 {
5649 rtx addr = XEXP (x, 0);
ec8e098d 5650 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
5651 addr = XEXP (addr, 0);
5652 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5653 break;
5654 }
809d4ef1 5655
c65ebc55
JW
5656 default:
5657 output_addr_const (file, x);
5658 break;
5659 }
5660
5661 return;
5662}
5e50b799
AS
5663
5664/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5665
5666static bool
5667ia64_print_operand_punct_valid_p (unsigned char code)
5668{
5669 return (code == '+' || code == ',');
5670}
c65ebc55 5671\f
3c50106f
RH
5672/* Compute a (partial) cost for rtx X. Return true if the complete
5673 cost has been computed, and false if subexpressions should be
5674 scanned. In either case, *TOTAL contains the cost result. */
5675/* ??? This is incomplete. */
5676
5677static bool
e548c9df
AM
5678ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5679 int opno ATTRIBUTE_UNUSED,
68f932c4 5680 int *total, bool speed ATTRIBUTE_UNUSED)
3c50106f 5681{
e548c9df
AM
5682 int code = GET_CODE (x);
5683
3c50106f
RH
5684 switch (code)
5685 {
5686 case CONST_INT:
5687 switch (outer_code)
5688 {
5689 case SET:
13f70342 5690 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
3c50106f
RH
5691 return true;
5692 case PLUS:
13f70342 5693 if (satisfies_constraint_I (x))
3c50106f 5694 *total = 0;
13f70342 5695 else if (satisfies_constraint_J (x))
3c50106f
RH
5696 *total = 1;
5697 else
5698 *total = COSTS_N_INSNS (1);
5699 return true;
5700 default:
13f70342 5701 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
3c50106f
RH
5702 *total = 0;
5703 else
5704 *total = COSTS_N_INSNS (1);
5705 return true;
5706 }
5707
5708 case CONST_DOUBLE:
5709 *total = COSTS_N_INSNS (1);
5710 return true;
5711
5712 case CONST:
5713 case SYMBOL_REF:
5714 case LABEL_REF:
5715 *total = COSTS_N_INSNS (3);
5716 return true;
5717
f19f1e5e
RH
5718 case FMA:
5719 *total = COSTS_N_INSNS (4);
5720 return true;
5721
3c50106f
RH
5722 case MULT:
5723 /* For multiplies wider than HImode, we have to go to the FPU,
5724 which normally involves copies. Plus there's the latency
5725 of the multiply itself, and the latency of the instructions to
5726 transfer integer regs to FP regs. */
e548c9df 5727 if (FLOAT_MODE_P (mode))
f19f1e5e 5728 *total = COSTS_N_INSNS (4);
e548c9df 5729 else if (GET_MODE_SIZE (mode) > 2)
3c50106f
RH
5730 *total = COSTS_N_INSNS (10);
5731 else
5732 *total = COSTS_N_INSNS (2);
5733 return true;
5734
5735 case PLUS:
5736 case MINUS:
e548c9df 5737 if (FLOAT_MODE_P (mode))
f19f1e5e
RH
5738 {
5739 *total = COSTS_N_INSNS (4);
5740 return true;
5741 }
5742 /* FALLTHRU */
5743
3c50106f
RH
5744 case ASHIFT:
5745 case ASHIFTRT:
5746 case LSHIFTRT:
5747 *total = COSTS_N_INSNS (1);
5748 return true;
5749
5750 case DIV:
5751 case UDIV:
5752 case MOD:
5753 case UMOD:
5754 /* We make divide expensive, so that divide-by-constant will be
5755 optimized to a multiply. */
5756 *total = COSTS_N_INSNS (60);
5757 return true;
5758
5759 default:
5760 return false;
5761 }
5762}
5763
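/* Editorial summary of the cases above (informal, derived from the code):
   small immediates are free in SETs and adds, symbolic constants cost about
   three insns, FMA and FP multiplies cost four, integer multiplies wider
   than HImode cost ten, and divides/modulos are priced at sixty so that
   division by a constant is rewritten as a multiplication.  */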
9e4f94de 5764/* Calculate the cost of moving data from a register in class FROM to
7109d286 5765 one in class TO, using MODE. */
5527bf14 5766
de8f4b07 5767static int
ef4bddc2 5768ia64_register_move_cost (machine_mode mode, reg_class_t from,
6f76a878 5769 reg_class_t to)
a87cf97e 5770{
7109d286
RH
5771 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5772 if (to == ADDL_REGS)
5773 to = GR_REGS;
5774 if (from == ADDL_REGS)
5775 from = GR_REGS;
5776
5777 /* All costs are symmetric, so reduce cases by putting the
5778 lower number class as the destination. */
5779 if (from < to)
5780 {
6f76a878 5781 reg_class_t tmp = to;
7109d286
RH
5782 to = from, from = tmp;
5783 }
5784
02befdf4 5785 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286 5786 so that we get secondary memory reloads. Between FR_REGS,
69e18c09 5787 we have to make this at least as expensive as memory_move_cost
7109d286 5788 to avoid spectacularly poor register class preferencing. */
4de67c26 5789 if (mode == XFmode || mode == RFmode)
7109d286
RH
5790 {
5791 if (to != GR_REGS || from != GR_REGS)
69e18c09 5792 return memory_move_cost (mode, to, false);
7109d286
RH
5793 else
5794 return 3;
5795 }
5796
5797 switch (to)
5798 {
5799 case PR_REGS:
5800 /* Moving between PR registers takes two insns. */
5801 if (from == PR_REGS)
5802 return 3;
5803 /* Moving between PR and anything but GR is impossible. */
5804 if (from != GR_REGS)
69e18c09 5805 return memory_move_cost (mode, to, false);
7109d286
RH
5806 break;
5807
5808 case BR_REGS:
5809 /* Moving between BR and anything but GR is impossible. */
5810 if (from != GR_REGS && from != GR_AND_BR_REGS)
69e18c09 5811 return memory_move_cost (mode, to, false);
7109d286
RH
5812 break;
5813
5814 case AR_I_REGS:
5815 case AR_M_REGS:
5816 /* Moving between AR and anything but GR is impossible. */
5817 if (from != GR_REGS)
69e18c09 5818 return memory_move_cost (mode, to, false);
7109d286
RH
5819 break;
5820
5821 case GR_REGS:
5822 case FR_REGS:
a71aef0b 5823 case FP_REGS:
7109d286
RH
5824 case GR_AND_FR_REGS:
5825 case GR_AND_BR_REGS:
5826 case ALL_REGS:
5827 break;
5828
5829 default:
e820471b 5830 gcc_unreachable ();
7109d286 5831 }
3f622353 5832
5527bf14
RH
5833 return 2;
5834}
c65ebc55 5835
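/* Editorial summary (informal reading of the code above): most direct
   copies, including GR<->GR, cost 2; PR<->PR costs 3; XFmode or RFmode
   copies that are not GR<->GR, and any move that cannot be done without
   going through memory (e.g. PR or AR from non-GR), fall back to
   memory_move_cost.  */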
69e18c09
AS
5836/* Calculate the cost of moving data of MODE from a register to or from
5837 memory. */
5838
5839static int
ef4bddc2 5840ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
69e18c09
AS
5841 reg_class_t rclass,
5842 bool in ATTRIBUTE_UNUSED)
5843{
5844 if (rclass == GENERAL_REGS
5845 || rclass == FR_REGS
5846 || rclass == FP_REGS
5847 || rclass == GR_AND_FR_REGS)
5848 return 4;
5849 else
5850 return 10;
5851}
5852
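/* Editorial note: register<->memory moves for the general and FP register
   classes are priced at 4 while the remaining classes cost 10, which steers
   reload away from spilling branch, predicate and application registers.  */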
ab177ad5
AS
5853/* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5854 on RCLASS to use when copying X into that class. */
f61134e8 5855
ab177ad5
AS
5856static reg_class_t
5857ia64_preferred_reload_class (rtx x, reg_class_t rclass)
f61134e8 5858{
0a2aaacc 5859 switch (rclass)
f61134e8
RH
5860 {
5861 case FR_REGS:
a71aef0b 5862 case FP_REGS:
f61134e8
RH
5863 /* Don't allow volatile mem reloads into floating point registers.
5864 This is defined to force reload to choose the r/m case instead
5865 of the f/f case when reloading (set (reg fX) (mem/v)). */
5866 if (MEM_P (x) && MEM_VOLATILE_P (x))
5867 return NO_REGS;
5868
5869 /* Force all unrecognized constants into the constant pool. */
5870 if (CONSTANT_P (x))
5871 return NO_REGS;
5872 break;
5873
5874 case AR_M_REGS:
5875 case AR_I_REGS:
5876 if (!OBJECT_P (x))
5877 return NO_REGS;
5878 break;
5879
5880 default:
5881 break;
5882 }
5883
0a2aaacc 5884 return rclass;
f61134e8
RH
5885}
5886
c65ebc55 5887/* This function returns the register class required for a secondary
0a2aaacc 5888 register when copying between one of the registers in RCLASS, and X,
c65ebc55
JW
5889 using MODE. A return value of NO_REGS means that no secondary register
5890 is required. */
5891
5892enum reg_class
0a2aaacc 5893ia64_secondary_reload_class (enum reg_class rclass,
ef4bddc2 5894 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
5895{
5896 int regno = -1;
5897
5898 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5899 regno = true_regnum (x);
5900
0a2aaacc 5901 switch (rclass)
97e242b0
RH
5902 {
5903 case BR_REGS:
7109d286
RH
5904 case AR_M_REGS:
5905 case AR_I_REGS:
5906 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5907 interaction. We end up with two pseudos with overlapping lifetimes
5908 both of which are equiv to the same constant, and both which need
5909 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5910 changes depending on the path length, which means the qty_first_reg
5911 check in make_regs_eqv can give different answers at different times.
5912 At some point I'll probably need a reload_indi pattern to handle
5913 this.
5914
5915 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5916 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5917 non-general registers for good measure. */
5918 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
5919 return GR_REGS;
5920
5921 /* This is needed if a pseudo used as a call_operand gets spilled to a
5922 stack slot. */
5923 if (GET_CODE (x) == MEM)
5924 return GR_REGS;
5925 break;
5926
5927 case FR_REGS:
a71aef0b 5928 case FP_REGS:
c51e6d85 5929 /* Need to go through general registers to get to other class regs. */
7109d286
RH
5930 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5931 return GR_REGS;
9c808aad 5932
97e242b0
RH
5933 /* This can happen when a paradoxical subreg is an operand to the
5934 muldi3 pattern. */
5935 /* ??? This shouldn't be necessary after instruction scheduling is
5936 enabled, because paradoxical subregs are not accepted by
5937 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5938 stop the paradoxical subreg stupidity in the *_operand functions
5939 in recog.c. */
5940 if (GET_CODE (x) == MEM
5941 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5942 || GET_MODE (x) == QImode))
5943 return GR_REGS;
5944
5945 /* This can happen because of the ior/and/etc patterns that accept FP
5946 registers as operands. If the third operand is a constant, then it
5947 needs to be reloaded into a FP register. */
5948 if (GET_CODE (x) == CONST_INT)
5949 return GR_REGS;
5950
5951 /* This can happen because of register elimination in a muldi3 insn.
5952 E.g. `26107 * (unsigned long)&u'. */
5953 if (GET_CODE (x) == PLUS)
5954 return GR_REGS;
5955 break;
5956
5957 case PR_REGS:
f2f90c63 5958 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
5959 and the function has a nonlocal goto. This is because global
5960 does not allocate call crossing pseudos to hard registers when
e3b5732b 5961 crtl->has_nonlocal_goto is true. This is relatively
97e242b0
RH
5962 common for C++ programs that use exceptions. To reproduce,
5963 return NO_REGS and compile libstdc++. */
5964 if (GET_CODE (x) == MEM)
5965 return GR_REGS;
f2f90c63
RH
5966
5967 /* This can happen when we take a BImode subreg of a DImode value,
5968 and that DImode value winds up in some non-GR register. */
5969 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5970 return GR_REGS;
97e242b0
RH
5971 break;
5972
5973 default:
5974 break;
5975 }
c65ebc55
JW
5976
5977 return NO_REGS;
5978}
5979
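/* Editorial illustration (hypothetical reloads): copying a value from a
   stack slot into BR_REGS, or a CONST_INT into FR_REGS, both report GR_REGS
   as the secondary class, so reload stages such values through a general
   register first.  */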
215b063c
PB
5980\f
5981/* Implement targetm.unspec_may_trap_p hook. */
5982static int
5983ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5984{
c84a808e
EB
5985 switch (XINT (x, 1))
5986 {
5987 case UNSPEC_LDA:
5988 case UNSPEC_LDS:
5989 case UNSPEC_LDSA:
5990 case UNSPEC_LDCCLR:
5991 case UNSPEC_CHKACLR:
5992 case UNSPEC_CHKS:
5993 /* These unspecs are just wrappers. */
5994 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
215b063c
PB
5995 }
5996
5997 return default_unspec_may_trap_p (x, flags);
5998}
5999
c65ebc55
JW
6000\f
6001/* Parse the -mfixed-range= option string. */
6002
6003static void
9c808aad 6004fix_range (const char *const_str)
c65ebc55
JW
6005{
6006 int i, first, last;
3b572406 6007 char *str, *dash, *comma;
c65ebc55
JW
6008
6009 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
6010 REG2 are either register names or register numbers. The effect
6011 of this option is to mark the registers in the range from REG1 to
6012 REG2 as ``fixed'' so they won't be used by the compiler. This is
6013 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
6014
3b572406
RH
6015 i = strlen (const_str);
6016 str = (char *) alloca (i + 1);
6017 memcpy (str, const_str, i + 1);
6018
c65ebc55
JW
6019 while (1)
6020 {
6021 dash = strchr (str, '-');
6022 if (!dash)
6023 {
d4ee4d25 6024 warning (0, "value of -mfixed-range must have form REG1-REG2");
c65ebc55
JW
6025 return;
6026 }
6027 *dash = '\0';
6028
6029 comma = strchr (dash + 1, ',');
6030 if (comma)
6031 *comma = '\0';
6032
6033 first = decode_reg_name (str);
6034 if (first < 0)
6035 {
d4ee4d25 6036 warning (0, "unknown register name: %s", str);
c65ebc55
JW
6037 return;
6038 }
6039
6040 last = decode_reg_name (dash + 1);
6041 if (last < 0)
6042 {
d4ee4d25 6043 warning (0, "unknown register name: %s", dash + 1);
c65ebc55
JW
6044 return;
6045 }
6046
6047 *dash = '-';
6048
6049 if (first > last)
6050 {
d4ee4d25 6051 warning (0, "%s-%s is an empty range", str, dash + 1);
c65ebc55
JW
6052 return;
6053 }
6054
6055 for (i = first; i <= last; ++i)
6056 fixed_regs[i] = call_used_regs[i] = 1;
6057
6058 if (!comma)
6059 break;
6060
6061 *comma = ',';
6062 str = comma + 1;
6063 }
6064}
6065
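/* Editorial usage example (hypothetical command line): compiling with
   -mfixed-range=f32-f127 marks f32 through f127 as fixed and call-used, so
   generated code never touches the upper floating-point partition; several
   comma-separated ranges may be given.  */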
930572b9 6066/* Implement TARGET_OPTION_OVERRIDE. */
c65ebc55 6067
930572b9
AS
6068static void
6069ia64_option_override (void)
c65ebc55 6070{
e6cc0c98
JM
6071 unsigned int i;
6072 cl_deferred_option *opt;
9771b263
DN
6073 vec<cl_deferred_option> *v
6074 = (vec<cl_deferred_option> *) ia64_deferred_options;
e6cc0c98 6075
9771b263
DN
6076 if (v)
6077 FOR_EACH_VEC_ELT (*v, i, opt)
6078 {
6079 switch (opt->opt_index)
6080 {
6081 case OPT_mfixed_range_:
6082 fix_range (opt->arg);
6083 break;
e6cc0c98 6084
9771b263
DN
6085 default:
6086 gcc_unreachable ();
6087 }
6088 }
e6cc0c98 6089
59da9a7d
JW
6090 if (TARGET_AUTO_PIC)
6091 target_flags |= MASK_CONST_GP;
6092
7e1e7d4c
VM
6093 /* Numerous experiment shows that IRA based loop pressure
6094 calculation works better for RTL loop invariant motion on targets
6095 with enough (>= 32) registers. It is an expensive optimization.
6096 So it is on only for peak performance. */
6097 if (optimize >= 3)
6098 flag_ira_loop_pressure = 1;
6099
6100
fa37ed29
JM
6101 ia64_section_threshold = (global_options_set.x_g_switch_value
6102 ? g_switch_value
6103 : IA64_DEFAULT_GVALUE);
2b7e2984
SE
6104
6105 init_machine_status = ia64_init_machine_status;
6106
6107 if (align_functions <= 0)
6108 align_functions = 64;
6109 if (align_loops <= 0)
6110 align_loops = 32;
6111 if (TARGET_ABI_OPEN_VMS)
6112 flag_no_common = 1;
6113
6114 ia64_override_options_after_change();
6115}
6116
6117/* Implement targetm.override_options_after_change. */
6118
6119static void
6120ia64_override_options_after_change (void)
6121{
388092d5 6122 if (optimize >= 3
d4d24ba4
JM
6123 && !global_options_set.x_flag_selective_scheduling
6124 && !global_options_set.x_flag_selective_scheduling2)
388092d5
AB
6125 {
6126 flag_selective_scheduling2 = 1;
6127 flag_sel_sched_pipelining = 1;
6128 }
6129 if (mflag_sched_control_spec == 2)
6130 {
6131 /* Control speculation is on by default for the selective scheduler,
6132 but not for the Haifa scheduler. */
6133 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6134 }
6135 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6136 {
 6137 /* FIXME: remove this when we implement breaking autoinsns as
6138 a transformation. */
6139 flag_auto_inc_dec = 0;
6140 }
c65ebc55 6141}
dbdd120f 6142
6fb5fa3c
DB
6143/* Initialize the record of emitted frame related registers. */
6144
6145void ia64_init_expanders (void)
6146{
6147 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6148}
6149
dbdd120f
RH
6150static struct machine_function *
6151ia64_init_machine_status (void)
6152{
766090c2 6153 return ggc_cleared_alloc<machine_function> ();
dbdd120f 6154}
c65ebc55 6155\f
647d790d
DM
6156static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6157static enum attr_type ia64_safe_type (rtx_insn *);
2130b7fb 6158
2130b7fb 6159static enum attr_itanium_class
647d790d 6160ia64_safe_itanium_class (rtx_insn *insn)
2130b7fb
BS
6161{
6162 if (recog_memoized (insn) >= 0)
6163 return get_attr_itanium_class (insn);
b5b8b0ac
AO
6164 else if (DEBUG_INSN_P (insn))
6165 return ITANIUM_CLASS_IGNORE;
2130b7fb
BS
6166 else
6167 return ITANIUM_CLASS_UNKNOWN;
6168}
6169
6170static enum attr_type
647d790d 6171ia64_safe_type (rtx_insn *insn)
2130b7fb
BS
6172{
6173 if (recog_memoized (insn) >= 0)
6174 return get_attr_type (insn);
6175 else
6176 return TYPE_UNKNOWN;
6177}
6178\f
c65ebc55
JW
6179/* The following collection of routines emit instruction group stop bits as
6180 necessary to avoid dependencies. */
6181
6182/* Need to track some additional registers as far as serialization is
6183 concerned so we can properly handle br.call and br.ret. We could
6184 make these registers visible to gcc, but since these registers are
6185 never explicitly used in gcc generated code, it seems wasteful to
6186 do so (plus it would make the call and return patterns needlessly
6187 complex). */
c65ebc55 6188#define REG_RP (BR_REG (0))
c65ebc55 6189#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
6190/* This is used for volatile asms which may require a stop bit immediately
6191 before and after them. */
5527bf14 6192#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
6193#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6194#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 6195
f2f90c63
RH
6196/* For each register, we keep track of how it has been written in the
6197 current instruction group.
6198
6199 If a register is written unconditionally (no qualifying predicate),
6200 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6201
6202 If a register is written if its qualifying predicate P is true, we
6203 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6204 may be written again by the complement of P (P^1) and when this happens,
6205 WRITE_COUNT gets set to 2.
6206
6207 The result of this is that whenever an insn attempts to write a register
e03f5d43 6208 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
6209
6210 If a predicate register is written by a floating-point insn, we set
6211 WRITTEN_BY_FP to true.
6212
6213 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6214 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6215
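/* Editorial example of the scheme above (assumed insns): an unconditional
   "mov r14 = r15" records WRITE_COUNT == 2 for r14, while
   "(p6) mov r14 = r15" records WRITE_COUNT == 1 with FIRST_PRED == p6.  Any
   second write to r14 inside the same group then requests a stop bit,
   except for the and.orcm / or.andcm parallel-compare idioms tracked by
   WRITTEN_BY_AND and WRITTEN_BY_OR.  */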
444a356a
JJ
6216#if GCC_VERSION >= 4000
6217#define RWS_FIELD_TYPE __extension__ unsigned short
6218#else
6219#define RWS_FIELD_TYPE unsigned int
6220#endif
c65ebc55
JW
6221struct reg_write_state
6222{
444a356a
JJ
6223 RWS_FIELD_TYPE write_count : 2;
6224 RWS_FIELD_TYPE first_pred : 10;
6225 RWS_FIELD_TYPE written_by_fp : 1;
6226 RWS_FIELD_TYPE written_by_and : 1;
6227 RWS_FIELD_TYPE written_by_or : 1;
c65ebc55
JW
6228};
6229
6230/* Cumulative info for the current instruction group. */
6231struct reg_write_state rws_sum[NUM_REGS];
e28c2052 6232#if CHECKING_P
444a356a
JJ
6233/* Bitmap whether a register has been written in the current insn. */
6234HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6235 / HOST_BITS_PER_WIDEST_FAST_INT];
6236
6237static inline void
6238rws_insn_set (int regno)
6239{
6240 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6241 SET_HARD_REG_BIT (rws_insn, regno);
6242}
6243
6244static inline int
6245rws_insn_test (int regno)
6246{
6247 return TEST_HARD_REG_BIT (rws_insn, regno);
6248}
6249#else
6250/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6251unsigned char rws_insn[2];
6252
6253static inline void
6254rws_insn_set (int regno)
6255{
6256 if (regno == REG_AR_CFM)
6257 rws_insn[0] = 1;
6258 else if (regno == REG_VOLATILE)
6259 rws_insn[1] = 1;
6260}
6261
6262static inline int
6263rws_insn_test (int regno)
6264{
6265 if (regno == REG_AR_CFM)
6266 return rws_insn[0];
6267 if (regno == REG_VOLATILE)
6268 return rws_insn[1];
6269 return 0;
6270}
6271#endif
c65ebc55 6272
25250265 6273/* Indicates whether this is the first instruction after a stop bit,
e820471b
NS
6274 in which case we don't need another stop bit. Without this,
6275 ia64_variable_issue will die when scheduling an alloc. */
25250265
JW
6276static int first_instruction;
6277
c65ebc55
JW
6278/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6279 RTL for one instruction. */
6280struct reg_flags
6281{
6282 unsigned int is_write : 1; /* Is register being written? */
6283 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6284 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
6285 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6286 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 6287 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
6288};
6289
444a356a 6290static void rws_update (int, struct reg_flags, int);
9c808aad
AJ
6291static int rws_access_regno (int, struct reg_flags, int);
6292static int rws_access_reg (rtx, struct reg_flags, int);
c1bc6ca8
JW
6293static void update_set_flags (rtx, struct reg_flags *);
6294static int set_src_needs_barrier (rtx, struct reg_flags, int);
9c808aad
AJ
6295static int rtx_needs_barrier (rtx, struct reg_flags, int);
6296static void init_insn_group_barriers (void);
647d790d
DM
6297static int group_barrier_needed (rtx_insn *);
6298static int safe_group_barrier_needed (rtx_insn *);
444a356a 6299static int in_safe_group_barrier;
3b572406 6300
c65ebc55
JW
6301/* Update *RWS for REGNO, which is being written by the current instruction,
6302 with predicate PRED, and associated register flags in FLAGS. */
6303
6304static void
444a356a 6305rws_update (int regno, struct reg_flags flags, int pred)
c65ebc55 6306{
3e7c7805 6307 if (pred)
444a356a 6308 rws_sum[regno].write_count++;
3e7c7805 6309 else
444a356a
JJ
6310 rws_sum[regno].write_count = 2;
6311 rws_sum[regno].written_by_fp |= flags.is_fp;
f2f90c63 6312 /* ??? Not tracking and/or across differing predicates. */
444a356a
JJ
6313 rws_sum[regno].written_by_and = flags.is_and;
6314 rws_sum[regno].written_by_or = flags.is_or;
6315 rws_sum[regno].first_pred = pred;
c65ebc55
JW
6316}
6317
6318/* Handle an access to register REGNO of type FLAGS using predicate register
444a356a 6319 PRED. Update rws_sum array. Return 1 if this access creates
c65ebc55
JW
6320 a dependency with an earlier instruction in the same group. */
6321
6322static int
9c808aad 6323rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
6324{
6325 int need_barrier = 0;
c65ebc55 6326
e820471b 6327 gcc_assert (regno < NUM_REGS);
c65ebc55 6328
f2f90c63
RH
6329 if (! PR_REGNO_P (regno))
6330 flags.is_and = flags.is_or = 0;
6331
c65ebc55
JW
6332 if (flags.is_write)
6333 {
12c2c7aa
JW
6334 int write_count;
6335
444a356a 6336 rws_insn_set (regno);
12c2c7aa 6337 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
6338
6339 switch (write_count)
c65ebc55
JW
6340 {
6341 case 0:
6342 /* The register has not been written yet. */
444a356a
JJ
6343 if (!in_safe_group_barrier)
6344 rws_update (regno, flags, pred);
c65ebc55
JW
6345 break;
6346
6347 case 1:
89774469
SE
6348 /* The register has been written via a predicate. Treat
6349 it like a unconditional write and do not try to check
6350 for complementary pred reg in earlier write. */
f2f90c63 6351 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 6352 ;
f2f90c63
RH
6353 else if (flags.is_or && rws_sum[regno].written_by_or)
6354 ;
89774469 6355 else
c65ebc55 6356 need_barrier = 1;
444a356a
JJ
6357 if (!in_safe_group_barrier)
6358 rws_update (regno, flags, pred);
c65ebc55
JW
6359 break;
6360
6361 case 2:
6362 /* The register has been unconditionally written already. We
6363 need a barrier. */
f2f90c63
RH
6364 if (flags.is_and && rws_sum[regno].written_by_and)
6365 ;
6366 else if (flags.is_or && rws_sum[regno].written_by_or)
6367 ;
6368 else
6369 need_barrier = 1;
444a356a
JJ
6370 if (!in_safe_group_barrier)
6371 {
6372 rws_sum[regno].written_by_and = flags.is_and;
6373 rws_sum[regno].written_by_or = flags.is_or;
6374 }
c65ebc55
JW
6375 break;
6376
6377 default:
e820471b 6378 gcc_unreachable ();
c65ebc55
JW
6379 }
6380 }
6381 else
6382 {
6383 if (flags.is_branch)
6384 {
6385 /* Branches have several RAW exceptions that allow to avoid
6386 barriers. */
6387
5527bf14 6388 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
6389 /* RAW dependencies on branch regs are permissible as long
6390 as the writer is a non-branch instruction. Since we
6391 never generate code that uses a branch register written
6392 by a branch instruction, handling this case is
6393 easy. */
5527bf14 6394 return 0;
c65ebc55
JW
6395
6396 if (REGNO_REG_CLASS (regno) == PR_REGS
6397 && ! rws_sum[regno].written_by_fp)
6398 /* The predicates of a branch are available within the
6399 same insn group as long as the predicate was written by
ed168e45 6400 something other than a floating-point instruction. */
c65ebc55
JW
6401 return 0;
6402 }
6403
f2f90c63
RH
6404 if (flags.is_and && rws_sum[regno].written_by_and)
6405 return 0;
6406 if (flags.is_or && rws_sum[regno].written_by_or)
6407 return 0;
6408
c65ebc55
JW
6409 switch (rws_sum[regno].write_count)
6410 {
6411 case 0:
6412 /* The register has not been written yet. */
6413 break;
6414
6415 case 1:
89774469
SE
6416 /* The register has been written via a predicate, assume we
6417 need a barrier (don't check for complementary regs). */
6418 need_barrier = 1;
c65ebc55
JW
6419 break;
6420
6421 case 2:
6422 /* The register has been unconditionally written already. We
6423 need a barrier. */
6424 need_barrier = 1;
6425 break;
6426
6427 default:
e820471b 6428 gcc_unreachable ();
c65ebc55
JW
6429 }
6430 }
6431
6432 return need_barrier;
6433}
6434
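/* An illustrative sketch of how the write-state machine above behaves,
   assuming the rws_* globals have just been cleared by
   init_insn_group_barriers () and that r32 stands for any written
   general register:

     struct reg_flags f;
     memset (&f, 0, sizeof (f));
     f.is_write = 1;
     rws_access_regno (32, f, 0);    returns 0: first unconditional write
     rws_access_regno (32, f, 0);    returns 1: r32 was already written,
                                     so a stop bit is needed here

   The predicate argument 0 means the access is not under a predicate.  */
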
97e242b0 6435static int
9c808aad 6436rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
6437{
6438 int regno = REGNO (reg);
462a99aa 6439 int n = REG_NREGS (reg);
97e242b0
RH
6440
6441 if (n == 1)
6442 return rws_access_regno (regno, flags, pred);
6443 else
6444 {
6445 int need_barrier = 0;
6446 while (--n >= 0)
6447 need_barrier |= rws_access_regno (regno + n, flags, pred);
6448 return need_barrier;
6449 }
6450}
6451
112333d3
BS
6452/* Examine X, which is a SET rtx, and update the floating-point and
6453   parallel-comparison flags stored in *PFLAGS.  */
6454
6455static void
c1bc6ca8 6456update_set_flags (rtx x, struct reg_flags *pflags)
112333d3
BS
6457{
6458 rtx src = SET_SRC (x);
6459
112333d3
BS
6460 switch (GET_CODE (src))
6461 {
6462 case CALL:
6463 return;
6464
6465 case IF_THEN_ELSE:
048d0d36 6466 /* There are four cases here:
c8d3810f
RH
6467 (1) The destination is (pc), in which case this is a branch,
6468 nothing here applies.
6469 (2) The destination is ar.lc, in which case this is a
6470 doloop_end_internal,
6471 (3) The destination is an fp register, in which case this is
6472 an fselect instruction.
048d0d36
MK
6473 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6474 this is a check load.
c8d3810f
RH
6475 In all cases, nothing we do in this function applies. */
6476 return;
112333d3
BS
6477
6478 default:
ec8e098d 6479 if (COMPARISON_P (src)
c8d3810f 6480 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
112333d3
BS
6481 /* Set pflags->is_fp to 1 so that we know we're dealing
6482 with a floating point comparison when processing the
6483 destination of the SET. */
6484 pflags->is_fp = 1;
6485
6486 /* Discover if this is a parallel comparison. We only handle
6487 and.orcm and or.andcm at present, since we must retain a
6488 strict inverse on the predicate pair. */
6489 else if (GET_CODE (src) == AND)
6490 pflags->is_and = 1;
6491 else if (GET_CODE (src) == IOR)
6492 pflags->is_or = 1;
6493
6494 break;
6495 }
6496}
6497
6498/* Subroutine of rtx_needs_barrier; this function determines whether the
6499   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6500   are as in rtx_needs_barrier.  Return 1 if the source creates a
6501   dependency with an earlier instruction in the same group.  */
9c808aad 6502
112333d3 6503static int
c1bc6ca8 6504set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
112333d3
BS
6505{
6506 int need_barrier = 0;
6507 rtx dst;
6508 rtx src = SET_SRC (x);
6509
6510 if (GET_CODE (src) == CALL)
6511 /* We don't need to worry about the result registers that
6512 get written by subroutine call. */
6513 return rtx_needs_barrier (src, flags, pred);
6514 else if (SET_DEST (x) == pc_rtx)
6515 {
6516 /* X is a conditional branch. */
6517 /* ??? This seems redundant, as the caller sets this bit for
6518 all JUMP_INSNs. */
048d0d36
MK
6519 if (!ia64_spec_check_src_p (src))
6520 flags.is_branch = 1;
112333d3
BS
6521 return rtx_needs_barrier (src, flags, pred);
6522 }
6523
048d0d36
MK
6524 if (ia64_spec_check_src_p (src))
6525 /* Avoid checking one register twice (in condition
6526 and in 'then' section) for ldc pattern. */
6527 {
6528 gcc_assert (REG_P (XEXP (src, 2)));
6529 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6530
6531 /* We process MEM below. */
6532 src = XEXP (src, 1);
6533 }
6534
6535 need_barrier |= rtx_needs_barrier (src, flags, pred);
112333d3 6536
112333d3
BS
6537 dst = SET_DEST (x);
6538 if (GET_CODE (dst) == ZERO_EXTRACT)
6539 {
6540 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6541 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
112333d3
BS
6542 }
6543 return need_barrier;
6544}
6545
b38ba463
ZW
6546/* Handle an access to rtx X of type FLAGS using predicate register
6547 PRED. Return 1 if this access creates a dependency with an earlier
6548 instruction in the same group. */
c65ebc55
JW
6549
6550static int
9c808aad 6551rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
6552{
6553 int i, j;
6554 int is_complemented = 0;
6555 int need_barrier = 0;
6556 const char *format_ptr;
6557 struct reg_flags new_flags;
c1bc6ca8 6558 rtx cond;
c65ebc55
JW
6559
6560 if (! x)
6561 return 0;
6562
6563 new_flags = flags;
6564
6565 switch (GET_CODE (x))
6566 {
9c808aad 6567 case SET:
c1bc6ca8
JW
6568 update_set_flags (x, &new_flags);
6569 need_barrier = set_src_needs_barrier (x, new_flags, pred);
112333d3 6570 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 6571 {
112333d3
BS
6572 new_flags.is_write = 1;
6573 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 6574 }
c65ebc55
JW
6575 break;
6576
6577 case CALL:
6578 new_flags.is_write = 0;
97e242b0 6579 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
6580
6581 /* Avoid multiple register writes, in case this is a pattern with
e820471b 6582 multiple CALL rtx. This avoids a failure in rws_access_reg. */
444a356a 6583 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
c65ebc55
JW
6584 {
6585 new_flags.is_write = 1;
97e242b0
RH
6586 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6587 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6588 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6589 }
6590 break;
6591
e5bde68a
RH
6592 case COND_EXEC:
6593 /* X is a predicated instruction. */
6594
6595 cond = COND_EXEC_TEST (x);
e820471b 6596 gcc_assert (!pred);
e5bde68a
RH
6597 need_barrier = rtx_needs_barrier (cond, flags, 0);
6598
6599 if (GET_CODE (cond) == EQ)
6600 is_complemented = 1;
6601 cond = XEXP (cond, 0);
e820471b 6602 gcc_assert (GET_CODE (cond) == REG
c1bc6ca8 6603 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
e5bde68a
RH
6604 pred = REGNO (cond);
6605 if (is_complemented)
6606 ++pred;
6607
6608 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6609 return need_barrier;
6610
c65ebc55 6611 case CLOBBER:
c65ebc55 6612 case USE:
c65ebc55
JW
6613 /* Clobber & use are for earlier compiler-phases only. */
6614 break;
6615
6616 case ASM_OPERANDS:
6617 case ASM_INPUT:
6618 /* We always emit stop bits for traditional asms. We emit stop bits
6619 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6620 if (GET_CODE (x) != ASM_OPERANDS
6621 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6622 {
6623 /* Avoid writing the register multiple times if we have multiple
e820471b 6624 asm outputs. This avoids a failure in rws_access_reg. */
444a356a 6625 if (! rws_insn_test (REG_VOLATILE))
c65ebc55
JW
6626 {
6627 new_flags.is_write = 1;
97e242b0 6628 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
6629 }
6630 return 1;
6631 }
6632
6633 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
1e5f1716 6634 We cannot just fall through here since then we would be confused
c65ebc55
JW
6635 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6636 traditional asms unlike their normal usage. */
6637
6638 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6639 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6640 need_barrier = 1;
6641 break;
6642
6643 case PARALLEL:
6644 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
6645 {
6646 rtx pat = XVECEXP (x, 0, i);
051d8245 6647 switch (GET_CODE (pat))
112333d3 6648 {
051d8245 6649 case SET:
c1bc6ca8
JW
6650 update_set_flags (pat, &new_flags);
6651 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
051d8245
RH
6652 break;
6653
6654 case USE:
6655 case CALL:
6656 case ASM_OPERANDS:
93671519 6657 case ASM_INPUT:
051d8245
RH
6658 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6659 break;
6660
6661 case CLOBBER:
628162ea
JJ
6662 if (REG_P (XEXP (pat, 0))
6663 && extract_asm_operands (x) != NULL_RTX
6664 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6665 {
6666 new_flags.is_write = 1;
6667 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6668 new_flags, pred);
6669 new_flags = flags;
6670 }
6671 break;
6672
051d8245
RH
6673 case RETURN:
6674 break;
6675
6676 default:
6677 gcc_unreachable ();
112333d3 6678 }
112333d3
BS
6679 }
6680 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6681 {
6682 rtx pat = XVECEXP (x, 0, i);
6683 if (GET_CODE (pat) == SET)
6684 {
6685 if (GET_CODE (SET_SRC (pat)) != CALL)
6686 {
6687 new_flags.is_write = 1;
6688 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6689 pred);
6690 }
6691 }
339cb12e 6692 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
6693 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6694 }
c65ebc55
JW
6695 break;
6696
6697 case SUBREG:
077bc924
JM
6698 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6699 break;
c65ebc55 6700 case REG:
870f9ec0
RH
6701 if (REGNO (x) == AR_UNAT_REGNUM)
6702 {
6703 for (i = 0; i < 64; ++i)
6704 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6705 }
6706 else
6707 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
6708 break;
6709
6710 case MEM:
6711 /* Find the regs used in memory address computation. */
6712 new_flags.is_write = 0;
6713 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6714 break;
6715
051d8245 6716 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
c65ebc55
JW
6717 case SYMBOL_REF: case LABEL_REF: case CONST:
6718 break;
6719
6720 /* Operators with side-effects. */
6721 case POST_INC: case POST_DEC:
e820471b 6722 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
c65ebc55
JW
6723
6724 new_flags.is_write = 0;
97e242b0 6725 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 6726 new_flags.is_write = 1;
97e242b0 6727 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6728 break;
6729
6730 case POST_MODIFY:
e820471b 6731 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
4b983fdc
RH
6732
6733 new_flags.is_write = 0;
97e242b0 6734 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6735 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6736 new_flags.is_write = 1;
97e242b0 6737 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
6738 break;
6739
6740 /* Handle common unary and binary ops for efficiency. */
6741 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6742 case MOD: case UDIV: case UMOD: case AND: case IOR:
6743 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6744 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6745 case NE: case EQ: case GE: case GT: case LE:
6746 case LT: case GEU: case GTU: case LEU: case LTU:
6747 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6748 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6749 break;
6750
6751 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6752 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6753 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 6754 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
6755 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6756 break;
6757
051d8245
RH
6758 case VEC_SELECT:
6759 /* VEC_SELECT's second argument is a PARALLEL with integers that
6760 describe the elements selected. On ia64, those integers are
6761 always constants. Avoid walking the PARALLEL so that we don't
e820471b 6762 get confused with "normal" parallels and then die. */
051d8245
RH
6763 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6764 break;
6765
c65ebc55
JW
6766 case UNSPEC:
6767 switch (XINT (x, 1))
6768 {
7b6e506e
RH
6769 case UNSPEC_LTOFF_DTPMOD:
6770 case UNSPEC_LTOFF_DTPREL:
6771 case UNSPEC_DTPREL:
6772 case UNSPEC_LTOFF_TPREL:
6773 case UNSPEC_TPREL:
6774 case UNSPEC_PRED_REL_MUTEX:
6775 case UNSPEC_PIC_CALL:
6776 case UNSPEC_MF:
6777 case UNSPEC_FETCHADD_ACQ:
28875d67 6778 case UNSPEC_FETCHADD_REL:
7b6e506e
RH
6779 case UNSPEC_BSP_VALUE:
6780 case UNSPEC_FLUSHRS:
6781 case UNSPEC_BUNDLE_SELECTOR:
6782 break;
6783
086c0f96
RH
6784 case UNSPEC_GR_SPILL:
6785 case UNSPEC_GR_RESTORE:
870f9ec0
RH
6786 {
6787 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6788 HOST_WIDE_INT bit = (offset >> 3) & 63;
6789
6790 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
83338d15 6791 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
870f9ec0
RH
6792 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6793 new_flags, pred);
6794 break;
6795 }
9c808aad 6796
086c0f96
RH
6797 case UNSPEC_FR_SPILL:
6798 case UNSPEC_FR_RESTORE:
c407570a 6799 case UNSPEC_GETF_EXP:
b38ba463 6800 case UNSPEC_SETF_EXP:
086c0f96 6801 case UNSPEC_ADDP4:
b38ba463 6802 case UNSPEC_FR_SQRT_RECIP_APPROX:
07acc7b3 6803 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
048d0d36
MK
6804 case UNSPEC_LDA:
6805 case UNSPEC_LDS:
388092d5 6806 case UNSPEC_LDS_A:
048d0d36
MK
6807 case UNSPEC_LDSA:
6808 case UNSPEC_CHKACLR:
6809 case UNSPEC_CHKS:
6dd12198
SE
6810 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6811 break;
6812
086c0f96 6813 case UNSPEC_FR_RECIP_APPROX:
f526a3c8 6814 case UNSPEC_SHRP:
046625fa 6815 case UNSPEC_COPYSIGN:
1def9c3f 6816 case UNSPEC_FR_RECIP_APPROX_RES:
655f2eb9
RH
6817 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6818 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6819 break;
6820
086c0f96 6821 case UNSPEC_CMPXCHG_ACQ:
28875d67 6822 case UNSPEC_CMPXCHG_REL:
0551c32d
RH
6823 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6824 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6825 break;
6826
c65ebc55 6827 default:
e820471b 6828 gcc_unreachable ();
c65ebc55
JW
6829 }
6830 break;
6831
6832 case UNSPEC_VOLATILE:
6833 switch (XINT (x, 1))
6834 {
086c0f96 6835 case UNSPECV_ALLOC:
25250265
JW
6836 /* Alloc must always be the first instruction of a group.
6837 We force this by always returning true. */
6838 /* ??? We might get better scheduling if we explicitly check for
6839 input/local/output register dependencies, and modify the
6840 scheduler so that alloc is always reordered to the start of
6841 the current group. We could then eliminate all of the
6842 first_instruction code. */
6843 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6844
6845 new_flags.is_write = 1;
25250265
JW
6846 rws_access_regno (REG_AR_CFM, new_flags, pred);
6847 return 1;
c65ebc55 6848
086c0f96 6849 case UNSPECV_SET_BSP:
7b84aac0 6850 case UNSPECV_PROBE_STACK_RANGE:
3b572406
RH
6851 need_barrier = 1;
6852 break;
6853
086c0f96
RH
6854 case UNSPECV_BLOCKAGE:
6855 case UNSPECV_INSN_GROUP_BARRIER:
6856 case UNSPECV_BREAK:
6857 case UNSPECV_PSAC_ALL:
6858 case UNSPECV_PSAC_NORMAL:
3b572406 6859 return 0;
0c96007e 6860
7b84aac0
EB
6861 case UNSPECV_PROBE_STACK_ADDRESS:
6862 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6863 break;
6864
c65ebc55 6865 default:
e820471b 6866 gcc_unreachable ();
c65ebc55
JW
6867 }
6868 break;
6869
6870 case RETURN:
6871 new_flags.is_write = 0;
97e242b0
RH
6872 need_barrier = rws_access_regno (REG_RP, flags, pred);
6873 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6874
6875 new_flags.is_write = 1;
97e242b0
RH
6876 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6877 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6878 break;
6879
6880 default:
6881 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6882 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6883 switch (format_ptr[i])
6884 {
6885 case '0': /* unused field */
6886 case 'i': /* integer */
6887 case 'n': /* note */
6888 case 'w': /* wide integer */
6889 case 's': /* pointer to string */
6890 case 'S': /* optional pointer to string */
6891 break;
6892
6893 case 'e':
6894 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6895 need_barrier = 1;
6896 break;
6897
6898 case 'E':
6899 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6900 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6901 need_barrier = 1;
6902 break;
6903
6904 default:
e820471b 6905 gcc_unreachable ();
c65ebc55 6906 }
2ed4af6f 6907 break;
c65ebc55
JW
6908 }
6909 return need_barrier;
6910}
6911
c1bc6ca8 6912/* Clear out the state for group_barrier_needed at the start of a
2130b7fb
BS
6913 sequence of insns. */
6914
6915static void
9c808aad 6916init_insn_group_barriers (void)
2130b7fb
BS
6917{
6918 memset (rws_sum, 0, sizeof (rws_sum));
25250265 6919 first_instruction = 1;
2130b7fb
BS
6920}
6921
c1bc6ca8
JW
6922/* Given the current state, determine whether a group barrier (a stop bit) is
6923 necessary before INSN. Return nonzero if so. This modifies the state to
6924 include the effects of INSN as a side-effect. */
2130b7fb
BS
6925
6926static int
647d790d 6927group_barrier_needed (rtx_insn *insn)
2130b7fb
BS
6928{
6929 rtx pat;
6930 int need_barrier = 0;
6931 struct reg_flags flags;
6932
6933 memset (&flags, 0, sizeof (flags));
6934 switch (GET_CODE (insn))
6935 {
6936 case NOTE:
b5b8b0ac 6937 case DEBUG_INSN:
2130b7fb
BS
6938 break;
6939
6940 case BARRIER:
6941 /* A barrier doesn't imply an instruction group boundary. */
6942 break;
6943
6944 case CODE_LABEL:
6945 memset (rws_insn, 0, sizeof (rws_insn));
6946 return 1;
6947
6948 case CALL_INSN:
6949 flags.is_branch = 1;
6950 flags.is_sibcall = SIBLING_CALL_P (insn);
6951 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
6952
6953 /* Don't bundle a call following another call. */
b64925dc 6954 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
f12f25a7
RH
6955 {
6956 need_barrier = 1;
6957 break;
6958 }
6959
2130b7fb
BS
6960 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6961 break;
6962
6963 case JUMP_INSN:
048d0d36
MK
6964 if (!ia64_spec_check_p (insn))
6965 flags.is_branch = 1;
f12f25a7
RH
6966
6967 /* Don't bundle a jump following a call. */
b64925dc 6968 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
f12f25a7
RH
6969 {
6970 need_barrier = 1;
6971 break;
6972 }
5efb1046 6973 /* FALLTHRU */
2130b7fb
BS
6974
6975 case INSN:
6976 if (GET_CODE (PATTERN (insn)) == USE
6977 || GET_CODE (PATTERN (insn)) == CLOBBER)
6978 /* Don't care about USE and CLOBBER "insns"---those are used to
6979 indicate to the optimizer that it shouldn't get rid of
6980 certain operations. */
6981 break;
6982
6983 pat = PATTERN (insn);
6984
6985 /* Ug. Hack hacks hacked elsewhere. */
6986 switch (recog_memoized (insn))
6987 {
6988 /* We play dependency tricks with the epilogue in order
6989 to get proper schedules. Undo this for dv analysis. */
6990 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 6991 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
6992 pat = XVECEXP (pat, 0, 0);
6993 break;
6994
6995 /* The pattern we use for br.cloop confuses the code above.
6996 The second element of the vector is representative. */
6997 case CODE_FOR_doloop_end_internal:
6998 pat = XVECEXP (pat, 0, 1);
6999 break;
7000
7001 /* Doesn't generate code. */
7002 case CODE_FOR_pred_rel_mutex:
d0e82870 7003 case CODE_FOR_prologue_use:
2130b7fb
BS
7004 return 0;
7005
7006 default:
7007 break;
7008 }
7009
7010 memset (rws_insn, 0, sizeof (rws_insn));
7011 need_barrier = rtx_needs_barrier (pat, flags, 0);
7012
7013 /* Check to see if the previous instruction was a volatile
7014 asm. */
7015 if (! need_barrier)
7016 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
388092d5 7017
2130b7fb
BS
7018 break;
7019
7020 default:
e820471b 7021 gcc_unreachable ();
2130b7fb 7022 }
25250265 7023
7b84aac0 7024 if (first_instruction && important_for_bundling_p (insn))
25250265
JW
7025 {
7026 need_barrier = 0;
7027 first_instruction = 0;
7028 }
7029
2130b7fb
BS
7030 return need_barrier;
7031}
7032
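/* A hedged illustration of what the computed barrier means at the assembly
   level (hand-written example, not output of this function): in

     add r14 = r15, r16
     ;;
     add r17 = r14, r18

   the ";;" stop bit ends the instruction group so that the read of r14 in
   the second add does not violate the RAW restriction checked above.  */
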
c1bc6ca8 7033/* Like group_barrier_needed, but do not clobber the current state. */
2130b7fb
BS
7034
7035static int
647d790d 7036safe_group_barrier_needed (rtx_insn *insn)
2130b7fb 7037{
25250265 7038 int saved_first_instruction;
2130b7fb 7039 int t;
25250265 7040
25250265 7041 saved_first_instruction = first_instruction;
444a356a 7042 in_safe_group_barrier = 1;
25250265 7043
c1bc6ca8 7044 t = group_barrier_needed (insn);
25250265 7045
25250265 7046 first_instruction = saved_first_instruction;
444a356a 7047 in_safe_group_barrier = 0;
25250265 7048
2130b7fb
BS
7049 return t;
7050}
7051
18dbd950
RS
7052/* Scan the current function and insert stop bits as necessary to
7053 eliminate dependencies. This function assumes that a final
7054 instruction scheduling pass has been run which has already
7055 inserted most of the necessary stop bits. This function only
7056 inserts new ones at basic block boundaries, since these are
7057 invisible to the scheduler. */
2130b7fb
BS
7058
7059static void
9c808aad 7060emit_insn_group_barriers (FILE *dump)
2130b7fb 7061{
dd3d2b35
DM
7062 rtx_insn *insn;
7063 rtx_insn *last_label = 0;
2130b7fb
BS
7064 int insns_since_last_label = 0;
7065
7066 init_insn_group_barriers ();
7067
18dbd950 7068 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb 7069 {
b64925dc 7070 if (LABEL_P (insn))
2130b7fb
BS
7071 {
7072 if (insns_since_last_label)
7073 last_label = insn;
7074 insns_since_last_label = 0;
7075 }
b64925dc 7076 else if (NOTE_P (insn)
a38e7aa5 7077 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
2130b7fb
BS
7078 {
7079 if (insns_since_last_label)
7080 last_label = insn;
7081 insns_since_last_label = 0;
7082 }
b64925dc 7083 else if (NONJUMP_INSN_P (insn)
2130b7fb 7084 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 7085 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
7086 {
7087 init_insn_group_barriers ();
7088 last_label = 0;
7089 }
b5b8b0ac 7090 else if (NONDEBUG_INSN_P (insn))
2130b7fb
BS
7091 {
7092 insns_since_last_label = 1;
7093
c1bc6ca8 7094 if (group_barrier_needed (insn))
2130b7fb
BS
7095 {
7096 if (last_label)
7097 {
7098 if (dump)
7099 fprintf (dump, "Emitting stop before label %d\n",
7100 INSN_UID (last_label));
7101 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7102 insn = last_label;
112333d3
BS
7103
7104 init_insn_group_barriers ();
7105 last_label = 0;
2130b7fb 7106 }
2130b7fb
BS
7107 }
7108 }
7109 }
7110}
f4d578da
BS
7111
7112/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7113 This function has to emit all necessary group barriers. */
7114
7115static void
9c808aad 7116emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da 7117{
dd3d2b35 7118 rtx_insn *insn;
f4d578da
BS
7119
7120 init_insn_group_barriers ();
7121
18dbd950 7122 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 7123 {
b64925dc 7124 if (BARRIER_P (insn))
bd7b9a0f 7125 {
dd3d2b35 7126 rtx_insn *last = prev_active_insn (insn);
bd7b9a0f
RH
7127
7128 if (! last)
7129 continue;
34f0d87a 7130 if (JUMP_TABLE_DATA_P (last))
bd7b9a0f
RH
7131 last = prev_active_insn (last);
7132 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7133 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7134
7135 init_insn_group_barriers ();
7136 }
b5b8b0ac 7137 else if (NONDEBUG_INSN_P (insn))
f4d578da 7138 {
bd7b9a0f
RH
7139 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7140 init_insn_group_barriers ();
c1bc6ca8 7141 else if (group_barrier_needed (insn))
f4d578da
BS
7142 {
7143 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7144 init_insn_group_barriers ();
c1bc6ca8 7145 group_barrier_needed (insn);
f4d578da
BS
7146 }
7147 }
7148 }
7149}
30028c85 7150
2130b7fb 7151\f
2130b7fb 7152
30028c85 7153/* Instruction scheduling support. */
2130b7fb
BS
7154
7155#define NR_BUNDLES 10
7156
30028c85 7157/* A list of names of all available bundles. */
2130b7fb 7158
30028c85 7159static const char *bundle_name [NR_BUNDLES] =
2130b7fb 7160{
30028c85
VM
7161 ".mii",
7162 ".mmi",
7163 ".mfi",
7164 ".mmf",
2130b7fb 7165#if NR_BUNDLES == 10
30028c85
VM
7166 ".bbb",
7167 ".mbb",
2130b7fb 7168#endif
30028c85
VM
7169 ".mib",
7170 ".mmb",
7171 ".mfb",
7172 ".mlx"
2130b7fb
BS
7173};
7174
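/* Informal gloss of the template names (background only, not used by the
   code): each letter names a syllable type, so ".mii" is a memory slot
   followed by two integer slots, ".mfb" is memory / floating-point /
   branch, and ".mlx" pairs a memory slot with a "long" (movl) immediate
   that occupies the remaining two slots.  */
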
30028c85 7175/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 7176
30028c85 7177int ia64_final_schedule = 0;
2130b7fb 7178
35fd3193 7179/* Codes of the corresponding queried units: */
2130b7fb 7180
30028c85
VM
7181static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7182static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 7183
30028c85
VM
7184static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7185static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 7186
30028c85
VM
7187static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7188
7189/* The following variable value is an insn group barrier. */
7190
dd3d2b35 7191static rtx_insn *dfa_stop_insn;
30028c85
VM
7192
7193/* The following variable value is the last issued insn. */
7194
b32d5189 7195static rtx_insn *last_scheduled_insn;
30028c85 7196
30028c85
VM
7197/* The following variable value is pointer to a DFA state used as
7198 temporary variable. */
7199
7200static state_t temp_dfa_state = NULL;
7201
7202/* The following variable value is DFA state after issuing the last
7203 insn. */
7204
7205static state_t prev_cycle_state = NULL;
7206
7207/* The following array element values are TRUE if the corresponding
9e4f94de 7208   insn requires stop bits to be added before it.  */
30028c85 7209
048d0d36
MK
7210static char *stops_p = NULL;
7211
30028c85
VM
7212/* The following variable is used to set up the mentioned above array. */
7213
7214static int stop_before_p = 0;
7215
7216/* The following variable value is length of the arrays `clocks' and
7217 `add_cycles'. */
7218
7219static int clocks_length;
7220
048d0d36
MK
7221/* The following variable value is number of data speculations in progress. */
7222static int pending_data_specs = 0;
7223
388092d5
AB
7224/* Number of memory references on current and three future processor cycles. */
7225static char mem_ops_in_group[4];
7226
7227/* Number of current processor cycle (from scheduler's point of view). */
7228static int current_cycle;
7229
647d790d 7230static rtx ia64_single_set (rtx_insn *);
017fdefe 7231static void ia64_emit_insn_before (rtx, rtx_insn *);
2130b7fb
BS
7232
7233/* Map a bundle number to its pseudo-op. */
7234
7235const char *
9c808aad 7236get_bundle_name (int b)
2130b7fb 7237{
30028c85 7238 return bundle_name[b];
2130b7fb
BS
7239}
7240
2130b7fb
BS
7241
7242/* Return the maximum number of instructions a cpu can issue. */
7243
c237e94a 7244static int
9c808aad 7245ia64_issue_rate (void)
2130b7fb
BS
7246{
7247 return 6;
7248}
7249
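/* The value 6 reflects the two three-slot bundles that an Itanium 2 core
   can dispatch per clock; this is stated here only as background and is
   not derived from the DFA description.  */
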
7250/* Helper function - like single_set, but look inside COND_EXEC. */
7251
7252static rtx
647d790d 7253ia64_single_set (rtx_insn *insn)
2130b7fb 7254{
30fa7e33 7255 rtx x = PATTERN (insn), ret;
2130b7fb
BS
7256 if (GET_CODE (x) == COND_EXEC)
7257 x = COND_EXEC_CODE (x);
7258 if (GET_CODE (x) == SET)
7259 return x;
bdbe5b8d
RH
7260
7261 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7262 Although they are not classical single set, the second set is there just
7263 to protect it from moving past FP-relative stack accesses. */
7264 switch (recog_memoized (insn))
30fa7e33 7265 {
bdbe5b8d 7266 case CODE_FOR_prologue_allocate_stack:
9eb8c09f 7267 case CODE_FOR_prologue_allocate_stack_pr:
bdbe5b8d 7268 case CODE_FOR_epilogue_deallocate_stack:
9eb8c09f 7269 case CODE_FOR_epilogue_deallocate_stack_pr:
bdbe5b8d
RH
7270 ret = XVECEXP (x, 0, 0);
7271 break;
7272
7273 default:
7274 ret = single_set_2 (insn, x);
7275 break;
30fa7e33 7276 }
bdbe5b8d 7277
30fa7e33 7278 return ret;
2130b7fb
BS
7279}
7280
388092d5
AB
7281/* Adjust the cost of a scheduling dependency.
7282   Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
7283 COST is the current cost, DW is dependency weakness. */
c237e94a 7284static int
b505225b
TS
7285ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7286 int cost, dw_t dw)
2130b7fb 7287{
388092d5 7288 enum reg_note dep_type = (enum reg_note) dep_type1;
2130b7fb
BS
7289 enum attr_itanium_class dep_class;
7290 enum attr_itanium_class insn_class;
2130b7fb 7291
2130b7fb 7292 insn_class = ia64_safe_itanium_class (insn);
30028c85 7293 dep_class = ia64_safe_itanium_class (dep_insn);
388092d5
AB
7294
7295 /* Treat true memory dependencies separately. Ignore apparent true
7296 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7297 if (dep_type == REG_DEP_TRUE
7298 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7299 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7300 return 0;
7301
7302 if (dw == MIN_DEP_WEAK)
7303 /* Store and load are likely to alias, use higher cost to avoid stall. */
7304 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7305 else if (dw > MIN_DEP_WEAK)
7306 {
7307 /* Store and load are less likely to alias. */
7308 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7309 /* Assume there will be no cache conflict for floating-point data.
7310 For integer data, L1 conflict penalty is huge (17 cycles), so we
7311 never assume it will not cause a conflict. */
7312 return 0;
7313 else
7314 return cost;
7315 }
7316
7317 if (dep_type != REG_DEP_OUTPUT)
7318 return cost;
7319
30028c85
VM
7320 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7321 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
7322 return 0;
7323
2130b7fb
BS
7324 return cost;
7325}
7326
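/* Worked reading of the cases above: an apparent true dependence of a
   branch or call on a store is costless; a store/load pair that is likely
   to alias (dw == MIN_DEP_WEAK) is charged PARAM_SCHED_MEM_TRUE_DEP_COST;
   an unlikely-aliasing pair is free when the producer is a floating-point
   store and mflag_sched_fp_mem_deps_zero_cost is set, and otherwise keeps
   COST; finally, only output dependencies involving store-class insns are
   reduced to zero.  */
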
14d118d6
DM
7327/* Like emit_insn_before, but skip cycle_display notes.
7328 ??? When cycle display notes are implemented, update this. */
7329
7330static void
d8485bdb 7331ia64_emit_insn_before (rtx insn, rtx_insn *before)
14d118d6
DM
7332{
7333 emit_insn_before (insn, before);
7334}
7335
30028c85
VM
7336/* The following function marks insns that produce addresses for load
7337   and store insns.  Such insns will be placed into M slots because that
7338   decreases the latency on Itanium 1 (see function
7339   `ia64_produce_address_p' and the DFA descriptions).  */
2130b7fb
BS
7340
7341static void
ce1ce33a 7342ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
2130b7fb 7343{
ce1ce33a 7344 rtx_insn *insn, *next, *next_tail;
9c808aad 7345
f12b785d
RH
7346 /* Before reload, which_alternative is not set, which means that
7347 ia64_safe_itanium_class will produce wrong results for (at least)
7348 move instructions. */
7349 if (!reload_completed)
7350 return;
7351
30028c85
VM
7352 next_tail = NEXT_INSN (tail);
7353 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7354 if (INSN_P (insn))
7355 insn->call = 0;
7356 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7357 if (INSN_P (insn)
7358 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7359 {
e2f6ff94
MK
7360 sd_iterator_def sd_it;
7361 dep_t dep;
7362 bool has_mem_op_consumer_p = false;
b198261f 7363
e2f6ff94 7364 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
30028c85 7365 {
a71aef0b
JB
7366 enum attr_itanium_class c;
7367
e2f6ff94 7368 if (DEP_TYPE (dep) != REG_DEP_TRUE)
f12b785d 7369 continue;
b198261f 7370
e2f6ff94 7371 next = DEP_CON (dep);
a71aef0b
JB
7372 c = ia64_safe_itanium_class (next);
7373 if ((c == ITANIUM_CLASS_ST
7374 || c == ITANIUM_CLASS_STF)
30028c85 7375 && ia64_st_address_bypass_p (insn, next))
e2f6ff94
MK
7376 {
7377 has_mem_op_consumer_p = true;
7378 break;
7379 }
a71aef0b
JB
7380 else if ((c == ITANIUM_CLASS_LD
7381 || c == ITANIUM_CLASS_FLD
7382 || c == ITANIUM_CLASS_FLDP)
30028c85 7383 && ia64_ld_address_bypass_p (insn, next))
e2f6ff94
MK
7384 {
7385 has_mem_op_consumer_p = true;
7386 break;
7387 }
30028c85 7388 }
e2f6ff94
MK
7389
7390 insn->call = has_mem_op_consumer_p;
30028c85
VM
7391 }
7392}
2130b7fb 7393
30028c85 7394/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 7395
30028c85 7396static void
9c808aad
AJ
7397ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7398 int sched_verbose ATTRIBUTE_UNUSED,
7399 int max_ready ATTRIBUTE_UNUSED)
30028c85 7400{
e28c2052
MM
7401 if (flag_checking && !sel_sched_p () && reload_completed)
7402 {
7403 for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7404 insn != current_sched_info->next_tail;
7405 insn = NEXT_INSN (insn))
7406 gcc_assert (!SCHED_GROUP_P (insn));
7407 }
b32d5189 7408 last_scheduled_insn = NULL;
30028c85 7409 init_insn_group_barriers ();
388092d5
AB
7410
7411 current_cycle = 0;
7412 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
2130b7fb
BS
7413}
7414
048d0d36
MK
7415/* We're beginning a scheduling pass. Check assertion. */
7416
7417static void
7418ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7419 int sched_verbose ATTRIBUTE_UNUSED,
7420 int max_ready ATTRIBUTE_UNUSED)
7421{
388092d5 7422 gcc_assert (pending_data_specs == 0);
048d0d36
MK
7423}
7424
7425/* Scheduling pass is now finished. Free/reset static variable. */
7426static void
7427ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7428 int sched_verbose ATTRIBUTE_UNUSED)
7429{
388092d5
AB
7430 gcc_assert (pending_data_specs == 0);
7431}
7432
7433/* Return TRUE if INSN is a load (either normal or speculative, but not a
7434 speculation check), FALSE otherwise. */
7435static bool
647d790d 7436is_load_p (rtx_insn *insn)
388092d5
AB
7437{
7438 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7439
7440 return
7441 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7442 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7443}
7444
7445/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7446   (taking into account the 3-cycle cache reference postponement for stores: Intel
7447 Itanium 2 Reference Manual for Software Development and Optimization,
7448 6.7.3.1). */
7449static void
647d790d 7450record_memory_reference (rtx_insn *insn)
388092d5
AB
7451{
7452 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7453
7454 switch (insn_class) {
7455 case ITANIUM_CLASS_FLD:
7456 case ITANIUM_CLASS_LD:
7457 mem_ops_in_group[current_cycle % 4]++;
7458 break;
7459 case ITANIUM_CLASS_STF:
7460 case ITANIUM_CLASS_ST:
7461 mem_ops_in_group[(current_cycle + 3) % 4]++;
7462 break;
7463 default:;
7464 }
048d0d36
MK
7465}
7466
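/* A small worked example of the accounting above: with current_cycle == 5,
   a load increments mem_ops_in_group[5 % 4], i.e. slot 1 for cycle 5
   itself, while a store increments mem_ops_in_group[(5 + 3) % 4], i.e.
   slot 0, charging the store to cycle 8, when its cache reference is
   assumed to take place.  */
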
30028c85
VM
7467/* We are about to begin issuing insns for this clock cycle.
7468   Override the default sort algorithm to better slot instructions. */
2130b7fb 7469
30028c85 7470static int
ce1ce33a 7471ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
388092d5 7472 int *pn_ready, int clock_var,
9c808aad 7473 int reorder_type)
2130b7fb 7474{
30028c85
VM
7475 int n_asms;
7476 int n_ready = *pn_ready;
ce1ce33a
DM
7477 rtx_insn **e_ready = ready + n_ready;
7478 rtx_insn **insnp;
2130b7fb 7479
30028c85
VM
7480 if (sched_verbose)
7481 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 7482
30028c85 7483 if (reorder_type == 0)
2130b7fb 7484 {
30028c85
VM
7485 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7486 n_asms = 0;
7487 for (insnp = ready; insnp < e_ready; insnp++)
7488 if (insnp < e_ready)
7489 {
ce1ce33a 7490 rtx_insn *insn = *insnp;
30028c85
VM
7491 enum attr_type t = ia64_safe_type (insn);
7492 if (t == TYPE_UNKNOWN)
7493 {
7494 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7495 || asm_noperands (PATTERN (insn)) >= 0)
7496 {
ce1ce33a 7497 rtx_insn *lowest = ready[n_asms];
30028c85
VM
7498 ready[n_asms] = insn;
7499 *insnp = lowest;
7500 n_asms++;
7501 }
7502 else
7503 {
ce1ce33a 7504 rtx_insn *highest = ready[n_ready - 1];
30028c85
VM
7505 ready[n_ready - 1] = insn;
7506 *insnp = highest;
7507 return 1;
7508 }
7509 }
7510 }
98d2b17e 7511
30028c85 7512 if (n_asms < n_ready)
98d2b17e 7513 {
30028c85
VM
7514 /* Some normal insns to process. Skip the asms. */
7515 ready += n_asms;
7516 n_ready -= n_asms;
98d2b17e 7517 }
30028c85
VM
7518 else if (n_ready > 0)
7519 return 1;
2130b7fb
BS
7520 }
7521
30028c85 7522 if (ia64_final_schedule)
2130b7fb 7523 {
30028c85
VM
7524 int deleted = 0;
7525 int nr_need_stop = 0;
7526
7527 for (insnp = ready; insnp < e_ready; insnp++)
c1bc6ca8 7528 if (safe_group_barrier_needed (*insnp))
30028c85 7529 nr_need_stop++;
9c808aad 7530
30028c85
VM
7531 if (reorder_type == 1 && n_ready == nr_need_stop)
7532 return 0;
7533 if (reorder_type == 0)
7534 return 1;
7535 insnp = e_ready;
7536 /* Move down everything that needs a stop bit, preserving
7537 relative order. */
7538 while (insnp-- > ready + deleted)
7539 while (insnp >= ready + deleted)
7540 {
ce1ce33a 7541 rtx_insn *insn = *insnp;
c1bc6ca8 7542 if (! safe_group_barrier_needed (insn))
30028c85
VM
7543 break;
7544 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7545 *ready = insn;
7546 deleted++;
7547 }
7548 n_ready -= deleted;
7549 ready += deleted;
2130b7fb 7550 }
2130b7fb 7551
388092d5
AB
7552 current_cycle = clock_var;
7553 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7554 {
7555 int moved = 0;
7556
7557 insnp = e_ready;
7558 /* Move down loads/stores, preserving relative order. */
7559 while (insnp-- > ready + moved)
7560 while (insnp >= ready + moved)
7561 {
ce1ce33a 7562 rtx_insn *insn = *insnp;
388092d5
AB
7563 if (! is_load_p (insn))
7564 break;
7565 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7566 *ready = insn;
7567 moved++;
7568 }
7569 n_ready -= moved;
7570 ready += moved;
7571 }
7572
30028c85 7573 return 1;
2130b7fb 7574}
6b6c1201 7575
30028c85
VM
7576/* We are about to begin issuing insns for this clock cycle.  Override
7577   the default sort algorithm to better slot instructions. */
c65ebc55 7578
30028c85 7579static int
ce1ce33a
DM
7580ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7581 int *pn_ready, int clock_var)
2130b7fb 7582{
30028c85
VM
7583 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7584 pn_ready, clock_var, 0);
2130b7fb
BS
7585}
7586
30028c85
VM
7587/* Like ia64_sched_reorder, but called after issuing each insn.
7588 Override the default sort algorithm to better slot instructions. */
2130b7fb 7589
30028c85 7590static int
9c808aad 7591ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
ce1ce33a 7592 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
9c808aad 7593 int *pn_ready, int clock_var)
30028c85 7594{
30028c85
VM
7595 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7596 clock_var, 1);
2130b7fb
BS
7597}
7598
30028c85
VM
7599/* We are about to issue INSN. Return the number of insns left on the
7600 ready queue that can be issued this cycle. */
2130b7fb 7601
30028c85 7602static int
9c808aad
AJ
7603ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7604 int sched_verbose ATTRIBUTE_UNUSED,
ac44248e 7605 rtx_insn *insn,
9c808aad 7606 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 7607{
388092d5 7608 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
048d0d36 7609 /* Modulo scheduling does not extend h_i_d when emitting
388092d5 7610 new instructions. Don't use h_i_d, if we don't have to. */
048d0d36
MK
7611 {
7612 if (DONE_SPEC (insn) & BEGIN_DATA)
7613 pending_data_specs++;
7614 if (CHECK_SPEC (insn) & BEGIN_DATA)
7615 pending_data_specs--;
7616 }
7617
b5b8b0ac
AO
7618 if (DEBUG_INSN_P (insn))
7619 return 1;
7620
30028c85
VM
7621 last_scheduled_insn = insn;
7622 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7623 if (reload_completed)
2130b7fb 7624 {
c1bc6ca8 7625 int needed = group_barrier_needed (insn);
e820471b
NS
7626
7627 gcc_assert (!needed);
b64925dc 7628 if (CALL_P (insn))
30028c85
VM
7629 init_insn_group_barriers ();
7630 stops_p [INSN_UID (insn)] = stop_before_p;
7631 stop_before_p = 0;
388092d5
AB
7632
7633 record_memory_reference (insn);
2130b7fb 7634 }
30028c85
VM
7635 return 1;
7636}
c65ebc55 7637
4960a0cb 7638/* We are choosing an insn from the ready queue.  Return zero if INSN
30028c85 7639 can be chosen. */
c65ebc55 7640
30028c85 7641static int
ac44248e 7642ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30028c85 7643{
388092d5 7644 gcc_assert (insn && INSN_P (insn));
048d0d36 7645
4960a0cb
MK
7646  /* The ALAT has 32 entries.  Since we perform conservative
7647     data speculation, we keep the ALAT at most half full.  */
31815ed7 7648 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
4960a0cb 7649 return ready_index == 0 ? -1 : 1;
048d0d36 7650
4960a0cb
MK
7651 if (ready_index == 0)
7652 return 0;
7653
7654 if ((!reload_completed
7655 || !safe_group_barrier_needed (insn))
7656 && (!mflag_sched_mem_insns_hard_limit
7657 || !is_load_p (insn)
7658 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7659 return 0;
676cad4d
MK
7660
7661 return 1;
2130b7fb
BS
7662}
7663
30028c85
VM
7664/* The following variable value is pseudo-insn used by the DFA insn
7665 scheduler to change the DFA state when the simulated clock is
7666 increased. */
2130b7fb 7667
dd3d2b35 7668static rtx_insn *dfa_pre_cycle_insn;
2130b7fb 7669
388092d5
AB
7670/* Returns 1 when a meaningful insn was scheduled between the last group
7671 barrier and LAST. */
7672static int
b32d5189 7673scheduled_good_insn (rtx_insn *last)
388092d5
AB
7674{
7675 if (last && recog_memoized (last) >= 0)
7676 return 1;
7677
7678 for ( ;
7679 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7680 && !stops_p[INSN_UID (last)];
7681 last = PREV_INSN (last))
7682 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7683 the ebb we're scheduling. */
7684 if (INSN_P (last) && recog_memoized (last) >= 0)
7685 return 1;
7686
7687 return 0;
7688}
7689
1e5f1716 7690/* We are about to begin issuing INSN.  Return nonzero if we cannot
30028c85
VM
7691 issue it on given cycle CLOCK and return zero if we should not sort
7692 the ready queue on the next clock start. */
2130b7fb
BS
7693
7694static int
ac44248e 7695ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
9c808aad 7696 int clock, int *sort_p)
2130b7fb 7697{
e820471b 7698 gcc_assert (insn && INSN_P (insn));
b5b8b0ac
AO
7699
7700 if (DEBUG_INSN_P (insn))
7701 return 0;
7702
388092d5
AB
7703 /* When a group barrier is needed for insn, last_scheduled_insn
7704 should be set. */
7705 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7706 || last_scheduled_insn);
7707
7708 if ((reload_completed
7709 && (safe_group_barrier_needed (insn)
7710 || (mflag_sched_stop_bits_after_every_cycle
7711 && last_clock != clock
7712 && last_scheduled_insn
7713 && scheduled_good_insn (last_scheduled_insn))))
30028c85 7714 || (last_scheduled_insn
b64925dc 7715 && (CALL_P (last_scheduled_insn)
7b84aac0 7716 || unknown_for_bundling_p (last_scheduled_insn))))
2130b7fb 7717 {
30028c85 7718 init_insn_group_barriers ();
388092d5 7719
30028c85
VM
7720 if (verbose && dump)
7721 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7722 last_clock == clock ? " + cycle advance" : "");
388092d5 7723
30028c85 7724 stop_before_p = 1;
388092d5
AB
7725 current_cycle = clock;
7726 mem_ops_in_group[current_cycle % 4] = 0;
7727
30028c85 7728 if (last_clock == clock)
2130b7fb 7729 {
30028c85
VM
7730 state_transition (curr_state, dfa_stop_insn);
7731 if (TARGET_EARLY_STOP_BITS)
7732 *sort_p = (last_scheduled_insn == NULL_RTX
b64925dc 7733 || ! CALL_P (last_scheduled_insn));
30028c85
VM
7734 else
7735 *sort_p = 0;
7736 return 1;
7737 }
388092d5
AB
7738
7739 if (last_scheduled_insn)
25069b42 7740 {
7b84aac0 7741 if (unknown_for_bundling_p (last_scheduled_insn))
388092d5
AB
7742 state_reset (curr_state);
7743 else
7744 {
7745 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7746 state_transition (curr_state, dfa_stop_insn);
7747 state_transition (curr_state, dfa_pre_cycle_insn);
7748 state_transition (curr_state, NULL);
7749 }
25069b42 7750 }
30028c85 7751 }
30028c85 7752 return 0;
2130b7fb
BS
7753}
7754
048d0d36
MK
7755/* Implement targetm.sched.h_i_d_extended hook.
7756 Extend internal data structures. */
7757static void
7758ia64_h_i_d_extended (void)
7759{
048d0d36
MK
7760 if (stops_p != NULL)
7761 {
388092d5 7762 int new_clocks_length = get_max_uid () * 3 / 2;
5ead67f6 7763 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
048d0d36
MK
7764 clocks_length = new_clocks_length;
7765 }
7766}
388092d5
AB
7767\f
7768
7769/* This structure describes the data used by the backend to guide scheduling.
7770 When the current scheduling point is switched, this data should be saved
7771 and restored later, if the scheduler returns to this point. */
7772struct _ia64_sched_context
7773{
7774 state_t prev_cycle_state;
b32d5189 7775 rtx_insn *last_scheduled_insn;
388092d5
AB
7776 struct reg_write_state rws_sum[NUM_REGS];
7777 struct reg_write_state rws_insn[NUM_REGS];
7778 int first_instruction;
7779 int pending_data_specs;
7780 int current_cycle;
7781 char mem_ops_in_group[4];
7782};
7783typedef struct _ia64_sched_context *ia64_sched_context_t;
7784
7785/* Allocates a scheduling context. */
7786static void *
7787ia64_alloc_sched_context (void)
7788{
7789 return xmalloc (sizeof (struct _ia64_sched_context));
7790}
7791
7792/* Initializes the _SC context with clean data, if CLEAN_P, and from
7793 the global context otherwise. */
7794static void
7795ia64_init_sched_context (void *_sc, bool clean_p)
7796{
7797 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7798
7799 sc->prev_cycle_state = xmalloc (dfa_state_size);
7800 if (clean_p)
7801 {
7802 state_reset (sc->prev_cycle_state);
b32d5189 7803 sc->last_scheduled_insn = NULL;
388092d5
AB
7804 memset (sc->rws_sum, 0, sizeof (rws_sum));
7805 memset (sc->rws_insn, 0, sizeof (rws_insn));
7806 sc->first_instruction = 1;
7807 sc->pending_data_specs = 0;
7808 sc->current_cycle = 0;
7809 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7810 }
7811 else
7812 {
7813 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7814 sc->last_scheduled_insn = last_scheduled_insn;
7815 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7816 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7817 sc->first_instruction = first_instruction;
7818 sc->pending_data_specs = pending_data_specs;
7819 sc->current_cycle = current_cycle;
7820 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7821 }
7822}
7823
7824/* Sets the global scheduling context to the one pointed to by _SC. */
7825static void
7826ia64_set_sched_context (void *_sc)
7827{
7828 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7829
7830 gcc_assert (sc != NULL);
7831
7832 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7833 last_scheduled_insn = sc->last_scheduled_insn;
7834 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7835 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7836 first_instruction = sc->first_instruction;
7837 pending_data_specs = sc->pending_data_specs;
7838 current_cycle = sc->current_cycle;
7839 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7840}
7841
7842/* Clears the data in the _SC scheduling context. */
7843static void
7844ia64_clear_sched_context (void *_sc)
7845{
7846 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7847
7848 free (sc->prev_cycle_state);
7849 sc->prev_cycle_state = NULL;
7850}
7851
7852/* Frees the _SC scheduling context. */
7853static void
7854ia64_free_sched_context (void *_sc)
7855{
7856 gcc_assert (_sc != NULL);
7857
7858 free (_sc);
7859}
7860
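/* A hedged sketch of how the five context hooks above are meant to be
   used by the (selective) scheduler; the exact call sites live outside
   this file:

     void *ctx = ia64_alloc_sched_context ();
     ia64_init_sched_context (ctx, false);    snapshot the current globals
     ...                                      schedule at another point
     ia64_set_sched_context (ctx);            restore the saved snapshot
     ia64_clear_sched_context (ctx);
     ia64_free_sched_context (ctx);

   Together they save and restore rws_sum/rws_insn, the DFA cycle state and
   the speculation counters declared earlier.  */
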
7861typedef rtx (* gen_func_t) (rtx, rtx);
7862
7863/* Return a function that will generate a load of mode MODE_NO
7864 with speculation types TS. */
7865static gen_func_t
7866get_spec_load_gen_function (ds_t ts, int mode_no)
7867{
7868 static gen_func_t gen_ld_[] = {
7869 gen_movbi,
7870 gen_movqi_internal,
7871 gen_movhi_internal,
7872 gen_movsi_internal,
7873 gen_movdi_internal,
7874 gen_movsf_internal,
7875 gen_movdf_internal,
7876 gen_movxf_internal,
7877 gen_movti_internal,
7878 gen_zero_extendqidi2,
7879 gen_zero_extendhidi2,
7880 gen_zero_extendsidi2,
7881 };
7882
7883 static gen_func_t gen_ld_a[] = {
7884 gen_movbi_advanced,
7885 gen_movqi_advanced,
7886 gen_movhi_advanced,
7887 gen_movsi_advanced,
7888 gen_movdi_advanced,
7889 gen_movsf_advanced,
7890 gen_movdf_advanced,
7891 gen_movxf_advanced,
7892 gen_movti_advanced,
7893 gen_zero_extendqidi2_advanced,
7894 gen_zero_extendhidi2_advanced,
7895 gen_zero_extendsidi2_advanced,
7896 };
7897 static gen_func_t gen_ld_s[] = {
7898 gen_movbi_speculative,
7899 gen_movqi_speculative,
7900 gen_movhi_speculative,
7901 gen_movsi_speculative,
7902 gen_movdi_speculative,
7903 gen_movsf_speculative,
7904 gen_movdf_speculative,
7905 gen_movxf_speculative,
7906 gen_movti_speculative,
7907 gen_zero_extendqidi2_speculative,
7908 gen_zero_extendhidi2_speculative,
7909 gen_zero_extendsidi2_speculative,
7910 };
7911 static gen_func_t gen_ld_sa[] = {
7912 gen_movbi_speculative_advanced,
7913 gen_movqi_speculative_advanced,
7914 gen_movhi_speculative_advanced,
7915 gen_movsi_speculative_advanced,
7916 gen_movdi_speculative_advanced,
7917 gen_movsf_speculative_advanced,
7918 gen_movdf_speculative_advanced,
7919 gen_movxf_speculative_advanced,
7920 gen_movti_speculative_advanced,
7921 gen_zero_extendqidi2_speculative_advanced,
7922 gen_zero_extendhidi2_speculative_advanced,
7923 gen_zero_extendsidi2_speculative_advanced,
7924 };
7925 static gen_func_t gen_ld_s_a[] = {
7926 gen_movbi_speculative_a,
7927 gen_movqi_speculative_a,
7928 gen_movhi_speculative_a,
7929 gen_movsi_speculative_a,
7930 gen_movdi_speculative_a,
7931 gen_movsf_speculative_a,
7932 gen_movdf_speculative_a,
7933 gen_movxf_speculative_a,
7934 gen_movti_speculative_a,
7935 gen_zero_extendqidi2_speculative_a,
7936 gen_zero_extendhidi2_speculative_a,
7937 gen_zero_extendsidi2_speculative_a,
7938 };
7939
7940 gen_func_t *gen_ld;
7941
7942 if (ts & BEGIN_DATA)
7943 {
7944 if (ts & BEGIN_CONTROL)
7945 gen_ld = gen_ld_sa;
7946 else
7947 gen_ld = gen_ld_a;
7948 }
7949 else if (ts & BEGIN_CONTROL)
7950 {
7951 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7952 || ia64_needs_block_p (ts))
7953 gen_ld = gen_ld_s;
7954 else
7955 gen_ld = gen_ld_s_a;
7956 }
7957 else if (ts == 0)
7958 gen_ld = gen_ld_;
7959 else
7960 gcc_unreachable ();
7961
7962 return gen_ld[mode_no];
7963}
048d0d36 7964
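/* An illustrative use of the selector above (the concrete values are
   assumptions for the example): for a DImode load that should be both
   data- and control-speculative,

     gen_func_t gen = get_spec_load_gen_function (BEGIN_DATA | BEGIN_CONTROL, 4);

   returns gen_movdi_speculative_advanced, since mode number 4 is DImode in
   the SPEC_MODES numbering below and the combined speculation bits select
   the gen_ld_sa table (the ld8.sa form).  */
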
ef4bddc2 7965/* Constants that help map 'machine_mode' to int.  */
048d0d36
MK
7966enum SPEC_MODES
7967 {
7968 SPEC_MODE_INVALID = -1,
7969 SPEC_MODE_FIRST = 0,
7970 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7971 SPEC_MODE_FOR_EXTEND_LAST = 3,
7972 SPEC_MODE_LAST = 8
7973 };
7974
388092d5
AB
7975enum
7976 {
7977 /* Offset to reach ZERO_EXTEND patterns. */
7978 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7979 };
7980
048d0d36
MK
7981/* Return index of the MODE. */
7982static int
ef4bddc2 7983ia64_mode_to_int (machine_mode mode)
048d0d36
MK
7984{
7985 switch (mode)
7986 {
4e10a5a7
RS
7987 case E_BImode: return 0; /* SPEC_MODE_FIRST */
7988 case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7989 case E_HImode: return 2;
7990 case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7991 case E_DImode: return 4;
7992 case E_SFmode: return 5;
7993 case E_DFmode: return 6;
7994 case E_XFmode: return 7;
7995 case E_TImode:
048d0d36
MK
7996 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7997 mentioned in itanium[12].md. Predicate fp_register_operand also
7998 needs to be defined. Bottom line: better disable for now. */
7999 return SPEC_MODE_INVALID;
8000 default: return SPEC_MODE_INVALID;
8001 }
8002}
8003
8004/* Provide information about speculation capabilities. */
8005static void
8006ia64_set_sched_flags (spec_info_t spec_info)
8007{
8008 unsigned int *flags = &(current_sched_info->flags);
8009
8010 if (*flags & SCHED_RGN
388092d5
AB
8011 || *flags & SCHED_EBB
8012 || *flags & SEL_SCHED)
048d0d36
MK
8013 {
8014 int mask = 0;
8015
a57aee2a 8016 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
388092d5 8017 || (mflag_sched_ar_data_spec && reload_completed))
048d0d36
MK
8018 {
8019 mask |= BEGIN_DATA;
388092d5
AB
8020
8021 if (!sel_sched_p ()
8022 && ((mflag_sched_br_in_data_spec && !reload_completed)
8023 || (mflag_sched_ar_in_data_spec && reload_completed)))
048d0d36
MK
8024 mask |= BE_IN_DATA;
8025 }
8026
388092d5
AB
8027 if (mflag_sched_control_spec
8028 && (!sel_sched_p ()
8029 || reload_completed))
048d0d36
MK
8030 {
8031 mask |= BEGIN_CONTROL;
8032
388092d5 8033 if (!sel_sched_p () && mflag_sched_in_control_spec)
048d0d36
MK
8034 mask |= BE_IN_CONTROL;
8035 }
8036
7ab5df48
AB
8037 spec_info->mask = mask;
8038
048d0d36
MK
8039 if (mask)
8040 {
6fb5fa3c
DB
8041 *flags |= USE_DEPS_LIST | DO_SPECULATION;
8042
8043 if (mask & BE_IN_SPEC)
8044 *flags |= NEW_BBS;
048d0d36 8045
048d0d36
MK
8046 spec_info->flags = 0;
8047
16d83dd6
MK
8048 if ((mask & CONTROL_SPEC)
8049 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8050 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
388092d5
AB
8051
8052 if (sched_verbose >= 1)
8053 spec_info->dump = sched_dump;
048d0d36
MK
8054 else
8055 spec_info->dump = 0;
8056
8057 if (mflag_sched_count_spec_in_critical_path)
8058 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8059 }
8060 }
cd510f15
AM
8061 else
8062 spec_info->mask = 0;
048d0d36
MK
8063}
8064
388092d5
AB
8065/* If INSN is an appropriate load return its mode.
8066 Return -1 otherwise. */
048d0d36 8067static int
647d790d 8068get_mode_no_for_insn (rtx_insn *insn)
388092d5
AB
8069{
8070 rtx reg, mem, mode_rtx;
8071 int mode_no;
048d0d36 8072 bool extend_p;
048d0d36 8073
388092d5 8074 extract_insn_cached (insn);
048d0d36 8075
388092d5
AB
8076 /* We use WHICH_ALTERNATIVE only after reload. This will
8077 guarantee that reload won't touch a speculative insn. */
f6ec1d11 8078
388092d5 8079 if (recog_data.n_operands != 2)
048d0d36
MK
8080 return -1;
8081
388092d5
AB
8082 reg = recog_data.operand[0];
8083 mem = recog_data.operand[1];
f6ec1d11 8084
388092d5
AB
8085 /* We should use MEM's mode since REG's mode in presence of
8086 ZERO_EXTEND will always be DImode. */
8087 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8088 /* Process non-speculative ld. */
8089 {
8090 if (!reload_completed)
8091 {
8092 /* Do not speculate into regs like ar.lc. */
8093 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8094 return -1;
8095
8096 if (!MEM_P (mem))
8097 return -1;
8098
8099 {
8100 rtx mem_reg = XEXP (mem, 0);
8101
8102 if (!REG_P (mem_reg))
8103 return -1;
8104 }
8105
8106 mode_rtx = mem;
8107 }
8108 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8109 {
8110 gcc_assert (REG_P (reg) && MEM_P (mem));
8111 mode_rtx = mem;
8112 }
8113 else
8114 return -1;
8115 }
8116 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8117 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8118 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8119 /* Process speculative ld or ld.c. */
048d0d36 8120 {
388092d5
AB
8121 gcc_assert (REG_P (reg) && MEM_P (mem));
8122 mode_rtx = mem;
048d0d36
MK
8123 }
8124 else
048d0d36 8125 {
388092d5 8126 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
048d0d36 8127
388092d5
AB
8128 if (attr_class == ITANIUM_CLASS_CHK_A
8129 || attr_class == ITANIUM_CLASS_CHK_S_I
8130 || attr_class == ITANIUM_CLASS_CHK_S_F)
8131 /* Process chk. */
8132 mode_rtx = reg;
8133 else
8134 return -1;
048d0d36 8135 }
f6ec1d11 8136
388092d5 8137 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
f6ec1d11 8138
388092d5 8139 if (mode_no == SPEC_MODE_INVALID)
048d0d36
MK
8140 return -1;
8141
388092d5
AB
8142 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8143
8144 if (extend_p)
8145 {
8146 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8147 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8148 return -1;
f6ec1d11 8149
388092d5
AB
8150 mode_no += SPEC_GEN_EXTEND_OFFSET;
8151 }
048d0d36 8152
388092d5 8153 return mode_no;
048d0d36
MK
8154}
8155
388092d5
AB
8156/* If X is an unspec part of a speculative load, return its code.
8157 Return -1 otherwise. */
8158static int
8159get_spec_unspec_code (const_rtx x)
8160{
8161 if (GET_CODE (x) != UNSPEC)
8162 return -1;
048d0d36 8163
048d0d36 8164 {
388092d5 8165 int code;
048d0d36 8166
388092d5 8167 code = XINT (x, 1);
048d0d36 8168
388092d5
AB
8169 switch (code)
8170 {
8171 case UNSPEC_LDA:
8172 case UNSPEC_LDS:
8173 case UNSPEC_LDS_A:
8174 case UNSPEC_LDSA:
8175 return code;
048d0d36 8176
388092d5
AB
8177 default:
8178 return -1;
8179 }
8180 }
8181}
048d0d36 8182
388092d5
AB
8183/* Implement skip_rtx_p hook. */
8184static bool
8185ia64_skip_rtx_p (const_rtx x)
8186{
8187 return get_spec_unspec_code (x) != -1;
8188}
048d0d36 8189
388092d5
AB
8190/* If INSN is a speculative load, return its UNSPEC code.
8191 Return -1 otherwise. */
8192static int
8193get_insn_spec_code (const_rtx insn)
8194{
8195 rtx pat, reg, mem;
048d0d36 8196
388092d5 8197 pat = PATTERN (insn);
048d0d36 8198
8199 if (GET_CODE (pat) == COND_EXEC)
8200 pat = COND_EXEC_CODE (pat);
048d0d36 8201
8202 if (GET_CODE (pat) != SET)
8203 return -1;
8204
8205 reg = SET_DEST (pat);
8206 if (!REG_P (reg))
8207 return -1;
8208
8209 mem = SET_SRC (pat);
8210 if (GET_CODE (mem) == ZERO_EXTEND)
8211 mem = XEXP (mem, 0);
8212
8213 return get_spec_unspec_code (mem);
8214}
8215
8216/* If INSN is a speculative load, return a ds with the speculation types.
8217 Otherwise [if INSN is a normal instruction] return 0. */
8218static ds_t
ac44248e 8219ia64_get_insn_spec_ds (rtx_insn *insn)
8220{
8221 int code = get_insn_spec_code (insn);
8222
8223 switch (code)
048d0d36 8224 {
8225 case UNSPEC_LDA:
8226 return BEGIN_DATA;
048d0d36 8227
8228 case UNSPEC_LDS:
8229 case UNSPEC_LDS_A:
8230 return BEGIN_CONTROL;
048d0d36 8231
8232 case UNSPEC_LDSA:
8233 return BEGIN_DATA | BEGIN_CONTROL;
048d0d36 8234
8235 default:
8236 return 0;
048d0d36 8237 }
8238}
8239
8240/* If INSN is a speculative load return a ds with the speculation types that
8241 will be checked.
8242 Otherwise [if INSN is a normal instruction] return 0. */
8243static ds_t
ac44248e 8244ia64_get_insn_checked_ds (rtx_insn *insn)
8245{
8246 int code = get_insn_spec_code (insn);
8247
8248 switch (code)
048d0d36 8249 {
8250 case UNSPEC_LDA:
8251 return BEGIN_DATA | BEGIN_CONTROL;
8252
8253 case UNSPEC_LDS:
8254 return BEGIN_CONTROL;
8255
8256 case UNSPEC_LDS_A:
8257 case UNSPEC_LDSA:
8258 return BEGIN_DATA | BEGIN_CONTROL;
8259
8260 default:
8261 return 0;
048d0d36 8262 }
388092d5 8263}
048d0d36 8264
/* Return a speculative load pattern for INSN with speculation type TS and
   machine mode index MODE_NO.  MODE_NO also selects the ZERO_EXTEND
   variant when the original load extends its result.  */
8269static rtx
8270ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8271{
8272 rtx pat, new_pat;
8273 gen_func_t gen_load;
048d0d36 8274
388092d5 8275 gen_load = get_spec_load_gen_function (ts, mode_no);
048d0d36 8276
8277 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8278 copy_rtx (recog_data.operand[1]));
8279
8280 pat = PATTERN (insn);
8281 if (GET_CODE (pat) == COND_EXEC)
8282 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8283 new_pat);
8284
8285 return new_pat;
8286}
8287
048d0d36 8288static bool
8289insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8290 ds_t ds ATTRIBUTE_UNUSED)
048d0d36 8291{
8292 return false;
8293}
048d0d36 8294
8295/* Implement targetm.sched.speculate_insn hook.
8296 Check if the INSN can be TS speculative.
8297 If 'no' - return -1.
8298 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8299 If current pattern of the INSN already provides TS speculation,
8300 return 0. */
8301static int
ac44248e 8302ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8303{
8304 int mode_no;
8305 int res;
8306
8307 gcc_assert (!(ts & ~SPECULATIVE));
048d0d36 8308
8309 if (ia64_spec_check_p (insn))
8310 return -1;
048d0d36 8311
8312 if ((ts & BE_IN_SPEC)
8313 && !insn_can_be_in_speculative_p (insn, ts))
8314 return -1;
048d0d36 8315
388092d5 8316 mode_no = get_mode_no_for_insn (insn);
048d0d36 8317
8318 if (mode_no != SPEC_MODE_INVALID)
8319 {
8320 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8321 res = 0;
8322 else
8323 {
8324 res = 1;
8325 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8326 }
8327 }
8328 else
8329 res = -1;
048d0d36 8330
8331 return res;
8332}
048d0d36 8333
8334/* Return a function that will generate a check for speculation TS with mode
8335 MODE_NO.
8336 If simple check is needed, pass true for SIMPLE_CHECK_P.
8337 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8338static gen_func_t
8339get_spec_check_gen_function (ds_t ts, int mode_no,
8340 bool simple_check_p, bool clearing_check_p)
8341{
8342 static gen_func_t gen_ld_c_clr[] = {
8343 gen_movbi_clr,
8344 gen_movqi_clr,
8345 gen_movhi_clr,
8346 gen_movsi_clr,
8347 gen_movdi_clr,
8348 gen_movsf_clr,
8349 gen_movdf_clr,
8350 gen_movxf_clr,
8351 gen_movti_clr,
8352 gen_zero_extendqidi2_clr,
8353 gen_zero_extendhidi2_clr,
8354 gen_zero_extendsidi2_clr,
8355 };
8356 static gen_func_t gen_ld_c_nc[] = {
8357 gen_movbi_nc,
8358 gen_movqi_nc,
8359 gen_movhi_nc,
8360 gen_movsi_nc,
8361 gen_movdi_nc,
8362 gen_movsf_nc,
8363 gen_movdf_nc,
8364 gen_movxf_nc,
8365 gen_movti_nc,
8366 gen_zero_extendqidi2_nc,
8367 gen_zero_extendhidi2_nc,
8368 gen_zero_extendsidi2_nc,
8369 };
8370 static gen_func_t gen_chk_a_clr[] = {
8371 gen_advanced_load_check_clr_bi,
8372 gen_advanced_load_check_clr_qi,
8373 gen_advanced_load_check_clr_hi,
8374 gen_advanced_load_check_clr_si,
8375 gen_advanced_load_check_clr_di,
8376 gen_advanced_load_check_clr_sf,
8377 gen_advanced_load_check_clr_df,
8378 gen_advanced_load_check_clr_xf,
8379 gen_advanced_load_check_clr_ti,
8380 gen_advanced_load_check_clr_di,
8381 gen_advanced_load_check_clr_di,
8382 gen_advanced_load_check_clr_di,
8383 };
8384 static gen_func_t gen_chk_a_nc[] = {
8385 gen_advanced_load_check_nc_bi,
8386 gen_advanced_load_check_nc_qi,
8387 gen_advanced_load_check_nc_hi,
8388 gen_advanced_load_check_nc_si,
8389 gen_advanced_load_check_nc_di,
8390 gen_advanced_load_check_nc_sf,
8391 gen_advanced_load_check_nc_df,
8392 gen_advanced_load_check_nc_xf,
8393 gen_advanced_load_check_nc_ti,
8394 gen_advanced_load_check_nc_di,
8395 gen_advanced_load_check_nc_di,
8396 gen_advanced_load_check_nc_di,
8397 };
8398 static gen_func_t gen_chk_s[] = {
8399 gen_speculation_check_bi,
8400 gen_speculation_check_qi,
8401 gen_speculation_check_hi,
8402 gen_speculation_check_si,
8403 gen_speculation_check_di,
8404 gen_speculation_check_sf,
8405 gen_speculation_check_df,
8406 gen_speculation_check_xf,
8407 gen_speculation_check_ti,
8408 gen_speculation_check_di,
8409 gen_speculation_check_di,
388092d5 8410 gen_speculation_check_di,
8411 };
8412
388092d5 8413 gen_func_t *gen_check;
048d0d36 8414
388092d5 8415 if (ts & BEGIN_DATA)
048d0d36 8416 {
      /* We don't need a recovery check, because even if this is an ld.sa,
         the ALAT entry will be allocated only if the NAT bit is set to
         zero.  So it is enough to use ld.c here.  */
8420
8421 if (simple_check_p)
8422 {
8423 gcc_assert (mflag_sched_spec_ldc);
8424
8425 if (clearing_check_p)
8426 gen_check = gen_ld_c_clr;
8427 else
8428 gen_check = gen_ld_c_nc;
8429 }
8430 else
8431 {
8432 if (clearing_check_p)
8433 gen_check = gen_chk_a_clr;
8434 else
8435 gen_check = gen_chk_a_nc;
8436 }
048d0d36 8437 }
388092d5 8438 else if (ts & BEGIN_CONTROL)
048d0d36 8439 {
8440 if (simple_check_p)
8441 /* We might want to use ld.sa -> ld.c instead of
8442 ld.s -> chk.s. */
048d0d36 8443 {
388092d5 8444 gcc_assert (!ia64_needs_block_p (ts));
048d0d36 8445
8446 if (clearing_check_p)
8447 gen_check = gen_ld_c_clr;
8448 else
8449 gen_check = gen_ld_c_nc;
8450 }
8451 else
8452 {
8453 gen_check = gen_chk_s;
048d0d36 8454 }
8455 }
8456 else
8457 gcc_unreachable ();
8458
8459 gcc_assert (mode_no >= 0);
8460 return gen_check[mode_no];
8461}
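/* For example (an inference from the tables above, not a guarantee): for a
   DImode advanced load, a simple clearing check selects gen_movdi_clr from
   gen_ld_c_clr, i.e. an ld8.c.clr-style re-load check, while the
   non-simple case selects gen_advanced_load_check_clr_di, i.e. a
   chk.a.clr-style check.  */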
8462
/* Return nonzero if speculation of type TS needs a branchy recovery check.  */
8464static bool
8465ia64_needs_block_p (ds_t ts)
8466{
8467 if (ts & BEGIN_DATA)
8468 return !mflag_sched_spec_ldc;
8469
8470 gcc_assert ((ts & BEGIN_CONTROL) != 0);
048d0d36 8471
8472 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8473}
8474
8e90de43 8475/* Generate (or regenerate) a recovery check for INSN. */
388092d5 8476static rtx
ac44248e 8477ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8478{
8479 rtx op1, pat, check_pat;
8480 gen_func_t gen_check;
8481 int mode_no;
8482
8483 mode_no = get_mode_no_for_insn (insn);
8484 gcc_assert (mode_no >= 0);
8485
8486 if (label)
8487 op1 = label;
8488 else
8489 {
8490 gcc_assert (!ia64_needs_block_p (ds));
8491 op1 = copy_rtx (recog_data.operand[1]);
048d0d36 8492 }
8493
8494 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8495 true);
048d0d36 8496
388092d5 8497 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8498
8499 pat = PATTERN (insn);
8500 if (GET_CODE (pat) == COND_EXEC)
8501 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8502 check_pat);
8503
8504 return check_pat;
8505}
8506
/* Return nonzero if X is a branchy recovery check.  */
8508static int
8509ia64_spec_check_p (rtx x)
8510{
8511 x = PATTERN (x);
8512 if (GET_CODE (x) == COND_EXEC)
8513 x = COND_EXEC_CODE (x);
8514 if (GET_CODE (x) == SET)
8515 return ia64_spec_check_src_p (SET_SRC (x));
8516 return 0;
8517}
8518
/* Return nonzero if SRC belongs to a recovery check.  */
8520static int
8521ia64_spec_check_src_p (rtx src)
8522{
8523 if (GET_CODE (src) == IF_THEN_ELSE)
8524 {
8525 rtx t;
8526
8527 t = XEXP (src, 0);
8528 if (GET_CODE (t) == NE)
8529 {
8530 t = XEXP (t, 0);
8531
8532 if (GET_CODE (t) == UNSPEC)
8533 {
8534 int code;
8535
8536 code = XINT (t, 1);
8537
8538 if (code == UNSPEC_LDCCLR
8539 || code == UNSPEC_LDCNC
8540 || code == UNSPEC_CHKACLR
8541 || code == UNSPEC_CHKANC
8542 || code == UNSPEC_CHKS)
8543 {
8544 gcc_assert (code != 0);
8545 return code;
8546 }
8547 }
8548 }
8549 }
8550 return 0;
8551}
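/* A sketch, in assembler terms, of the transformations the routines above
   support (illustrative only; the actual operands, modes and recovery
   label are whatever the scheduler picks):

     control speculation:   ld8.s  r32 = [r33]        // hoisted above a branch
                            ...
                            chk.s  r32, recovery      // branchy recovery check

     data speculation:      ld8.a  r32 = [r33]        // hoisted above a store
                            ...
                            ld8.c.clr r32 = [r33]     // simple (re-load) check
                         or chk.a.clr r32, recovery   // branchy recovery check

   Here `recovery' is a hypothetical label for the recovery block.  */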
30028c85 8552\f
2130b7fb 8553
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes the state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
  rtx_insn *insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num;       /* insn number (0 for the initial state, 1 for the 1st
                         insn)  */
  int cost;           /* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
                                nops.  L is counted as 2 insns  */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};
2130b7fb 8581
/* The following maps an insn number to the corresponding bundle state.  */
2130b7fb 8583
30028c85 8584static struct bundle_state **index_to_bundle_states;
2130b7fb 8585
30028c85 8586/* The unique number of next bundle state. */
2130b7fb 8587
30028c85 8588static int bundle_states_num;
2130b7fb 8589
30028c85 8590/* All allocated bundle states are in the following chain. */
2130b7fb 8591
30028c85 8592static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 8593
30028c85
VM
8594/* All allocated but not used bundle states are in the following
8595 chain. */
870f9ec0 8596
30028c85 8597static struct bundle_state *free_bundle_state_chain;
2130b7fb 8598
2130b7fb 8599
30028c85 8600/* The following function returns a free bundle state. */
2130b7fb 8601
30028c85 8602static struct bundle_state *
9c808aad 8603get_free_bundle_state (void)
30028c85
VM
8604{
8605 struct bundle_state *result;
2130b7fb 8606
30028c85 8607 if (free_bundle_state_chain != NULL)
2130b7fb 8608 {
30028c85
VM
8609 result = free_bundle_state_chain;
8610 free_bundle_state_chain = result->next;
2130b7fb 8611 }
30028c85 8612 else
2130b7fb 8613 {
5ead67f6 8614 result = XNEW (struct bundle_state);
30028c85
VM
8615 result->dfa_state = xmalloc (dfa_state_size);
8616 result->allocated_states_chain = allocated_bundle_states_chain;
8617 allocated_bundle_states_chain = result;
2130b7fb 8618 }
30028c85
VM
8619 result->unique_num = bundle_states_num++;
8620 return result;
9c808aad 8621
30028c85 8622}
2130b7fb 8623
/* The following function frees the given bundle state.  */
2130b7fb 8625
30028c85 8626static void
9c808aad 8627free_bundle_state (struct bundle_state *state)
30028c85
VM
8628{
8629 state->next = free_bundle_state_chain;
8630 free_bundle_state_chain = state;
8631}
2130b7fb 8632
30028c85 8633/* Start work with abstract data `bundle states'. */
2130b7fb 8634
30028c85 8635static void
9c808aad 8636initiate_bundle_states (void)
30028c85
VM
8637{
8638 bundle_states_num = 0;
8639 free_bundle_state_chain = NULL;
8640 allocated_bundle_states_chain = NULL;
2130b7fb
BS
8641}
8642
30028c85 8643/* Finish work with abstract data `bundle states'. */
2130b7fb
BS
8644
8645static void
9c808aad 8646finish_bundle_states (void)
2130b7fb 8647{
30028c85
VM
8648 struct bundle_state *curr_state, *next_state;
8649
8650 for (curr_state = allocated_bundle_states_chain;
8651 curr_state != NULL;
8652 curr_state = next_state)
2130b7fb 8653 {
30028c85
VM
8654 next_state = curr_state->allocated_states_chain;
8655 free (curr_state->dfa_state);
8656 free (curr_state);
2130b7fb 8657 }
2130b7fb
BS
8658}
8659
3a4f280b 8660/* Hashtable helpers. */
2130b7fb 8661
8d67ee55 8662struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
3a4f280b 8663{
67f58944
TS
8664 static inline hashval_t hash (const bundle_state *);
8665 static inline bool equal (const bundle_state *, const bundle_state *);
3a4f280b 8666};
2130b7fb 8667
30028c85 8668/* The function returns hash of BUNDLE_STATE. */
2130b7fb 8669
3a4f280b 8670inline hashval_t
67f58944 8671bundle_state_hasher::hash (const bundle_state *state)
30028c85 8672{
30028c85 8673 unsigned result, i;
2130b7fb 8674
30028c85
VM
8675 for (result = i = 0; i < dfa_state_size; i++)
8676 result += (((unsigned char *) state->dfa_state) [i]
8677 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8678 return result + state->insn_num;
8679}
2130b7fb 8680
30028c85 8681/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 8682
3a4f280b 8683inline bool
67f58944
TS
8684bundle_state_hasher::equal (const bundle_state *state1,
8685 const bundle_state *state2)
30028c85 8686{
30028c85
VM
8687 return (state1->insn_num == state2->insn_num
8688 && memcmp (state1->dfa_state, state2->dfa_state,
8689 dfa_state_size) == 0);
8690}
2130b7fb 8691
3a4f280b
LC
8692/* Hash table of the bundle states. The key is dfa_state and insn_num
8693 of the bundle states. */
8694
c203e8a7 8695static hash_table<bundle_state_hasher> *bundle_state_table;
3a4f280b 8696
30028c85
VM
8697/* The function inserts the BUNDLE_STATE into the hash table. The
8698 function returns nonzero if the bundle has been inserted into the
8699 table. The table contains the best bundle state with given key. */
2130b7fb 8700
30028c85 8701static int
9c808aad 8702insert_bundle_state (struct bundle_state *bundle_state)
30028c85 8703{
3a4f280b 8704 struct bundle_state **entry_ptr;
2130b7fb 8705
c203e8a7 8706 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
30028c85
VM
8707 if (*entry_ptr == NULL)
8708 {
8709 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8710 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
3a4f280b 8711 *entry_ptr = bundle_state;
30028c85 8712 return TRUE;
2130b7fb 8713 }
3a4f280b
LC
8714 else if (bundle_state->cost < (*entry_ptr)->cost
8715 || (bundle_state->cost == (*entry_ptr)->cost
8716 && ((*entry_ptr)->accumulated_insns_num
30028c85 8717 > bundle_state->accumulated_insns_num
3a4f280b 8718 || ((*entry_ptr)->accumulated_insns_num
30028c85 8719 == bundle_state->accumulated_insns_num
3a4f280b 8720 && ((*entry_ptr)->branch_deviation
388092d5 8721 > bundle_state->branch_deviation
3a4f280b 8722 || ((*entry_ptr)->branch_deviation
388092d5 8723 == bundle_state->branch_deviation
3a4f280b 8724 && (*entry_ptr)->middle_bundle_stops
388092d5 8725 > bundle_state->middle_bundle_stops))))))
9c808aad 8726
2130b7fb 8727 {
30028c85
VM
8728 struct bundle_state temp;
8729
3a4f280b
LC
8730 temp = **entry_ptr;
8731 **entry_ptr = *bundle_state;
8732 (*entry_ptr)->next = temp.next;
30028c85 8733 *bundle_state = temp;
2130b7fb 8734 }
30028c85
VM
8735 return FALSE;
8736}
2130b7fb 8737
30028c85
VM
8738/* Start work with the hash table. */
8739
8740static void
9c808aad 8741initiate_bundle_state_table (void)
30028c85 8742{
c203e8a7 8743 bundle_state_table = new hash_table<bundle_state_hasher> (50);
2130b7fb
BS
8744}
8745
30028c85 8746/* Finish work with the hash table. */
e4027dab
BS
8747
8748static void
9c808aad 8749finish_bundle_state_table (void)
e4027dab 8750{
c203e8a7
TS
8751 delete bundle_state_table;
8752 bundle_state_table = NULL;
e4027dab
BS
8753}
8754
30028c85 8755\f
a0a7b566 8756
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */
a0a7b566 8759
dd3d2b35 8760static rtx_insn *ia64_nop;
a0a7b566 8761
30028c85
VM
8762/* The following function tries to issue NOPS_NUM nops for the current
8763 state without advancing processor cycle. If it failed, the
8764 function returns FALSE and frees the current state. */
8765
8766static int
9c808aad 8767try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 8768{
30028c85 8769 int i;
a0a7b566 8770
30028c85
VM
8771 for (i = 0; i < nops_num; i++)
8772 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8773 {
8774 free_bundle_state (curr_state);
8775 return FALSE;
8776 }
8777 return TRUE;
8778}
a0a7b566 8779
30028c85
VM
8780/* The following function tries to issue INSN for the current
8781 state without advancing processor cycle. If it failed, the
8782 function returns FALSE and frees the current state. */
a0a7b566 8783
30028c85 8784static int
9c808aad 8785try_issue_insn (struct bundle_state *curr_state, rtx insn)
30028c85
VM
8786{
8787 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8788 {
8789 free_bundle_state (curr_state);
8790 return FALSE;
8791 }
8792 return TRUE;
8793}
a0a7b566 8794
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it was successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */
a0a7b566 8801
30028c85 8802static void
9c808aad 8803issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
b32d5189
DM
8804 rtx_insn *insn, int try_bundle_end_p,
8805 int only_bundle_end_p)
30028c85
VM
8806{
8807 struct bundle_state *curr_state;
8808
8809 curr_state = get_free_bundle_state ();
8810 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8811 curr_state->insn = insn;
8812 curr_state->insn_num = originator->insn_num + 1;
8813 curr_state->cost = originator->cost;
8814 curr_state->originator = originator;
8815 curr_state->before_nops_num = before_nops_num;
8816 curr_state->after_nops_num = 0;
8817 curr_state->accumulated_insns_num
8818 = originator->accumulated_insns_num + before_nops_num;
8819 curr_state->branch_deviation = originator->branch_deviation;
388092d5 8820 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
e820471b
NS
8821 gcc_assert (insn);
8822 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
30028c85 8823 {
e820471b 8824 gcc_assert (GET_MODE (insn) != TImode);
30028c85
VM
8825 if (!try_issue_nops (curr_state, before_nops_num))
8826 return;
8827 if (!try_issue_insn (curr_state, insn))
8828 return;
8829 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
388092d5
AB
8830 if (curr_state->accumulated_insns_num % 3 != 0)
8831 curr_state->middle_bundle_stops++;
30028c85
VM
8832 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8833 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 8834 {
30028c85
VM
8835 free_bundle_state (curr_state);
8836 return;
a0a7b566 8837 }
a0a7b566 8838 }
30028c85 8839 else if (GET_MODE (insn) != TImode)
a0a7b566 8840 {
30028c85
VM
8841 if (!try_issue_nops (curr_state, before_nops_num))
8842 return;
8843 if (!try_issue_insn (curr_state, insn))
8844 return;
f32360c7 8845 curr_state->accumulated_insns_num++;
7b84aac0 8846 gcc_assert (!unknown_for_bundling_p (insn));
e820471b 8847
30028c85
VM
8848 if (ia64_safe_type (insn) == TYPE_L)
8849 curr_state->accumulated_insns_num++;
8850 }
8851 else
8852 {
68e11b42
JW
8853 /* If this is an insn that must be first in a group, then don't allow
8854 nops to be emitted before it. Currently, alloc is the only such
8855 supported instruction. */
8856 /* ??? The bundling automatons should handle this for us, but they do
8857 not yet have support for the first_insn attribute. */
8858 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8859 {
8860 free_bundle_state (curr_state);
8861 return;
8862 }
8863
30028c85
VM
8864 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8865 state_transition (curr_state->dfa_state, NULL);
8866 curr_state->cost++;
8867 if (!try_issue_nops (curr_state, before_nops_num))
8868 return;
8869 if (!try_issue_insn (curr_state, insn))
8870 return;
f32360c7 8871 curr_state->accumulated_insns_num++;
7b84aac0 8872 if (unknown_for_bundling_p (insn))
f32360c7
VM
8873 {
8874 /* Finish bundle containing asm insn. */
8875 curr_state->after_nops_num
8876 = 3 - curr_state->accumulated_insns_num % 3;
8877 curr_state->accumulated_insns_num
8878 += 3 - curr_state->accumulated_insns_num % 3;
8879 }
8880 else if (ia64_safe_type (insn) == TYPE_L)
30028c85
VM
8881 curr_state->accumulated_insns_num++;
8882 }
8883 if (ia64_safe_type (insn) == TYPE_B)
8884 curr_state->branch_deviation
8885 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8886 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8887 {
f32360c7 8888 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 8889 {
30028c85
VM
8890 state_t dfa_state;
8891 struct bundle_state *curr_state1;
8892 struct bundle_state *allocated_states_chain;
8893
8894 curr_state1 = get_free_bundle_state ();
8895 dfa_state = curr_state1->dfa_state;
8896 allocated_states_chain = curr_state1->allocated_states_chain;
8897 *curr_state1 = *curr_state;
8898 curr_state1->dfa_state = dfa_state;
8899 curr_state1->allocated_states_chain = allocated_states_chain;
8900 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8901 dfa_state_size);
8902 curr_state = curr_state1;
a0a7b566 8903 }
30028c85
VM
8904 if (!try_issue_nops (curr_state,
8905 3 - curr_state->accumulated_insns_num % 3))
8906 return;
8907 curr_state->after_nops_num
8908 = 3 - curr_state->accumulated_insns_num % 3;
8909 curr_state->accumulated_insns_num
8910 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 8911 }
30028c85
VM
8912 if (!insert_bundle_state (curr_state))
8913 free_bundle_state (curr_state);
8914 return;
8915}
e013f3c7 8916
/* The following function returns the position in the two-bundle window
   for the given STATE.  */
8919
8920static int
9c808aad 8921get_max_pos (state_t state)
30028c85
VM
8922{
8923 if (cpu_unit_reservation_p (state, pos_6))
8924 return 6;
8925 else if (cpu_unit_reservation_p (state, pos_5))
8926 return 5;
8927 else if (cpu_unit_reservation_p (state, pos_4))
8928 return 4;
8929 else if (cpu_unit_reservation_p (state, pos_3))
8930 return 3;
8931 else if (cpu_unit_reservation_p (state, pos_2))
8932 return 2;
8933 else if (cpu_unit_reservation_p (state, pos_1))
8934 return 1;
8935 else
8936 return 0;
a0a7b566
BS
8937}
8938
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with two
   values of position: 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which
   can cause stalls if an F-unit insn (including a NOP) is issued
   within a six-cycle window after reading certain application
   registers (such as ar.bsp).  Furthermore, power considerations also
   argue against the use of F-unit instructions unless they're really
   needed.  */
2130b7fb 8948
c237e94a 8949static int
9c808aad 8950get_template (state_t state, int pos)
2130b7fb 8951{
30028c85 8952 switch (pos)
2130b7fb 8953 {
30028c85 8954 case 3:
96ddf8ef 8955 if (cpu_unit_reservation_p (state, _0mmi_))
30028c85 8956 return 1;
96ddf8ef
VM
8957 else if (cpu_unit_reservation_p (state, _0mii_))
8958 return 0;
30028c85
VM
8959 else if (cpu_unit_reservation_p (state, _0mmb_))
8960 return 7;
96ddf8ef
VM
8961 else if (cpu_unit_reservation_p (state, _0mib_))
8962 return 6;
8963 else if (cpu_unit_reservation_p (state, _0mbb_))
8964 return 5;
8965 else if (cpu_unit_reservation_p (state, _0bbb_))
8966 return 4;
8967 else if (cpu_unit_reservation_p (state, _0mmf_))
8968 return 3;
8969 else if (cpu_unit_reservation_p (state, _0mfi_))
8970 return 2;
30028c85
VM
8971 else if (cpu_unit_reservation_p (state, _0mfb_))
8972 return 8;
8973 else if (cpu_unit_reservation_p (state, _0mlx_))
8974 return 9;
8975 else
e820471b 8976 gcc_unreachable ();
30028c85 8977 case 6:
96ddf8ef 8978 if (cpu_unit_reservation_p (state, _1mmi_))
30028c85 8979 return 1;
96ddf8ef
VM
8980 else if (cpu_unit_reservation_p (state, _1mii_))
8981 return 0;
30028c85
VM
8982 else if (cpu_unit_reservation_p (state, _1mmb_))
8983 return 7;
96ddf8ef
VM
8984 else if (cpu_unit_reservation_p (state, _1mib_))
8985 return 6;
8986 else if (cpu_unit_reservation_p (state, _1mbb_))
8987 return 5;
8988 else if (cpu_unit_reservation_p (state, _1bbb_))
8989 return 4;
8990 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8991 return 3;
8992 else if (cpu_unit_reservation_p (state, _1mfi_))
8993 return 2;
30028c85
VM
8994 else if (cpu_unit_reservation_p (state, _1mfb_))
8995 return 8;
8996 else if (cpu_unit_reservation_p (state, _1mlx_))
8997 return 9;
8998 else
e820471b 8999 gcc_unreachable ();
30028c85 9000 default:
e820471b 9001 gcc_unreachable ();
2130b7fb 9002 }
30028c85 9003}
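/* Judging purely by the unit reservations tested above, the codes returned
   here appear to map to the bundle templates as follows (an inference from
   this function, not an authoritative table):
     0 = .mii, 1 = .mmi, 2 = .mfi, 3 = .mmf, 4 = .bbb,
     5 = .mbb, 6 = .mib, 7 = .mmb, 8 = .mfb, 9 = .mlx  */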
2130b7fb 9004
388092d5 9005/* True when INSN is important for bundling. */
7b84aac0 9006
388092d5 9007static bool
647d790d 9008important_for_bundling_p (rtx_insn *insn)
388092d5
AB
9009{
9010 return (INSN_P (insn)
9011 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
9012 && GET_CODE (PATTERN (insn)) != USE
9013 && GET_CODE (PATTERN (insn)) != CLOBBER);
9014}
9015
/* The following function returns the next insn important for insn
   bundling, searching from INSN up to (but not including) TAIL.  */
a0a7b566 9018
b32d5189
DM
9019static rtx_insn *
9020get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
30028c85
VM
9021{
9022 for (; insn && insn != tail; insn = NEXT_INSN (insn))
388092d5 9023 if (important_for_bundling_p (insn))
30028c85 9024 return insn;
b32d5189 9025 return NULL;
30028c85
VM
9026}
9027
7b84aac0
EB
9028/* True when INSN is unknown, but important, for bundling. */
9029
9030static bool
647d790d 9031unknown_for_bundling_p (rtx_insn *insn)
7b84aac0
EB
9032{
9033 return (INSN_P (insn)
9034 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
9035 && GET_CODE (PATTERN (insn)) != USE
9036 && GET_CODE (PATTERN (insn)) != CLOBBER);
9037}
9038
4a4cd49c
JJ
9039/* Add a bundle selector TEMPLATE0 before INSN. */
9040
9041static void
b32d5189 9042ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
4a4cd49c
JJ
9043{
9044 rtx b = gen_bundle_selector (GEN_INT (template0));
9045
9046 ia64_emit_insn_before (b, insn);
9047#if NR_BUNDLES == 10
9048 if ((template0 == 4 || template0 == 5)
d5fabb58 9049 && ia64_except_unwind_info (&global_options) == UI_TARGET)
4a4cd49c
JJ
9050 {
9051 int i;
9052 rtx note = NULL_RTX;
9053
9054 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
9055 first or second slot. If it is and has REG_EH_NOTE set, copy it
9056 to following nops, as br.call sets rp to the address of following
9057 bundle and therefore an EH region end must be on a bundle
9058 boundary. */
9059 insn = PREV_INSN (insn);
9060 for (i = 0; i < 3; i++)
9061 {
9062 do
9063 insn = next_active_insn (insn);
b64925dc 9064 while (NONJUMP_INSN_P (insn)
4a4cd49c 9065 && get_attr_empty (insn) == EMPTY_YES);
b64925dc 9066 if (CALL_P (insn))
4a4cd49c
JJ
9067 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9068 else if (note)
9069 {
9070 int code;
9071
9072 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9073 || code == CODE_FOR_nop_b);
9074 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9075 note = NULL_RTX;
9076 else
bbbbb16a 9077 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
4a4cd49c
JJ
9078 }
9079 }
9080 }
9081#endif
9082}
9083
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserting
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the
   algorithm practical we use dynamic programming.  Each decision
   (about inserting nops and implicitly about previous decisions) is
   described by the structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insns and nops for it), we reuse the already generated one.  As
   a consequence we reject some decisions which cannot improve the
   solution and reduce the memory used by the algorithm.

   When we reach the end of an EBB (extended basic block), we choose
   the best sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are taken by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  */
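/* A tiny illustration of the dynamic programming step (hypothetical
   numbers): suppose insn I5 can be reached either by issuing I4 with no
   nops (cost 2, 7 accumulated insns) or by issuing I4 after one nop
   (cost 2, 8 accumulated insns), and both alternatives leave the
   automaton in the same DFA state.  insert_bundle_state keeps only the
   first variant (at equal cost the smaller accumulated_insns_num wins),
   so the second chain is freed and never extended further.  */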
a0a7b566 9120
30028c85 9121static void
b32d5189 9122bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
30028c85
VM
9123{
9124 struct bundle_state *curr_state, *next_state, *best_state;
b32d5189 9125 rtx_insn *insn, *next_insn;
30028c85 9126 int insn_num;
f32360c7 9127 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 9128 int pos = 0, max_pos, template0, template1;
b32d5189 9129 rtx_insn *b;
30028c85 9130 enum attr_type type;
2d1b811d 9131
30028c85 9132 insn_num = 0;
c856f536 9133 /* Count insns in the EBB. */
30028c85
VM
9134 for (insn = NEXT_INSN (prev_head_insn);
9135 insn && insn != tail;
9136 insn = NEXT_INSN (insn))
9137 if (INSN_P (insn))
9138 insn_num++;
9139 if (insn_num == 0)
9140 return;
9141 bundling_p = 1;
9142 dfa_clean_insn_cache ();
9143 initiate_bundle_state_table ();
5ead67f6 9144 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
ff482c8d 9145 /* First (forward) pass -- generation of bundle states. */
30028c85
VM
9146 curr_state = get_free_bundle_state ();
9147 curr_state->insn = NULL;
9148 curr_state->before_nops_num = 0;
9149 curr_state->after_nops_num = 0;
9150 curr_state->insn_num = 0;
9151 curr_state->cost = 0;
9152 curr_state->accumulated_insns_num = 0;
9153 curr_state->branch_deviation = 0;
388092d5 9154 curr_state->middle_bundle_stops = 0;
30028c85
VM
9155 curr_state->next = NULL;
9156 curr_state->originator = NULL;
9157 state_reset (curr_state->dfa_state);
9158 index_to_bundle_states [0] = curr_state;
9159 insn_num = 0;
c856f536 9160 /* Shift cycle mark if it is put on insn which could be ignored. */
30028c85
VM
9161 for (insn = NEXT_INSN (prev_head_insn);
9162 insn != tail;
9163 insn = NEXT_INSN (insn))
9164 if (INSN_P (insn)
7b84aac0 9165 && !important_for_bundling_p (insn)
30028c85 9166 && GET_MODE (insn) == TImode)
2130b7fb 9167 {
30028c85
VM
9168 PUT_MODE (insn, VOIDmode);
9169 for (next_insn = NEXT_INSN (insn);
9170 next_insn != tail;
9171 next_insn = NEXT_INSN (next_insn))
7b84aac0 9172 if (important_for_bundling_p (next_insn)
388092d5 9173 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
30028c85
VM
9174 {
9175 PUT_MODE (next_insn, TImode);
9176 break;
9177 }
2130b7fb 9178 }
048d0d36 9179 /* Forward pass: generation of bundle states. */
30028c85
VM
9180 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9181 insn != NULL_RTX;
9182 insn = next_insn)
1ad72cef 9183 {
7b84aac0 9184 gcc_assert (important_for_bundling_p (insn));
f32360c7 9185 type = ia64_safe_type (insn);
30028c85
VM
9186 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9187 insn_num++;
9188 index_to_bundle_states [insn_num] = NULL;
9189 for (curr_state = index_to_bundle_states [insn_num - 1];
9190 curr_state != NULL;
9191 curr_state = next_state)
f83594c4 9192 {
30028c85 9193 pos = curr_state->accumulated_insns_num % 3;
30028c85 9194 next_state = curr_state->next;
c856f536
VM
9195 /* We must fill up the current bundle in order to start a
9196 subsequent asm insn in a new bundle. Asm insn is always
9197 placed in a separate bundle. */
f32360c7
VM
9198 only_bundle_end_p
9199 = (next_insn != NULL_RTX
9200 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7b84aac0 9201 && unknown_for_bundling_p (next_insn));
c856f536
VM
9202 /* We may fill up the current bundle if it is the cycle end
9203 without a group barrier. */
30028c85 9204 bundle_end_p
f32360c7 9205 = (only_bundle_end_p || next_insn == NULL_RTX
30028c85
VM
9206 || (GET_MODE (next_insn) == TImode
9207 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9208 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7400e46b 9209 || type == TYPE_S)
f32360c7
VM
9210 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9211 only_bundle_end_p);
9212 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9213 only_bundle_end_p);
9214 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9215 only_bundle_end_p);
f83594c4 9216 }
e820471b 9217 gcc_assert (index_to_bundle_states [insn_num]);
30028c85
VM
9218 for (curr_state = index_to_bundle_states [insn_num];
9219 curr_state != NULL;
9220 curr_state = curr_state->next)
9221 if (verbose >= 2 && dump)
9222 {
c856f536
VM
9223 /* This structure is taken from generated code of the
9224 pipeline hazard recognizer (see file insn-attrtab.c).
9225 Please don't forget to change the structure if a new
9226 automaton is added to .md file. */
30028c85
VM
9227 struct DFA_chip
9228 {
9229 unsigned short one_automaton_state;
9230 unsigned short oneb_automaton_state;
9231 unsigned short two_automaton_state;
9232 unsigned short twob_automaton_state;
9233 };
9c808aad 9234
30028c85
VM
9235 fprintf
9236 (dump,
388092d5 9237 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
30028c85
VM
9238 curr_state->unique_num,
9239 (curr_state->originator == NULL
9240 ? -1 : curr_state->originator->unique_num),
9241 curr_state->cost,
9242 curr_state->before_nops_num, curr_state->after_nops_num,
9243 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 9244 curr_state->middle_bundle_stops,
7400e46b 9245 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85
VM
9246 INSN_UID (insn));
9247 }
1ad72cef 9248 }
e820471b
NS
9249
9250 /* We should find a solution because the 2nd insn scheduling has
9251 found one. */
9252 gcc_assert (index_to_bundle_states [insn_num]);
c856f536 9253 /* Find a state corresponding to the best insn sequence. */
30028c85
VM
9254 best_state = NULL;
9255 for (curr_state = index_to_bundle_states [insn_num];
9256 curr_state != NULL;
9257 curr_state = curr_state->next)
    /* We are only looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       with minimal inserted nops, and finally with branch insns placed
       in the 3rd slots.  */
30028c85
VM
9262 if (curr_state->accumulated_insns_num % 3 == 0
9263 && (best_state == NULL || best_state->cost > curr_state->cost
9264 || (best_state->cost == curr_state->cost
9265 && (curr_state->accumulated_insns_num
9266 < best_state->accumulated_insns_num
9267 || (curr_state->accumulated_insns_num
9268 == best_state->accumulated_insns_num
388092d5
AB
9269 && (curr_state->branch_deviation
9270 < best_state->branch_deviation
9271 || (curr_state->branch_deviation
9272 == best_state->branch_deviation
9273 && curr_state->middle_bundle_stops
9274 < best_state->middle_bundle_stops)))))))
30028c85 9275 best_state = curr_state;
c856f536 9276 /* Second (backward) pass: adding nops and templates. */
388092d5 9277 gcc_assert (best_state);
30028c85
VM
9278 insn_num = best_state->before_nops_num;
9279 template0 = template1 = -1;
9280 for (curr_state = best_state;
9281 curr_state->originator != NULL;
9282 curr_state = curr_state->originator)
9283 {
9284 insn = curr_state->insn;
7b84aac0 9285 asm_p = unknown_for_bundling_p (insn);
30028c85
VM
9286 insn_num++;
9287 if (verbose >= 2 && dump)
2130b7fb 9288 {
30028c85
VM
9289 struct DFA_chip
9290 {
9291 unsigned short one_automaton_state;
9292 unsigned short oneb_automaton_state;
9293 unsigned short two_automaton_state;
9294 unsigned short twob_automaton_state;
9295 };
9c808aad 9296
30028c85
VM
9297 fprintf
9298 (dump,
388092d5 9299 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
30028c85
VM
9300 curr_state->unique_num,
9301 (curr_state->originator == NULL
9302 ? -1 : curr_state->originator->unique_num),
9303 curr_state->cost,
9304 curr_state->before_nops_num, curr_state->after_nops_num,
9305 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 9306 curr_state->middle_bundle_stops,
7400e46b 9307 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85 9308 INSN_UID (insn));
2130b7fb 9309 }
      /* Find the position in the current bundle window.  The window can
         contain at most two bundles.  A two-bundle window means that
         the processor will make two bundle rotations.  */
30028c85 9313 max_pos = get_max_pos (curr_state->dfa_state);
c856f536
VM
9314 if (max_pos == 6
9315 /* The following (negative template number) means that the
9316 processor did one bundle rotation. */
9317 || (max_pos == 3 && template0 < 0))
2130b7fb 9318 {
c856f536
VM
9319 /* We are at the end of the window -- find template(s) for
9320 its bundle(s). */
30028c85
VM
9321 pos = max_pos;
9322 if (max_pos == 3)
9323 template0 = get_template (curr_state->dfa_state, 3);
9324 else
9325 {
9326 template1 = get_template (curr_state->dfa_state, 3);
9327 template0 = get_template (curr_state->dfa_state, 6);
9328 }
9329 }
9330 if (max_pos > 3 && template1 < 0)
c856f536 9331 /* It may happen when we have the stop inside a bundle. */
30028c85 9332 {
e820471b 9333 gcc_assert (pos <= 3);
30028c85
VM
9334 template1 = get_template (curr_state->dfa_state, 3);
9335 pos += 3;
9336 }
f32360c7 9337 if (!asm_p)
c856f536 9338 /* Emit nops after the current insn. */
f32360c7
VM
9339 for (i = 0; i < curr_state->after_nops_num; i++)
9340 {
b32d5189
DM
9341 rtx nop_pat = gen_nop ();
9342 rtx_insn *nop = emit_insn_after (nop_pat, insn);
f32360c7 9343 pos--;
e820471b 9344 gcc_assert (pos >= 0);
f32360c7
VM
9345 if (pos % 3 == 0)
9346 {
c856f536
VM
9347 /* We are at the start of a bundle: emit the template
9348 (it should be defined). */
e820471b 9349 gcc_assert (template0 >= 0);
4a4cd49c 9350 ia64_add_bundle_selector_before (template0, nop);
c856f536
VM
9351 /* If we have two bundle window, we make one bundle
9352 rotation. Otherwise template0 will be undefined
9353 (negative value). */
f32360c7
VM
9354 template0 = template1;
9355 template1 = -1;
9356 }
9357 }
c856f536
VM
9358 /* Move the position backward in the window. Group barrier has
9359 no slot. Asm insn takes all bundle. */
30028c85 9360 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7b84aac0 9361 && !unknown_for_bundling_p (insn))
30028c85 9362 pos--;
c856f536 9363 /* Long insn takes 2 slots. */
30028c85
VM
9364 if (ia64_safe_type (insn) == TYPE_L)
9365 pos--;
e820471b 9366 gcc_assert (pos >= 0);
30028c85
VM
9367 if (pos % 3 == 0
9368 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7b84aac0 9369 && !unknown_for_bundling_p (insn))
30028c85 9370 {
c856f536
VM
9371 /* The current insn is at the bundle start: emit the
9372 template. */
e820471b 9373 gcc_assert (template0 >= 0);
4a4cd49c 9374 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
9375 b = PREV_INSN (insn);
9376 insn = b;
68776c43 9377 /* See comment above in analogous place for emitting nops
c856f536 9378 after the insn. */
30028c85
VM
9379 template0 = template1;
9380 template1 = -1;
9381 }
      /* Emit nops before the current insn.  */
30028c85
VM
9383 for (i = 0; i < curr_state->before_nops_num; i++)
9384 {
b32d5189
DM
9385 rtx nop_pat = gen_nop ();
9386 ia64_emit_insn_before (nop_pat, insn);
9387 rtx_insn *nop = PREV_INSN (insn);
30028c85
VM
9388 insn = nop;
9389 pos--;
e820471b 9390 gcc_assert (pos >= 0);
30028c85
VM
9391 if (pos % 3 == 0)
9392 {
68776c43 9393 /* See comment above in analogous place for emitting nops
c856f536 9394 after the insn. */
e820471b 9395 gcc_assert (template0 >= 0);
4a4cd49c 9396 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
9397 b = PREV_INSN (insn);
9398 insn = b;
9399 template0 = template1;
9400 template1 = -1;
9401 }
2130b7fb
BS
9402 }
9403 }
388092d5 9404
e28c2052
MM
9405 if (flag_checking)
9406 {
9407 /* Assert right calculation of middle_bundle_stops. */
9408 int num = best_state->middle_bundle_stops;
9409 bool start_bundle = true, end_bundle = false;
388092d5 9410
e28c2052
MM
9411 for (insn = NEXT_INSN (prev_head_insn);
9412 insn && insn != tail;
9413 insn = NEXT_INSN (insn))
9414 {
9415 if (!INSN_P (insn))
9416 continue;
9417 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9418 start_bundle = true;
9419 else
9420 {
9421 rtx_insn *next_insn;
9422
9423 for (next_insn = NEXT_INSN (insn);
9424 next_insn && next_insn != tail;
9425 next_insn = NEXT_INSN (next_insn))
9426 if (INSN_P (next_insn)
9427 && (ia64_safe_itanium_class (next_insn)
9428 != ITANIUM_CLASS_IGNORE
9429 || recog_memoized (next_insn)
9430 == CODE_FOR_bundle_selector)
9431 && GET_CODE (PATTERN (next_insn)) != USE
9432 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9433 break;
9434
9435 end_bundle = next_insn == NULL_RTX
9436 || next_insn == tail
9437 || (INSN_P (next_insn)
9438 && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9439 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9440 && !start_bundle && !end_bundle
9441 && next_insn
9442 && !unknown_for_bundling_p (next_insn))
9443 num--;
9444
9445 start_bundle = false;
9446 }
9447 }
388092d5 9448
e28c2052
MM
9449 gcc_assert (num == 0);
9450 }
388092d5 9451
30028c85
VM
9452 free (index_to_bundle_states);
9453 finish_bundle_state_table ();
9454 bundling_p = 0;
9455 dfa_clean_insn_cache ();
2130b7fb 9456}
c65ebc55 9457
30028c85
VM
9458/* The following function is called at the end of scheduling BB or
9459 EBB. After reload, it inserts stop bits and does insn bundling. */
9460
9461static void
9c808aad 9462ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 9463{
30028c85
VM
9464 if (sched_verbose)
9465 fprintf (dump, "// Finishing schedule.\n");
9466 if (!reload_completed)
9467 return;
9468 if (reload_completed)
9469 {
9470 final_emit_insn_group_barriers (dump);
9471 bundling (dump, sched_verbose, current_sched_info->prev_head,
9472 current_sched_info->next_tail);
9473 if (sched_verbose && dump)
9474 fprintf (dump, "// finishing %d-%d\n",
9475 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9476 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 9477
30028c85
VM
9478 return;
9479 }
c237e94a
ZW
9480}
9481
30028c85 9482/* The following function inserts stop bits in scheduled BB or EBB. */
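/* (In the emitted assembly a stop bit is the `;;' that terminates an
   instruction group; inside the compiler it is represented by an
   insn_group_barrier insn, which is what the code below emits.)  */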
2130b7fb 9483
30028c85 9484static void
9c808aad 9485final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 9486{
dd3d2b35 9487 rtx_insn *insn;
30028c85 9488 int need_barrier_p = 0;
388092d5 9489 int seen_good_insn = 0;
2130b7fb 9490
30028c85 9491 init_insn_group_barriers ();
2130b7fb 9492
30028c85
VM
9493 for (insn = NEXT_INSN (current_sched_info->prev_head);
9494 insn != current_sched_info->next_tail;
9495 insn = NEXT_INSN (insn))
9496 {
b64925dc 9497 if (BARRIER_P (insn))
b395ddbe 9498 {
dd3d2b35 9499 rtx_insn *last = prev_active_insn (insn);
14d118d6 9500
30028c85 9501 if (! last)
b395ddbe 9502 continue;
34f0d87a 9503 if (JUMP_TABLE_DATA_P (last))
30028c85
VM
9504 last = prev_active_insn (last);
9505 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9506 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 9507
30028c85 9508 init_insn_group_barriers ();
388092d5 9509 seen_good_insn = 0;
30028c85 9510 need_barrier_p = 0;
b395ddbe 9511 }
b5b8b0ac 9512 else if (NONDEBUG_INSN_P (insn))
2130b7fb 9513 {
30028c85 9514 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 9515 {
30028c85 9516 init_insn_group_barriers ();
388092d5 9517 seen_good_insn = 0;
30028c85 9518 need_barrier_p = 0;
c65ebc55 9519 }
388092d5
AB
9520 else if (need_barrier_p || group_barrier_needed (insn)
9521 || (mflag_sched_stop_bits_after_every_cycle
9522 && GET_MODE (insn) == TImode
9523 && seen_good_insn))
2130b7fb 9524 {
30028c85
VM
9525 if (TARGET_EARLY_STOP_BITS)
9526 {
dd3d2b35 9527 rtx_insn *last;
9c808aad 9528
30028c85
VM
9529 for (last = insn;
9530 last != current_sched_info->prev_head;
9531 last = PREV_INSN (last))
9532 if (INSN_P (last) && GET_MODE (last) == TImode
9533 && stops_p [INSN_UID (last)])
9534 break;
9535 if (last == current_sched_info->prev_head)
9536 last = insn;
9537 last = prev_active_insn (last);
9538 if (last
9539 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9540 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9541 last);
9542 init_insn_group_barriers ();
9543 for (last = NEXT_INSN (last);
9544 last != insn;
9545 last = NEXT_INSN (last))
9546 if (INSN_P (last))
388092d5
AB
9547 {
9548 group_barrier_needed (last);
9549 if (recog_memoized (last) >= 0
9550 && important_for_bundling_p (last))
9551 seen_good_insn = 1;
9552 }
30028c85
VM
9553 }
9554 else
9555 {
9556 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9557 insn);
9558 init_insn_group_barriers ();
388092d5 9559 seen_good_insn = 0;
30028c85 9560 }
c1bc6ca8 9561 group_barrier_needed (insn);
388092d5
AB
9562 if (recog_memoized (insn) >= 0
9563 && important_for_bundling_p (insn))
9564 seen_good_insn = 1;
2130b7fb 9565 }
388092d5
AB
9566 else if (recog_memoized (insn) >= 0
9567 && important_for_bundling_p (insn))
034288ef 9568 seen_good_insn = 1;
b64925dc 9569 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
c65ebc55 9570 }
2130b7fb 9571 }
30028c85 9572}
2130b7fb 9573
30028c85 9574\f
2130b7fb 9575
a4d05547 9576/* If the following function returns TRUE, we will use the DFA
30028c85 9577 insn scheduler. */
2130b7fb 9578
c237e94a 9579static int
9c808aad 9580ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 9581{
30028c85
VM
9582 return (reload_completed ? 6 : 4);
9583}
2130b7fb 9584
30028c85 9585/* The following function initiates variable `dfa_pre_cycle_insn'. */
2130b7fb 9586
30028c85 9587static void
9c808aad 9588ia64_init_dfa_pre_cycle_insn (void)
30028c85
VM
9589{
9590 if (temp_dfa_state == NULL)
2130b7fb 9591 {
30028c85
VM
9592 dfa_state_size = state_size ();
9593 temp_dfa_state = xmalloc (dfa_state_size);
9594 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 9595 }
30028c85 9596 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
0f82e5c9 9597 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
30028c85
VM
9598 recog_memoized (dfa_pre_cycle_insn);
9599 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
0f82e5c9 9600 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
30028c85
VM
9601 recog_memoized (dfa_stop_insn);
9602}
2130b7fb 9603
30028c85
VM
9604/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9605 used by the DFA insn scheduler. */
2130b7fb 9606
30028c85 9607static rtx
9c808aad 9608ia64_dfa_pre_cycle_insn (void)
30028c85
VM
9609{
9610 return dfa_pre_cycle_insn;
9611}
2130b7fb 9612
30028c85
VM
9613/* The following function returns TRUE if PRODUCER (of type ilog or
9614 ld) produces address for CONSUMER (of type st or stf). */
2130b7fb 9615
30028c85 9616int
647d790d 9617ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
30028c85
VM
9618{
9619 rtx dest, reg, mem;
2130b7fb 9620
e820471b 9621 gcc_assert (producer && consumer);
30028c85 9622 dest = ia64_single_set (producer);
e820471b
NS
9623 gcc_assert (dest);
9624 reg = SET_DEST (dest);
9625 gcc_assert (reg);
30028c85
VM
9626 if (GET_CODE (reg) == SUBREG)
9627 reg = SUBREG_REG (reg);
e820471b
NS
9628 gcc_assert (GET_CODE (reg) == REG);
9629
30028c85 9630 dest = ia64_single_set (consumer);
e820471b
NS
9631 gcc_assert (dest);
9632 mem = SET_DEST (dest);
9633 gcc_assert (mem && GET_CODE (mem) == MEM);
30028c85 9634 return reg_mentioned_p (reg, mem);
2130b7fb
BS
9635}
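/* For instance (illustrative only), in a sequence like

       add r14 = r15, r16
       st8 [r14] = r17

   the add produces the address consumed by the store, so the
   producer/consumer pair qualifies for this bypass.  */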
9636
30028c85
VM
9637/* The following function returns TRUE if PRODUCER (of type ilog or
9638 ld) produces address for CONSUMER (of type ld or fld). */
2130b7fb 9639
30028c85 9640int
647d790d 9641ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
2130b7fb 9642{
30028c85
VM
9643 rtx dest, src, reg, mem;
9644
e820471b 9645 gcc_assert (producer && consumer);
30028c85 9646 dest = ia64_single_set (producer);
e820471b
NS
9647 gcc_assert (dest);
9648 reg = SET_DEST (dest);
9649 gcc_assert (reg);
30028c85
VM
9650 if (GET_CODE (reg) == SUBREG)
9651 reg = SUBREG_REG (reg);
e820471b
NS
9652 gcc_assert (GET_CODE (reg) == REG);
9653
30028c85 9654 src = ia64_single_set (consumer);
e820471b
NS
9655 gcc_assert (src);
9656 mem = SET_SRC (src);
9657 gcc_assert (mem);
048d0d36 9658
30028c85
VM
9659 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9660 mem = XVECEXP (mem, 0, 0);
048d0d36 9661 else if (GET_CODE (mem) == IF_THEN_ELSE)
917f1b7e 9662 /* ??? Is this bypass necessary for ld.c? */
048d0d36
MK
9663 {
9664 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9665 mem = XEXP (mem, 1);
9666 }
9667
30028c85
VM
9668 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9669 mem = XEXP (mem, 0);
ef1ecf87 9670
048d0d36
MK
9671 if (GET_CODE (mem) == UNSPEC)
9672 {
9673 int c = XINT (mem, 1);
9674
388092d5
AB
9675 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9676 || c == UNSPEC_LDSA);
048d0d36
MK
9677 mem = XVECEXP (mem, 0, 0);
9678 }
9679
ef1ecf87 9680 /* Note that LO_SUM is used for GOT loads. */
e820471b 9681 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
ef1ecf87 9682
30028c85
VM
9683 return reg_mentioned_p (reg, mem);
9684}
9685
/* The following function returns TRUE if INSN produces an address for a
   load/store insn.  We will place such insns into the M slot because it
   decreases their latency time.  */
30028c85
VM
9689
9690int
9c808aad 9691ia64_produce_address_p (rtx insn)
30028c85
VM
9692{
9693 return insn->call;
2130b7fb 9694}
30028c85 9695
2130b7fb 9696\f
3b572406
RH
9697/* Emit pseudo-ops for the assembler to describe predicate relations.
9698 At present this assumes that we only consider predicate pairs to
9699 be mutex, and that the assembler can deduce proper values from
9700 straight-line code. */
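/* (The assembler-level construct involved is the .pred.rel directive
   family, e.g. something like `.pred.rel "mutex", p6, p7'; the exact
   text GCC emits comes from the pred_rel_mutex and safe_across_calls_*
   patterns used below.)  */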
9701
9702static void
9c808aad 9703emit_predicate_relation_info (void)
3b572406 9704{
e0082a72 9705 basic_block bb;
3b572406 9706
4f42035e 9707 FOR_EACH_BB_REVERSE_FN (bb, cfun)
3b572406 9708 {
3b572406 9709 int r;
dd3d2b35 9710 rtx_insn *head = BB_HEAD (bb);
3b572406
RH
9711
9712 /* We only need such notes at code labels. */
b64925dc 9713 if (! LABEL_P (head))
3b572406 9714 continue;
740aeb38 9715 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
3b572406
RH
9716 head = NEXT_INSN (head);
9717
9f3b8452
RH
9718 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9719 grabbing the entire block of predicate registers. */
9720 for (r = PR_REG (2); r < PR_REG (64); r += 2)
6fb5fa3c 9721 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
3b572406 9722 {
f2f90c63 9723 rtx p = gen_rtx_REG (BImode, r);
dd3d2b35 9724 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111 9725 if (head == BB_END (bb))
1130d5e3 9726 BB_END (bb) = n;
3b572406
RH
9727 head = n;
9728 }
9729 }
ca3920ad
JW
9730
9731 /* Look for conditional calls that do not return, and protect predicate
9732 relations around them. Otherwise the assembler will assume the call
9733 returns, and complain about uses of call-clobbered predicates after
9734 the call. */
4f42035e 9735 FOR_EACH_BB_REVERSE_FN (bb, cfun)
ca3920ad 9736 {
dd3d2b35 9737 rtx_insn *insn = BB_HEAD (bb);
9c808aad 9738
ca3920ad
JW
9739 while (1)
9740 {
b64925dc 9741 if (CALL_P (insn)
ca3920ad
JW
9742 && GET_CODE (PATTERN (insn)) == COND_EXEC
9743 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9744 {
dd3d2b35
DM
9745 rtx_insn *b =
9746 emit_insn_before (gen_safe_across_calls_all (), insn);
9747 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111 9748 if (BB_HEAD (bb) == insn)
1130d5e3 9749 BB_HEAD (bb) = b;
a813c111 9750 if (BB_END (bb) == insn)
1130d5e3 9751 BB_END (bb) = a;
ca3920ad 9752 }
9c808aad 9753
a813c111 9754 if (insn == BB_END (bb))
ca3920ad
JW
9755 break;
9756 insn = NEXT_INSN (insn);
9757 }
9758 }
3b572406
RH
9759}
9760
c65ebc55
JW
9761/* Perform machine dependent operations on the rtl chain INSNS. */
9762
18dbd950 9763static void
9c808aad 9764ia64_reorg (void)
c65ebc55 9765{
1e3881c2
JH
9766 /* We are freeing block_for_insn in the toplev to keep compatibility
9767 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 9768 compute_bb_for_insn ();
a00fe19f
RH
9769
9770 /* If optimizing, we'll have split before scheduling. */
9771 if (optimize == 0)
6fb5fa3c 9772 split_all_insns ();
2130b7fb 9773
2ba42841 9774 if (optimize && flag_schedule_insns_after_reload
388092d5 9775 && dbg_cnt (ia64_sched2))
f4d578da 9776 {
547fdef8 9777 basic_block bb;
eced69b5 9778 timevar_push (TV_SCHED2);
f4d578da 9779 ia64_final_schedule = 1;
30028c85 9780
547fdef8
BS
9781 /* We can't let modulo-sched prevent us from scheduling any bbs,
9782 since we need the final schedule to produce bundle information. */
11cd3bed 9783 FOR_EACH_BB_FN (bb, cfun)
547fdef8
BS
9784 bb->flags &= ~BB_DISABLE_SCHEDULE;
9785
30028c85
VM
9786 initiate_bundle_states ();
9787 ia64_nop = make_insn_raw (gen_nop ());
0f82e5c9 9788 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
30028c85
VM
9789 recog_memoized (ia64_nop);
9790 clocks_length = get_max_uid () + 1;
5ead67f6 9791 stops_p = XCNEWVEC (char, clocks_length);
7400e46b 9792
30028c85
VM
9793 if (ia64_tune == PROCESSOR_ITANIUM2)
9794 {
9795 pos_1 = get_cpu_unit_code ("2_1");
9796 pos_2 = get_cpu_unit_code ("2_2");
9797 pos_3 = get_cpu_unit_code ("2_3");
9798 pos_4 = get_cpu_unit_code ("2_4");
9799 pos_5 = get_cpu_unit_code ("2_5");
9800 pos_6 = get_cpu_unit_code ("2_6");
9801 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9802 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9803 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9804 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9805 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9806 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9807 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9808 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9809 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9810 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9811 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9812 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9813 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9814 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9815 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9816 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9817 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9818 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9819 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9820 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9821 }
9822 else
9823 {
9824 pos_1 = get_cpu_unit_code ("1_1");
9825 pos_2 = get_cpu_unit_code ("1_2");
9826 pos_3 = get_cpu_unit_code ("1_3");
9827 pos_4 = get_cpu_unit_code ("1_4");
9828 pos_5 = get_cpu_unit_code ("1_5");
9829 pos_6 = get_cpu_unit_code ("1_6");
9830 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9831 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9832 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9833 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9834 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9835 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9836 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9837 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9838 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9839 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9840 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9841 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9842 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9843 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9844 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9845 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9846 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9847 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9848 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9849 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9850 }
388092d5
AB
9851
9852 if (flag_selective_scheduling2
9853 && !maybe_skip_selective_scheduling ())
9854 run_selective_scheduling ();
9855 else
9856 schedule_ebbs ();
9857
9858       /* Redo alignment computation, as it might have gone wrong.  */
9859 compute_alignments ();
9860
6fb5fa3c
DB
9861 /* We cannot reuse this one because it has been corrupted by the
9862 evil glat. */
30028c85 9863 finish_bundle_states ();
30028c85 9864 free (stops_p);
048d0d36 9865 stops_p = NULL;
c263766c 9866 emit_insn_group_barriers (dump_file);
30028c85 9867
f4d578da 9868 ia64_final_schedule = 0;
eced69b5 9869 timevar_pop (TV_SCHED2);
f4d578da
BS
9870 }
9871 else
c263766c 9872 emit_all_insn_group_barriers (dump_file);
f2f90c63 9873
6fb5fa3c
DB
9874 df_analyze ();
9875
f12f25a7
RH
9876  /* A call must not be the last instruction in a function, so that the
9877     return address is still within the function and unwinding works
9878     properly.  Note that IA-64 differs from dwarf2 on this point.  */
d5fabb58 9879 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
f12f25a7 9880 {
dd3d2b35 9881 rtx_insn *insn;
f12f25a7
RH
9882 int saw_stop = 0;
9883
9884 insn = get_last_insn ();
9885 if (! INSN_P (insn))
9886 insn = prev_active_insn (insn);
2ca57608 9887 if (insn)
f12f25a7 9888 {
2ca57608 9889 /* Skip over insns that expand to nothing. */
b64925dc 9890 while (NONJUMP_INSN_P (insn)
2ca57608
L
9891 && get_attr_empty (insn) == EMPTY_YES)
9892 {
9893 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9894 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9895 saw_stop = 1;
9896 insn = prev_active_insn (insn);
9897 }
b64925dc 9898 if (CALL_P (insn))
2ca57608
L
9899 {
9900 if (! saw_stop)
9901 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9902 emit_insn (gen_break_f ());
9903 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9904 }
f12f25a7
RH
9905 }
9906 }
9907
f2f90c63 9908 emit_predicate_relation_info ();
014a1138 9909
2ba42841 9910 if (flag_var_tracking)
014a1138
JZ
9911 {
9912 timevar_push (TV_VAR_TRACKING);
9913 variable_tracking_main ();
9914 timevar_pop (TV_VAR_TRACKING);
9915 }
0d475361 9916 df_finish_pass (false);
c65ebc55
JW
9917}
9918\f
9919/* Return true if REGNO is used by the epilogue. */
9920
9921int
9c808aad 9922ia64_epilogue_uses (int regno)
c65ebc55 9923{
6ca3c22f
RH
9924 switch (regno)
9925 {
9926 case R_GR (1):
b23ba0b8
RH
9927 /* With a call to a function in another module, we will write a new
9928 value to "gp". After returning from such a call, we need to make
9929 sure the function restores the original gp-value, even if the
9930 function itself does not use the gp anymore. */
9931 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
9932
9933 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9934 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9935 /* For functions defined with the syscall_linkage attribute, all
9936 input registers are marked as live at all function exits. This
9937 prevents the register allocator from using the input registers,
9938 which in turn makes it possible to restart a system call after
9939 an interrupt without having to save/restore the input registers.
9940 This also prevents kernel data from leaking to application code. */
9941 return lookup_attribute ("syscall_linkage",
9942 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9943
9944 case R_BR (0):
9945 /* Conditional return patterns can't represent the use of `b0' as
9946 the return address, so we force the value live this way. */
9947 return 1;
6b6c1201 9948
6ca3c22f
RH
9949 case AR_PFS_REGNUM:
9950 /* Likewise for ar.pfs, which is used by br.ret. */
9951 return 1;
5527bf14 9952
6ca3c22f
RH
9953 default:
9954 return 0;
9955 }
c65ebc55 9956}
15b5aef3
RH
9957
9958/* Return true if REGNO is used by the frame unwinder. */
9959
9960int
9c808aad 9961ia64_eh_uses (int regno)
15b5aef3 9962{
09639a83 9963 unsigned int r;
6fb5fa3c 9964
15b5aef3
RH
9965 if (! reload_completed)
9966 return 0;
9967
6fb5fa3c
DB
9968 if (regno == 0)
9969 return 0;
9970
9971 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9972 if (regno == current_frame_info.r[r]
9973 || regno == emitted_frame_related_regs[r])
9974 return 1;
15b5aef3
RH
9975
9976 return 0;
9977}
c65ebc55 9978\f
1cdbd630 9979/* Return true if this goes in small data/bss. */
c65ebc55
JW
9980
9981 /* ??? We could also support our own long data here, generating movl/add/ld8
9982 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9983 code faster because there is one less load. This also includes incomplete
9984 types which can't go in sdata/sbss. */
9985
ae46c4e0 9986static bool
3101faab 9987ia64_in_small_data_p (const_tree exp)
ae46c4e0
RH
9988{
9989 if (TARGET_NO_SDATA)
9990 return false;
9991
3907500b
RH
9992 /* We want to merge strings, so we never consider them small data. */
9993 if (TREE_CODE (exp) == STRING_CST)
9994 return false;
9995
4c494a15
ZW
9996 /* Functions are never small data. */
9997 if (TREE_CODE (exp) == FUNCTION_DECL)
9998 return false;
9999
ae46c4e0
RH
10000 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
10001 {
f961457f 10002 const char *section = DECL_SECTION_NAME (exp);
826eb7ed 10003
ae46c4e0 10004 if (strcmp (section, ".sdata") == 0
826eb7ed
JB
10005 || strncmp (section, ".sdata.", 7) == 0
10006 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
10007 || strcmp (section, ".sbss") == 0
10008 || strncmp (section, ".sbss.", 6) == 0
10009 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
ae46c4e0
RH
10010 return true;
10011 }
10012 else
10013 {
10014 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
10015
10016 /* If this is an incomplete type with size 0, then we can't put it
10017 in sdata because it might be too big when completed. */
10018 if (size > 0 && size <= ia64_section_threshold)
10019 return true;
10020 }
10021
10022 return false;
10023}
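/* Illustrative sketch, assuming the default small-data threshold: a
   file-scope object such as

       static int counter;

   is small enough that ia64_in_small_data_p returns true for it, so it is
   placed in .sbss and can be addressed gp-relative with a short addl+load
   sequence instead of the movl/add/load sequence mentioned above.  An
   object placed explicitly with __attribute__ ((section (".sdata"))) is
   accepted by section name rather than by size.  */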
0c96007e 10024\f
ad0fc698
JW
10025/* Output assembly directives for prologue regions. */
10026
10027/* The current basic block number. */
10028
e0082a72 10029static bool last_block;
ad0fc698
JW
10030
10031/* True if we need a copy_state command at the start of the next block. */
10032
e0082a72 10033static bool need_copy_state;
ad0fc698 10034
658f32fd
AO
10035#ifndef MAX_ARTIFICIAL_LABEL_BYTES
10036# define MAX_ARTIFICIAL_LABEL_BYTES 30
10037#endif
10038
ad0fc698
JW
10039/* The function emits unwind directives for the start of an epilogue. */
10040
10041static void
7d3c6cd8
RH
10042process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
10043 bool unwind, bool frame ATTRIBUTE_UNUSED)
ad0fc698
JW
10044{
10045 /* If this isn't the last block of the function, then we need to label the
10046 current state, and copy it back in at the start of the next block. */
10047
e0082a72 10048 if (!last_block)
ad0fc698 10049 {
658f32fd
AO
10050 if (unwind)
10051 fprintf (asm_out_file, "\t.label_state %d\n",
10052 ++cfun->machine->state_num);
e0082a72 10053 need_copy_state = true;
ad0fc698
JW
10054 }
10055
658f32fd
AO
10056 if (unwind)
10057 fprintf (asm_out_file, "\t.restore sp\n");
ad0fc698 10058}
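/* Sketch of the directives this produces for an epilogue that is not in
   the last block of the function (the state number is the per-function
   counter in cfun->machine->state_num):

       .label_state 1
       .restore sp

   and, at the start of the next basic block, ia64_asm_unwind_emit emits

       .body
       .copy_state 1

   so the unwinder's view of the frame is restored after the early exit.  */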
0c96007e 10059
5c255b57 10060/* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
97e242b0 10061
5c255b57
RH
10062static void
10063process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10064 bool unwind, bool frame)
0c96007e 10065{
0c96007e 10066 rtx dest = SET_DEST (pat);
5c255b57 10067 rtx src = SET_SRC (pat);
0c96007e 10068
5c255b57 10069 if (dest == stack_pointer_rtx)
0c96007e
AM
10070 {
10071 if (GET_CODE (src) == PLUS)
5c255b57 10072 {
0c96007e
AM
10073 rtx op0 = XEXP (src, 0);
10074 rtx op1 = XEXP (src, 1);
e820471b
NS
10075
10076 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10077
10078 if (INTVAL (op1) < 0)
658f32fd
AO
10079 {
10080 gcc_assert (!frame_pointer_needed);
10081 if (unwind)
5c255b57 10082 fprintf (asm_out_file,
16998094 10083 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
658f32fd 10084 -INTVAL (op1));
658f32fd 10085 }
0186257f 10086 else
658f32fd 10087 process_epilogue (asm_out_file, insn, unwind, frame);
0c96007e 10088 }
0186257f 10089 else
e820471b 10090 {
5c255b57 10091 gcc_assert (src == hard_frame_pointer_rtx);
658f32fd 10092 process_epilogue (asm_out_file, insn, unwind, frame);
e820471b 10093 }
5c255b57
RH
10094 }
10095 else if (dest == hard_frame_pointer_rtx)
10096 {
10097 gcc_assert (src == stack_pointer_rtx);
10098 gcc_assert (frame_pointer_needed);
0186257f 10099
5c255b57
RH
10100 if (unwind)
10101 fprintf (asm_out_file, "\t.vframe r%d\n",
10102 ia64_dbx_register_number (REGNO (dest)));
0c96007e 10103 }
5c255b57
RH
10104 else
10105 gcc_unreachable ();
10106}
0c96007e 10107
5c255b57 10108/* This function processes a SET pattern for REG_CFA_REGISTER. */
97e242b0 10109
5c255b57
RH
10110static void
10111process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10112{
10113 rtx dest = SET_DEST (pat);
10114 rtx src = SET_SRC (pat);
5c255b57 10115 int dest_regno = REGNO (dest);
5f740973 10116 int src_regno;
97e242b0 10117
5f740973 10118 if (src == pc_rtx)
5c255b57 10119 {
5c255b57 10120 /* Saving return address pointer. */
5c255b57
RH
10121 if (unwind)
10122 fprintf (asm_out_file, "\t.save rp, r%d\n",
10123 ia64_dbx_register_number (dest_regno));
5f740973
RH
10124 return;
10125 }
10126
10127 src_regno = REGNO (src);
97e242b0 10128
5f740973
RH
10129 switch (src_regno)
10130 {
5c255b57
RH
10131 case PR_REG (0):
10132 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10133 if (unwind)
10134 fprintf (asm_out_file, "\t.save pr, r%d\n",
10135 ia64_dbx_register_number (dest_regno));
10136 break;
97e242b0 10137
5c255b57
RH
10138 case AR_UNAT_REGNUM:
10139 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10140 if (unwind)
10141 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10142 ia64_dbx_register_number (dest_regno));
10143 break;
97e242b0 10144
5c255b57
RH
10145 case AR_LC_REGNUM:
10146 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10147 if (unwind)
10148 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10149 ia64_dbx_register_number (dest_regno));
10150 break;
10151
10152 default:
10153 /* Everything else should indicate being stored to memory. */
10154 gcc_unreachable ();
0c96007e 10155 }
5c255b57 10156}
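/* Worked example: a prologue copy of the predicate registers into the GR
   chosen as current_frame_info.r[reg_save_pr], annotated with a
   REG_CFA_REGISTER note, reaches the PR_REG (0) case above and results in
   a directive such as

       .save pr, r35

   where the general-register number depends on the frame layout (r35 is
   only an illustration).  */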
97e242b0 10157
5c255b57 10158/* This function processes a SET pattern for REG_CFA_OFFSET. */
97e242b0 10159
5c255b57
RH
10160static void
10161process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10162{
10163 rtx dest = SET_DEST (pat);
10164 rtx src = SET_SRC (pat);
10165 int src_regno = REGNO (src);
10166 const char *saveop;
10167 HOST_WIDE_INT off;
10168 rtx base;
0c96007e 10169
5c255b57
RH
10170 gcc_assert (MEM_P (dest));
10171 if (GET_CODE (XEXP (dest, 0)) == REG)
10172 {
10173 base = XEXP (dest, 0);
10174 off = 0;
10175 }
10176 else
10177 {
10178 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10179 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10180 base = XEXP (XEXP (dest, 0), 0);
10181 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10182 }
97e242b0 10183
5c255b57
RH
10184 if (base == hard_frame_pointer_rtx)
10185 {
10186 saveop = ".savepsp";
10187 off = - off;
10188 }
10189 else
10190 {
10191 gcc_assert (base == stack_pointer_rtx);
10192 saveop = ".savesp";
10193 }
97e242b0 10194
5c255b57
RH
10195 src_regno = REGNO (src);
10196 switch (src_regno)
10197 {
10198 case BR_REG (0):
10199 gcc_assert (!current_frame_info.r[reg_save_b0]);
10200 if (unwind)
10201 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10202 saveop, off);
10203 break;
97e242b0 10204
5c255b57
RH
10205 case PR_REG (0):
10206 gcc_assert (!current_frame_info.r[reg_save_pr]);
10207 if (unwind)
10208 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10209 saveop, off);
10210 break;
97e242b0 10211
5c255b57
RH
10212 case AR_LC_REGNUM:
10213 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10214 if (unwind)
10215 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10216 saveop, off);
10217 break;
97e242b0 10218
5c255b57
RH
10219 case AR_PFS_REGNUM:
10220 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10221 if (unwind)
10222 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10223 saveop, off);
10224 break;
97e242b0 10225
5c255b57
RH
10226 case AR_UNAT_REGNUM:
10227 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10228 if (unwind)
10229 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10230 saveop, off);
10231 break;
97e242b0 10232
5c255b57
RH
10233 case GR_REG (4):
10234 case GR_REG (5):
10235 case GR_REG (6):
10236 case GR_REG (7):
10237 if (unwind)
10238 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10239 1 << (src_regno - GR_REG (4)));
10240 break;
97e242b0 10241
5c255b57
RH
10242 case BR_REG (1):
10243 case BR_REG (2):
10244 case BR_REG (3):
10245 case BR_REG (4):
10246 case BR_REG (5):
10247 if (unwind)
10248 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10249 1 << (src_regno - BR_REG (1)));
10250 break;
97e242b0 10251
5c255b57
RH
10252 case FR_REG (2):
10253 case FR_REG (3):
10254 case FR_REG (4):
10255 case FR_REG (5):
10256 if (unwind)
10257 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10258 1 << (src_regno - FR_REG (2)));
10259 break;
97e242b0 10260
5c255b57
RH
10261 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10262 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10263 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10264 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10265 if (unwind)
10266 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10267 1 << (src_regno - FR_REG (12)));
10268 break;
97e242b0 10269
5c255b57
RH
10270 default:
10271 /* ??? For some reason we mark other general registers, even those
10272 we can't represent in the unwind info. Ignore them. */
10273 break;
10274 }
0c96007e
AM
10275}
10276
0c96007e
AM
10277/* This function looks at a single insn and emits any directives
10278 required to unwind this insn. */
5c255b57 10279
a68b5e52 10280static void
ac44248e 10281ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
0c96007e 10282{
d5fabb58 10283 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
658f32fd 10284 bool frame = dwarf2out_do_frame ();
5c255b57
RH
10285 rtx note, pat;
10286 bool handled_one;
10287
10288 if (!unwind && !frame)
10289 return;
658f32fd 10290
5c255b57 10291 if (NOTE_INSN_BASIC_BLOCK_P (insn))
0c96007e 10292 {
fefa31b5
DM
10293 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10294 == EXIT_BLOCK_PTR_FOR_FN (cfun);
97e242b0 10295
5c255b57
RH
10296 /* Restore unwind state from immediately before the epilogue. */
10297 if (need_copy_state)
ad0fc698 10298 {
5c255b57 10299 if (unwind)
ad0fc698 10300 {
5c255b57
RH
10301 fprintf (asm_out_file, "\t.body\n");
10302 fprintf (asm_out_file, "\t.copy_state %d\n",
10303 cfun->machine->state_num);
ad0fc698 10304 }
5c255b57 10305 need_copy_state = false;
ad0fc698 10306 }
5c255b57 10307 }
ad0fc698 10308
b64925dc 10309 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
5c255b57
RH
10310 return;
10311
10312 /* Look for the ALLOC insn. */
10313 if (INSN_CODE (insn) == CODE_FOR_alloc)
10314 {
10315 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10316 int dest_regno = REGNO (dest);
ad0fc698 10317
5c255b57
RH
10318 /* If this is the final destination for ar.pfs, then this must
10319 be the alloc in the prologue. */
10320 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10321 {
10322 if (unwind)
10323 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10324 ia64_dbx_register_number (dest_regno));
10325 }
97e242b0 10326 else
5c255b57
RH
10327 {
10328 /* This must be an alloc before a sibcall. We must drop the
10329 old frame info. The easiest way to drop the old frame
10330 info is to ensure we had a ".restore sp" directive
10331 followed by a new prologue. If the procedure doesn't
10332 have a memory-stack frame, we'll issue a dummy ".restore
10333 sp" now. */
10334 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10335 	    /* If we haven't done process_epilogue () yet, do it now.  */
10336 process_epilogue (asm_out_file, insn, unwind, frame);
10337 if (unwind)
10338 fprintf (asm_out_file, "\t.prologue\n");
10339 }
10340 return;
10341 }
0c96007e 10342
5c255b57
RH
10343 handled_one = false;
10344 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10345 switch (REG_NOTE_KIND (note))
10346 {
10347 case REG_CFA_ADJUST_CFA:
10348 pat = XEXP (note, 0);
10349 if (pat == NULL)
10350 pat = PATTERN (insn);
10351 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10352 handled_one = true;
10353 break;
809d4ef1 10354
5c255b57
RH
10355 case REG_CFA_OFFSET:
10356 pat = XEXP (note, 0);
10357 if (pat == NULL)
10358 pat = PATTERN (insn);
10359 process_cfa_offset (asm_out_file, pat, unwind);
10360 handled_one = true;
10361 break;
809d4ef1 10362
5c255b57
RH
10363 case REG_CFA_REGISTER:
10364 pat = XEXP (note, 0);
10365 if (pat == NULL)
10366 pat = PATTERN (insn);
10367 process_cfa_register (asm_out_file, pat, unwind);
10368 handled_one = true;
10369 break;
10370
10371 case REG_FRAME_RELATED_EXPR:
10372 case REG_CFA_DEF_CFA:
10373 case REG_CFA_EXPRESSION:
10374 case REG_CFA_RESTORE:
10375 case REG_CFA_SET_VDRAP:
10376 /* Not used in the ia64 port. */
10377 gcc_unreachable ();
10378
10379 default:
10380 /* Not a frame-related note. */
10381 break;
10382 }
10383
10384 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10385 explicit action to take. No guessing required. */
10386 gcc_assert (handled_one);
0c96007e 10387}
c65ebc55 10388
a68b5e52
RH
10389/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10390
10391static void
10392ia64_asm_emit_except_personality (rtx personality)
10393{
10394 fputs ("\t.personality\t", asm_out_file);
10395 output_addr_const (asm_out_file, personality);
10396 fputc ('\n', asm_out_file);
10397}
10398
10399/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10400
10401static void
10402ia64_asm_init_sections (void)
10403{
10404 exception_section = get_unnamed_section (0, output_section_asm_op,
10405 "\t.handlerdata");
10406}
f0a0390e
RH
10407
10408/* Implement TARGET_DEBUG_UNWIND_INFO. */
10409
10410static enum unwind_info_type
10411ia64_debug_unwind_info (void)
10412{
10413 return UI_TARGET;
10414}
0551c32d 10415\f
af795c3c
RH
10416enum ia64_builtins
10417{
10418 IA64_BUILTIN_BSP,
c252db20
L
10419 IA64_BUILTIN_COPYSIGNQ,
10420 IA64_BUILTIN_FABSQ,
10421 IA64_BUILTIN_FLUSHRS,
fcb82ab0 10422 IA64_BUILTIN_INFQ,
b14446e2 10423 IA64_BUILTIN_HUGE_VALQ,
b6ca982f
UB
10424 IA64_BUILTIN_NANQ,
10425 IA64_BUILTIN_NANSQ,
b14446e2 10426 IA64_BUILTIN_max
af795c3c
RH
10427};
10428
b14446e2
SE
10429static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10430
c65ebc55 10431void
9c808aad 10432ia64_init_builtins (void)
c65ebc55 10433{
9649812a 10434 tree fpreg_type;
bf9ab6b6 10435 tree float80_type;
b14446e2 10436 tree decl;
9649812a
MM
10437
10438 /* The __fpreg type. */
10439 fpreg_type = make_node (REAL_TYPE);
4de67c26 10440 TYPE_PRECISION (fpreg_type) = 82;
9649812a
MM
10441 layout_type (fpreg_type);
10442 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10443
10444 /* The __float80 type. */
c65699ef
JM
10445 if (float64x_type_node != NULL_TREE
10446 && TYPE_MODE (float64x_type_node) == XFmode)
10447 float80_type = float64x_type_node;
10448 else
10449 {
10450 float80_type = make_node (REAL_TYPE);
10451 TYPE_PRECISION (float80_type) = 80;
10452 layout_type (float80_type);
10453 }
bf9ab6b6 10454 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
10455
10456 /* The __float128 type. */
02befdf4 10457 if (!TARGET_HPUX)
9649812a 10458 {
b14446e2 10459 tree ftype;
b6ca982f
UB
10460 tree const_string_type
10461 = build_pointer_type (build_qualified_type
10462 (char_type_node, TYPE_QUAL_CONST));
c252db20 10463
c65699ef
JM
10464 (*lang_hooks.types.register_builtin_type) (float128_type_node,
10465 "__float128");
c252db20
L
10466
10467 /* TFmode support builtins. */
c65699ef 10468 ftype = build_function_type_list (float128_type_node, NULL_TREE);
b14446e2
SE
10469 decl = add_builtin_function ("__builtin_infq", ftype,
10470 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10471 NULL, NULL_TREE);
10472 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
c252db20 10473
b14446e2
SE
10474 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10475 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10476 NULL, NULL_TREE);
10477 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
fcb82ab0 10478
c65699ef 10479 ftype = build_function_type_list (float128_type_node,
b6ca982f
UB
10480 const_string_type,
10481 NULL_TREE);
10482 decl = add_builtin_function ("__builtin_nanq", ftype,
10483 IA64_BUILTIN_NANQ, BUILT_IN_MD,
10484 "nanq", NULL_TREE);
10485 TREE_READONLY (decl) = 1;
10486 ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10487
10488 decl = add_builtin_function ("__builtin_nansq", ftype,
10489 IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10490 "nansq", NULL_TREE);
10491 TREE_READONLY (decl) = 1;
10492 ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10493
c65699ef
JM
10494 ftype = build_function_type_list (float128_type_node,
10495 float128_type_node,
c252db20
L
10496 NULL_TREE);
10497 decl = add_builtin_function ("__builtin_fabsq", ftype,
10498 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10499 "__fabstf2", NULL_TREE);
10500 TREE_READONLY (decl) = 1;
b14446e2 10501 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
c252db20 10502
c65699ef
JM
10503 ftype = build_function_type_list (float128_type_node,
10504 float128_type_node,
10505 float128_type_node,
c252db20
L
10506 NULL_TREE);
10507 decl = add_builtin_function ("__builtin_copysignq", ftype,
10508 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10509 "__copysigntf3", NULL_TREE);
10510 TREE_READONLY (decl) = 1;
b14446e2 10511 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
9649812a
MM
10512 }
10513 else
02befdf4 10514 /* Under HPUX, this is a synonym for "long double". */
9649812a
MM
10515 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10516 "__float128");
10517
f2972bf8 10518 /* Fwrite on VMS is non-standard. */
171da07a
RH
10519#if TARGET_ABI_OPEN_VMS
10520 vms_patch_builtins ();
10521#endif
f2972bf8 10522
6e34d3a3 10523#define def_builtin(name, type, code) \
c79efc4d
RÁE
10524 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10525 NULL, NULL_TREE)
0551c32d 10526
b14446e2 10527 decl = def_builtin ("__builtin_ia64_bsp",
c0676219
NF
10528 build_function_type_list (ptr_type_node, NULL_TREE),
10529 IA64_BUILTIN_BSP);
b14446e2 10530 ia64_builtins[IA64_BUILTIN_BSP] = decl;
ce152ef8 10531
b14446e2 10532 decl = def_builtin ("__builtin_ia64_flushrs",
c0676219
NF
10533 build_function_type_list (void_type_node, NULL_TREE),
10534 IA64_BUILTIN_FLUSHRS);
b14446e2 10535 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
ce152ef8 10536
0551c32d 10537#undef def_builtin
7d522000
SE
10538
10539 if (TARGET_HPUX)
10540 {
ccea4a27 10541 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
e79983f4 10542 set_user_assembler_name (decl, "_Isfinite");
ccea4a27 10543 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
e79983f4 10544 set_user_assembler_name (decl, "_Isfinitef");
ccea4a27 10545 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
e79983f4 10546 set_user_assembler_name (decl, "_Isfinitef128");
7d522000 10547 }
c65ebc55
JW
10548}
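/* Usage sketch for the TFmode builtins registered above, as they might
   appear in user code on a non-HP-UX IA-64 target (variable names are
   illustrative only, not taken from this file):

       __float128 inf = __builtin_infq ();
       __float128 x   = __builtin_fabsq (inf);
       __float128 y   = __builtin_copysignq (x, inf);

   Unless folded earlier, __builtin_fabsq and __builtin_copysignq expand
   to calls to __fabstf2 and __copysigntf3, the library names they are
   registered with above.  */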
10549
b6ca982f
UB
10550static tree
10551ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10552 tree *args, bool ignore ATTRIBUTE_UNUSED)
10553{
10554 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10555 {
10556 enum ia64_builtins fn_code = (enum ia64_builtins)
10557 DECL_FUNCTION_CODE (fndecl);
10558 switch (fn_code)
10559 {
10560 case IA64_BUILTIN_NANQ:
10561 case IA64_BUILTIN_NANSQ:
10562 {
10563 tree type = TREE_TYPE (TREE_TYPE (fndecl));
10564 const char *str = c_getstr (*args);
10565 int quiet = fn_code == IA64_BUILTIN_NANQ;
10566 REAL_VALUE_TYPE real;
10567
10568 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10569 return build_real (type, real);
10570 return NULL_TREE;
10571 }
10572
10573 default:
10574 break;
10575 }
10576 }
10577
10578#ifdef SUBTARGET_FOLD_BUILTIN
10579 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10580#endif
10581
10582 return NULL_TREE;
10583}
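/* For example, a call written as

       __float128 q = __builtin_nanq ("");

   has its string argument recognized by c_getstr, real_nan builds the
   quiet-NaN representation for TFmode, and the call folds to a REAL_CST at
   compile time; __builtin_nansq ("") folds the same way to a signalling
   NaN.  */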
10584
c65ebc55 10585rtx
9c808aad 10586ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
ef4bddc2 10587 machine_mode mode ATTRIBUTE_UNUSED,
9c808aad 10588 int ignore ATTRIBUTE_UNUSED)
c65ebc55 10589{
767fad4c 10590 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
97e242b0 10591 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
c65ebc55
JW
10592
10593 switch (fcode)
10594 {
ce152ef8 10595 case IA64_BUILTIN_BSP:
0551c32d
RH
10596 if (! target || ! register_operand (target, DImode))
10597 target = gen_reg_rtx (DImode);
10598 emit_insn (gen_bsp_value (target));
8419b675
RK
10599#ifdef POINTERS_EXTEND_UNSIGNED
10600 target = convert_memory_address (ptr_mode, target);
10601#endif
0551c32d 10602 return target;
ce152ef8
AM
10603
10604 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
10605 emit_insn (gen_flushrs ());
10606 return const0_rtx;
ce152ef8 10607
c252db20 10608 case IA64_BUILTIN_INFQ:
fcb82ab0 10609 case IA64_BUILTIN_HUGE_VALQ:
c252db20 10610 {
ef4bddc2 10611 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
c252db20
L
10612 REAL_VALUE_TYPE inf;
10613 rtx tmp;
10614
10615 real_inf (&inf);
555affd7 10616 tmp = const_double_from_real_value (inf, target_mode);
c252db20 10617
6aad068a 10618 tmp = validize_mem (force_const_mem (target_mode, tmp));
c252db20
L
10619
10620 if (target == 0)
6aad068a 10621 target = gen_reg_rtx (target_mode);
c252db20
L
10622
10623 emit_move_insn (target, tmp);
10624 return target;
10625 }
10626
b6ca982f
UB
10627 case IA64_BUILTIN_NANQ:
10628 case IA64_BUILTIN_NANSQ:
c252db20
L
10629 case IA64_BUILTIN_FABSQ:
10630 case IA64_BUILTIN_COPYSIGNQ:
10631 return expand_call (exp, target, ignore);
10632
c65ebc55 10633 default:
c252db20 10634 gcc_unreachable ();
c65ebc55
JW
10635 }
10636
0551c32d 10637 return NULL_RTX;
c65ebc55 10638}
0d7839da 10639
b14446e2
SE
10640/* Return the ia64 builtin for CODE. */
10641
10642static tree
10643ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10644{
10645 if (code >= IA64_BUILTIN_max)
10646 return error_mark_node;
10647
10648 return ia64_builtins[code];
10649}
10650
76b0cbf8
RS
10651/* Implement TARGET_FUNCTION_ARG_PADDING.
10652
10653    On HP-UX IA64, aggregate parameters are passed in the
0d7839da
SE
10654 most significant bits of the stack slot. */
10655
76b0cbf8
RS
10656static pad_direction
10657ia64_function_arg_padding (machine_mode mode, const_tree type)
0d7839da 10658{
76b0cbf8
RS
10659 /* Exception to normal case for structures/unions/etc. */
10660 if (TARGET_HPUX
10661 && type
10662 && AGGREGATE_TYPE_P (type)
10663 && int_size_in_bytes (type) < UNITS_PER_WORD)
10664 return PAD_UPWARD;
0d7839da 10665
76b0cbf8
RS
10666 /* Fall back to the default. */
10667 return default_function_arg_padding (mode, type);
0d7839da 10668}
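/* Concrete case: on HP-UX, a by-value argument of type

       struct s { char c[3]; };

   is smaller than UNITS_PER_WORD, so PAD_UPWARD is returned and the data
   occupies the most significant bits of its argument slot, as described in
   the comment above; everywhere else the default padding rule applies.  */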
686f3bf0 10669
c47c29c8
L
10670/* Emit text to declare externally defined variables and functions, because
10671 the Intel assembler does not support undefined externals. */
686f3bf0 10672
c47c29c8
L
10673void
10674ia64_asm_output_external (FILE *file, tree decl, const char *name)
686f3bf0 10675{
c47c29c8
L
10676 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10677 set in order to avoid putting out names that are never really
10678 used. */
10679 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
686f3bf0 10680 {
c47c29c8 10681 /* maybe_assemble_visibility will return 1 if the assembler
2e226e66 10682 visibility directive is output. */
c47c29c8
L
10683 int need_visibility = ((*targetm.binds_local_p) (decl)
10684 && maybe_assemble_visibility (decl));
57d4f65c 10685
c47c29c8
L
10686 /* GNU as does not need anything here, but the HP linker does
10687 need something for external functions. */
10688 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10689 && TREE_CODE (decl) == FUNCTION_DECL)
812b587e 10690 (*targetm.asm_out.globalize_decl_name) (file, decl);
c47c29c8
L
10691 else if (need_visibility && !TARGET_GNU_AS)
10692 (*targetm.asm_out.globalize_label) (file, name);
686f3bf0
SE
10693 }
10694}
10695
1f7aa7cd 10696/* Set SImode div/mod functions; init_integral_libfuncs only initializes
6bc709c1
L
10697 modes of word_mode and larger. Rename the TFmode libfuncs using the
10698 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10699 backward compatibility. */
1f7aa7cd
SE
10700
10701static void
10702ia64_init_libfuncs (void)
10703{
10704 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10705 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10706 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10707 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
6bc709c1
L
10708
10709 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10710 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10711 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10712 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10713 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10714
10715 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10716 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10717 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10718 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10719 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10720 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10721
10722 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10723 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
4a73d865 10724 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
6bc709c1
L
10725 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10726 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10727
10728 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10729 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
4a73d865 10730 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
2a3ebe77
JM
10731 /* HP-UX 11.23 libc does not have a function for unsigned
10732 SImode-to-TFmode conversion. */
10733 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
1f7aa7cd
SE
10734}
10735
c15c90bb 10736/* Rename all the TFmode libfuncs using the HPUX conventions. */
738e7b39 10737
c15c90bb
ZW
10738static void
10739ia64_hpux_init_libfuncs (void)
10740{
1f7aa7cd
SE
10741 ia64_init_libfuncs ();
10742
bdbba3c2
SE
10743 /* The HP SI millicode division and mod functions expect DI arguments.
10744 By turning them off completely we avoid using both libgcc and the
10745 non-standard millicode routines and use the HP DI millicode routines
10746 instead. */
10747
10748 set_optab_libfunc (sdiv_optab, SImode, 0);
10749 set_optab_libfunc (udiv_optab, SImode, 0);
10750 set_optab_libfunc (smod_optab, SImode, 0);
10751 set_optab_libfunc (umod_optab, SImode, 0);
10752
10753 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10754 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10755 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10756 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10757
10758 /* HP-UX libc has TF min/max/abs routines in it. */
c15c90bb
ZW
10759 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10760 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10761 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
c15c90bb 10762
24ea7948
ZW
10763 /* ia64_expand_compare uses this. */
10764 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10765
10766 /* These should never be used. */
10767 set_optab_libfunc (eq_optab, TFmode, 0);
10768 set_optab_libfunc (ne_optab, TFmode, 0);
10769 set_optab_libfunc (gt_optab, TFmode, 0);
10770 set_optab_libfunc (ge_optab, TFmode, 0);
10771 set_optab_libfunc (lt_optab, TFmode, 0);
10772 set_optab_libfunc (le_optab, TFmode, 0);
c15c90bb 10773}
738e7b39
RK
10774
10775/* Rename the division and modulus functions in VMS. */
10776
10777static void
10778ia64_vms_init_libfuncs (void)
10779{
10780 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10781 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10782 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10783 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10784 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10785 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10786 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10787 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
f2972bf8
DR
10788#ifdef MEM_LIBFUNCS_INIT
10789 MEM_LIBFUNCS_INIT;
10790#endif
738e7b39 10791}
6bc709c1
L
10792
10793/* Rename the TFmode libfuncs available from soft-fp in glibc using
10794 the HPUX conventions. */
10795
10796static void
10797ia64_sysv4_init_libfuncs (void)
10798{
10799 ia64_init_libfuncs ();
10800
10801 /* These functions are not part of the HPUX TFmode interface. We
10802 use them instead of _U_Qfcmp, which doesn't work the way we
10803 expect. */
10804 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10805 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10806 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10807 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10808 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10809 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10810
10811 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10812 glibc doesn't have them. */
10813}
c252db20
L
10814
10815/* Use soft-fp. */
10816
10817static void
10818ia64_soft_fp_init_libfuncs (void)
10819{
10820}
f2972bf8
DR
10821
10822static bool
095a2d76 10823ia64_vms_valid_pointer_mode (scalar_int_mode mode)
f2972bf8
DR
10824{
10825 return (mode == SImode || mode == DImode);
10826}
ae46c4e0 10827\f
9b580a0b
RH
10828/* For HPUX, it is illegal to have relocations in shared segments. */
10829
10830static int
10831ia64_hpux_reloc_rw_mask (void)
10832{
10833 return 3;
10834}
10835
10836/* For others, relax this so that relocations to local data goes in
10837 read-only segments, but we still cannot allow global relocations
10838 in read-only segments. */
10839
10840static int
10841ia64_reloc_rw_mask (void)
10842{
10843 return flag_pic ? 3 : 2;
10844}
10845
d6b5193b
RS
10846/* Return the section to use for X. The only special thing we do here
10847 is to honor small data. */
b64a1b53 10848
d6b5193b 10849static section *
ef4bddc2 10850ia64_select_rtx_section (machine_mode mode, rtx x,
9c808aad 10851 unsigned HOST_WIDE_INT align)
b64a1b53
RH
10852{
10853 if (GET_MODE_SIZE (mode) > 0
1f4a2e84
SE
10854 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10855 && !TARGET_NO_SDATA)
d6b5193b 10856 return sdata_section;
b64a1b53 10857 else
d6b5193b 10858 return default_elf_select_rtx_section (mode, x, align);
b64a1b53
RH
10859}
10860
1e1bd14e 10861static unsigned int
abb8b19a
AM
10862ia64_section_type_flags (tree decl, const char *name, int reloc)
10863{
10864 unsigned int flags = 0;
10865
10866 if (strcmp (name, ".sdata") == 0
10867 || strncmp (name, ".sdata.", 7) == 0
10868 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10869 || strncmp (name, ".sdata2.", 8) == 0
10870 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10871 || strcmp (name, ".sbss") == 0
10872 || strncmp (name, ".sbss.", 6) == 0
10873 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10874 flags = SECTION_SMALL;
10875
9b580a0b 10876 flags |= default_section_type_flags (decl, name, reloc);
abb8b19a 10877 return flags;
1e1bd14e
RH
10878}
10879
57782ad8
MM
10880/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10881 structure type and that the address of that type should be passed
10882 in out0, rather than in r8. */
10883
10884static bool
10885ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10886{
10887 tree ret_type = TREE_TYPE (fntype);
10888
10889 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10890 as the structure return address parameter, if the return value
10891 type has a non-trivial copy constructor or destructor. It is not
10892 clear if this same convention should be used for other
10893 programming languages. Until G++ 3.4, we incorrectly used r8 for
10894 these return values. */
10895 return (abi_version_at_least (2)
10896 && ret_type
10897 && TYPE_MODE (ret_type) == BLKmode
10898 && TREE_ADDRESSABLE (ret_type)
dcc97066 10899 && lang_GNU_CXX ());
57782ad8 10900}
1e1bd14e 10901
5f13cfc6
RH
10902/* Output the assembler code for a thunk function. THUNK_DECL is the
10903 declaration for the thunk function itself, FUNCTION is the decl for
10904 the target function. DELTA is an immediate constant offset to be
272d0bee 10905 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
10906 *(*this + vcall_offset) should be added to THIS. */
10907
c590b625 10908static void
9c808aad
AJ
10909ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10910 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10911 tree function)
483ab821 10912{
dd3d2b35
DM
10913 rtx this_rtx, funexp;
10914 rtx_insn *insn;
57782ad8
MM
10915 unsigned int this_parmno;
10916 unsigned int this_regno;
13f70342 10917 rtx delta_rtx;
5f13cfc6 10918
599aedd9 10919 reload_completed = 1;
fe3ad572 10920 epilogue_completed = 1;
599aedd9 10921
5f13cfc6
RH
10922 /* Set things up as ia64_expand_prologue might. */
10923 last_scratch_gr_reg = 15;
10924
10925 memset (&current_frame_info, 0, sizeof (current_frame_info));
10926 current_frame_info.spill_cfa_off = -16;
10927 current_frame_info.n_input_regs = 1;
10928 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10929
5f13cfc6 10930 /* Mark the end of the (empty) prologue. */
2e040219 10931 emit_note (NOTE_INSN_PROLOGUE_END);
5f13cfc6 10932
57782ad8
MM
10933 /* Figure out whether "this" will be the first parameter (the
10934 typical case) or the second parameter (as happens when the
10935 virtual function returns certain class objects). */
10936 this_parmno
10937 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10938 ? 1 : 0);
10939 this_regno = IN_REG (this_parmno);
10940 if (!TARGET_REG_NAMES)
10941 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10942
0a2aaacc 10943 this_rtx = gen_rtx_REG (Pmode, this_regno);
13f70342
RH
10944
10945 /* Apply the constant offset, if required. */
10946 delta_rtx = GEN_INT (delta);
36c216e5
MM
10947 if (TARGET_ILP32)
10948 {
57782ad8 10949 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
36c216e5 10950 REG_POINTER (tmp) = 1;
13f70342 10951 if (delta && satisfies_constraint_I (delta_rtx))
36c216e5 10952 {
0a2aaacc 10953 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
36c216e5
MM
10954 delta = 0;
10955 }
10956 else
0a2aaacc 10957 emit_insn (gen_ptr_extend (this_rtx, tmp));
36c216e5 10958 }
5f13cfc6
RH
10959 if (delta)
10960 {
13f70342 10961 if (!satisfies_constraint_I (delta_rtx))
5f13cfc6
RH
10962 {
10963 rtx tmp = gen_rtx_REG (Pmode, 2);
10964 emit_move_insn (tmp, delta_rtx);
10965 delta_rtx = tmp;
10966 }
0a2aaacc 10967 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
5f13cfc6
RH
10968 }
10969
10970 /* Apply the offset from the vtable, if required. */
10971 if (vcall_offset)
10972 {
10973 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10974 rtx tmp = gen_rtx_REG (Pmode, 2);
10975
36c216e5
MM
10976 if (TARGET_ILP32)
10977 {
10978 rtx t = gen_rtx_REG (ptr_mode, 2);
10979 REG_POINTER (t) = 1;
0a2aaacc 10980 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
13f70342 10981 if (satisfies_constraint_I (vcall_offset_rtx))
36c216e5 10982 {
13f70342 10983 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
36c216e5
MM
10984 vcall_offset = 0;
10985 }
10986 else
10987 emit_insn (gen_ptr_extend (tmp, t));
10988 }
10989 else
0a2aaacc 10990 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
5f13cfc6 10991
36c216e5 10992 if (vcall_offset)
5f13cfc6 10993 {
13f70342 10994 if (!satisfies_constraint_J (vcall_offset_rtx))
36c216e5
MM
10995 {
10996 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10997 emit_move_insn (tmp2, vcall_offset_rtx);
10998 vcall_offset_rtx = tmp2;
10999 }
11000 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
5f13cfc6 11001 }
5f13cfc6 11002
36c216e5 11003 if (TARGET_ILP32)
13f70342 11004 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
36c216e5
MM
11005 else
11006 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
5f13cfc6 11007
0a2aaacc 11008 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
5f13cfc6
RH
11009 }
11010
11011 /* Generate a tail call to the target function. */
11012 if (! TREE_USED (function))
11013 {
11014 assemble_external (function);
11015 TREE_USED (function) = 1;
11016 }
11017 funexp = XEXP (DECL_RTL (function), 0);
11018 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11019 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
11020 insn = get_last_insn ();
11021 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
11022
11023 /* Code generation for calls relies on splitting. */
11024 reload_completed = 1;
fe3ad572 11025 epilogue_completed = 1;
599aedd9
RH
11026 try_split (PATTERN (insn), insn, 0);
11027
5f13cfc6
RH
11028 emit_barrier ();
11029
11030 /* Run just enough of rest_of_compilation to get the insns emitted.
11031 There's not really enough bulk here to make other passes such as
11032 instruction scheduling worth while. Note that use_thunk calls
11033 assemble_start_function and assemble_end_function. */
599aedd9 11034
18dbd950 11035 emit_all_insn_group_barriers (NULL);
5f13cfc6 11036 insn = get_insns ();
5f13cfc6
RH
11037 shorten_branches (insn);
11038 final_start_function (insn, file, 1);
c9d691e9 11039 final (insn, file, 1);
5f13cfc6 11040 final_end_function ();
599aedd9
RH
11041
11042 reload_completed = 0;
fe3ad572 11043 epilogue_completed = 0;
483ab821
MM
11044}
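/* The adjustment the emitted thunk performs, written as a C sketch
   (illustrative only; the real work is done on RTL above):

       this += delta;
       if (vcall_offset)
         this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
       tail-call FUNCTION (this, ...);

   i.e. a fixed offset plus, optionally, an offset loaded from the
   object's vtable.  */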
11045
351a758b
KH
11046/* Worker function for TARGET_STRUCT_VALUE_RTX. */
11047
11048static rtx
57782ad8 11049ia64_struct_value_rtx (tree fntype,
351a758b
KH
11050 int incoming ATTRIBUTE_UNUSED)
11051{
f2972bf8
DR
11052 if (TARGET_ABI_OPEN_VMS ||
11053 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
57782ad8 11054 return NULL_RTX;
351a758b
KH
11055 return gen_rtx_REG (Pmode, GR_REG (8));
11056}
11057
88ed5ef5 11058static bool
18e2a8b8 11059ia64_scalar_mode_supported_p (scalar_mode mode)
88ed5ef5
SE
11060{
11061 switch (mode)
11062 {
4e10a5a7
RS
11063 case E_QImode:
11064 case E_HImode:
11065 case E_SImode:
11066 case E_DImode:
11067 case E_TImode:
88ed5ef5
SE
11068 return true;
11069
4e10a5a7
RS
11070 case E_SFmode:
11071 case E_DFmode:
11072 case E_XFmode:
11073 case E_RFmode:
88ed5ef5
SE
11074 return true;
11075
4e10a5a7 11076 case E_TFmode:
c252db20 11077 return true;
88ed5ef5
SE
11078
11079 default:
11080 return false;
11081 }
11082}
11083
f61134e8 11084static bool
ef4bddc2 11085ia64_vector_mode_supported_p (machine_mode mode)
f61134e8
RH
11086{
11087 switch (mode)
11088 {
4e10a5a7
RS
11089 case E_V8QImode:
11090 case E_V4HImode:
11091 case E_V2SImode:
f61134e8
RH
11092 return true;
11093
4e10a5a7 11094 case E_V2SFmode:
f61134e8
RH
11095 return true;
11096
11097 default:
11098 return false;
11099 }
11100}
11101
694a2f6e
EB
11102/* Implement the FUNCTION_PROFILER macro. */
11103
2b4f149b
RH
11104void
11105ia64_output_function_profiler (FILE *file, int labelno)
11106{
694a2f6e
EB
11107 bool indirect_call;
11108
11109 /* If the function needs a static chain and the static chain
11110 register is r15, we use an indirect call so as to bypass
11111 the PLT stub in case the executable is dynamically linked,
11112 because the stub clobbers r15 as per 5.3.6 of the psABI.
11113 We don't need to do that in non canonical PIC mode. */
11114
11115 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11116 {
11117 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11118 indirect_call = true;
11119 }
11120 else
11121 indirect_call = false;
11122
2b4f149b
RH
11123 if (TARGET_GNU_AS)
11124 fputs ("\t.prologue 4, r40\n", file);
11125 else
11126 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11127 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
bd8633a3
RH
11128
11129 if (NO_PROFILE_COUNTERS)
694a2f6e 11130 fputs ("\tmov out3 = r0\n", file);
bd8633a3
RH
11131 else
11132 {
11133 char buf[20];
11134 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11135
11136 if (TARGET_AUTO_PIC)
11137 fputs ("\tmovl out3 = @gprel(", file);
11138 else
11139 fputs ("\taddl out3 = @ltoff(", file);
11140 assemble_name (file, buf);
11141 if (TARGET_AUTO_PIC)
694a2f6e 11142 fputs (")\n", file);
bd8633a3 11143 else
694a2f6e 11144 fputs ("), r1\n", file);
bd8633a3
RH
11145 }
11146
694a2f6e
EB
11147 if (indirect_call)
11148 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11149 fputs ("\t;;\n", file);
11150
2b4f149b 11151 fputs ("\t.save rp, r42\n", file);
bd8633a3 11152 fputs ("\tmov out2 = b0\n", file);
694a2f6e
EB
11153 if (indirect_call)
11154 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2b4f149b 11155 fputs ("\t.body\n", file);
2b4f149b 11156 fputs ("\tmov out1 = r1\n", file);
694a2f6e
EB
11157 if (indirect_call)
11158 {
11159 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11160 fputs ("\tmov b6 = r16\n", file);
11161 fputs ("\tld8 r1 = [r14]\n", file);
11162 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11163 }
11164 else
11165 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2b4f149b
RH
11166}
11167
d26afa4f
SE
11168static GTY(()) rtx mcount_func_rtx;
11169static rtx
11170gen_mcount_func_rtx (void)
11171{
11172 if (!mcount_func_rtx)
11173 mcount_func_rtx = init_one_libfunc ("_mcount");
11174 return mcount_func_rtx;
11175}
11176
11177void
11178ia64_profile_hook (int labelno)
11179{
11180 rtx label, ip;
11181
11182 if (NO_PROFILE_COUNTERS)
11183 label = const0_rtx;
11184 else
11185 {
11186 char buf[30];
11187 const char *label_name;
11188 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
55504c7c 11189 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
d26afa4f
SE
11190 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11191 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11192 }
11193 ip = gen_reg_rtx (Pmode);
11194 emit_insn (gen_ip_value (ip));
11195 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
db69559b 11196 VOIDmode,
d26afa4f
SE
11197 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11198 ip, Pmode,
11199 label, Pmode);
11200}
11201
cac24f06
JM
11202/* Return the mangling of TYPE if it is an extended fundamental type. */
11203
11204static const char *
3101faab 11205ia64_mangle_type (const_tree type)
cac24f06 11206{
608063c3
JB
11207 type = TYPE_MAIN_VARIANT (type);
11208
11209 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11210 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11211 return NULL;
11212
cac24f06
JM
11213 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11214 mangled as "e". */
11215 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11216 return "g";
11217 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11218 an extended mangling. Elsewhere, "e" is available since long
11219 double is 80 bits. */
11220 if (TYPE_MODE (type) == XFmode)
11221 return TARGET_HPUX ? "u9__float80" : "e";
4de67c26
JM
11222 if (TYPE_MODE (type) == RFmode)
11223 return "u7__fpreg";
11224 return NULL;
11225}
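/* Mangling examples implied by the rules above (assuming the usual
   Itanium C++ mangling of a one-argument function "f"; these only
   illustrate the return values "g", "e"/"u9__float80" and "u7__fpreg"):

       void f (__float128);   ->  _Z1fg            (non-HP-UX)
       void f (__float80);    ->  _Z1fe            (non-HP-UX)
                                  _Z1fu9__float80  (HP-UX)
       void f (__fpreg);      ->  _Z1fu7__fpreg                      */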
11226
11227/* Return the diagnostic message string if conversion from FROMTYPE to
11228 TOTYPE is not allowed, NULL otherwise. */
11229static const char *
3101faab 11230ia64_invalid_conversion (const_tree fromtype, const_tree totype)
4de67c26
JM
11231{
11232 /* Reject nontrivial conversion to or from __fpreg. */
11233 if (TYPE_MODE (fromtype) == RFmode
11234 && TYPE_MODE (totype) != RFmode
11235 && TYPE_MODE (totype) != VOIDmode)
11236 return N_("invalid conversion from %<__fpreg%>");
11237 if (TYPE_MODE (totype) == RFmode
11238 && TYPE_MODE (fromtype) != RFmode)
11239 return N_("invalid conversion to %<__fpreg%>");
11240 return NULL;
11241}
11242
11243/* Return the diagnostic message string if the unary operation OP is
11244 not permitted on TYPE, NULL otherwise. */
11245static const char *
3101faab 11246ia64_invalid_unary_op (int op, const_tree type)
4de67c26
JM
11247{
11248 /* Reject operations on __fpreg other than unary + or &. */
11249 if (TYPE_MODE (type) == RFmode
11250 && op != CONVERT_EXPR
11251 && op != ADDR_EXPR)
11252 return N_("invalid operation on %<__fpreg%>");
11253 return NULL;
11254}
11255
11256/* Return the diagnostic message string if the binary operation OP is
11257 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11258static const char *
3101faab 11259ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
4de67c26
JM
11260{
11261 /* Reject operations on __fpreg. */
11262 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11263 return N_("invalid operation on %<__fpreg%>");
cac24f06
JM
11264 return NULL;
11265}
11266
812b587e
SE
11267/* HP-UX version_id attribute.
11268 For object foo, if the version_id is set to 1234 put out an alias
11269    of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11270 other than an alias statement because it is an illegal symbol name. */
11271
11272static tree
11273ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11274 tree name ATTRIBUTE_UNUSED,
11275 tree args,
11276 int flags ATTRIBUTE_UNUSED,
11277 bool *no_add_attrs)
11278{
11279 tree arg = TREE_VALUE (args);
11280
11281 if (TREE_CODE (arg) != STRING_CST)
11282 {
11283       error ("version attribute is not a string");
11284 *no_add_attrs = true;
11285 return NULL_TREE;
11286 }
11287 return NULL_TREE;
11288}
11289
a31fa2e0
SE
11290/* Target hook for c_mode_for_suffix. */
11291
ef4bddc2 11292static machine_mode
a31fa2e0
SE
11293ia64_c_mode_for_suffix (char suffix)
11294{
11295 if (suffix == 'q')
11296 return TFmode;
11297 if (suffix == 'w')
11298 return XFmode;
11299
11300 return VOIDmode;
11301}
11302
f3a83111
SE
11303static GTY(()) rtx ia64_dconst_0_5_rtx;
11304
11305rtx
11306ia64_dconst_0_5 (void)
11307{
11308 if (! ia64_dconst_0_5_rtx)
11309 {
11310 REAL_VALUE_TYPE rv;
11311 real_from_string (&rv, "0.5");
11312 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11313 }
11314 return ia64_dconst_0_5_rtx;
11315}
11316
11317static GTY(()) rtx ia64_dconst_0_375_rtx;
11318
11319rtx
11320ia64_dconst_0_375 (void)
11321{
11322 if (! ia64_dconst_0_375_rtx)
11323 {
11324 REAL_VALUE_TYPE rv;
11325 real_from_string (&rv, "0.375");
11326 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11327 }
11328 return ia64_dconst_0_375_rtx;
11329}
11330
ef4bddc2 11331static machine_mode
ffa88471
SE
11332ia64_get_reg_raw_mode (int regno)
11333{
11334 if (FR_REGNO_P (regno))
11335 return XFmode;
11336   return default_get_reg_raw_mode (regno);
11337}
f3a83111 11338
d9886a9e
L
11339/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11340 anymore. */
11341
11342bool
ef4bddc2 11343ia64_member_type_forces_blk (const_tree, machine_mode mode)
d9886a9e
L
11344{
11345 return TARGET_HPUX && mode == TFmode;
11346}
11347
f16d3f39
JH
11348/* Always default to .text section until HP-UX linker is fixed. */
11349
11350ATTRIBUTE_UNUSED static section *
11351ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11352 enum node_frequency freq ATTRIBUTE_UNUSED,
11353 bool startup ATTRIBUTE_UNUSED,
11354 bool exit ATTRIBUTE_UNUSED)
11355{
11356 return NULL;
11357}
e6431744
RH
11358\f
11359/* Construct (set target (vec_select op0 (parallel perm))) and
11360 return true if that's a valid instruction in the active ISA. */
11361
11362static bool
11363expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11364{
11365 rtx rperm[MAX_VECT_LEN], x;
11366 unsigned i;
11367
11368 for (i = 0; i < nelt; ++i)
11369 rperm[i] = GEN_INT (perm[i]);
11370
11371 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11372 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
f7df4a84 11373 x = gen_rtx_SET (target, x);
e6431744 11374
647d790d
DM
11375 rtx_insn *insn = emit_insn (x);
11376 if (recog_memoized (insn) < 0)
e6431744 11377 {
647d790d 11378 remove_insn (insn);
e6431744
RH
11379 return false;
11380 }
11381 return true;
11382}
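/* For instance, with V4HImode and perm = { 1, 0, 3, 2 } the insn built
   above is

       (set (reg:V4HI target)
            (vec_select:V4HI (reg:V4HI op0)
                             (parallel [(const_int 1) (const_int 0)
                                        (const_int 3) (const_int 2)])))

   and it is kept only if recog_memoized accepts it for the active ISA.  */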
11383
11384/* Similar, but generate a vec_concat from op0 and op1 as well. */
11385
11386static bool
11387expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11388 const unsigned char *perm, unsigned nelt)
11389{
ef4bddc2 11390 machine_mode v2mode;
e6431744
RH
11391 rtx x;
11392
490d0f6c
RS
11393 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
11394 return false;
e6431744
RH
11395 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11396 return expand_vselect (target, x, perm, nelt);
11397}
11398
11399/* Try to expand a no-op permutation. */
11400
11401static bool
11402expand_vec_perm_identity (struct expand_vec_perm_d *d)
11403{
11404 unsigned i, nelt = d->nelt;
11405
11406 for (i = 0; i < nelt; ++i)
11407 if (d->perm[i] != i)
11408 return false;
11409
11410 if (!d->testing_p)
11411 emit_move_insn (d->target, d->op0);
11412
11413 return true;
11414}
11415
11416/* Try to expand D via a shrp instruction. */
11417
11418static bool
11419expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11420{
11421 unsigned i, nelt = d->nelt, shift, mask;
2d130b31 11422 rtx tmp, hi, lo;
e6431744
RH
11423
11424 /* ??? Don't force V2SFmode into the integer registers. */
11425 if (d->vmode == V2SFmode)
11426 return false;
11427
11428 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11429
11430 shift = d->perm[0];
2d130b31
UB
11431 if (BYTES_BIG_ENDIAN && shift > nelt)
11432 return false;
11433
e6431744
RH
11434 for (i = 1; i < nelt; ++i)
11435 if (d->perm[i] != ((shift + i) & mask))
11436 return false;
11437
11438 if (d->testing_p)
11439 return true;
11440
2d130b31
UB
11441 hi = shift < nelt ? d->op1 : d->op0;
11442 lo = shift < nelt ? d->op0 : d->op1;
11443
11444 shift %= nelt;
11445
e6431744
RH
11446 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11447
11448 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11449 gcc_assert (IN_RANGE (shift, 1, 63));
11450
11451 /* Recall that big-endian elements are numbered starting at the top of
11452 the register. Ideally we'd have a shift-left-pair. But since we
11453 don't, convert to a shift the other direction. */
11454 if (BYTES_BIG_ENDIAN)
11455 shift = 64 - shift;
11456
11457 tmp = gen_reg_rtx (DImode);
2d130b31
UB
11458 hi = gen_lowpart (DImode, hi);
11459 lo = gen_lowpart (DImode, lo);
11460 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
e6431744
RH
11461
11462 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11463 return true;
11464}
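/* Example of a permutation this matches (little-endian, two-operand
   V8QImode): perm = { 2, 3, 4, 5, 6, 7, 8, 9 } gives shift = 2, every
   later element equals (shift + i) & 15, and the result is produced by a
   single shrp of the two 64-bit halves with a bit count of
   2 * GET_MODE_UNIT_SIZE * BITS_PER_UNIT = 16.  */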

/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}
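
/* For instance (hypothetical selector): a two-operand V4HImode request
   with d->perm = { 4, 0, 5, 1 } is retried above as { 0, 4, 1, 5 }
   applied to the concatenation of op1 and op0, i.e. a plain low-half
   interleave with the operands swapped, in case only that form is
   recognized by the machine description.  */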

/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case E_V2SImode:
    case E_V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case E_V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case E_V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}
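
/* Example (hypothetical, little-endian): to broadcast element 3 of a
   V8QImode operand, d->perm = { 3, 3, 3, 3, 3, 3, 3, 3 }, the code
   extracts the 8-bit field at bit offset 24 into a DImode temporary and
   then replicates that byte into every lane of the target via the
   mux1_brcst_qi pattern.  A V2SImode or V2SFmode broadcast instead
   interleaves the chosen element with itself.  */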

/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine where the elements come from.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
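
/* A worked example (hypothetical selector): for V4HImode with
   d->perm = { 0, 5, 1, 4 }, CONTENTS is 0x33 and matches the "punpck
   even halves" test.  DREMAP becomes { 0, 4, 1, 5 }, interleaving the
   low halves of op0 and op1 into a temporary, and DFINAL becomes the
   one-operand permutation { 0, 3, 2, 1 } of that temporary, which
   expand_vec_perm_1 can emit as a single V4HImode shuffle.  */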

/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 operations and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}
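
/* Worked example (hypothetical selector): d->perm = { 0, 5, 2, 7 }
   yields perm2 = { 0, 1, 2, 3 } and a lane mask of { -1, 0, -1, 0 }.
   Both inputs are run through the same vec_select, and the result is
   assembled as (mask & t0) | (~mask & t1), taking lanes 0 and 2 from
   op0 and lanes 1 and 3 from op1.  */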

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

/* Expand a constant vector permutation described by OPERANDS: the
   target, the two input vectors, and the constant selector.  Return
   true if the permutation could be expanded.  */

bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}
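
/* For instance (hypothetical selectors): { 1, 1, 3, 3 } only references
   the first input, so WHICH == 1 and the request is folded into a
   one-operand permutation of op0; { 0, 4, 1, 5 } references both inputs
   (WHICH == 3) and is folded only when op0 and op1 are the same
   register, with the two-operand form retried if that fails.  */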

/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from SEL into the permutation array in D.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = sel[i];
      d.perm[i] = e;
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
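
/* Illustration (hypothetical query): asked whether V8QImode supports
   the broadcast selector { 3, 3, 3, 3, 3, 3, 3, 3 }, the hook builds D
   with testing_p set, runs the expander against placeholder registers
   inside a throw-away sequence, and answers true, since that broadcast
   is always expandable here; end_sequence discards whatever insns were
   generated along the way.  */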

/* Expand a V2SFmode vec_set: OPERANDS[0] is the vector, OPERANDS[1] the
   SFmode value to insert, and OPERANDS[2] the constant element index.  */

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

/* Expand an even/odd extraction: fill TARGET with the even (ODD == 0)
   or odd (ODD == 1) numbered elements of the concatenation of OP0 and
   OP1.  */

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
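
/* For example (hypothetical call): with V4HImode operands and ODD == 1
   the permutation built here is { 1, 3, 5, 7 }, i.e. the odd-numbered
   halfwords of the op0/op1 concatenation; ODD == 0 selects
   { 0, 2, 4, 6 }, the even-numbered halfwords.  */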

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In BR regs, the mode (DImode) can't be changed at all.
   In FP regs, we can't change FP values to integer values and vice versa,
   but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */

static bool
ia64_can_change_mode_class (machine_mode from, machine_mode to,
			    reg_class_t rclass)
{
  if (reg_classes_intersect_p (rclass, BR_REGS))
    return from == to;
  if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
    return !reg_classes_intersect_p (rclass, FR_REGS);
  return true;
}
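
/* For instance: a DImode <-> DFmode pun is rejected for any class that
   overlaps FR_REGS, because the float and integer representations
   differ in those registers, while DImode <-> SImode punning is allowed
   everywhere except classes overlapping BR_REGS, which never change
   mode at all.  */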

#include "gt-ia64.h"