/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2015 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "df.h"
#include "alias.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "libfuncs.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "common/common-target.h"
#include "tm_p.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "intl.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "opts.h"
#include "dumpfile.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */

  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
                                         tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
                                   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
                                const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
                              const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
                                       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
                                       const_tree, bool);
static unsigned int ia64_function_arg_boundary (machine_mode,
                                                const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
                                    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
                                  bool);
static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
                                 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
                                         unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (machine_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
                                             bool, bool);

static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                              const unsigned char *sel);

#define MAX_VECT_LEN 8

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL,              0, 0, false, false, false, NULL, false }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ia64_libgcc_floating_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}

typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
               name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error_at (DECL_SOURCE_LOCATION (decl),
                    "an address area attribute cannot be specified for "
                    "local variables");
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("address area of %q+D conflicts with previous "
                 "declaration", decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
                "address area attribute cannot be specified for "
                "functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
               name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

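/* Illustrative note (not part of the original source): the handler above is
   what processes the IA-64 "model" attribute in user code.  Because the
   attribute takes a plain identifier (see ia64_attribute_takes_identifier_p),
   a typical use on a file-scope variable looks like

     static int counter __attribute__ ((model (small)));

   where "small" (or "__small__") asks for the small address area so the
   object can be addressed with a single addl; the variable name here is
   hypothetical.  */
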
/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                     unsigned HOST_WIDE_INT size,
                                     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      const char *name;

      if (TREE_CODE (id) == IDENTIFIER_NODE)
        name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
        name = TREE_STRING_POINTER (id);
      else
        abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
           size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}

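/* Illustrative note (not part of the original source): for a hypothetical
   16-byte, 64-bit-aligned variable whose assembler name is "foo" and whose
   common_object attribute names the overlaid area "BLK", the function above
   is expected to emit roughly

        .vms_common     "BLK",foo,16,8

   while a decl without the attribute falls back to COMMON_ASM_OP followed by
   the same name, size and byte alignment.  */
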
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

        if (GET_CODE (adjust) != CONST_INT
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
          return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

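/* Illustrative worked example (not part of the original source): with
   rop == 0xf8 and rshift == 3, the mask shifted right by 3 is 0x1f, and
   exact_log2 (0x1f + 1) == 5, so the dep.z deposit field is 5 bits wide.
   A mask with a hole, say rop == 0x1e0 with the same shift, gives 0x3c,
   and since 0x3d is not a power of two the function returns a negative
   value, rejecting the combination.  */
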
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static inline bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
           && (GENERAL_REGNO_P (REGNO (reg))
               || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
          && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
          || (CONST_INT_P (XEXP (disp, 1))
              && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
                           rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
           && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
           && XEXP (x, 0) != arg_pointer_rtx
           && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}

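/* Illustrative note (not part of the original source): the hook above only
   accepts the addressing forms the ISA actually provides, roughly

     (mem:DI (reg:DI r2))                              ; register indirect
     (mem:DI (post_inc:DI (reg:DI r2)))                ; [r2], r2 += 8
     (mem:DI (post_modify:DI (reg:DI r2)
                             (plus:DI (reg:DI r2) (const_int 16))))

   with the post_modify displacement limited to the signed 9-bit range
   checked above; there is no base+displacement or indexed load/store form
   on ia64.  */
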
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
        return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
         match the code in ia64_expand_move and move_operand, even though they
         are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
        {
          HOST_WIDE_INT addend = 0;
          rtx op = x;

          if (GET_CODE (op) == CONST
              && GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
            {
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
              op = XEXP (XEXP (op, 0), 0);
            }

          if (any_offset_symbol_operand (op, mode)
              || function_operand (op, mode))
            return true;
          if (aligned_offset_symbol_operand (op, mode))
            return (addend & 0x3fff) == 0;
          return false;
        }
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
        return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
        {
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
          hi = hi - lo;

          if (lo != 0)
            {
              addend = lo;
              src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
            }
        }

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      if (addend)
        {
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
          emit_insn (gen_rtx_SET (dest, tmp));
        }
    }

  return true;
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
                         rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
        }
      else
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
                               orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
        {
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
          sym = XEXP (XEXP (op1, 0), 0);
        }

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
        addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
        {
          HOST_WIDE_INT addend_lo, addend_hi;

          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
          addend_hi = addend - addend_lo;

          if (addend_lo != 0)
            {
              op1 = plus_constant (mode, sym, addend_hi);
              addend = addend_lo;
            }
          else
            addend = 0;
        }
      else
        op1 = sym;

      if (reload_completed)
        {
          /* We really should have taken care of this offset earlier.  */
          gcc_assert (addend == 0);
          if (ia64_expand_load_address (op0, op1))
            return NULL_RTX;
        }

      if (addend)
        {
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

          emit_insn (gen_rtx_SET (subtarget, op1));

          op1 = expand_simple_binop (mode, PLUS, subtarget,
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
          if (op0 == op1)
            return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx_insn *insn, *first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
              p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            gcc_assert (!reversed && !dead);

            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            gcc_assert (!reversed && !dead);

            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base,
                                    plus_constant (Pmode, base, -24)),
               8);
            break;

          case POST_MODIFY:
            gcc_assert (!reversed && !dead);

            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else
              {
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
                  {
                    /* Again the postmodify cannot be made to match,
                       but in this case it's more efficient to get rid
                       of the postmodify entirely and fix up with an
                       add insn.  */
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
                    fixup = gen_adddi3
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
                  }
                else
                  {
                    /* Combined offset still fits in the displacement field.
                       (We cannot overflow it at the high end.)  */
                    out[1] = adjust_automodify_address
                      (in, DImode, gen_rtx_POST_MODIFY
                       (Pmode, base, gen_rtx_PLUS
                        (Pmode, base,
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                       8);
                  }
              }
            break;

          default:
            gcc_unreachable ();
          }
        break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}

f57fc998
ZW
1548/* Split a TImode or TFmode move instruction after reload.
1549 This is used by *movtf_internal and *movti_internal. */
1550void
1551ia64_split_tmode_move (rtx operands[])
1552{
2ffe0e02
ZW
1553 rtx in[2], out[2], insn;
1554 rtx fixup[2];
1555 bool dead = false;
1556 bool reversed = false;
1557
1558 /* It is possible for reload to decide to overwrite a pointer with
1559 the value it points to. In that case we have to do the loads in
1560 the appropriate order so that the pointer is not destroyed too
1561 early. Also we must not generate a postmodify for that second
6d3f673c
KY
1562 load, or rws_access_regno will die. And we must not generate a
1563 postmodify for the second load if the destination register
1564 overlaps with the base register. */
2ffe0e02
ZW
1565 if (GET_CODE (operands[1]) == MEM
1566 && reg_overlap_mentioned_p (operands[0], operands[1]))
f57fc998 1567 {
2ffe0e02
ZW
1568 rtx base = XEXP (operands[1], 0);
1569 while (GET_CODE (base) != REG)
1570 base = XEXP (base, 0);
f57fc998 1571
2ffe0e02 1572 if (REGNO (base) == REGNO (operands[0]))
6d3f673c 1573 reversed = true;
2430d1e2 1574
6d3f673c
KY
1575 if (refers_to_regno_p (REGNO (operands[0]),
1576 REGNO (operands[0])+2,
1577 base, 0))
2430d1e2 1578 dead = true;
2ffe0e02
ZW
1579 }
1580 /* Another reason to do the moves in reversed order is if the first
1581 element of the target register pair is also the second element of
1582 the source register pair. */
1583 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1584 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1585 reversed = true;
1586
1587 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1588 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1589
1590#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1591 if (GET_CODE (EXP) == MEM \
1592 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1593 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1594 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
bbbbb16a 1595 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
2ffe0e02 1596
f7df4a84 1597 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
2ffe0e02
ZW
1598 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1599 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1600
f7df4a84 1601 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
2ffe0e02
ZW
1602 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1603 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1604
1605 if (fixup[0])
1606 emit_insn (fixup[0]);
1607 if (fixup[1])
1608 emit_insn (fixup[1]);
1609
1610#undef MAYBE_ADD_REG_INC_NOTE
f57fc998
ZW
1611}
1612
02befdf4 1613/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
3f622353
RH
1614 through memory plus an extra GR scratch register. Except that you can
1615 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1616 SECONDARY_RELOAD_CLASS, but not both.
1617
1618 We got into problems in the first place by allowing a construct like
02befdf4 1619 (subreg:XF (reg:TI)), which we got from a union containing a long double.
f5143c46 1620 This solution attempts to prevent this situation from occurring. When
3f622353
RH
1621 we see something like the above, we spill the inner register to memory. */
1622
4de67c26 1623static rtx
ef4bddc2 1624spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
3f622353
RH
1625{
1626 if (GET_CODE (in) == SUBREG
1627 && GET_MODE (SUBREG_REG (in)) == TImode
1628 && GET_CODE (SUBREG_REG (in)) == REG)
1629 {
9474e8ab 1630 rtx memt = assign_stack_temp (TImode, 16);
68d22aa5 1631 emit_move_insn (memt, SUBREG_REG (in));
4de67c26 1632 return adjust_address (memt, mode, 0);
3f622353
RH
1633 }
1634 else if (force && GET_CODE (in) == REG)
1635 {
9474e8ab 1636 rtx memx = assign_stack_temp (mode, 16);
68d22aa5
RH
1637 emit_move_insn (memx, in);
1638 return memx;
3f622353 1639 }
3f622353
RH
1640 else
1641 return in;
1642}
f2f90c63 1643
4de67c26
JM
1644/* Expand the movxf or movrf pattern (MODE says which) with the given
1645 OPERANDS, returning true if the pattern should then invoke
1646 DONE. */
1647
1648bool
ef4bddc2 1649ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
4de67c26
JM
1650{
1651 rtx op0 = operands[0];
1652
1653 if (GET_CODE (op0) == SUBREG)
1654 op0 = SUBREG_REG (op0);
1655
1656 /* We must support XFmode loads into general registers for stdarg/vararg,
1657 unprototyped calls, and a rare case where a long double is passed as
1658 an argument after a float HFA fills the FP registers. We split them into
1659 DImode loads for convenience. We also need to support XFmode stores
1660 for the last case. This case does not happen for stdarg/vararg routines,
1661 because we do a block store to memory of unnamed arguments. */
1662
1663 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1664 {
1665 rtx out[2];
1666
1667 /* We're hoping to transform everything that deals with XFmode
1668 quantities and GR registers early in the compiler. */
b3a13419 1669 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1670
1671 /* Struct to register can just use TImode instead. */
1672 if ((GET_CODE (operands[1]) == SUBREG
1673 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1674 || (GET_CODE (operands[1]) == REG
1675 && GR_REGNO_P (REGNO (operands[1]))))
1676 {
1677 rtx op1 = operands[1];
1678
1679 if (GET_CODE (op1) == SUBREG)
1680 op1 = SUBREG_REG (op1);
1681 else
1682 op1 = gen_rtx_REG (TImode, REGNO (op1));
1683
1684 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1685 return true;
1686 }
1687
1688 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1689 {
ae4d3291 1690 /* Don't word-swap when reading in the constant. */
4de67c26 1691 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
ae4d3291
JW
1692 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1693 0, mode));
4de67c26 1694 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
ae4d3291
JW
1695 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1696 0, mode));
4de67c26
JM
1697 return true;
1698 }
1699
1700 /* If the quantity is in a register not known to be GR, spill it. */
1701 if (register_operand (operands[1], mode))
1702 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1703
1704 gcc_assert (GET_CODE (operands[1]) == MEM);
1705
ae4d3291
JW
1706 /* Don't word-swap when reading in the value. */
1707 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1708 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
4de67c26
JM
1709
1710 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1711 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1712 return true;
1713 }
1714
1715 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1716 {
1717 /* We're hoping to transform everything that deals with XFmode
1718 quantities and GR registers early in the compiler. */
b3a13419 1719 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1720
1721 /* Op0 can't be a GR_REG here, as that case is handled above.
1722 If op0 is a register, then we spill op1, so that we now have a
1723 MEM operand. This requires creating an XFmode subreg of a TImode reg
1724 to force the spill. */
1725 if (register_operand (operands[0], mode))
1726 {
1727 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1728 op1 = gen_rtx_SUBREG (mode, op1, 0);
1729 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1730 }
1731
1732 else
1733 {
1734 rtx in[2];
1735
ae4d3291
JW
1736 gcc_assert (GET_CODE (operands[0]) == MEM);
1737
1738 /* Don't word-swap when writing out the value. */
1739 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1740 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
4de67c26
JM
1741
1742 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1743 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1744 return true;
1745 }
1746 }
1747
1748 if (!reload_in_progress && !reload_completed)
1749 {
1750 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1751
1752 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1753 {
1754 rtx memt, memx, in = operands[1];
1755 if (CONSTANT_P (in))
1756 in = validize_mem (force_const_mem (mode, in));
1757 if (GET_CODE (in) == MEM)
1758 memt = adjust_address (in, TImode, 0);
1759 else
1760 {
9474e8ab 1761 memt = assign_stack_temp (TImode, 16);
4de67c26
JM
1762 memx = adjust_address (memt, mode, 0);
1763 emit_move_insn (memx, in);
1764 }
1765 emit_move_insn (op0, memt);
1766 return true;
1767 }
1768
1769 if (!ia64_move_ok (operands[0], operands[1]))
1770 operands[1] = force_reg (mode, operands[1]);
1771 }
1772
1773 return false;
1774}
1775
f90b7a5a
PB
1776/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1777 with the expression that holds the compare result (in VOIDmode). */
f2f90c63 1778
24ea7948
ZW
1779static GTY(()) rtx cmptf_libfunc;
1780
f90b7a5a
PB
1781void
1782ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
f2f90c63 1783{
f90b7a5a 1784 enum rtx_code code = GET_CODE (*expr);
f2f90c63
RH
1785 rtx cmp;
1786
1787 /* If we have a BImode input, then we already have a compare result, and
1788 do not need to emit another comparison. */
f90b7a5a 1789 if (GET_MODE (*op0) == BImode)
f2f90c63 1790 {
f90b7a5a
PB
1791 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1792 cmp = *op0;
f2f90c63 1793 }
24ea7948
ZW
1794 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1795	   magic number as its third argument that indicates what to do.
1796 The return value is an integer to be compared against zero. */
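  /* As a rough illustration of the table below (not the literal RTL we
     emit), a TFmode "a < b" on HP-UX becomes, conceptually,

	ret = _U_Qfcmp (a, b, QCMP_LT | QCMP_INV);
	cmp = (ret != 0);

     and the BImode result CMP is then tested against zero by the caller.  */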
f90b7a5a 1797 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
24ea7948
ZW
1798 {
1799 enum qfcmp_magic {
8fc53a5f 1800 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
24ea7948
ZW
1801 QCMP_UNORD = 2,
1802 QCMP_EQ = 4,
1803 QCMP_LT = 8,
1804 QCMP_GT = 16
32e8bb8e
ILT
1805 };
1806 int magic;
24ea7948
ZW
1807 enum rtx_code ncode;
1808 rtx ret, insns;
e820471b 1809
f90b7a5a 1810 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
24ea7948
ZW
1811 switch (code)
1812 {
1813 /* 1 = equal, 0 = not equal. Equality operators do
8fc53a5f 1814 not raise FP_INVALID when given a NaN operand. */
24ea7948
ZW
1815 case EQ: magic = QCMP_EQ; ncode = NE; break;
1816 case NE: magic = QCMP_EQ; ncode = EQ; break;
1817 /* isunordered() from C99. */
1818 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
b1346fa3 1819 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
24ea7948 1820 /* Relational operators raise FP_INVALID when given
8fc53a5f 1821 a NaN operand. */
24ea7948
ZW
1822 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1823 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1824 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1825 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
8fc53a5f
EB
1826 /* Unordered relational operators do not raise FP_INVALID
1827 when given a NaN operand. */
1828 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1829 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1830 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1831 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1832 /* Not supported. */
1833 case UNEQ:
1834 case LTGT:
e820471b 1835 default: gcc_unreachable ();
24ea7948
ZW
1836 }
1837
1838 start_sequence ();
1839
1840 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
f90b7a5a 1841 *op0, TFmode, *op1, TFmode,
24ea7948
ZW
1842 GEN_INT (magic), DImode);
1843 cmp = gen_reg_rtx (BImode);
f7df4a84
RS
1844 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1845 ret, const0_rtx)));
24ea7948
ZW
1846
1847 insns = get_insns ();
1848 end_sequence ();
1849
1850 emit_libcall_block (insns, cmp, cmp,
f90b7a5a 1851 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
24ea7948
ZW
1852 code = NE;
1853 }
f2f90c63
RH
1854 else
1855 {
1856 cmp = gen_reg_rtx (BImode);
f7df4a84 1857 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
f2f90c63
RH
1858 code = NE;
1859 }
1860
f90b7a5a
PB
1861 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1862 *op0 = cmp;
1863 *op1 = const0_rtx;
f2f90c63 1864}
2ed4af6f 1865
e934ca47
RH
1866/* Generate an integral vector comparison. Return true if the condition has
1867 been reversed, and so the sense of the comparison should be inverted. */
f61134e8
RH
1868
1869static bool
ef4bddc2 1870ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
f61134e8
RH
1871 rtx dest, rtx op0, rtx op1)
1872{
1873 bool negate = false;
1874 rtx x;
1875
e934ca47 1876 /* Canonicalize the comparison to EQ, GT, GTU. */
f61134e8
RH
1877 switch (code)
1878 {
1879 case EQ:
1880 case GT:
e934ca47 1881 case GTU:
f61134e8
RH
1882 break;
1883
1884 case NE:
f61134e8 1885 case LE:
e934ca47
RH
1886 case LEU:
1887 code = reverse_condition (code);
f61134e8
RH
1888 negate = true;
1889 break;
1890
1891 case GE:
e934ca47
RH
1892 case GEU:
1893 code = reverse_condition (code);
f61134e8
RH
1894 negate = true;
1895 /* FALLTHRU */
1896
1897 case LT:
f61134e8 1898 case LTU:
e934ca47
RH
1899 code = swap_condition (code);
1900 x = op0, op0 = op1, op1 = x;
1901 break;
f61134e8 1902
e934ca47
RH
1903 default:
1904 gcc_unreachable ();
1905 }
f61134e8 1906
e934ca47 1907 /* Unsigned parallel compare is not supported by the hardware. Play some
6283ba26 1908 tricks to turn this into a signed comparison against 0. */
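  /* Concretely, the two cases handled below are: for V2SImode, flip the
     sign bit of both operands (subtract 0x80000000) and use a signed GT;
     for V8QImode and V4HImode, use an unsigned saturating subtraction,
     since "op0 GTU op1" is equivalent to "(op0 -us op1) != 0".  */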
e934ca47
RH
1909 if (code == GTU)
1910 {
1911 switch (mode)
1912 {
1913 case V2SImode:
f61134e8 1914 {
e934ca47
RH
1915 rtx t1, t2, mask;
1916
9540f5ef
SE
1917 /* Subtract (-(INT MAX) - 1) from both operands to make
1918 them signed. */
1919 mask = GEN_INT (0x80000000);
e934ca47 1920 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
9540f5ef
SE
1921 mask = force_reg (mode, mask);
1922 t1 = gen_reg_rtx (mode);
1923 emit_insn (gen_subv2si3 (t1, op0, mask));
1924 t2 = gen_reg_rtx (mode);
1925 emit_insn (gen_subv2si3 (t2, op1, mask));
1926 op0 = t1;
1927 op1 = t2;
6283ba26 1928 code = GT;
f61134e8 1929 }
e934ca47
RH
1930 break;
1931
1932 case V8QImode:
1933 case V4HImode:
1934 /* Perform a parallel unsigned saturating subtraction. */
1935 x = gen_reg_rtx (mode);
f7df4a84 1936 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
6283ba26
RH
1937
1938 code = EQ;
1939 op0 = x;
1940 op1 = CONST0_RTX (mode);
1941 negate = !negate;
e934ca47
RH
1942 break;
1943
1944 default:
1945 gcc_unreachable ();
1946 }
f61134e8
RH
1947 }
1948
1949 x = gen_rtx_fmt_ee (code, mode, op0, op1);
f7df4a84 1950 emit_insn (gen_rtx_SET (dest, x));
f61134e8
RH
1951
1952 return negate;
1953}
1954
f61134e8
RH
1955/* Emit an integral vector conditional move. */
1956
1957void
1958ia64_expand_vecint_cmov (rtx operands[])
1959{
ef4bddc2 1960 machine_mode mode = GET_MODE (operands[0]);
f61134e8
RH
1961 enum rtx_code code = GET_CODE (operands[3]);
1962 bool negate;
1963 rtx cmp, x, ot, of;
1964
f61134e8
RH
1965 cmp = gen_reg_rtx (mode);
1966 negate = ia64_expand_vecint_compare (code, mode, cmp,
1967 operands[4], operands[5]);
1968
1969 ot = operands[1+negate];
1970 of = operands[2-negate];
1971
1972 if (ot == CONST0_RTX (mode))
1973 {
1974 if (of == CONST0_RTX (mode))
1975 {
1976 emit_move_insn (operands[0], ot);
1977 return;
1978 }
1979
1980 x = gen_rtx_NOT (mode, cmp);
1981 x = gen_rtx_AND (mode, x, of);
f7df4a84 1982 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
1983 }
1984 else if (of == CONST0_RTX (mode))
1985 {
1986 x = gen_rtx_AND (mode, cmp, ot);
f7df4a84 1987 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
1988 }
1989 else
1990 {
1991 rtx t, f;
1992
1993 t = gen_reg_rtx (mode);
1994 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
f7df4a84 1995 emit_insn (gen_rtx_SET (t, x));
f61134e8
RH
1996
1997 f = gen_reg_rtx (mode);
1998 x = gen_rtx_NOT (mode, cmp);
1999 x = gen_rtx_AND (mode, x, operands[2-negate]);
f7df4a84 2000 emit_insn (gen_rtx_SET (f, x));
f61134e8
RH
2001
2002 x = gen_rtx_IOR (mode, t, f);
f7df4a84 2003 emit_insn (gen_rtx_SET (operands[0], x));
f61134e8
RH
2004 }
2005}
2006
2007/* Emit an integral vector min or max operation. Return true if all done. */
2008
2009bool
ef4bddc2 2010ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
f61134e8
RH
2011 rtx operands[])
2012{
cabddb23 2013 rtx xops[6];
f61134e8
RH
2014
2015 /* These four combinations are supported directly. */
2016 if (mode == V8QImode && (code == UMIN || code == UMAX))
2017 return false;
2018 if (mode == V4HImode && (code == SMIN || code == SMAX))
2019 return false;
2020
93b4080b
RH
2021 /* This combination can be implemented with only saturating subtraction. */
2022 if (mode == V4HImode && code == UMAX)
2023 {
2024 rtx x, tmp = gen_reg_rtx (mode);
2025
2026 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
f7df4a84 2027 emit_insn (gen_rtx_SET (tmp, x));
93b4080b
RH
2028
2029 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2030 return true;
2031 }
2032
f61134e8
RH
 2033	  /* Everything else is implemented via vector comparisons.  */
2034 xops[0] = operands[0];
2035 xops[4] = xops[1] = operands[1];
2036 xops[5] = xops[2] = operands[2];
2037
2038 switch (code)
2039 {
2040 case UMIN:
2041 code = LTU;
2042 break;
2043 case UMAX:
2044 code = GTU;
2045 break;
2046 case SMIN:
2047 code = LT;
2048 break;
2049 case SMAX:
2050 code = GT;
2051 break;
2052 default:
e820471b 2053 gcc_unreachable ();
f61134e8
RH
2054 }
2055 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2056
2057 ia64_expand_vecint_cmov (xops);
2058 return true;
2059}
2060
55eaaa5b
RH
2061/* The vectors LO and HI each contain N halves of a double-wide vector.
2062 Reassemble either the first N/2 or the second N/2 elements. */
604e3ff3
RH
2063
2064void
55eaaa5b 2065ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
604e3ff3 2066{
ef4bddc2 2067 machine_mode vmode = GET_MODE (lo);
e6431744
RH
2068 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2069 struct expand_vec_perm_d d;
2070 bool ok;
604e3ff3 2071
e6431744
RH
2072 d.target = gen_lowpart (vmode, out);
2073 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2074 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2075 d.vmode = vmode;
2076 d.nelt = nelt;
2077 d.one_operand_p = false;
2078 d.testing_p = false;
2079
2080 high = (highp ? nelt / 2 : 0);
2081 for (i = 0; i < nelt / 2; ++i)
604e3ff3 2082 {
e6431744
RH
2083 d.perm[i * 2] = i + high;
2084 d.perm[i * 2 + 1] = i + high + nelt;
604e3ff3
RH
2085 }
2086
e6431744
RH
2087 ok = ia64_expand_vec_perm_const_1 (&d);
2088 gcc_assert (ok);
604e3ff3
RH
2089}
2090
55eaaa5b 2091/* Return a vector of the sign-extension of VEC. */
e898620c 2092
55eaaa5b
RH
2093static rtx
2094ia64_unpack_sign (rtx vec, bool unsignedp)
e898620c 2095{
ef4bddc2 2096 machine_mode mode = GET_MODE (vec);
55eaaa5b 2097 rtx zero = CONST0_RTX (mode);
e898620c 2098
e898620c 2099 if (unsignedp)
55eaaa5b 2100 return zero;
e898620c
RH
2101 else
2102 {
55eaaa5b 2103 rtx sign = gen_reg_rtx (mode);
e898620c
RH
2104 bool neg;
2105
55eaaa5b 2106 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
e898620c 2107 gcc_assert (!neg);
55eaaa5b
RH
2108
2109 return sign;
e898620c 2110 }
55eaaa5b 2111}
e898620c 2112
55eaaa5b 2113/* Emit an integral vector unpack operation. */
e898620c 2114
55eaaa5b
RH
2115void
2116ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2117{
2118 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2119 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
e898620c
RH
2120}
2121
55eaaa5b
RH
 2122/* Emit an integral vector widening sum operation.  */
2123
604e3ff3 2124void
55eaaa5b 2125ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
604e3ff3 2126{
ef4bddc2 2127 machine_mode wmode;
55eaaa5b 2128 rtx l, h, t, sign;
604e3ff3 2129
55eaaa5b
RH
2130 sign = ia64_unpack_sign (operands[1], unsignedp);
2131
2132 wmode = GET_MODE (operands[0]);
2133 l = gen_reg_rtx (wmode);
2134 h = gen_reg_rtx (wmode);
604e3ff3 2135
55eaaa5b
RH
2136 ia64_unpack_assemble (l, operands[1], sign, false);
2137 ia64_unpack_assemble (h, operands[1], sign, true);
604e3ff3 2138
55eaaa5b
RH
2139 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2140 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2141 if (t != operands[0])
2142 emit_move_insn (operands[0], t);
604e3ff3
RH
2143}
2144
2ed4af6f
RH
2145/* Emit the appropriate sequence for a call. */
2146
2147void
9c808aad
AJ
2148ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2149 int sibcall_p)
2ed4af6f 2150{
599aedd9 2151 rtx insn, b0;
2ed4af6f
RH
2152
2153 addr = XEXP (addr, 0);
c8083186 2154 addr = convert_memory_address (DImode, addr);
2ed4af6f 2155 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 2156
599aedd9 2157 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
2158 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2159 {
2160 if (sibcall_p)
599aedd9 2161 insn = gen_sibcall_nogp (addr);
2ed4af6f 2162 else if (! retval)
599aedd9 2163 insn = gen_call_nogp (addr, b0);
2ed4af6f 2164 else
599aedd9
RH
2165 insn = gen_call_value_nogp (retval, addr, b0);
2166 insn = emit_call_insn (insn);
2ed4af6f 2167 }
2ed4af6f 2168 else
599aedd9
RH
2169 {
2170 if (sibcall_p)
2171 insn = gen_sibcall_gp (addr);
2172 else if (! retval)
2173 insn = gen_call_gp (addr, b0);
2174 else
2175 insn = gen_call_value_gp (retval, addr, b0);
2176 insn = emit_call_insn (insn);
2ed4af6f 2177
599aedd9
RH
2178 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2179 }
6dad5a56 2180
599aedd9 2181 if (sibcall_p)
4e14f1f9 2182 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
f2972bf8
DR
2183
2184 if (TARGET_ABI_OPEN_VMS)
2185 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2186 gen_rtx_REG (DImode, GR_REG (25)));
599aedd9
RH
2187}
2188
6fb5fa3c
DB
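/* Record which hard register was chosen for frame register R the first
   time it is used in emitted code, and verify that any later use agrees
   with that choice.  */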
2189static void
2190reg_emitted (enum ia64_frame_regs r)
2191{
2192 if (emitted_frame_related_regs[r] == 0)
2193 emitted_frame_related_regs[r] = current_frame_info.r[r];
2194 else
2195 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2196}
2197
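/* Return the hard register allocated to frame register R, noting that it
   has now been used in emitted code.  */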
2198static int
2199get_reg (enum ia64_frame_regs r)
2200{
2201 reg_emitted (r);
2202 return current_frame_info.r[r];
2203}
2204
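/* Return true if hard register REGNO has already been used for one of the
   frame-related registers.  */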
2205static bool
2206is_emitted (int regno)
2207{
09639a83 2208 unsigned int r;
6fb5fa3c
DB
2209
2210 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2211 if (emitted_frame_related_regs[r] == regno)
2212 return true;
2213 return false;
2214}
2215
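/* Restore the global pointer after a call that may have clobbered it,
   either from the register it was saved in or by reloading it from its
   spill slot in the frame.  */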
599aedd9 2216void
9c808aad 2217ia64_reload_gp (void)
599aedd9
RH
2218{
2219 rtx tmp;
2220
6fb5fa3c
DB
2221 if (current_frame_info.r[reg_save_gp])
2222 {
2223 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2224 }
2ed4af6f 2225 else
599aedd9
RH
2226 {
2227 HOST_WIDE_INT offset;
13f70342 2228 rtx offset_r;
599aedd9
RH
2229
2230 offset = (current_frame_info.spill_cfa_off
2231 + current_frame_info.spill_size);
2232 if (frame_pointer_needed)
2233 {
2234 tmp = hard_frame_pointer_rtx;
2235 offset = -offset;
2236 }
2237 else
2238 {
2239 tmp = stack_pointer_rtx;
2240 offset = current_frame_info.total_size - offset;
2241 }
2242
13f70342
RH
2243 offset_r = GEN_INT (offset);
2244 if (satisfies_constraint_I (offset_r))
2245 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
599aedd9
RH
2246 else
2247 {
13f70342 2248 emit_move_insn (pic_offset_table_rtx, offset_r);
599aedd9
RH
2249 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2250 pic_offset_table_rtx, tmp));
2251 }
2252
2253 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2254 }
2255
2256 emit_move_insn (pic_offset_table_rtx, tmp);
2257}
2258
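/* Emit the real call sequence when a call pattern is split.  If ADDR is a
   general register, the call is actually through a function descriptor, so
   load the entry point into SCRATCH_B (via SCRATCH_R) and the callee's gp
   from the descriptor; our own gp is reloaded afterwards when needed.  */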
2259void
9c808aad
AJ
2260ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2261 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
2262{
2263 rtx insn;
2264 bool is_desc = false;
2265
2266 /* If we find we're calling through a register, then we're actually
2267 calling through a descriptor, so load up the values. */
4e14f1f9 2268 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
2269 {
2270 rtx tmp;
2271 bool addr_dead_p;
2272
2273 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 2274 we can legitimately change the global lifetime of the GP
9c808aad 2275 (in the form of killing where previously live). This is
599aedd9
RH
2276 because a call through a descriptor doesn't use the previous
2277 value of the GP, while a direct call does, and we do not
2278 commit to either form until the split here.
2279
2280 That said, this means that we lack precise life info for
2281 whether ADDR is dead after this call. This is not terribly
2282 important, since we can fix things up essentially for free
2283 with the POST_DEC below, but it's nice to not use it when we
2284 can immediately tell it's not necessary. */
2285 addr_dead_p = ((noreturn_p || sibcall_p
2286 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2287 REGNO (addr)))
2288 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2289
2290 /* Load the code address into scratch_b. */
2291 tmp = gen_rtx_POST_INC (Pmode, addr);
2292 tmp = gen_rtx_MEM (Pmode, tmp);
2293 emit_move_insn (scratch_r, tmp);
2294 emit_move_insn (scratch_b, scratch_r);
2295
2296 /* Load the GP address. If ADDR is not dead here, then we must
2297 revert the change made above via the POST_INCREMENT. */
2298 if (!addr_dead_p)
2299 tmp = gen_rtx_POST_DEC (Pmode, addr);
2300 else
2301 tmp = addr;
2302 tmp = gen_rtx_MEM (Pmode, tmp);
2303 emit_move_insn (pic_offset_table_rtx, tmp);
2304
2305 is_desc = true;
2306 addr = scratch_b;
2307 }
2ed4af6f 2308
6dad5a56 2309 if (sibcall_p)
599aedd9
RH
2310 insn = gen_sibcall_nogp (addr);
2311 else if (retval)
2312 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 2313 else
599aedd9 2314 insn = gen_call_nogp (addr, retaddr);
6dad5a56 2315 emit_call_insn (insn);
2ed4af6f 2316
599aedd9
RH
2317 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2318 ia64_reload_gp ();
2ed4af6f 2319}
16df4ee6
RH
2320
2321/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2322
2323 This differs from the generic code in that we know about the zero-extending
2324 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2325 also know that ld.acq+cmpxchg.rel equals a full barrier.
2326
2327 The loop we want to generate looks like
2328
2329 cmp_reg = mem;
2330 label:
2331 old_reg = cmp_reg;
2332 new_reg = cmp_reg op val;
2333 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2334 if (cmp_reg != old_reg)
2335 goto label;
2336
2337 Note that we only do the plain load from memory once. Subsequent
2338 iterations use the value loaded by the compare-and-swap pattern. */
2339
2340void
2341ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
28875d67 2342 rtx old_dst, rtx new_dst, enum memmodel model)
16df4ee6 2343{
ef4bddc2 2344 machine_mode mode = GET_MODE (mem);
16df4ee6
RH
2345 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2346 enum insn_code icode;
2347
2348 /* Special case for using fetchadd. */
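  /* (The fetchadd instruction only accepts a small set of constant
     increments, which is what fetchadd_operand checks; anything else
     falls through to the cmpxchg loop below.)  */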
dca13767
JJ
2349 if ((mode == SImode || mode == DImode)
2350 && (code == PLUS || code == MINUS)
2351 && fetchadd_operand (val, mode))
16df4ee6 2352 {
dca13767
JJ
2353 if (code == MINUS)
2354 val = GEN_INT (-INTVAL (val));
2355
16df4ee6
RH
2356 if (!old_dst)
2357 old_dst = gen_reg_rtx (mode);
2358
28875d67
RH
2359 switch (model)
2360 {
2361 case MEMMODEL_ACQ_REL:
2362 case MEMMODEL_SEQ_CST:
46b35980 2363 case MEMMODEL_SYNC_SEQ_CST:
28875d67
RH
2364 emit_insn (gen_memory_barrier ());
2365 /* FALLTHRU */
2366 case MEMMODEL_RELAXED:
2367 case MEMMODEL_ACQUIRE:
46b35980 2368 case MEMMODEL_SYNC_ACQUIRE:
28875d67
RH
2369 case MEMMODEL_CONSUME:
2370 if (mode == SImode)
2371 icode = CODE_FOR_fetchadd_acq_si;
2372 else
2373 icode = CODE_FOR_fetchadd_acq_di;
2374 break;
2375 case MEMMODEL_RELEASE:
46b35980 2376 case MEMMODEL_SYNC_RELEASE:
28875d67
RH
2377 if (mode == SImode)
2378 icode = CODE_FOR_fetchadd_rel_si;
2379 else
2380 icode = CODE_FOR_fetchadd_rel_di;
2381 break;
2382
2383 default:
2384 gcc_unreachable ();
2385 }
16df4ee6 2386
16df4ee6
RH
2387 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2388
2389 if (new_dst)
2390 {
2391 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2392 true, OPTAB_WIDEN);
2393 if (new_reg != new_dst)
2394 emit_move_insn (new_dst, new_reg);
2395 }
2396 return;
2397 }
2398
2399 /* Because of the volatile mem read, we get an ld.acq, which is the
28875d67
RH
2400 front half of the full barrier. The end half is the cmpxchg.rel.
2401 For relaxed and release memory models, we don't need this. But we
2402 also don't bother trying to prevent it either. */
46b35980 2403 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
28875d67 2404 || MEM_VOLATILE_P (mem));
16df4ee6
RH
2405
2406 old_reg = gen_reg_rtx (DImode);
2407 cmp_reg = gen_reg_rtx (DImode);
2408 label = gen_label_rtx ();
2409
2410 if (mode != DImode)
2411 {
2412 val = simplify_gen_subreg (DImode, val, mode, 0);
2413 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2414 }
2415 else
2416 emit_move_insn (cmp_reg, mem);
2417
2418 emit_label (label);
2419
2420 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2421 emit_move_insn (old_reg, cmp_reg);
2422 emit_move_insn (ar_ccv, cmp_reg);
2423
2424 if (old_dst)
2425 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2426
2427 new_reg = cmp_reg;
2428 if (code == NOT)
2429 {
974920dc
UB
2430 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2431 true, OPTAB_DIRECT);
2432 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
16df4ee6 2433 }
974920dc
UB
2434 else
2435 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2436 true, OPTAB_DIRECT);
16df4ee6
RH
2437
2438 if (mode != DImode)
2439 new_reg = gen_lowpart (mode, new_reg);
2440 if (new_dst)
2441 emit_move_insn (new_dst, new_reg);
2442
28875d67 2443 switch (model)
16df4ee6 2444 {
28875d67
RH
2445 case MEMMODEL_RELAXED:
2446 case MEMMODEL_ACQUIRE:
46b35980 2447 case MEMMODEL_SYNC_ACQUIRE:
28875d67
RH
2448 case MEMMODEL_CONSUME:
2449 switch (mode)
2450 {
2451 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2452 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2453 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2454 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2455 default:
2456 gcc_unreachable ();
2457 }
2458 break;
2459
2460 case MEMMODEL_RELEASE:
46b35980 2461 case MEMMODEL_SYNC_RELEASE:
28875d67
RH
2462 case MEMMODEL_ACQ_REL:
2463 case MEMMODEL_SEQ_CST:
46b35980 2464 case MEMMODEL_SYNC_SEQ_CST:
28875d67
RH
2465 switch (mode)
2466 {
2467 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2468 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2469 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2470 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2471 default:
2472 gcc_unreachable ();
2473 }
2474 break;
2475
16df4ee6
RH
2476 default:
2477 gcc_unreachable ();
2478 }
2479
2480 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2481
6819a463 2482 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
16df4ee6 2483}
809d4ef1 2484\f
3b572406
RH
2485/* Begin the assembly file. */
2486
1bc7c5b6 2487static void
9c808aad 2488ia64_file_start (void)
1bc7c5b6
ZW
2489{
2490 default_file_start ();
2491 emit_safe_across_calls ();
2492}
2493
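/* Emit the .pred.safe_across_calls directive, listing the maximal ranges
   of predicate registers that are not call-used and hence are preserved
   across calls.  */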
3b572406 2494void
9c808aad 2495emit_safe_across_calls (void)
3b572406
RH
2496{
2497 unsigned int rs, re;
2498 int out_state;
2499
2500 rs = 1;
2501 out_state = 0;
2502 while (1)
2503 {
2504 while (rs < 64 && call_used_regs[PR_REG (rs)])
2505 rs++;
2506 if (rs >= 64)
2507 break;
2508 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2509 continue;
2510 if (out_state == 0)
2511 {
1bc7c5b6 2512 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
2513 out_state = 1;
2514 }
2515 else
1bc7c5b6 2516 fputc (',', asm_out_file);
3b572406 2517 if (re == rs + 1)
1bc7c5b6 2518 fprintf (asm_out_file, "p%u", rs);
3b572406 2519 else
1bc7c5b6 2520 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
2521 rs = re + 1;
2522 }
2523 if (out_state)
1bc7c5b6 2524 fputc ('\n', asm_out_file);
3b572406
RH
2525}
2526
812b587e
SE
2527/* Globalize a declaration. */
2528
2529static void
2530ia64_globalize_decl_name (FILE * stream, tree decl)
2531{
2532 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2533 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2534 if (version_attr)
2535 {
2536 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2537 const char *p = TREE_STRING_POINTER (v);
2538 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2539 }
2540 targetm.asm_out.globalize_label (stream, name);
2541 if (TREE_CODE (decl) == FUNCTION_DECL)
2542 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2543}
2544
97e242b0
RH
2545/* Helper function for ia64_compute_frame_size: find an appropriate general
2546 register to spill some special register to. SPECIAL_SPILL_MASK contains
2547 bits in GR0 to GR31 that have already been allocated by this routine.
2548 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 2549
97e242b0 2550static int
6fb5fa3c 2551find_gr_spill (enum ia64_frame_regs r, int try_locals)
97e242b0
RH
2552{
2553 int regno;
2554
6fb5fa3c
DB
2555 if (emitted_frame_related_regs[r] != 0)
2556 {
2557 regno = emitted_frame_related_regs[r];
2951f79b
JJ
2558 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2559 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
6fb5fa3c 2560 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
416ff32e 2561 else if (crtl->is_leaf
6fb5fa3c
DB
2562 && regno >= GR_REG (1) && regno <= GR_REG (31))
2563 current_frame_info.gr_used_mask |= 1 << regno;
2564
2565 return regno;
2566 }
2567
97e242b0
RH
2568 /* If this is a leaf function, first try an otherwise unused
2569 call-clobbered register. */
416ff32e 2570 if (crtl->is_leaf)
97e242b0
RH
2571 {
2572 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2573 if (! df_regs_ever_live_p (regno)
97e242b0
RH
2574 && call_used_regs[regno]
2575 && ! fixed_regs[regno]
2576 && ! global_regs[regno]
6fb5fa3c
DB
2577 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2578 && ! is_emitted (regno))
97e242b0
RH
2579 {
2580 current_frame_info.gr_used_mask |= 1 << regno;
2581 return regno;
2582 }
2583 }
2584
2585 if (try_locals)
2586 {
2587 regno = current_frame_info.n_local_regs;
9502c558
JW
2588 /* If there is a frame pointer, then we can't use loc79, because
2589 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2590 reg_name switching code in ia64_expand_prologue. */
2951f79b
JJ
2591 while (regno < (80 - frame_pointer_needed))
2592 if (! is_emitted (LOC_REG (regno++)))
2593 {
2594 current_frame_info.n_local_regs = regno;
2595 return LOC_REG (regno - 1);
2596 }
97e242b0
RH
2597 }
2598
2599 /* Failed to find a general register to spill to. Must use stack. */
2600 return 0;
2601}
2602
2603/* In order to make for nice schedules, we try to allocate every temporary
2604 to a different register. We must of course stay away from call-saved,
2605 fixed, and global registers. We must also stay away from registers
2606 allocated in current_frame_info.gr_used_mask, since those include regs
2607 used all through the prologue.
2608
2609 Any register allocated here must be used immediately. The idea is to
2610 aid scheduling, not to solve data flow problems. */
2611
2612static int last_scratch_gr_reg;
2613
2614static int
9c808aad 2615next_scratch_gr_reg (void)
97e242b0
RH
2616{
2617 int i, regno;
2618
2619 for (i = 0; i < 32; ++i)
2620 {
2621 regno = (last_scratch_gr_reg + i + 1) & 31;
2622 if (call_used_regs[regno]
2623 && ! fixed_regs[regno]
2624 && ! global_regs[regno]
2625 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2626 {
2627 last_scratch_gr_reg = regno;
2628 return regno;
2629 }
2630 }
2631
2632 /* There must be _something_ available. */
e820471b 2633 gcc_unreachable ();
97e242b0
RH
2634}
2635
2636/* Helper function for ia64_compute_frame_size, called through
2637 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2638
2639static void
9c808aad 2640mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2641{
97e242b0
RH
2642 unsigned int regno = REGNO (reg);
2643 if (regno < 32)
f95e79cc 2644 {
c8b622ff 2645 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
f95e79cc
RH
2646 for (i = 0; i < n; ++i)
2647 current_frame_info.gr_used_mask |= 1 << (regno + i);
2648 }
c65ebc55
JW
2649}
2650
6fb5fa3c 2651
c65ebc55
JW
2652/* Returns the number of bytes offset between the frame pointer and the stack
2653 pointer for the current function. SIZE is the number of bytes of space
2654 needed for local variables. */
97e242b0
RH
2655
2656static void
9c808aad 2657ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2658{
97e242b0
RH
2659 HOST_WIDE_INT total_size;
2660 HOST_WIDE_INT spill_size = 0;
2661 HOST_WIDE_INT extra_spill_size = 0;
2662 HOST_WIDE_INT pretend_args_size;
c65ebc55 2663 HARD_REG_SET mask;
97e242b0
RH
2664 int n_spilled = 0;
2665 int spilled_gr_p = 0;
2666 int spilled_fr_p = 0;
2667 unsigned int regno;
2951f79b
JJ
2668 int min_regno;
2669 int max_regno;
97e242b0 2670 int i;
c65ebc55 2671
97e242b0
RH
2672 if (current_frame_info.initialized)
2673 return;
294dac80 2674
97e242b0 2675 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2676 CLEAR_HARD_REG_SET (mask);
2677
97e242b0
RH
2678 /* Don't allocate scratches to the return register. */
2679 diddle_return_value (mark_reg_gr_used_mask, NULL);
2680
2681 /* Don't allocate scratches to the EH scratch registers. */
2682 if (cfun->machine->ia64_eh_epilogue_sp)
2683 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2684 if (cfun->machine->ia64_eh_epilogue_bsp)
2685 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2686
7b84aac0
EB
2687 /* Static stack checking uses r2 and r3. */
2688 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2689 current_frame_info.gr_used_mask |= 0xc;
2690
97e242b0
RH
2691 /* Find the size of the register stack frame. We have only 80 local
2692 registers, because we reserve 8 for the inputs and 8 for the
2693 outputs. */
2694
2695 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2696 since we'll be adjusting that down later. */
2697 regno = LOC_REG (78) + ! frame_pointer_needed;
2698 for (; regno >= LOC_REG (0); regno--)
6fb5fa3c 2699 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
97e242b0
RH
2700 break;
2701 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2702
3f67ac08
DM
2703 /* For functions marked with the syscall_linkage attribute, we must mark
2704 all eight input registers as in use, so that locals aren't visible to
2705 the caller. */
2706
2707 if (cfun->machine->n_varargs > 0
2708 || lookup_attribute ("syscall_linkage",
2709 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2710 current_frame_info.n_input_regs = 8;
2711 else
2712 {
2713 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
6fb5fa3c 2714 if (df_regs_ever_live_p (regno))
97e242b0
RH
2715 break;
2716 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2717 }
2718
2719 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
6fb5fa3c 2720 if (df_regs_ever_live_p (regno))
97e242b0
RH
2721 break;
2722 i = regno - OUT_REG (0) + 1;
2723
d26afa4f 2724#ifndef PROFILE_HOOK
97e242b0 2725 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2726 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2727 profiling, we need two output registers for the two bb_init_trace_func
2728 arguments. */
e3b5732b 2729 if (crtl->profile)
97e242b0 2730 i = MAX (i, 1);
d26afa4f 2731#endif
97e242b0
RH
2732 current_frame_info.n_output_regs = i;
2733
2734 /* ??? No rotating register support yet. */
2735 current_frame_info.n_rotate_regs = 0;
2736
2737 /* Discover which registers need spilling, and how much room that
9c808aad 2738 will take. Begin with floating point and general registers,
97e242b0
RH
2739 which will always wind up on the stack. */
2740
2741 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
6fb5fa3c 2742 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2743 {
2744 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2745 spill_size += 16;
2746 n_spilled += 1;
2747 spilled_fr_p = 1;
c65ebc55
JW
2748 }
2749
97e242b0 2750 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2751 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2752 {
2753 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2754 spill_size += 8;
2755 n_spilled += 1;
2756 spilled_gr_p = 1;
c65ebc55
JW
2757 }
2758
97e242b0 2759 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
6fb5fa3c 2760 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2761 {
2762 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2763 spill_size += 8;
2764 n_spilled += 1;
c65ebc55
JW
2765 }
2766
97e242b0
RH
2767 /* Now come all special registers that might get saved in other
2768 general registers. */
9c808aad 2769
97e242b0
RH
2770 if (frame_pointer_needed)
2771 {
6fb5fa3c 2772 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
0c35f902
JW
2773 /* If we did not get a register, then we take LOC79. This is guaranteed
2774 to be free, even if regs_ever_live is already set, because this is
2775 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2776 as we don't count loc79 above. */
6fb5fa3c 2777 if (current_frame_info.r[reg_fp] == 0)
0c35f902 2778 {
6fb5fa3c
DB
2779 current_frame_info.r[reg_fp] = LOC_REG (79);
2780 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
0c35f902 2781 }
97e242b0
RH
2782 }
2783
416ff32e 2784 if (! crtl->is_leaf)
c65ebc55 2785 {
97e242b0
RH
2786 /* Emit a save of BR0 if we call other functions. Do this even
2787 if this function doesn't return, as EH depends on this to be
2788 able to unwind the stack. */
2789 SET_HARD_REG_BIT (mask, BR_REG (0));
2790
6fb5fa3c
DB
2791 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2792 if (current_frame_info.r[reg_save_b0] == 0)
97e242b0 2793 {
ae1e2d4c 2794 extra_spill_size += 8;
97e242b0
RH
2795 n_spilled += 1;
2796 }
2797
2798 /* Similarly for ar.pfs. */
2799 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2800 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2801 if (current_frame_info.r[reg_save_ar_pfs] == 0)
97e242b0
RH
2802 {
2803 extra_spill_size += 8;
2804 n_spilled += 1;
2805 }
599aedd9
RH
2806
2807 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2808 registers are clobbered, so we fall back to the stack. */
6fb5fa3c 2809 current_frame_info.r[reg_save_gp]
e3b5732b 2810 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
6fb5fa3c 2811 if (current_frame_info.r[reg_save_gp] == 0)
599aedd9
RH
2812 {
2813 SET_HARD_REG_BIT (mask, GR_REG (1));
2814 spill_size += 8;
2815 n_spilled += 1;
2816 }
c65ebc55
JW
2817 }
2818 else
97e242b0 2819 {
6fb5fa3c 2820 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
97e242b0
RH
2821 {
2822 SET_HARD_REG_BIT (mask, BR_REG (0));
ae1e2d4c 2823 extra_spill_size += 8;
97e242b0
RH
2824 n_spilled += 1;
2825 }
f5bdba44 2826
6fb5fa3c 2827 if (df_regs_ever_live_p (AR_PFS_REGNUM))
f5bdba44
RH
2828 {
2829 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2830 current_frame_info.r[reg_save_ar_pfs]
2831 = find_gr_spill (reg_save_ar_pfs, 1);
2832 if (current_frame_info.r[reg_save_ar_pfs] == 0)
f5bdba44
RH
2833 {
2834 extra_spill_size += 8;
2835 n_spilled += 1;
2836 }
2837 }
97e242b0 2838 }
c65ebc55 2839
97e242b0
RH
2840 /* Unwind descriptor hackery: things are most efficient if we allocate
2841 consecutive GR save registers for RP, PFS, FP in that order. However,
2842 it is absolutely critical that FP get the only hard register that's
2843 guaranteed to be free, so we allocated it first. If all three did
2844 happen to be allocated hard regs, and are consecutive, rearrange them
6fb5fa3c
DB
2845 into the preferred order now.
2846
2847 If we have already emitted code for any of those registers,
2848 then it's already too late to change. */
2951f79b
JJ
2849 min_regno = MIN (current_frame_info.r[reg_fp],
2850 MIN (current_frame_info.r[reg_save_b0],
2851 current_frame_info.r[reg_save_ar_pfs]));
2852 max_regno = MAX (current_frame_info.r[reg_fp],
2853 MAX (current_frame_info.r[reg_save_b0],
2854 current_frame_info.r[reg_save_ar_pfs]));
2855 if (min_regno > 0
2856 && min_regno + 2 == max_regno
2857 && (current_frame_info.r[reg_fp] == min_regno + 1
2858 || current_frame_info.r[reg_save_b0] == min_regno + 1
2859 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2860 && (emitted_frame_related_regs[reg_save_b0] == 0
2861 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2862 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2863 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2864 && (emitted_frame_related_regs[reg_fp] == 0
2865 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
5527bf14 2866 {
2951f79b
JJ
2867 current_frame_info.r[reg_save_b0] = min_regno;
2868 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2869 current_frame_info.r[reg_fp] = min_regno + 2;
5527bf14
RH
2870 }
2871
97e242b0
RH
2872 /* See if we need to store the predicate register block. */
2873 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2874 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
97e242b0
RH
2875 break;
2876 if (regno <= PR_REG (63))
c65ebc55 2877 {
97e242b0 2878 SET_HARD_REG_BIT (mask, PR_REG (0));
6fb5fa3c
DB
2879 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2880 if (current_frame_info.r[reg_save_pr] == 0)
97e242b0
RH
2881 {
2882 extra_spill_size += 8;
2883 n_spilled += 1;
2884 }
2885
2886 /* ??? Mark them all as used so that register renaming and such
2887 are free to use them. */
2888 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2889 df_set_regs_ever_live (regno, true);
c65ebc55
JW
2890 }
2891
97e242b0 2892 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2893 ar.unat as well. The check for existing liveness allows inline asm
2894 to touch ar.unat. */
2895 if (spilled_gr_p || cfun->machine->n_varargs
6fb5fa3c 2896 || df_regs_ever_live_p (AR_UNAT_REGNUM))
97e242b0 2897 {
6fb5fa3c 2898 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
97e242b0 2899 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
6fb5fa3c
DB
2900 current_frame_info.r[reg_save_ar_unat]
2901 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2902 if (current_frame_info.r[reg_save_ar_unat] == 0)
97e242b0
RH
2903 {
2904 extra_spill_size += 8;
2905 n_spilled += 1;
2906 }
2907 }
2908
6fb5fa3c 2909 if (df_regs_ever_live_p (AR_LC_REGNUM))
97e242b0
RH
2910 {
2911 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
6fb5fa3c
DB
2912 current_frame_info.r[reg_save_ar_lc]
2913 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2914 if (current_frame_info.r[reg_save_ar_lc] == 0)
97e242b0
RH
2915 {
2916 extra_spill_size += 8;
2917 n_spilled += 1;
2918 }
2919 }
2920
2921 /* If we have an odd number of words of pretend arguments written to
2922 the stack, then the FR save area will be unaligned. We round the
2923 size of this area up to keep things 16 byte aligned. */
2924 if (spilled_fr_p)
38173d38 2925 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
97e242b0 2926 else
38173d38 2927 pretend_args_size = crtl->args.pretend_args_size;
97e242b0
RH
2928
2929 total_size = (spill_size + extra_spill_size + size + pretend_args_size
38173d38 2930 + crtl->outgoing_args_size);
97e242b0
RH
2931 total_size = IA64_STACK_ALIGN (total_size);
2932
2933 /* We always use the 16-byte scratch area provided by the caller, but
2934 if we are a leaf function, there's no one to which we need to provide
44bd7f65
EB
2935 a scratch area. However, if the function allocates dynamic stack space,
2936 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2937 so we need to cope. */
2938 if (crtl->is_leaf && !cfun->calls_alloca)
97e242b0
RH
2939 total_size = MAX (0, total_size - 16);
2940
c65ebc55 2941 current_frame_info.total_size = total_size;
97e242b0
RH
2942 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2943 current_frame_info.spill_size = spill_size;
2944 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2945 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2946 current_frame_info.n_spilled = n_spilled;
c65ebc55 2947 current_frame_info.initialized = reload_completed;
97e242b0
RH
2948}
2949
7b5cbb57
AS
2950/* Worker function for TARGET_CAN_ELIMINATE. */
2951
2952bool
2953ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2954{
416ff32e 2955 return (to == BR_REG (0) ? crtl->is_leaf : true);
7b5cbb57
AS
2956}
2957
97e242b0
RH
2958/* Compute the initial difference between the specified pair of registers. */
2959
2960HOST_WIDE_INT
9c808aad 2961ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2962{
2963 HOST_WIDE_INT offset;
2964
2965 ia64_compute_frame_size (get_frame_size ());
2966 switch (from)
2967 {
2968 case FRAME_POINTER_REGNUM:
e820471b 2969 switch (to)
97e242b0 2970 {
e820471b 2971 case HARD_FRAME_POINTER_REGNUM:
44bd7f65
EB
2972 offset = -current_frame_info.total_size;
2973 if (!crtl->is_leaf || cfun->calls_alloca)
2974 offset += 16 + crtl->outgoing_args_size;
e820471b
NS
2975 break;
2976
2977 case STACK_POINTER_REGNUM:
44bd7f65
EB
2978 offset = 0;
2979 if (!crtl->is_leaf || cfun->calls_alloca)
2980 offset += 16 + crtl->outgoing_args_size;
e820471b
NS
2981 break;
2982
2983 default:
2984 gcc_unreachable ();
97e242b0 2985 }
97e242b0 2986 break;
c65ebc55 2987
97e242b0
RH
2988 case ARG_POINTER_REGNUM:
2989 /* Arguments start above the 16 byte save area, unless stdarg
2990 in which case we store through the 16 byte save area. */
e820471b
NS
2991 switch (to)
2992 {
2993 case HARD_FRAME_POINTER_REGNUM:
38173d38 2994 offset = 16 - crtl->args.pretend_args_size;
e820471b
NS
2995 break;
2996
2997 case STACK_POINTER_REGNUM:
2998 offset = (current_frame_info.total_size
38173d38 2999 + 16 - crtl->args.pretend_args_size);
e820471b
NS
3000 break;
3001
3002 default:
3003 gcc_unreachable ();
3004 }
97e242b0
RH
3005 break;
3006
97e242b0 3007 default:
e820471b 3008 gcc_unreachable ();
97e242b0
RH
3009 }
3010
3011 return offset;
c65ebc55
JW
3012}
3013
97e242b0
RH
3014/* If there are more than a trivial number of register spills, we use
3015 two interleaved iterators so that we can get two memory references
3016 per insn group.
3017
3018 In order to simplify things in the prologue and epilogue expanders,
3019 we use helper functions to fix up the memory references after the
3020 fact with the appropriate offsets to a POST_MODIFY memory mode.
3021 The following data structure tracks the state of the two iterators
3022 while insns are being emitted. */
3023
3024struct spill_fill_data
c65ebc55 3025{
dd3d2b35 3026 rtx_insn *init_after; /* point at which to emit initializations */
97e242b0
RH
3027 rtx init_reg[2]; /* initial base register */
3028 rtx iter_reg[2]; /* the iterator registers */
3029 rtx *prev_addr[2]; /* address of last memory use */
dd3d2b35 3030 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
3031 HOST_WIDE_INT prev_off[2]; /* last offset */
3032 int n_iter; /* number of iterators in use */
3033 int next_iter; /* next iterator to use */
3034 unsigned int save_gr_used_mask;
3035};
3036
3037static struct spill_fill_data spill_fill_data;
c65ebc55 3038
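/* Set up the spill/fill iterators for a run of N_SPILLS spills whose first
   slot is at offset CFA_OFF from the CFA, based off INIT_REG.  A second
   iterator is used when there are more than two spills.  */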
97e242b0 3039static void
9c808aad 3040setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
3041{
3042 int i;
3043
3044 spill_fill_data.init_after = get_last_insn ();
3045 spill_fill_data.init_reg[0] = init_reg;
3046 spill_fill_data.init_reg[1] = init_reg;
3047 spill_fill_data.prev_addr[0] = NULL;
3048 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
3049 spill_fill_data.prev_insn[0] = NULL;
3050 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
3051 spill_fill_data.prev_off[0] = cfa_off;
3052 spill_fill_data.prev_off[1] = cfa_off;
3053 spill_fill_data.next_iter = 0;
3054 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3055
3056 spill_fill_data.n_iter = 1 + (n_spills > 2);
3057 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 3058 {
97e242b0
RH
3059 int regno = next_scratch_gr_reg ();
3060 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3061 current_frame_info.gr_used_mask |= 1 << regno;
3062 }
3063}
3064
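/* Release the scratch registers grabbed for the spill/fill iterators.  */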
3065static void
9c808aad 3066finish_spill_pointers (void)
97e242b0
RH
3067{
3068 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3069}
c65ebc55 3070
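/* Return a MEM through which REG can be spilled or filled at offset
   CFA_OFF, advancing the current iterator.  Earlier references are patched
   into POST_MODIFY form when the displacement allows it.  */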
97e242b0 3071static rtx
9c808aad 3072spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
3073{
3074 int iter = spill_fill_data.next_iter;
3075 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3076 rtx disp_rtx = GEN_INT (disp);
3077 rtx mem;
3078
3079 if (spill_fill_data.prev_addr[iter])
3080 {
13f70342 3081 if (satisfies_constraint_N (disp_rtx))
703cf211
BS
3082 {
3083 *spill_fill_data.prev_addr[iter]
3084 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3085 gen_rtx_PLUS (DImode,
3086 spill_fill_data.iter_reg[iter],
3087 disp_rtx));
bbbbb16a
ILT
3088 add_reg_note (spill_fill_data.prev_insn[iter],
3089 REG_INC, spill_fill_data.iter_reg[iter]);
703cf211 3090 }
c65ebc55
JW
3091 else
3092 {
97e242b0 3093 /* ??? Could use register post_modify for loads. */
13f70342 3094 if (!satisfies_constraint_I (disp_rtx))
97e242b0
RH
3095 {
3096 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3097 emit_move_insn (tmp, disp_rtx);
3098 disp_rtx = tmp;
3099 }
3100 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3101 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 3102 }
97e242b0
RH
3103 }
3104 /* Micro-optimization: if we've created a frame pointer, it's at
3105 CFA 0, which may allow the real iterator to be initialized lower,
3106 slightly increasing parallelism. Also, if there are few saves
3107 it may eliminate the iterator entirely. */
3108 else if (disp == 0
3109 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3110 && frame_pointer_needed)
3111 {
3112 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 3113 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
3114 return mem;
3115 }
3116 else
3117 {
dd3d2b35
DM
3118 rtx seq;
3119 rtx_insn *insn;
809d4ef1 3120
97e242b0
RH
3121 if (disp == 0)
3122 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3123 spill_fill_data.init_reg[iter]);
3124 else
c65ebc55 3125 {
97e242b0
RH
3126 start_sequence ();
3127
13f70342 3128 if (!satisfies_constraint_I (disp_rtx))
c65ebc55 3129 {
97e242b0
RH
3130 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3131 emit_move_insn (tmp, disp_rtx);
3132 disp_rtx = tmp;
c65ebc55 3133 }
97e242b0
RH
3134
3135 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3136 spill_fill_data.init_reg[iter],
3137 disp_rtx));
3138
2f937369 3139 seq = get_insns ();
97e242b0 3140 end_sequence ();
c65ebc55 3141 }
809d4ef1 3142
97e242b0
RH
3143 /* Careful for being the first insn in a sequence. */
3144 if (spill_fill_data.init_after)
892a4e60 3145 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 3146 else
bc08aefe 3147 {
dd3d2b35 3148 rtx_insn *first = get_insns ();
bc08aefe 3149 if (first)
892a4e60 3150 insn = emit_insn_before (seq, first);
bc08aefe 3151 else
892a4e60 3152 insn = emit_insn (seq);
bc08aefe 3153 }
892a4e60 3154 spill_fill_data.init_after = insn;
97e242b0 3155 }
c65ebc55 3156
97e242b0 3157 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 3158
97e242b0
RH
3159 /* ??? Not all of the spills are for varargs, but some of them are.
3160 The rest of the spills belong in an alias set of their own. But
3161 it doesn't actually hurt to include them here. */
ba4828e0 3162 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 3163
97e242b0
RH
3164 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3165 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 3166
97e242b0
RH
3167 if (++iter >= spill_fill_data.n_iter)
3168 iter = 0;
3169 spill_fill_data.next_iter = iter;
c65ebc55 3170
97e242b0
RH
3171 return mem;
3172}
5527bf14 3173
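/* Emit a spill of REG to its slot at CFA_OFF using MOVE_FN.  If FRAME_REG
   is nonnull, mark the store as frame related and attach the
   REG_CFA_OFFSET note that the unwind machinery expects.  */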
97e242b0 3174static void
9c808aad
AJ
3175do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3176 rtx frame_reg)
97e242b0 3177{
703cf211 3178 int iter = spill_fill_data.next_iter;
dd3d2b35
DM
3179 rtx mem;
3180 rtx_insn *insn;
5527bf14 3181
97e242b0 3182 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 3183 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 3184 spill_fill_data.prev_insn[iter] = insn;
5527bf14 3185
97e242b0
RH
3186 if (frame_reg)
3187 {
3188 rtx base;
3189 HOST_WIDE_INT off;
3190
3191 RTX_FRAME_RELATED_P (insn) = 1;
3192
9c808aad 3193 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
3194 through a pair of interleaved post_modify iterators. Just
3195 provide the correct answer. */
3196
3197 if (frame_pointer_needed)
3198 {
3199 base = hard_frame_pointer_rtx;
3200 off = - cfa_off;
5527bf14 3201 }
97e242b0
RH
3202 else
3203 {
3204 base = stack_pointer_rtx;
3205 off = current_frame_info.total_size - cfa_off;
3206 }
3207
5c255b57 3208 add_reg_note (insn, REG_CFA_OFFSET,
f7df4a84 3209 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
0a81f074
RS
3210 plus_constant (Pmode,
3211 base, off)),
bbbbb16a 3212 frame_reg));
c65ebc55
JW
3213 }
3214}
3215
97e242b0 3216static void
9c808aad 3217do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 3218{
703cf211 3219 int iter = spill_fill_data.next_iter;
dd3d2b35 3220 rtx_insn *insn;
703cf211
BS
3221
3222 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3223 GEN_INT (cfa_off)));
3224 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
3225}
3226
870f9ec0
RH
 3227/* Wrapper functions that discard the CONST_INT spill offset.  These
3228 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 3229 use a consistent function interface. */
870f9ec0
RH
3230
3231static rtx
9c808aad 3232gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3233{
3234 return gen_movdi (dest, src);
3235}
3236
3237static rtx
9c808aad 3238gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3239{
3240 return gen_fr_spill (dest, src);
3241}
3242
3243static rtx
9c808aad 3244gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3245{
3246 return gen_fr_restore (dest, src);
3247}
c65ebc55 3248
7b84aac0
EB
3249#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3250
3251/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3252#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
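/* A worked instance of the formula above, for illustration: with the
   architectural maximum of N = 96 stacked registers, the backing store
   needs (96 + 96/63 + 1) * 8 = (96 + 1 + 1) * 8 = 784 bytes, i.e. 8
   bytes per register plus room for the RSE's NaT-collection slots (one
   per 63 registers, plus one more for a partial group).  */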
3253
3254/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
0dca9cd8
EB
3255 inclusive. These are offsets from the current stack pointer. BS_SIZE
3256 is the size of the backing store. ??? This clobbers r2 and r3. */
7b84aac0
EB
3257
3258static void
0dca9cd8
EB
3259ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3260 int bs_size)
7b84aac0 3261{
7b84aac0
EB
3262 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3263 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
0dca9cd8
EB
3264 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3265
3266 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3267 of the Register Stack Engine. We also need to probe it after checking
3268 that the 2 stacks don't overlap. */
3269 emit_insn (gen_bsp_value (r3));
3270 emit_move_insn (r2, GEN_INT (-(first + size)));
3271
3272 /* Compare current value of BSP and SP registers. */
f7df4a84
RS
3273 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3274 r3, stack_pointer_rtx)));
0dca9cd8
EB
3275
3276 /* Compute the address of the probe for the Backing Store (which grows
3277 towards higher addresses). We probe only at the first offset of
 3278 the next page because some OSes (e.g. Linux/ia64) only extend the
 3279 backing store when this specific address is hit (but generate a SEGV
 3280 on other addresses).  Page size is the worst case (4KB).  The reserve
3281 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3282 Also compute the address of the last probe for the memory stack
3283 (which grows towards lower addresses). */
f7df4a84
RS
3284 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3285 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
0dca9cd8
EB
3286
3287 /* Compare them and raise SEGV if the former has topped the latter. */
3288 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3289 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
f7df4a84
RS
3290 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3291 r3, r2))));
3292 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
0dca9cd8
EB
3293 const0_rtx),
3294 const0_rtx));
3295 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3296 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3297 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3298 GEN_INT (11))));
7b84aac0
EB
3299
3300 /* Probe the Backing Store if necessary. */
3301 if (bs_size > 0)
3302 emit_stack_probe (r3);
3303
3304 /* Probe the memory stack if necessary. */
3305 if (size == 0)
3306 ;
3307
3308 /* See if we have a constant small number of probes to generate. If so,
3309 that's the easy case. */
3310 else if (size <= PROBE_INTERVAL)
3311 emit_stack_probe (r2);
3312
3313 /* The run-time loop is made up of 8 insns in the generic case while this
3314 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
3315 else if (size <= 4 * PROBE_INTERVAL)
3316 {
3317 HOST_WIDE_INT i;
3318
3319 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
f7df4a84 3320 emit_insn (gen_rtx_SET (r2,
7b84aac0
EB
3321 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3322 emit_stack_probe (r2);
3323
3324 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3325 it exceeds SIZE. If only two probes are needed, this will not
3326 generate any code. Then probe at FIRST + SIZE. */
3327 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3328 {
f7df4a84 3329 emit_insn (gen_rtx_SET (r2,
f65e3801 3330 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
7b84aac0
EB
3331 emit_stack_probe (r2);
3332 }
3333
f7df4a84 3334 emit_insn (gen_rtx_SET (r2,
f65e3801 3335 plus_constant (Pmode, r2,
7b84aac0
EB
3336 (i - PROBE_INTERVAL) - size)));
3337 emit_stack_probe (r2);
3338 }
3339
3340 /* Otherwise, do the same as above, but in a loop. Note that we must be
3341 extra careful with variables wrapping around because we might be at
3342 the very top (or the very bottom) of the address space and we have
3343 to be able to handle this case properly; in particular, we use an
3344 equality test for the loop condition. */
3345 else
3346 {
3347 HOST_WIDE_INT rounded_size;
3348
3349 emit_move_insn (r2, GEN_INT (-first));
3350
3351
3352 /* Step 1: round SIZE to the previous multiple of the interval. */
3353
3354 rounded_size = size & -PROBE_INTERVAL;
3355
3356
3357 /* Step 2: compute initial and final value of the loop counter. */
3358
3359 /* TEST_ADDR = SP + FIRST. */
f7df4a84 3360 emit_insn (gen_rtx_SET (r2,
7b84aac0
EB
3361 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3362
3363 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3364 if (rounded_size > (1 << 21))
3365 {
3366 emit_move_insn (r3, GEN_INT (-rounded_size));
f7df4a84 3367 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
7b84aac0
EB
3368 }
3369 else
f7df4a84
RS
3370 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3371 GEN_INT (-rounded_size))));
7b84aac0
EB
3372
3373
3374 /* Step 3: the loop
3375
3376 while (TEST_ADDR != LAST_ADDR)
3377 {
3378 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3379 probe at TEST_ADDR
3380 }
3381
 3382 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
 3383 until N * PROBE_INTERVAL is equal to ROUNDED_SIZE.  */
3384
3385 emit_insn (gen_probe_stack_range (r2, r2, r3));
3386
3387
3388 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3389 that SIZE is equal to ROUNDED_SIZE. */
3390
3391 /* TEMP = SIZE - ROUNDED_SIZE. */
3392 if (size != rounded_size)
3393 {
f7df4a84
RS
3394 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3395 rounded_size - size)));
7b84aac0
EB
3396 emit_stack_probe (r2);
3397 }
3398 }
3399
3400 /* Make sure nothing is scheduled before we are done. */
3401 emit_insn (gen_blockage ());
3402}
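/* For illustration, assuming the usual 4KB probe interval: a call with
   FIRST = STACK_CHECK_PROTECT and SIZE = 12000 takes the unrolled branch
   above (12000 <= 4 * 4096) and probes the memory stack at FIRST + 4096,
   FIRST + 8192 and finally FIRST + 12000 below the incoming sp, after the
   BSP/SP overlap check and the optional backing store probe.  */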
3403
3404/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3405 absolute addresses. */
3406
3407const char *
3408output_probe_stack_range (rtx reg1, rtx reg2)
3409{
3410 static int labelno = 0;
3411 char loop_lab[32], end_lab[32];
3412 rtx xops[3];
3413
3414 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
3415 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
3416
3417 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3418
3419 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
3420 xops[0] = reg1;
3421 xops[1] = reg2;
3422 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3423 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3424 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
3425 assemble_name_raw (asm_out_file, end_lab);
3426 fputc ('\n', asm_out_file);
3427
3428 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3429 xops[1] = GEN_INT (-PROBE_INTERVAL);
3430 output_asm_insn ("addl %0 = %1, %0", xops);
3431 fputs ("\t;;\n", asm_out_file);
3432
3433 /* Probe at TEST_ADDR and branch. */
3434 output_asm_insn ("probe.w.fault %0, 0", xops);
3435 fprintf (asm_out_file, "\tbr ");
3436 assemble_name_raw (asm_out_file, loop_lab);
3437 fputc ('\n', asm_out_file);
3438
3439 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
3440
3441 return "";
3442}
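/* The loop emitted above has roughly this shape (register and predicate
   numbers depend on the operands, and the -4096 step assumes the default
   probe interval):

	.LPSRL0:
	cmp.eq p6, p7 = r2, r3
	(p6) br.cond.dpnt .LPSRE0
	addl r2 = -4096, r2
	;;
	probe.w.fault r2, 0
	br .LPSRL0
	.LPSRE0:
*/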
3443
c65ebc55
JW
3444/* Called after register allocation to add any instructions needed for the
3445 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 3446 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
3447 to intermix instructions with the saves of the caller saved registers. In
3448 some cases, it might be necessary to emit a barrier instruction as the last
3449 insn to prevent such scheduling.
3450
3451 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
3452 so that the debug info generation code can handle them properly.
3453
073a8998 3454 The register save area is laid out like so:
97e242b0
RH
3455 cfa+16
3456 [ varargs spill area ]
3457 [ fr register spill area ]
3458 [ br register spill area ]
3459 [ ar register spill area ]
3460 [ pr register spill area ]
3461 [ gr register spill area ] */
c65ebc55
JW
3462
3463/* ??? Get inefficient code when the frame size is larger than can fit in an
3464 adds instruction. */
3465
c65ebc55 3466void
9c808aad 3467ia64_expand_prologue (void)
c65ebc55 3468{
dd3d2b35
DM
3469 rtx_insn *insn;
3470 rtx ar_pfs_save_reg, ar_unat_save_reg;
97e242b0
RH
3471 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3472 rtx reg, alt_reg;
3473
3474 ia64_compute_frame_size (get_frame_size ());
3475 last_scratch_gr_reg = 15;
3476
a11e0df4 3477 if (flag_stack_usage_info)
d3c12306
EB
3478 current_function_static_stack_size = current_frame_info.total_size;
3479
7b84aac0 3480 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
0dca9cd8
EB
3481 {
3482 HOST_WIDE_INT size = current_frame_info.total_size;
3483 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3484 + current_frame_info.n_local_regs);
3485
3486 if (crtl->is_leaf && !cfun->calls_alloca)
3487 {
3488 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3489 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3490 size - STACK_CHECK_PROTECT,
3491 bs_size);
3492 else if (size + bs_size > STACK_CHECK_PROTECT)
3493 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3494 }
3495 else if (size + bs_size > 0)
3496 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3497 }
7b84aac0 3498
6fb5fa3c
DB
3499 if (dump_file)
3500 {
3501 fprintf (dump_file, "ia64 frame related registers "
3502 "recorded in current_frame_info.r[]:\n");
3503#define PRINTREG(a) if (current_frame_info.r[a]) \
3504 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3505 PRINTREG(reg_fp);
3506 PRINTREG(reg_save_b0);
3507 PRINTREG(reg_save_pr);
3508 PRINTREG(reg_save_ar_pfs);
3509 PRINTREG(reg_save_ar_unat);
3510 PRINTREG(reg_save_ar_lc);
3511 PRINTREG(reg_save_gp);
3512#undef PRINTREG
3513 }
3514
97e242b0
RH
3515 /* If there is no epilogue, then we don't need some prologue insns.
3516 We need to avoid emitting the dead prologue insns, because flow
3517 will complain about them. */
c65ebc55
JW
3518 if (optimize)
3519 {
97e242b0 3520 edge e;
9924d7d8 3521 edge_iterator ei;
97e242b0 3522
fefa31b5 3523 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
c65ebc55
JW
3524 if ((e->flags & EDGE_FAKE) == 0
3525 && (e->flags & EDGE_FALLTHRU) != 0)
3526 break;
3527 epilogue_p = (e != NULL);
3528 }
3529 else
3530 epilogue_p = 1;
3531
97e242b0
RH
3532 /* Set the local, input, and output register names. We need to do this
3533 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3534 half. If we use in/loc/out register names, then we get assembler errors
3535 in crtn.S because there is no alloc insn or regstk directive in there. */
3536 if (! TARGET_REG_NAMES)
3537 {
3538 int inputs = current_frame_info.n_input_regs;
3539 int locals = current_frame_info.n_local_regs;
3540 int outputs = current_frame_info.n_output_regs;
3541
3542 for (i = 0; i < inputs; i++)
3543 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3544 for (i = 0; i < locals; i++)
3545 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3546 for (i = 0; i < outputs; i++)
3547 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3548 }
c65ebc55 3549
97e242b0
RH
3550 /* Set the frame pointer register name. The regnum is logically loc79,
3551 but of course we'll not have allocated that many locals. Rather than
3552 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
3553 /* ??? This code means that we can never use one local register when
3554 there is a frame pointer. loc79 gets wasted in this case, as it is
3555 renamed to a register that will never be used. See also the try_locals
3556 code in find_gr_spill. */
6fb5fa3c 3557 if (current_frame_info.r[reg_fp])
97e242b0
RH
3558 {
3559 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3560 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3561 = reg_names[current_frame_info.r[reg_fp]];
3562 reg_names[current_frame_info.r[reg_fp]] = tmp;
97e242b0 3563 }
c65ebc55 3564
97e242b0
RH
3565 /* We don't need an alloc instruction if we've used no outputs or locals. */
3566 if (current_frame_info.n_local_regs == 0
2ed4af6f 3567 && current_frame_info.n_output_regs == 0
38173d38 3568 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
f5bdba44 3569 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
3570 {
3571 /* If there is no alloc, but there are input registers used, then we
3572 need a .regstk directive. */
3573 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3574 ar_pfs_save_reg = NULL_RTX;
3575 }
3576 else
3577 {
3578 current_frame_info.need_regstk = 0;
c65ebc55 3579
6fb5fa3c
DB
3580 if (current_frame_info.r[reg_save_ar_pfs])
3581 {
3582 regno = current_frame_info.r[reg_save_ar_pfs];
3583 reg_emitted (reg_save_ar_pfs);
3584 }
97e242b0
RH
3585 else
3586 regno = next_scratch_gr_reg ();
3587 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3588
9c808aad 3589 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
3590 GEN_INT (current_frame_info.n_input_regs),
3591 GEN_INT (current_frame_info.n_local_regs),
3592 GEN_INT (current_frame_info.n_output_regs),
3593 GEN_INT (current_frame_info.n_rotate_regs)));
9f2ff8e5
RH
3594 if (current_frame_info.r[reg_save_ar_pfs])
3595 {
3596 RTX_FRAME_RELATED_P (insn) = 1;
3597 add_reg_note (insn, REG_CFA_REGISTER,
f7df4a84 3598 gen_rtx_SET (ar_pfs_save_reg,
9f2ff8e5
RH
3599 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3600 }
97e242b0 3601 }
c65ebc55 3602
97e242b0 3603 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 3604
26a110f5 3605 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
3606 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3607 stack_pointer_rtx, 0);
c65ebc55 3608
97e242b0
RH
3609 if (frame_pointer_needed)
3610 {
3611 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3612 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57
RH
3613
3614 /* Force the unwind info to recognize this as defining a new CFA,
3615 rather than some temp register setup. */
3616 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
97e242b0 3617 }
c65ebc55 3618
97e242b0
RH
3619 if (current_frame_info.total_size != 0)
3620 {
3621 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3622 rtx offset;
c65ebc55 3623
13f70342 3624 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3625 offset = frame_size_rtx;
3626 else
3627 {
3628 regno = next_scratch_gr_reg ();
9c808aad 3629 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
3630 emit_move_insn (offset, frame_size_rtx);
3631 }
c65ebc55 3632
97e242b0
RH
3633 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3634 stack_pointer_rtx, offset));
c65ebc55 3635
97e242b0
RH
3636 if (! frame_pointer_needed)
3637 {
3638 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3639 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 3640 gen_rtx_SET (stack_pointer_rtx,
5c255b57
RH
3641 gen_rtx_PLUS (DImode,
3642 stack_pointer_rtx,
3643 frame_size_rtx)));
97e242b0 3644 }
c65ebc55 3645
97e242b0
RH
3646 /* ??? At this point we must generate a magic insn that appears to
3647 modify the stack pointer, the frame pointer, and all spill
3648 iterators. This would allow the most scheduling freedom. For
3649 now, just hard stop. */
3650 emit_insn (gen_blockage ());
3651 }
c65ebc55 3652
97e242b0
RH
3653 /* Must copy out ar.unat before doing any integer spills. */
3654 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 3655 {
6fb5fa3c
DB
3656 if (current_frame_info.r[reg_save_ar_unat])
3657 {
3658 ar_unat_save_reg
3659 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3660 reg_emitted (reg_save_ar_unat);
3661 }
97e242b0 3662 else
c65ebc55 3663 {
97e242b0
RH
3664 alt_regno = next_scratch_gr_reg ();
3665 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3666 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 3667 }
c65ebc55 3668
97e242b0
RH
3669 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3670 insn = emit_move_insn (ar_unat_save_reg, reg);
5c255b57
RH
3671 if (current_frame_info.r[reg_save_ar_unat])
3672 {
3673 RTX_FRAME_RELATED_P (insn) = 1;
3674 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3675 }
97e242b0
RH
3676
3677 /* Even if we're not going to generate an epilogue, we still
3678 need to save the register so that EH works. */
6fb5fa3c 3679 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
d0e82870 3680 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
3681 }
3682 else
97e242b0
RH
3683 ar_unat_save_reg = NULL_RTX;
3684
3685 /* Spill all varargs registers. Do this before spilling any GR registers,
3686 since we want the UNAT bits for the GR registers to override the UNAT
3687 bits from varargs, which we don't care about. */
c65ebc55 3688
97e242b0
RH
3689 cfa_off = -16;
3690 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 3691 {
97e242b0 3692 reg = gen_rtx_REG (DImode, regno);
870f9ec0 3693 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 3694 }
c65ebc55 3695
97e242b0
RH
3696 /* Locate the bottom of the register save area. */
3697 cfa_off = (current_frame_info.spill_cfa_off
3698 + current_frame_info.spill_size
3699 + current_frame_info.extra_spill_size);
c65ebc55 3700
97e242b0
RH
3701 /* Save the predicate register block either in a register or in memory. */
3702 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3703 {
3704 reg = gen_rtx_REG (DImode, PR_REG (0));
6fb5fa3c 3705 if (current_frame_info.r[reg_save_pr] != 0)
1ff5b671 3706 {
6fb5fa3c
DB
3707 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3708 reg_emitted (reg_save_pr);
97e242b0 3709 insn = emit_move_insn (alt_reg, reg);
1ff5b671 3710
97e242b0
RH
3711 /* ??? Denote pr spill/fill by a DImode move that modifies all
3712 64 hard registers. */
1ff5b671 3713 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3714 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
46327bc5 3715
97e242b0
RH
3716 /* Even if we're not going to generate an epilogue, we still
3717 need to save the register so that EH works. */
3718 if (! epilogue_p)
d0e82870 3719 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
3720 }
3721 else
97e242b0
RH
3722 {
3723 alt_regno = next_scratch_gr_reg ();
3724 alt_reg = gen_rtx_REG (DImode, alt_regno);
3725 insn = emit_move_insn (alt_reg, reg);
870f9ec0 3726 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3727 cfa_off -= 8;
3728 }
c65ebc55
JW
3729 }
3730
97e242b0
RH
3731 /* Handle AR regs in numerical order. All of them get special handling. */
3732 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6fb5fa3c 3733 && current_frame_info.r[reg_save_ar_unat] == 0)
c65ebc55 3734 {
97e242b0 3735 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 3736 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 3737 cfa_off -= 8;
c65ebc55 3738 }
97e242b0
RH
3739
3740 /* The alloc insn already copied ar.pfs into a general register. The
3741 only thing we have to do now is copy that register to a stack slot
3742 if we'd not allocated a local register for the job. */
f5bdba44 3743 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
6fb5fa3c 3744 && current_frame_info.r[reg_save_ar_pfs] == 0)
c65ebc55 3745 {
97e242b0 3746 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 3747 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
3748 cfa_off -= 8;
3749 }
3750
3751 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3752 {
3753 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
6fb5fa3c 3754 if (current_frame_info.r[reg_save_ar_lc] != 0)
97e242b0 3755 {
6fb5fa3c
DB
3756 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3757 reg_emitted (reg_save_ar_lc);
97e242b0
RH
3758 insn = emit_move_insn (alt_reg, reg);
3759 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 3760 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
97e242b0
RH
3761
3762 /* Even if we're not going to generate an epilogue, we still
3763 need to save the register so that EH works. */
3764 if (! epilogue_p)
d0e82870 3765 emit_insn (gen_prologue_use (alt_reg));
97e242b0 3766 }
c65ebc55
JW
3767 else
3768 {
97e242b0
RH
3769 alt_regno = next_scratch_gr_reg ();
3770 alt_reg = gen_rtx_REG (DImode, alt_regno);
3771 emit_move_insn (alt_reg, reg);
870f9ec0 3772 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3773 cfa_off -= 8;
3774 }
3775 }
3776
ae1e2d4c
AS
3777 /* Save the return pointer. */
3778 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3779 {
3780 reg = gen_rtx_REG (DImode, BR_REG (0));
6fb5fa3c 3781 if (current_frame_info.r[reg_save_b0] != 0)
ae1e2d4c 3782 {
6fb5fa3c
DB
3783 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3784 reg_emitted (reg_save_b0);
ae1e2d4c
AS
3785 insn = emit_move_insn (alt_reg, reg);
3786 RTX_FRAME_RELATED_P (insn) = 1;
f7df4a84 3787 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
ae1e2d4c
AS
3788
3789 /* Even if we're not going to generate an epilogue, we still
3790 need to save the register so that EH works. */
3791 if (! epilogue_p)
3792 emit_insn (gen_prologue_use (alt_reg));
3793 }
3794 else
3795 {
3796 alt_regno = next_scratch_gr_reg ();
3797 alt_reg = gen_rtx_REG (DImode, alt_regno);
3798 emit_move_insn (alt_reg, reg);
3799 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3800 cfa_off -= 8;
3801 }
3802 }
3803
6fb5fa3c 3804 if (current_frame_info.r[reg_save_gp])
599aedd9 3805 {
6fb5fa3c 3806 reg_emitted (reg_save_gp);
599aedd9 3807 insn = emit_move_insn (gen_rtx_REG (DImode,
6fb5fa3c 3808 current_frame_info.r[reg_save_gp]),
599aedd9 3809 pic_offset_table_rtx);
599aedd9
RH
3810 }
3811
97e242b0 3812 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3813 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3814 + current_frame_info.spill_size));
97e242b0
RH
3815
3816 /* Spill all general registers. */
3817 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3818 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3819 {
3820 reg = gen_rtx_REG (DImode, regno);
3821 do_spill (gen_gr_spill, reg, cfa_off, reg);
3822 cfa_off -= 8;
3823 }
3824
97e242b0
RH
3825 /* Spill the rest of the BR registers. */
3826 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3827 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3828 {
3829 alt_regno = next_scratch_gr_reg ();
3830 alt_reg = gen_rtx_REG (DImode, alt_regno);
3831 reg = gen_rtx_REG (DImode, regno);
3832 emit_move_insn (alt_reg, reg);
870f9ec0 3833 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3834 cfa_off -= 8;
3835 }
3836
3837 /* Align the frame and spill all FR registers. */
3838 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3839 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3840 {
e820471b 3841 gcc_assert (!(cfa_off & 15));
02befdf4 3842 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3843 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
3844 cfa_off -= 16;
3845 }
3846
e820471b 3847 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3848
3849 finish_spill_pointers ();
c65ebc55
JW
3850}
3851
8e7745dc
DR
3852/* Output the textual info surrounding the prologue. */
3853
3854void
3855ia64_start_function (FILE *file, const char *fnname,
3856 tree decl ATTRIBUTE_UNUSED)
3857{
4b12e93d
TG
3858#if TARGET_ABI_OPEN_VMS
3859 vms_start_function (fnname);
8e7745dc
DR
3860#endif
3861
3862 fputs ("\t.proc ", file);
3863 assemble_name (file, fnname);
3864 fputc ('\n', file);
3865 ASM_OUTPUT_LABEL (file, fnname);
3866}
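/* For a function named foo this emits, right before the function body:

	.proc foo
   foo:

   plus the VMS-specific preamble when TARGET_ABI_OPEN_VMS.  */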
3867
c65ebc55 3868/* Called after register allocation to add any instructions needed for the
5519a4f9 3869 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 3870 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
3871 to intermix instructions with the saves of the caller saved registers. In
3872 some cases, it might be necessary to emit a barrier instruction as the last
3873 insn to prevent such scheduling. */
3874
3875void
9c808aad 3876ia64_expand_epilogue (int sibcall_p)
c65ebc55 3877{
dd3d2b35
DM
3878 rtx_insn *insn;
3879 rtx reg, alt_reg, ar_unat_save_reg;
97e242b0
RH
3880 int regno, alt_regno, cfa_off;
3881
3882 ia64_compute_frame_size (get_frame_size ());
3883
3884 /* If there is a frame pointer, then we use it instead of the stack
3885 pointer, so that the stack pointer does not need to be valid when
3886 the epilogue starts. See EXIT_IGNORE_STACK. */
3887 if (frame_pointer_needed)
3888 setup_spill_pointers (current_frame_info.n_spilled,
3889 hard_frame_pointer_rtx, 0);
3890 else
9c808aad 3891 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
3892 current_frame_info.total_size);
3893
3894 if (current_frame_info.total_size != 0)
3895 {
3896 /* ??? At this point we must generate a magic insn that appears to
3897 modify the spill iterators and the frame pointer. This would
3898 allow the most scheduling freedom. For now, just hard stop. */
3899 emit_insn (gen_blockage ());
3900 }
3901
3902 /* Locate the bottom of the register save area. */
3903 cfa_off = (current_frame_info.spill_cfa_off
3904 + current_frame_info.spill_size
3905 + current_frame_info.extra_spill_size);
3906
3907 /* Restore the predicate registers. */
3908 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3909 {
6fb5fa3c
DB
3910 if (current_frame_info.r[reg_save_pr] != 0)
3911 {
3912 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3913 reg_emitted (reg_save_pr);
3914 }
97e242b0
RH
3915 else
3916 {
3917 alt_regno = next_scratch_gr_reg ();
3918 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3919 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3920 cfa_off -= 8;
3921 }
3922 reg = gen_rtx_REG (DImode, PR_REG (0));
3923 emit_move_insn (reg, alt_reg);
3924 }
3925
3926 /* Restore the application registers. */
3927
3928 /* Load the saved unat from the stack, but do not restore it until
3929 after the GRs have been restored. */
3930 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3931 {
6fb5fa3c
DB
3932 if (current_frame_info.r[reg_save_ar_unat] != 0)
3933 {
3934 ar_unat_save_reg
3935 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3936 reg_emitted (reg_save_ar_unat);
3937 }
97e242b0
RH
3938 else
3939 {
3940 alt_regno = next_scratch_gr_reg ();
3941 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3942 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3943 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3944 cfa_off -= 8;
3945 }
3946 }
3947 else
3948 ar_unat_save_reg = NULL_RTX;
9c808aad 3949
6fb5fa3c 3950 if (current_frame_info.r[reg_save_ar_pfs] != 0)
97e242b0 3951 {
6fb5fa3c
DB
3952 reg_emitted (reg_save_ar_pfs);
3953 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
97e242b0
RH
3954 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3955 emit_move_insn (reg, alt_reg);
3956 }
4e14f1f9 3957 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3958 {
97e242b0
RH
3959 alt_regno = next_scratch_gr_reg ();
3960 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3961 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3962 cfa_off -= 8;
3963 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3964 emit_move_insn (reg, alt_reg);
3965 }
3966
3967 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3968 {
6fb5fa3c
DB
3969 if (current_frame_info.r[reg_save_ar_lc] != 0)
3970 {
3971 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3972 reg_emitted (reg_save_ar_lc);
3973 }
97e242b0
RH
3974 else
3975 {
3976 alt_regno = next_scratch_gr_reg ();
3977 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3978 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3979 cfa_off -= 8;
3980 }
3981 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3982 emit_move_insn (reg, alt_reg);
3983 }
3984
ae1e2d4c
AS
3985 /* Restore the return pointer. */
3986 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3987 {
6fb5fa3c
DB
3988 if (current_frame_info.r[reg_save_b0] != 0)
3989 {
3990 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3991 reg_emitted (reg_save_b0);
3992 }
ae1e2d4c
AS
3993 else
3994 {
3995 alt_regno = next_scratch_gr_reg ();
3996 alt_reg = gen_rtx_REG (DImode, alt_regno);
3997 do_restore (gen_movdi_x, alt_reg, cfa_off);
3998 cfa_off -= 8;
3999 }
4000 reg = gen_rtx_REG (DImode, BR_REG (0));
4001 emit_move_insn (reg, alt_reg);
4002 }
4003
97e242b0 4004 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
4005 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4006 + current_frame_info.spill_size));
97e242b0 4007
599aedd9
RH
4008 /* The GP may be stored on the stack in the prologue, but it's
4009 never restored in the epilogue. Skip the stack slot. */
4010 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4011 cfa_off -= 8;
4012
97e242b0 4013 /* Restore all general registers. */
599aedd9 4014 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 4015 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 4016 {
97e242b0
RH
4017 reg = gen_rtx_REG (DImode, regno);
4018 do_restore (gen_gr_restore, reg, cfa_off);
4019 cfa_off -= 8;
0c96007e 4020 }
9c808aad 4021
ae1e2d4c 4022 /* Restore the branch registers. */
97e242b0
RH
4023 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4024 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 4025 {
97e242b0
RH
4026 alt_regno = next_scratch_gr_reg ();
4027 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 4028 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
4029 cfa_off -= 8;
4030 reg = gen_rtx_REG (DImode, regno);
4031 emit_move_insn (reg, alt_reg);
4032 }
c65ebc55 4033
97e242b0
RH
4034 /* Restore floating point registers. */
4035 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4036 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4037 {
e820471b 4038 gcc_assert (!(cfa_off & 15));
02befdf4 4039 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 4040 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 4041 cfa_off -= 16;
0c96007e 4042 }
97e242b0
RH
4043
4044 /* Restore ar.unat for real. */
4045 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4046 {
4047 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4048 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
4049 }
4050
e820471b 4051 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
4052
4053 finish_spill_pointers ();
c65ebc55 4054
c93646bd
JJ
4055 if (current_frame_info.total_size
4056 || cfun->machine->ia64_eh_epilogue_sp
4057 || frame_pointer_needed)
97e242b0
RH
4058 {
4059 /* ??? At this point we must generate a magic insn that appears to
4060 modify the spill iterators, the stack pointer, and the frame
4061 pointer. This would allow the most scheduling freedom. For now,
4062 just hard stop. */
4063 emit_insn (gen_blockage ());
4064 }
c65ebc55 4065
97e242b0
RH
4066 if (cfun->machine->ia64_eh_epilogue_sp)
4067 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4068 else if (frame_pointer_needed)
4069 {
4070 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4071 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 4072 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
97e242b0
RH
4073 }
4074 else if (current_frame_info.total_size)
0c96007e 4075 {
97e242b0
RH
4076 rtx offset, frame_size_rtx;
4077
4078 frame_size_rtx = GEN_INT (current_frame_info.total_size);
13f70342 4079 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
4080 offset = frame_size_rtx;
4081 else
4082 {
4083 regno = next_scratch_gr_reg ();
4084 offset = gen_rtx_REG (DImode, regno);
4085 emit_move_insn (offset, frame_size_rtx);
4086 }
4087
4088 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4089 offset));
4090
4091 RTX_FRAME_RELATED_P (insn) = 1;
5c255b57 4092 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 4093 gen_rtx_SET (stack_pointer_rtx,
5c255b57
RH
4094 gen_rtx_PLUS (DImode,
4095 stack_pointer_rtx,
4096 frame_size_rtx)));
0c96007e 4097 }
97e242b0
RH
4098
4099 if (cfun->machine->ia64_eh_epilogue_bsp)
4100 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 4101
2ed4af6f
RH
4102 if (! sibcall_p)
4103 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 4104 else
8206fc89
AM
4105 {
4106 int fp = GR_REG (2);
5c255b57
RH
 4107      /* We need a throwaway register here; r0 and r1 are reserved,
 4108 	 so r2 is the first available call-clobbered register.  If
4109 there was a frame_pointer register, we may have swapped the
4110 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4111 sure we're using the string "r2" when emitting the register
4112 name for the assembler. */
6fb5fa3c
DB
4113 if (current_frame_info.r[reg_fp]
4114 && current_frame_info.r[reg_fp] == GR_REG (2))
8206fc89
AM
4115 fp = HARD_FRAME_POINTER_REGNUM;
4116
4117 /* We must emit an alloc to force the input registers to become output
4118 registers. Otherwise, if the callee tries to pass its parameters
4119 through to another call without an intervening alloc, then these
4120 values get lost. */
4121 /* ??? We don't need to preserve all input registers. We only need to
4122 preserve those input registers used as arguments to the sibling call.
4123 It is unclear how to compute that number here. */
4124 if (current_frame_info.n_input_regs != 0)
a8f5224e
DM
4125 {
4126 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
c2b40eba 4127
a8f5224e
DM
4128 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4129 const0_rtx, const0_rtx,
4130 n_inputs, const0_rtx));
4131 RTX_FRAME_RELATED_P (insn) = 1;
c2b40eba
RH
4132
4133 /* ??? We need to mark the alloc as frame-related so that it gets
4134 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4135 But there's nothing dwarf2 related to be done wrt the register
4136 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4137 the empty parallel means dwarf2out will not see anything. */
4138 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4139 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
a8f5224e 4140 }
8206fc89 4141 }
c65ebc55
JW
4142}
4143
97e242b0
RH
4144/* Return 1 if br.ret can do all the work required to return from a
4145 function. */
4146
4147int
9c808aad 4148ia64_direct_return (void)
97e242b0
RH
4149{
4150 if (reload_completed && ! frame_pointer_needed)
4151 {
4152 ia64_compute_frame_size (get_frame_size ());
4153
4154 return (current_frame_info.total_size == 0
4155 && current_frame_info.n_spilled == 0
6fb5fa3c
DB
4156 && current_frame_info.r[reg_save_b0] == 0
4157 && current_frame_info.r[reg_save_pr] == 0
4158 && current_frame_info.r[reg_save_ar_pfs] == 0
4159 && current_frame_info.r[reg_save_ar_unat] == 0
4160 && current_frame_info.r[reg_save_ar_lc] == 0);
97e242b0
RH
4161 }
4162 return 0;
4163}
4164
af1e5518
RH
4165/* Return the magic cookie that we use to hold the return address
4166 during early compilation. */
4167
4168rtx
9c808aad 4169ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
4170{
4171 if (count != 0)
4172 return NULL;
4173 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4174}
4175
4176/* Split this value after reload, now that we know where the return
4177 address is saved. */
4178
4179void
9c808aad 4180ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
4181{
4182 rtx src;
4183
4184 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4185 {
6fb5fa3c
DB
4186 if (current_frame_info.r[reg_save_b0] != 0)
4187 {
4188 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4189 reg_emitted (reg_save_b0);
4190 }
af1e5518
RH
4191 else
4192 {
4193 HOST_WIDE_INT off;
4194 unsigned int regno;
13f70342 4195 rtx off_r;
af1e5518
RH
4196
4197 /* Compute offset from CFA for BR0. */
4198 /* ??? Must be kept in sync with ia64_expand_prologue. */
4199 off = (current_frame_info.spill_cfa_off
4200 + current_frame_info.spill_size);
4201 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4202 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4203 off -= 8;
4204
4205 /* Convert CFA offset to a register based offset. */
4206 if (frame_pointer_needed)
4207 src = hard_frame_pointer_rtx;
4208 else
4209 {
4210 src = stack_pointer_rtx;
4211 off += current_frame_info.total_size;
4212 }
4213
4214 /* Load address into scratch register. */
13f70342
RH
4215 off_r = GEN_INT (off);
4216 if (satisfies_constraint_I (off_r))
4217 emit_insn (gen_adddi3 (dest, src, off_r));
af1e5518
RH
4218 else
4219 {
13f70342 4220 emit_move_insn (dest, off_r);
af1e5518
RH
4221 emit_insn (gen_adddi3 (dest, src, dest));
4222 }
4223
4224 src = gen_rtx_MEM (Pmode, dest);
4225 }
4226 }
4227 else
4228 src = gen_rtx_REG (DImode, BR_REG (0));
4229
4230 emit_move_insn (dest, src);
4231}
4232
10c9f189 4233int
9c808aad 4234ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
4235{
4236 /* Don't clobber any of the registers we reserved for the prologue. */
09639a83 4237 unsigned int r;
10c9f189 4238
6fb5fa3c
DB
4239 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4240 if (to == current_frame_info.r[r]
4241 || from == current_frame_info.r[r]
4242 || to == emitted_frame_related_regs[r]
4243 || from == emitted_frame_related_regs[r])
4244 return 0;
2130b7fb 4245
10c9f189
RH
4246 /* Don't use output registers outside the register frame. */
4247 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4248 return 0;
4249
4250 /* Retain even/oddness on predicate register pairs. */
4251 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4252 return (from & 1) == (to & 1);
4253
4254 return 1;
4255}
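/* Example of the even/odd rule above: renaming p6 to p8 is allowed (both
   even), while renaming p6 to p7 is not, so the two halves of a predicate
   pair keep their relative parities.  */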
4256
301d03af
RS
4257/* Target hook for assembling integer objects. Handle word-sized
4258 aligned objects and detect the cases when @fptr is needed. */
4259
4260static bool
9c808aad 4261ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 4262{
b6a41a62 4263 if (size == POINTER_SIZE / BITS_PER_UNIT
301d03af
RS
4264 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4265 && GET_CODE (x) == SYMBOL_REF
1cdbd630 4266 && SYMBOL_REF_FUNCTION_P (x))
301d03af 4267 {
1b79dc38
DM
4268 static const char * const directive[2][2] = {
4269 /* 64-bit pointer */ /* 32-bit pointer */
4270 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4271 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4272 };
4273 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
301d03af
RS
4274 output_addr_const (asm_out_file, x);
4275 fputs (")\n", asm_out_file);
4276 return true;
4277 }
4278 return default_assemble_integer (x, size, aligned_p);
4279}
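/* For example, an aligned pointer-sized reference to a function foo in
   static data is emitted as

	data8	@fptr(foo)

   (data8.ua/data4.ua in the unaligned cases, data4 with 32-bit pointers),
   which directs the linker to use an official function descriptor rather
   than the raw code address.  */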
4280
c65ebc55
JW
4281/* Emit the function prologue. */
4282
08c148a8 4283static void
9c808aad 4284ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 4285{
97e242b0
RH
4286 int mask, grsave, grsave_prev;
4287
4288 if (current_frame_info.need_regstk)
4289 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4290 current_frame_info.n_input_regs,
4291 current_frame_info.n_local_regs,
4292 current_frame_info.n_output_regs,
4293 current_frame_info.n_rotate_regs);
c65ebc55 4294
d5fabb58 4295 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
0c96007e
AM
4296 return;
4297
97e242b0 4298 /* Emit the .prologue directive. */
809d4ef1 4299
97e242b0
RH
4300 mask = 0;
4301 grsave = grsave_prev = 0;
6fb5fa3c 4302 if (current_frame_info.r[reg_save_b0] != 0)
0c96007e 4303 {
97e242b0 4304 mask |= 8;
6fb5fa3c 4305 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
97e242b0 4306 }
6fb5fa3c 4307 if (current_frame_info.r[reg_save_ar_pfs] != 0
97e242b0 4308 && (grsave_prev == 0
6fb5fa3c 4309 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
97e242b0
RH
4310 {
4311 mask |= 4;
4312 if (grsave_prev == 0)
6fb5fa3c
DB
4313 grsave = current_frame_info.r[reg_save_ar_pfs];
4314 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
0c96007e 4315 }
6fb5fa3c 4316 if (current_frame_info.r[reg_fp] != 0
97e242b0 4317 && (grsave_prev == 0
6fb5fa3c 4318 || current_frame_info.r[reg_fp] == grsave_prev + 1))
97e242b0
RH
4319 {
4320 mask |= 2;
4321 if (grsave_prev == 0)
4322 grsave = HARD_FRAME_POINTER_REGNUM;
6fb5fa3c 4323 grsave_prev = current_frame_info.r[reg_fp];
97e242b0 4324 }
6fb5fa3c 4325 if (current_frame_info.r[reg_save_pr] != 0
97e242b0 4326 && (grsave_prev == 0
6fb5fa3c 4327 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
97e242b0
RH
4328 {
4329 mask |= 1;
4330 if (grsave_prev == 0)
6fb5fa3c 4331 grsave = current_frame_info.r[reg_save_pr];
97e242b0
RH
4332 }
4333
738e7b39 4334 if (mask && TARGET_GNU_AS)
97e242b0
RH
4335 fprintf (file, "\t.prologue %d, %d\n", mask,
4336 ia64_dbx_register_number (grsave));
4337 else
4338 fputs ("\t.prologue\n", file);
4339
4340 /* Emit a .spill directive, if necessary, to relocate the base of
4341 the register spill area. */
4342 if (current_frame_info.spill_cfa_off != -16)
4343 fprintf (file, "\t.spill %ld\n",
4344 (long) (current_frame_info.spill_cfa_off
4345 + current_frame_info.spill_size));
c65ebc55
JW
4346}
4347
0186257f
JW
4348/* Emit the .body directive at the scheduled end of the prologue. */
4349
b4c25db2 4350static void
9c808aad 4351ia64_output_function_end_prologue (FILE *file)
0186257f 4352{
d5fabb58 4353 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
0186257f
JW
4354 return;
4355
4356 fputs ("\t.body\n", file);
4357}
4358
c65ebc55
JW
4359/* Emit the function epilogue. */
4360
08c148a8 4361static void
9c808aad
AJ
4362ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4363 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 4364{
8a959ea5
RH
4365 int i;
4366
6fb5fa3c 4367 if (current_frame_info.r[reg_fp])
97e242b0
RH
4368 {
4369 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4370 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
4371 = reg_names[current_frame_info.r[reg_fp]];
4372 reg_names[current_frame_info.r[reg_fp]] = tmp;
4373 reg_emitted (reg_fp);
97e242b0
RH
4374 }
4375 if (! TARGET_REG_NAMES)
4376 {
97e242b0
RH
4377 for (i = 0; i < current_frame_info.n_input_regs; i++)
4378 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4379 for (i = 0; i < current_frame_info.n_local_regs; i++)
4380 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4381 for (i = 0; i < current_frame_info.n_output_regs; i++)
4382 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4383 }
8a959ea5 4384
97e242b0
RH
4385 current_frame_info.initialized = 0;
4386}
c65ebc55
JW
4387
4388int
9c808aad 4389ia64_dbx_register_number (int regno)
c65ebc55 4390{
97e242b0
RH
4391 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4392 from its home at loc79 to something inside the register frame. We
4393 must perform the same renumbering here for the debug info. */
6fb5fa3c 4394 if (current_frame_info.r[reg_fp])
97e242b0
RH
4395 {
4396 if (regno == HARD_FRAME_POINTER_REGNUM)
6fb5fa3c
DB
4397 regno = current_frame_info.r[reg_fp];
4398 else if (regno == current_frame_info.r[reg_fp])
97e242b0
RH
4399 regno = HARD_FRAME_POINTER_REGNUM;
4400 }
4401
4402 if (IN_REGNO_P (regno))
4403 return 32 + regno - IN_REG (0);
4404 else if (LOC_REGNO_P (regno))
4405 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4406 else if (OUT_REGNO_P (regno))
4407 return (32 + current_frame_info.n_input_regs
4408 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4409 else
4410 return regno;
c65ebc55
JW
4411}
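/* For illustration: in a function with 3 input and 2 local registers,
   in0-in2 map to debug register numbers 32-34, loc0-loc1 to 35-36 and
   out0 to 37, matching the stacked registers r32-r37 set up by the
   prologue's alloc.  */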
4412
2a1211e5
RH
4413/* Implement TARGET_TRAMPOLINE_INIT.
4414
4415 The trampoline should set the static chain pointer to value placed
4416 into the trampoline and should branch to the specified routine.
4417 To make the normal indirect-subroutine calling convention work,
4418 the trampoline must look like a function descriptor; the first
4419 word being the target address and the second being the target's
4420 global pointer.
4421
4422 We abuse the concept of a global pointer by arranging for it
4423 to point to the data we need to load. The complete trampoline
4424 has the following form:
4425
4426 +-------------------+ \
4427 TRAMP: | __ia64_trampoline | |
4428 +-------------------+ > fake function descriptor
4429 | TRAMP+16 | |
4430 +-------------------+ /
4431 | target descriptor |
4432 +-------------------+
4433 | static link |
4434 +-------------------+
4435*/
4436
4437static void
4438ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
97e242b0 4439{
2a1211e5
RH
4440 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4441 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
97e242b0 4442
738e7b39
RK
4443 /* The Intel assembler requires that the global __ia64_trampoline symbol
 4444      be declared explicitly.  */
4445 if (!TARGET_GNU_AS)
4446 {
4447 static bool declared_ia64_trampoline = false;
4448
4449 if (!declared_ia64_trampoline)
4450 {
4451 declared_ia64_trampoline = true;
b6a41a62
RK
4452 (*targetm.asm_out.globalize_label) (asm_out_file,
4453 "__ia64_trampoline");
738e7b39
RK
4454 }
4455 }
4456
5e89a381 4457 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2a1211e5 4458 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
5e89a381
SE
4459 fnaddr = convert_memory_address (Pmode, fnaddr);
4460 static_chain = convert_memory_address (Pmode, static_chain);
4461
97e242b0 4462 /* Load up our iterator. */
2a1211e5
RH
4463 addr_reg = copy_to_reg (addr);
4464 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
97e242b0
RH
4465
4466 /* The first two words are the fake descriptor:
4467 __ia64_trampoline, ADDR+16. */
f2972bf8
DR
4468 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4469 if (TARGET_ABI_OPEN_VMS)
4470 {
4471 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4472 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4473 relocation against function symbols to make it identical to the
4474 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4475 strict ELF and dereference to get the bare code address. */
4476 rtx reg = gen_reg_rtx (Pmode);
4477 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4478 emit_move_insn (reg, tramp);
4479 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4480 tramp = reg;
4481 }
2a1211e5 4482 emit_move_insn (m_tramp, tramp);
97e242b0 4483 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4484 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0 4485
0a81f074 4486 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
97e242b0 4487 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4488 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4489
4490 /* The third word is the target descriptor. */
2a1211e5 4491 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
97e242b0 4492 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4493 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4494
4495 /* The fourth word is the static chain. */
2a1211e5 4496 emit_move_insn (m_tramp, static_chain);
97e242b0 4497}
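/* The four 8-byte words written above, at offsets 0, 8, 16 and 24 from
   the start of the trampoline, are: the address of __ia64_trampoline,
   the address TRAMP+16, the target function descriptor and the static
   chain value, matching the layout pictured before this function.  */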
c65ebc55
JW
4498\f
4499/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
4500 for the last named argument which has type TYPE and mode MODE.
4501
4502 We generate the actual spill instructions during prologue generation. */
4503
351a758b 4504static void
ef4bddc2 4505ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
351a758b 4506 tree type, int * pretend_size,
9c808aad 4507 int second_time ATTRIBUTE_UNUSED)
c65ebc55 4508{
d5cc9181 4509 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
351a758b 4510
6c535c69 4511 /* Skip the current argument. */
d5cc9181 4512 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
c65ebc55 4513
351a758b 4514 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 4515 {
351a758b 4516 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
4517 *pretend_size = n * UNITS_PER_WORD;
4518 cfun->machine->n_varargs = n;
4519 }
c65ebc55
JW
4520}
4521
4522/* Check whether TYPE is a homogeneous floating point aggregate. If
4523 it is, return the mode of the floating point type that appears
 4524    in all leaves.  If it is not, return VOIDmode.
4525
 4526    An aggregate is a homogeneous floating point aggregate if all
 4527    fields/elements in it have the same floating point type (e.g.,
3d6a9acd
RH
4528 SFmode). 128-bit quad-precision floats are excluded.
4529
4530 Variable sized aggregates should never arrive here, since we should
4531 have already decided to pass them by reference. Top-level zero-sized
4532 aggregates are excluded because our parallels crash the middle-end. */
c65ebc55 4533
ef4bddc2 4534static machine_mode
586de218 4535hfa_element_mode (const_tree type, bool nested)
c65ebc55 4536{
ef4bddc2
RS
4537 machine_mode element_mode = VOIDmode;
4538 machine_mode mode;
c65ebc55
JW
4539 enum tree_code code = TREE_CODE (type);
4540 int know_element_mode = 0;
4541 tree t;
4542
3d6a9acd
RH
4543 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4544 return VOIDmode;
4545
c65ebc55
JW
4546 switch (code)
4547 {
4548 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
0cc8f5c5 4549 case BOOLEAN_TYPE: case POINTER_TYPE:
c65ebc55 4550 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
5662a50d 4551 case LANG_TYPE: case FUNCTION_TYPE:
c65ebc55
JW
4552 return VOIDmode;
4553
4554 /* Fortran complex types are supposed to be HFAs, so we need to handle
4555 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4556 types though. */
4557 case COMPLEX_TYPE:
16448fd4 4558 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
4559 && TYPE_MODE (type) != TCmode)
4560 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
4561 else
4562 return VOIDmode;
4563
4564 case REAL_TYPE:
4565 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4566 mode if this is contained within an aggregate. */
02befdf4 4567 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
4568 return TYPE_MODE (type);
4569 else
4570 return VOIDmode;
4571
4572 case ARRAY_TYPE:
46399021 4573 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
4574
4575 case RECORD_TYPE:
4576 case UNION_TYPE:
4577 case QUAL_UNION_TYPE:
910ad8de 4578 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
c65ebc55
JW
4579 {
4580 if (TREE_CODE (t) != FIELD_DECL)
4581 continue;
4582
4583 mode = hfa_element_mode (TREE_TYPE (t), 1);
4584 if (know_element_mode)
4585 {
4586 if (mode != element_mode)
4587 return VOIDmode;
4588 }
4589 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4590 return VOIDmode;
4591 else
4592 {
4593 know_element_mode = 1;
4594 element_mode = mode;
4595 }
4596 }
4597 return element_mode;
4598
4599 default:
4600 /* If we reach here, we probably have some front-end specific type
4601 that the backend doesn't know about. This can happen via the
4602 aggregate_value_p call in init_function_start. All we can do is
4603 ignore unknown tree types. */
4604 return VOIDmode;
4605 }
4606
4607 return VOIDmode;
4608}
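/* For illustration: "struct { float x, y, z; }" is an HFA with element
   mode SFmode, and a struct containing "double a[4]" yields DFmode,
   while "struct { float x; double y; }" mixes element types and yields
   VOIDmode, as does anything containing a 128-bit (TFmode) float.  */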
4609
f57fc998
ZW
4610/* Return the number of words required to hold a quantity of TYPE and MODE
4611 when passed as an argument. */
4612static int
ef4bddc2 4613ia64_function_arg_words (const_tree type, machine_mode mode)
f57fc998
ZW
4614{
4615 int words;
4616
4617 if (mode == BLKmode)
4618 words = int_size_in_bytes (type);
4619 else
4620 words = GET_MODE_SIZE (mode);
4621
4622 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4623}
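/* For illustration (UNITS_PER_WORD is 8 here): a DFmode argument needs
   (8 + 7) / 8 = 1 slot, a 16-byte __int128 needs 2, and a 12-byte
   BLKmode aggregate rounds up to (12 + 7) / 8 = 2 argument slots.  */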
4624
4625/* Return the number of registers that should be skipped so the current
4626 argument (described by TYPE and WORDS) will be properly aligned.
4627
4628 Integer and float arguments larger than 8 bytes start at the next
4629 even boundary. Aggregates larger than 8 bytes start at the next
4630 even boundary if the aggregate has 16 byte alignment. Note that
4631 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4632 but are still to be aligned in registers.
4633
4634 ??? The ABI does not specify how to handle aggregates with
4635 alignment from 9 to 15 bytes, or greater than 16. We handle them
4636 all as if they had 16 byte alignment. Such aggregates can occur
4637 only if gcc extensions are used. */
4638static int
ffa88471
SE
4639ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4640 const_tree type, int words)
f57fc998 4641{
f2972bf8
DR
4642 /* No registers are skipped on VMS. */
4643 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
f57fc998
ZW
4644 return 0;
4645
4646 if (type
4647 && TREE_CODE (type) != INTEGER_TYPE
4648 && TREE_CODE (type) != REAL_TYPE)
4649 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4650 else
4651 return words > 1;
4652}
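/* For illustration: with three argument words already assigned
   (cum->words == 3, an odd slot), a following __int128 or 16-byte-aligned
   aggregate gets offset 1 so that it starts in an even slot, while an
   8-byte or smaller scalar gets offset 0 and packs into the odd slot.  */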
4653
c65ebc55
JW
4654/* Return rtx for register where argument is passed, or zero if it is passed
4655 on the stack. */
c65ebc55
JW
4656/* ??? 128-bit quad-precision floats are always passed in general
4657 registers. */
4658
ffa88471 4659static rtx
ef4bddc2 4660ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
ffa88471 4661 const_tree type, bool named, bool incoming)
c65ebc55 4662{
d5cc9181
JR
4663 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4664
c65ebc55 4665 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
4666 int words = ia64_function_arg_words (type, mode);
4667 int offset = ia64_function_arg_offset (cum, type, words);
ef4bddc2 4668 machine_mode hfa_mode = VOIDmode;
c65ebc55 4669
f2972bf8
DR
4670 /* For OPEN VMS, emit the instruction setting up the argument register here,
4671 when we know this will be together with the other arguments setup related
4672 insns. This is not the conceptually best place to do this, but this is
4673 the easiest as we have convenient access to cumulative args info. */
4674
4675 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4676 && named == 1)
4677 {
4678 unsigned HOST_WIDE_INT regval = cum->words;
4679 int i;
4680
4681 for (i = 0; i < 8; i++)
4682 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4683
4684 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4685 GEN_INT (regval));
4686 }
4687
c65ebc55
JW
4688 /* If all argument slots are used, then it must go on the stack. */
4689 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4690 return 0;
4691
472b8fdc
TG
4692 /* On OpenVMS argument is either in Rn or Fn. */
4693 if (TARGET_ABI_OPEN_VMS)
4694 {
4695 if (FLOAT_MODE_P (mode))
4696 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4697 else
4698 return gen_rtx_REG (mode, basereg + cum->words);
4699 }
4700
c65ebc55
JW
4701 /* Check for and handle homogeneous FP aggregates. */
4702 if (type)
4703 hfa_mode = hfa_element_mode (type, 0);
4704
4705 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4706 and unprototyped hfas are passed specially. */
4707 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4708 {
4709 rtx loc[16];
4710 int i = 0;
4711 int fp_regs = cum->fp_regs;
4712 int int_regs = cum->words + offset;
4713 int hfa_size = GET_MODE_SIZE (hfa_mode);
4714 int byte_size;
4715 int args_byte_size;
4716
4717 /* If prototyped, pass it in FR regs then GR regs.
4718 If not prototyped, pass it in both FR and GR regs.
4719
4720 If this is an SFmode aggregate, then it is possible to run out of
4721 FR regs while GR regs are still left. In that case, we pass the
4722 remaining part in the GR regs. */
4723
4724 /* Fill the FP regs. We do this always. We stop if we reach the end
4725 of the argument, the last FP register, or the last argument slot. */
4726
4727 byte_size = ((mode == BLKmode)
4728 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4729 args_byte_size = int_regs * UNITS_PER_WORD;
4730 offset = 0;
4731 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4732 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4733 {
4734 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4735 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4736 + fp_regs)),
4737 GEN_INT (offset));
c65ebc55
JW
4738 offset += hfa_size;
4739 args_byte_size += hfa_size;
4740 fp_regs++;
4741 }
4742
4743 /* If no prototype, then the whole thing must go in GR regs. */
4744 if (! cum->prototype)
4745 offset = 0;
4746 /* If this is an SFmode aggregate, then we might have some left over
4747 that needs to go in GR regs. */
4748 else if (byte_size != offset)
4749 int_regs += offset / UNITS_PER_WORD;
4750
4751 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4752
4753 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4754 {
ef4bddc2 4755 machine_mode gr_mode = DImode;
826b47cc 4756 unsigned int gr_size;
c65ebc55
JW
4757
4758 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4759 then this goes in a GR reg left adjusted/little endian, right
4760 adjusted/big endian. */
4761 /* ??? Currently this is handled wrong, because 4-byte hunks are
4762 always right adjusted/little endian. */
4763 if (offset & 0x4)
4764 gr_mode = SImode;
4765 /* If we have an even 4 byte hunk because the aggregate is a
4766 multiple of 4 bytes in size, then this goes in a GR reg right
4767 adjusted/little endian. */
4768 else if (byte_size - offset == 4)
4769 gr_mode = SImode;
4770
4771 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4772 gen_rtx_REG (gr_mode, (basereg
4773 + int_regs)),
4774 GEN_INT (offset));
826b47cc
ZW
4775
4776 gr_size = GET_MODE_SIZE (gr_mode);
4777 offset += gr_size;
4778 if (gr_size == UNITS_PER_WORD
4779 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4780 int_regs++;
4781 else if (gr_size > UNITS_PER_WORD)
4782 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55 4783 }
9dec91d4 4784 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4785 }
f2972bf8 4786
c65ebc55
JW
4787 /* Integral and aggregates go in general registers. If we have run out of
4788 FR registers, then FP values must also go in general registers. This can
4789 happen when we have a SFmode HFA. */
02befdf4
ZW
4790 else if (mode == TFmode || mode == TCmode
4791 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
4792 {
4793 int byte_size = ((mode == BLKmode)
4794 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4795 if (BYTES_BIG_ENDIAN
4796 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4797 && byte_size < UNITS_PER_WORD
4798 && byte_size > 0)
4799 {
4800 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4801 gen_rtx_REG (DImode,
4802 (basereg + cum->words
4803 + offset)),
4804 const0_rtx);
4805 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4806 }
4807 else
4808 return gen_rtx_REG (mode, basereg + cum->words + offset);
4809
4810 }
c65ebc55
JW
4811
4812 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4813 named, and in a GR register when unnamed. */
c65ebc55
JW
4814 else if (cum->prototype)
4815 {
f9c887ac 4816 if (named)
c65ebc55 4817 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
4818 /* In big-endian mode, an anonymous SFmode value must be represented
4819 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4820 the value into the high half of the general register. */
4821 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4822 return gen_rtx_PARALLEL (mode,
4823 gen_rtvec (1,
4824 gen_rtx_EXPR_LIST (VOIDmode,
4825 gen_rtx_REG (DImode, basereg + cum->words + offset),
4826 const0_rtx)));
4827 else
4828 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
4829 }
4830 /* If there is no prototype, then FP values go in both FR and GR
4831 registers. */
4832 else
4833 {
f9c887ac 4834 /* See comment above. */
ef4bddc2 4835 machine_mode inner_mode =
f9c887ac
ZW
4836 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4837
c65ebc55
JW
4838 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4839 gen_rtx_REG (mode, (FR_ARG_FIRST
4840 + cum->fp_regs)),
4841 const0_rtx);
4842 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 4843 gen_rtx_REG (inner_mode,
c65ebc55
JW
4844 (basereg + cum->words
4845 + offset)),
4846 const0_rtx);
809d4ef1 4847
c65ebc55
JW
4848 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4849 }
4850}
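
/* An illustrative, non-compiled sketch of the rtx shape produced by the HFA
   path above for a prototyped, named argument of type struct { float x, y, z; },
   assuming all FP argument registers are still free and FR_ARG_FIRST is f8.
   The helper name "example_hfa_parallel" is hypothetical.

     (parallel:BLK [(expr_list (reg:SF f8)  (const_int 0))
		    (expr_list (reg:SF f9)  (const_int 4))
		    (expr_list (reg:SF f10) (const_int 8))])  */
#if 0
static rtx
example_hfa_parallel (void)
{
  rtx loc[3];
  int i;

  for (i = 0; i < 3; i++)
    loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				gen_rtx_REG (SFmode, FR_ARG_FIRST + i),
				GEN_INT (i * 4));
  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (3, loc));
}
#endif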
4851
ffa88471
SE
 4852/* Implement TARGET_FUNCTION_ARG target hook. */
4853
4854static rtx
ef4bddc2 4855ia64_function_arg (cumulative_args_t cum, machine_mode mode,
ffa88471
SE
4856 const_tree type, bool named)
4857{
4858 return ia64_function_arg_1 (cum, mode, type, named, false);
4859}
4860
 4861/* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4862
4863static rtx
d5cc9181 4864ia64_function_incoming_arg (cumulative_args_t cum,
ef4bddc2 4865 machine_mode mode,
ffa88471
SE
4866 const_tree type, bool named)
4867{
4868 return ia64_function_arg_1 (cum, mode, type, named, true);
4869}
4870
78a52f11 4871/* Return the number of bytes, at the beginning of the argument, that must be
 4872   put in registers.  0 if the argument is entirely in registers or entirely
4873 in memory. */
4874
78a52f11 4875static int
ef4bddc2 4876ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
78a52f11 4877 tree type, bool named ATTRIBUTE_UNUSED)
c65ebc55 4878{
d5cc9181
JR
4879 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4880
f57fc998
ZW
4881 int words = ia64_function_arg_words (type, mode);
4882 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4883
4884 /* If all argument slots are used, then it must go on the stack. */
4885 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4886 return 0;
4887
4888 /* It doesn't matter whether the argument goes in FR or GR regs. If
4889 it fits within the 8 argument slots, then it goes entirely in
4890 registers. If it extends past the last argument slot, then the rest
4891 goes on the stack. */
4892
4893 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4894 return 0;
4895
78a52f11 4896 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
c65ebc55
JW
4897}
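
/* An illustrative, non-compiled sketch of the split computed above for an
   argument that straddles the last argument slot, assuming UNITS_PER_WORD == 8
   and MAX_ARGUMENT_SLOTS == 8.  "example_partial_bytes" is a hypothetical name.  */
#if 0
static int
example_partial_bytes (int cum_words, int offset, int arg_words)
{
  if (cum_words + offset >= 8)
    return 0;					/* entirely on the stack */
  if (arg_words + cum_words + offset <= 8)
    return 0;					/* entirely in registers */
  return (8 - cum_words - offset) * 8;		/* leading part in registers */
}

/* With six slots already used, a 24-byte aggregate (3 words) gets 16 bytes
   in registers and the final 8 bytes on the stack:
   example_partial_bytes (6, 0, 3) == 16.  */
#endif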
4898
f2972bf8
DR
4899/* Return ivms_arg_type based on machine_mode. */
4900
4901static enum ivms_arg_type
ef4bddc2 4902ia64_arg_type (machine_mode mode)
f2972bf8
DR
4903{
4904 switch (mode)
4905 {
4906 case SFmode:
4907 return FS;
4908 case DFmode:
4909 return FT;
4910 default:
4911 return I64;
4912 }
4913}
4914
c65ebc55
JW
4915/* Update CUM to point after this argument. This is patterned after
4916 ia64_function_arg. */
4917
ffa88471 4918static void
ef4bddc2 4919ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
ffa88471 4920 const_tree type, bool named)
c65ebc55 4921{
d5cc9181 4922 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
f57fc998
ZW
4923 int words = ia64_function_arg_words (type, mode);
4924 int offset = ia64_function_arg_offset (cum, type, words);
ef4bddc2 4925 machine_mode hfa_mode = VOIDmode;
c65ebc55
JW
4926
4927 /* If all arg slots are already full, then there is nothing to do. */
4928 if (cum->words >= MAX_ARGUMENT_SLOTS)
f2972bf8
DR
4929 {
4930 cum->words += words + offset;
4931 return;
4932 }
c65ebc55 4933
f2972bf8 4934 cum->atypes[cum->words] = ia64_arg_type (mode);
c65ebc55
JW
4935 cum->words += words + offset;
4936
472b8fdc
TG
4937 /* On OpenVMS argument is either in Rn or Fn. */
4938 if (TARGET_ABI_OPEN_VMS)
4939 {
4940 cum->int_regs = cum->words;
4941 cum->fp_regs = cum->words;
4942 return;
4943 }
4944
c65ebc55
JW
4945 /* Check for and handle homogeneous FP aggregates. */
4946 if (type)
4947 hfa_mode = hfa_element_mode (type, 0);
4948
4949 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4950 and unprototyped hfas are passed specially. */
4951 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4952 {
4953 int fp_regs = cum->fp_regs;
4954 /* This is the original value of cum->words + offset. */
4955 int int_regs = cum->words - words;
4956 int hfa_size = GET_MODE_SIZE (hfa_mode);
4957 int byte_size;
4958 int args_byte_size;
4959
4960 /* If prototyped, pass it in FR regs then GR regs.
4961 If not prototyped, pass it in both FR and GR regs.
4962
4963 If this is an SFmode aggregate, then it is possible to run out of
4964 FR regs while GR regs are still left. In that case, we pass the
4965 remaining part in the GR regs. */
4966
4967 /* Fill the FP regs. We do this always. We stop if we reach the end
4968 of the argument, the last FP register, or the last argument slot. */
4969
4970 byte_size = ((mode == BLKmode)
4971 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4972 args_byte_size = int_regs * UNITS_PER_WORD;
4973 offset = 0;
4974 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4975 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4976 {
c65ebc55
JW
4977 offset += hfa_size;
4978 args_byte_size += hfa_size;
4979 fp_regs++;
4980 }
4981
4982 cum->fp_regs = fp_regs;
4983 }
4984
d13256a3
SE
4985 /* Integral and aggregates go in general registers. So do TFmode FP values.
4986 If we have run out of FR registers, then other FP values must also go in
4987 general registers. This can happen when we have a SFmode HFA. */
4988 else if (mode == TFmode || mode == TCmode
4989 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
648fe28b 4990 cum->int_regs = cum->words;
c65ebc55
JW
4991
4992 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4993 named, and in a GR register when unnamed. */
c65ebc55
JW
4994 else if (cum->prototype)
4995 {
4996 if (! named)
648fe28b 4997 cum->int_regs = cum->words;
c65ebc55
JW
4998 else
4999 /* ??? Complex types should not reach here. */
5000 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5001 }
5002 /* If there is no prototype, then FP values go in both FR and GR
5003 registers. */
5004 else
9c808aad 5005 {
648fe28b
RH
5006 /* ??? Complex types should not reach here. */
5007 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5008 cum->int_regs = cum->words;
5009 }
c65ebc55 5010}
51dcde6f 5011
d13256a3 5012/* Arguments with alignment larger than 8 bytes start at the next even
93348822 5013   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
 5014   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
5015
c2ed6cf8 5016static unsigned int
ef4bddc2 5017ia64_function_arg_boundary (machine_mode mode, const_tree type)
d13256a3 5018{
d13256a3
SE
5019 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5020 return PARM_BOUNDARY * 2;
5021
5022 if (type)
5023 {
5024 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5025 return PARM_BOUNDARY * 2;
5026 else
5027 return PARM_BOUNDARY;
5028 }
5029
5030 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5031 return PARM_BOUNDARY * 2;
5032 else
5033 return PARM_BOUNDARY;
5034}
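
/* An illustrative, non-compiled restatement of the boundary rule above on
   plain integers, assuming PARM_BOUNDARY == 64.  "example_arg_boundary" is a
   hypothetical name.  */
#if 0
static unsigned int
example_arg_boundary (unsigned int type_align_bits, unsigned int mode_bits,
		      int ilp32_hpux_tfmode)
{
  if (ilp32_hpux_tfmode)
    return 128;				/* TFmode on ILP32 HP-UX */
  if (type_align_bits != 0)		/* a type is available */
    return type_align_bits > 64 ? 128 : 64;
  return mode_bits > 64 ? 128 : 64;	/* otherwise judge by the mode */
}
#endif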
5035
599aedd9
RH
5036/* True if it is OK to do sibling call optimization for the specified
5037 call expression EXP. DECL will be the called function, or NULL if
5038 this is an indirect call. */
5039static bool
9c808aad 5040ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 5041{
097f3d48
JW
5042 /* We can't perform a sibcall if the current function has the syscall_linkage
5043 attribute. */
5044 if (lookup_attribute ("syscall_linkage",
5045 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5046 return false;
5047
b23ba0b8 5048 /* We must always return with our current GP. This means we can
c208436c
SE
5049 only sibcall to functions defined in the current module unless
5050 TARGET_CONST_GP is set to true. */
5051 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
599aedd9 5052}
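
/* An illustrative source-level example (not compiled here) of what the
   predicate above accepts and rejects.  "dispatch", "do_work", "helper" and
   "wrapper" are hypothetical functions.  */
#if 0
extern int do_work (int);

/* The syscall_linkage attribute on the caller rules out sibcalls, so this
   tail position is emitted as an ordinary call.  */
int __attribute__ ((syscall_linkage))
dispatch (int x)
{
  return do_work (x);
}

/* A locally bound callee shares the caller's GP, so this call is a
   candidate for the sibcall optimization.  */
static int helper (int x) { return x + 1; }

int
wrapper (int x)
{
  return helper (x);
}
#endif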
c65ebc55 5053\f
c65ebc55
JW
5054
5055/* Implement va_arg. */
5056
23a60a04 5057static tree
726a989a
RB
5058ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5059 gimple_seq *post_p)
cd3ce9b4 5060{
cd3ce9b4 5061 /* Variable sized types are passed by reference. */
08b0dc1b 5062 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
cd3ce9b4 5063 {
23a60a04
JM
5064 tree ptrtype = build_pointer_type (type);
5065 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
c2433d7d 5066 return build_va_arg_indirect_ref (addr);
cd3ce9b4
JM
5067 }
5068
5069 /* Aggregate arguments with alignment larger than 8 bytes start at
5070 the next even boundary. Integer and floating point arguments
5071 do so if they are larger than 8 bytes, whether or not they are
5072 also aligned larger than 8 bytes. */
5073 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5074 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5075 {
5d49b6a7 5076 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
47a25a46 5077 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5d49b6a7 5078 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
726a989a 5079 gimplify_assign (unshare_expr (valist), t, pre_p);
cd3ce9b4
JM
5080 }
5081
23a60a04 5082 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 5083}
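
/* An illustrative, non-compiled sketch of the pointer adjustment gimplified
   above for arguments that must start on a 16-byte boundary.  With
   UNITS_PER_WORD == 8 it is equivalent to valist = (valist + 15) & -16;
   "example_align_valist" is a hypothetical name.  */
#if 0
static unsigned long
example_align_valist (unsigned long valist)
{
  return (valist + 2 * 8 - 1) & -(2 * 8UL);
}

/* example_align_valist (0x1008) == 0x1010, and an already aligned 0x1010
   is left unchanged.  */
#endif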
c65ebc55
JW
5084\f
 5085/* Return 1 if the function return value is returned in memory.  Return 0 if it is
5086 in a register. */
5087
351a758b 5088static bool
586de218 5089ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
c65ebc55 5090{
ef4bddc2
RS
5091 machine_mode mode;
5092 machine_mode hfa_mode;
487b97e0 5093 HOST_WIDE_INT byte_size;
c65ebc55
JW
5094
5095 mode = TYPE_MODE (valtype);
487b97e0
RH
5096 byte_size = GET_MODE_SIZE (mode);
5097 if (mode == BLKmode)
5098 {
5099 byte_size = int_size_in_bytes (valtype);
5100 if (byte_size < 0)
351a758b 5101 return true;
487b97e0 5102 }
c65ebc55
JW
5103
5104 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5105
5106 hfa_mode = hfa_element_mode (valtype, 0);
5107 if (hfa_mode != VOIDmode)
5108 {
5109 int hfa_size = GET_MODE_SIZE (hfa_mode);
5110
c65ebc55 5111 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 5112 return true;
c65ebc55 5113 else
351a758b 5114 return false;
c65ebc55 5115 }
c65ebc55 5116 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 5117 return true;
c65ebc55 5118 else
351a758b 5119 return false;
c65ebc55
JW
5120}
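
/* An illustrative, non-compiled sketch of the decision above, assuming
   UNITS_PER_WORD == 8, MAX_ARGUMENT_SLOTS == 8 and MAX_INT_RETURN_SLOTS == 4.
   "example_return_in_memory" is a hypothetical name.  */
#if 0
static int
example_return_in_memory (long byte_size, int hfa_elem_size)
{
  if (byte_size < 0)
    return 1;					/* variable-sized: in memory */
  if (hfa_elem_size != 0)			/* homogeneous FP aggregate */
    return byte_size / hfa_elem_size > 8;	/* more than 8 FP elements */
  return byte_size > 8 * 4;			/* larger than four GRs */
}

/* A struct of 8 doubles is an HFA and comes back in FP registers; a struct
   of 10 floats does not fit and is returned in memory, as is any non-HFA
   aggregate larger than 32 bytes.  */
#endif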
5121
5122/* Return rtx for register that holds the function return value. */
5123
ba90d838
AS
5124static rtx
5125ia64_function_value (const_tree valtype,
5126 const_tree fn_decl_or_type,
5127 bool outgoing ATTRIBUTE_UNUSED)
c65ebc55 5128{
ef4bddc2
RS
5129 machine_mode mode;
5130 machine_mode hfa_mode;
f2972bf8 5131 int unsignedp;
ba90d838 5132 const_tree func = fn_decl_or_type;
c65ebc55 5133
ba90d838
AS
5134 if (fn_decl_or_type
5135 && !DECL_P (fn_decl_or_type))
5136 func = NULL;
5137
c65ebc55
JW
5138 mode = TYPE_MODE (valtype);
5139 hfa_mode = hfa_element_mode (valtype, 0);
5140
5141 if (hfa_mode != VOIDmode)
5142 {
5143 rtx loc[8];
5144 int i;
5145 int hfa_size;
5146 int byte_size;
5147 int offset;
5148
5149 hfa_size = GET_MODE_SIZE (hfa_mode);
5150 byte_size = ((mode == BLKmode)
5151 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5152 offset = 0;
5153 for (i = 0; offset < byte_size; i++)
5154 {
5155 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5156 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5157 GEN_INT (offset));
c65ebc55
JW
5158 offset += hfa_size;
5159 }
9dec91d4 5160 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 5161 }
f57fc998 5162 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
5163 return gen_rtx_REG (mode, FR_ARG_FIRST);
5164 else
3870df96 5165 {
8c5cacfd
RH
5166 bool need_parallel = false;
5167
5168 /* In big-endian mode, we need to manage the layout of aggregates
5169 in the registers so that we get the bits properly aligned in
5170 the highpart of the registers. */
3870df96
SE
5171 if (BYTES_BIG_ENDIAN
5172 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
8c5cacfd
RH
5173 need_parallel = true;
5174
5175 /* Something like struct S { long double x; char a[0] } is not an
5176 HFA structure, and therefore doesn't go in fp registers. But
5177 the middle-end will give it XFmode anyway, and XFmode values
5178 don't normally fit in integer registers. So we need to smuggle
5179 the value inside a parallel. */
4de67c26 5180 else if (mode == XFmode || mode == XCmode || mode == RFmode)
8c5cacfd
RH
5181 need_parallel = true;
5182
5183 if (need_parallel)
3870df96
SE
5184 {
5185 rtx loc[8];
5186 int offset;
5187 int bytesize;
5188 int i;
5189
5190 offset = 0;
5191 bytesize = int_size_in_bytes (valtype);
543144ed
JM
5192 /* An empty PARALLEL is invalid here, but the return value
5193 doesn't matter for empty structs. */
5194 if (bytesize == 0)
5195 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96
SE
5196 for (i = 0; offset < bytesize; i++)
5197 {
5198 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5199 gen_rtx_REG (DImode,
5200 GR_RET_FIRST + i),
5201 GEN_INT (offset));
5202 offset += UNITS_PER_WORD;
5203 }
5204 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5205 }
8c5cacfd 5206
8ee95727
TG
5207 mode = promote_function_mode (valtype, mode, &unsignedp,
5208 func ? TREE_TYPE (func) : NULL_TREE,
5209 true);
f2972bf8 5210
8c5cacfd 5211 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96 5212 }
c65ebc55
JW
5213}
5214
ba90d838
AS
5215/* Worker function for TARGET_LIBCALL_VALUE. */
5216
5217static rtx
ef4bddc2 5218ia64_libcall_value (machine_mode mode,
ba90d838
AS
5219 const_rtx fun ATTRIBUTE_UNUSED)
5220{
5221 return gen_rtx_REG (mode,
5222 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5223 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5224 && (mode) != TFmode)
5225 ? FR_RET_FIRST : GR_RET_FIRST));
5226}
5227
5228/* Worker function for FUNCTION_VALUE_REGNO_P. */
5229
5230static bool
5231ia64_function_value_regno_p (const unsigned int regno)
5232{
5233 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5234 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5235}
5236
fdbe66f2 5237/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6b2300b3
JJ
5238 We need to emit DTP-relative relocations. */
5239
fdbe66f2 5240static void
9c808aad 5241ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3 5242{
6f3113ed
SE
5243 gcc_assert (size == 4 || size == 8);
5244 if (size == 4)
5245 fputs ("\tdata4.ua\t@dtprel(", file);
5246 else
5247 fputs ("\tdata8.ua\t@dtprel(", file);
6b2300b3
JJ
5248 output_addr_const (file, x);
5249 fputs (")", file);
5250}
5251
c65ebc55
JW
5252/* Print a memory address as an operand to reference that memory location. */
5253
5254/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5255 also call this from ia64_print_operand for memory addresses. */
5256
5e50b799 5257static void
9c808aad
AJ
5258ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5259 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
5260{
5261}
5262
3569057d 5263/* Print an operand to an assembler instruction.
5264 C Swap and print a comparison operator.
5265 D Print an FP comparison operator.
5266 E Print 32 - constant, for SImode shifts as extract.
66db6b45 5267 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
5268 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5269 a floating point register emitted normally.
735b94a7 5270 G A floating point constant.
c65ebc55 5271 I Invert a predicate register by adding 1.
e5bde68a 5272 J Select the proper predicate register for a condition.
6b6c1201 5273 j Select the inverse predicate register for a condition.
c65ebc55
JW
5274 O Append .acq for volatile load.
5275 P Postincrement of a MEM.
5276 Q Append .rel for volatile store.
4883241c 5277 R Print .s .d or nothing for a single, double or no truncation.
c65ebc55
JW
5278 S Shift amount for shladd instruction.
5279 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5280 for Intel assembler.
5281 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5282 for Intel assembler.
a71aef0b 5283 X A pair of floating point registers.
c65ebc55 5284 r Print register name, or constant 0 as r0. HP compatibility for
f61134e8
RH
5285 Linux kernel.
5286 v Print vector constant value as an 8-byte integer value. */
5287
5e50b799 5288static void
9c808aad 5289ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 5290{
e57b9d65
RH
5291 const char *str;
5292
c65ebc55
JW
5293 switch (code)
5294 {
c65ebc55
JW
5295 case 0:
5296 /* Handled below. */
5297 break;
809d4ef1 5298
c65ebc55
JW
5299 case 'C':
5300 {
5301 enum rtx_code c = swap_condition (GET_CODE (x));
5302 fputs (GET_RTX_NAME (c), file);
5303 return;
5304 }
5305
5306 case 'D':
e57b9d65
RH
5307 switch (GET_CODE (x))
5308 {
5309 case NE:
5310 str = "neq";
5311 break;
5312 case UNORDERED:
5313 str = "unord";
5314 break;
5315 case ORDERED:
5316 str = "ord";
5317 break;
86ad1da0
SE
5318 case UNLT:
5319 str = "nge";
5320 break;
5321 case UNLE:
5322 str = "ngt";
5323 break;
5324 case UNGT:
5325 str = "nle";
5326 break;
5327 case UNGE:
5328 str = "nlt";
5329 break;
8fc53a5f
EB
5330 case UNEQ:
5331 case LTGT:
5332 gcc_unreachable ();
e57b9d65
RH
5333 default:
5334 str = GET_RTX_NAME (GET_CODE (x));
5335 break;
5336 }
5337 fputs (str, file);
c65ebc55
JW
5338 return;
5339
5340 case 'E':
5341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5342 return;
5343
66db6b45
RH
5344 case 'e':
5345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5346 return;
5347
c65ebc55
JW
5348 case 'F':
5349 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 5350 str = reg_names [FR_REG (0)];
c65ebc55 5351 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 5352 str = reg_names [FR_REG (1)];
c65ebc55 5353 else
e820471b
NS
5354 {
5355 gcc_assert (GET_CODE (x) == REG);
5356 str = reg_names [REGNO (x)];
5357 }
e57b9d65 5358 fputs (str, file);
c65ebc55
JW
5359 return;
5360
735b94a7
SE
5361 case 'G':
5362 {
5363 long val[4];
5364 REAL_VALUE_TYPE rv;
5365 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5366 real_to_target (val, &rv, GET_MODE (x));
5367 if (GET_MODE (x) == SFmode)
5368 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5369 else if (GET_MODE (x) == DFmode)
5370 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5371 & 0xffffffff,
5372 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5373 & 0xffffffff);
5374 else
5375 output_operand_lossage ("invalid %%G mode");
5376 }
5377 return;
5378
c65ebc55
JW
5379 case 'I':
5380 fputs (reg_names [REGNO (x) + 1], file);
5381 return;
5382
e5bde68a 5383 case 'J':
6b6c1201
RH
5384 case 'j':
5385 {
5386 unsigned int regno = REGNO (XEXP (x, 0));
5387 if (GET_CODE (x) == EQ)
5388 regno += 1;
5389 if (code == 'j')
5390 regno ^= 1;
5391 fputs (reg_names [regno], file);
5392 }
e5bde68a
RH
5393 return;
5394
c65ebc55
JW
5395 case 'O':
5396 if (MEM_VOLATILE_P (x))
5397 fputs(".acq", file);
5398 return;
5399
5400 case 'P':
5401 {
4b983fdc 5402 HOST_WIDE_INT value;
c65ebc55 5403
4b983fdc
RH
5404 switch (GET_CODE (XEXP (x, 0)))
5405 {
5406 default:
5407 return;
5408
5409 case POST_MODIFY:
5410 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5411 if (GET_CODE (x) == CONST_INT)
08012cda 5412 value = INTVAL (x);
e820471b 5413 else
4b983fdc 5414 {
e820471b 5415 gcc_assert (GET_CODE (x) == REG);
08012cda 5416 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
5417 return;
5418 }
4b983fdc 5419 break;
c65ebc55 5420
4b983fdc
RH
5421 case POST_INC:
5422 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 5423 break;
c65ebc55 5424
4b983fdc 5425 case POST_DEC:
08012cda 5426 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
5427 break;
5428 }
809d4ef1 5429
4a0a75dd 5430 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
5431 return;
5432 }
5433
5434 case 'Q':
5435 if (MEM_VOLATILE_P (x))
5436 fputs(".rel", file);
5437 return;
5438
4883241c
SE
5439 case 'R':
5440 if (x == CONST0_RTX (GET_MODE (x)))
5441 fputs(".s", file);
5442 else if (x == CONST1_RTX (GET_MODE (x)))
5443 fputs(".d", file);
5444 else if (x == CONST2_RTX (GET_MODE (x)))
5445 ;
5446 else
5447 output_operand_lossage ("invalid %%R value");
5448 return;
5449
c65ebc55 5450 case 'S':
809d4ef1 5451 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
5452 return;
5453
5454 case 'T':
5455 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5456 {
809d4ef1 5457 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5458 return;
5459 }
5460 break;
5461
5462 case 'U':
5463 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5464 {
3b572406 5465 const char *prefix = "0x";
c65ebc55
JW
5466 if (INTVAL (x) & 0x80000000)
5467 {
5468 fprintf (file, "0xffffffff");
5469 prefix = "";
5470 }
809d4ef1 5471 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5472 return;
5473 }
5474 break;
809d4ef1 5475
a71aef0b
JB
5476 case 'X':
5477 {
5478 unsigned int regno = REGNO (x);
5479 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5480 }
5481 return;
5482
c65ebc55 5483 case 'r':
18a3c539
JW
5484 /* If this operand is the constant zero, write it as register zero.
5485 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
5486 if (GET_CODE (x) == REG)
5487 fputs (reg_names[REGNO (x)], file);
5488 else if (x == CONST0_RTX (GET_MODE (x)))
5489 fputs ("r0", file);
18a3c539
JW
5490 else if (GET_CODE (x) == CONST_INT)
5491 output_addr_const (file, x);
c65ebc55
JW
5492 else
5493 output_operand_lossage ("invalid %%r value");
5494 return;
5495
f61134e8
RH
5496 case 'v':
5497 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5498 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5499 break;
5500
85548039
RH
5501 case '+':
5502 {
5503 const char *which;
9c808aad 5504
85548039
RH
5505 /* For conditional branches, returns or calls, substitute
5506 sptk, dptk, dpnt, or spnt for %s. */
5507 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5508 if (x)
5509 {
e5af9ddd 5510 int pred_val = XINT (x, 0);
85548039
RH
5511
5512 /* Guess top and bottom 10% statically predicted. */
2c9e13f3
JH
5513 if (pred_val < REG_BR_PROB_BASE / 50
5514 && br_prob_note_reliable_p (x))
85548039
RH
5515 which = ".spnt";
5516 else if (pred_val < REG_BR_PROB_BASE / 2)
5517 which = ".dpnt";
2c9e13f3
JH
5518 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5519 || !br_prob_note_reliable_p (x))
85548039
RH
5520 which = ".dptk";
5521 else
5522 which = ".sptk";
5523 }
b64925dc 5524 else if (CALL_P (current_output_insn))
85548039
RH
5525 which = ".sptk";
5526 else
5527 which = ".dptk";
5528
5529 fputs (which, file);
5530 return;
5531 }
5532
6f8aa100
RH
5533 case ',':
5534 x = current_insn_predicate;
5535 if (x)
5536 {
5537 unsigned int regno = REGNO (XEXP (x, 0));
5538 if (GET_CODE (x) == EQ)
5539 regno += 1;
6f8aa100
RH
5540 fprintf (file, "(%s) ", reg_names [regno]);
5541 }
5542 return;
5543
c65ebc55
JW
5544 default:
5545 output_operand_lossage ("ia64_print_operand: unknown code");
5546 return;
5547 }
5548
5549 switch (GET_CODE (x))
5550 {
5551 /* This happens for the spill/restore instructions. */
5552 case POST_INC:
4b983fdc
RH
5553 case POST_DEC:
5554 case POST_MODIFY:
c65ebc55 5555 x = XEXP (x, 0);
ed168e45 5556 /* ... fall through ... */
c65ebc55
JW
5557
5558 case REG:
5559 fputs (reg_names [REGNO (x)], file);
5560 break;
5561
5562 case MEM:
5563 {
5564 rtx addr = XEXP (x, 0);
ec8e098d 5565 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
5566 addr = XEXP (addr, 0);
5567 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5568 break;
5569 }
809d4ef1 5570
c65ebc55
JW
5571 default:
5572 output_addr_const (file, x);
5573 break;
5574 }
5575
5576 return;
5577}
5e50b799
AS
5578
5579/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5580
5581static bool
5582ia64_print_operand_punct_valid_p (unsigned char code)
5583{
5584 return (code == '+' || code == ',');
5585}
c65ebc55 5586\f
3c50106f
RH
5587/* Compute a (partial) cost for rtx X. Return true if the complete
5588 cost has been computed, and false if subexpressions should be
5589 scanned. In either case, *TOTAL contains the cost result. */
5590/* ??? This is incomplete. */
5591
5592static bool
68f932c4
RS
5593ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5594 int *total, bool speed ATTRIBUTE_UNUSED)
3c50106f
RH
5595{
5596 switch (code)
5597 {
5598 case CONST_INT:
5599 switch (outer_code)
5600 {
5601 case SET:
13f70342 5602 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
3c50106f
RH
5603 return true;
5604 case PLUS:
13f70342 5605 if (satisfies_constraint_I (x))
3c50106f 5606 *total = 0;
13f70342 5607 else if (satisfies_constraint_J (x))
3c50106f
RH
5608 *total = 1;
5609 else
5610 *total = COSTS_N_INSNS (1);
5611 return true;
5612 default:
13f70342 5613 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
3c50106f
RH
5614 *total = 0;
5615 else
5616 *total = COSTS_N_INSNS (1);
5617 return true;
5618 }
5619
5620 case CONST_DOUBLE:
5621 *total = COSTS_N_INSNS (1);
5622 return true;
5623
5624 case CONST:
5625 case SYMBOL_REF:
5626 case LABEL_REF:
5627 *total = COSTS_N_INSNS (3);
5628 return true;
5629
f19f1e5e
RH
5630 case FMA:
5631 *total = COSTS_N_INSNS (4);
5632 return true;
5633
3c50106f
RH
5634 case MULT:
5635 /* For multiplies wider than HImode, we have to go to the FPU,
5636 which normally involves copies. Plus there's the latency
5637 of the multiply itself, and the latency of the instructions to
5638 transfer integer regs to FP regs. */
f19f1e5e
RH
5639 if (FLOAT_MODE_P (GET_MODE (x)))
5640 *total = COSTS_N_INSNS (4);
5641 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
3c50106f
RH
5642 *total = COSTS_N_INSNS (10);
5643 else
5644 *total = COSTS_N_INSNS (2);
5645 return true;
5646
5647 case PLUS:
5648 case MINUS:
f19f1e5e
RH
5649 if (FLOAT_MODE_P (GET_MODE (x)))
5650 {
5651 *total = COSTS_N_INSNS (4);
5652 return true;
5653 }
5654 /* FALLTHRU */
5655
3c50106f
RH
5656 case ASHIFT:
5657 case ASHIFTRT:
5658 case LSHIFTRT:
5659 *total = COSTS_N_INSNS (1);
5660 return true;
5661
5662 case DIV:
5663 case UDIV:
5664 case MOD:
5665 case UMOD:
5666 /* We make divide expensive, so that divide-by-constant will be
5667 optimized to a multiply. */
5668 *total = COSTS_N_INSNS (60);
5669 return true;
5670
5671 default:
5672 return false;
5673 }
5674}
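
/* An illustrative, non-compiled sketch of how the CONST_INT cases above
   price an immediate, assuming the usual IA-64 ranges for the constraints:
   I is a 14-bit signed add immediate and J a 22-bit signed immediate.
   COSTS_N_INSNS (1) is written as 4 here; "example_const_int_cost" is a
   hypothetical name.  */
#if 0
static int
example_const_int_cost (long long val, int inside_plus)
{
  int fits_i = val >= -8192 && val < 8192;		/* 14-bit signed */
  int fits_j = val >= -2097152 && val < 2097152;	/* 22-bit signed */

  if (inside_plus)
    return fits_i ? 0 : fits_j ? 1 : 4;
  return fits_j ? 0 : 4;				/* plain SET */
}

/* Adding 5000 folds into the add (cost 0), while adding 10000000 first
   requires the constant to be materialized (cost of one insn).  */
#endif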
5675
9e4f94de 5676/* Calculate the cost of moving data from a register in class FROM to
7109d286 5677 one in class TO, using MODE. */
5527bf14 5678
de8f4b07 5679static int
ef4bddc2 5680ia64_register_move_cost (machine_mode mode, reg_class_t from,
6f76a878 5681 reg_class_t to)
a87cf97e 5682{
7109d286
RH
5683 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5684 if (to == ADDL_REGS)
5685 to = GR_REGS;
5686 if (from == ADDL_REGS)
5687 from = GR_REGS;
5688
5689 /* All costs are symmetric, so reduce cases by putting the
5690 lower number class as the destination. */
5691 if (from < to)
5692 {
6f76a878 5693 reg_class_t tmp = to;
7109d286
RH
5694 to = from, from = tmp;
5695 }
5696
02befdf4 5697 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286 5698 so that we get secondary memory reloads. Between FR_REGS,
69e18c09 5699 we have to make this at least as expensive as memory_move_cost
7109d286 5700 to avoid spectacularly poor register class preferencing. */
4de67c26 5701 if (mode == XFmode || mode == RFmode)
7109d286
RH
5702 {
5703 if (to != GR_REGS || from != GR_REGS)
69e18c09 5704 return memory_move_cost (mode, to, false);
7109d286
RH
5705 else
5706 return 3;
5707 }
5708
5709 switch (to)
5710 {
5711 case PR_REGS:
5712 /* Moving between PR registers takes two insns. */
5713 if (from == PR_REGS)
5714 return 3;
5715 /* Moving between PR and anything but GR is impossible. */
5716 if (from != GR_REGS)
69e18c09 5717 return memory_move_cost (mode, to, false);
7109d286
RH
5718 break;
5719
5720 case BR_REGS:
5721 /* Moving between BR and anything but GR is impossible. */
5722 if (from != GR_REGS && from != GR_AND_BR_REGS)
69e18c09 5723 return memory_move_cost (mode, to, false);
7109d286
RH
5724 break;
5725
5726 case AR_I_REGS:
5727 case AR_M_REGS:
5728 /* Moving between AR and anything but GR is impossible. */
5729 if (from != GR_REGS)
69e18c09 5730 return memory_move_cost (mode, to, false);
7109d286
RH
5731 break;
5732
5733 case GR_REGS:
5734 case FR_REGS:
a71aef0b 5735 case FP_REGS:
7109d286
RH
5736 case GR_AND_FR_REGS:
5737 case GR_AND_BR_REGS:
5738 case ALL_REGS:
5739 break;
5740
5741 default:
e820471b 5742 gcc_unreachable ();
7109d286 5743 }
3f622353 5744
5527bf14
RH
5745 return 2;
5746}
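
/* A few illustrative data points for the function above; the table is a
   non-compiled sketch and the values assume the ia64_memory_move_cost
   figures defined just below (4 for the general and FP classes).  */
#if 0
static const struct { const char *copy; int cost; } example_move_costs[] = {
  { "GR -> GR, DImode",  2 },	/* default case at the end of the function */
  { "GR -> FR, DImode",  2 },	/* no special case applies */
  { "GR <-> FR, XFmode", 4 },	/* priced like a trip through memory, which
				   steers reload toward a memory intermediate */
  { "PR -> PR",          3 },	/* a predicate copy takes two insns */
};
#endif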
c65ebc55 5747
69e18c09
AS
5748/* Calculate the cost of moving data of MODE from a register to or from
5749 memory. */
5750
5751static int
ef4bddc2 5752ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
69e18c09
AS
5753 reg_class_t rclass,
5754 bool in ATTRIBUTE_UNUSED)
5755{
5756 if (rclass == GENERAL_REGS
5757 || rclass == FR_REGS
5758 || rclass == FP_REGS
5759 || rclass == GR_AND_FR_REGS)
5760 return 4;
5761 else
5762 return 10;
5763}
5764
ab177ad5
AS
5765/* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5766 on RCLASS to use when copying X into that class. */
f61134e8 5767
ab177ad5
AS
5768static reg_class_t
5769ia64_preferred_reload_class (rtx x, reg_class_t rclass)
f61134e8 5770{
0a2aaacc 5771 switch (rclass)
f61134e8
RH
5772 {
5773 case FR_REGS:
a71aef0b 5774 case FP_REGS:
f61134e8
RH
5775 /* Don't allow volatile mem reloads into floating point registers.
5776 This is defined to force reload to choose the r/m case instead
5777 of the f/f case when reloading (set (reg fX) (mem/v)). */
5778 if (MEM_P (x) && MEM_VOLATILE_P (x))
5779 return NO_REGS;
5780
5781 /* Force all unrecognized constants into the constant pool. */
5782 if (CONSTANT_P (x))
5783 return NO_REGS;
5784 break;
5785
5786 case AR_M_REGS:
5787 case AR_I_REGS:
5788 if (!OBJECT_P (x))
5789 return NO_REGS;
5790 break;
5791
5792 default:
5793 break;
5794 }
5795
0a2aaacc 5796 return rclass;
f61134e8
RH
5797}
5798
c65ebc55 5799/* This function returns the register class required for a secondary
0a2aaacc 5800 register when copying between one of the registers in RCLASS, and X,
c65ebc55
JW
5801 using MODE. A return value of NO_REGS means that no secondary register
5802 is required. */
5803
5804enum reg_class
0a2aaacc 5805ia64_secondary_reload_class (enum reg_class rclass,
ef4bddc2 5806 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
5807{
5808 int regno = -1;
5809
5810 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5811 regno = true_regnum (x);
5812
0a2aaacc 5813 switch (rclass)
97e242b0
RH
5814 {
5815 case BR_REGS:
7109d286
RH
5816 case AR_M_REGS:
5817 case AR_I_REGS:
5818 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5819 interaction. We end up with two pseudos with overlapping lifetimes
5820 both of which are equiv to the same constant, and both which need
5821 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5822 changes depending on the path length, which means the qty_first_reg
5823 check in make_regs_eqv can give different answers at different times.
5824 At some point I'll probably need a reload_indi pattern to handle
5825 this.
5826
5827 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5828 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5829 non-general registers for good measure. */
5830 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
5831 return GR_REGS;
5832
5833 /* This is needed if a pseudo used as a call_operand gets spilled to a
5834 stack slot. */
5835 if (GET_CODE (x) == MEM)
5836 return GR_REGS;
5837 break;
5838
5839 case FR_REGS:
a71aef0b 5840 case FP_REGS:
c51e6d85 5841 /* Need to go through general registers to get to other class regs. */
7109d286
RH
5842 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5843 return GR_REGS;
9c808aad 5844
97e242b0
RH
5845 /* This can happen when a paradoxical subreg is an operand to the
5846 muldi3 pattern. */
5847 /* ??? This shouldn't be necessary after instruction scheduling is
5848 enabled, because paradoxical subregs are not accepted by
5849 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5850 stop the paradoxical subreg stupidity in the *_operand functions
5851 in recog.c. */
5852 if (GET_CODE (x) == MEM
5853 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5854 || GET_MODE (x) == QImode))
5855 return GR_REGS;
5856
5857 /* This can happen because of the ior/and/etc patterns that accept FP
5858 registers as operands. If the third operand is a constant, then it
5859 needs to be reloaded into a FP register. */
5860 if (GET_CODE (x) == CONST_INT)
5861 return GR_REGS;
5862
5863 /* This can happen because of register elimination in a muldi3 insn.
5864 E.g. `26107 * (unsigned long)&u'. */
5865 if (GET_CODE (x) == PLUS)
5866 return GR_REGS;
5867 break;
5868
5869 case PR_REGS:
f2f90c63 5870 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
5871 and the function has a nonlocal goto. This is because global
5872 does not allocate call crossing pseudos to hard registers when
e3b5732b 5873 crtl->has_nonlocal_goto is true. This is relatively
97e242b0
RH
5874 common for C++ programs that use exceptions. To reproduce,
5875 return NO_REGS and compile libstdc++. */
5876 if (GET_CODE (x) == MEM)
5877 return GR_REGS;
f2f90c63
RH
5878
5879 /* This can happen when we take a BImode subreg of a DImode value,
5880 and that DImode value winds up in some non-GR register. */
5881 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5882 return GR_REGS;
97e242b0
RH
5883 break;
5884
5885 default:
5886 break;
5887 }
c65ebc55
JW
5888
5889 return NO_REGS;
5890}
5891
215b063c
PB
5892\f
5893/* Implement targetm.unspec_may_trap_p hook. */
5894static int
5895ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5896{
c84a808e
EB
5897 switch (XINT (x, 1))
5898 {
5899 case UNSPEC_LDA:
5900 case UNSPEC_LDS:
5901 case UNSPEC_LDSA:
5902 case UNSPEC_LDCCLR:
5903 case UNSPEC_CHKACLR:
5904 case UNSPEC_CHKS:
5905 /* These unspecs are just wrappers. */
5906 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
215b063c
PB
5907 }
5908
5909 return default_unspec_may_trap_p (x, flags);
5910}
5911
c65ebc55
JW
5912\f
5913/* Parse the -mfixed-range= option string. */
5914
5915static void
9c808aad 5916fix_range (const char *const_str)
c65ebc55
JW
5917{
5918 int i, first, last;
3b572406 5919 char *str, *dash, *comma;
c65ebc55
JW
5920
5921 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5922 REG2 are either register names or register numbers. The effect
5923 of this option is to mark the registers in the range from REG1 to
5924 REG2 as ``fixed'' so they won't be used by the compiler. This is
5925 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5926
3b572406
RH
5927 i = strlen (const_str);
5928 str = (char *) alloca (i + 1);
5929 memcpy (str, const_str, i + 1);
5930
c65ebc55
JW
5931 while (1)
5932 {
5933 dash = strchr (str, '-');
5934 if (!dash)
5935 {
d4ee4d25 5936 warning (0, "value of -mfixed-range must have form REG1-REG2");
c65ebc55
JW
5937 return;
5938 }
5939 *dash = '\0';
5940
5941 comma = strchr (dash + 1, ',');
5942 if (comma)
5943 *comma = '\0';
5944
5945 first = decode_reg_name (str);
5946 if (first < 0)
5947 {
d4ee4d25 5948 warning (0, "unknown register name: %s", str);
c65ebc55
JW
5949 return;
5950 }
5951
5952 last = decode_reg_name (dash + 1);
5953 if (last < 0)
5954 {
d4ee4d25 5955 warning (0, "unknown register name: %s", dash + 1);
c65ebc55
JW
5956 return;
5957 }
5958
5959 *dash = '-';
5960
5961 if (first > last)
5962 {
d4ee4d25 5963 warning (0, "%s-%s is an empty range", str, dash + 1);
c65ebc55
JW
5964 return;
5965 }
5966
5967 for (i = first; i <= last; ++i)
5968 fixed_regs[i] = call_used_regs[i] = 1;
5969
5970 if (!comma)
5971 break;
5972
5973 *comma = ',';
5974 str = comma + 1;
5975 }
5976}
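
/* An illustrative example (not compiled) of the effect of the parser above;
   the register names and the helper name "example_apply_fixed_range" are
   only an example.  Given

     -mfixed-range=f32-f127,r13-r13

   the loop splits the string into the pairs ("f32","f127") and ("r13","r13"),
   maps each name through decode_reg_name, and removes every register in each
   inclusive range from allocation, equivalent to:  */
#if 0
static void
example_apply_fixed_range (void)
{
  int i;

  for (i = FR_REG (32); i <= FR_REG (127); ++i)
    fixed_regs[i] = call_used_regs[i] = 1;
  fixed_regs[GR_REG (13)] = call_used_regs[GR_REG (13)] = 1;
}
#endif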
5977
930572b9 5978/* Implement TARGET_OPTION_OVERRIDE. */
c65ebc55 5979
930572b9
AS
5980static void
5981ia64_option_override (void)
c65ebc55 5982{
e6cc0c98
JM
5983 unsigned int i;
5984 cl_deferred_option *opt;
9771b263
DN
5985 vec<cl_deferred_option> *v
5986 = (vec<cl_deferred_option> *) ia64_deferred_options;
e6cc0c98 5987
9771b263
DN
5988 if (v)
5989 FOR_EACH_VEC_ELT (*v, i, opt)
5990 {
5991 switch (opt->opt_index)
5992 {
5993 case OPT_mfixed_range_:
5994 fix_range (opt->arg);
5995 break;
e6cc0c98 5996
9771b263
DN
5997 default:
5998 gcc_unreachable ();
5999 }
6000 }
e6cc0c98 6001
59da9a7d
JW
6002 if (TARGET_AUTO_PIC)
6003 target_flags |= MASK_CONST_GP;
6004
7e1e7d4c
VM
 6005  /* Numerous experiments show that IRA-based loop pressure
6006 calculation works better for RTL loop invariant motion on targets
6007 with enough (>= 32) registers. It is an expensive optimization.
6008 So it is on only for peak performance. */
6009 if (optimize >= 3)
6010 flag_ira_loop_pressure = 1;
6011
6012
fa37ed29
JM
6013 ia64_section_threshold = (global_options_set.x_g_switch_value
6014 ? g_switch_value
6015 : IA64_DEFAULT_GVALUE);
2b7e2984
SE
6016
6017 init_machine_status = ia64_init_machine_status;
6018
6019 if (align_functions <= 0)
6020 align_functions = 64;
6021 if (align_loops <= 0)
6022 align_loops = 32;
6023 if (TARGET_ABI_OPEN_VMS)
6024 flag_no_common = 1;
6025
6026 ia64_override_options_after_change();
6027}
6028
6029/* Implement targetm.override_options_after_change. */
6030
6031static void
6032ia64_override_options_after_change (void)
6033{
388092d5 6034 if (optimize >= 3
d4d24ba4
JM
6035 && !global_options_set.x_flag_selective_scheduling
6036 && !global_options_set.x_flag_selective_scheduling2)
388092d5
AB
6037 {
6038 flag_selective_scheduling2 = 1;
6039 flag_sel_sched_pipelining = 1;
6040 }
6041 if (mflag_sched_control_spec == 2)
6042 {
6043 /* Control speculation is on by default for the selective scheduler,
6044 but not for the Haifa scheduler. */
6045 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6046 }
6047 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6048 {
6049 /* FIXME: remove this when we'd implement breaking autoinsns as
6050 a transformation. */
6051 flag_auto_inc_dec = 0;
6052 }
c65ebc55 6053}
dbdd120f 6054
6fb5fa3c
DB
6055/* Initialize the record of emitted frame related registers. */
6056
6057void ia64_init_expanders (void)
6058{
6059 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6060}
6061
dbdd120f
RH
6062static struct machine_function *
6063ia64_init_machine_status (void)
6064{
766090c2 6065 return ggc_cleared_alloc<machine_function> ();
dbdd120f 6066}
c65ebc55 6067\f
647d790d
DM
6068static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6069static enum attr_type ia64_safe_type (rtx_insn *);
2130b7fb 6070
2130b7fb 6071static enum attr_itanium_class
647d790d 6072ia64_safe_itanium_class (rtx_insn *insn)
2130b7fb
BS
6073{
6074 if (recog_memoized (insn) >= 0)
6075 return get_attr_itanium_class (insn);
b5b8b0ac
AO
6076 else if (DEBUG_INSN_P (insn))
6077 return ITANIUM_CLASS_IGNORE;
2130b7fb
BS
6078 else
6079 return ITANIUM_CLASS_UNKNOWN;
6080}
6081
6082static enum attr_type
647d790d 6083ia64_safe_type (rtx_insn *insn)
2130b7fb
BS
6084{
6085 if (recog_memoized (insn) >= 0)
6086 return get_attr_type (insn);
6087 else
6088 return TYPE_UNKNOWN;
6089}
6090\f
c65ebc55
JW
6091/* The following collection of routines emit instruction group stop bits as
6092 necessary to avoid dependencies. */
6093
6094/* Need to track some additional registers as far as serialization is
6095 concerned so we can properly handle br.call and br.ret. We could
6096 make these registers visible to gcc, but since these registers are
6097 never explicitly used in gcc generated code, it seems wasteful to
6098 do so (plus it would make the call and return patterns needlessly
6099 complex). */
c65ebc55 6100#define REG_RP (BR_REG (0))
c65ebc55 6101#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
6102/* This is used for volatile asms which may require a stop bit immediately
6103 before and after them. */
5527bf14 6104#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
6105#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6106#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 6107
f2f90c63
RH
6108/* For each register, we keep track of how it has been written in the
6109 current instruction group.
6110
6111 If a register is written unconditionally (no qualifying predicate),
6112 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6113
6114 If a register is written if its qualifying predicate P is true, we
6115 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6116 may be written again by the complement of P (P^1) and when this happens,
6117 WRITE_COUNT gets set to 2.
6118
6119 The result of this is that whenever an insn attempts to write a register
e03f5d43 6120 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
6121
6122 If a predicate register is written by a floating-point insn, we set
6123 WRITTEN_BY_FP to true.
6124
6125 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6126 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6127
444a356a
JJ
6128#if GCC_VERSION >= 4000
6129#define RWS_FIELD_TYPE __extension__ unsigned short
6130#else
6131#define RWS_FIELD_TYPE unsigned int
6132#endif
c65ebc55
JW
6133struct reg_write_state
6134{
444a356a
JJ
6135 RWS_FIELD_TYPE write_count : 2;
6136 RWS_FIELD_TYPE first_pred : 10;
6137 RWS_FIELD_TYPE written_by_fp : 1;
6138 RWS_FIELD_TYPE written_by_and : 1;
6139 RWS_FIELD_TYPE written_by_or : 1;
c65ebc55
JW
6140};
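
/* An illustrative, non-compiled trace of the WRITE_COUNT protocol described
   above, for one register within a single instruction group (register
   numbers are only examples):

     mov r14 = r15		// unconditional write: write_count becomes 2
     add r8 = r14, r9		// read of r14 while write_count != 0
				// -> a stop bit (;;) must separate the two

   A minimal state machine with the same shape; "example_state" and
   "example_write" are hypothetical names.  */
#if 0
struct example_state { int write_count; int first_pred; };

static int			/* returns 1 if a stop bit is needed first */
example_write (struct example_state *s, int pred)
{
  int barrier = s->write_count != 0;		/* earlier write in this group */

  if (pred)
    s->write_count++, s->first_pred = pred;	/* predicated write */
  else
    s->write_count = 2;				/* unconditional write */
  return barrier;
}
#endif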
6141
6142/* Cumulative info for the current instruction group. */
6143struct reg_write_state rws_sum[NUM_REGS];
444a356a
JJ
6144#ifdef ENABLE_CHECKING
6145/* Bitmap whether a register has been written in the current insn. */
6146HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6147 / HOST_BITS_PER_WIDEST_FAST_INT];
6148
6149static inline void
6150rws_insn_set (int regno)
6151{
6152 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6153 SET_HARD_REG_BIT (rws_insn, regno);
6154}
6155
6156static inline int
6157rws_insn_test (int regno)
6158{
6159 return TEST_HARD_REG_BIT (rws_insn, regno);
6160}
6161#else
6162/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6163unsigned char rws_insn[2];
6164
6165static inline void
6166rws_insn_set (int regno)
6167{
6168 if (regno == REG_AR_CFM)
6169 rws_insn[0] = 1;
6170 else if (regno == REG_VOLATILE)
6171 rws_insn[1] = 1;
6172}
6173
6174static inline int
6175rws_insn_test (int regno)
6176{
6177 if (regno == REG_AR_CFM)
6178 return rws_insn[0];
6179 if (regno == REG_VOLATILE)
6180 return rws_insn[1];
6181 return 0;
6182}
6183#endif
c65ebc55 6184
25250265 6185/* Indicates whether this is the first instruction after a stop bit,
6186 in which case we don't need another stop bit. Without this,
6187 ia64_variable_issue will die when scheduling an alloc. */
25250265
JW
6188static int first_instruction;
6189
c65ebc55
JW
6190/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6191 RTL for one instruction. */
6192struct reg_flags
6193{
6194 unsigned int is_write : 1; /* Is register being written? */
6195 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6196 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
6197 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6198 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 6199 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
6200};
6201
444a356a 6202static void rws_update (int, struct reg_flags, int);
9c808aad
AJ
6203static int rws_access_regno (int, struct reg_flags, int);
6204static int rws_access_reg (rtx, struct reg_flags, int);
c1bc6ca8
JW
6205static void update_set_flags (rtx, struct reg_flags *);
6206static int set_src_needs_barrier (rtx, struct reg_flags, int);
9c808aad
AJ
6207static int rtx_needs_barrier (rtx, struct reg_flags, int);
6208static void init_insn_group_barriers (void);
647d790d
DM
6209static int group_barrier_needed (rtx_insn *);
6210static int safe_group_barrier_needed (rtx_insn *);
444a356a 6211static int in_safe_group_barrier;
3b572406 6212
c65ebc55
JW
6213/* Update *RWS for REGNO, which is being written by the current instruction,
6214 with predicate PRED, and associated register flags in FLAGS. */
6215
6216static void
444a356a 6217rws_update (int regno, struct reg_flags flags, int pred)
c65ebc55 6218{
3e7c7805 6219 if (pred)
444a356a 6220 rws_sum[regno].write_count++;
3e7c7805 6221 else
444a356a
JJ
6222 rws_sum[regno].write_count = 2;
6223 rws_sum[regno].written_by_fp |= flags.is_fp;
f2f90c63 6224 /* ??? Not tracking and/or across differing predicates. */
444a356a
JJ
6225 rws_sum[regno].written_by_and = flags.is_and;
6226 rws_sum[regno].written_by_or = flags.is_or;
6227 rws_sum[regno].first_pred = pred;
c65ebc55
JW
6228}
6229
6230/* Handle an access to register REGNO of type FLAGS using predicate register
444a356a 6231 PRED. Update rws_sum array. Return 1 if this access creates
c65ebc55
JW
6232 a dependency with an earlier instruction in the same group. */
6233
6234static int
9c808aad 6235rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
6236{
6237 int need_barrier = 0;
c65ebc55 6238
e820471b 6239 gcc_assert (regno < NUM_REGS);
c65ebc55 6240
f2f90c63
RH
6241 if (! PR_REGNO_P (regno))
6242 flags.is_and = flags.is_or = 0;
6243
c65ebc55
JW
6244 if (flags.is_write)
6245 {
12c2c7aa
JW
6246 int write_count;
6247
444a356a 6248 rws_insn_set (regno);
12c2c7aa 6249 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
6250
6251 switch (write_count)
c65ebc55
JW
6252 {
6253 case 0:
6254 /* The register has not been written yet. */
444a356a
JJ
6255 if (!in_safe_group_barrier)
6256 rws_update (regno, flags, pred);
c65ebc55
JW
6257 break;
6258
6259 case 1:
89774469
SE
6260 /* The register has been written via a predicate. Treat
 6261	     it like an unconditional write and do not try to check
6262 for complementary pred reg in earlier write. */
f2f90c63 6263 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 6264 ;
f2f90c63
RH
6265 else if (flags.is_or && rws_sum[regno].written_by_or)
6266 ;
89774469 6267 else
c65ebc55 6268 need_barrier = 1;
444a356a
JJ
6269 if (!in_safe_group_barrier)
6270 rws_update (regno, flags, pred);
c65ebc55
JW
6271 break;
6272
6273 case 2:
6274 /* The register has been unconditionally written already. We
6275 need a barrier. */
f2f90c63
RH
6276 if (flags.is_and && rws_sum[regno].written_by_and)
6277 ;
6278 else if (flags.is_or && rws_sum[regno].written_by_or)
6279 ;
6280 else
6281 need_barrier = 1;
444a356a
JJ
6282 if (!in_safe_group_barrier)
6283 {
6284 rws_sum[regno].written_by_and = flags.is_and;
6285 rws_sum[regno].written_by_or = flags.is_or;
6286 }
c65ebc55
JW
6287 break;
6288
6289 default:
e820471b 6290 gcc_unreachable ();
c65ebc55
JW
6291 }
6292 }
6293 else
6294 {
6295 if (flags.is_branch)
6296 {
6297 /* Branches have several RAW exceptions that allow to avoid
6298 barriers. */
6299
5527bf14 6300 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
6301 /* RAW dependencies on branch regs are permissible as long
6302 as the writer is a non-branch instruction. Since we
6303 never generate code that uses a branch register written
6304 by a branch instruction, handling this case is
6305 easy. */
5527bf14 6306 return 0;
c65ebc55
JW
6307
6308 if (REGNO_REG_CLASS (regno) == PR_REGS
6309 && ! rws_sum[regno].written_by_fp)
6310 /* The predicates of a branch are available within the
6311 same insn group as long as the predicate was written by
ed168e45 6312 something other than a floating-point instruction. */
c65ebc55
JW
6313 return 0;
6314 }
6315
f2f90c63
RH
6316 if (flags.is_and && rws_sum[regno].written_by_and)
6317 return 0;
6318 if (flags.is_or && rws_sum[regno].written_by_or)
6319 return 0;
6320
c65ebc55
JW
6321 switch (rws_sum[regno].write_count)
6322 {
6323 case 0:
6324 /* The register has not been written yet. */
6325 break;
6326
6327 case 1:
89774469
SE
6328 /* The register has been written via a predicate, assume we
6329 need a barrier (don't check for complementary regs). */
6330 need_barrier = 1;
c65ebc55
JW
6331 break;
6332
6333 case 2:
6334 /* The register has been unconditionally written already. We
6335 need a barrier. */
6336 need_barrier = 1;
6337 break;
6338
6339 default:
e820471b 6340 gcc_unreachable ();
c65ebc55
JW
6341 }
6342 }
6343
6344 return need_barrier;
6345}
6346
97e242b0 6347static int
9c808aad 6348rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
6349{
6350 int regno = REGNO (reg);
6351 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6352
6353 if (n == 1)
6354 return rws_access_regno (regno, flags, pred);
6355 else
6356 {
6357 int need_barrier = 0;
6358 while (--n >= 0)
6359 need_barrier |= rws_access_regno (regno + n, flags, pred);
6360 return need_barrier;
6361 }
6362}
6363
112333d3
BS
6364/* Examine X, which is a SET rtx, and update the flags, the predicate, and
6365 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
6366
6367static void
c1bc6ca8 6368update_set_flags (rtx x, struct reg_flags *pflags)
112333d3
BS
6369{
6370 rtx src = SET_SRC (x);
6371
112333d3
BS
6372 switch (GET_CODE (src))
6373 {
6374 case CALL:
6375 return;
6376
6377 case IF_THEN_ELSE:
048d0d36 6378 /* There are four cases here:
c8d3810f
RH
6379 (1) The destination is (pc), in which case this is a branch,
6380 nothing here applies.
6381 (2) The destination is ar.lc, in which case this is a
6382 doloop_end_internal,
6383 (3) The destination is an fp register, in which case this is
6384 an fselect instruction.
048d0d36
MK
6385 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6386 this is a check load.
c8d3810f
RH
6387 In all cases, nothing we do in this function applies. */
6388 return;
112333d3
BS
6389
6390 default:
ec8e098d 6391 if (COMPARISON_P (src)
c8d3810f 6392 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
112333d3
BS
6393 /* Set pflags->is_fp to 1 so that we know we're dealing
6394 with a floating point comparison when processing the
6395 destination of the SET. */
6396 pflags->is_fp = 1;
6397
6398 /* Discover if this is a parallel comparison. We only handle
6399 and.orcm and or.andcm at present, since we must retain a
6400 strict inverse on the predicate pair. */
6401 else if (GET_CODE (src) == AND)
6402 pflags->is_and = 1;
6403 else if (GET_CODE (src) == IOR)
6404 pflags->is_or = 1;
6405
6406 break;
6407 }
6408}
6409
6410/* Subroutine of rtx_needs_barrier; this function determines whether the
6411 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6412 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6413 for this insn. */
9c808aad 6414
112333d3 6415static int
c1bc6ca8 6416set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
112333d3
BS
6417{
6418 int need_barrier = 0;
6419 rtx dst;
6420 rtx src = SET_SRC (x);
6421
6422 if (GET_CODE (src) == CALL)
6423 /* We don't need to worry about the result registers that
6424 get written by subroutine call. */
6425 return rtx_needs_barrier (src, flags, pred);
6426 else if (SET_DEST (x) == pc_rtx)
6427 {
6428 /* X is a conditional branch. */
6429 /* ??? This seems redundant, as the caller sets this bit for
6430 all JUMP_INSNs. */
048d0d36
MK
6431 if (!ia64_spec_check_src_p (src))
6432 flags.is_branch = 1;
112333d3
BS
6433 return rtx_needs_barrier (src, flags, pred);
6434 }
6435
048d0d36
MK
6436 if (ia64_spec_check_src_p (src))
6437 /* Avoid checking one register twice (in condition
6438 and in 'then' section) for ldc pattern. */
6439 {
6440 gcc_assert (REG_P (XEXP (src, 2)));
6441 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6442
6443 /* We process MEM below. */
6444 src = XEXP (src, 1);
6445 }
6446
6447 need_barrier |= rtx_needs_barrier (src, flags, pred);
112333d3 6448
112333d3
BS
6449 dst = SET_DEST (x);
6450 if (GET_CODE (dst) == ZERO_EXTRACT)
6451 {
6452 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6453 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
112333d3
BS
6454 }
6455 return need_barrier;
6456}
6457
b38ba463
ZW
6458/* Handle an access to rtx X of type FLAGS using predicate register
6459 PRED. Return 1 if this access creates a dependency with an earlier
6460 instruction in the same group. */
c65ebc55
JW
6461
6462static int
9c808aad 6463rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
6464{
6465 int i, j;
6466 int is_complemented = 0;
6467 int need_barrier = 0;
6468 const char *format_ptr;
6469 struct reg_flags new_flags;
c1bc6ca8 6470 rtx cond;
c65ebc55
JW
6471
6472 if (! x)
6473 return 0;
6474
6475 new_flags = flags;
6476
6477 switch (GET_CODE (x))
6478 {
9c808aad 6479 case SET:
c1bc6ca8
JW
6480 update_set_flags (x, &new_flags);
6481 need_barrier = set_src_needs_barrier (x, new_flags, pred);
112333d3 6482 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 6483 {
112333d3
BS
6484 new_flags.is_write = 1;
6485 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 6486 }
c65ebc55
JW
6487 break;
6488
6489 case CALL:
6490 new_flags.is_write = 0;
97e242b0 6491 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
6492
6493 /* Avoid multiple register writes, in case this is a pattern with
e820471b 6494 multiple CALL rtx. This avoids a failure in rws_access_reg. */
444a356a 6495 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
c65ebc55
JW
6496 {
6497 new_flags.is_write = 1;
97e242b0
RH
6498 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6499 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6500 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6501 }
6502 break;
6503
e5bde68a
RH
6504 case COND_EXEC:
6505 /* X is a predicated instruction. */
6506
6507 cond = COND_EXEC_TEST (x);
e820471b 6508 gcc_assert (!pred);
e5bde68a
RH
6509 need_barrier = rtx_needs_barrier (cond, flags, 0);
6510
6511 if (GET_CODE (cond) == EQ)
6512 is_complemented = 1;
6513 cond = XEXP (cond, 0);
e820471b 6514 gcc_assert (GET_CODE (cond) == REG
c1bc6ca8 6515 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
e5bde68a
RH
6516 pred = REGNO (cond);
6517 if (is_complemented)
6518 ++pred;
6519
6520 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6521 return need_barrier;
6522
c65ebc55 6523 case CLOBBER:
c65ebc55 6524 case USE:
c65ebc55
JW
6525 /* Clobber & use are for earlier compiler-phases only. */
6526 break;
6527
6528 case ASM_OPERANDS:
6529 case ASM_INPUT:
6530 /* We always emit stop bits for traditional asms. We emit stop bits
6531 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6532 if (GET_CODE (x) != ASM_OPERANDS
6533 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6534 {
6535 /* Avoid writing the register multiple times if we have multiple
e820471b 6536 asm outputs. This avoids a failure in rws_access_reg. */
444a356a 6537 if (! rws_insn_test (REG_VOLATILE))
c65ebc55
JW
6538 {
6539 new_flags.is_write = 1;
97e242b0 6540 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
6541 }
6542 return 1;
6543 }
6544
6545 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
1e5f1716 6546 We cannot just fall through here since then we would be confused
c65ebc55
JW
6547 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
6548 usage, does not indicate a traditional asm. */
6549
6550 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6551 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6552 need_barrier = 1;
6553 break;
6554
6555 case PARALLEL:
6556 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
6557 {
6558 rtx pat = XVECEXP (x, 0, i);
051d8245 6559 switch (GET_CODE (pat))
112333d3 6560 {
051d8245 6561 case SET:
c1bc6ca8
JW
6562 update_set_flags (pat, &new_flags);
6563 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
051d8245
RH
6564 break;
6565
6566 case USE:
6567 case CALL:
6568 case ASM_OPERANDS:
6569 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6570 break;
6571
6572 case CLOBBER:
628162ea
JJ
6573 if (REG_P (XEXP (pat, 0))
6574 && extract_asm_operands (x) != NULL_RTX
6575 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6576 {
6577 new_flags.is_write = 1;
6578 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6579 new_flags, pred);
6580 new_flags = flags;
6581 }
6582 break;
6583
051d8245
RH
6584 case RETURN:
6585 break;
6586
6587 default:
6588 gcc_unreachable ();
112333d3 6589 }
112333d3
BS
6590 }
6591 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6592 {
6593 rtx pat = XVECEXP (x, 0, i);
6594 if (GET_CODE (pat) == SET)
6595 {
6596 if (GET_CODE (SET_SRC (pat)) != CALL)
6597 {
6598 new_flags.is_write = 1;
6599 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6600 pred);
6601 }
6602 }
339cb12e 6603 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
6604 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6605 }
c65ebc55
JW
6606 break;
6607
6608 case SUBREG:
077bc924
JM
6609 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6610 break;
c65ebc55 6611 case REG:
870f9ec0
RH
6612 if (REGNO (x) == AR_UNAT_REGNUM)
6613 {
6614 for (i = 0; i < 64; ++i)
6615 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6616 }
6617 else
6618 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
6619 break;
6620
6621 case MEM:
6622 /* Find the regs used in memory address computation. */
6623 new_flags.is_write = 0;
6624 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6625 break;
6626
051d8245 6627 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
c65ebc55
JW
6628 case SYMBOL_REF: case LABEL_REF: case CONST:
6629 break;
6630
6631 /* Operators with side-effects. */
6632 case POST_INC: case POST_DEC:
e820471b 6633 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
c65ebc55
JW
6634
6635 new_flags.is_write = 0;
97e242b0 6636 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 6637 new_flags.is_write = 1;
97e242b0 6638 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6639 break;
6640
6641 case POST_MODIFY:
e820471b 6642 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
4b983fdc
RH
6643
6644 new_flags.is_write = 0;
97e242b0 6645 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6646 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6647 new_flags.is_write = 1;
97e242b0 6648 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
6649 break;
6650
6651 /* Handle common unary and binary ops for efficiency. */
6652 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6653 case MOD: case UDIV: case UMOD: case AND: case IOR:
6654 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6655 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6656 case NE: case EQ: case GE: case GT: case LE:
6657 case LT: case GEU: case GTU: case LEU: case LTU:
6658 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6659 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6660 break;
6661
6662 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6663 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6664 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 6665 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
6666 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6667 break;
6668
051d8245
RH
6669 case VEC_SELECT:
6670 /* VEC_SELECT's second argument is a PARALLEL with integers that
6671 describe the elements selected. On ia64, those integers are
6672 always constants. Avoid walking the PARALLEL so that we don't
e820471b 6673 get confused with "normal" parallels and then die. */
051d8245
RH
6674 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6675 break;
6676
c65ebc55
JW
6677 case UNSPEC:
6678 switch (XINT (x, 1))
6679 {
7b6e506e
RH
6680 case UNSPEC_LTOFF_DTPMOD:
6681 case UNSPEC_LTOFF_DTPREL:
6682 case UNSPEC_DTPREL:
6683 case UNSPEC_LTOFF_TPREL:
6684 case UNSPEC_TPREL:
6685 case UNSPEC_PRED_REL_MUTEX:
6686 case UNSPEC_PIC_CALL:
6687 case UNSPEC_MF:
6688 case UNSPEC_FETCHADD_ACQ:
28875d67 6689 case UNSPEC_FETCHADD_REL:
7b6e506e
RH
6690 case UNSPEC_BSP_VALUE:
6691 case UNSPEC_FLUSHRS:
6692 case UNSPEC_BUNDLE_SELECTOR:
6693 break;
6694
086c0f96
RH
6695 case UNSPEC_GR_SPILL:
6696 case UNSPEC_GR_RESTORE:
870f9ec0
RH
6697 {
6698 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6699 HOST_WIDE_INT bit = (offset >> 3) & 63;
6700
6701 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
83338d15 6702 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
870f9ec0
RH
6703 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6704 new_flags, pred);
6705 break;
6706 }
9c808aad 6707
086c0f96
RH
6708 case UNSPEC_FR_SPILL:
6709 case UNSPEC_FR_RESTORE:
c407570a 6710 case UNSPEC_GETF_EXP:
b38ba463 6711 case UNSPEC_SETF_EXP:
086c0f96 6712 case UNSPEC_ADDP4:
b38ba463 6713 case UNSPEC_FR_SQRT_RECIP_APPROX:
07acc7b3 6714 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
048d0d36
MK
6715 case UNSPEC_LDA:
6716 case UNSPEC_LDS:
388092d5 6717 case UNSPEC_LDS_A:
048d0d36
MK
6718 case UNSPEC_LDSA:
6719 case UNSPEC_CHKACLR:
6720 case UNSPEC_CHKS:
6dd12198
SE
6721 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6722 break;
6723
086c0f96 6724 case UNSPEC_FR_RECIP_APPROX:
f526a3c8 6725 case UNSPEC_SHRP:
046625fa 6726 case UNSPEC_COPYSIGN:
1def9c3f 6727 case UNSPEC_FR_RECIP_APPROX_RES:
655f2eb9
RH
6728 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6729 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6730 break;
6731
086c0f96 6732 case UNSPEC_CMPXCHG_ACQ:
28875d67 6733 case UNSPEC_CMPXCHG_REL:
0551c32d
RH
6734 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6735 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6736 break;
6737
c65ebc55 6738 default:
e820471b 6739 gcc_unreachable ();
c65ebc55
JW
6740 }
6741 break;
6742
6743 case UNSPEC_VOLATILE:
6744 switch (XINT (x, 1))
6745 {
086c0f96 6746 case UNSPECV_ALLOC:
25250265
JW
6747 /* Alloc must always be the first instruction of a group.
6748 We force this by always returning true. */
6749 /* ??? We might get better scheduling if we explicitly check for
6750 input/local/output register dependencies, and modify the
6751 scheduler so that alloc is always reordered to the start of
6752 the current group. We could then eliminate all of the
6753 first_instruction code. */
6754 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6755
6756 new_flags.is_write = 1;
25250265
JW
6757 rws_access_regno (REG_AR_CFM, new_flags, pred);
6758 return 1;
c65ebc55 6759
086c0f96 6760 case UNSPECV_SET_BSP:
7b84aac0 6761 case UNSPECV_PROBE_STACK_RANGE:
3b572406
RH
6762 need_barrier = 1;
6763 break;
6764
086c0f96
RH
6765 case UNSPECV_BLOCKAGE:
6766 case UNSPECV_INSN_GROUP_BARRIER:
6767 case UNSPECV_BREAK:
6768 case UNSPECV_PSAC_ALL:
6769 case UNSPECV_PSAC_NORMAL:
3b572406 6770 return 0;
0c96007e 6771
7b84aac0
EB
6772 case UNSPECV_PROBE_STACK_ADDRESS:
6773 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6774 break;
6775
c65ebc55 6776 default:
e820471b 6777 gcc_unreachable ();
c65ebc55
JW
6778 }
6779 break;
6780
6781 case RETURN:
6782 new_flags.is_write = 0;
97e242b0
RH
6783 need_barrier = rws_access_regno (REG_RP, flags, pred);
6784 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6785
6786 new_flags.is_write = 1;
97e242b0
RH
6787 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6788 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6789 break;
6790
6791 default:
6792 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6793 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6794 switch (format_ptr[i])
6795 {
6796 case '0': /* unused field */
6797 case 'i': /* integer */
6798 case 'n': /* note */
6799 case 'w': /* wide integer */
6800 case 's': /* pointer to string */
6801 case 'S': /* optional pointer to string */
6802 break;
6803
6804 case 'e':
6805 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6806 need_barrier = 1;
6807 break;
6808
6809 case 'E':
6810 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6811 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6812 need_barrier = 1;
6813 break;
6814
6815 default:
e820471b 6816 gcc_unreachable ();
c65ebc55 6817 }
2ed4af6f 6818 break;
c65ebc55
JW
6819 }
6820 return need_barrier;
6821}
6822
c1bc6ca8 6823/* Clear out the state for group_barrier_needed at the start of a
2130b7fb
BS
6824 sequence of insns. */
6825
6826static void
9c808aad 6827init_insn_group_barriers (void)
2130b7fb
BS
6828{
6829 memset (rws_sum, 0, sizeof (rws_sum));
25250265 6830 first_instruction = 1;
2130b7fb
BS
6831}
6832
c1bc6ca8
JW
6833/* Given the current state, determine whether a group barrier (a stop bit) is
6834 necessary before INSN. Return nonzero if so. This modifies the state to
6835 include the effects of INSN as a side-effect. */
2130b7fb
BS
6836
6837static int
647d790d 6838group_barrier_needed (rtx_insn *insn)
2130b7fb
BS
6839{
6840 rtx pat;
6841 int need_barrier = 0;
6842 struct reg_flags flags;
6843
6844 memset (&flags, 0, sizeof (flags));
6845 switch (GET_CODE (insn))
6846 {
6847 case NOTE:
b5b8b0ac 6848 case DEBUG_INSN:
2130b7fb
BS
6849 break;
6850
6851 case BARRIER:
6852 /* A barrier doesn't imply an instruction group boundary. */
6853 break;
6854
6855 case CODE_LABEL:
6856 memset (rws_insn, 0, sizeof (rws_insn));
6857 return 1;
6858
6859 case CALL_INSN:
6860 flags.is_branch = 1;
6861 flags.is_sibcall = SIBLING_CALL_P (insn);
6862 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
6863
6864 /* Don't bundle a call following another call. */
b64925dc 6865 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
f12f25a7
RH
6866 {
6867 need_barrier = 1;
6868 break;
6869 }
6870
2130b7fb
BS
6871 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6872 break;
6873
6874 case JUMP_INSN:
048d0d36
MK
6875 if (!ia64_spec_check_p (insn))
6876 flags.is_branch = 1;
f12f25a7
RH
6877
6878 /* Don't bundle a jump following a call. */
b64925dc 6879 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
f12f25a7
RH
6880 {
6881 need_barrier = 1;
6882 break;
6883 }
5efb1046 6884 /* FALLTHRU */
2130b7fb
BS
6885
6886 case INSN:
6887 if (GET_CODE (PATTERN (insn)) == USE
6888 || GET_CODE (PATTERN (insn)) == CLOBBER)
6889 /* Don't care about USE and CLOBBER "insns"---those are used to
6890 indicate to the optimizer that it shouldn't get rid of
6891 certain operations. */
6892 break;
6893
6894 pat = PATTERN (insn);
6895
6896 /* Ug. Hack hacks hacked elsewhere. */
6897 switch (recog_memoized (insn))
6898 {
6899 /* We play dependency tricks with the epilogue in order
6900 to get proper schedules. Undo this for dv analysis. */
6901 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 6902 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
6903 pat = XVECEXP (pat, 0, 0);
6904 break;
6905
6906 /* The pattern we use for br.cloop confuses the code above.
6907 The second element of the vector is representative. */
6908 case CODE_FOR_doloop_end_internal:
6909 pat = XVECEXP (pat, 0, 1);
6910 break;
6911
6912 /* Doesn't generate code. */
6913 case CODE_FOR_pred_rel_mutex:
d0e82870 6914 case CODE_FOR_prologue_use:
2130b7fb
BS
6915 return 0;
6916
6917 default:
6918 break;
6919 }
6920
6921 memset (rws_insn, 0, sizeof (rws_insn));
6922 need_barrier = rtx_needs_barrier (pat, flags, 0);
6923
6924 /* Check to see if the previous instruction was a volatile
6925 asm. */
6926 if (! need_barrier)
6927 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
388092d5 6928
2130b7fb
BS
6929 break;
6930
6931 default:
e820471b 6932 gcc_unreachable ();
2130b7fb 6933 }
25250265 6934
7b84aac0 6935 if (first_instruction && important_for_bundling_p (insn))
25250265
JW
6936 {
6937 need_barrier = 0;
6938 first_instruction = 0;
6939 }
6940
2130b7fb
BS
6941 return need_barrier;
6942}
6943
c1bc6ca8 6944/* Like group_barrier_needed, but do not clobber the current state. */
2130b7fb
BS
6945
6946static int
647d790d 6947safe_group_barrier_needed (rtx_insn *insn)
2130b7fb 6948{
25250265 6949 int saved_first_instruction;
2130b7fb 6950 int t;
25250265 6951
25250265 6952 saved_first_instruction = first_instruction;
444a356a 6953 in_safe_group_barrier = 1;
25250265 6954
c1bc6ca8 6955 t = group_barrier_needed (insn);
25250265 6956
25250265 6957 first_instruction = saved_first_instruction;
444a356a 6958 in_safe_group_barrier = 0;
25250265 6959
2130b7fb
BS
6960 return t;
6961}
6962
18dbd950
RS
6963/* Scan the current function and insert stop bits as necessary to
6964 eliminate dependencies. This function assumes that a final
6965 instruction scheduling pass has been run which has already
6966 inserted most of the necessary stop bits. This function only
6967 inserts new ones at basic block boundaries, since these are
6968 invisible to the scheduler. */
2130b7fb
BS
6969
6970static void
9c808aad 6971emit_insn_group_barriers (FILE *dump)
2130b7fb 6972{
dd3d2b35
DM
6973 rtx_insn *insn;
6974 rtx_insn *last_label = 0;
2130b7fb
BS
6975 int insns_since_last_label = 0;
6976
6977 init_insn_group_barriers ();
6978
18dbd950 6979 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb 6980 {
b64925dc 6981 if (LABEL_P (insn))
2130b7fb
BS
6982 {
6983 if (insns_since_last_label)
6984 last_label = insn;
6985 insns_since_last_label = 0;
6986 }
b64925dc 6987 else if (NOTE_P (insn)
a38e7aa5 6988 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
2130b7fb
BS
6989 {
6990 if (insns_since_last_label)
6991 last_label = insn;
6992 insns_since_last_label = 0;
6993 }
b64925dc 6994 else if (NONJUMP_INSN_P (insn)
2130b7fb 6995 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 6996 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
6997 {
6998 init_insn_group_barriers ();
6999 last_label = 0;
7000 }
b5b8b0ac 7001 else if (NONDEBUG_INSN_P (insn))
2130b7fb
BS
7002 {
7003 insns_since_last_label = 1;
7004
c1bc6ca8 7005 if (group_barrier_needed (insn))
2130b7fb
BS
7006 {
7007 if (last_label)
7008 {
7009 if (dump)
7010 fprintf (dump, "Emitting stop before label %d\n",
7011 INSN_UID (last_label));
7012 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7013 insn = last_label;
112333d3
BS
7014
7015 init_insn_group_barriers ();
7016 last_label = 0;
2130b7fb 7017 }
2130b7fb
BS
7018 }
7019 }
7020 }
7021}
f4d578da
BS
7022
7023/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7024 This function has to emit all necessary group barriers. */
7025
7026static void
9c808aad 7027emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da 7028{
dd3d2b35 7029 rtx_insn *insn;
f4d578da
BS
7030
7031 init_insn_group_barriers ();
7032
18dbd950 7033 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 7034 {
b64925dc 7035 if (BARRIER_P (insn))
bd7b9a0f 7036 {
dd3d2b35 7037 rtx_insn *last = prev_active_insn (insn);
bd7b9a0f
RH
7038
7039 if (! last)
7040 continue;
34f0d87a 7041 if (JUMP_TABLE_DATA_P (last))
bd7b9a0f
RH
7042 last = prev_active_insn (last);
7043 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7044 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7045
7046 init_insn_group_barriers ();
7047 }
b5b8b0ac 7048 else if (NONDEBUG_INSN_P (insn))
f4d578da 7049 {
bd7b9a0f
RH
7050 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7051 init_insn_group_barriers ();
c1bc6ca8 7052 else if (group_barrier_needed (insn))
f4d578da
BS
7053 {
7054 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7055 init_insn_group_barriers ();
c1bc6ca8 7056 group_barrier_needed (insn);
f4d578da
BS
7057 }
7058 }
7059 }
7060}
30028c85 7061
2130b7fb 7062\f
2130b7fb 7063
30028c85 7064/* Instruction scheduling support. */
2130b7fb
BS
7065
7066#define NR_BUNDLES 10
7067
30028c85 7068/* A list of names of all available bundles. */
2130b7fb 7069
30028c85 7070static const char *bundle_name [NR_BUNDLES] =
2130b7fb 7071{
30028c85
VM
7072 ".mii",
7073 ".mmi",
7074 ".mfi",
7075 ".mmf",
2130b7fb 7076#if NR_BUNDLES == 10
30028c85
VM
7077 ".bbb",
7078 ".mbb",
2130b7fb 7079#endif
30028c85
VM
7080 ".mib",
7081 ".mmb",
7082 ".mfb",
7083 ".mlx"
2130b7fb
BS
7084};
7085
30028c85 7086/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 7087
30028c85 7088int ia64_final_schedule = 0;
2130b7fb 7089
35fd3193 7090/* Codes of the corresponding queried units: */
2130b7fb 7091
30028c85
VM
7092static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7093static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 7094
30028c85
VM
7095static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7096static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 7097
30028c85
VM
7098static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7099
7100/* The following variable value is an insn group barrier. */
7101
dd3d2b35 7102static rtx_insn *dfa_stop_insn;
30028c85
VM
7103
7104/* The following variable value is the last issued insn. */
7105
b32d5189 7106static rtx_insn *last_scheduled_insn;
30028c85 7107
30028c85
VM
7108/* The following variable value is a pointer to a DFA state used as
7109 a temporary variable. */
7110
7111static state_t temp_dfa_state = NULL;
7112
7113/* The following variable value is DFA state after issuing the last
7114 insn. */
7115
7116static state_t prev_cycle_state = NULL;
7117
7118/* The following array element values are TRUE if the corresponding
9e4f94de 7119 insn requires stop bits to be added before it. */
30028c85 7120
048d0d36
MK
7121static char *stops_p = NULL;
7122
30028c85
VM
7123/* The following variable is used to set up the array mentioned above. */
7124
7125static int stop_before_p = 0;
7126
7127/* The following variable value is the length of the arrays `clocks' and
7128 `add_cycles'. */
7129
7130static int clocks_length;
7131
048d0d36
MK
7132/* The following variable value is the number of data speculations in progress. */
7133static int pending_data_specs = 0;
7134
388092d5
AB
7135/* Number of memory references on the current and three future processor cycles. */
7136static char mem_ops_in_group[4];
7137
7138/* Number of current processor cycle (from scheduler's point of view). */
7139static int current_cycle;
7140
647d790d 7141static rtx ia64_single_set (rtx_insn *);
9c808aad 7142static void ia64_emit_insn_before (rtx, rtx);
2130b7fb
BS
7143
7144/* Map a bundle number to its pseudo-op. */
7145
7146const char *
9c808aad 7147get_bundle_name (int b)
2130b7fb 7148{
30028c85 7149 return bundle_name[b];
2130b7fb
BS
7150}
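/* For example, with NR_BUNDLES == 10, get_bundle_name (2) yields ".mfi"
   (memory, floating-point and integer slots) and get_bundle_name (9) yields
   ".mlx", the template used for long-immediate instructions.  */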
7151
2130b7fb
BS
7152
7153/* Return the maximum number of instructions a cpu can issue. */
7154
c237e94a 7155static int
9c808aad 7156ia64_issue_rate (void)
2130b7fb
BS
7157{
7158 return 6;
7159}
7160
7161/* Helper function - like single_set, but look inside COND_EXEC. */
7162
7163static rtx
647d790d 7164ia64_single_set (rtx_insn *insn)
2130b7fb 7165{
30fa7e33 7166 rtx x = PATTERN (insn), ret;
2130b7fb
BS
7167 if (GET_CODE (x) == COND_EXEC)
7168 x = COND_EXEC_CODE (x);
7169 if (GET_CODE (x) == SET)
7170 return x;
bdbe5b8d
RH
7171
7172 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7173 Although they are not classical single set, the second set is there just
7174 to protect it from moving past FP-relative stack accesses. */
7175 switch (recog_memoized (insn))
30fa7e33 7176 {
bdbe5b8d 7177 case CODE_FOR_prologue_allocate_stack:
9eb8c09f 7178 case CODE_FOR_prologue_allocate_stack_pr:
bdbe5b8d 7179 case CODE_FOR_epilogue_deallocate_stack:
9eb8c09f 7180 case CODE_FOR_epilogue_deallocate_stack_pr:
bdbe5b8d
RH
7181 ret = XVECEXP (x, 0, 0);
7182 break;
7183
7184 default:
7185 ret = single_set_2 (insn, x);
7186 break;
30fa7e33 7187 }
bdbe5b8d 7188
30fa7e33 7189 return ret;
2130b7fb
BS
7190}
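/* As an illustration, for a predicated move such as
     (cond_exec (ne (reg:BI p6) (const_int 0))
                (set (reg:DI r14) (reg:DI r15)))
   the SET inside the COND_EXEC is returned, while for the two-set stack
   allocation/deallocation patterns special-cased above only the first
   (real) SET of the PARALLEL is reported.  */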
7191
388092d5
AB
7192/* Adjust the cost of a scheduling dependency.
7193 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7194 COST is the current cost, DW is dependency weakness. */
c237e94a 7195static int
ac44248e
DM
7196ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7197 int cost, dw_t dw)
2130b7fb 7198{
388092d5 7199 enum reg_note dep_type = (enum reg_note) dep_type1;
2130b7fb
BS
7200 enum attr_itanium_class dep_class;
7201 enum attr_itanium_class insn_class;
2130b7fb 7202
2130b7fb 7203 insn_class = ia64_safe_itanium_class (insn);
30028c85 7204 dep_class = ia64_safe_itanium_class (dep_insn);
388092d5
AB
7205
7206 /* Treat true memory dependencies separately. Ignore apparent true
7207 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7208 if (dep_type == REG_DEP_TRUE
7209 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7210 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7211 return 0;
7212
7213 if (dw == MIN_DEP_WEAK)
7214 /* Store and load are likely to alias, use higher cost to avoid stall. */
7215 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7216 else if (dw > MIN_DEP_WEAK)
7217 {
7218 /* Store and load are less likely to alias. */
7219 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7220 /* Assume there will be no cache conflict for floating-point data.
7221 For integer data, L1 conflict penalty is huge (17 cycles), so we
7222 never assume it will not cause a conflict. */
7223 return 0;
7224 else
7225 return cost;
7226 }
7227
7228 if (dep_type != REG_DEP_OUTPUT)
7229 return cost;
7230
30028c85
VM
7231 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7232 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
7233 return 0;
7234
2130b7fb
BS
7235 return cost;
7236}
7237
14d118d6
DM
7238/* Like emit_insn_before, but skip cycle_display notes.
7239 ??? When cycle display notes are implemented, update this. */
7240
7241static void
9c808aad 7242ia64_emit_insn_before (rtx insn, rtx before)
14d118d6
DM
7243{
7244 emit_insn_before (insn, before);
7245}
7246
30028c85
VM
7247/* The following function marks insns that produce addresses for load
7248 and store insns. Such insns will be placed into M slots because this
7249 decreases latency for Itanium 1 (see function
7250 `ia64_produce_address_p' and the DFA descriptions). */
2130b7fb
BS
7251
7252static void
ce1ce33a 7253ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
2130b7fb 7254{
ce1ce33a 7255 rtx_insn *insn, *next, *next_tail;
9c808aad 7256
f12b785d
RH
7257 /* Before reload, which_alternative is not set, which means that
7258 ia64_safe_itanium_class will produce wrong results for (at least)
7259 move instructions. */
7260 if (!reload_completed)
7261 return;
7262
30028c85
VM
7263 next_tail = NEXT_INSN (tail);
7264 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7265 if (INSN_P (insn))
7266 insn->call = 0;
7267 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7268 if (INSN_P (insn)
7269 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7270 {
e2f6ff94
MK
7271 sd_iterator_def sd_it;
7272 dep_t dep;
7273 bool has_mem_op_consumer_p = false;
b198261f 7274
e2f6ff94 7275 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
30028c85 7276 {
a71aef0b
JB
7277 enum attr_itanium_class c;
7278
e2f6ff94 7279 if (DEP_TYPE (dep) != REG_DEP_TRUE)
f12b785d 7280 continue;
b198261f 7281
e2f6ff94 7282 next = DEP_CON (dep);
a71aef0b
JB
7283 c = ia64_safe_itanium_class (next);
7284 if ((c == ITANIUM_CLASS_ST
7285 || c == ITANIUM_CLASS_STF)
30028c85 7286 && ia64_st_address_bypass_p (insn, next))
e2f6ff94
MK
7287 {
7288 has_mem_op_consumer_p = true;
7289 break;
7290 }
a71aef0b
JB
7291 else if ((c == ITANIUM_CLASS_LD
7292 || c == ITANIUM_CLASS_FLD
7293 || c == ITANIUM_CLASS_FLDP)
30028c85 7294 && ia64_ld_address_bypass_p (insn, next))
e2f6ff94
MK
7295 {
7296 has_mem_op_consumer_p = true;
7297 break;
7298 }
30028c85 7299 }
e2f6ff94
MK
7300
7301 insn->call = has_mem_op_consumer_p;
30028c85
VM
7302 }
7303}
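/* Note that the `call' bit of each IALU insn is reused above as a scratch
   flag meaning "this insn feeds the address of a load or store through the
   st/ld address bypass"; `ia64_produce_address_p' (referenced in the head
   comment) consults this flag when deciding on M-slot placement.  */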
2130b7fb 7304
30028c85 7305/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 7306
30028c85 7307static void
9c808aad
AJ
7308ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7309 int sched_verbose ATTRIBUTE_UNUSED,
7310 int max_ready ATTRIBUTE_UNUSED)
30028c85
VM
7311{
7312#ifdef ENABLE_CHECKING
b32d5189 7313 rtx_insn *insn;
9c808aad 7314
388092d5 7315 if (!sel_sched_p () && reload_completed)
30028c85
VM
7316 for (insn = NEXT_INSN (current_sched_info->prev_head);
7317 insn != current_sched_info->next_tail;
7318 insn = NEXT_INSN (insn))
e820471b 7319 gcc_assert (!SCHED_GROUP_P (insn));
30028c85 7320#endif
b32d5189 7321 last_scheduled_insn = NULL;
30028c85 7322 init_insn_group_barriers ();
388092d5
AB
7323
7324 current_cycle = 0;
7325 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
2130b7fb
BS
7326}
7327
048d0d36
MK
7328/* We're beginning a scheduling pass. Check assertion. */
7329
7330static void
7331ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7332 int sched_verbose ATTRIBUTE_UNUSED,
7333 int max_ready ATTRIBUTE_UNUSED)
7334{
388092d5 7335 gcc_assert (pending_data_specs == 0);
048d0d36
MK
7336}
7337
7338/* Scheduling pass is now finished. Free/reset static variable. */
7339static void
7340ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7341 int sched_verbose ATTRIBUTE_UNUSED)
7342{
388092d5
AB
7343 gcc_assert (pending_data_specs == 0);
7344}
7345
7346/* Return TRUE if INSN is a load (either normal or speculative, but not a
7347 speculation check), FALSE otherwise. */
7348static bool
647d790d 7349is_load_p (rtx_insn *insn)
388092d5
AB
7350{
7351 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7352
7353 return
7354 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7355 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7356}
7357
7358/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7359 (taking into account the 3-cycle cache reference postponement for stores: Intel
7360 Itanium 2 Reference Manual for Software Development and Optimization,
7361 6.7.3.1). */
7362static void
647d790d 7363record_memory_reference (rtx_insn *insn)
388092d5
AB
7364{
7365 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7366
7367 switch (insn_class) {
7368 case ITANIUM_CLASS_FLD:
7369 case ITANIUM_CLASS_LD:
7370 mem_ops_in_group[current_cycle % 4]++;
7371 break;
7372 case ITANIUM_CLASS_STF:
7373 case ITANIUM_CLASS_ST:
7374 mem_ops_in_group[(current_cycle + 3) % 4]++;
7375 break;
7376 default:;
7377 }
048d0d36
MK
7378}
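/* For instance, a store issued on cycle 10 is counted in
   mem_ops_in_group[(10 + 3) % 4], i.e. together with the loads of cycle 13,
   which models the delayed cache access of stores described in the manual
   section cited above.  */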
7379
30028c85
VM
7380/* We are about to begin issuing insns for this clock cycle.
7381 Override the default sort algorithm to better slot instructions. */
2130b7fb 7382
30028c85 7383static int
ce1ce33a 7384ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
388092d5 7385 int *pn_ready, int clock_var,
9c808aad 7386 int reorder_type)
2130b7fb 7387{
30028c85
VM
7388 int n_asms;
7389 int n_ready = *pn_ready;
ce1ce33a
DM
7390 rtx_insn **e_ready = ready + n_ready;
7391 rtx_insn **insnp;
2130b7fb 7392
30028c85
VM
7393 if (sched_verbose)
7394 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 7395
30028c85 7396 if (reorder_type == 0)
2130b7fb 7397 {
30028c85
VM
7398 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7399 n_asms = 0;
7400 for (insnp = ready; insnp < e_ready; insnp++)
7401 if (insnp < e_ready)
7402 {
ce1ce33a 7403 rtx_insn *insn = *insnp;
30028c85
VM
7404 enum attr_type t = ia64_safe_type (insn);
7405 if (t == TYPE_UNKNOWN)
7406 {
7407 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7408 || asm_noperands (PATTERN (insn)) >= 0)
7409 {
ce1ce33a 7410 rtx_insn *lowest = ready[n_asms];
30028c85
VM
7411 ready[n_asms] = insn;
7412 *insnp = lowest;
7413 n_asms++;
7414 }
7415 else
7416 {
ce1ce33a 7417 rtx_insn *highest = ready[n_ready - 1];
30028c85
VM
7418 ready[n_ready - 1] = insn;
7419 *insnp = highest;
7420 return 1;
7421 }
7422 }
7423 }
98d2b17e 7424
30028c85 7425 if (n_asms < n_ready)
98d2b17e 7426 {
30028c85
VM
7427 /* Some normal insns to process. Skip the asms. */
7428 ready += n_asms;
7429 n_ready -= n_asms;
98d2b17e 7430 }
30028c85
VM
7431 else if (n_ready > 0)
7432 return 1;
2130b7fb
BS
7433 }
7434
30028c85 7435 if (ia64_final_schedule)
2130b7fb 7436 {
30028c85
VM
7437 int deleted = 0;
7438 int nr_need_stop = 0;
7439
7440 for (insnp = ready; insnp < e_ready; insnp++)
c1bc6ca8 7441 if (safe_group_barrier_needed (*insnp))
30028c85 7442 nr_need_stop++;
9c808aad 7443
30028c85
VM
7444 if (reorder_type == 1 && n_ready == nr_need_stop)
7445 return 0;
7446 if (reorder_type == 0)
7447 return 1;
7448 insnp = e_ready;
7449 /* Move down everything that needs a stop bit, preserving
7450 relative order. */
7451 while (insnp-- > ready + deleted)
7452 while (insnp >= ready + deleted)
7453 {
ce1ce33a 7454 rtx_insn *insn = *insnp;
c1bc6ca8 7455 if (! safe_group_barrier_needed (insn))
30028c85
VM
7456 break;
7457 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7458 *ready = insn;
7459 deleted++;
7460 }
7461 n_ready -= deleted;
7462 ready += deleted;
2130b7fb 7463 }
2130b7fb 7464
388092d5
AB
7465 current_cycle = clock_var;
7466 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7467 {
7468 int moved = 0;
7469
7470 insnp = e_ready;
7471 /* Move down loads/stores, preserving relative order. */
7472 while (insnp-- > ready + moved)
7473 while (insnp >= ready + moved)
7474 {
ce1ce33a 7475 rtx_insn *insn = *insnp;
388092d5
AB
7476 if (! is_load_p (insn))
7477 break;
7478 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7479 *ready = insn;
7480 moved++;
7481 }
7482 n_ready -= moved;
7483 ready += moved;
7484 }
7485
30028c85 7486 return 1;
2130b7fb 7487}
6b6c1201 7488
30028c85
VM
7489/* We are about to begin issuing insns for this clock cycle. Override
7490 the default sort algorithm to better slot instructions. */
c65ebc55 7491
30028c85 7492static int
ce1ce33a
DM
7493ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7494 int *pn_ready, int clock_var)
2130b7fb 7495{
30028c85
VM
7496 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7497 pn_ready, clock_var, 0);
2130b7fb
BS
7498}
7499
30028c85
VM
7500/* Like ia64_sched_reorder, but called after issuing each insn.
7501 Override the default sort algorithm to better slot instructions. */
2130b7fb 7502
30028c85 7503static int
9c808aad 7504ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
ce1ce33a 7505 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
9c808aad 7506 int *pn_ready, int clock_var)
30028c85 7507{
30028c85
VM
7508 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7509 clock_var, 1);
2130b7fb
BS
7510}
7511
30028c85
VM
7512/* We are about to issue INSN. Return the number of insns left on the
7513 ready queue that can be issued this cycle. */
2130b7fb 7514
30028c85 7515static int
9c808aad
AJ
7516ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7517 int sched_verbose ATTRIBUTE_UNUSED,
ac44248e 7518 rtx_insn *insn,
9c808aad 7519 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 7520{
388092d5 7521 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
048d0d36 7522 /* Modulo scheduling does not extend h_i_d when emitting
388092d5 7523 new instructions. Don't use h_i_d if we don't have to. */
048d0d36
MK
7524 {
7525 if (DONE_SPEC (insn) & BEGIN_DATA)
7526 pending_data_specs++;
7527 if (CHECK_SPEC (insn) & BEGIN_DATA)
7528 pending_data_specs--;
7529 }
7530
b5b8b0ac
AO
7531 if (DEBUG_INSN_P (insn))
7532 return 1;
7533
30028c85
VM
7534 last_scheduled_insn = insn;
7535 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7536 if (reload_completed)
2130b7fb 7537 {
c1bc6ca8 7538 int needed = group_barrier_needed (insn);
e820471b
NS
7539
7540 gcc_assert (!needed);
b64925dc 7541 if (CALL_P (insn))
30028c85
VM
7542 init_insn_group_barriers ();
7543 stops_p [INSN_UID (insn)] = stop_before_p;
7544 stop_before_p = 0;
388092d5
AB
7545
7546 record_memory_reference (insn);
2130b7fb 7547 }
30028c85
VM
7548 return 1;
7549}
c65ebc55 7550
4960a0cb 7551/* We are choosing an insn from the ready queue. Return zero if INSN
30028c85 7552 can be chosen. */
c65ebc55 7553
30028c85 7554static int
ac44248e 7555ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30028c85 7556{
388092d5 7557 gcc_assert (insn && INSN_P (insn));
048d0d36 7558
4960a0cb
MK
7559 /* The size of the ALAT is 32. Since we perform conservative
7560 data speculation, we keep the ALAT half-empty. */
31815ed7 7561 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
4960a0cb 7562 return ready_index == 0 ? -1 : 1;
048d0d36 7563
4960a0cb
MK
7564 if (ready_index == 0)
7565 return 0;
7566
7567 if ((!reload_completed
7568 || !safe_group_barrier_needed (insn))
7569 && (!mflag_sched_mem_insns_hard_limit
7570 || !is_load_p (insn)
7571 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7572 return 0;
676cad4d
MK
7573
7574 return 1;
2130b7fb
BS
7575}
7576
30028c85
VM
7577/* The following variable value is a pseudo-insn used by the DFA insn
7578 scheduler to change the DFA state when the simulated clock is
7579 increased. */
2130b7fb 7580
dd3d2b35 7581static rtx_insn *dfa_pre_cycle_insn;
2130b7fb 7582
388092d5
AB
7583/* Returns 1 when a meaningful insn was scheduled between the last group
7584 barrier and LAST. */
7585static int
b32d5189 7586scheduled_good_insn (rtx_insn *last)
388092d5
AB
7587{
7588 if (last && recog_memoized (last) >= 0)
7589 return 1;
7590
7591 for ( ;
7592 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7593 && !stops_p[INSN_UID (last)];
7594 last = PREV_INSN (last))
7595 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7596 the ebb we're scheduling. */
7597 if (INSN_P (last) && recog_memoized (last) >= 0)
7598 return 1;
7599
7600 return 0;
7601}
7602
1e5f1716 7603/* We are about to begin issuing INSN. Return nonzero if we cannot
30028c85
VM
7604 issue it on the given cycle CLOCK and return zero if we should not sort
7605 the ready queue on the next clock start. */
2130b7fb
BS
7606
7607static int
ac44248e 7608ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
9c808aad 7609 int clock, int *sort_p)
2130b7fb 7610{
e820471b 7611 gcc_assert (insn && INSN_P (insn));
b5b8b0ac
AO
7612
7613 if (DEBUG_INSN_P (insn))
7614 return 0;
7615
388092d5
AB
7616 /* When a group barrier is needed for insn, last_scheduled_insn
7617 should be set. */
7618 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7619 || last_scheduled_insn);
7620
7621 if ((reload_completed
7622 && (safe_group_barrier_needed (insn)
7623 || (mflag_sched_stop_bits_after_every_cycle
7624 && last_clock != clock
7625 && last_scheduled_insn
7626 && scheduled_good_insn (last_scheduled_insn))))
30028c85 7627 || (last_scheduled_insn
b64925dc 7628 && (CALL_P (last_scheduled_insn)
7b84aac0 7629 || unknown_for_bundling_p (last_scheduled_insn))))
2130b7fb 7630 {
30028c85 7631 init_insn_group_barriers ();
388092d5 7632
30028c85
VM
7633 if (verbose && dump)
7634 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7635 last_clock == clock ? " + cycle advance" : "");
388092d5 7636
30028c85 7637 stop_before_p = 1;
388092d5
AB
7638 current_cycle = clock;
7639 mem_ops_in_group[current_cycle % 4] = 0;
7640
30028c85 7641 if (last_clock == clock)
2130b7fb 7642 {
30028c85
VM
7643 state_transition (curr_state, dfa_stop_insn);
7644 if (TARGET_EARLY_STOP_BITS)
7645 *sort_p = (last_scheduled_insn == NULL_RTX
b64925dc 7646 || ! CALL_P (last_scheduled_insn));
30028c85
VM
7647 else
7648 *sort_p = 0;
7649 return 1;
7650 }
388092d5
AB
7651
7652 if (last_scheduled_insn)
25069b42 7653 {
7b84aac0 7654 if (unknown_for_bundling_p (last_scheduled_insn))
388092d5
AB
7655 state_reset (curr_state);
7656 else
7657 {
7658 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7659 state_transition (curr_state, dfa_stop_insn);
7660 state_transition (curr_state, dfa_pre_cycle_insn);
7661 state_transition (curr_state, NULL);
7662 }
25069b42 7663 }
30028c85 7664 }
30028c85 7665 return 0;
2130b7fb
BS
7666}
7667
048d0d36
MK
7668/* Implement targetm.sched.h_i_d_extended hook.
7669 Extend internal data structures. */
7670static void
7671ia64_h_i_d_extended (void)
7672{
048d0d36
MK
7673 if (stops_p != NULL)
7674 {
388092d5 7675 int new_clocks_length = get_max_uid () * 3 / 2;
5ead67f6 7676 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
048d0d36
MK
7677 clocks_length = new_clocks_length;
7678 }
7679}
388092d5
AB
7680\f
7681
7682/* This structure describes the data used by the backend to guide scheduling.
7683 When the current scheduling point is switched, this data should be saved
7684 and restored later, if the scheduler returns to this point. */
7685struct _ia64_sched_context
7686{
7687 state_t prev_cycle_state;
b32d5189 7688 rtx_insn *last_scheduled_insn;
388092d5
AB
7689 struct reg_write_state rws_sum[NUM_REGS];
7690 struct reg_write_state rws_insn[NUM_REGS];
7691 int first_instruction;
7692 int pending_data_specs;
7693 int current_cycle;
7694 char mem_ops_in_group[4];
7695};
7696typedef struct _ia64_sched_context *ia64_sched_context_t;
7697
7698/* Allocates a scheduling context. */
7699static void *
7700ia64_alloc_sched_context (void)
7701{
7702 return xmalloc (sizeof (struct _ia64_sched_context));
7703}
7704
7705/* Initializes the _SC context with clean data, if CLEAN_P, and from
7706 the global context otherwise. */
7707static void
7708ia64_init_sched_context (void *_sc, bool clean_p)
7709{
7710 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7711
7712 sc->prev_cycle_state = xmalloc (dfa_state_size);
7713 if (clean_p)
7714 {
7715 state_reset (sc->prev_cycle_state);
b32d5189 7716 sc->last_scheduled_insn = NULL;
388092d5
AB
7717 memset (sc->rws_sum, 0, sizeof (rws_sum));
7718 memset (sc->rws_insn, 0, sizeof (rws_insn));
7719 sc->first_instruction = 1;
7720 sc->pending_data_specs = 0;
7721 sc->current_cycle = 0;
7722 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7723 }
7724 else
7725 {
7726 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7727 sc->last_scheduled_insn = last_scheduled_insn;
7728 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7729 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7730 sc->first_instruction = first_instruction;
7731 sc->pending_data_specs = pending_data_specs;
7732 sc->current_cycle = current_cycle;
7733 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7734 }
7735}
7736
7737/* Sets the global scheduling context to the one pointed to by _SC. */
7738static void
7739ia64_set_sched_context (void *_sc)
7740{
7741 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7742
7743 gcc_assert (sc != NULL);
7744
7745 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7746 last_scheduled_insn = sc->last_scheduled_insn;
7747 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7748 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7749 first_instruction = sc->first_instruction;
7750 pending_data_specs = sc->pending_data_specs;
7751 current_cycle = sc->current_cycle;
7752 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7753}
7754
7755/* Clears the data in the _SC scheduling context. */
7756static void
7757ia64_clear_sched_context (void *_sc)
7758{
7759 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7760
7761 free (sc->prev_cycle_state);
7762 sc->prev_cycle_state = NULL;
7763}
7764
7765/* Frees the _SC scheduling context. */
7766static void
7767ia64_free_sched_context (void *_sc)
7768{
7769 gcc_assert (_sc != NULL);
7770
7771 free (_sc);
7772}
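/* A minimal sketch of how the five context functions above fit together;
   the intended caller is the (selective) scheduler through the
   corresponding target hooks, so this is only an illustration:

     void *sc = ia64_alloc_sched_context ();
     ia64_init_sched_context (sc, false);     - snapshot the global state
     ... scheduling continues at some other point ...
     ia64_set_sched_context (sc);             - restore the snapshot
     ia64_clear_sched_context (sc);
     ia64_free_sched_context (sc);  */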
7773
7774typedef rtx (* gen_func_t) (rtx, rtx);
7775
7776/* Return a function that will generate a load of mode MODE_NO
7777 with speculation types TS. */
7778static gen_func_t
7779get_spec_load_gen_function (ds_t ts, int mode_no)
7780{
7781 static gen_func_t gen_ld_[] = {
7782 gen_movbi,
7783 gen_movqi_internal,
7784 gen_movhi_internal,
7785 gen_movsi_internal,
7786 gen_movdi_internal,
7787 gen_movsf_internal,
7788 gen_movdf_internal,
7789 gen_movxf_internal,
7790 gen_movti_internal,
7791 gen_zero_extendqidi2,
7792 gen_zero_extendhidi2,
7793 gen_zero_extendsidi2,
7794 };
7795
7796 static gen_func_t gen_ld_a[] = {
7797 gen_movbi_advanced,
7798 gen_movqi_advanced,
7799 gen_movhi_advanced,
7800 gen_movsi_advanced,
7801 gen_movdi_advanced,
7802 gen_movsf_advanced,
7803 gen_movdf_advanced,
7804 gen_movxf_advanced,
7805 gen_movti_advanced,
7806 gen_zero_extendqidi2_advanced,
7807 gen_zero_extendhidi2_advanced,
7808 gen_zero_extendsidi2_advanced,
7809 };
7810 static gen_func_t gen_ld_s[] = {
7811 gen_movbi_speculative,
7812 gen_movqi_speculative,
7813 gen_movhi_speculative,
7814 gen_movsi_speculative,
7815 gen_movdi_speculative,
7816 gen_movsf_speculative,
7817 gen_movdf_speculative,
7818 gen_movxf_speculative,
7819 gen_movti_speculative,
7820 gen_zero_extendqidi2_speculative,
7821 gen_zero_extendhidi2_speculative,
7822 gen_zero_extendsidi2_speculative,
7823 };
7824 static gen_func_t gen_ld_sa[] = {
7825 gen_movbi_speculative_advanced,
7826 gen_movqi_speculative_advanced,
7827 gen_movhi_speculative_advanced,
7828 gen_movsi_speculative_advanced,
7829 gen_movdi_speculative_advanced,
7830 gen_movsf_speculative_advanced,
7831 gen_movdf_speculative_advanced,
7832 gen_movxf_speculative_advanced,
7833 gen_movti_speculative_advanced,
7834 gen_zero_extendqidi2_speculative_advanced,
7835 gen_zero_extendhidi2_speculative_advanced,
7836 gen_zero_extendsidi2_speculative_advanced,
7837 };
7838 static gen_func_t gen_ld_s_a[] = {
7839 gen_movbi_speculative_a,
7840 gen_movqi_speculative_a,
7841 gen_movhi_speculative_a,
7842 gen_movsi_speculative_a,
7843 gen_movdi_speculative_a,
7844 gen_movsf_speculative_a,
7845 gen_movdf_speculative_a,
7846 gen_movxf_speculative_a,
7847 gen_movti_speculative_a,
7848 gen_zero_extendqidi2_speculative_a,
7849 gen_zero_extendhidi2_speculative_a,
7850 gen_zero_extendsidi2_speculative_a,
7851 };
7852
7853 gen_func_t *gen_ld;
7854
7855 if (ts & BEGIN_DATA)
7856 {
7857 if (ts & BEGIN_CONTROL)
7858 gen_ld = gen_ld_sa;
7859 else
7860 gen_ld = gen_ld_a;
7861 }
7862 else if (ts & BEGIN_CONTROL)
7863 {
7864 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7865 || ia64_needs_block_p (ts))
7866 gen_ld = gen_ld_s;
7867 else
7868 gen_ld = gen_ld_s_a;
7869 }
7870 else if (ts == 0)
7871 gen_ld = gen_ld_;
7872 else
7873 gcc_unreachable ();
7874
7875 return gen_ld[mode_no];
7876}
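/* For example, get_spec_load_gen_function (BEGIN_DATA, 4) returns
   gen_movdi_advanced (an ld8.a-style advanced DImode load), while a TS of
   zero simply selects the ordinary move generator for the requested mode.  */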
048d0d36 7877
ef4bddc2 7878/* Constants that help map 'machine_mode' to int. */
048d0d36
MK
7879enum SPEC_MODES
7880 {
7881 SPEC_MODE_INVALID = -1,
7882 SPEC_MODE_FIRST = 0,
7883 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7884 SPEC_MODE_FOR_EXTEND_LAST = 3,
7885 SPEC_MODE_LAST = 8
7886 };
7887
388092d5
AB
7888enum
7889 {
7890 /* Offset to reach ZERO_EXTEND patterns. */
7891 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7892 };
7893
048d0d36
MK
7894/* Return index of the MODE. */
7895static int
ef4bddc2 7896ia64_mode_to_int (machine_mode mode)
048d0d36
MK
7897{
7898 switch (mode)
7899 {
7900 case BImode: return 0; /* SPEC_MODE_FIRST */
7901 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7902 case HImode: return 2;
7903 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7904 case DImode: return 4;
7905 case SFmode: return 5;
7906 case DFmode: return 6;
7907 case XFmode: return 7;
7908 case TImode:
7909 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7910 mentioned in itanium[12].md. Predicate fp_register_operand also
7911 needs to be defined. Bottom line: better disable for now. */
7912 return SPEC_MODE_INVALID;
7913 default: return SPEC_MODE_INVALID;
7914 }
7915}
7916
7917/* Provide information about speculation capabilities. */
7918static void
7919ia64_set_sched_flags (spec_info_t spec_info)
7920{
7921 unsigned int *flags = &(current_sched_info->flags);
7922
7923 if (*flags & SCHED_RGN
388092d5
AB
7924 || *flags & SCHED_EBB
7925 || *flags & SEL_SCHED)
048d0d36
MK
7926 {
7927 int mask = 0;
7928
a57aee2a 7929 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
388092d5 7930 || (mflag_sched_ar_data_spec && reload_completed))
048d0d36
MK
7931 {
7932 mask |= BEGIN_DATA;
388092d5
AB
7933
7934 if (!sel_sched_p ()
7935 && ((mflag_sched_br_in_data_spec && !reload_completed)
7936 || (mflag_sched_ar_in_data_spec && reload_completed)))
048d0d36
MK
7937 mask |= BE_IN_DATA;
7938 }
7939
388092d5
AB
7940 if (mflag_sched_control_spec
7941 && (!sel_sched_p ()
7942 || reload_completed))
048d0d36
MK
7943 {
7944 mask |= BEGIN_CONTROL;
7945
388092d5 7946 if (!sel_sched_p () && mflag_sched_in_control_spec)
048d0d36
MK
7947 mask |= BE_IN_CONTROL;
7948 }
7949
7ab5df48
AB
7950 spec_info->mask = mask;
7951
048d0d36
MK
7952 if (mask)
7953 {
6fb5fa3c
DB
7954 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7955
7956 if (mask & BE_IN_SPEC)
7957 *flags |= NEW_BBS;
048d0d36 7958
048d0d36
MK
7959 spec_info->flags = 0;
7960
16d83dd6
MK
7961 if ((mask & CONTROL_SPEC)
7962 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7963 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
388092d5
AB
7964
7965 if (sched_verbose >= 1)
7966 spec_info->dump = sched_dump;
048d0d36
MK
7967 else
7968 spec_info->dump = 0;
7969
7970 if (mflag_sched_count_spec_in_critical_path)
7971 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7972 }
7973 }
cd510f15
AM
7974 else
7975 spec_info->mask = 0;
048d0d36
MK
7976}
7977
388092d5
AB
7978/* If INSN is an appropriate load return its mode.
7979 Return -1 otherwise. */
048d0d36 7980static int
647d790d 7981get_mode_no_for_insn (rtx_insn *insn)
388092d5
AB
7982{
7983 rtx reg, mem, mode_rtx;
7984 int mode_no;
048d0d36 7985 bool extend_p;
048d0d36 7986
388092d5 7987 extract_insn_cached (insn);
048d0d36 7988
388092d5
AB
7989 /* We use WHICH_ALTERNATIVE only after reload. This will
7990 guarantee that reload won't touch a speculative insn. */
f6ec1d11 7991
388092d5 7992 if (recog_data.n_operands != 2)
048d0d36
MK
7993 return -1;
7994
388092d5
AB
7995 reg = recog_data.operand[0];
7996 mem = recog_data.operand[1];
f6ec1d11 7997
388092d5
AB
7998 /* We should use MEM's mode since REG's mode in presence of
7999 ZERO_EXTEND will always be DImode. */
8000 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8001 /* Process non-speculative ld. */
8002 {
8003 if (!reload_completed)
8004 {
8005 /* Do not speculate into regs like ar.lc. */
8006 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8007 return -1;
8008
8009 if (!MEM_P (mem))
8010 return -1;
8011
8012 {
8013 rtx mem_reg = XEXP (mem, 0);
8014
8015 if (!REG_P (mem_reg))
8016 return -1;
8017 }
8018
8019 mode_rtx = mem;
8020 }
8021 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8022 {
8023 gcc_assert (REG_P (reg) && MEM_P (mem));
8024 mode_rtx = mem;
8025 }
8026 else
8027 return -1;
8028 }
8029 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8030 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8031 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8032 /* Process speculative ld or ld.c. */
048d0d36 8033 {
388092d5
AB
8034 gcc_assert (REG_P (reg) && MEM_P (mem));
8035 mode_rtx = mem;
048d0d36
MK
8036 }
8037 else
048d0d36 8038 {
388092d5 8039 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
048d0d36 8040
388092d5
AB
8041 if (attr_class == ITANIUM_CLASS_CHK_A
8042 || attr_class == ITANIUM_CLASS_CHK_S_I
8043 || attr_class == ITANIUM_CLASS_CHK_S_F)
8044 /* Process chk. */
8045 mode_rtx = reg;
8046 else
8047 return -1;
048d0d36 8048 }
f6ec1d11 8049
388092d5 8050 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
f6ec1d11 8051
388092d5 8052 if (mode_no == SPEC_MODE_INVALID)
048d0d36
MK
8053 return -1;
8054
388092d5
AB
8055 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8056
8057 if (extend_p)
8058 {
8059 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8060 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8061 return -1;
f6ec1d11 8062
388092d5
AB
8063 mode_no += SPEC_GEN_EXTEND_OFFSET;
8064 }
048d0d36 8065
388092d5 8066 return mode_no;
048d0d36
MK
8067}
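/* For instance, a DImode zero-extension of a QImode memory load yields
   1 (QImode) + SPEC_GEN_EXTEND_OFFSET (8) == 9, which indexes the
   gen_zero_extendqidi2_* entries of the generator tables above.  */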
8068
388092d5
AB
8069/* If X is an unspec part of a speculative load, return its code.
8070 Return -1 otherwise. */
8071static int
8072get_spec_unspec_code (const_rtx x)
8073{
8074 if (GET_CODE (x) != UNSPEC)
8075 return -1;
048d0d36 8076
048d0d36 8077 {
388092d5 8078 int code;
048d0d36 8079
388092d5 8080 code = XINT (x, 1);
048d0d36 8081
388092d5
AB
8082 switch (code)
8083 {
8084 case UNSPEC_LDA:
8085 case UNSPEC_LDS:
8086 case UNSPEC_LDS_A:
8087 case UNSPEC_LDSA:
8088 return code;
048d0d36 8089
388092d5
AB
8090 default:
8091 return -1;
8092 }
8093 }
8094}
048d0d36 8095
388092d5
AB
8096/* Implement skip_rtx_p hook. */
8097static bool
8098ia64_skip_rtx_p (const_rtx x)
8099{
8100 return get_spec_unspec_code (x) != -1;
8101}
048d0d36 8102
388092d5
AB
8103/* If INSN is a speculative load, return its UNSPEC code.
8104 Return -1 otherwise. */
8105static int
8106get_insn_spec_code (const_rtx insn)
8107{
8108 rtx pat, reg, mem;
048d0d36 8109
388092d5 8110 pat = PATTERN (insn);
048d0d36 8111
388092d5
AB
8112 if (GET_CODE (pat) == COND_EXEC)
8113 pat = COND_EXEC_CODE (pat);
048d0d36 8114
388092d5
AB
8115 if (GET_CODE (pat) != SET)
8116 return -1;
8117
8118 reg = SET_DEST (pat);
8119 if (!REG_P (reg))
8120 return -1;
8121
8122 mem = SET_SRC (pat);
8123 if (GET_CODE (mem) == ZERO_EXTEND)
8124 mem = XEXP (mem, 0);
8125
8126 return get_spec_unspec_code (mem);
8127}
8128
8129/* If INSN is a speculative load, return a ds with the speculation types.
8130 Otherwise [if INSN is a normal instruction] return 0. */
8131static ds_t
ac44248e 8132ia64_get_insn_spec_ds (rtx_insn *insn)
388092d5
AB
8133{
8134 int code = get_insn_spec_code (insn);
8135
8136 switch (code)
048d0d36 8137 {
388092d5
AB
8138 case UNSPEC_LDA:
8139 return BEGIN_DATA;
048d0d36 8140
388092d5
AB
8141 case UNSPEC_LDS:
8142 case UNSPEC_LDS_A:
8143 return BEGIN_CONTROL;
048d0d36 8144
388092d5
AB
8145 case UNSPEC_LDSA:
8146 return BEGIN_DATA | BEGIN_CONTROL;
048d0d36 8147
388092d5
AB
8148 default:
8149 return 0;
048d0d36 8150 }
388092d5
AB
8151}
8152
8153/* If INSN is a speculative load return a ds with the speculation types that
8154 will be checked.
8155 Otherwise [if INSN is a normal instruction] return 0. */
8156static ds_t
ac44248e 8157ia64_get_insn_checked_ds (rtx_insn *insn)
388092d5
AB
8158{
8159 int code = get_insn_spec_code (insn);
8160
8161 switch (code)
048d0d36 8162 {
388092d5
AB
8163 case UNSPEC_LDA:
8164 return BEGIN_DATA | BEGIN_CONTROL;
8165
8166 case UNSPEC_LDS:
8167 return BEGIN_CONTROL;
8168
8169 case UNSPEC_LDS_A:
8170 case UNSPEC_LDSA:
8171 return BEGIN_DATA | BEGIN_CONTROL;
8172
8173 default:
8174 return 0;
048d0d36 8175 }
388092d5 8176}
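/* Note the asymmetry between the two functions above: an ld.a (UNSPEC_LDA)
   is itself only data speculative, yet the check generated for it is
   regarded as covering control speculation as well, while an ld.s
   (UNSPEC_LDS) is control speculative and its check covers only that.  */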
048d0d36 8177
388092d5
AB
8178/* Return the speculative pattern for INSN, generated for the speculation
8179 types TS and machine mode index MODE_NO. The operands are taken from the
8180 already-extracted recog_data, and any COND_EXEC wrapper of the original
8181 pattern is preserved around the new pattern. */
8182static rtx
8183ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8184{
8185 rtx pat, new_pat;
8186 gen_func_t gen_load;
048d0d36 8187
388092d5 8188 gen_load = get_spec_load_gen_function (ts, mode_no);
048d0d36 8189
388092d5
AB
8190 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8191 copy_rtx (recog_data.operand[1]));
048d0d36
MK
8192
8193 pat = PATTERN (insn);
8194 if (GET_CODE (pat) == COND_EXEC)
388092d5
AB
8195 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8196 new_pat);
048d0d36
MK
8197
8198 return new_pat;
8199}
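/* E.g. to make a DImode load both data and control speculative
   (ts == (BEGIN_DATA | BEGIN_CONTROL), mode_no == 4) this builds a
   gen_movdi_speculative_advanced pattern, re-wrapped in the original
   COND_EXEC when the source insn was predicated.  */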
8200
048d0d36 8201static bool
388092d5
AB
8202insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8203 ds_t ds ATTRIBUTE_UNUSED)
048d0d36 8204{
388092d5
AB
8205 return false;
8206}
048d0d36 8207
388092d5
AB
8208/* Implement targetm.sched.speculate_insn hook.
8209 Check if the INSN can be TS speculative.
8210 If 'no' - return -1.
8211 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8212 If current pattern of the INSN already provides TS speculation,
8213 return 0. */
8214static int
ac44248e 8215ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
388092d5
AB
8216{
8217 int mode_no;
8218 int res;
8219
8220 gcc_assert (!(ts & ~SPECULATIVE));
048d0d36 8221
388092d5
AB
8222 if (ia64_spec_check_p (insn))
8223 return -1;
048d0d36 8224
388092d5
AB
8225 if ((ts & BE_IN_SPEC)
8226 && !insn_can_be_in_speculative_p (insn, ts))
8227 return -1;
048d0d36 8228
388092d5 8229 mode_no = get_mode_no_for_insn (insn);
048d0d36 8230
388092d5
AB
8231 if (mode_no != SPEC_MODE_INVALID)
8232 {
8233 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8234 res = 0;
8235 else
8236 {
8237 res = 1;
8238 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8239 }
8240 }
8241 else
8242 res = -1;
048d0d36 8243
388092d5
AB
8244 return res;
8245}
048d0d36 8246
388092d5
AB
/* Return a function that will generate a check for speculation TS with
   mode MODE_NO.
   If a simple check is needed, pass true for SIMPLE_CHECK_P.
   If a clearing check is needed, pass true for CLEARING_CHECK_P. */
8251static gen_func_t
8252get_spec_check_gen_function (ds_t ts, int mode_no,
8253 bool simple_check_p, bool clearing_check_p)
8254{
8255 static gen_func_t gen_ld_c_clr[] = {
048d0d36
MK
8256 gen_movbi_clr,
8257 gen_movqi_clr,
8258 gen_movhi_clr,
8259 gen_movsi_clr,
8260 gen_movdi_clr,
8261 gen_movsf_clr,
8262 gen_movdf_clr,
8263 gen_movxf_clr,
8264 gen_movti_clr,
8265 gen_zero_extendqidi2_clr,
8266 gen_zero_extendhidi2_clr,
8267 gen_zero_extendsidi2_clr,
388092d5
AB
8268 };
8269 static gen_func_t gen_ld_c_nc[] = {
8270 gen_movbi_nc,
8271 gen_movqi_nc,
8272 gen_movhi_nc,
8273 gen_movsi_nc,
8274 gen_movdi_nc,
8275 gen_movsf_nc,
8276 gen_movdf_nc,
8277 gen_movxf_nc,
8278 gen_movti_nc,
8279 gen_zero_extendqidi2_nc,
8280 gen_zero_extendhidi2_nc,
8281 gen_zero_extendsidi2_nc,
8282 };
8283 static gen_func_t gen_chk_a_clr[] = {
048d0d36
MK
8284 gen_advanced_load_check_clr_bi,
8285 gen_advanced_load_check_clr_qi,
8286 gen_advanced_load_check_clr_hi,
8287 gen_advanced_load_check_clr_si,
8288 gen_advanced_load_check_clr_di,
8289 gen_advanced_load_check_clr_sf,
8290 gen_advanced_load_check_clr_df,
8291 gen_advanced_load_check_clr_xf,
8292 gen_advanced_load_check_clr_ti,
8293 gen_advanced_load_check_clr_di,
8294 gen_advanced_load_check_clr_di,
8295 gen_advanced_load_check_clr_di,
388092d5
AB
8296 };
8297 static gen_func_t gen_chk_a_nc[] = {
8298 gen_advanced_load_check_nc_bi,
8299 gen_advanced_load_check_nc_qi,
8300 gen_advanced_load_check_nc_hi,
8301 gen_advanced_load_check_nc_si,
8302 gen_advanced_load_check_nc_di,
8303 gen_advanced_load_check_nc_sf,
8304 gen_advanced_load_check_nc_df,
8305 gen_advanced_load_check_nc_xf,
8306 gen_advanced_load_check_nc_ti,
8307 gen_advanced_load_check_nc_di,
8308 gen_advanced_load_check_nc_di,
8309 gen_advanced_load_check_nc_di,
8310 };
8311 static gen_func_t gen_chk_s[] = {
048d0d36
MK
8312 gen_speculation_check_bi,
8313 gen_speculation_check_qi,
8314 gen_speculation_check_hi,
8315 gen_speculation_check_si,
8316 gen_speculation_check_di,
8317 gen_speculation_check_sf,
8318 gen_speculation_check_df,
8319 gen_speculation_check_xf,
8320 gen_speculation_check_ti,
8321 gen_speculation_check_di,
8322 gen_speculation_check_di,
388092d5 8323 gen_speculation_check_di,
048d0d36
MK
8324 };
8325
388092d5 8326 gen_func_t *gen_check;
048d0d36 8327
388092d5 8328 if (ts & BEGIN_DATA)
048d0d36 8329 {
388092d5
AB
	  /* We don't need recovery code: even if this is an ld.sa, an
	     ALAT entry will be allocated only if the NAT bit is set to
	     zero.  So it is enough to use ld.c here. */
8333
8334 if (simple_check_p)
8335 {
8336 gcc_assert (mflag_sched_spec_ldc);
8337
8338 if (clearing_check_p)
8339 gen_check = gen_ld_c_clr;
8340 else
8341 gen_check = gen_ld_c_nc;
8342 }
8343 else
8344 {
8345 if (clearing_check_p)
8346 gen_check = gen_chk_a_clr;
8347 else
8348 gen_check = gen_chk_a_nc;
8349 }
048d0d36 8350 }
388092d5 8351 else if (ts & BEGIN_CONTROL)
048d0d36 8352 {
388092d5
AB
8353 if (simple_check_p)
8354 /* We might want to use ld.sa -> ld.c instead of
8355 ld.s -> chk.s. */
048d0d36 8356 {
388092d5 8357 gcc_assert (!ia64_needs_block_p (ts));
048d0d36 8358
388092d5
AB
8359 if (clearing_check_p)
8360 gen_check = gen_ld_c_clr;
8361 else
8362 gen_check = gen_ld_c_nc;
8363 }
8364 else
8365 {
8366 gen_check = gen_chk_s;
048d0d36 8367 }
388092d5
AB
8368 }
8369 else
8370 gcc_unreachable ();
8371
8372 gcc_assert (mode_no >= 0);
8373 return gen_check[mode_no];
8374}
8375
/* Return nonzero if a speculation check of type TS needs a branchy (block)
   recovery check. */
8377static bool
8378ia64_needs_block_p (ds_t ts)
8379{
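  /* (Editor's note)  A simple, non-branchy ld.c check suffices for data
     speculation when mflag_sched_spec_ldc is set, and for control
     speculation when both mflag_sched_spec_ldc and
     mflag_sched_spec_control_ldc are set; otherwise a branchy recovery
     block is needed.  */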
8380 if (ts & BEGIN_DATA)
8381 return !mflag_sched_spec_ldc;
8382
8383 gcc_assert ((ts & BEGIN_CONTROL) != 0);
048d0d36 8384
388092d5
AB
8385 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8386}
8387
8e90de43 8388/* Generate (or regenerate) a recovery check for INSN. */
388092d5 8389static rtx
ac44248e 8390ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
388092d5
AB
8391{
8392 rtx op1, pat, check_pat;
8393 gen_func_t gen_check;
8394 int mode_no;
8395
8396 mode_no = get_mode_no_for_insn (insn);
8397 gcc_assert (mode_no >= 0);
8398
8399 if (label)
8400 op1 = label;
8401 else
8402 {
8403 gcc_assert (!ia64_needs_block_p (ds));
8404 op1 = copy_rtx (recog_data.operand[1]);
048d0d36 8405 }
388092d5
AB
8406
8407 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8408 true);
048d0d36 8409
388092d5 8410 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
048d0d36
MK
8411
8412 pat = PATTERN (insn);
8413 if (GET_CODE (pat) == COND_EXEC)
8414 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8415 check_pat);
8416
8417 return check_pat;
8418}
8419
/* Return nonzero if X is a speculation recovery check. */
8421static int
8422ia64_spec_check_p (rtx x)
8423{
8424 x = PATTERN (x);
8425 if (GET_CODE (x) == COND_EXEC)
8426 x = COND_EXEC_CODE (x);
8427 if (GET_CODE (x) == SET)
8428 return ia64_spec_check_src_p (SET_SRC (x));
8429 return 0;
8430}
8431
/* Return nonzero if SRC belongs to a recovery check. */
8433static int
8434ia64_spec_check_src_p (rtx src)
8435{
8436 if (GET_CODE (src) == IF_THEN_ELSE)
8437 {
8438 rtx t;
8439
8440 t = XEXP (src, 0);
8441 if (GET_CODE (t) == NE)
8442 {
8443 t = XEXP (t, 0);
8444
8445 if (GET_CODE (t) == UNSPEC)
8446 {
8447 int code;
8448
8449 code = XINT (t, 1);
8450
388092d5
AB
8451 if (code == UNSPEC_LDCCLR
8452 || code == UNSPEC_LDCNC
8453 || code == UNSPEC_CHKACLR
8454 || code == UNSPEC_CHKANC
8455 || code == UNSPEC_CHKS)
048d0d36
MK
8456 {
8457 gcc_assert (code != 0);
8458 return code;
8459 }
8460 }
8461 }
8462 }
8463 return 0;
8464}
30028c85 8465\f
2130b7fb 8466
30028c85
VM
8467/* The following page contains abstract data `bundle states' which are
8468 used for bundling insns (inserting nops and template generation). */
8469
/* The following describes the state of insn bundling. */
8471
8472struct bundle_state
8473{
8474 /* Unique bundle state number to identify them in the debugging
8475 output */
8476 int unique_num;
b32d5189 8477 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
30028c85
VM
  /* number of nops before and after the insn */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
		   insn) */
8482 int cost; /* cost of the state in cycles */
8483 int accumulated_insns_num; /* number of all previous insns including
8484 nops. L is considered as 2 insns */
8485 int branch_deviation; /* deviation of previous branches from 3rd slots */
388092d5 8486 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
30028c85
VM
8487 struct bundle_state *next; /* next state with the same insn_num */
8488 struct bundle_state *originator; /* originator (previous insn state) */
8489 /* All bundle states are in the following chain. */
8490 struct bundle_state *allocated_states_chain;
8491 /* The DFA State after issuing the insn and the nops. */
8492 state_t dfa_state;
8493};
2130b7fb 8494
/* The following maps an insn number to the corresponding bundle state. */
2130b7fb 8496
30028c85 8497static struct bundle_state **index_to_bundle_states;
2130b7fb 8498
/* The unique number of the next bundle state. */
2130b7fb 8500
30028c85 8501static int bundle_states_num;
2130b7fb 8502
30028c85 8503/* All allocated bundle states are in the following chain. */
2130b7fb 8504
30028c85 8505static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 8506
30028c85
VM
8507/* All allocated but not used bundle states are in the following
8508 chain. */
870f9ec0 8509
30028c85 8510static struct bundle_state *free_bundle_state_chain;
2130b7fb 8511
2130b7fb 8512
30028c85 8513/* The following function returns a free bundle state. */
2130b7fb 8514
30028c85 8515static struct bundle_state *
9c808aad 8516get_free_bundle_state (void)
30028c85
VM
8517{
8518 struct bundle_state *result;
2130b7fb 8519
30028c85 8520 if (free_bundle_state_chain != NULL)
2130b7fb 8521 {
30028c85
VM
8522 result = free_bundle_state_chain;
8523 free_bundle_state_chain = result->next;
2130b7fb 8524 }
30028c85 8525 else
2130b7fb 8526 {
5ead67f6 8527 result = XNEW (struct bundle_state);
30028c85
VM
8528 result->dfa_state = xmalloc (dfa_state_size);
8529 result->allocated_states_chain = allocated_bundle_states_chain;
8530 allocated_bundle_states_chain = result;
2130b7fb 8531 }
30028c85
VM
8532 result->unique_num = bundle_states_num++;
8533 return result;
9c808aad 8534
30028c85 8535}
2130b7fb 8536
/* The following function frees the given bundle state. */
2130b7fb 8538
30028c85 8539static void
9c808aad 8540free_bundle_state (struct bundle_state *state)
30028c85
VM
8541{
8542 state->next = free_bundle_state_chain;
8543 free_bundle_state_chain = state;
8544}
2130b7fb 8545
30028c85 8546/* Start work with abstract data `bundle states'. */
2130b7fb 8547
30028c85 8548static void
9c808aad 8549initiate_bundle_states (void)
30028c85
VM
8550{
8551 bundle_states_num = 0;
8552 free_bundle_state_chain = NULL;
8553 allocated_bundle_states_chain = NULL;
2130b7fb
BS
8554}
8555
30028c85 8556/* Finish work with abstract data `bundle states'. */
2130b7fb
BS
8557
8558static void
9c808aad 8559finish_bundle_states (void)
2130b7fb 8560{
30028c85
VM
8561 struct bundle_state *curr_state, *next_state;
8562
8563 for (curr_state = allocated_bundle_states_chain;
8564 curr_state != NULL;
8565 curr_state = next_state)
2130b7fb 8566 {
30028c85
VM
8567 next_state = curr_state->allocated_states_chain;
8568 free (curr_state->dfa_state);
8569 free (curr_state);
2130b7fb 8570 }
2130b7fb
BS
8571}
8572
3a4f280b 8573/* Hashtable helpers. */
2130b7fb 8574
8d67ee55 8575struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
3a4f280b 8576{
67f58944
TS
8577 static inline hashval_t hash (const bundle_state *);
8578 static inline bool equal (const bundle_state *, const bundle_state *);
3a4f280b 8579};
2130b7fb 8580
/* The function returns the hash of BUNDLE_STATE. */
2130b7fb 8582
3a4f280b 8583inline hashval_t
67f58944 8584bundle_state_hasher::hash (const bundle_state *state)
30028c85 8585{
30028c85 8586 unsigned result, i;
2130b7fb 8587
30028c85
VM
8588 for (result = i = 0; i < dfa_state_size; i++)
8589 result += (((unsigned char *) state->dfa_state) [i]
8590 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8591 return result + state->insn_num;
8592}
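/* (Editor's note)  A minimal standalone sketch of the hashing scheme
   above, applied to a plain byte buffer; `toy_bundle_state_hash' and
   the buffer contents are illustrative stand-ins, not part of GCC.
   Each byte of the state is shifted by a position-dependent amount so
   that permutations of the same bytes hash differently, and the insn
   number is mixed in so states at different positions rarely collide.  */
#include <limits.h>
#include <stddef.h>
#include <stdio.h>

static unsigned
toy_bundle_state_hash (const unsigned char *dfa_state, size_t size,
		       int insn_num)
{
  unsigned result = 0;

  /* Same mixing as bundle_state_hasher::hash.  */
  for (size_t i = 0; i < size; i++)
    result += (unsigned) dfa_state[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + insn_num;
}

int
main (void)
{
  unsigned char state[4] = { 1, 2, 3, 4 };

  printf ("%u\n", toy_bundle_state_hash (state, sizeof state, 2));
  return 0;
}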
2130b7fb 8593
30028c85 8594/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 8595
3a4f280b 8596inline bool
67f58944
TS
8597bundle_state_hasher::equal (const bundle_state *state1,
8598 const bundle_state *state2)
30028c85 8599{
30028c85
VM
8600 return (state1->insn_num == state2->insn_num
8601 && memcmp (state1->dfa_state, state2->dfa_state,
8602 dfa_state_size) == 0);
8603}
2130b7fb 8604
3a4f280b
LC
8605/* Hash table of the bundle states. The key is dfa_state and insn_num
8606 of the bundle states. */
8607
c203e8a7 8608static hash_table<bundle_state_hasher> *bundle_state_table;
3a4f280b 8609
30028c85
VM
/* The function inserts the BUNDLE_STATE into the hash table.  It
   returns nonzero if the bundle state has been inserted into the
   table.  The table keeps the best bundle state for each key. */
2130b7fb 8613
30028c85 8614static int
9c808aad 8615insert_bundle_state (struct bundle_state *bundle_state)
30028c85 8616{
3a4f280b 8617 struct bundle_state **entry_ptr;
2130b7fb 8618
c203e8a7 8619 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
30028c85
VM
8620 if (*entry_ptr == NULL)
8621 {
8622 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8623 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
3a4f280b 8624 *entry_ptr = bundle_state;
30028c85 8625 return TRUE;
2130b7fb 8626 }
3a4f280b
LC
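  /* (Editor's note)  An existing state with the same DFA state and insn
     number is replaced only if the new one is strictly better: smaller
     cost, then fewer accumulated insns and nops, then smaller branch
     deviation, then fewer stop bits in the middle of bundles.  */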
8627 else if (bundle_state->cost < (*entry_ptr)->cost
8628 || (bundle_state->cost == (*entry_ptr)->cost
8629 && ((*entry_ptr)->accumulated_insns_num
30028c85 8630 > bundle_state->accumulated_insns_num
3a4f280b 8631 || ((*entry_ptr)->accumulated_insns_num
30028c85 8632 == bundle_state->accumulated_insns_num
3a4f280b 8633 && ((*entry_ptr)->branch_deviation
388092d5 8634 > bundle_state->branch_deviation
3a4f280b 8635 || ((*entry_ptr)->branch_deviation
388092d5 8636 == bundle_state->branch_deviation
3a4f280b 8637 && (*entry_ptr)->middle_bundle_stops
388092d5 8638 > bundle_state->middle_bundle_stops))))))
9c808aad 8639
2130b7fb 8640 {
30028c85
VM
8641 struct bundle_state temp;
8642
3a4f280b
LC
8643 temp = **entry_ptr;
8644 **entry_ptr = *bundle_state;
8645 (*entry_ptr)->next = temp.next;
30028c85 8646 *bundle_state = temp;
2130b7fb 8647 }
30028c85
VM
8648 return FALSE;
8649}
2130b7fb 8650
30028c85
VM
8651/* Start work with the hash table. */
8652
8653static void
9c808aad 8654initiate_bundle_state_table (void)
30028c85 8655{
c203e8a7 8656 bundle_state_table = new hash_table<bundle_state_hasher> (50);
2130b7fb
BS
8657}
8658
30028c85 8659/* Finish work with the hash table. */
e4027dab
BS
8660
8661static void
9c808aad 8662finish_bundle_state_table (void)
e4027dab 8663{
c203e8a7
TS
8664 delete bundle_state_table;
8665 bundle_state_table = NULL;
e4027dab
BS
8666}
8667
30028c85 8668\f
a0a7b566 8669
30028c85
VM
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops. */
a0a7b566 8672
dd3d2b35 8673static rtx_insn *ia64_nop;
a0a7b566 8674
30028c85
VM
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state. */
8678
8679static int
9c808aad 8680try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 8681{
30028c85 8682 int i;
a0a7b566 8683
30028c85
VM
8684 for (i = 0; i < nops_num; i++)
8685 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8686 {
8687 free_bundle_state (curr_state);
8688 return FALSE;
8689 }
8690 return TRUE;
8691}
a0a7b566 8692
30028c85
VM
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state. */
a0a7b566 8696
30028c85 8697static int
9c808aad 8698try_issue_insn (struct bundle_state *curr_state, rtx insn)
30028c85
VM
8699{
8700 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8701 {
8702 free_bundle_state (curr_state);
8703 return FALSE;
8704 }
8705 return TRUE;
8706}
a0a7b566 8707
30028c85
VM
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN,
   starting from state ORIGINATOR, without advancing the processor cycle.
   If TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue enough nops to fill up the
   current bundle.  If successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'. */
a0a7b566 8714
30028c85 8715static void
9c808aad 8716issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
b32d5189
DM
8717 rtx_insn *insn, int try_bundle_end_p,
8718 int only_bundle_end_p)
30028c85
VM
8719{
8720 struct bundle_state *curr_state;
8721
8722 curr_state = get_free_bundle_state ();
8723 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8724 curr_state->insn = insn;
8725 curr_state->insn_num = originator->insn_num + 1;
8726 curr_state->cost = originator->cost;
8727 curr_state->originator = originator;
8728 curr_state->before_nops_num = before_nops_num;
8729 curr_state->after_nops_num = 0;
8730 curr_state->accumulated_insns_num
8731 = originator->accumulated_insns_num + before_nops_num;
8732 curr_state->branch_deviation = originator->branch_deviation;
388092d5 8733 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
e820471b
NS
8734 gcc_assert (insn);
8735 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
30028c85 8736 {
e820471b 8737 gcc_assert (GET_MODE (insn) != TImode);
30028c85
VM
8738 if (!try_issue_nops (curr_state, before_nops_num))
8739 return;
8740 if (!try_issue_insn (curr_state, insn))
8741 return;
8742 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
388092d5
AB
8743 if (curr_state->accumulated_insns_num % 3 != 0)
8744 curr_state->middle_bundle_stops++;
30028c85
VM
8745 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8746 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 8747 {
30028c85
VM
8748 free_bundle_state (curr_state);
8749 return;
a0a7b566 8750 }
a0a7b566 8751 }
30028c85 8752 else if (GET_MODE (insn) != TImode)
a0a7b566 8753 {
30028c85
VM
8754 if (!try_issue_nops (curr_state, before_nops_num))
8755 return;
8756 if (!try_issue_insn (curr_state, insn))
8757 return;
f32360c7 8758 curr_state->accumulated_insns_num++;
7b84aac0 8759 gcc_assert (!unknown_for_bundling_p (insn));
e820471b 8760
30028c85
VM
8761 if (ia64_safe_type (insn) == TYPE_L)
8762 curr_state->accumulated_insns_num++;
8763 }
8764 else
8765 {
68e11b42
JW
8766 /* If this is an insn that must be first in a group, then don't allow
8767 nops to be emitted before it. Currently, alloc is the only such
8768 supported instruction. */
8769 /* ??? The bundling automatons should handle this for us, but they do
8770 not yet have support for the first_insn attribute. */
8771 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8772 {
8773 free_bundle_state (curr_state);
8774 return;
8775 }
8776
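      /* (Editor's note)  INSN starts a new simulated processor cycle:
	 advance the DFA over the cycle boundary (the pre-cycle insn plus
	 a cycle advance) and charge one cycle to the state's cost.  */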
30028c85
VM
8777 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8778 state_transition (curr_state->dfa_state, NULL);
8779 curr_state->cost++;
8780 if (!try_issue_nops (curr_state, before_nops_num))
8781 return;
8782 if (!try_issue_insn (curr_state, insn))
8783 return;
f32360c7 8784 curr_state->accumulated_insns_num++;
7b84aac0 8785 if (unknown_for_bundling_p (insn))
f32360c7
VM
8786 {
8787 /* Finish bundle containing asm insn. */
8788 curr_state->after_nops_num
8789 = 3 - curr_state->accumulated_insns_num % 3;
8790 curr_state->accumulated_insns_num
8791 += 3 - curr_state->accumulated_insns_num % 3;
8792 }
8793 else if (ia64_safe_type (insn) == TYPE_L)
30028c85
VM
8794 curr_state->accumulated_insns_num++;
8795 }
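  /* (Editor's note)  A branch in slot s (0-2) of its bundle adds 2 - s
     to branch_deviation, so branches already in third slots add nothing
     and a smaller total deviation means branches closer to 3rd slots.  */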
8796 if (ia64_safe_type (insn) == TYPE_B)
8797 curr_state->branch_deviation
8798 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8799 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8800 {
f32360c7 8801 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 8802 {
30028c85
VM
8803 state_t dfa_state;
8804 struct bundle_state *curr_state1;
8805 struct bundle_state *allocated_states_chain;
8806
8807 curr_state1 = get_free_bundle_state ();
8808 dfa_state = curr_state1->dfa_state;
8809 allocated_states_chain = curr_state1->allocated_states_chain;
8810 *curr_state1 = *curr_state;
8811 curr_state1->dfa_state = dfa_state;
8812 curr_state1->allocated_states_chain = allocated_states_chain;
8813 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8814 dfa_state_size);
8815 curr_state = curr_state1;
a0a7b566 8816 }
30028c85
VM
8817 if (!try_issue_nops (curr_state,
8818 3 - curr_state->accumulated_insns_num % 3))
8819 return;
8820 curr_state->after_nops_num
8821 = 3 - curr_state->accumulated_insns_num % 3;
8822 curr_state->accumulated_insns_num
8823 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 8824 }
30028c85
VM
8825 if (!insert_bundle_state (curr_state))
8826 free_bundle_state (curr_state);
8827 return;
8828}
e013f3c7 8829
30028c85
VM
/* The following function returns the position in the two-bundle window
   for the given STATE. */
8832
8833static int
9c808aad 8834get_max_pos (state_t state)
30028c85
VM
8835{
8836 if (cpu_unit_reservation_p (state, pos_6))
8837 return 6;
8838 else if (cpu_unit_reservation_p (state, pos_5))
8839 return 5;
8840 else if (cpu_unit_reservation_p (state, pos_4))
8841 return 4;
8842 else if (cpu_unit_reservation_p (state, pos_3))
8843 return 3;
8844 else if (cpu_unit_reservation_p (state, pos_2))
8845 return 2;
8846 else if (cpu_unit_reservation_p (state, pos_1))
8847 return 1;
8848 else
8849 return 0;
a0a7b566
BS
8850}
8851
30028c85
VM
/* The function returns the code of a possible template for the given
   position and state.  It should be called only with the two position
   values 3 or 6.  We avoid generating F NOPs by putting templates
   containing F insns at the end of the template search, because of an
   undocumented anomaly in McKinley-derived cores which can cause stalls
   if an F-unit insn (including a NOP) is issued within a six-cycle
   window after reading certain application registers (such as ar.bsp).
   Furthermore, power considerations also argue against the use of
   F-unit instructions unless they're really needed. */
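/* (Editor's note, inferred from the reservation tests below.)  The
   returned codes index the bundle templates as 0=.mii, 1=.mmi, 2=.mfi,
   3=.mmf, 4=.bbb, 5=.mbb, 6=.mib, 7=.mmb, 8=.mfb, 9=.mlx, i.e. the
   operand expected by the bundle_selector pattern. */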
2130b7fb 8861
c237e94a 8862static int
9c808aad 8863get_template (state_t state, int pos)
2130b7fb 8864{
30028c85 8865 switch (pos)
2130b7fb 8866 {
30028c85 8867 case 3:
96ddf8ef 8868 if (cpu_unit_reservation_p (state, _0mmi_))
30028c85 8869 return 1;
96ddf8ef
VM
8870 else if (cpu_unit_reservation_p (state, _0mii_))
8871 return 0;
30028c85
VM
8872 else if (cpu_unit_reservation_p (state, _0mmb_))
8873 return 7;
96ddf8ef
VM
8874 else if (cpu_unit_reservation_p (state, _0mib_))
8875 return 6;
8876 else if (cpu_unit_reservation_p (state, _0mbb_))
8877 return 5;
8878 else if (cpu_unit_reservation_p (state, _0bbb_))
8879 return 4;
8880 else if (cpu_unit_reservation_p (state, _0mmf_))
8881 return 3;
8882 else if (cpu_unit_reservation_p (state, _0mfi_))
8883 return 2;
30028c85
VM
8884 else if (cpu_unit_reservation_p (state, _0mfb_))
8885 return 8;
8886 else if (cpu_unit_reservation_p (state, _0mlx_))
8887 return 9;
8888 else
e820471b 8889 gcc_unreachable ();
30028c85 8890 case 6:
96ddf8ef 8891 if (cpu_unit_reservation_p (state, _1mmi_))
30028c85 8892 return 1;
96ddf8ef
VM
8893 else if (cpu_unit_reservation_p (state, _1mii_))
8894 return 0;
30028c85
VM
8895 else if (cpu_unit_reservation_p (state, _1mmb_))
8896 return 7;
96ddf8ef
VM
8897 else if (cpu_unit_reservation_p (state, _1mib_))
8898 return 6;
8899 else if (cpu_unit_reservation_p (state, _1mbb_))
8900 return 5;
8901 else if (cpu_unit_reservation_p (state, _1bbb_))
8902 return 4;
8903 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8904 return 3;
8905 else if (cpu_unit_reservation_p (state, _1mfi_))
8906 return 2;
30028c85
VM
8907 else if (cpu_unit_reservation_p (state, _1mfb_))
8908 return 8;
8909 else if (cpu_unit_reservation_p (state, _1mlx_))
8910 return 9;
8911 else
e820471b 8912 gcc_unreachable ();
30028c85 8913 default:
e820471b 8914 gcc_unreachable ();
2130b7fb 8915 }
30028c85 8916}
2130b7fb 8917
388092d5 8918/* True when INSN is important for bundling. */
7b84aac0 8919
388092d5 8920static bool
647d790d 8921important_for_bundling_p (rtx_insn *insn)
388092d5
AB
8922{
8923 return (INSN_P (insn)
8924 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8925 && GET_CODE (PATTERN (insn)) != USE
8926 && GET_CODE (PATTERN (insn)) != CLOBBER);
8927}
8928
30028c85
VM
/* The following function returns the first insn important for insn
   bundling, searching from INSN up to (but not including) TAIL. */
a0a7b566 8931
b32d5189
DM
8932static rtx_insn *
8933get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
30028c85
VM
8934{
8935 for (; insn && insn != tail; insn = NEXT_INSN (insn))
388092d5 8936 if (important_for_bundling_p (insn))
30028c85 8937 return insn;
b32d5189 8938 return NULL;
30028c85
VM
8939}
8940
7b84aac0
EB
8941/* True when INSN is unknown, but important, for bundling. */
8942
8943static bool
647d790d 8944unknown_for_bundling_p (rtx_insn *insn)
7b84aac0
EB
8945{
8946 return (INSN_P (insn)
8947 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8948 && GET_CODE (PATTERN (insn)) != USE
8949 && GET_CODE (PATTERN (insn)) != CLOBBER);
8950}
8951
4a4cd49c
JJ
8952/* Add a bundle selector TEMPLATE0 before INSN. */
8953
8954static void
b32d5189 8955ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
4a4cd49c
JJ
8956{
8957 rtx b = gen_bundle_selector (GEN_INT (template0));
8958
8959 ia64_emit_insn_before (b, insn);
8960#if NR_BUNDLES == 10
8961 if ((template0 == 4 || template0 == 5)
d5fabb58 8962 && ia64_except_unwind_info (&global_options) == UI_TARGET)
4a4cd49c
JJ
8963 {
8964 int i;
8965 rtx note = NULL_RTX;
8966
      /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
	 first or second slot.  If it is and has a REG_EH_REGION note,
	 copy the note to the following nops, as br.call sets rp to the
	 address of the following bundle and therefore an EH region end
	 must be on a bundle boundary. */
8972 insn = PREV_INSN (insn);
8973 for (i = 0; i < 3; i++)
8974 {
8975 do
8976 insn = next_active_insn (insn);
b64925dc 8977 while (NONJUMP_INSN_P (insn)
4a4cd49c 8978 && get_attr_empty (insn) == EMPTY_YES);
b64925dc 8979 if (CALL_P (insn))
4a4cd49c
JJ
8980 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8981 else if (note)
8982 {
8983 int code;
8984
8985 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8986 || code == CODE_FOR_nop_b);
8987 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8988 note = NULL_RTX;
8989 else
bbbbb16a 8990 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
4a4cd49c
JJ
8991 }
8992 }
8993 }
8994#endif
8995}
8996
c856f536
VM
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  The nondeterminism of the automaton makes it possible
   to follow all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by the structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insns
   and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce the memory used by the algorithm.

   When we reach the end of an EBB (extended basic block), we choose
   the best sequence and then, moving backward through the EBB, insert
   templates for the best alternative.  The templates are obtained by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through
   the EBB. */
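/* (Editor's illustrative example, not from the original sources.)
   For a scheduled cycle whose group is  M; I; I ;;  the forward pass
   tries issuing each insn with 0, 1 or 2 nops in front of it; the
   cheapest surviving bundle state issues all three insns with no nops,
   and the backward pass, querying that state's DFA, would emit a single
   .mii bundle selector and no nop fill for this cycle. */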
a0a7b566 9033
30028c85 9034static void
b32d5189 9035bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
30028c85
VM
9036{
9037 struct bundle_state *curr_state, *next_state, *best_state;
b32d5189 9038 rtx_insn *insn, *next_insn;
30028c85 9039 int insn_num;
f32360c7 9040 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 9041 int pos = 0, max_pos, template0, template1;
b32d5189 9042 rtx_insn *b;
30028c85 9043 enum attr_type type;
2d1b811d 9044
30028c85 9045 insn_num = 0;
c856f536 9046 /* Count insns in the EBB. */
30028c85
VM
9047 for (insn = NEXT_INSN (prev_head_insn);
9048 insn && insn != tail;
9049 insn = NEXT_INSN (insn))
9050 if (INSN_P (insn))
9051 insn_num++;
9052 if (insn_num == 0)
9053 return;
9054 bundling_p = 1;
9055 dfa_clean_insn_cache ();
9056 initiate_bundle_state_table ();
5ead67f6 9057 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
ff482c8d 9058 /* First (forward) pass -- generation of bundle states. */
30028c85
VM
9059 curr_state = get_free_bundle_state ();
9060 curr_state->insn = NULL;
9061 curr_state->before_nops_num = 0;
9062 curr_state->after_nops_num = 0;
9063 curr_state->insn_num = 0;
9064 curr_state->cost = 0;
9065 curr_state->accumulated_insns_num = 0;
9066 curr_state->branch_deviation = 0;
388092d5 9067 curr_state->middle_bundle_stops = 0;
30028c85
VM
9068 curr_state->next = NULL;
9069 curr_state->originator = NULL;
9070 state_reset (curr_state->dfa_state);
9071 index_to_bundle_states [0] = curr_state;
9072 insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be ignored. */
30028c85
VM
9074 for (insn = NEXT_INSN (prev_head_insn);
9075 insn != tail;
9076 insn = NEXT_INSN (insn))
9077 if (INSN_P (insn)
7b84aac0 9078 && !important_for_bundling_p (insn)
30028c85 9079 && GET_MODE (insn) == TImode)
2130b7fb 9080 {
30028c85
VM
9081 PUT_MODE (insn, VOIDmode);
9082 for (next_insn = NEXT_INSN (insn);
9083 next_insn != tail;
9084 next_insn = NEXT_INSN (next_insn))
7b84aac0 9085 if (important_for_bundling_p (next_insn)
388092d5 9086 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
30028c85
VM
9087 {
9088 PUT_MODE (next_insn, TImode);
9089 break;
9090 }
2130b7fb 9091 }
048d0d36 9092 /* Forward pass: generation of bundle states. */
30028c85
VM
9093 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9094 insn != NULL_RTX;
9095 insn = next_insn)
1ad72cef 9096 {
7b84aac0 9097 gcc_assert (important_for_bundling_p (insn));
f32360c7 9098 type = ia64_safe_type (insn);
30028c85
VM
9099 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9100 insn_num++;
9101 index_to_bundle_states [insn_num] = NULL;
9102 for (curr_state = index_to_bundle_states [insn_num - 1];
9103 curr_state != NULL;
9104 curr_state = next_state)
f83594c4 9105 {
30028c85 9106 pos = curr_state->accumulated_insns_num % 3;
30028c85 9107 next_state = curr_state->next;
c856f536
VM
9108 /* We must fill up the current bundle in order to start a
9109 subsequent asm insn in a new bundle. Asm insn is always
9110 placed in a separate bundle. */
f32360c7
VM
9111 only_bundle_end_p
9112 = (next_insn != NULL_RTX
9113 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7b84aac0 9114 && unknown_for_bundling_p (next_insn));
c856f536
VM
9115 /* We may fill up the current bundle if it is the cycle end
9116 without a group barrier. */
30028c85 9117 bundle_end_p
f32360c7 9118 = (only_bundle_end_p || next_insn == NULL_RTX
30028c85
VM
9119 || (GET_MODE (next_insn) == TImode
9120 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9121 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7400e46b 9122 || type == TYPE_S)
f32360c7
VM
9123 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9124 only_bundle_end_p);
9125 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9126 only_bundle_end_p);
9127 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9128 only_bundle_end_p);
f83594c4 9129 }
e820471b 9130 gcc_assert (index_to_bundle_states [insn_num]);
30028c85
VM
9131 for (curr_state = index_to_bundle_states [insn_num];
9132 curr_state != NULL;
9133 curr_state = curr_state->next)
9134 if (verbose >= 2 && dump)
9135 {
c856f536
VM
9136 /* This structure is taken from generated code of the
9137 pipeline hazard recognizer (see file insn-attrtab.c).
9138 Please don't forget to change the structure if a new
9139 automaton is added to .md file. */
30028c85
VM
9140 struct DFA_chip
9141 {
9142 unsigned short one_automaton_state;
9143 unsigned short oneb_automaton_state;
9144 unsigned short two_automaton_state;
9145 unsigned short twob_automaton_state;
9146 };
9c808aad 9147
30028c85
VM
9148 fprintf
9149 (dump,
388092d5 9150 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
30028c85
VM
9151 curr_state->unique_num,
9152 (curr_state->originator == NULL
9153 ? -1 : curr_state->originator->unique_num),
9154 curr_state->cost,
9155 curr_state->before_nops_num, curr_state->after_nops_num,
9156 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 9157 curr_state->middle_bundle_stops,
7400e46b 9158 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85
VM
9159 INSN_UID (insn));
9160 }
1ad72cef 9161 }
e820471b
NS
9162
9163 /* We should find a solution because the 2nd insn scheduling has
9164 found one. */
9165 gcc_assert (index_to_bundle_states [insn_num]);
c856f536 9166 /* Find a state corresponding to the best insn sequence. */
30028c85
VM
9167 best_state = NULL;
9168 for (curr_state = index_to_bundle_states [insn_num];
9169 curr_state != NULL;
9170 curr_state = curr_state->next)
c856f536
VM
    /* We are only looking at states whose last bundle is completely
       filled.  First we prefer insn sequences with minimal cost, then
       with a minimal number of inserted nops, and finally with branch
       insns placed in 3rd slots. */
30028c85
VM
9175 if (curr_state->accumulated_insns_num % 3 == 0
9176 && (best_state == NULL || best_state->cost > curr_state->cost
9177 || (best_state->cost == curr_state->cost
9178 && (curr_state->accumulated_insns_num
9179 < best_state->accumulated_insns_num
9180 || (curr_state->accumulated_insns_num
9181 == best_state->accumulated_insns_num
388092d5
AB
9182 && (curr_state->branch_deviation
9183 < best_state->branch_deviation
9184 || (curr_state->branch_deviation
9185 == best_state->branch_deviation
9186 && curr_state->middle_bundle_stops
9187 < best_state->middle_bundle_stops)))))))
30028c85 9188 best_state = curr_state;
c856f536 9189 /* Second (backward) pass: adding nops and templates. */
388092d5 9190 gcc_assert (best_state);
30028c85
VM
9191 insn_num = best_state->before_nops_num;
9192 template0 = template1 = -1;
9193 for (curr_state = best_state;
9194 curr_state->originator != NULL;
9195 curr_state = curr_state->originator)
9196 {
9197 insn = curr_state->insn;
7b84aac0 9198 asm_p = unknown_for_bundling_p (insn);
30028c85
VM
9199 insn_num++;
9200 if (verbose >= 2 && dump)
2130b7fb 9201 {
30028c85
VM
9202 struct DFA_chip
9203 {
9204 unsigned short one_automaton_state;
9205 unsigned short oneb_automaton_state;
9206 unsigned short two_automaton_state;
9207 unsigned short twob_automaton_state;
9208 };
9c808aad 9209
30028c85
VM
9210 fprintf
9211 (dump,
388092d5 9212 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
30028c85
VM
9213 curr_state->unique_num,
9214 (curr_state->originator == NULL
9215 ? -1 : curr_state->originator->unique_num),
9216 curr_state->cost,
9217 curr_state->before_nops_num, curr_state->after_nops_num,
9218 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 9219 curr_state->middle_bundle_stops,
7400e46b 9220 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85 9221 INSN_UID (insn));
2130b7fb 9222 }
c856f536
VM
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  A two-bundle window means that
	 the processor will make two bundle rotations. */
30028c85 9226 max_pos = get_max_pos (curr_state->dfa_state);
c856f536
VM
9227 if (max_pos == 6
9228 /* The following (negative template number) means that the
9229 processor did one bundle rotation. */
9230 || (max_pos == 3 && template0 < 0))
2130b7fb 9231 {
c856f536
VM
9232 /* We are at the end of the window -- find template(s) for
9233 its bundle(s). */
30028c85
VM
9234 pos = max_pos;
9235 if (max_pos == 3)
9236 template0 = get_template (curr_state->dfa_state, 3);
9237 else
9238 {
9239 template1 = get_template (curr_state->dfa_state, 3);
9240 template0 = get_template (curr_state->dfa_state, 6);
9241 }
9242 }
9243 if (max_pos > 3 && template1 < 0)
	/* This may happen when there is a stop inside a bundle. */
30028c85 9245 {
e820471b 9246 gcc_assert (pos <= 3);
30028c85
VM
9247 template1 = get_template (curr_state->dfa_state, 3);
9248 pos += 3;
9249 }
f32360c7 9250 if (!asm_p)
c856f536 9251 /* Emit nops after the current insn. */
f32360c7
VM
9252 for (i = 0; i < curr_state->after_nops_num; i++)
9253 {
b32d5189
DM
9254 rtx nop_pat = gen_nop ();
9255 rtx_insn *nop = emit_insn_after (nop_pat, insn);
f32360c7 9256 pos--;
e820471b 9257 gcc_assert (pos >= 0);
f32360c7
VM
9258 if (pos % 3 == 0)
9259 {
c856f536
VM
9260 /* We are at the start of a bundle: emit the template
9261 (it should be defined). */
e820471b 9262 gcc_assert (template0 >= 0);
4a4cd49c 9263 ia64_add_bundle_selector_before (template0, nop);
c856f536
VM
		  /* If we have a two-bundle window, we make one bundle
		     rotation.  Otherwise template0 will be undefined
		     (a negative value). */
f32360c7
VM
9267 template0 = template1;
9268 template1 = -1;
9269 }
9270 }
c856f536
VM
      /* Move the position backward in the window.  A group barrier has
	 no slot.  An asm insn takes a whole bundle. */
30028c85 9273 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7b84aac0 9274 && !unknown_for_bundling_p (insn))
30028c85 9275 pos--;
c856f536 9276 /* Long insn takes 2 slots. */
30028c85
VM
9277 if (ia64_safe_type (insn) == TYPE_L)
9278 pos--;
e820471b 9279 gcc_assert (pos >= 0);
30028c85
VM
9280 if (pos % 3 == 0
9281 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7b84aac0 9282 && !unknown_for_bundling_p (insn))
30028c85 9283 {
c856f536
VM
9284 /* The current insn is at the bundle start: emit the
9285 template. */
e820471b 9286 gcc_assert (template0 >= 0);
4a4cd49c 9287 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
9288 b = PREV_INSN (insn);
9289 insn = b;
68776c43 9290 /* See comment above in analogous place for emitting nops
c856f536 9291 after the insn. */
30028c85
VM
9292 template0 = template1;
9293 template1 = -1;
9294 }
      /* Emit nops before the current insn. */
30028c85
VM
9296 for (i = 0; i < curr_state->before_nops_num; i++)
9297 {
b32d5189
DM
9298 rtx nop_pat = gen_nop ();
9299 ia64_emit_insn_before (nop_pat, insn);
9300 rtx_insn *nop = PREV_INSN (insn);
30028c85
VM
9301 insn = nop;
9302 pos--;
e820471b 9303 gcc_assert (pos >= 0);
30028c85
VM
9304 if (pos % 3 == 0)
9305 {
68776c43 9306 /* See comment above in analogous place for emitting nops
c856f536 9307 after the insn. */
e820471b 9308 gcc_assert (template0 >= 0);
4a4cd49c 9309 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
9310 b = PREV_INSN (insn);
9311 insn = b;
9312 template0 = template1;
9313 template1 = -1;
9314 }
2130b7fb
BS
9315 }
9316 }
388092d5
AB
9317
9318#ifdef ENABLE_CHECKING
9319 {
9320 /* Assert right calculation of middle_bundle_stops. */
9321 int num = best_state->middle_bundle_stops;
9322 bool start_bundle = true, end_bundle = false;
9323
9324 for (insn = NEXT_INSN (prev_head_insn);
9325 insn && insn != tail;
9326 insn = NEXT_INSN (insn))
9327 {
9328 if (!INSN_P (insn))
9329 continue;
9330 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9331 start_bundle = true;
9332 else
9333 {
b32d5189 9334 rtx_insn *next_insn;
388092d5
AB
9335
9336 for (next_insn = NEXT_INSN (insn);
9337 next_insn && next_insn != tail;
9338 next_insn = NEXT_INSN (next_insn))
9339 if (INSN_P (next_insn)
9340 && (ia64_safe_itanium_class (next_insn)
9341 != ITANIUM_CLASS_IGNORE
9342 || recog_memoized (next_insn)
9343 == CODE_FOR_bundle_selector)
9344 && GET_CODE (PATTERN (next_insn)) != USE
9345 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9346 break;
9347
9348 end_bundle = next_insn == NULL_RTX
9349 || next_insn == tail
9350 || (INSN_P (next_insn)
9351 && recog_memoized (next_insn)
9352 == CODE_FOR_bundle_selector);
9353 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9354 && !start_bundle && !end_bundle
9355 && next_insn
7b84aac0 9356 && !unknown_for_bundling_p (next_insn))
388092d5
AB
9357 num--;
9358
9359 start_bundle = false;
9360 }
9361 }
9362
9363 gcc_assert (num == 0);
9364 }
9365#endif
9366
30028c85
VM
9367 free (index_to_bundle_states);
9368 finish_bundle_state_table ();
9369 bundling_p = 0;
9370 dfa_clean_insn_cache ();
2130b7fb 9371}
c65ebc55 9372
30028c85
VM
9373/* The following function is called at the end of scheduling BB or
9374 EBB. After reload, it inserts stop bits and does insn bundling. */
9375
9376static void
9c808aad 9377ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 9378{
30028c85
VM
9379 if (sched_verbose)
9380 fprintf (dump, "// Finishing schedule.\n");
9381 if (!reload_completed)
9382 return;
9383 if (reload_completed)
9384 {
9385 final_emit_insn_group_barriers (dump);
9386 bundling (dump, sched_verbose, current_sched_info->prev_head,
9387 current_sched_info->next_tail);
9388 if (sched_verbose && dump)
9389 fprintf (dump, "// finishing %d-%d\n",
9390 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9391 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 9392
30028c85
VM
9393 return;
9394 }
c237e94a
ZW
9395}
9396
30028c85 9397/* The following function inserts stop bits in scheduled BB or EBB. */
2130b7fb 9398
30028c85 9399static void
9c808aad 9400final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 9401{
dd3d2b35 9402 rtx_insn *insn;
30028c85 9403 int need_barrier_p = 0;
388092d5 9404 int seen_good_insn = 0;
2130b7fb 9405
30028c85 9406 init_insn_group_barriers ();
2130b7fb 9407
30028c85
VM
9408 for (insn = NEXT_INSN (current_sched_info->prev_head);
9409 insn != current_sched_info->next_tail;
9410 insn = NEXT_INSN (insn))
9411 {
b64925dc 9412 if (BARRIER_P (insn))
b395ddbe 9413 {
dd3d2b35 9414 rtx_insn *last = prev_active_insn (insn);
14d118d6 9415
30028c85 9416 if (! last)
b395ddbe 9417 continue;
34f0d87a 9418 if (JUMP_TABLE_DATA_P (last))
30028c85
VM
9419 last = prev_active_insn (last);
9420 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9421 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 9422
30028c85 9423 init_insn_group_barriers ();
388092d5 9424 seen_good_insn = 0;
30028c85 9425 need_barrier_p = 0;
b395ddbe 9426 }
b5b8b0ac 9427 else if (NONDEBUG_INSN_P (insn))
2130b7fb 9428 {
30028c85 9429 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 9430 {
30028c85 9431 init_insn_group_barriers ();
388092d5 9432 seen_good_insn = 0;
30028c85 9433 need_barrier_p = 0;
c65ebc55 9434 }
388092d5
AB
9435 else if (need_barrier_p || group_barrier_needed (insn)
9436 || (mflag_sched_stop_bits_after_every_cycle
9437 && GET_MODE (insn) == TImode
9438 && seen_good_insn))
2130b7fb 9439 {
30028c85
VM
9440 if (TARGET_EARLY_STOP_BITS)
9441 {
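		  /* (Editor's note)  TARGET_EARLY_STOP_BITS: instead of
		     placing the stop bit immediately before INSN, back up
		     to the most recent cycle start that already required a
		     stop, emit the group barrier there, and then replay
		     the group computation forward up to INSN.  */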
dd3d2b35 9442 rtx_insn *last;
9c808aad 9443
30028c85
VM
9444 for (last = insn;
9445 last != current_sched_info->prev_head;
9446 last = PREV_INSN (last))
9447 if (INSN_P (last) && GET_MODE (last) == TImode
9448 && stops_p [INSN_UID (last)])
9449 break;
9450 if (last == current_sched_info->prev_head)
9451 last = insn;
9452 last = prev_active_insn (last);
9453 if (last
9454 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9455 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9456 last);
9457 init_insn_group_barriers ();
9458 for (last = NEXT_INSN (last);
9459 last != insn;
9460 last = NEXT_INSN (last))
9461 if (INSN_P (last))
388092d5
AB
9462 {
9463 group_barrier_needed (last);
9464 if (recog_memoized (last) >= 0
9465 && important_for_bundling_p (last))
9466 seen_good_insn = 1;
9467 }
30028c85
VM
9468 }
9469 else
9470 {
9471 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9472 insn);
9473 init_insn_group_barriers ();
388092d5 9474 seen_good_insn = 0;
30028c85 9475 }
c1bc6ca8 9476 group_barrier_needed (insn);
388092d5
AB
9477 if (recog_memoized (insn) >= 0
9478 && important_for_bundling_p (insn))
9479 seen_good_insn = 1;
2130b7fb 9480 }
388092d5
AB
9481 else if (recog_memoized (insn) >= 0
9482 && important_for_bundling_p (insn))
034288ef 9483 seen_good_insn = 1;
b64925dc 9484 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
c65ebc55 9485 }
2130b7fb 9486 }
30028c85 9487}
2130b7fb 9488
30028c85 9489\f
2130b7fb 9490
a4d05547 9491/* If the following function returns TRUE, we will use the DFA
30028c85 9492 insn scheduler. */
2130b7fb 9493
c237e94a 9494static int
9c808aad 9495ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 9496{
30028c85
VM
9497 return (reload_completed ? 6 : 4);
9498}
2130b7fb 9499
/* The following function initializes the variable `dfa_pre_cycle_insn'. */
2130b7fb 9501
30028c85 9502static void
9c808aad 9503ia64_init_dfa_pre_cycle_insn (void)
30028c85
VM
9504{
9505 if (temp_dfa_state == NULL)
2130b7fb 9506 {
30028c85
VM
9507 dfa_state_size = state_size ();
9508 temp_dfa_state = xmalloc (dfa_state_size);
9509 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 9510 }
30028c85 9511 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
0f82e5c9 9512 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
30028c85
VM
9513 recog_memoized (dfa_pre_cycle_insn);
9514 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
0f82e5c9 9515 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
30028c85
VM
9516 recog_memoized (dfa_stop_insn);
9517}
2130b7fb 9518
30028c85
VM
9519/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9520 used by the DFA insn scheduler. */
2130b7fb 9521
30028c85 9522static rtx
9c808aad 9523ia64_dfa_pre_cycle_insn (void)
30028c85
VM
9524{
9525 return dfa_pre_cycle_insn;
9526}
2130b7fb 9527
30028c85
VM
9528/* The following function returns TRUE if PRODUCER (of type ilog or
9529 ld) produces address for CONSUMER (of type st or stf). */
2130b7fb 9530
30028c85 9531int
647d790d 9532ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
30028c85
VM
9533{
9534 rtx dest, reg, mem;
2130b7fb 9535
e820471b 9536 gcc_assert (producer && consumer);
30028c85 9537 dest = ia64_single_set (producer);
e820471b
NS
9538 gcc_assert (dest);
9539 reg = SET_DEST (dest);
9540 gcc_assert (reg);
30028c85
VM
9541 if (GET_CODE (reg) == SUBREG)
9542 reg = SUBREG_REG (reg);
e820471b
NS
9543 gcc_assert (GET_CODE (reg) == REG);
9544
30028c85 9545 dest = ia64_single_set (consumer);
e820471b
NS
9546 gcc_assert (dest);
9547 mem = SET_DEST (dest);
9548 gcc_assert (mem && GET_CODE (mem) == MEM);
30028c85 9549 return reg_mentioned_p (reg, mem);
2130b7fb
BS
9550}
9551
30028c85
VM
9552/* The following function returns TRUE if PRODUCER (of type ilog or
9553 ld) produces address for CONSUMER (of type ld or fld). */
2130b7fb 9554
30028c85 9555int
647d790d 9556ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
2130b7fb 9557{
30028c85
VM
9558 rtx dest, src, reg, mem;
9559
e820471b 9560 gcc_assert (producer && consumer);
30028c85 9561 dest = ia64_single_set (producer);
e820471b
NS
9562 gcc_assert (dest);
9563 reg = SET_DEST (dest);
9564 gcc_assert (reg);
30028c85
VM
9565 if (GET_CODE (reg) == SUBREG)
9566 reg = SUBREG_REG (reg);
e820471b
NS
9567 gcc_assert (GET_CODE (reg) == REG);
9568
30028c85 9569 src = ia64_single_set (consumer);
e820471b
NS
9570 gcc_assert (src);
9571 mem = SET_SRC (src);
9572 gcc_assert (mem);
048d0d36 9573
30028c85
VM
9574 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9575 mem = XVECEXP (mem, 0, 0);
048d0d36 9576 else if (GET_CODE (mem) == IF_THEN_ELSE)
917f1b7e 9577 /* ??? Is this bypass necessary for ld.c? */
048d0d36
MK
9578 {
9579 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9580 mem = XEXP (mem, 1);
9581 }
9582
30028c85
VM
9583 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9584 mem = XEXP (mem, 0);
ef1ecf87 9585
048d0d36
MK
9586 if (GET_CODE (mem) == UNSPEC)
9587 {
9588 int c = XINT (mem, 1);
9589
388092d5
AB
9590 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9591 || c == UNSPEC_LDSA);
048d0d36
MK
9592 mem = XVECEXP (mem, 0, 0);
9593 }
9594
ef1ecf87 9595 /* Note that LO_SUM is used for GOT loads. */
e820471b 9596 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
ef1ecf87 9597
30028c85
VM
9598 return reg_mentioned_p (reg, mem);
9599}
9600
/* The following function returns TRUE if INSN produces an address for a
   load/store insn.  We will place such insns into an M slot because that
   decreases their latency. */
30028c85
VM
9604
9605int
9c808aad 9606ia64_produce_address_p (rtx insn)
30028c85
VM
9607{
9608 return insn->call;
2130b7fb 9609}
30028c85 9610
2130b7fb 9611\f
3b572406
RH
9612/* Emit pseudo-ops for the assembler to describe predicate relations.
9613 At present this assumes that we only consider predicate pairs to
9614 be mutex, and that the assembler can deduce proper values from
9615 straight-line code. */
9616
9617static void
9c808aad 9618emit_predicate_relation_info (void)
3b572406 9619{
e0082a72 9620 basic_block bb;
3b572406 9621
4f42035e 9622 FOR_EACH_BB_REVERSE_FN (bb, cfun)
3b572406 9623 {
3b572406 9624 int r;
dd3d2b35 9625 rtx_insn *head = BB_HEAD (bb);
3b572406
RH
9626
9627 /* We only need such notes at code labels. */
b64925dc 9628 if (! LABEL_P (head))
3b572406 9629 continue;
740aeb38 9630 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
3b572406
RH
9631 head = NEXT_INSN (head);
9632
9f3b8452
RH
9633 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9634 grabbing the entire block of predicate registers. */
9635 for (r = PR_REG (2); r < PR_REG (64); r += 2)
6fb5fa3c 9636 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
3b572406 9637 {
f2f90c63 9638 rtx p = gen_rtx_REG (BImode, r);
dd3d2b35 9639 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111 9640 if (head == BB_END (bb))
1130d5e3 9641 BB_END (bb) = n;
3b572406
RH
9642 head = n;
9643 }
9644 }
ca3920ad
JW
9645
9646 /* Look for conditional calls that do not return, and protect predicate
9647 relations around them. Otherwise the assembler will assume the call
9648 returns, and complain about uses of call-clobbered predicates after
9649 the call. */
4f42035e 9650 FOR_EACH_BB_REVERSE_FN (bb, cfun)
ca3920ad 9651 {
dd3d2b35 9652 rtx_insn *insn = BB_HEAD (bb);
9c808aad 9653
ca3920ad
JW
9654 while (1)
9655 {
b64925dc 9656 if (CALL_P (insn)
ca3920ad
JW
9657 && GET_CODE (PATTERN (insn)) == COND_EXEC
9658 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9659 {
dd3d2b35
DM
9660 rtx_insn *b =
9661 emit_insn_before (gen_safe_across_calls_all (), insn);
9662 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111 9663 if (BB_HEAD (bb) == insn)
1130d5e3 9664 BB_HEAD (bb) = b;
a813c111 9665 if (BB_END (bb) == insn)
1130d5e3 9666 BB_END (bb) = a;
ca3920ad 9667 }
9c808aad 9668
a813c111 9669 if (insn == BB_END (bb))
ca3920ad
JW
9670 break;
9671 insn = NEXT_INSN (insn);
9672 }
9673 }
3b572406
RH
9674}
9675
c65ebc55
JW
9676/* Perform machine dependent operations on the rtl chain INSNS. */
9677
18dbd950 9678static void
9c808aad 9679ia64_reorg (void)
c65ebc55 9680{
1e3881c2
JH
9681 /* We are freeing block_for_insn in the toplev to keep compatibility
9682 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 9683 compute_bb_for_insn ();
a00fe19f
RH
9684
9685 /* If optimizing, we'll have split before scheduling. */
9686 if (optimize == 0)
6fb5fa3c 9687 split_all_insns ();
2130b7fb 9688
2ba42841 9689 if (optimize && flag_schedule_insns_after_reload
388092d5 9690 && dbg_cnt (ia64_sched2))
f4d578da 9691 {
547fdef8 9692 basic_block bb;
eced69b5 9693 timevar_push (TV_SCHED2);
f4d578da 9694 ia64_final_schedule = 1;
30028c85 9695
547fdef8
BS
9696 /* We can't let modulo-sched prevent us from scheduling any bbs,
9697 since we need the final schedule to produce bundle information. */
11cd3bed 9698 FOR_EACH_BB_FN (bb, cfun)
547fdef8
BS
9699 bb->flags &= ~BB_DISABLE_SCHEDULE;
9700
30028c85
VM
9701 initiate_bundle_states ();
9702 ia64_nop = make_insn_raw (gen_nop ());
0f82e5c9 9703 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
30028c85
VM
9704 recog_memoized (ia64_nop);
9705 clocks_length = get_max_uid () + 1;
5ead67f6 9706 stops_p = XCNEWVEC (char, clocks_length);
7400e46b 9707
30028c85
VM
9708 if (ia64_tune == PROCESSOR_ITANIUM2)
9709 {
9710 pos_1 = get_cpu_unit_code ("2_1");
9711 pos_2 = get_cpu_unit_code ("2_2");
9712 pos_3 = get_cpu_unit_code ("2_3");
9713 pos_4 = get_cpu_unit_code ("2_4");
9714 pos_5 = get_cpu_unit_code ("2_5");
9715 pos_6 = get_cpu_unit_code ("2_6");
9716 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9717 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9718 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9719 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9720 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9721 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9722 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9723 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9724 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9725 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9726 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9727 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9728 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9729 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9730 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9731 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9732 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9733 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9734 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9735 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9736 }
9737 else
9738 {
9739 pos_1 = get_cpu_unit_code ("1_1");
9740 pos_2 = get_cpu_unit_code ("1_2");
9741 pos_3 = get_cpu_unit_code ("1_3");
9742 pos_4 = get_cpu_unit_code ("1_4");
9743 pos_5 = get_cpu_unit_code ("1_5");
9744 pos_6 = get_cpu_unit_code ("1_6");
9745 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9746 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9747 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9748 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9749 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9750 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9751 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9752 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9753 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9754 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9755 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9756 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9757 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9758 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9759 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9760 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9761 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9762 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9763 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9764 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9765 }
388092d5
AB
9766
9767 if (flag_selective_scheduling2
9768 && !maybe_skip_selective_scheduling ())
9769 run_selective_scheduling ();
9770 else
9771 schedule_ebbs ();
9772
9773 /* Redo alignment computation, as it might gone wrong. */
9774 compute_alignments ();
9775
6fb5fa3c
DB
9776 /* We cannot reuse this one because it has been corrupted by the
9777 evil glat. */
30028c85 9778 finish_bundle_states ();
30028c85 9779 free (stops_p);
048d0d36 9780 stops_p = NULL;
c263766c 9781 emit_insn_group_barriers (dump_file);
30028c85 9782
f4d578da 9783 ia64_final_schedule = 0;
eced69b5 9784 timevar_pop (TV_SCHED2);
f4d578da
BS
9785 }
9786 else
c263766c 9787 emit_all_insn_group_barriers (dump_file);
f2f90c63 9788
6fb5fa3c
DB
9789 df_analyze ();
9790
f12f25a7
RH
9791 /* A call must not be the last instruction in a function, so that the
9792 return address is still within the function, so that unwinding works
9793 properly. Note that IA-64 differs from dwarf2 on this point. */
d5fabb58 9794 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
f12f25a7 9795 {
dd3d2b35 9796 rtx_insn *insn;
f12f25a7
RH
9797 int saw_stop = 0;
9798
9799 insn = get_last_insn ();
9800 if (! INSN_P (insn))
9801 insn = prev_active_insn (insn);
2ca57608 9802 if (insn)
f12f25a7 9803 {
2ca57608 9804 /* Skip over insns that expand to nothing. */
b64925dc 9805 while (NONJUMP_INSN_P (insn)
2ca57608
L
9806 && get_attr_empty (insn) == EMPTY_YES)
9807 {
9808 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9809 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9810 saw_stop = 1;
9811 insn = prev_active_insn (insn);
9812 }
b64925dc 9813 if (CALL_P (insn))
2ca57608
L
9814 {
9815 if (! saw_stop)
9816 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9817 emit_insn (gen_break_f ());
9818 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9819 }
f12f25a7
RH
9820 }
9821 }
9822
f2f90c63 9823 emit_predicate_relation_info ();
014a1138 9824
2ba42841 9825 if (flag_var_tracking)
014a1138
JZ
9826 {
9827 timevar_push (TV_VAR_TRACKING);
9828 variable_tracking_main ();
9829 timevar_pop (TV_VAR_TRACKING);
9830 }
0d475361 9831 df_finish_pass (false);
c65ebc55
JW
9832}
9833\f
9834/* Return true if REGNO is used by the epilogue. */
9835
9836int
9c808aad 9837ia64_epilogue_uses (int regno)
c65ebc55 9838{
6ca3c22f
RH
9839 switch (regno)
9840 {
9841 case R_GR (1):
b23ba0b8
RH
9842 /* With a call to a function in another module, we will write a new
9843 value to "gp". After returning from such a call, we need to make
9844 sure the function restores the original gp-value, even if the
9845 function itself does not use the gp anymore. */
9846 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
9847
9848 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9849 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9850 /* For functions defined with the syscall_linkage attribute, all
9851 input registers are marked as live at all function exits. This
9852 prevents the register allocator from using the input registers,
9853 which in turn makes it possible to restart a system call after
9854 an interrupt without having to save/restore the input registers.
9855 This also prevents kernel data from leaking to application code. */
9856 return lookup_attribute ("syscall_linkage",
9857 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9858
9859 case R_BR (0):
9860 /* Conditional return patterns can't represent the use of `b0' as
9861 the return address, so we force the value live this way. */
9862 return 1;
6b6c1201 9863
6ca3c22f
RH
9864 case AR_PFS_REGNUM:
9865 /* Likewise for ar.pfs, which is used by br.ret. */
9866 return 1;
5527bf14 9867
6ca3c22f
RH
9868 default:
9869 return 0;
9870 }
c65ebc55 9871}
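/* Illustrative sketch (hypothetical user code, not from this file): the
   syscall_linkage attribute tested above is written on the function
   being compiled, e.g.

     long sys_stub (long a, long b) __attribute__ ((syscall_linkage));

   which keeps the input registers in0-in7 live at every exit of the
   definition of sys_stub, so an interrupted system call can be
   restarted without saving and restoring them.  */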
15b5aef3
RH
9872
9873/* Return true if REGNO is used by the frame unwinder. */
9874
9875int
9c808aad 9876ia64_eh_uses (int regno)
15b5aef3 9877{
09639a83 9878 unsigned int r;
6fb5fa3c 9879
15b5aef3
RH
9880 if (! reload_completed)
9881 return 0;
9882
6fb5fa3c
DB
9883 if (regno == 0)
9884 return 0;
9885
9886 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9887 if (regno == current_frame_info.r[r]
9888 || regno == emitted_frame_related_regs[r])
9889 return 1;
15b5aef3
RH
9890
9891 return 0;
9892}
c65ebc55 9893\f
1cdbd630 9894/* Return true if this goes in small data/bss. */
c65ebc55
JW
9895
9896/* ??? We could also support our own long data here, generating movl/add/ld8
9897 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9898 code faster because there is one less load. This also includes incomplete
9899 types which can't go in sdata/sbss. */
9900
ae46c4e0 9901static bool
3101faab 9902ia64_in_small_data_p (const_tree exp)
ae46c4e0
RH
9903{
9904 if (TARGET_NO_SDATA)
9905 return false;
9906
3907500b
RH
9907 /* We want to merge strings, so we never consider them small data. */
9908 if (TREE_CODE (exp) == STRING_CST)
9909 return false;
9910
4c494a15
ZW
9911 /* Functions are never small data. */
9912 if (TREE_CODE (exp) == FUNCTION_DECL)
9913 return false;
9914
ae46c4e0
RH
9915 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9916 {
f961457f 9917 const char *section = DECL_SECTION_NAME (exp);
826eb7ed 9918
ae46c4e0 9919 if (strcmp (section, ".sdata") == 0
826eb7ed
JB
9920 || strncmp (section, ".sdata.", 7) == 0
9921 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9922 || strcmp (section, ".sbss") == 0
9923 || strncmp (section, ".sbss.", 6) == 0
9924 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
ae46c4e0
RH
9925 return true;
9926 }
9927 else
9928 {
9929 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9930
9931 /* If this is an incomplete type with size 0, then we can't put it
9932 in sdata because it might be too big when completed. */
9933 if (size > 0 && size <= ia64_section_threshold)
9934 return true;
9935 }
9936
9937 return false;
9938}
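/* Illustrative sketch (hypothetical definitions, assuming the usual
   8-byte small-data threshold): with the predicate above,

     int counter;          at or below the threshold: placed in .sbss
     char buf[4096];       above the threshold: stays in ordinary .bss

   so "counter" is reached with a short gp-relative addl and a single
   load, instead of an extra load through the linkage table.  */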
0c96007e 9939\f
ad0fc698
JW
9940/* Output assembly directives for prologue regions. */
9941
9942/* True if the current basic block is the last block of the function. */
9943
e0082a72 9944static bool last_block;
ad0fc698
JW
9945
9946/* True if we need a copy_state command at the start of the next block. */
9947
e0082a72 9948static bool need_copy_state;
ad0fc698 9949
658f32fd
AO
9950#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9951# define MAX_ARTIFICIAL_LABEL_BYTES 30
9952#endif
9953
ad0fc698
JW
9954/* The function emits unwind directives for the start of an epilogue. */
9955
9956static void
7d3c6cd8
RH
9957process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9958 bool unwind, bool frame ATTRIBUTE_UNUSED)
ad0fc698
JW
9959{
9960 /* If this isn't the last block of the function, then we need to label the
9961 current state, and copy it back in at the start of the next block. */
9962
e0082a72 9963 if (!last_block)
ad0fc698 9964 {
658f32fd
AO
9965 if (unwind)
9966 fprintf (asm_out_file, "\t.label_state %d\n",
9967 ++cfun->machine->state_num);
e0082a72 9968 need_copy_state = true;
ad0fc698
JW
9969 }
9970
658f32fd
AO
9971 if (unwind)
9972 fprintf (asm_out_file, "\t.restore sp\n");
ad0fc698 9973}
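/* Illustrative sketch (not from this file): for a function with an
   early return, the directives emitted here and in ia64_asm_unwind_emit
   pair up roughly as

     .label_state 1        before the mid-function epilogue
     .restore sp
     ...                   the early br.ret
     .body
     .copy_state 1         at the start of the following basic block
*/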
0c96007e 9974
5c255b57 9975/* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
97e242b0 9976
5c255b57
RH
9977static void
9978process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9979 bool unwind, bool frame)
0c96007e 9980{
0c96007e 9981 rtx dest = SET_DEST (pat);
5c255b57 9982 rtx src = SET_SRC (pat);
0c96007e 9983
5c255b57 9984 if (dest == stack_pointer_rtx)
0c96007e
AM
9985 {
9986 if (GET_CODE (src) == PLUS)
5c255b57 9987 {
0c96007e
AM
9988 rtx op0 = XEXP (src, 0);
9989 rtx op1 = XEXP (src, 1);
e820471b
NS
9990
9991 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9992
9993 if (INTVAL (op1) < 0)
658f32fd
AO
9994 {
9995 gcc_assert (!frame_pointer_needed);
9996 if (unwind)
5c255b57 9997 fprintf (asm_out_file,
16998094 9998 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
658f32fd 9999 -INTVAL (op1));
658f32fd 10000 }
0186257f 10001 else
658f32fd 10002 process_epilogue (asm_out_file, insn, unwind, frame);
0c96007e 10003 }
0186257f 10004 else
e820471b 10005 {
5c255b57 10006 gcc_assert (src == hard_frame_pointer_rtx);
658f32fd 10007 process_epilogue (asm_out_file, insn, unwind, frame);
e820471b 10008 }
5c255b57
RH
10009 }
10010 else if (dest == hard_frame_pointer_rtx)
10011 {
10012 gcc_assert (src == stack_pointer_rtx);
10013 gcc_assert (frame_pointer_needed);
0186257f 10014
5c255b57
RH
10015 if (unwind)
10016 fprintf (asm_out_file, "\t.vframe r%d\n",
10017 ia64_dbx_register_number (REGNO (dest)));
0c96007e 10018 }
5c255b57
RH
10019 else
10020 gcc_unreachable ();
10021}
0c96007e 10022
5c255b57 10023/* This function processes a SET pattern for REG_CFA_REGISTER. */
97e242b0 10024
5c255b57
RH
10025static void
10026process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10027{
10028 rtx dest = SET_DEST (pat);
10029 rtx src = SET_SRC (pat);
5c255b57 10030 int dest_regno = REGNO (dest);
5f740973 10031 int src_regno;
97e242b0 10032
5f740973 10033 if (src == pc_rtx)
5c255b57 10034 {
5c255b57 10035 /* Saving return address pointer. */
5c255b57
RH
10036 if (unwind)
10037 fprintf (asm_out_file, "\t.save rp, r%d\n",
10038 ia64_dbx_register_number (dest_regno));
5f740973
RH
10039 return;
10040 }
10041
10042 src_regno = REGNO (src);
97e242b0 10043
5f740973
RH
10044 switch (src_regno)
10045 {
5c255b57
RH
10046 case PR_REG (0):
10047 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10048 if (unwind)
10049 fprintf (asm_out_file, "\t.save pr, r%d\n",
10050 ia64_dbx_register_number (dest_regno));
10051 break;
97e242b0 10052
5c255b57
RH
10053 case AR_UNAT_REGNUM:
10054 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10055 if (unwind)
10056 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10057 ia64_dbx_register_number (dest_regno));
10058 break;
97e242b0 10059
5c255b57
RH
10060 case AR_LC_REGNUM:
10061 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10062 if (unwind)
10063 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10064 ia64_dbx_register_number (dest_regno));
10065 break;
10066
10067 default:
10068 /* Everything else should indicate being stored to memory. */
10069 gcc_unreachable ();
0c96007e 10070 }
5c255b57 10071}
97e242b0 10072
5c255b57 10073/* This function processes a SET pattern for REG_CFA_OFFSET. */
97e242b0 10074
5c255b57
RH
10075static void
10076process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10077{
10078 rtx dest = SET_DEST (pat);
10079 rtx src = SET_SRC (pat);
10080 int src_regno = REGNO (src);
10081 const char *saveop;
10082 HOST_WIDE_INT off;
10083 rtx base;
0c96007e 10084
5c255b57
RH
10085 gcc_assert (MEM_P (dest));
10086 if (GET_CODE (XEXP (dest, 0)) == REG)
10087 {
10088 base = XEXP (dest, 0);
10089 off = 0;
10090 }
10091 else
10092 {
10093 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10094 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10095 base = XEXP (XEXP (dest, 0), 0);
10096 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10097 }
97e242b0 10098
5c255b57
RH
10099 if (base == hard_frame_pointer_rtx)
10100 {
10101 saveop = ".savepsp";
10102 off = - off;
10103 }
10104 else
10105 {
10106 gcc_assert (base == stack_pointer_rtx);
10107 saveop = ".savesp";
10108 }
97e242b0 10109
5c255b57
RH
10110 src_regno = REGNO (src);
10111 switch (src_regno)
10112 {
10113 case BR_REG (0):
10114 gcc_assert (!current_frame_info.r[reg_save_b0]);
10115 if (unwind)
10116 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10117 saveop, off);
10118 break;
97e242b0 10119
5c255b57
RH
10120 case PR_REG (0):
10121 gcc_assert (!current_frame_info.r[reg_save_pr]);
10122 if (unwind)
10123 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10124 saveop, off);
10125 break;
97e242b0 10126
5c255b57
RH
10127 case AR_LC_REGNUM:
10128 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10129 if (unwind)
10130 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10131 saveop, off);
10132 break;
97e242b0 10133
5c255b57
RH
10134 case AR_PFS_REGNUM:
10135 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10136 if (unwind)
10137 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10138 saveop, off);
10139 break;
97e242b0 10140
5c255b57
RH
10141 case AR_UNAT_REGNUM:
10142 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10143 if (unwind)
10144 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10145 saveop, off);
10146 break;
97e242b0 10147
5c255b57
RH
10148 case GR_REG (4):
10149 case GR_REG (5):
10150 case GR_REG (6):
10151 case GR_REG (7):
10152 if (unwind)
10153 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10154 1 << (src_regno - GR_REG (4)));
10155 break;
97e242b0 10156
5c255b57
RH
10157 case BR_REG (1):
10158 case BR_REG (2):
10159 case BR_REG (3):
10160 case BR_REG (4):
10161 case BR_REG (5):
10162 if (unwind)
10163 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10164 1 << (src_regno - BR_REG (1)));
10165 break;
97e242b0 10166
5c255b57
RH
10167 case FR_REG (2):
10168 case FR_REG (3):
10169 case FR_REG (4):
10170 case FR_REG (5):
10171 if (unwind)
10172 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10173 1 << (src_regno - FR_REG (2)));
10174 break;
97e242b0 10175
5c255b57
RH
10176 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10177 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10178 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10179 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10180 if (unwind)
10181 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10182 1 << (src_regno - FR_REG (12)));
10183 break;
97e242b0 10184
5c255b57
RH
10185 default:
10186 /* ??? For some reason we mark other general registers, even those
10187 we can't represent in the unwind info. Ignore them. */
10188 break;
10189 }
0c96007e
AM
10190}
10191
0c96007e
AM
10192/* This function looks at a single insn and emits any directives
10193 required to unwind this insn. */
5c255b57 10194
a68b5e52 10195static void
ac44248e 10196ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
0c96007e 10197{
d5fabb58 10198 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
658f32fd 10199 bool frame = dwarf2out_do_frame ();
5c255b57
RH
10200 rtx note, pat;
10201 bool handled_one;
10202
10203 if (!unwind && !frame)
10204 return;
658f32fd 10205
5c255b57 10206 if (NOTE_INSN_BASIC_BLOCK_P (insn))
0c96007e 10207 {
fefa31b5
DM
10208 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10209 == EXIT_BLOCK_PTR_FOR_FN (cfun);
97e242b0 10210
5c255b57
RH
10211 /* Restore unwind state from immediately before the epilogue. */
10212 if (need_copy_state)
ad0fc698 10213 {
5c255b57 10214 if (unwind)
ad0fc698 10215 {
5c255b57
RH
10216 fprintf (asm_out_file, "\t.body\n");
10217 fprintf (asm_out_file, "\t.copy_state %d\n",
10218 cfun->machine->state_num);
ad0fc698 10219 }
5c255b57 10220 need_copy_state = false;
ad0fc698 10221 }
5c255b57 10222 }
ad0fc698 10223
b64925dc 10224 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
5c255b57
RH
10225 return;
10226
10227 /* Look for the ALLOC insn. */
10228 if (INSN_CODE (insn) == CODE_FOR_alloc)
10229 {
10230 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10231 int dest_regno = REGNO (dest);
ad0fc698 10232
5c255b57
RH
10233 /* If this is the final destination for ar.pfs, then this must
10234 be the alloc in the prologue. */
10235 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10236 {
10237 if (unwind)
10238 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10239 ia64_dbx_register_number (dest_regno));
10240 }
97e242b0 10241 else
5c255b57
RH
10242 {
10243 /* This must be an alloc before a sibcall. We must drop the
10244 old frame info. The easiest way to drop the old frame
10245 info is to ensure we had a ".restore sp" directive
10246 followed by a new prologue. If the procedure doesn't
10247 have a memory-stack frame, we'll issue a dummy ".restore
10248 sp" now. */
10249 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10250 /* If we haven't done process_epilogue () yet, do it now. */
10251 process_epilogue (asm_out_file, insn, unwind, frame);
10252 if (unwind)
10253 fprintf (asm_out_file, "\t.prologue\n");
10254 }
10255 return;
10256 }
0c96007e 10257
5c255b57
RH
10258 handled_one = false;
10259 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10260 switch (REG_NOTE_KIND (note))
10261 {
10262 case REG_CFA_ADJUST_CFA:
10263 pat = XEXP (note, 0);
10264 if (pat == NULL)
10265 pat = PATTERN (insn);
10266 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10267 handled_one = true;
10268 break;
809d4ef1 10269
5c255b57
RH
10270 case REG_CFA_OFFSET:
10271 pat = XEXP (note, 0);
10272 if (pat == NULL)
10273 pat = PATTERN (insn);
10274 process_cfa_offset (asm_out_file, pat, unwind);
10275 handled_one = true;
10276 break;
809d4ef1 10277
5c255b57
RH
10278 case REG_CFA_REGISTER:
10279 pat = XEXP (note, 0);
10280 if (pat == NULL)
10281 pat = PATTERN (insn);
10282 process_cfa_register (asm_out_file, pat, unwind);
10283 handled_one = true;
10284 break;
10285
10286 case REG_FRAME_RELATED_EXPR:
10287 case REG_CFA_DEF_CFA:
10288 case REG_CFA_EXPRESSION:
10289 case REG_CFA_RESTORE:
10290 case REG_CFA_SET_VDRAP:
10291 /* Not used in the ia64 port. */
10292 gcc_unreachable ();
10293
10294 default:
10295 /* Not a frame-related note. */
10296 break;
10297 }
10298
10299 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10300 explicit action to take. No guessing required. */
10301 gcc_assert (handled_one);
0c96007e 10302}
c65ebc55 10303
a68b5e52
RH
10304/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10305
10306static void
10307ia64_asm_emit_except_personality (rtx personality)
10308{
10309 fputs ("\t.personality\t", asm_out_file);
10310 output_addr_const (asm_out_file, personality);
10311 fputc ('\n', asm_out_file);
10312}
10313
10314/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10315
10316static void
10317ia64_asm_init_sections (void)
10318{
10319 exception_section = get_unnamed_section (0, output_section_asm_op,
10320 "\t.handlerdata");
10321}
f0a0390e
RH
10322
10323/* Implement TARGET_DEBUG_UNWIND_INFO. */
10324
10325static enum unwind_info_type
10326ia64_debug_unwind_info (void)
10327{
10328 return UI_TARGET;
10329}
0551c32d 10330\f
af795c3c
RH
10331enum ia64_builtins
10332{
10333 IA64_BUILTIN_BSP,
c252db20
L
10334 IA64_BUILTIN_COPYSIGNQ,
10335 IA64_BUILTIN_FABSQ,
10336 IA64_BUILTIN_FLUSHRS,
fcb82ab0 10337 IA64_BUILTIN_INFQ,
b14446e2
SE
10338 IA64_BUILTIN_HUGE_VALQ,
10339 IA64_BUILTIN_max
af795c3c
RH
10340};
10341
b14446e2
SE
10342static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10343
c65ebc55 10344void
9c808aad 10345ia64_init_builtins (void)
c65ebc55 10346{
9649812a 10347 tree fpreg_type;
bf9ab6b6 10348 tree float80_type;
b14446e2 10349 tree decl;
9649812a
MM
10350
10351 /* The __fpreg type. */
10352 fpreg_type = make_node (REAL_TYPE);
4de67c26 10353 TYPE_PRECISION (fpreg_type) = 82;
9649812a
MM
10354 layout_type (fpreg_type);
10355 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10356
10357 /* The __float80 type. */
bf9ab6b6 10358 float80_type = make_node (REAL_TYPE);
968a7562 10359 TYPE_PRECISION (float80_type) = 80;
bf9ab6b6
MM
10360 layout_type (float80_type);
10361 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
10362
10363 /* The __float128 type. */
02befdf4 10364 if (!TARGET_HPUX)
9649812a 10365 {
b14446e2 10366 tree ftype;
9649812a 10367 tree float128_type = make_node (REAL_TYPE);
c252db20 10368
9649812a
MM
10369 TYPE_PRECISION (float128_type) = 128;
10370 layout_type (float128_type);
10371 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
c252db20
L
10372
10373 /* TFmode support builtins. */
c0676219 10374 ftype = build_function_type_list (float128_type, NULL_TREE);
b14446e2
SE
10375 decl = add_builtin_function ("__builtin_infq", ftype,
10376 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10377 NULL, NULL_TREE);
10378 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
c252db20 10379
b14446e2
SE
10380 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10381 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10382 NULL, NULL_TREE);
10383 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
fcb82ab0 10384
c252db20
L
10385 ftype = build_function_type_list (float128_type,
10386 float128_type,
10387 NULL_TREE);
10388 decl = add_builtin_function ("__builtin_fabsq", ftype,
10389 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10390 "__fabstf2", NULL_TREE);
10391 TREE_READONLY (decl) = 1;
b14446e2 10392 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
c252db20
L
10393
10394 ftype = build_function_type_list (float128_type,
10395 float128_type,
10396 float128_type,
10397 NULL_TREE);
10398 decl = add_builtin_function ("__builtin_copysignq", ftype,
10399 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10400 "__copysigntf3", NULL_TREE);
10401 TREE_READONLY (decl) = 1;
b14446e2 10402 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
9649812a
MM
10403 }
10404 else
02befdf4 10405 /* Under HPUX, this is a synonym for "long double". */
9649812a
MM
10406 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10407 "__float128");
10408
f2972bf8 10409 /* Fwrite on VMS is non-standard. */
171da07a
RH
10410#if TARGET_ABI_OPEN_VMS
10411 vms_patch_builtins ();
10412#endif
f2972bf8 10413
6e34d3a3 10414#define def_builtin(name, type, code) \
c79efc4d
RÁE
10415 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10416 NULL, NULL_TREE)
0551c32d 10417
b14446e2 10418 decl = def_builtin ("__builtin_ia64_bsp",
c0676219
NF
10419 build_function_type_list (ptr_type_node, NULL_TREE),
10420 IA64_BUILTIN_BSP);
b14446e2 10421 ia64_builtins[IA64_BUILTIN_BSP] = decl;
ce152ef8 10422
b14446e2 10423 decl = def_builtin ("__builtin_ia64_flushrs",
c0676219
NF
10424 build_function_type_list (void_type_node, NULL_TREE),
10425 IA64_BUILTIN_FLUSHRS);
b14446e2 10426 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
ce152ef8 10427
0551c32d 10428#undef def_builtin
7d522000
SE
10429
10430 if (TARGET_HPUX)
10431 {
ccea4a27 10432 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
e79983f4 10433 set_user_assembler_name (decl, "_Isfinite");
ccea4a27 10434 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
e79983f4 10435 set_user_assembler_name (decl, "_Isfinitef");
ccea4a27 10436 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
e79983f4 10437 set_user_assembler_name (decl, "_Isfinitef128");
7d522000 10438 }
c65ebc55
JW
10439}
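/* Illustrative usage (hypothetical user-side code, not from this file):
   the machine-specific builtins registered above are used from IA-64
   code as

     void *bsp = __builtin_ia64_bsp ();   current backing-store pointer
     __builtin_ia64_flushrs ();           flush the register stack

   and, on non-HP-UX targets, __builtin_infq () and
   __builtin_huge_valq () yield a __float128 infinity.  */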
10440
c65ebc55 10441rtx
9c808aad 10442ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
ef4bddc2 10443 machine_mode mode ATTRIBUTE_UNUSED,
9c808aad 10444 int ignore ATTRIBUTE_UNUSED)
c65ebc55 10445{
767fad4c 10446 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
97e242b0 10447 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
c65ebc55
JW
10448
10449 switch (fcode)
10450 {
ce152ef8 10451 case IA64_BUILTIN_BSP:
0551c32d
RH
10452 if (! target || ! register_operand (target, DImode))
10453 target = gen_reg_rtx (DImode);
10454 emit_insn (gen_bsp_value (target));
8419b675
RK
10455#ifdef POINTERS_EXTEND_UNSIGNED
10456 target = convert_memory_address (ptr_mode, target);
10457#endif
0551c32d 10458 return target;
ce152ef8
AM
10459
10460 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
10461 emit_insn (gen_flushrs ());
10462 return const0_rtx;
ce152ef8 10463
c252db20 10464 case IA64_BUILTIN_INFQ:
fcb82ab0 10465 case IA64_BUILTIN_HUGE_VALQ:
c252db20 10466 {
ef4bddc2 10467 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
c252db20
L
10468 REAL_VALUE_TYPE inf;
10469 rtx tmp;
10470
10471 real_inf (&inf);
6aad068a 10472 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
c252db20 10473
6aad068a 10474 tmp = validize_mem (force_const_mem (target_mode, tmp));
c252db20
L
10475
10476 if (target == 0)
6aad068a 10477 target = gen_reg_rtx (target_mode);
c252db20
L
10478
10479 emit_move_insn (target, tmp);
10480 return target;
10481 }
10482
10483 case IA64_BUILTIN_FABSQ:
10484 case IA64_BUILTIN_COPYSIGNQ:
10485 return expand_call (exp, target, ignore);
10486
c65ebc55 10487 default:
c252db20 10488 gcc_unreachable ();
c65ebc55
JW
10489 }
10490
0551c32d 10491 return NULL_RTX;
c65ebc55 10492}
0d7839da 10493
b14446e2
SE
10494/* Return the ia64 builtin for CODE. */
10495
10496static tree
10497ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10498{
10499 if (code >= IA64_BUILTIN_max)
10500 return error_mark_node;
10501
10502 return ia64_builtins[code];
10503}
10504
0d7839da
SE
10505/* For HP-UX IA64, aggregate parameters are passed and stored in the
10506 most significant bits of the stack slot. */
10507
10508enum direction
ef4bddc2 10509ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
0d7839da 10510{
ed168e45 10511 /* Exception to normal case for structures/unions/etc. */
0d7839da
SE
10512
10513 if (type && AGGREGATE_TYPE_P (type)
10514 && int_size_in_bytes (type) < UNITS_PER_WORD)
10515 return upward;
10516
d3704c46
KH
10517 /* Fall back to the default. */
10518 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
0d7839da 10519}
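/* Illustrative sketch (hypothetical aggregate, HP-UX only): for

     struct s { short x; };

   the "upward" result above means the two bytes occupy the most
   significant end of the 8-byte argument slot rather than the low end.  */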
686f3bf0 10520
c47c29c8
L
10521/* Emit text to declare externally defined variables and functions, because
10522 the Intel assembler does not support undefined externals. */
686f3bf0 10523
c47c29c8
L
10524void
10525ia64_asm_output_external (FILE *file, tree decl, const char *name)
686f3bf0 10526{
c47c29c8
L
10527 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10528 set in order to avoid putting out names that are never really
10529 used. */
10530 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
686f3bf0 10531 {
c47c29c8 10532 /* maybe_assemble_visibility will return 1 if the assembler
2e226e66 10533 visibility directive is output. */
c47c29c8
L
10534 int need_visibility = ((*targetm.binds_local_p) (decl)
10535 && maybe_assemble_visibility (decl));
57d4f65c 10536
c47c29c8
L
10537 /* GNU as does not need anything here, but the HP linker does
10538 need something for external functions. */
10539 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10540 && TREE_CODE (decl) == FUNCTION_DECL)
812b587e 10541 (*targetm.asm_out.globalize_decl_name) (file, decl);
c47c29c8
L
10542 else if (need_visibility && !TARGET_GNU_AS)
10543 (*targetm.asm_out.globalize_label) (file, name);
686f3bf0
SE
10544 }
10545}
10546
1f7aa7cd 10547/* Set SImode div/mod functions; init_integral_libfuncs only initializes
6bc709c1
L
10548 modes of word_mode and larger. Rename the TFmode libfuncs using the
10549 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10550 backward compatibility. */
1f7aa7cd
SE
10551
10552static void
10553ia64_init_libfuncs (void)
10554{
10555 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10556 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10557 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10558 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
6bc709c1
L
10559
10560 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10561 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10562 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10563 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10564 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10565
10566 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10567 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10568 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10569 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10570 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10571 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10572
10573 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10574 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
4a73d865 10575 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
6bc709c1
L
10576 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10577 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10578
10579 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10580 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
4a73d865 10581 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
2a3ebe77
JM
10582 /* HP-UX 11.23 libc does not have a function for unsigned
10583 SImode-to-TFmode conversion. */
10584 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
1f7aa7cd
SE
10585}
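/* Illustrative note (hypothetical user code, not from this file): with
   the renamings above, a TFmode addition such as

     __float128 add (__float128 a, __float128 b) { return a + b; }

   is emitted as a call to _U_Qfadd instead of libgcc's default
   __addtf3.  */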
10586
c15c90bb 10587/* Rename all the TFmode libfuncs using the HPUX conventions. */
738e7b39 10588
c15c90bb
ZW
10589static void
10590ia64_hpux_init_libfuncs (void)
10591{
1f7aa7cd
SE
10592 ia64_init_libfuncs ();
10593
bdbba3c2
SE
10594 /* The HP SI millicode division and mod functions expect DI arguments.
10595 By turning them off completely we avoid using both libgcc and the
10596 non-standard millicode routines and use the HP DI millicode routines
10597 instead. */
10598
10599 set_optab_libfunc (sdiv_optab, SImode, 0);
10600 set_optab_libfunc (udiv_optab, SImode, 0);
10601 set_optab_libfunc (smod_optab, SImode, 0);
10602 set_optab_libfunc (umod_optab, SImode, 0);
10603
10604 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10605 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10606 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10607 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10608
10609 /* HP-UX libc has TF min/max/abs routines in it. */
c15c90bb
ZW
10610 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10611 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10612 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
c15c90bb 10613
24ea7948
ZW
10614 /* ia64_expand_compare uses this. */
10615 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10616
10617 /* These should never be used. */
10618 set_optab_libfunc (eq_optab, TFmode, 0);
10619 set_optab_libfunc (ne_optab, TFmode, 0);
10620 set_optab_libfunc (gt_optab, TFmode, 0);
10621 set_optab_libfunc (ge_optab, TFmode, 0);
10622 set_optab_libfunc (lt_optab, TFmode, 0);
10623 set_optab_libfunc (le_optab, TFmode, 0);
c15c90bb 10624}
738e7b39
RK
10625
10626/* Rename the division and modulus functions in VMS. */
10627
10628static void
10629ia64_vms_init_libfuncs (void)
10630{
10631 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10632 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10633 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10634 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10635 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10636 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10637 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10638 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
f2972bf8
DR
10639 abort_libfunc = init_one_libfunc ("decc$abort");
10640 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10641#ifdef MEM_LIBFUNCS_INIT
10642 MEM_LIBFUNCS_INIT;
10643#endif
738e7b39 10644}
6bc709c1
L
10645
10646/* Rename the TFmode libfuncs available from soft-fp in glibc using
10647 the HPUX conventions. */
10648
10649static void
10650ia64_sysv4_init_libfuncs (void)
10651{
10652 ia64_init_libfuncs ();
10653
10654 /* These functions are not part of the HPUX TFmode interface. We
10655 use them instead of _U_Qfcmp, which doesn't work the way we
10656 expect. */
10657 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10658 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10659 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10660 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10661 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10662 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10663
10664 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10665 glibc doesn't have them. */
10666}
c252db20
L
10667
10668/* Use soft-fp. */
10669
10670static void
10671ia64_soft_fp_init_libfuncs (void)
10672{
10673}
f2972bf8
DR
10674
10675static bool
ef4bddc2 10676ia64_vms_valid_pointer_mode (machine_mode mode)
f2972bf8
DR
10677{
10678 return (mode == SImode || mode == DImode);
10679}
ae46c4e0 10680\f
9b580a0b
RH
10681/* For HPUX, it is illegal to have relocations in shared segments. */
10682
10683static int
10684ia64_hpux_reloc_rw_mask (void)
10685{
10686 return 3;
10687}
10688
10689/* For others, relax this so that relocations to local data go in
10690 read-only segments, but we still cannot allow global relocations
10691 in read-only segments. */
10692
10693static int
10694ia64_reloc_rw_mask (void)
10695{
10696 return flag_pic ? 3 : 2;
10697}
10698
d6b5193b
RS
10699/* Return the section to use for X. The only special thing we do here
10700 is to honor small data. */
b64a1b53 10701
d6b5193b 10702static section *
ef4bddc2 10703ia64_select_rtx_section (machine_mode mode, rtx x,
9c808aad 10704 unsigned HOST_WIDE_INT align)
b64a1b53
RH
10705{
10706 if (GET_MODE_SIZE (mode) > 0
1f4a2e84
SE
10707 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10708 && !TARGET_NO_SDATA)
d6b5193b 10709 return sdata_section;
b64a1b53 10710 else
d6b5193b 10711 return default_elf_select_rtx_section (mode, x, align);
b64a1b53
RH
10712}
10713
1e1bd14e 10714static unsigned int
abb8b19a
AM
10715ia64_section_type_flags (tree decl, const char *name, int reloc)
10716{
10717 unsigned int flags = 0;
10718
10719 if (strcmp (name, ".sdata") == 0
10720 || strncmp (name, ".sdata.", 7) == 0
10721 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10722 || strncmp (name, ".sdata2.", 8) == 0
10723 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10724 || strcmp (name, ".sbss") == 0
10725 || strncmp (name, ".sbss.", 6) == 0
10726 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10727 flags = SECTION_SMALL;
10728
9b580a0b 10729 flags |= default_section_type_flags (decl, name, reloc);
abb8b19a 10730 return flags;
1e1bd14e
RH
10731}
10732
57782ad8
MM
10733/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10734 structure type and that the address of that type should be passed
10735 in out0, rather than in r8. */
10736
10737static bool
10738ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10739{
10740 tree ret_type = TREE_TYPE (fntype);
10741
10742 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10743 as the structure return address parameter, if the return value
10744 type has a non-trivial copy constructor or destructor. It is not
10745 clear if this same convention should be used for other
10746 programming languages. Until G++ 3.4, we incorrectly used r8 for
10747 these return values. */
10748 return (abi_version_at_least (2)
10749 && ret_type
10750 && TYPE_MODE (ret_type) == BLKmode
10751 && TREE_ADDRESSABLE (ret_type)
dcc97066 10752 && lang_GNU_CXX ());
57782ad8 10753}
1e1bd14e 10754
5f13cfc6
RH
10755/* Output the assembler code for a thunk function. THUNK_DECL is the
10756 declaration for the thunk function itself, FUNCTION is the decl for
10757 the target function. DELTA is an immediate constant offset to be
272d0bee 10758 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
10759 *(*this + vcall_offset) should be added to THIS. */
10760
c590b625 10761static void
9c808aad
AJ
10762ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10763 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10764 tree function)
483ab821 10765{
dd3d2b35
DM
10766 rtx this_rtx, funexp;
10767 rtx_insn *insn;
57782ad8
MM
10768 unsigned int this_parmno;
10769 unsigned int this_regno;
13f70342 10770 rtx delta_rtx;
5f13cfc6 10771
599aedd9 10772 reload_completed = 1;
fe3ad572 10773 epilogue_completed = 1;
599aedd9 10774
5f13cfc6
RH
10775 /* Set things up as ia64_expand_prologue might. */
10776 last_scratch_gr_reg = 15;
10777
10778 memset (&current_frame_info, 0, sizeof (current_frame_info));
10779 current_frame_info.spill_cfa_off = -16;
10780 current_frame_info.n_input_regs = 1;
10781 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10782
5f13cfc6 10783 /* Mark the end of the (empty) prologue. */
2e040219 10784 emit_note (NOTE_INSN_PROLOGUE_END);
5f13cfc6 10785
57782ad8
MM
10786 /* Figure out whether "this" will be the first parameter (the
10787 typical case) or the second parameter (as happens when the
10788 virtual function returns certain class objects). */
10789 this_parmno
10790 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10791 ? 1 : 0);
10792 this_regno = IN_REG (this_parmno);
10793 if (!TARGET_REG_NAMES)
10794 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10795
0a2aaacc 10796 this_rtx = gen_rtx_REG (Pmode, this_regno);
13f70342
RH
10797
10798 /* Apply the constant offset, if required. */
10799 delta_rtx = GEN_INT (delta);
36c216e5
MM
10800 if (TARGET_ILP32)
10801 {
57782ad8 10802 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
36c216e5 10803 REG_POINTER (tmp) = 1;
13f70342 10804 if (delta && satisfies_constraint_I (delta_rtx))
36c216e5 10805 {
0a2aaacc 10806 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
36c216e5
MM
10807 delta = 0;
10808 }
10809 else
0a2aaacc 10810 emit_insn (gen_ptr_extend (this_rtx, tmp));
36c216e5 10811 }
5f13cfc6
RH
10812 if (delta)
10813 {
13f70342 10814 if (!satisfies_constraint_I (delta_rtx))
5f13cfc6
RH
10815 {
10816 rtx tmp = gen_rtx_REG (Pmode, 2);
10817 emit_move_insn (tmp, delta_rtx);
10818 delta_rtx = tmp;
10819 }
0a2aaacc 10820 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
5f13cfc6
RH
10821 }
10822
10823 /* Apply the offset from the vtable, if required. */
10824 if (vcall_offset)
10825 {
10826 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10827 rtx tmp = gen_rtx_REG (Pmode, 2);
10828
36c216e5
MM
10829 if (TARGET_ILP32)
10830 {
10831 rtx t = gen_rtx_REG (ptr_mode, 2);
10832 REG_POINTER (t) = 1;
0a2aaacc 10833 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
13f70342 10834 if (satisfies_constraint_I (vcall_offset_rtx))
36c216e5 10835 {
13f70342 10836 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
36c216e5
MM
10837 vcall_offset = 0;
10838 }
10839 else
10840 emit_insn (gen_ptr_extend (tmp, t));
10841 }
10842 else
0a2aaacc 10843 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
5f13cfc6 10844
36c216e5 10845 if (vcall_offset)
5f13cfc6 10846 {
13f70342 10847 if (!satisfies_constraint_J (vcall_offset_rtx))
36c216e5
MM
10848 {
10849 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10850 emit_move_insn (tmp2, vcall_offset_rtx);
10851 vcall_offset_rtx = tmp2;
10852 }
10853 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
5f13cfc6 10854 }
5f13cfc6 10855
36c216e5 10856 if (TARGET_ILP32)
13f70342 10857 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
36c216e5
MM
10858 else
10859 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
5f13cfc6 10860
0a2aaacc 10861 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
5f13cfc6
RH
10862 }
10863
10864 /* Generate a tail call to the target function. */
10865 if (! TREE_USED (function))
10866 {
10867 assemble_external (function);
10868 TREE_USED (function) = 1;
10869 }
10870 funexp = XEXP (DECL_RTL (function), 0);
10871 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10872 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10873 insn = get_last_insn ();
10874 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
10875
10876 /* Code generation for calls relies on splitting. */
10877 reload_completed = 1;
fe3ad572 10878 epilogue_completed = 1;
599aedd9
RH
10879 try_split (PATTERN (insn), insn, 0);
10880
5f13cfc6
RH
10881 emit_barrier ();
10882
10883 /* Run just enough of rest_of_compilation to get the insns emitted.
10884 There's not really enough bulk here to make other passes such as
10885 instruction scheduling worth while. Note that use_thunk calls
10886 assemble_start_function and assemble_end_function. */
599aedd9 10887
18dbd950 10888 emit_all_insn_group_barriers (NULL);
5f13cfc6 10889 insn = get_insns ();
5f13cfc6
RH
10890 shorten_branches (insn);
10891 final_start_function (insn, file, 1);
c9d691e9 10892 final (insn, file, 1);
5f13cfc6 10893 final_end_function ();
599aedd9
RH
10894
10895 reload_completed = 0;
fe3ad572 10896 epilogue_completed = 0;
483ab821
MM
10897}
10898
351a758b
KH
10899/* Worker function for TARGET_STRUCT_VALUE_RTX. */
10900
10901static rtx
57782ad8 10902ia64_struct_value_rtx (tree fntype,
351a758b
KH
10903 int incoming ATTRIBUTE_UNUSED)
10904{
f2972bf8
DR
10905 if (TARGET_ABI_OPEN_VMS ||
10906 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
57782ad8 10907 return NULL_RTX;
351a758b
KH
10908 return gen_rtx_REG (Pmode, GR_REG (8));
10909}
10910
88ed5ef5 10911static bool
ef4bddc2 10912ia64_scalar_mode_supported_p (machine_mode mode)
88ed5ef5
SE
10913{
10914 switch (mode)
10915 {
10916 case QImode:
10917 case HImode:
10918 case SImode:
10919 case DImode:
10920 case TImode:
10921 return true;
10922
10923 case SFmode:
10924 case DFmode:
10925 case XFmode:
4de67c26 10926 case RFmode:
88ed5ef5
SE
10927 return true;
10928
10929 case TFmode:
c252db20 10930 return true;
88ed5ef5
SE
10931
10932 default:
10933 return false;
10934 }
10935}
10936
f61134e8 10937static bool
ef4bddc2 10938ia64_vector_mode_supported_p (machine_mode mode)
f61134e8
RH
10939{
10940 switch (mode)
10941 {
10942 case V8QImode:
10943 case V4HImode:
10944 case V2SImode:
10945 return true;
10946
10947 case V2SFmode:
10948 return true;
10949
10950 default:
10951 return false;
10952 }
10953}
10954
8cc4b7a2
JM
10955/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P. */
10956
10957static bool
ef4bddc2 10958ia64_libgcc_floating_mode_supported_p (machine_mode mode)
8cc4b7a2
JM
10959{
10960 switch (mode)
10961 {
10962 case SFmode:
10963 case DFmode:
10964 return true;
10965
10966 case XFmode:
10967#ifdef IA64_NO_LIBGCC_XFMODE
10968 return false;
10969#else
10970 return true;
10971#endif
10972
10973 case TFmode:
10974#ifdef IA64_NO_LIBGCC_TFMODE
10975 return false;
10976#else
10977 return true;
10978#endif
10979
10980 default:
10981 return false;
10982 }
10983}
10984
694a2f6e
EB
10985/* Implement the FUNCTION_PROFILER macro. */
10986
2b4f149b
RH
10987void
10988ia64_output_function_profiler (FILE *file, int labelno)
10989{
694a2f6e
EB
10990 bool indirect_call;
10991
10992 /* If the function needs a static chain and the static chain
10993 register is r15, we use an indirect call so as to bypass
10994 the PLT stub in case the executable is dynamically linked,
10995 because the stub clobbers r15 as per 5.3.6 of the psABI.
10996 We don't need to do that in non-canonical PIC mode. */
10997
10998 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10999 {
11000 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11001 indirect_call = true;
11002 }
11003 else
11004 indirect_call = false;
11005
2b4f149b
RH
11006 if (TARGET_GNU_AS)
11007 fputs ("\t.prologue 4, r40\n", file);
11008 else
11009 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11010 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
bd8633a3
RH
11011
11012 if (NO_PROFILE_COUNTERS)
694a2f6e 11013 fputs ("\tmov out3 = r0\n", file);
bd8633a3
RH
11014 else
11015 {
11016 char buf[20];
11017 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11018
11019 if (TARGET_AUTO_PIC)
11020 fputs ("\tmovl out3 = @gprel(", file);
11021 else
11022 fputs ("\taddl out3 = @ltoff(", file);
11023 assemble_name (file, buf);
11024 if (TARGET_AUTO_PIC)
694a2f6e 11025 fputs (")\n", file);
bd8633a3 11026 else
694a2f6e 11027 fputs ("), r1\n", file);
bd8633a3
RH
11028 }
11029
694a2f6e
EB
11030 if (indirect_call)
11031 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11032 fputs ("\t;;\n", file);
11033
2b4f149b 11034 fputs ("\t.save rp, r42\n", file);
bd8633a3 11035 fputs ("\tmov out2 = b0\n", file);
694a2f6e
EB
11036 if (indirect_call)
11037 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2b4f149b 11038 fputs ("\t.body\n", file);
2b4f149b 11039 fputs ("\tmov out1 = r1\n", file);
694a2f6e
EB
11040 if (indirect_call)
11041 {
11042 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11043 fputs ("\tmov b6 = r16\n", file);
11044 fputs ("\tld8 r1 = [r14]\n", file);
11045 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11046 }
11047 else
11048 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2b4f149b
RH
11049}
11050
d26afa4f
SE
11051static GTY(()) rtx mcount_func_rtx;
11052static rtx
11053gen_mcount_func_rtx (void)
11054{
11055 if (!mcount_func_rtx)
11056 mcount_func_rtx = init_one_libfunc ("_mcount");
11057 return mcount_func_rtx;
11058}
11059
11060void
11061ia64_profile_hook (int labelno)
11062{
11063 rtx label, ip;
11064
11065 if (NO_PROFILE_COUNTERS)
11066 label = const0_rtx;
11067 else
11068 {
11069 char buf[30];
11070 const char *label_name;
11071 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
55504c7c 11072 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
d26afa4f
SE
11073 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11074 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11075 }
11076 ip = gen_reg_rtx (Pmode);
11077 emit_insn (gen_ip_value (ip));
11078 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11079 VOIDmode, 3,
11080 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11081 ip, Pmode,
11082 label, Pmode);
11083}
11084
cac24f06
JM
11085/* Return the mangling of TYPE if it is an extended fundamental type. */
11086
11087static const char *
3101faab 11088ia64_mangle_type (const_tree type)
cac24f06 11089{
608063c3
JB
11090 type = TYPE_MAIN_VARIANT (type);
11091
11092 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11093 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11094 return NULL;
11095
cac24f06
JM
11096 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11097 mangled as "e". */
11098 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11099 return "g";
11100 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11101 an extended mangling. Elsewhere, "e" is available since long
11102 double is 80 bits. */
11103 if (TYPE_MODE (type) == XFmode)
11104 return TARGET_HPUX ? "u9__float80" : "e";
4de67c26
JM
11105 if (TYPE_MODE (type) == RFmode)
11106 return "u7__fpreg";
11107 return NULL;
11108}
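/* Illustrative note (hypothetical declarations, not from this file):
   with the manglings above, "void f (__float80)" becomes
   _Z1fu9__float80 on HP-UX and _Z1fe elsewhere, while
   "void f (__float128)" becomes _Z1fg on non-HP-UX targets
   (HP-UX mangles it as long double, "e").  */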
11109
11110/* Return the diagnostic message string if conversion from FROMTYPE to
11111 TOTYPE is not allowed, NULL otherwise. */
11112static const char *
3101faab 11113ia64_invalid_conversion (const_tree fromtype, const_tree totype)
4de67c26
JM
11114{
11115 /* Reject nontrivial conversion to or from __fpreg. */
11116 if (TYPE_MODE (fromtype) == RFmode
11117 && TYPE_MODE (totype) != RFmode
11118 && TYPE_MODE (totype) != VOIDmode)
11119 return N_("invalid conversion from %<__fpreg%>");
11120 if (TYPE_MODE (totype) == RFmode
11121 && TYPE_MODE (fromtype) != RFmode)
11122 return N_("invalid conversion to %<__fpreg%>");
11123 return NULL;
11124}
11125
11126/* Return the diagnostic message string if the unary operation OP is
11127 not permitted on TYPE, NULL otherwise. */
11128static const char *
3101faab 11129ia64_invalid_unary_op (int op, const_tree type)
4de67c26
JM
11130{
11131 /* Reject operations on __fpreg other than unary + or &. */
11132 if (TYPE_MODE (type) == RFmode
11133 && op != CONVERT_EXPR
11134 && op != ADDR_EXPR)
11135 return N_("invalid operation on %<__fpreg%>");
11136 return NULL;
11137}
11138
11139/* Return the diagnostic message string if the binary operation OP is
11140 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11141static const char *
3101faab 11142ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
4de67c26
JM
11143{
11144 /* Reject operations on __fpreg. */
11145 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11146 return N_("invalid operation on %<__fpreg%>");
cac24f06
JM
11147 return NULL;
11148}
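/* Illustrative sketch (hypothetical user-side code, not from this
   file): __fpreg values may be copied and have their address taken,
   but the hooks above reject conversions and other arithmetic:

     __fpreg f, g;
     g = f;                    OK, plain copy
     __fpreg *p = &f;          OK, unary &
     double d = (double) f;    error: invalid conversion from __fpreg
     f = f + g;                error: invalid operation on __fpreg
*/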
11149
812b587e
SE
11150/* HP-UX version_id attribute.
11151 For object foo, if the version_id is set to 1234 put out an alias
11152 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything
11153 other than an alias statement because it is an illegal symbol name. */
11154
11155static tree
11156ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11157 tree name ATTRIBUTE_UNUSED,
11158 tree args,
11159 int flags ATTRIBUTE_UNUSED,
11160 bool *no_add_attrs)
11161{
11162 tree arg = TREE_VALUE (args);
11163
11164 if (TREE_CODE (arg) != STRING_CST)
11165 {
11166 error("version attribute is not a string");
11167 *no_add_attrs = true;
11168 return NULL_TREE;
11169 }
11170 return NULL_TREE;
11171}
11172
a31fa2e0
SE
11173/* Target hook for c_mode_for_suffix. */
11174
ef4bddc2 11175static machine_mode
a31fa2e0
SE
11176ia64_c_mode_for_suffix (char suffix)
11177{
11178 if (suffix == 'q')
11179 return TFmode;
11180 if (suffix == 'w')
11181 return XFmode;
11182
11183 return VOIDmode;
11184}
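/* Illustrative usage (hypothetical user code, not from this file): with
   the hook above, the GNU constant suffixes map onto the extended types
   registered in ia64_init_builtins:

     __float128 q = 1.5q;      'q' selects a TFmode constant
     __float80  w = 1.5w;      'w' selects an XFmode constant
*/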
11185
f3a83111
SE
11186static GTY(()) rtx ia64_dconst_0_5_rtx;
11187
11188rtx
11189ia64_dconst_0_5 (void)
11190{
11191 if (! ia64_dconst_0_5_rtx)
11192 {
11193 REAL_VALUE_TYPE rv;
11194 real_from_string (&rv, "0.5");
11195 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11196 }
11197 return ia64_dconst_0_5_rtx;
11198}
11199
11200static GTY(()) rtx ia64_dconst_0_375_rtx;
11201
11202rtx
11203ia64_dconst_0_375 (void)
11204{
11205 if (! ia64_dconst_0_375_rtx)
11206 {
11207 REAL_VALUE_TYPE rv;
11208 real_from_string (&rv, "0.375");
11209 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11210 }
11211 return ia64_dconst_0_375_rtx;
11212}
11213
ef4bddc2 11214static machine_mode
ffa88471
SE
11215ia64_get_reg_raw_mode (int regno)
11216{
11217 if (FR_REGNO_P (regno))
11218 return XFmode;
11219 return default_get_reg_raw_mode(regno);
11220}
f3a83111 11221
d9886a9e
L
11222/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11223 anymore. */
11224
11225bool
ef4bddc2 11226ia64_member_type_forces_blk (const_tree, machine_mode mode)
d9886a9e
L
11227{
11228 return TARGET_HPUX && mode == TFmode;
11229}
11230
f16d3f39
JH
11231/* Always default to .text section until HP-UX linker is fixed. */
11232
11233ATTRIBUTE_UNUSED static section *
11234ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11235 enum node_frequency freq ATTRIBUTE_UNUSED,
11236 bool startup ATTRIBUTE_UNUSED,
11237 bool exit ATTRIBUTE_UNUSED)
11238{
11239 return NULL;
11240}
e6431744
RH
11241\f
11242/* Construct (set target (vec_select op0 (parallel perm))) and
11243 return true if that's a valid instruction in the active ISA. */
11244
11245static bool
11246expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11247{
11248 rtx rperm[MAX_VECT_LEN], x;
11249 unsigned i;
11250
11251 for (i = 0; i < nelt; ++i)
11252 rperm[i] = GEN_INT (perm[i]);
11253
11254 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11255 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
f7df4a84 11256 x = gen_rtx_SET (target, x);
e6431744 11257
647d790d
DM
11258 rtx_insn *insn = emit_insn (x);
11259 if (recog_memoized (insn) < 0)
e6431744 11260 {
647d790d 11261 remove_insn (insn);
e6431744
RH
11262 return false;
11263 }
11264 return true;
11265}
11266
11267/* Similar, but generate a vec_concat from op0 and op1 as well. */
11268
11269static bool
11270expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11271 const unsigned char *perm, unsigned nelt)
11272{
ef4bddc2 11273 machine_mode v2mode;
e6431744
RH
11274 rtx x;
11275
11276 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11277 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11278 return expand_vselect (target, x, perm, nelt);
11279}
11280
11281/* Try to expand a no-op permutation. */
11282
11283static bool
11284expand_vec_perm_identity (struct expand_vec_perm_d *d)
11285{
11286 unsigned i, nelt = d->nelt;
11287
11288 for (i = 0; i < nelt; ++i)
11289 if (d->perm[i] != i)
11290 return false;
11291
11292 if (!d->testing_p)
11293 emit_move_insn (d->target, d->op0);
11294
11295 return true;
11296}
11297
11298/* Try to expand D via a shrp instruction. */
11299
11300static bool
11301expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11302{
11303 unsigned i, nelt = d->nelt, shift, mask;
2d130b31 11304 rtx tmp, hi, lo;
e6431744
RH
11305
11306 /* ??? Don't force V2SFmode into the integer registers. */
11307 if (d->vmode == V2SFmode)
11308 return false;
11309
11310 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11311
11312 shift = d->perm[0];
2d130b31
UB
11313 if (BYTES_BIG_ENDIAN && shift > nelt)
11314 return false;
11315
e6431744
RH
11316 for (i = 1; i < nelt; ++i)
11317 if (d->perm[i] != ((shift + i) & mask))
11318 return false;
11319
11320 if (d->testing_p)
11321 return true;
11322
2d130b31
UB
11323 hi = shift < nelt ? d->op1 : d->op0;
11324 lo = shift < nelt ? d->op0 : d->op1;
11325
11326 shift %= nelt;
11327
e6431744
RH
11328 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11329
11330 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11331 gcc_assert (IN_RANGE (shift, 1, 63));
11332
11333 /* Recall that big-endian elements are numbered starting at the top of
11334 the register. Ideally we'd have a shift-left-pair. But since we
11335 don't, convert to a shift the other direction. */
11336 if (BYTES_BIG_ENDIAN)
11337 shift = 64 - shift;
11338
11339 tmp = gen_reg_rtx (DImode);
2d130b31
UB
11340 hi = gen_lowpart (DImode, hi);
11341 lo = gen_lowpart (DImode, lo);
11342 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
e6431744
RH
11343
11344 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11345 return true;
11346}
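/* Illustrative sketch (hypothetical little-endian V4HI case, not from
   this file): a selector of { 1, 2, 3, 4 } over two V4HI operands is a
   rotation by one element, which the routine above emits as a single

     shrp rDEST = rHI, rLO, 16

   where rHI and rLO are the two operands viewed as DImode values.  */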
11347
11348/* Try to instantiate D in a single instruction. */
11349
11350static bool
11351expand_vec_perm_1 (struct expand_vec_perm_d *d)
11352{
11353 unsigned i, nelt = d->nelt;
11354 unsigned char perm2[MAX_VECT_LEN];
11355
11356 /* Try single-operand selections. */
11357 if (d->one_operand_p)
11358 {
11359 if (expand_vec_perm_identity (d))
11360 return true;
11361 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11362 return true;
11363 }
11364
11365 /* Try two operand selections. */
11366 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11367 return true;
11368
11369 /* Recognize interleave style patterns with reversed operands. */
11370 if (!d->one_operand_p)
11371 {
11372 for (i = 0; i < nelt; ++i)
11373 {
11374 unsigned e = d->perm[i];
11375 if (e >= nelt)
11376 e -= nelt;
11377 else
11378 e += nelt;
11379 perm2[i] = e;
11380 }
11381
11382 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11383 return true;
11384 }
11385
11386 if (expand_vec_perm_shrp (d))
11387 return true;
11388
11389 /* ??? Look for deposit-like permutations where most of the result
11390 comes from one vector unchanged and the rest comes from a
11391 sequential hunk of the other vector. */
11392
11393 return false;
11394}
11395
11396/* Pattern match broadcast permutations. */
11397
11398static bool
11399expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11400{
11401 unsigned i, elt, nelt = d->nelt;
11402 unsigned char perm2[2];
11403 rtx temp;
11404 bool ok;
11405
11406 if (!d->one_operand_p)
11407 return false;
11408
11409 elt = d->perm[0];
11410 for (i = 1; i < nelt; ++i)
11411 if (d->perm[i] != elt)
11412 return false;
11413
11414 switch (d->vmode)
11415 {
11416 case V2SImode:
11417 case V2SFmode:
11418 /* Implementable by interleave. */
11419 perm2[0] = elt;
11420 perm2[1] = elt + 2;
11421 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11422 gcc_assert (ok);
11423 break;
11424
11425 case V8QImode:
11426 /* Implementable by extract + broadcast. */
11427 if (BYTES_BIG_ENDIAN)
11428 elt = 7 - elt;
11429 elt *= BITS_PER_UNIT;
11430 temp = gen_reg_rtx (DImode);
11431 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
96fda42c 11432 GEN_INT (8), GEN_INT (elt)));
e6431744
RH
11433 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11434 break;
11435
11436 case V4HImode:
11437 /* Should have been matched directly by vec_select. */
11438 default:
11439 gcc_unreachable ();
11440 }
11441
11442 return true;
11443}
11444
11445/* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11446 two vector permutation into a single vector permutation by using
11447 an interleave operation to merge the vectors. */
11448
11449static bool
11450expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11451{
11452 struct expand_vec_perm_d dremap, dfinal;
11453 unsigned char remap[2 * MAX_VECT_LEN];
11454 unsigned contents, i, nelt, nelt2;
11455 unsigned h0, h1, h2, h3;
dd3d2b35 11456 rtx_insn *seq;
e6431744
RH
11457 bool ok;
11458
11459 if (d->one_operand_p)
11460 return false;
11461
11462 nelt = d->nelt;
11463 nelt2 = nelt / 2;
11464
11465 /* Examine from whence the elements come. */
11466 contents = 0;
11467 for (i = 0; i < nelt; ++i)
11468 contents |= 1u << d->perm[i];
11469
11470 memset (remap, 0xff, sizeof (remap));
11471 dremap = *d;
11472
11473 h0 = (1u << nelt2) - 1;
11474 h1 = h0 << nelt2;
11475 h2 = h0 << nelt;
11476 h3 = h0 << (nelt + nelt2);
11477
11478 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11479 {
11480 for (i = 0; i < nelt; ++i)
11481 {
11482 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11483 remap[which] = i;
11484 dremap.perm[i] = which;
11485 }
11486 }
11487 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11488 {
11489 for (i = 0; i < nelt; ++i)
11490 {
11491 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11492 remap[which] = i;
11493 dremap.perm[i] = which;
11494 }
11495 }
11496 else if ((contents & 0x5555) == contents) /* mix even elements */
11497 {
11498 for (i = 0; i < nelt; ++i)
11499 {
11500 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11501 remap[which] = i;
11502 dremap.perm[i] = which;
11503 }
11504 }
11505 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11506 {
11507 for (i = 0; i < nelt; ++i)
11508 {
11509 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11510 remap[which] = i;
11511 dremap.perm[i] = which;
11512 }
11513 }
11514 else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11515 {
11516 unsigned shift = ctz_hwi (contents);
11517 for (i = 0; i < nelt; ++i)
11518 {
11519 unsigned which = (i + shift) & (2 * nelt - 1);
11520 remap[which] = i;
11521 dremap.perm[i] = which;
11522 }
11523 }
11524 else
11525 return false;
11526
11527 /* Use the remapping array set up above to move the elements from their
11528 swizzled locations into their final destinations. */
11529 dfinal = *d;
11530 for (i = 0; i < nelt; ++i)
11531 {
11532 unsigned e = remap[d->perm[i]];
11533 gcc_assert (e < nelt);
11534 dfinal.perm[i] = e;
11535 }
b4b78e2d
EB
11536 if (d->testing_p)
11537 dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11538 else
11539 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
e6431744
RH
11540 dfinal.op1 = dfinal.op0;
11541 dfinal.one_operand_p = true;
11542 dremap.target = dfinal.op0;
11543
11544 /* Test if the final remap can be done with a single insn. For V4HImode
11545 this *will* succeed. For V8QImode or V2SImode it may not. */
11546 start_sequence ();
11547 ok = expand_vec_perm_1 (&dfinal);
11548 seq = get_insns ();
11549 end_sequence ();
11550 if (!ok)
11551 return false;
11552 if (d->testing_p)
11553 return true;
11554
11555 ok = expand_vec_perm_1 (&dremap);
11556 gcc_assert (ok);
11557
11558 emit_insn (seq);
11559 return true;
11560}
11561
11562/* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11563 constant permutation via two mux2 and a merge. */
11564
11565static bool
11566expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11567{
11568 unsigned char perm2[4];
11569 rtx rmask[4];
11570 unsigned i;
11571 rtx t0, t1, mask, x;
11572 bool ok;
11573
11574 if (d->vmode != V4HImode || d->one_operand_p)
11575 return false;
11576 if (d->testing_p)
11577 return true;
11578
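  /* PERM2 is the within-vector lane for both mux2 selects; RMASK selects,
     per element, whether the result comes from OP0 (all-ones) or OP1
     (all-zeros).  */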
11579 for (i = 0; i < 4; ++i)
11580 {
11581 perm2[i] = d->perm[i] & 3;
11582 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11583 }
11584 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11585 mask = force_reg (V4HImode, mask);
11586
11587 t0 = gen_reg_rtx (V4HImode);
11588 t1 = gen_reg_rtx (V4HImode);
11589
11590 ok = expand_vselect (t0, d->op0, perm2, 4);
11591 gcc_assert (ok);
11592 ok = expand_vselect (t1, d->op1, perm2, 4);
11593 gcc_assert (ok);
11594
11595 x = gen_rtx_AND (V4HImode, mask, t0);
f7df4a84 11596 emit_insn (gen_rtx_SET (t0, x));
e6431744 11597
11598 x = gen_rtx_NOT (V4HImode, mask);
11599 x = gen_rtx_AND (V4HImode, x, t1);
f7df4a84 11600 emit_insn (gen_rtx_SET (t1, x));
e6431744 11601
11602 x = gen_rtx_IOR (V4HImode, t0, t1);
f7df4a84 11603 emit_insn (gen_rtx_SET (d->target, x));
e6431744 11604
11605 return true;
11606}
11607
11608/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11609 With all of the interface bits taken care of, perform the expansion
11610 in D and return true on success. */
11611
11612static bool
11613ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11614{
11615 if (expand_vec_perm_1 (d))
11616 return true;
11617 if (expand_vec_perm_broadcast (d))
11618 return true;
11619 if (expand_vec_perm_interleave_2 (d))
11620 return true;
11621 if (expand_vec_perm_v4hi_5 (d))
11622 return true;
11623 return false;
11624}
11625
11626bool
11627ia64_expand_vec_perm_const (rtx operands[4])
11628{
11629 struct expand_vec_perm_d d;
11630 unsigned char perm[MAX_VECT_LEN];
11631 int i, nelt, which;
11632 rtx sel;
11633
11634 d.target = operands[0];
11635 d.op0 = operands[1];
11636 d.op1 = operands[2];
11637 sel = operands[3];
11638
11639 d.vmode = GET_MODE (d.target);
11640 gcc_assert (VECTOR_MODE_P (d.vmode));
11641 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11642 d.testing_p = false;
11643
11644 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11645 gcc_assert (XVECLEN (sel, 0) == nelt);
11646 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11647
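  /* WHICH collects a mask of the operands actually referenced by the
     selector: bit 0 if any element comes from OP0, bit 1 if any comes
     from OP1.  */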
11648 for (i = which = 0; i < nelt; ++i)
11649 {
11650 rtx e = XVECEXP (sel, 0, i);
11651 int ei = INTVAL (e) & (2 * nelt - 1);
11652
11653 which |= (ei < nelt ? 1 : 2);
11654 d.perm[i] = ei;
11655 perm[i] = ei;
11656 }
11657
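  /* Canonicalize: when only one operand is referenced, or both operands
     are identical, rewrite the permutation in terms of OP0 alone so the
     single-operand expanders can match.  */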
11658 switch (which)
11659 {
11660 default:
11661 gcc_unreachable();
11662
11663 case 3:
11664 if (!rtx_equal_p (d.op0, d.op1))
11665 {
11666 d.one_operand_p = false;
11667 break;
11668 }
11669
11670 /* The elements of PERM do not suggest that only the first operand
11671 is used, but both operands are identical. Allow easier matching
11672 of the permutation by folding the permutation into the single
11673 input vector. */
11674 for (i = 0; i < nelt; ++i)
11675 if (d.perm[i] >= nelt)
11676 d.perm[i] -= nelt;
11677 /* FALLTHRU */
11678
11679 case 1:
11680 d.op1 = d.op0;
11681 d.one_operand_p = true;
11682 break;
11683
11684 case 2:
11685 for (i = 0; i < nelt; ++i)
11686 d.perm[i] -= nelt;
11687 d.op0 = d.op1;
11688 d.one_operand_p = true;
11689 break;
11690 }
11691
11692 if (ia64_expand_vec_perm_const_1 (&d))
11693 return true;
11694
11695 /* If the mask says both arguments are needed, but they are the same,
11696 the above tried to expand with one_operand_p true. If that didn't
11697 work, retry with one_operand_p false, as that's what we used in _ok. */
11698 if (which == 3 && d.one_operand_p)
11699 {
11700 memcpy (d.perm, perm, sizeof (perm));
11701 d.one_operand_p = false;
11702 return ia64_expand_vec_perm_const_1 (&d);
11703 }
11704
11705 return false;
11706}
11707
11708/* Implement targetm.vectorize.vec_perm_const_ok. */
11709
11710static bool
ef4bddc2 11711ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
e6431744 11712 				  const unsigned char *sel)
11713{
11714 struct expand_vec_perm_d d;
11715 unsigned int i, nelt, which;
11716 bool ret;
11717
11718 d.vmode = vmode;
11719 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11720 d.testing_p = true;
11721
11722  /* Copy the permutation selector SEL into the permutation
11723     array in D.  */
11724 memcpy (d.perm, sel, nelt);
11725 for (i = which = 0; i < nelt; ++i)
11726 {
11727 unsigned char e = d.perm[i];
11728 gcc_assert (e < 2 * nelt);
11729 which |= (e < nelt ? 1 : 2);
11730 }
11731
11732 /* For all elements from second vector, fold the elements to first. */
11733 if (which == 2)
11734 for (i = 0; i < nelt; ++i)
11735 d.perm[i] -= nelt;
11736
11737 /* Check whether the mask can be applied to the vector type. */
11738 d.one_operand_p = (which != 3);
11739
11740 /* Otherwise we have to go through the motions and see if we can
11741 figure out how to generate the requested permutation. */
11742 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11743 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11744 if (!d.one_operand_p)
11745 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11746
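  /* Expand into a throwaway sequence; only the return value matters when
     merely testing whether the permutation is supported.  */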
11747 start_sequence ();
11748 ret = ia64_expand_vec_perm_const_1 (&d);
11749 end_sequence ();
11750
11751 return ret;
11752}
11753
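/* Expand a vec_set operation for V2SFmode: insert the SFmode value
   OPERANDS[1] into element OPERANDS[2] of the V2SF vector OPERANDS[0].  */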
11754void
11755ia64_expand_vec_setv2sf (rtx operands[3])
11756{
11757 struct expand_vec_perm_d d;
11758 unsigned int which;
11759 bool ok;
11760
11761 d.target = operands[0];
11762 d.op0 = operands[0];
11763 d.op1 = gen_reg_rtx (V2SFmode);
11764 d.vmode = V2SFmode;
11765 d.nelt = 2;
11766 d.one_operand_p = false;
11767 d.testing_p = false;
11768
11769 which = INTVAL (operands[2]);
11770 gcc_assert (which <= 1);
11771 d.perm[0] = 1 - which;
11772 d.perm[1] = which + 2;
11773
11774 emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11775
11776 ok = ia64_expand_vec_perm_const_1 (&d);
11777 gcc_assert (ok);
11778}
11779
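/* Expand an even/odd extraction permutation: fill TARGET with the even
   (ODD == 0) or odd (ODD == 1) numbered elements of the concatenation of
   OP0 and OP1.  */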
11780void
11781ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11782{
11783 struct expand_vec_perm_d d;
ef4bddc2 11784 machine_mode vmode = GET_MODE (target);
e6431744 11785  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11786 bool ok;
11787
11788 d.target = target;
11789 d.op0 = op0;
11790 d.op1 = op1;
11791 d.vmode = vmode;
11792 d.nelt = nelt;
11793 d.one_operand_p = false;
11794 d.testing_p = false;
11795
11796 for (i = 0; i < nelt; ++i)
11797 d.perm[i] = i * 2 + odd;
11798
11799 ok = ia64_expand_vec_perm_const_1 (&d);
11800 gcc_assert (ok);
11801}
f16d3f39 11802
e2500fed 11803#include "gt-ia64.h"