/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);
static int process_set (FILE *, rtx, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
						     enum machine_mode,
						     int *,
						     const_tree,
						     int);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false, ia64_vms_common_object_attribute},
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;

typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

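/* For reference, a typical use of the attribute handled above (this is an
   illustrative example, not code from this file) is

     static int counter __attribute__ ((model ("small")));

   which asks for COUNTER to be placed in the small address area, so that
   its address fits the immediate field of a single "addl".  */
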
/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id, val;
  if (! DECL_P (decl))
    abort ();

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  /* As the common_object attribute sets DECL_SECTION_NAME, check it before
     looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);
  else
    attr = NULL_TREE;

  if (!attr)
    {
      /* Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	       size, align / BITS_PER_UNIT);
    }
  else
    {
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
    }
}

/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */

void
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
				tree decl)
{
  if (!(flags & SECTION_VMS_OVERLAY))
    {
      default_elf_asm_named_section (name, flags, decl);
      return;
    }
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
    abort ();

  if (flags & SECTION_DECLARED)
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

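/* A worked example for ia64_depz_field_mask above: for the combination
   (and (ashift X 4) 0x3f0), the mask 0x3f0 shifted right by 4 is 0x3f,
   and exact_log2 (0x3f + 1) gives a field length of 6, so the expression
   can be emitted as a single deposit of a 6-bit field at position 4
   (dep.z with pos = 4, len = 6).  A mask that is not a contiguous run of
   1's starting at the shift count makes exact_log2 return -1, rejecting
   the transformation.  */
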
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
	  || GET_MODE (x) == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, GET_MODE (op))
	      || function_operand (op, GET_MODE (op)))
	    return true;
	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      {
	enum machine_mode mode = GET_MODE (x);

	if (mode == V2SFmode)
	  return satisfies_constraint_Y (x);

	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		&& GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (rtx x)
{
  if (GET_MODE (x) == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
	}
    }

  return true;
}

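/* A note on the addend splitting used above and again in ia64_expand_move
   and ia64_expand_tls_address: ((x & 0x3fff) ^ 0x2000) - 0x2000
   sign-extends the low 14 bits of x, splitting an offset into a low part
   that fits a signed 14-bit add immediate and a high part that is a
   multiple of 16KB (the granularity aligned_offset_symbol_operand relies
   on).  For example, an addend of 0x12345 splits into hi = 0x14000 and
   lo = -0x1cbb, with hi + lo == 0x12345.  */
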
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}

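/* To illustrate the POST_DEC case above: a TImode access through
   (post_dec p) must leave p decremented by 16.  The split emits the first
   DImode access with (post_inc p), taking p to p+8, and the second with a
   post_modify of -24, taking p from p+8 back to p-16, which matches the
   original post-decrement ("add 8, subtract 24").  */
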
f57fc998
ZW
1369/* Split a TImode or TFmode move instruction after reload.
1370 This is used by *movtf_internal and *movti_internal. */
1371void
1372ia64_split_tmode_move (rtx operands[])
1373{
2ffe0e02
ZW
1374 rtx in[2], out[2], insn;
1375 rtx fixup[2];
1376 bool dead = false;
1377 bool reversed = false;
1378
1379 /* It is possible for reload to decide to overwrite a pointer with
1380 the value it points to. In that case we have to do the loads in
1381 the appropriate order so that the pointer is not destroyed too
1382 early. Also we must not generate a postmodify for that second
e820471b 1383 load, or rws_access_regno will die. */
2ffe0e02
ZW
1384 if (GET_CODE (operands[1]) == MEM
1385 && reg_overlap_mentioned_p (operands[0], operands[1]))
f57fc998 1386 {
2ffe0e02
ZW
1387 rtx base = XEXP (operands[1], 0);
1388 while (GET_CODE (base) != REG)
1389 base = XEXP (base, 0);
f57fc998 1390
2ffe0e02
ZW
1391 if (REGNO (base) == REGNO (operands[0]))
1392 reversed = true;
1393 dead = true;
1394 }
1395 /* Another reason to do the moves in reversed order is if the first
1396 element of the target register pair is also the second element of
1397 the source register pair. */
1398 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1399 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1400 reversed = true;
1401
1402 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1403 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1404
1405#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1406 if (GET_CODE (EXP) == MEM \
1407 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1408 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1409 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
bbbbb16a 1410 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
2ffe0e02
ZW
1411
1412 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1413 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1414 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1415
1416 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1417 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1418 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1419
1420 if (fixup[0])
1421 emit_insn (fixup[0]);
1422 if (fixup[1])
1423 emit_insn (fixup[1]);
1424
1425#undef MAYBE_ADD_REG_INC_NOTE
f57fc998
ZW
1426}
1427
02befdf4 1428/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
3f622353
RH
1429 through memory plus an extra GR scratch register. Except that you can
1430 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1431 SECONDARY_RELOAD_CLASS, but not both.
1432
1433 We got into problems in the first place by allowing a construct like
02befdf4 1434 (subreg:XF (reg:TI)), which we got from a union containing a long double.
f5143c46 1435 This solution attempts to prevent this situation from occurring. When
3f622353
RH
1436 we see something like the above, we spill the inner register to memory. */
1437
4de67c26
JM
1438static rtx
1439spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
3f622353
RH
1440{
1441 if (GET_CODE (in) == SUBREG
1442 && GET_MODE (SUBREG_REG (in)) == TImode
1443 && GET_CODE (SUBREG_REG (in)) == REG)
1444 {
68d22aa5
RH
1445 rtx memt = assign_stack_temp (TImode, 16, 0);
1446 emit_move_insn (memt, SUBREG_REG (in));
4de67c26 1447 return adjust_address (memt, mode, 0);
3f622353
RH
1448 }
1449 else if (force && GET_CODE (in) == REG)
1450 {
4de67c26 1451 rtx memx = assign_stack_temp (mode, 16, 0);
68d22aa5
RH
1452 emit_move_insn (memx, in);
1453 return memx;
3f622353 1454 }
3f622353
RH
1455 else
1456 return in;
1457}
f2f90c63 1458
4de67c26
JM
1459/* Expand the movxf or movrf pattern (MODE says which) with the given
1460 OPERANDS, returning true if the pattern should then invoke
1461 DONE. */
1462
1463bool
1464ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1465{
1466 rtx op0 = operands[0];
1467
1468 if (GET_CODE (op0) == SUBREG)
1469 op0 = SUBREG_REG (op0);
1470
1471 /* We must support XFmode loads into general registers for stdarg/vararg,
1472 unprototyped calls, and a rare case where a long double is passed as
1473 an argument after a float HFA fills the FP registers. We split them into
1474 DImode loads for convenience. We also need to support XFmode stores
1475 for the last case. This case does not happen for stdarg/vararg routines,
1476 because we do a block store to memory of unnamed arguments. */
1477
1478 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1479 {
1480 rtx out[2];
1481
1482 /* We're hoping to transform everything that deals with XFmode
1483 quantities and GR registers early in the compiler. */
b3a13419 1484 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1485
1486 /* Struct to register can just use TImode instead. */
1487 if ((GET_CODE (operands[1]) == SUBREG
1488 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1489 || (GET_CODE (operands[1]) == REG
1490 && GR_REGNO_P (REGNO (operands[1]))))
1491 {
1492 rtx op1 = operands[1];
1493
1494 if (GET_CODE (op1) == SUBREG)
1495 op1 = SUBREG_REG (op1);
1496 else
1497 op1 = gen_rtx_REG (TImode, REGNO (op1));
1498
1499 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1500 return true;
1501 }
1502
1503 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1504 {
ae4d3291 1505 /* Don't word-swap when reading in the constant. */
4de67c26 1506 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
ae4d3291
JW
1507 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1508 0, mode));
4de67c26 1509 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
ae4d3291
JW
1510 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1511 0, mode));
4de67c26
JM
1512 return true;
1513 }
1514
1515 /* If the quantity is in a register not known to be GR, spill it. */
1516 if (register_operand (operands[1], mode))
1517 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1518
1519 gcc_assert (GET_CODE (operands[1]) == MEM);
1520
ae4d3291
JW
1521 /* Don't word-swap when reading in the value. */
1522 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1523 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
4de67c26
JM
1524
1525 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1526 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1527 return true;
1528 }
1529
1530 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1531 {
1532 /* We're hoping to transform everything that deals with XFmode
1533 quantities and GR registers early in the compiler. */
b3a13419 1534 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1535
1536 /* Op0 can't be a GR_REG here, as that case is handled above.
1537 If op0 is a register, then we spill op1, so that we now have a
1538 MEM operand. This requires creating an XFmode subreg of a TImode reg
1539 to force the spill. */
1540 if (register_operand (operands[0], mode))
1541 {
1542 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1543 op1 = gen_rtx_SUBREG (mode, op1, 0);
1544 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1545 }
1546
1547 else
1548 {
1549 rtx in[2];
1550
ae4d3291
JW
1551 gcc_assert (GET_CODE (operands[0]) == MEM);
1552
1553 /* Don't word-swap when writing out the value. */
1554 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1555 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
4de67c26
JM
1556
1557 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1558 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1559 return true;
1560 }
1561 }
1562
1563 if (!reload_in_progress && !reload_completed)
1564 {
1565 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1566
1567 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1568 {
1569 rtx memt, memx, in = operands[1];
1570 if (CONSTANT_P (in))
1571 in = validize_mem (force_const_mem (mode, in));
1572 if (GET_CODE (in) == MEM)
1573 memt = adjust_address (in, TImode, 0);
1574 else
1575 {
1576 memt = assign_stack_temp (TImode, 16, 0);
1577 memx = adjust_address (memt, mode, 0);
1578 emit_move_insn (memx, in);
1579 }
1580 emit_move_insn (op0, memt);
1581 return true;
1582 }
1583
1584 if (!ia64_move_ok (operands[0], operands[1]))
1585 operands[1] = force_reg (mode, operands[1]);
1586 }
1587
1588 return false;
1589}
1590
f90b7a5a
PB
1591/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1592 with the expression that holds the compare result (in VOIDmode). */
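/* For example, a (LT:DI r1 r2) comparison leaves *OP0 pointing at a fresh
   BImode predicate register holding the r1 < r2 result, *OP1 at const0_rtx,
   and *EXPR at (ne pred 0), as emitted at the end of this function. */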
f2f90c63 1593
24ea7948
ZW
1594static GTY(()) rtx cmptf_libfunc;
1595
f90b7a5a
PB
1596void
1597ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
f2f90c63 1598{
f90b7a5a 1599 enum rtx_code code = GET_CODE (*expr);
f2f90c63
RH
1600 rtx cmp;
1601
1602 /* If we have a BImode input, then we already have a compare result, and
1603 do not need to emit another comparison. */
f90b7a5a 1604 if (GET_MODE (*op0) == BImode)
f2f90c63 1605 {
f90b7a5a
PB
1606 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1607 cmp = *op0;
f2f90c63 1608 }
24ea7948
ZW
1609 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1610 magic number as its third argument that indicates what to do.
1611 The return value is an integer to be compared against zero. */
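 /* For example, a LE comparison is lowered to a call
    _U_Qfcmp (*op0, *op1, QCMP_LT|QCMP_EQ|QCMP_INV), and the DImode result
    is then tested with NE against zero, per the magic/ncode table below. */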
f90b7a5a 1612 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
24ea7948
ZW
1613 {
1614 enum qfcmp_magic {
1615 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1616 QCMP_UNORD = 2,
1617 QCMP_EQ = 4,
1618 QCMP_LT = 8,
1619 QCMP_GT = 16
32e8bb8e
ILT
1620 };
1621 int magic;
24ea7948
ZW
1622 enum rtx_code ncode;
1623 rtx ret, insns;
e820471b 1624
f90b7a5a 1625 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
24ea7948
ZW
1626 switch (code)
1627 {
1628 /* 1 = equal, 0 = not equal. Equality operators do
1629 not raise FP_INVALID when given an SNaN operand. */
1630 case EQ: magic = QCMP_EQ; ncode = NE; break;
1631 case NE: magic = QCMP_EQ; ncode = EQ; break;
1632 /* isunordered() from C99. */
1633 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
b1346fa3 1634 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
24ea7948
ZW
1635 /* Relational operators raise FP_INVALID when given
1636 an SNaN operand. */
1637 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1638 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1639 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1640 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1641 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1642 Expanders for buneq etc. would have to be added to ia64.md
1643 for this to be useful. */
e820471b 1644 default: gcc_unreachable ();
24ea7948
ZW
1645 }
1646
1647 start_sequence ();
1648
1649 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
f90b7a5a 1650 *op0, TFmode, *op1, TFmode,
24ea7948
ZW
1651 GEN_INT (magic), DImode);
1652 cmp = gen_reg_rtx (BImode);
1653 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1654 gen_rtx_fmt_ee (ncode, BImode,
1655 ret, const0_rtx)));
1656
1657 insns = get_insns ();
1658 end_sequence ();
1659
1660 emit_libcall_block (insns, cmp, cmp,
f90b7a5a 1661 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
24ea7948
ZW
1662 code = NE;
1663 }
f2f90c63
RH
1664 else
1665 {
1666 cmp = gen_reg_rtx (BImode);
1667 emit_insn (gen_rtx_SET (VOIDmode, cmp,
f90b7a5a 1668 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
f2f90c63
RH
1669 code = NE;
1670 }
1671
f90b7a5a
PB
1672 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1673 *op0 = cmp;
1674 *op1 = const0_rtx;
f2f90c63 1675}
2ed4af6f 1676
e934ca47
RH
1677/* Generate an integral vector comparison. Return true if the condition has
1678 been reversed, and so the sense of the comparison should be inverted. */
f61134e8
RH
1679
1680static bool
1681ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1682 rtx dest, rtx op0, rtx op1)
1683{
1684 bool negate = false;
1685 rtx x;
1686
e934ca47 1687 /* Canonicalize the comparison to EQ, GT, GTU. */
f61134e8
RH
1688 switch (code)
1689 {
1690 case EQ:
1691 case GT:
e934ca47 1692 case GTU:
f61134e8
RH
1693 break;
1694
1695 case NE:
f61134e8 1696 case LE:
e934ca47
RH
1697 case LEU:
1698 code = reverse_condition (code);
f61134e8
RH
1699 negate = true;
1700 break;
1701
1702 case GE:
e934ca47
RH
1703 case GEU:
1704 code = reverse_condition (code);
f61134e8
RH
1705 negate = true;
1706 /* FALLTHRU */
1707
1708 case LT:
f61134e8 1709 case LTU:
e934ca47
RH
1710 code = swap_condition (code);
1711 x = op0, op0 = op1, op1 = x;
1712 break;
f61134e8 1713
e934ca47
RH
1714 default:
1715 gcc_unreachable ();
1716 }
f61134e8 1717
e934ca47 1718 /* Unsigned parallel compare is not supported by the hardware. Play some
6283ba26 1719 tricks to turn this into a signed comparison against 0. */
e934ca47
RH
1720 if (code == GTU)
1721 {
1722 switch (mode)
1723 {
1724 case V2SImode:
f61134e8 1725 {
e934ca47
RH
1726 rtx t1, t2, mask;
1727
1728 /* Perform a parallel modulo subtraction. */
1729 t1 = gen_reg_rtx (V2SImode);
1730 emit_insn (gen_subv2si3 (t1, op0, op1));
1731
1732 /* Extract the original sign bit of op0. */
1733 mask = GEN_INT (-0x80000000);
1734 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1735 mask = force_reg (V2SImode, mask);
1736 t2 = gen_reg_rtx (V2SImode);
1737 emit_insn (gen_andv2si3 (t2, op0, mask));
1738
1739 /* XOR it back into the result of the subtraction. This results
1740 in the sign bit set iff we saw unsigned underflow. */
1741 x = gen_reg_rtx (V2SImode);
1742 emit_insn (gen_xorv2si3 (x, t1, t2));
6283ba26
RH
1743
1744 code = GT;
1745 op0 = x;
1746 op1 = CONST0_RTX (mode);
f61134e8 1747 }
e934ca47
RH
1748 break;
1749
1750 case V8QImode:
1751 case V4HImode:
1752 /* Perform a parallel unsigned saturating subtraction. */
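	/* Here (a >u b) is equivalent to ((a -us b) != 0), since the saturating
	   difference is zero exactly when a <=u b; the EQ test emitted below,
	   together with the flipped NEGATE flag, implements the != 0 form. */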
1753 x = gen_reg_rtx (mode);
1754 emit_insn (gen_rtx_SET (VOIDmode, x,
1755 gen_rtx_US_MINUS (mode, op0, op1)));
6283ba26
RH
1756
1757 code = EQ;
1758 op0 = x;
1759 op1 = CONST0_RTX (mode);
1760 negate = !negate;
e934ca47
RH
1761 break;
1762
1763 default:
1764 gcc_unreachable ();
1765 }
f61134e8
RH
1766 }
1767
1768 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1769 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1770
1771 return negate;
1772}
1773
f61134e8
RH
1774/* Emit an integral vector conditional move. */
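/* operands[0] = (operands[3] applied to operands[4], operands[5])
   ? operands[1] : operands[2]. The vector comparison yields an all-ones or
   all-zeros mask per element, so the select is built from AND, NOT and IOR. */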
1775
1776void
1777ia64_expand_vecint_cmov (rtx operands[])
1778{
1779 enum machine_mode mode = GET_MODE (operands[0]);
1780 enum rtx_code code = GET_CODE (operands[3]);
1781 bool negate;
1782 rtx cmp, x, ot, of;
1783
f61134e8
RH
1784 cmp = gen_reg_rtx (mode);
1785 negate = ia64_expand_vecint_compare (code, mode, cmp,
1786 operands[4], operands[5]);
1787
1788 ot = operands[1+negate];
1789 of = operands[2-negate];
1790
1791 if (ot == CONST0_RTX (mode))
1792 {
1793 if (of == CONST0_RTX (mode))
1794 {
1795 emit_move_insn (operands[0], ot);
1796 return;
1797 }
1798
1799 x = gen_rtx_NOT (mode, cmp);
1800 x = gen_rtx_AND (mode, x, of);
1801 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1802 }
1803 else if (of == CONST0_RTX (mode))
1804 {
1805 x = gen_rtx_AND (mode, cmp, ot);
1806 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1807 }
1808 else
1809 {
1810 rtx t, f;
1811
1812 t = gen_reg_rtx (mode);
1813 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1814 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1815
1816 f = gen_reg_rtx (mode);
1817 x = gen_rtx_NOT (mode, cmp);
1818 x = gen_rtx_AND (mode, x, operands[2-negate]);
1819 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1820
1821 x = gen_rtx_IOR (mode, t, f);
1822 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1823 }
1824}
1825
1826/* Emit an integral vector min or max operation. Return true if all done. */
1827
1828bool
1829ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1830 rtx operands[])
1831{
cabddb23 1832 rtx xops[6];
f61134e8
RH
1833
1834 /* These four combinations are supported directly. */
1835 if (mode == V8QImode && (code == UMIN || code == UMAX))
1836 return false;
1837 if (mode == V4HImode && (code == SMIN || code == SMAX))
1838 return false;
1839
93b4080b
RH
1840 /* This combination can be implemented with only saturating subtraction. */
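  /* umax (a, b) computes as (a -us b) + b: the unsigned saturating
     difference is zero when b >=u a, and a - b otherwise. */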
1841 if (mode == V4HImode && code == UMAX)
1842 {
1843 rtx x, tmp = gen_reg_rtx (mode);
1844
1845 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1846 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1847
1848 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1849 return true;
1850 }
1851
f61134e8
RH
1852 /* Everything else implemented via vector comparisons. */
1853 xops[0] = operands[0];
1854 xops[4] = xops[1] = operands[1];
1855 xops[5] = xops[2] = operands[2];
1856
1857 switch (code)
1858 {
1859 case UMIN:
1860 code = LTU;
1861 break;
1862 case UMAX:
1863 code = GTU;
1864 break;
1865 case SMIN:
1866 code = LT;
1867 break;
1868 case SMAX:
1869 code = GT;
1870 break;
1871 default:
e820471b 1872 gcc_unreachable ();
f61134e8
RH
1873 }
1874 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1875
1876 ia64_expand_vecint_cmov (xops);
1877 return true;
1878}
1879
e898620c
RH
1880/* Emit an integral vector widening sum operation. */
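/* operands[0], in the wider vector mode, is set to operands[2] plus the
   sign- or zero-extended elements of operands[1], using the unpack low/high
   patterns to widen the two halves. */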
1881
1882void
1883ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1884{
1885 rtx l, h, x, s;
1886 enum machine_mode wmode, mode;
1887 rtx (*unpack_l) (rtx, rtx, rtx);
1888 rtx (*unpack_h) (rtx, rtx, rtx);
1889 rtx (*plus) (rtx, rtx, rtx);
1890
1891 wmode = GET_MODE (operands[0]);
1892 mode = GET_MODE (operands[1]);
1893
1894 switch (mode)
1895 {
1896 case V8QImode:
1897 unpack_l = gen_unpack1_l;
1898 unpack_h = gen_unpack1_h;
1899 plus = gen_addv4hi3;
1900 break;
1901 case V4HImode:
1902 unpack_l = gen_unpack2_l;
1903 unpack_h = gen_unpack2_h;
1904 plus = gen_addv2si3;
1905 break;
1906 default:
1907 gcc_unreachable ();
1908 }
1909
1910 /* Fill in x with the sign extension of each element in op1. */
1911 if (unsignedp)
1912 x = CONST0_RTX (mode);
1913 else
1914 {
1915 bool neg;
1916
1917 x = gen_reg_rtx (mode);
1918
1919 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1920 CONST0_RTX (mode));
1921 gcc_assert (!neg);
1922 }
1923
1924 l = gen_reg_rtx (wmode);
1925 h = gen_reg_rtx (wmode);
1926 s = gen_reg_rtx (wmode);
1927
1928 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1929 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1930 emit_insn (plus (s, l, operands[2]));
1931 emit_insn (plus (operands[0], h, s));
1932}
1933
1934/* Emit a signed or unsigned V8QI dot product operation. */
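/* operands[0] (V2SImode) = operands[3] + the sum of the widened products of
   corresponding elements of operands[1] and operands[2]: the inputs are
   unpacked to V4HImode halves, multiplied with pmpy2, and the four V2SImode
   partial products are accumulated. */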
1935
1936void
1937ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1938{
1939 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1940
1941 /* Fill in x1 and x2 with the sign extension of each element. */
1942 if (unsignedp)
1943 x1 = x2 = CONST0_RTX (V8QImode);
1944 else
1945 {
1946 bool neg;
1947
1948 x1 = gen_reg_rtx (V8QImode);
1949 x2 = gen_reg_rtx (V8QImode);
1950
1951 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1952 CONST0_RTX (V8QImode));
1953 gcc_assert (!neg);
1954 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1955 CONST0_RTX (V8QImode));
1956 gcc_assert (!neg);
1957 }
1958
1959 l1 = gen_reg_rtx (V4HImode);
1960 l2 = gen_reg_rtx (V4HImode);
1961 h1 = gen_reg_rtx (V4HImode);
1962 h2 = gen_reg_rtx (V4HImode);
1963
1964 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1965 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1966 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1967 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1968
1969 p1 = gen_reg_rtx (V2SImode);
1970 p2 = gen_reg_rtx (V2SImode);
1971 p3 = gen_reg_rtx (V2SImode);
1972 p4 = gen_reg_rtx (V2SImode);
1973 emit_insn (gen_pmpy2_r (p1, l1, l2));
1974 emit_insn (gen_pmpy2_l (p2, l1, l2));
1975 emit_insn (gen_pmpy2_r (p3, h1, h2));
1976 emit_insn (gen_pmpy2_l (p4, h1, h2));
1977
1978 s1 = gen_reg_rtx (V2SImode);
1979 s2 = gen_reg_rtx (V2SImode);
1980 s3 = gen_reg_rtx (V2SImode);
1981 emit_insn (gen_addv2si3 (s1, p1, p2));
1982 emit_insn (gen_addv2si3 (s2, p3, p4));
1983 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1984 emit_insn (gen_addv2si3 (operands[0], s2, s3));
1985}
1986
2ed4af6f
RH
1987/* Emit the appropriate sequence for a call. */
1988
1989void
9c808aad
AJ
1990ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1991 int sibcall_p)
2ed4af6f 1992{
599aedd9 1993 rtx insn, b0;
2ed4af6f
RH
1994
1995 addr = XEXP (addr, 0);
c8083186 1996 addr = convert_memory_address (DImode, addr);
2ed4af6f 1997 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 1998
599aedd9 1999 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
2000 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2001 {
2002 if (sibcall_p)
599aedd9 2003 insn = gen_sibcall_nogp (addr);
2ed4af6f 2004 else if (! retval)
599aedd9 2005 insn = gen_call_nogp (addr, b0);
2ed4af6f 2006 else
599aedd9
RH
2007 insn = gen_call_value_nogp (retval, addr, b0);
2008 insn = emit_call_insn (insn);
2ed4af6f 2009 }
2ed4af6f 2010 else
599aedd9
RH
2011 {
2012 if (sibcall_p)
2013 insn = gen_sibcall_gp (addr);
2014 else if (! retval)
2015 insn = gen_call_gp (addr, b0);
2016 else
2017 insn = gen_call_value_gp (retval, addr, b0);
2018 insn = emit_call_insn (insn);
2ed4af6f 2019
599aedd9
RH
2020 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2021 }
6dad5a56 2022
599aedd9 2023 if (sibcall_p)
4e14f1f9 2024 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
f2972bf8
DR
2025
2026 if (TARGET_ABI_OPEN_VMS)
2027 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2028 gen_rtx_REG (DImode, GR_REG (25)));
599aedd9
RH
2029}
2030
6fb5fa3c
DB
2031static void
2032reg_emitted (enum ia64_frame_regs r)
2033{
2034 if (emitted_frame_related_regs[r] == 0)
2035 emitted_frame_related_regs[r] = current_frame_info.r[r];
2036 else
2037 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2038}
2039
2040static int
2041get_reg (enum ia64_frame_regs r)
2042{
2043 reg_emitted (r);
2044 return current_frame_info.r[r];
2045}
2046
2047static bool
2048is_emitted (int regno)
2049{
09639a83 2050 unsigned int r;
6fb5fa3c
DB
2051
2052 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2053 if (emitted_frame_related_regs[r] == regno)
2054 return true;
2055 return false;
2056}
2057
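/* Restore the GP register after a call that may have changed it: either copy
   it back from the general register chosen to hold it across the call, or
   reload it from its save slot in the memory spill area. */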
599aedd9 2058void
9c808aad 2059ia64_reload_gp (void)
599aedd9
RH
2060{
2061 rtx tmp;
2062
6fb5fa3c
DB
2063 if (current_frame_info.r[reg_save_gp])
2064 {
2065 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2066 }
2ed4af6f 2067 else
599aedd9
RH
2068 {
2069 HOST_WIDE_INT offset;
13f70342 2070 rtx offset_r;
599aedd9
RH
2071
2072 offset = (current_frame_info.spill_cfa_off
2073 + current_frame_info.spill_size);
2074 if (frame_pointer_needed)
2075 {
2076 tmp = hard_frame_pointer_rtx;
2077 offset = -offset;
2078 }
2079 else
2080 {
2081 tmp = stack_pointer_rtx;
2082 offset = current_frame_info.total_size - offset;
2083 }
2084
13f70342
RH
2085 offset_r = GEN_INT (offset);
2086 if (satisfies_constraint_I (offset_r))
2087 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
599aedd9
RH
2088 else
2089 {
13f70342 2090 emit_move_insn (pic_offset_table_rtx, offset_r);
599aedd9
RH
2091 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2092 pic_offset_table_rtx, tmp));
2093 }
2094
2095 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2096 }
2097
2098 emit_move_insn (pic_offset_table_rtx, tmp);
2099}
2100
2101void
9c808aad
AJ
2102ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2103 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
2104{
2105 rtx insn;
2106 bool is_desc = false;
2107
2108 /* If we find we're calling through a register, then we're actually
2109 calling through a descriptor, so load up the values. */
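  /* On ia64 a function descriptor is a pair of 8-byte words: the entry
     address followed by the callee's gp value; both words are loaded below. */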
4e14f1f9 2110 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
2111 {
2112 rtx tmp;
2113 bool addr_dead_p;
2114
2115 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 2116 we can legitimately change the global lifetime of the GP
9c808aad 2117 (in the form of killing where previously live). This is
599aedd9
RH
2118 because a call through a descriptor doesn't use the previous
2119 value of the GP, while a direct call does, and we do not
2120 commit to either form until the split here.
2121
2122 That said, this means that we lack precise life info for
2123 whether ADDR is dead after this call. This is not terribly
2124 important, since we can fix things up essentially for free
2125 with the POST_DEC below, but it's nice to not use it when we
2126 can immediately tell it's not necessary. */
2127 addr_dead_p = ((noreturn_p || sibcall_p
2128 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2129 REGNO (addr)))
2130 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2131
2132 /* Load the code address into scratch_b. */
2133 tmp = gen_rtx_POST_INC (Pmode, addr);
2134 tmp = gen_rtx_MEM (Pmode, tmp);
2135 emit_move_insn (scratch_r, tmp);
2136 emit_move_insn (scratch_b, scratch_r);
2137
2138 /* Load the GP address. If ADDR is not dead here, then we must
2139 revert the change made above via the POST_INCREMENT. */
2140 if (!addr_dead_p)
2141 tmp = gen_rtx_POST_DEC (Pmode, addr);
2142 else
2143 tmp = addr;
2144 tmp = gen_rtx_MEM (Pmode, tmp);
2145 emit_move_insn (pic_offset_table_rtx, tmp);
2146
2147 is_desc = true;
2148 addr = scratch_b;
2149 }
2ed4af6f 2150
6dad5a56 2151 if (sibcall_p)
599aedd9
RH
2152 insn = gen_sibcall_nogp (addr);
2153 else if (retval)
2154 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 2155 else
599aedd9 2156 insn = gen_call_nogp (addr, retaddr);
6dad5a56 2157 emit_call_insn (insn);
2ed4af6f 2158
599aedd9
RH
2159 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2160 ia64_reload_gp ();
2ed4af6f 2161}
16df4ee6
RH
2162
2163/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2164
2165 This differs from the generic code in that we know about the zero-extending
2166 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2167 also know that ld.acq+cmpxchg.rel equals a full barrier.
2168
2169 The loop we want to generate looks like
2170
2171 cmp_reg = mem;
2172 label:
2173 old_reg = cmp_reg;
2174 new_reg = cmp_reg op val;
2175 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2176 if (cmp_reg != old_reg)
2177 goto label;
2178
2179 Note that we only do the plain load from memory once. Subsequent
2180 iterations use the value loaded by the compare-and-swap pattern. */
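/* A SImode or DImode PLUS or MINUS of a constant accepted by fetchadd_operand
   does not use this loop; it takes the fetchadd fast path below instead. */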
2181
2182void
2183ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2184 rtx old_dst, rtx new_dst)
2185{
2186 enum machine_mode mode = GET_MODE (mem);
2187 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2188 enum insn_code icode;
2189
2190 /* Special case for using fetchadd. */
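  /* The ia64 fetchadd instruction only accepts increments of -16, -8, -4, -1,
     1, 4, 8 or 16; fetchadd_operand is assumed to accept exactly those
     constants. */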
dca13767
JJ
2191 if ((mode == SImode || mode == DImode)
2192 && (code == PLUS || code == MINUS)
2193 && fetchadd_operand (val, mode))
16df4ee6 2194 {
dca13767
JJ
2195 if (code == MINUS)
2196 val = GEN_INT (-INTVAL (val));
2197
16df4ee6
RH
2198 if (!old_dst)
2199 old_dst = gen_reg_rtx (mode);
2200
2201 emit_insn (gen_memory_barrier ());
2202
2203 if (mode == SImode)
2204 icode = CODE_FOR_fetchadd_acq_si;
2205 else
2206 icode = CODE_FOR_fetchadd_acq_di;
2207 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2208
2209 if (new_dst)
2210 {
2211 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2212 true, OPTAB_WIDEN);
2213 if (new_reg != new_dst)
2214 emit_move_insn (new_dst, new_reg);
2215 }
2216 return;
2217 }
2218
2219 /* Because of the volatile mem read, we get an ld.acq, which is the
2220 front half of the full barrier. The end half is the cmpxchg.rel. */
2221 gcc_assert (MEM_VOLATILE_P (mem));
2222
2223 old_reg = gen_reg_rtx (DImode);
2224 cmp_reg = gen_reg_rtx (DImode);
2225 label = gen_label_rtx ();
2226
2227 if (mode != DImode)
2228 {
2229 val = simplify_gen_subreg (DImode, val, mode, 0);
2230 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2231 }
2232 else
2233 emit_move_insn (cmp_reg, mem);
2234
2235 emit_label (label);
2236
2237 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2238 emit_move_insn (old_reg, cmp_reg);
2239 emit_move_insn (ar_ccv, cmp_reg);
2240
2241 if (old_dst)
2242 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2243
2244 new_reg = cmp_reg;
2245 if (code == NOT)
2246 {
974920dc
UB
2247 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2248 true, OPTAB_DIRECT);
2249 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
16df4ee6 2250 }
974920dc
UB
2251 else
2252 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2253 true, OPTAB_DIRECT);
16df4ee6
RH
2254
2255 if (mode != DImode)
2256 new_reg = gen_lowpart (mode, new_reg);
2257 if (new_dst)
2258 emit_move_insn (new_dst, new_reg);
2259
2260 switch (mode)
2261 {
2262 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2263 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2264 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2265 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2266 default:
2267 gcc_unreachable ();
2268 }
2269
2270 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2271
6819a463 2272 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
16df4ee6 2273}
809d4ef1 2274\f
3b572406
RH
2275/* Begin the assembly file. */
2276
1bc7c5b6 2277static void
9c808aad 2278ia64_file_start (void)
1bc7c5b6 2279{
0f666d6e
JJ
2280 /* Variable tracking should be run after all optimizations which change order
2281 of insns. It also needs a valid CFG. This can't be done in
ee30710d 2282 ia64_override_options, because flag_var_tracking is finalized after
0f666d6e
JJ
2283 that. */
2284 ia64_flag_var_tracking = flag_var_tracking;
2285 flag_var_tracking = 0;
2286
1bc7c5b6
ZW
2287 default_file_start ();
2288 emit_safe_across_calls ();
2289}
2290
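/* Output the .pred.safe_across_calls directive listing the predicate
   registers that are preserved across calls; with the usual register
   conventions this typically prints something like
   ".pred.safe_across_calls p1-p5,p16-p63". */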
3b572406 2291void
9c808aad 2292emit_safe_across_calls (void)
3b572406
RH
2293{
2294 unsigned int rs, re;
2295 int out_state;
2296
2297 rs = 1;
2298 out_state = 0;
2299 while (1)
2300 {
2301 while (rs < 64 && call_used_regs[PR_REG (rs)])
2302 rs++;
2303 if (rs >= 64)
2304 break;
2305 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2306 continue;
2307 if (out_state == 0)
2308 {
1bc7c5b6 2309 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
2310 out_state = 1;
2311 }
2312 else
1bc7c5b6 2313 fputc (',', asm_out_file);
3b572406 2314 if (re == rs + 1)
1bc7c5b6 2315 fprintf (asm_out_file, "p%u", rs);
3b572406 2316 else
1bc7c5b6 2317 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
2318 rs = re + 1;
2319 }
2320 if (out_state)
1bc7c5b6 2321 fputc ('\n', asm_out_file);
3b572406
RH
2322}
2323
812b587e
SE
2324/* Globalize a declaration. */
2325
2326static void
2327ia64_globalize_decl_name (FILE * stream, tree decl)
2328{
2329 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2330 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2331 if (version_attr)
2332 {
2333 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2334 const char *p = TREE_STRING_POINTER (v);
2335 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2336 }
2337 targetm.asm_out.globalize_label (stream, name);
2338 if (TREE_CODE (decl) == FUNCTION_DECL)
2339 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2340}
2341
97e242b0
RH
2342/* Helper function for ia64_compute_frame_size: find an appropriate general
2343 register to spill some special register to. SPECIAL_SPILL_MASK contains
2344 bits in GR0 to GR31 that have already been allocated by this routine.
2345 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 2346
97e242b0 2347static int
6fb5fa3c 2348find_gr_spill (enum ia64_frame_regs r, int try_locals)
97e242b0
RH
2349{
2350 int regno;
2351
6fb5fa3c
DB
2352 if (emitted_frame_related_regs[r] != 0)
2353 {
2354 regno = emitted_frame_related_regs[r];
2951f79b
JJ
2355 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2356 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
6fb5fa3c
DB
2357 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2358 else if (current_function_is_leaf
2359 && regno >= GR_REG (1) && regno <= GR_REG (31))
2360 current_frame_info.gr_used_mask |= 1 << regno;
2361
2362 return regno;
2363 }
2364
97e242b0
RH
2365 /* If this is a leaf function, first try an otherwise unused
2366 call-clobbered register. */
2367 if (current_function_is_leaf)
2368 {
2369 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2370 if (! df_regs_ever_live_p (regno)
97e242b0
RH
2371 && call_used_regs[regno]
2372 && ! fixed_regs[regno]
2373 && ! global_regs[regno]
6fb5fa3c
DB
2374 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2375 && ! is_emitted (regno))
97e242b0
RH
2376 {
2377 current_frame_info.gr_used_mask |= 1 << regno;
2378 return regno;
2379 }
2380 }
2381
2382 if (try_locals)
2383 {
2384 regno = current_frame_info.n_local_regs;
9502c558
JW
2385 /* If there is a frame pointer, then we can't use loc79, because
2386 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2387 reg_name switching code in ia64_expand_prologue. */
2951f79b
JJ
2388 while (regno < (80 - frame_pointer_needed))
2389 if (! is_emitted (LOC_REG (regno++)))
2390 {
2391 current_frame_info.n_local_regs = regno;
2392 return LOC_REG (regno - 1);
2393 }
97e242b0
RH
2394 }
2395
2396 /* Failed to find a general register to spill to. Must use stack. */
2397 return 0;
2398}
2399
2400/* In order to make for nice schedules, we try to allocate every temporary
2401 to a different register. We must of course stay away from call-saved,
2402 fixed, and global registers. We must also stay away from registers
2403 allocated in current_frame_info.gr_used_mask, since those include regs
2404 used all through the prologue.
2405
2406 Any register allocated here must be used immediately. The idea is to
2407 aid scheduling, not to solve data flow problems. */
2408
2409static int last_scratch_gr_reg;
2410
2411static int
9c808aad 2412next_scratch_gr_reg (void)
97e242b0
RH
2413{
2414 int i, regno;
2415
2416 for (i = 0; i < 32; ++i)
2417 {
2418 regno = (last_scratch_gr_reg + i + 1) & 31;
2419 if (call_used_regs[regno]
2420 && ! fixed_regs[regno]
2421 && ! global_regs[regno]
2422 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2423 {
2424 last_scratch_gr_reg = regno;
2425 return regno;
2426 }
2427 }
2428
2429 /* There must be _something_ available. */
e820471b 2430 gcc_unreachable ();
97e242b0
RH
2431}
2432
2433/* Helper function for ia64_compute_frame_size, called through
2434 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2435
2436static void
9c808aad 2437mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2438{
97e242b0
RH
2439 unsigned int regno = REGNO (reg);
2440 if (regno < 32)
f95e79cc 2441 {
c8b622ff 2442 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
f95e79cc
RH
2443 for (i = 0; i < n; ++i)
2444 current_frame_info.gr_used_mask |= 1 << (regno + i);
2445 }
c65ebc55
JW
2446}
2447
6fb5fa3c 2448
c65ebc55
JW
2449/* Compute the frame layout for the current function, including the offset
2450 between the frame pointer and the stack pointer, and record it in
2451 current_frame_info. SIZE is the number of bytes of space needed for local variables. */
97e242b0
RH
2452
2453static void
9c808aad 2454ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2455{
97e242b0
RH
2456 HOST_WIDE_INT total_size;
2457 HOST_WIDE_INT spill_size = 0;
2458 HOST_WIDE_INT extra_spill_size = 0;
2459 HOST_WIDE_INT pretend_args_size;
c65ebc55 2460 HARD_REG_SET mask;
97e242b0
RH
2461 int n_spilled = 0;
2462 int spilled_gr_p = 0;
2463 int spilled_fr_p = 0;
2464 unsigned int regno;
2951f79b
JJ
2465 int min_regno;
2466 int max_regno;
97e242b0 2467 int i;
c65ebc55 2468
97e242b0
RH
2469 if (current_frame_info.initialized)
2470 return;
294dac80 2471
97e242b0 2472 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2473 CLEAR_HARD_REG_SET (mask);
2474
97e242b0
RH
2475 /* Don't allocate scratches to the return register. */
2476 diddle_return_value (mark_reg_gr_used_mask, NULL);
2477
2478 /* Don't allocate scratches to the EH scratch registers. */
2479 if (cfun->machine->ia64_eh_epilogue_sp)
2480 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2481 if (cfun->machine->ia64_eh_epilogue_bsp)
2482 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2483
97e242b0
RH
2484 /* Find the size of the register stack frame. We have only 80 local
2485 registers, because we reserve 8 for the inputs and 8 for the
2486 outputs. */
2487
2488 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2489 since we'll be adjusting that down later. */
2490 regno = LOC_REG (78) + ! frame_pointer_needed;
2491 for (; regno >= LOC_REG (0); regno--)
6fb5fa3c 2492 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
97e242b0
RH
2493 break;
2494 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2495
3f67ac08
DM
2496 /* For functions marked with the syscall_linkage attribute, we must mark
2497 all eight input registers as in use, so that locals aren't visible to
2498 the caller. */
2499
2500 if (cfun->machine->n_varargs > 0
2501 || lookup_attribute ("syscall_linkage",
2502 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2503 current_frame_info.n_input_regs = 8;
2504 else
2505 {
2506 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
6fb5fa3c 2507 if (df_regs_ever_live_p (regno))
97e242b0
RH
2508 break;
2509 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2510 }
2511
2512 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
6fb5fa3c 2513 if (df_regs_ever_live_p (regno))
97e242b0
RH
2514 break;
2515 i = regno - OUT_REG (0) + 1;
2516
d26afa4f 2517#ifndef PROFILE_HOOK
97e242b0 2518 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2519 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2520 profiling, we need two output registers for the two bb_init_trace_func
2521 arguments. */
e3b5732b 2522 if (crtl->profile)
97e242b0 2523 i = MAX (i, 1);
d26afa4f 2524#endif
97e242b0
RH
2525 current_frame_info.n_output_regs = i;
2526
2527 /* ??? No rotating register support yet. */
2528 current_frame_info.n_rotate_regs = 0;
2529
2530 /* Discover which registers need spilling, and how much room that
9c808aad 2531 will take. Begin with floating point and general registers,
97e242b0
RH
2532 which will always wind up on the stack. */
2533
2534 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
6fb5fa3c 2535 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2536 {
2537 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2538 spill_size += 16;
2539 n_spilled += 1;
2540 spilled_fr_p = 1;
c65ebc55
JW
2541 }
2542
97e242b0 2543 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2544 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2545 {
2546 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2547 spill_size += 8;
2548 n_spilled += 1;
2549 spilled_gr_p = 1;
c65ebc55
JW
2550 }
2551
97e242b0 2552 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
6fb5fa3c 2553 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2554 {
2555 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2556 spill_size += 8;
2557 n_spilled += 1;
c65ebc55
JW
2558 }
2559
97e242b0
RH
2560 /* Now come all special registers that might get saved in other
2561 general registers. */
9c808aad 2562
97e242b0
RH
2563 if (frame_pointer_needed)
2564 {
6fb5fa3c 2565 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
0c35f902
JW
2566 /* If we did not get a register, then we take LOC79. This is guaranteed
2567 to be free, even if regs_ever_live is already set, because this is
2568 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2569 as we don't count loc79 above. */
6fb5fa3c 2570 if (current_frame_info.r[reg_fp] == 0)
0c35f902 2571 {
6fb5fa3c
DB
2572 current_frame_info.r[reg_fp] = LOC_REG (79);
2573 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
0c35f902 2574 }
97e242b0
RH
2575 }
2576
2577 if (! current_function_is_leaf)
c65ebc55 2578 {
97e242b0
RH
2579 /* Emit a save of BR0 if we call other functions. Do this even
2580 if this function doesn't return, as EH depends on this to be
2581 able to unwind the stack. */
2582 SET_HARD_REG_BIT (mask, BR_REG (0));
2583
6fb5fa3c
DB
2584 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2585 if (current_frame_info.r[reg_save_b0] == 0)
97e242b0 2586 {
ae1e2d4c 2587 extra_spill_size += 8;
97e242b0
RH
2588 n_spilled += 1;
2589 }
2590
2591 /* Similarly for ar.pfs. */
2592 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2593 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2594 if (current_frame_info.r[reg_save_ar_pfs] == 0)
97e242b0
RH
2595 {
2596 extra_spill_size += 8;
2597 n_spilled += 1;
2598 }
599aedd9
RH
2599
2600 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2601 registers are clobbered, so we fall back to the stack. */
6fb5fa3c 2602 current_frame_info.r[reg_save_gp]
e3b5732b 2603 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
6fb5fa3c 2604 if (current_frame_info.r[reg_save_gp] == 0)
599aedd9
RH
2605 {
2606 SET_HARD_REG_BIT (mask, GR_REG (1));
2607 spill_size += 8;
2608 n_spilled += 1;
2609 }
c65ebc55
JW
2610 }
2611 else
97e242b0 2612 {
6fb5fa3c 2613 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
97e242b0
RH
2614 {
2615 SET_HARD_REG_BIT (mask, BR_REG (0));
ae1e2d4c 2616 extra_spill_size += 8;
97e242b0
RH
2617 n_spilled += 1;
2618 }
f5bdba44 2619
6fb5fa3c 2620 if (df_regs_ever_live_p (AR_PFS_REGNUM))
f5bdba44
RH
2621 {
2622 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2623 current_frame_info.r[reg_save_ar_pfs]
2624 = find_gr_spill (reg_save_ar_pfs, 1);
2625 if (current_frame_info.r[reg_save_ar_pfs] == 0)
f5bdba44
RH
2626 {
2627 extra_spill_size += 8;
2628 n_spilled += 1;
2629 }
2630 }
97e242b0 2631 }
c65ebc55 2632
97e242b0
RH
2633 /* Unwind descriptor hackery: things are most efficient if we allocate
2634 consecutive GR save registers for RP, PFS, FP in that order. However,
2635 it is absolutely critical that FP get the only hard register that's
2636 guaranteed to be free, so we allocated it first. If all three did
2637 happen to be allocated hard regs, and are consecutive, rearrange them
6fb5fa3c
DB
2638 into the preferred order now.
2639
2640 If we have already emitted code for any of those registers,
2641 then it's already too late to change. */
2951f79b
JJ
2642 min_regno = MIN (current_frame_info.r[reg_fp],
2643 MIN (current_frame_info.r[reg_save_b0],
2644 current_frame_info.r[reg_save_ar_pfs]));
2645 max_regno = MAX (current_frame_info.r[reg_fp],
2646 MAX (current_frame_info.r[reg_save_b0],
2647 current_frame_info.r[reg_save_ar_pfs]));
2648 if (min_regno > 0
2649 && min_regno + 2 == max_regno
2650 && (current_frame_info.r[reg_fp] == min_regno + 1
2651 || current_frame_info.r[reg_save_b0] == min_regno + 1
2652 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2653 && (emitted_frame_related_regs[reg_save_b0] == 0
2654 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2655 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2656 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2657 && (emitted_frame_related_regs[reg_fp] == 0
2658 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
5527bf14 2659 {
2951f79b
JJ
2660 current_frame_info.r[reg_save_b0] = min_regno;
2661 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2662 current_frame_info.r[reg_fp] = min_regno + 2;
5527bf14
RH
2663 }
2664
97e242b0
RH
2665 /* See if we need to store the predicate register block. */
2666 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2667 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
97e242b0
RH
2668 break;
2669 if (regno <= PR_REG (63))
c65ebc55 2670 {
97e242b0 2671 SET_HARD_REG_BIT (mask, PR_REG (0));
6fb5fa3c
DB
2672 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2673 if (current_frame_info.r[reg_save_pr] == 0)
97e242b0
RH
2674 {
2675 extra_spill_size += 8;
2676 n_spilled += 1;
2677 }
2678
2679 /* ??? Mark them all as used so that register renaming and such
2680 are free to use them. */
2681 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2682 df_set_regs_ever_live (regno, true);
c65ebc55
JW
2683 }
2684
97e242b0 2685 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2686 ar.unat as well. The check for existing liveness allows inline asm
2687 to touch ar.unat. */
2688 if (spilled_gr_p || cfun->machine->n_varargs
6fb5fa3c 2689 || df_regs_ever_live_p (AR_UNAT_REGNUM))
97e242b0 2690 {
6fb5fa3c 2691 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
97e242b0 2692 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
6fb5fa3c
DB
2693 current_frame_info.r[reg_save_ar_unat]
2694 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2695 if (current_frame_info.r[reg_save_ar_unat] == 0)
97e242b0
RH
2696 {
2697 extra_spill_size += 8;
2698 n_spilled += 1;
2699 }
2700 }
2701
6fb5fa3c 2702 if (df_regs_ever_live_p (AR_LC_REGNUM))
97e242b0
RH
2703 {
2704 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
6fb5fa3c
DB
2705 current_frame_info.r[reg_save_ar_lc]
2706 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2707 if (current_frame_info.r[reg_save_ar_lc] == 0)
97e242b0
RH
2708 {
2709 extra_spill_size += 8;
2710 n_spilled += 1;
2711 }
2712 }
2713
2714 /* If we have an odd number of words of pretend arguments written to
2715 the stack, then the FR save area will be unaligned. We round the
2716 size of this area up to keep things 16 byte aligned. */
2717 if (spilled_fr_p)
38173d38 2718 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
97e242b0 2719 else
38173d38 2720 pretend_args_size = crtl->args.pretend_args_size;
97e242b0
RH
2721
2722 total_size = (spill_size + extra_spill_size + size + pretend_args_size
38173d38 2723 + crtl->outgoing_args_size);
97e242b0
RH
2724 total_size = IA64_STACK_ALIGN (total_size);
2725
2726 /* We always use the 16-byte scratch area provided by the caller, but
2727 if we are a leaf function, there's no one to which we need to provide
2728 a scratch area. */
2729 if (current_function_is_leaf)
2730 total_size = MAX (0, total_size - 16);
2731
c65ebc55 2732 current_frame_info.total_size = total_size;
97e242b0
RH
2733 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2734 current_frame_info.spill_size = spill_size;
2735 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2736 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2737 current_frame_info.n_spilled = n_spilled;
c65ebc55 2738 current_frame_info.initialized = reload_completed;
97e242b0
RH
2739}
2740
7b5cbb57
AS
2741/* Worker function for TARGET_CAN_ELIMINATE. */
2742
2743bool
2744ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2745{
2746 return (to == BR_REG (0) ? current_function_is_leaf : true);
2747}
2748
97e242b0
RH
2749/* Compute the initial difference between the specified pair of registers. */
2750
2751HOST_WIDE_INT
9c808aad 2752ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2753{
2754 HOST_WIDE_INT offset;
2755
2756 ia64_compute_frame_size (get_frame_size ());
2757 switch (from)
2758 {
2759 case FRAME_POINTER_REGNUM:
e820471b 2760 switch (to)
97e242b0 2761 {
e820471b 2762 case HARD_FRAME_POINTER_REGNUM:
97e242b0
RH
2763 if (current_function_is_leaf)
2764 offset = -current_frame_info.total_size;
2765 else
2766 offset = -(current_frame_info.total_size
38173d38 2767 - crtl->outgoing_args_size - 16);
e820471b
NS
2768 break;
2769
2770 case STACK_POINTER_REGNUM:
97e242b0
RH
2771 if (current_function_is_leaf)
2772 offset = 0;
2773 else
38173d38 2774 offset = 16 + crtl->outgoing_args_size;
e820471b
NS
2775 break;
2776
2777 default:
2778 gcc_unreachable ();
97e242b0 2779 }
97e242b0 2780 break;
c65ebc55 2781
97e242b0
RH
2782 case ARG_POINTER_REGNUM:
2783 /* Arguments start above the 16 byte save area, unless stdarg
2784 in which case we store through the 16 byte save area. */
e820471b
NS
2785 switch (to)
2786 {
2787 case HARD_FRAME_POINTER_REGNUM:
38173d38 2788 offset = 16 - crtl->args.pretend_args_size;
e820471b
NS
2789 break;
2790
2791 case STACK_POINTER_REGNUM:
2792 offset = (current_frame_info.total_size
38173d38 2793 + 16 - crtl->args.pretend_args_size);
e820471b
NS
2794 break;
2795
2796 default:
2797 gcc_unreachable ();
2798 }
97e242b0
RH
2799 break;
2800
97e242b0 2801 default:
e820471b 2802 gcc_unreachable ();
97e242b0
RH
2803 }
2804
2805 return offset;
c65ebc55
JW
2806}
2807
97e242b0
RH
2808/* If there are more than a trivial number of register spills, we use
2809 two interleaved iterators so that we can get two memory references
2810 per insn group.
2811
2812 In order to simplify things in the prologue and epilogue expanders,
2813 we use helper functions to fix up the memory references after the
2814 fact with the appropriate offsets to a POST_MODIFY memory mode.
2815 The following data structure tracks the state of the two iterators
2816 while insns are being emitted. */
2817
2818struct spill_fill_data
c65ebc55 2819{
d6a7951f 2820 rtx init_after; /* point at which to emit initializations */
97e242b0
RH
2821 rtx init_reg[2]; /* initial base register */
2822 rtx iter_reg[2]; /* the iterator registers */
2823 rtx *prev_addr[2]; /* address of last memory use */
703cf211 2824 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
2825 HOST_WIDE_INT prev_off[2]; /* last offset */
2826 int n_iter; /* number of iterators in use */
2827 int next_iter; /* next iterator to use */
2828 unsigned int save_gr_used_mask;
2829};
2830
2831static struct spill_fill_data spill_fill_data;
c65ebc55 2832
97e242b0 2833static void
9c808aad 2834setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2835{
2836 int i;
2837
2838 spill_fill_data.init_after = get_last_insn ();
2839 spill_fill_data.init_reg[0] = init_reg;
2840 spill_fill_data.init_reg[1] = init_reg;
2841 spill_fill_data.prev_addr[0] = NULL;
2842 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
2843 spill_fill_data.prev_insn[0] = NULL;
2844 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
2845 spill_fill_data.prev_off[0] = cfa_off;
2846 spill_fill_data.prev_off[1] = cfa_off;
2847 spill_fill_data.next_iter = 0;
2848 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2849
2850 spill_fill_data.n_iter = 1 + (n_spills > 2);
2851 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 2852 {
97e242b0
RH
2853 int regno = next_scratch_gr_reg ();
2854 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2855 current_frame_info.gr_used_mask |= 1 << regno;
2856 }
2857}
2858
2859static void
9c808aad 2860finish_spill_pointers (void)
97e242b0
RH
2861{
2862 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2863}
c65ebc55 2864
97e242b0 2865static rtx
9c808aad 2866spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2867{
2868 int iter = spill_fill_data.next_iter;
2869 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2870 rtx disp_rtx = GEN_INT (disp);
2871 rtx mem;
2872
2873 if (spill_fill_data.prev_addr[iter])
2874 {
13f70342 2875 if (satisfies_constraint_N (disp_rtx))
703cf211
BS
2876 {
2877 *spill_fill_data.prev_addr[iter]
2878 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2879 gen_rtx_PLUS (DImode,
2880 spill_fill_data.iter_reg[iter],
2881 disp_rtx));
bbbbb16a
ILT
2882 add_reg_note (spill_fill_data.prev_insn[iter],
2883 REG_INC, spill_fill_data.iter_reg[iter]);
703cf211 2884 }
c65ebc55
JW
2885 else
2886 {
97e242b0 2887 /* ??? Could use register post_modify for loads. */
13f70342 2888 if (!satisfies_constraint_I (disp_rtx))
97e242b0
RH
2889 {
2890 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2891 emit_move_insn (tmp, disp_rtx);
2892 disp_rtx = tmp;
2893 }
2894 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2895 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 2896 }
97e242b0
RH
2897 }
2898 /* Micro-optimization: if we've created a frame pointer, it's at
2899 CFA 0, which may allow the real iterator to be initialized lower,
2900 slightly increasing parallelism. Also, if there are few saves
2901 it may eliminate the iterator entirely. */
2902 else if (disp == 0
2903 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2904 && frame_pointer_needed)
2905 {
2906 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 2907 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
2908 return mem;
2909 }
2910 else
2911 {
892a4e60 2912 rtx seq, insn;
809d4ef1 2913
97e242b0
RH
2914 if (disp == 0)
2915 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2916 spill_fill_data.init_reg[iter]);
2917 else
c65ebc55 2918 {
97e242b0
RH
2919 start_sequence ();
2920
13f70342 2921 if (!satisfies_constraint_I (disp_rtx))
c65ebc55 2922 {
97e242b0
RH
2923 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2924 emit_move_insn (tmp, disp_rtx);
2925 disp_rtx = tmp;
c65ebc55 2926 }
97e242b0
RH
2927
2928 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2929 spill_fill_data.init_reg[iter],
2930 disp_rtx));
2931
2f937369 2932 seq = get_insns ();
97e242b0 2933 end_sequence ();
c65ebc55 2934 }
809d4ef1 2935
97e242b0
RH
2936 /* Careful for being the first insn in a sequence. */
2937 if (spill_fill_data.init_after)
892a4e60 2938 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 2939 else
bc08aefe
RH
2940 {
2941 rtx first = get_insns ();
2942 if (first)
892a4e60 2943 insn = emit_insn_before (seq, first);
bc08aefe 2944 else
892a4e60 2945 insn = emit_insn (seq);
bc08aefe 2946 }
892a4e60 2947 spill_fill_data.init_after = insn;
97e242b0 2948 }
c65ebc55 2949
97e242b0 2950 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 2951
97e242b0
RH
2952 /* ??? Not all of the spills are for varargs, but some of them are.
2953 The rest of the spills belong in an alias set of their own. But
2954 it doesn't actually hurt to include them here. */
ba4828e0 2955 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 2956
97e242b0
RH
2957 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2958 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 2959
97e242b0
RH
2960 if (++iter >= spill_fill_data.n_iter)
2961 iter = 0;
2962 spill_fill_data.next_iter = iter;
c65ebc55 2963
97e242b0
RH
2964 return mem;
2965}
5527bf14 2966
97e242b0 2967static void
9c808aad
AJ
2968do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2969 rtx frame_reg)
97e242b0 2970{
703cf211 2971 int iter = spill_fill_data.next_iter;
97e242b0 2972 rtx mem, insn;
5527bf14 2973
97e242b0 2974 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 2975 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 2976 spill_fill_data.prev_insn[iter] = insn;
5527bf14 2977
97e242b0
RH
2978 if (frame_reg)
2979 {
2980 rtx base;
2981 HOST_WIDE_INT off;
2982
2983 RTX_FRAME_RELATED_P (insn) = 1;
2984
9c808aad 2985 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
2986 through a pair of interleaved post_modify iterators. Just
2987 provide the correct answer. */
2988
2989 if (frame_pointer_needed)
2990 {
2991 base = hard_frame_pointer_rtx;
2992 off = - cfa_off;
5527bf14 2993 }
97e242b0
RH
2994 else
2995 {
2996 base = stack_pointer_rtx;
2997 off = current_frame_info.total_size - cfa_off;
2998 }
2999
bbbbb16a
ILT
3000 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3001 gen_rtx_SET (VOIDmode,
3002 gen_rtx_MEM (GET_MODE (reg),
3003 plus_constant (base, off)),
3004 frame_reg));
c65ebc55
JW
3005 }
3006}
3007
97e242b0 3008static void
9c808aad 3009do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 3010{
703cf211
BS
3011 int iter = spill_fill_data.next_iter;
3012 rtx insn;
3013
3014 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3015 GEN_INT (cfa_off)));
3016 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
3017}
3018
870f9ec0
RH
3019/* Wrapper functions that discard the CONST_INT spill offset. These
3020 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 3021 use a consistent function interface. */
870f9ec0
RH
3022
3023static rtx
9c808aad 3024gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3025{
3026 return gen_movdi (dest, src);
3027}
3028
3029static rtx
9c808aad 3030gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3031{
3032 return gen_fr_spill (dest, src);
3033}
3034
3035static rtx
9c808aad 3036gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3037{
3038 return gen_fr_restore (dest, src);
3039}
c65ebc55
JW
3040
3041/* Called after register allocation to add any instructions needed for the
3042 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 3043 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
3044 to intermix instructions with the saves of the caller saved registers. In
3045 some cases, it might be necessary to emit a barrier instruction as the last
3046 insn to prevent such scheduling.
3047
3048 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
3049 so that the debug info generation code can handle them properly.
3050
3051 The register save area is laid out like so:
3052 cfa+16
3053 [ varargs spill area ]
3054 [ fr register spill area ]
3055 [ br register spill area ]
3056 [ ar register spill area ]
3057 [ pr register spill area ]
3058 [ gr register spill area ] */
c65ebc55
JW
3059
3060/* ??? Get inefficient code when the frame size is larger than can fit in an
3061 adds instruction. */
3062
c65ebc55 3063void
9c808aad 3064ia64_expand_prologue (void)
c65ebc55 3065{
97e242b0
RH
3066 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3067 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3068 rtx reg, alt_reg;
3069
3070 ia64_compute_frame_size (get_frame_size ());
3071 last_scratch_gr_reg = 15;
3072
6fb5fa3c
DB
3073 if (dump_file)
3074 {
3075 fprintf (dump_file, "ia64 frame related registers "
3076 "recorded in current_frame_info.r[]:\n");
3077#define PRINTREG(a) if (current_frame_info.r[a]) \
3078 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3079 PRINTREG(reg_fp);
3080 PRINTREG(reg_save_b0);
3081 PRINTREG(reg_save_pr);
3082 PRINTREG(reg_save_ar_pfs);
3083 PRINTREG(reg_save_ar_unat);
3084 PRINTREG(reg_save_ar_lc);
3085 PRINTREG(reg_save_gp);
3086#undef PRINTREG
3087 }
3088
97e242b0
RH
3089 /* If there is no epilogue, then we don't need some prologue insns.
3090 We need to avoid emitting the dead prologue insns, because flow
3091 will complain about them. */
c65ebc55
JW
3092 if (optimize)
3093 {
97e242b0 3094 edge e;
9924d7d8 3095 edge_iterator ei;
97e242b0 3096
628f6a4e 3097 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
c65ebc55
JW
3098 if ((e->flags & EDGE_FAKE) == 0
3099 && (e->flags & EDGE_FALLTHRU) != 0)
3100 break;
3101 epilogue_p = (e != NULL);
3102 }
3103 else
3104 epilogue_p = 1;
3105
97e242b0
RH
3106 /* Set the local, input, and output register names. We need to do this
3107 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3108 half. If we use in/loc/out register names, then we get assembler errors
3109 in crtn.S because there is no alloc insn or regstk directive in there. */
3110 if (! TARGET_REG_NAMES)
3111 {
3112 int inputs = current_frame_info.n_input_regs;
3113 int locals = current_frame_info.n_local_regs;
3114 int outputs = current_frame_info.n_output_regs;
3115
3116 for (i = 0; i < inputs; i++)
3117 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3118 for (i = 0; i < locals; i++)
3119 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3120 for (i = 0; i < outputs; i++)
3121 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3122 }
c65ebc55 3123
97e242b0
RH
3124 /* Set the frame pointer register name. The regnum is logically loc79,
3125 but of course we'll not have allocated that many locals. Rather than
3126 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
3127 /* ??? This code means that we can never use one local register when
3128 there is a frame pointer. loc79 gets wasted in this case, as it is
3129 renamed to a register that will never be used. See also the try_locals
3130 code in find_gr_spill. */
6fb5fa3c 3131 if (current_frame_info.r[reg_fp])
97e242b0
RH
3132 {
3133 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3134 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3135 = reg_names[current_frame_info.r[reg_fp]];
3136 reg_names[current_frame_info.r[reg_fp]] = tmp;
97e242b0 3137 }
c65ebc55 3138
97e242b0
RH
3139 /* We don't need an alloc instruction if we've used no outputs or locals. */
3140 if (current_frame_info.n_local_regs == 0
2ed4af6f 3141 && current_frame_info.n_output_regs == 0
38173d38 3142 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
f5bdba44 3143 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
3144 {
3145 /* If there is no alloc, but there are input registers used, then we
3146 need a .regstk directive. */
3147 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3148 ar_pfs_save_reg = NULL_RTX;
3149 }
3150 else
3151 {
3152 current_frame_info.need_regstk = 0;
c65ebc55 3153
6fb5fa3c
DB
3154 if (current_frame_info.r[reg_save_ar_pfs])
3155 {
3156 regno = current_frame_info.r[reg_save_ar_pfs];
3157 reg_emitted (reg_save_ar_pfs);
3158 }
97e242b0
RH
3159 else
3160 regno = next_scratch_gr_reg ();
3161 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3162
9c808aad 3163 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
3164 GEN_INT (current_frame_info.n_input_regs),
3165 GEN_INT (current_frame_info.n_local_regs),
3166 GEN_INT (current_frame_info.n_output_regs),
3167 GEN_INT (current_frame_info.n_rotate_regs)));
6fb5fa3c 3168 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
97e242b0 3169 }
c65ebc55 3170
97e242b0 3171 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 3172
26a110f5 3173 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
3174 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3175 stack_pointer_rtx, 0);
c65ebc55 3176
97e242b0
RH
3177 if (frame_pointer_needed)
3178 {
3179 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3180 RTX_FRAME_RELATED_P (insn) = 1;
3181 }
c65ebc55 3182
97e242b0
RH
3183 if (current_frame_info.total_size != 0)
3184 {
3185 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3186 rtx offset;
c65ebc55 3187
13f70342 3188 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3189 offset = frame_size_rtx;
3190 else
3191 {
3192 regno = next_scratch_gr_reg ();
9c808aad 3193 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
3194 emit_move_insn (offset, frame_size_rtx);
3195 }
c65ebc55 3196
97e242b0
RH
3197 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3198 stack_pointer_rtx, offset));
c65ebc55 3199
97e242b0
RH
3200 if (! frame_pointer_needed)
3201 {
3202 RTX_FRAME_RELATED_P (insn) = 1;
3203 if (GET_CODE (offset) != CONST_INT)
bbbbb16a
ILT
3204 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3205 gen_rtx_SET (VOIDmode,
3206 stack_pointer_rtx,
3207 gen_rtx_PLUS (DImode,
3208 stack_pointer_rtx,
3209 frame_size_rtx)));
97e242b0 3210 }
c65ebc55 3211
97e242b0
RH
3212 /* ??? At this point we must generate a magic insn that appears to
3213 modify the stack pointer, the frame pointer, and all spill
3214 iterators. This would allow the most scheduling freedom. For
3215 now, just hard stop. */
3216 emit_insn (gen_blockage ());
3217 }
c65ebc55 3218
97e242b0
RH
3219 /* Must copy out ar.unat before doing any integer spills. */
3220 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 3221 {
6fb5fa3c
DB
3222 if (current_frame_info.r[reg_save_ar_unat])
3223 {
3224 ar_unat_save_reg
3225 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3226 reg_emitted (reg_save_ar_unat);
3227 }
97e242b0 3228 else
c65ebc55 3229 {
97e242b0
RH
3230 alt_regno = next_scratch_gr_reg ();
3231 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3232 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 3233 }
c65ebc55 3234
97e242b0
RH
3235 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3236 insn = emit_move_insn (ar_unat_save_reg, reg);
6fb5fa3c 3237 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
97e242b0
RH
3238
3239 /* Even if we're not going to generate an epilogue, we still
3240 need to save the register so that EH works. */
6fb5fa3c 3241 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
d0e82870 3242 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
3243 }
3244 else
97e242b0
RH
3245 ar_unat_save_reg = NULL_RTX;
3246
3247 /* Spill all varargs registers. Do this before spilling any GR registers,
3248 since we want the UNAT bits for the GR registers to override the UNAT
3249 bits from varargs, which we don't care about. */
c65ebc55 3250
97e242b0
RH
3251 cfa_off = -16;
3252 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 3253 {
97e242b0 3254 reg = gen_rtx_REG (DImode, regno);
870f9ec0 3255 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 3256 }
c65ebc55 3257
97e242b0
RH
3258 /* Locate the bottom of the register save area. */
3259 cfa_off = (current_frame_info.spill_cfa_off
3260 + current_frame_info.spill_size
3261 + current_frame_info.extra_spill_size);
c65ebc55 3262
97e242b0
RH
3263 /* Save the predicate register block either in a register or in memory. */
3264 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3265 {
3266 reg = gen_rtx_REG (DImode, PR_REG (0));
6fb5fa3c 3267 if (current_frame_info.r[reg_save_pr] != 0)
1ff5b671 3268 {
6fb5fa3c
DB
3269 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3270 reg_emitted (reg_save_pr);
97e242b0 3271 insn = emit_move_insn (alt_reg, reg);
1ff5b671 3272
97e242b0
RH
3273 /* ??? Denote pr spill/fill by a DImode move that modifies all
3274 64 hard registers. */
1ff5b671 3275 RTX_FRAME_RELATED_P (insn) = 1;
bbbbb16a
ILT
3276 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3277 gen_rtx_SET (VOIDmode, alt_reg, reg));
46327bc5 3278
97e242b0
RH
3279 /* Even if we're not going to generate an epilogue, we still
3280 need to save the register so that EH works. */
3281 if (! epilogue_p)
d0e82870 3282 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
3283 }
3284 else
97e242b0
RH
3285 {
3286 alt_regno = next_scratch_gr_reg ();
3287 alt_reg = gen_rtx_REG (DImode, alt_regno);
3288 insn = emit_move_insn (alt_reg, reg);
870f9ec0 3289 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3290 cfa_off -= 8;
3291 }
c65ebc55
JW
3292 }
3293
97e242b0
RH
3294 /* Handle AR regs in numerical order. All of them get special handling. */
3295 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6fb5fa3c 3296 && current_frame_info.r[reg_save_ar_unat] == 0)
c65ebc55 3297 {
97e242b0 3298 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 3299 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 3300 cfa_off -= 8;
c65ebc55 3301 }
97e242b0
RH
3302
3303 /* The alloc insn already copied ar.pfs into a general register. The
3304 only thing we have to do now is copy that register to a stack slot
3305 if we'd not allocated a local register for the job. */
f5bdba44 3306 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
6fb5fa3c 3307 && current_frame_info.r[reg_save_ar_pfs] == 0)
c65ebc55 3308 {
97e242b0 3309 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 3310 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
3311 cfa_off -= 8;
3312 }
3313
3314 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3315 {
3316 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
6fb5fa3c 3317 if (current_frame_info.r[reg_save_ar_lc] != 0)
97e242b0 3318 {
6fb5fa3c
DB
3319 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3320 reg_emitted (reg_save_ar_lc);
97e242b0
RH
3321 insn = emit_move_insn (alt_reg, reg);
3322 RTX_FRAME_RELATED_P (insn) = 1;
3323
3324 /* Even if we're not going to generate an epilogue, we still
3325 need to save the register so that EH works. */
3326 if (! epilogue_p)
d0e82870 3327 emit_insn (gen_prologue_use (alt_reg));
97e242b0 3328 }
c65ebc55
JW
3329 else
3330 {
97e242b0
RH
3331 alt_regno = next_scratch_gr_reg ();
3332 alt_reg = gen_rtx_REG (DImode, alt_regno);
3333 emit_move_insn (alt_reg, reg);
870f9ec0 3334 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3335 cfa_off -= 8;
3336 }
3337 }
3338
ae1e2d4c
AS
3339 /* Save the return pointer. */
3340 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3341 {
3342 reg = gen_rtx_REG (DImode, BR_REG (0));
6fb5fa3c 3343 if (current_frame_info.r[reg_save_b0] != 0)
ae1e2d4c 3344 {
6fb5fa3c
DB
3345 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3346 reg_emitted (reg_save_b0);
ae1e2d4c
AS
3347 insn = emit_move_insn (alt_reg, reg);
3348 RTX_FRAME_RELATED_P (insn) = 1;
3349
3350 /* Even if we're not going to generate an epilogue, we still
3351 need to save the register so that EH works. */
3352 if (! epilogue_p)
3353 emit_insn (gen_prologue_use (alt_reg));
3354 }
3355 else
3356 {
3357 alt_regno = next_scratch_gr_reg ();
3358 alt_reg = gen_rtx_REG (DImode, alt_regno);
3359 emit_move_insn (alt_reg, reg);
3360 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3361 cfa_off -= 8;
3362 }
3363 }
3364
6fb5fa3c 3365 if (current_frame_info.r[reg_save_gp])
599aedd9 3366 {
6fb5fa3c 3367 reg_emitted (reg_save_gp);
599aedd9 3368 insn = emit_move_insn (gen_rtx_REG (DImode,
6fb5fa3c 3369 current_frame_info.r[reg_save_gp]),
599aedd9 3370 pic_offset_table_rtx);
599aedd9
RH
3371 }
3372
97e242b0 3373 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3374 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3375 + current_frame_info.spill_size));
97e242b0
RH
3376
3377 /* Spill all general registers. */
3378 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3379 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3380 {
3381 reg = gen_rtx_REG (DImode, regno);
3382 do_spill (gen_gr_spill, reg, cfa_off, reg);
3383 cfa_off -= 8;
3384 }
3385
97e242b0
RH
3386 /* Spill the rest of the BR registers. */
3387 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3388 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3389 {
3390 alt_regno = next_scratch_gr_reg ();
3391 alt_reg = gen_rtx_REG (DImode, alt_regno);
3392 reg = gen_rtx_REG (DImode, regno);
3393 emit_move_insn (alt_reg, reg);
870f9ec0 3394 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3395 cfa_off -= 8;
3396 }
3397
3398 /* Align the frame and spill all FR registers. */
3399 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3400 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3401 {
e820471b 3402 gcc_assert (!(cfa_off & 15));
02befdf4 3403 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3404 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
3405 cfa_off -= 16;
3406 }
3407
e820471b 3408 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3409
3410 finish_spill_pointers ();
c65ebc55
JW
3411}
3412
3413/* Called after register allocation to add any instructions needed for the
5519a4f9 3414 epilogue. Using an epilogue insn is preferred to putting all of the
08c148a8 3415 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
 3416 to intermix instructions with the restores of the caller-saved registers. In
3417 some cases, it might be necessary to emit a barrier instruction as the last
3418 insn to prevent such scheduling. */
3419
3420void
9c808aad 3421ia64_expand_epilogue (int sibcall_p)
c65ebc55 3422{
97e242b0
RH
3423 rtx insn, reg, alt_reg, ar_unat_save_reg;
3424 int regno, alt_regno, cfa_off;
3425
3426 ia64_compute_frame_size (get_frame_size ());
3427
3428 /* If there is a frame pointer, then we use it instead of the stack
3429 pointer, so that the stack pointer does not need to be valid when
3430 the epilogue starts. See EXIT_IGNORE_STACK. */
3431 if (frame_pointer_needed)
3432 setup_spill_pointers (current_frame_info.n_spilled,
3433 hard_frame_pointer_rtx, 0);
3434 else
9c808aad 3435 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
3436 current_frame_info.total_size);
3437
3438 if (current_frame_info.total_size != 0)
3439 {
3440 /* ??? At this point we must generate a magic insn that appears to
3441 modify the spill iterators and the frame pointer. This would
3442 allow the most scheduling freedom. For now, just hard stop. */
3443 emit_insn (gen_blockage ());
3444 }
3445
3446 /* Locate the bottom of the register save area. */
3447 cfa_off = (current_frame_info.spill_cfa_off
3448 + current_frame_info.spill_size
3449 + current_frame_info.extra_spill_size);
3450
3451 /* Restore the predicate registers. */
3452 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3453 {
6fb5fa3c
DB
3454 if (current_frame_info.r[reg_save_pr] != 0)
3455 {
3456 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3457 reg_emitted (reg_save_pr);
3458 }
97e242b0
RH
3459 else
3460 {
3461 alt_regno = next_scratch_gr_reg ();
3462 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3463 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3464 cfa_off -= 8;
3465 }
3466 reg = gen_rtx_REG (DImode, PR_REG (0));
3467 emit_move_insn (reg, alt_reg);
3468 }
3469
3470 /* Restore the application registers. */
3471
3472 /* Load the saved unat from the stack, but do not restore it until
3473 after the GRs have been restored. */
3474 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3475 {
6fb5fa3c
DB
3476 if (current_frame_info.r[reg_save_ar_unat] != 0)
3477 {
3478 ar_unat_save_reg
3479 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3480 reg_emitted (reg_save_ar_unat);
3481 }
97e242b0
RH
3482 else
3483 {
3484 alt_regno = next_scratch_gr_reg ();
3485 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3486 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3487 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3488 cfa_off -= 8;
3489 }
3490 }
3491 else
3492 ar_unat_save_reg = NULL_RTX;
9c808aad 3493
6fb5fa3c 3494 if (current_frame_info.r[reg_save_ar_pfs] != 0)
97e242b0 3495 {
6fb5fa3c
DB
3496 reg_emitted (reg_save_ar_pfs);
3497 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
97e242b0
RH
3498 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3499 emit_move_insn (reg, alt_reg);
3500 }
4e14f1f9 3501 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3502 {
97e242b0
RH
3503 alt_regno = next_scratch_gr_reg ();
3504 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3505 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3506 cfa_off -= 8;
3507 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3508 emit_move_insn (reg, alt_reg);
3509 }
3510
3511 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3512 {
6fb5fa3c
DB
3513 if (current_frame_info.r[reg_save_ar_lc] != 0)
3514 {
3515 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3516 reg_emitted (reg_save_ar_lc);
3517 }
97e242b0
RH
3518 else
3519 {
3520 alt_regno = next_scratch_gr_reg ();
3521 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3522 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3523 cfa_off -= 8;
3524 }
3525 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3526 emit_move_insn (reg, alt_reg);
3527 }
3528
ae1e2d4c
AS
3529 /* Restore the return pointer. */
3530 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3531 {
6fb5fa3c
DB
3532 if (current_frame_info.r[reg_save_b0] != 0)
3533 {
3534 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3535 reg_emitted (reg_save_b0);
3536 }
ae1e2d4c
AS
3537 else
3538 {
3539 alt_regno = next_scratch_gr_reg ();
3540 alt_reg = gen_rtx_REG (DImode, alt_regno);
3541 do_restore (gen_movdi_x, alt_reg, cfa_off);
3542 cfa_off -= 8;
3543 }
3544 reg = gen_rtx_REG (DImode, BR_REG (0));
3545 emit_move_insn (reg, alt_reg);
3546 }
3547
97e242b0 3548 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3549 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3550 + current_frame_info.spill_size));
97e242b0 3551
599aedd9
RH
3552 /* The GP may be stored on the stack in the prologue, but it's
3553 never restored in the epilogue. Skip the stack slot. */
3554 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3555 cfa_off -= 8;
3556
97e242b0 3557 /* Restore all general registers. */
599aedd9 3558 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 3559 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3560 {
97e242b0
RH
3561 reg = gen_rtx_REG (DImode, regno);
3562 do_restore (gen_gr_restore, reg, cfa_off);
3563 cfa_off -= 8;
0c96007e 3564 }
9c808aad 3565
ae1e2d4c 3566 /* Restore the branch registers. */
97e242b0
RH
3567 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3568 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3569 {
97e242b0
RH
3570 alt_regno = next_scratch_gr_reg ();
3571 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3572 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3573 cfa_off -= 8;
3574 reg = gen_rtx_REG (DImode, regno);
3575 emit_move_insn (reg, alt_reg);
3576 }
c65ebc55 3577
97e242b0
RH
3578 /* Restore floating point registers. */
3579 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3580 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3581 {
e820471b 3582 gcc_assert (!(cfa_off & 15));
02befdf4 3583 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3584 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 3585 cfa_off -= 16;
0c96007e 3586 }
97e242b0
RH
3587
3588 /* Restore ar.unat for real. */
3589 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3590 {
3591 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3592 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
3593 }
3594
e820471b 3595 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3596
3597 finish_spill_pointers ();
c65ebc55 3598
c93646bd
JJ
3599 if (current_frame_info.total_size
3600 || cfun->machine->ia64_eh_epilogue_sp
3601 || frame_pointer_needed)
97e242b0
RH
3602 {
3603 /* ??? At this point we must generate a magic insn that appears to
3604 modify the spill iterators, the stack pointer, and the frame
3605 pointer. This would allow the most scheduling freedom. For now,
3606 just hard stop. */
3607 emit_insn (gen_blockage ());
3608 }
c65ebc55 3609
97e242b0
RH
3610 if (cfun->machine->ia64_eh_epilogue_sp)
3611 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3612 else if (frame_pointer_needed)
3613 {
3614 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3615 RTX_FRAME_RELATED_P (insn) = 1;
3616 }
3617 else if (current_frame_info.total_size)
0c96007e 3618 {
97e242b0
RH
3619 rtx offset, frame_size_rtx;
3620
3621 frame_size_rtx = GEN_INT (current_frame_info.total_size);
13f70342 3622 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3623 offset = frame_size_rtx;
3624 else
3625 {
3626 regno = next_scratch_gr_reg ();
3627 offset = gen_rtx_REG (DImode, regno);
3628 emit_move_insn (offset, frame_size_rtx);
3629 }
3630
3631 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3632 offset));
3633
3634 RTX_FRAME_RELATED_P (insn) = 1;
3635 if (GET_CODE (offset) != CONST_INT)
bbbbb16a
ILT
3636 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3637 gen_rtx_SET (VOIDmode,
3638 stack_pointer_rtx,
3639 gen_rtx_PLUS (DImode,
3640 stack_pointer_rtx,
3641 frame_size_rtx)));
0c96007e 3642 }
97e242b0
RH
3643
3644 if (cfun->machine->ia64_eh_epilogue_bsp)
3645 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 3646
2ed4af6f
RH
3647 if (! sibcall_p)
3648 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 3649 else
8206fc89
AM
3650 {
3651 int fp = GR_REG (2);
 3652 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
9c808aad
AJ
 3653 first available call-clobbered register. If there was a frame pointer
3654 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
8206fc89 3655 so we have to make sure we're using the string "r2" when emitting
9e4f94de 3656 the register name for the assembler. */
6fb5fa3c
DB
3657 if (current_frame_info.r[reg_fp]
3658 && current_frame_info.r[reg_fp] == GR_REG (2))
8206fc89
AM
3659 fp = HARD_FRAME_POINTER_REGNUM;
3660
3661 /* We must emit an alloc to force the input registers to become output
3662 registers. Otherwise, if the callee tries to pass its parameters
3663 through to another call without an intervening alloc, then these
3664 values get lost. */
3665 /* ??? We don't need to preserve all input registers. We only need to
3666 preserve those input registers used as arguments to the sibling call.
3667 It is unclear how to compute that number here. */
3668 if (current_frame_info.n_input_regs != 0)
a8f5224e
DM
3669 {
3670 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3671 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3672 const0_rtx, const0_rtx,
3673 n_inputs, const0_rtx));
3674 RTX_FRAME_RELATED_P (insn) = 1;
3675 }
8206fc89 3676 }
c65ebc55
JW
3677}
3678
97e242b0
RH
3679/* Return 1 if br.ret can do all the work required to return from a
3680 function. */
3681
3682int
9c808aad 3683ia64_direct_return (void)
97e242b0
RH
3684{
3685 if (reload_completed && ! frame_pointer_needed)
3686 {
3687 ia64_compute_frame_size (get_frame_size ());
3688
3689 return (current_frame_info.total_size == 0
3690 && current_frame_info.n_spilled == 0
6fb5fa3c
DB
3691 && current_frame_info.r[reg_save_b0] == 0
3692 && current_frame_info.r[reg_save_pr] == 0
3693 && current_frame_info.r[reg_save_ar_pfs] == 0
3694 && current_frame_info.r[reg_save_ar_unat] == 0
3695 && current_frame_info.r[reg_save_ar_lc] == 0);
97e242b0
RH
3696 }
3697 return 0;
3698}
3699
af1e5518
RH
3700/* Return the magic cookie that we use to hold the return address
3701 during early compilation. */
3702
3703rtx
9c808aad 3704ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
3705{
3706 if (count != 0)
3707 return NULL;
3708 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3709}
3710
3711/* Split this value after reload, now that we know where the return
3712 address is saved. */
3713
3714void
9c808aad 3715ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
3716{
3717 rtx src;
3718
3719 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3720 {
6fb5fa3c
DB
3721 if (current_frame_info.r[reg_save_b0] != 0)
3722 {
3723 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3724 reg_emitted (reg_save_b0);
3725 }
af1e5518
RH
3726 else
3727 {
3728 HOST_WIDE_INT off;
3729 unsigned int regno;
13f70342 3730 rtx off_r;
af1e5518
RH
3731
3732 /* Compute offset from CFA for BR0. */
3733 /* ??? Must be kept in sync with ia64_expand_prologue. */
3734 off = (current_frame_info.spill_cfa_off
3735 + current_frame_info.spill_size);
3736 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3737 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3738 off -= 8;
3739
3740 /* Convert CFA offset to a register based offset. */
3741 if (frame_pointer_needed)
3742 src = hard_frame_pointer_rtx;
3743 else
3744 {
3745 src = stack_pointer_rtx;
3746 off += current_frame_info.total_size;
3747 }
3748
3749 /* Load address into scratch register. */
13f70342
RH
3750 off_r = GEN_INT (off);
3751 if (satisfies_constraint_I (off_r))
3752 emit_insn (gen_adddi3 (dest, src, off_r));
af1e5518
RH
3753 else
3754 {
13f70342 3755 emit_move_insn (dest, off_r);
af1e5518
RH
3756 emit_insn (gen_adddi3 (dest, src, dest));
3757 }
3758
3759 src = gen_rtx_MEM (Pmode, dest);
3760 }
3761 }
3762 else
3763 src = gen_rtx_REG (DImode, BR_REG (0));
3764
3765 emit_move_insn (dest, src);
3766}
3767
10c9f189 3768int
9c808aad 3769ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
3770{
3771 /* Don't clobber any of the registers we reserved for the prologue. */
09639a83 3772 unsigned int r;
10c9f189 3773
6fb5fa3c
DB
3774 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3775 if (to == current_frame_info.r[r]
3776 || from == current_frame_info.r[r]
3777 || to == emitted_frame_related_regs[r]
3778 || from == emitted_frame_related_regs[r])
3779 return 0;
2130b7fb 3780
10c9f189
RH
3781 /* Don't use output registers outside the register frame. */
3782 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3783 return 0;
3784
3785 /* Retain even/oddness on predicate register pairs. */
3786 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3787 return (from & 1) == (to & 1);
3788
3789 return 1;
3790}
3791
301d03af
RS
3792/* Target hook for assembling integer objects. Handle word-sized
3793 aligned objects and detect the cases when @fptr is needed. */
3794
3795static bool
9c808aad 3796ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3797{
b6a41a62 3798 if (size == POINTER_SIZE / BITS_PER_UNIT
301d03af
RS
3799 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3800 && GET_CODE (x) == SYMBOL_REF
1cdbd630 3801 && SYMBOL_REF_FUNCTION_P (x))
301d03af 3802 {
1b79dc38
DM
3803 static const char * const directive[2][2] = {
3804 /* 64-bit pointer */ /* 32-bit pointer */
3805 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3806 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3807 };
3808 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
301d03af
RS
3809 output_addr_const (asm_out_file, x);
3810 fputs (")\n", asm_out_file);
3811 return true;
3812 }
3813 return default_assemble_integer (x, size, aligned_p);
3814}
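/* Illustrative only (hypothetical symbol name; the exact operand spelling is
   whatever output_addr_const produces): a word-sized static initializer such as

       void (*handler) (void) = my_handler;

   is emitted through this hook as a function-descriptor reference, roughly

       data8   @fptr(my_handler)

   with the .ua ("unaligned") form of the directive when ALIGNED_P is zero.  */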
3815
c65ebc55
JW
3816/* Emit the function prologue. */
3817
08c148a8 3818static void
9c808aad 3819ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3820{
97e242b0
RH
3821 int mask, grsave, grsave_prev;
3822
3823 if (current_frame_info.need_regstk)
3824 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3825 current_frame_info.n_input_regs,
3826 current_frame_info.n_local_regs,
3827 current_frame_info.n_output_regs,
3828 current_frame_info.n_rotate_regs);
c65ebc55 3829
531073e7 3830 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
3831 return;
3832
97e242b0 3833 /* Emit the .prologue directive. */
809d4ef1 3834
97e242b0
RH
3835 mask = 0;
3836 grsave = grsave_prev = 0;
6fb5fa3c 3837 if (current_frame_info.r[reg_save_b0] != 0)
0c96007e 3838 {
97e242b0 3839 mask |= 8;
6fb5fa3c 3840 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
97e242b0 3841 }
6fb5fa3c 3842 if (current_frame_info.r[reg_save_ar_pfs] != 0
97e242b0 3843 && (grsave_prev == 0
6fb5fa3c 3844 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
97e242b0
RH
3845 {
3846 mask |= 4;
3847 if (grsave_prev == 0)
6fb5fa3c
DB
3848 grsave = current_frame_info.r[reg_save_ar_pfs];
3849 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
0c96007e 3850 }
6fb5fa3c 3851 if (current_frame_info.r[reg_fp] != 0
97e242b0 3852 && (grsave_prev == 0
6fb5fa3c 3853 || current_frame_info.r[reg_fp] == grsave_prev + 1))
97e242b0
RH
3854 {
3855 mask |= 2;
3856 if (grsave_prev == 0)
3857 grsave = HARD_FRAME_POINTER_REGNUM;
6fb5fa3c 3858 grsave_prev = current_frame_info.r[reg_fp];
97e242b0 3859 }
6fb5fa3c 3860 if (current_frame_info.r[reg_save_pr] != 0
97e242b0 3861 && (grsave_prev == 0
6fb5fa3c 3862 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
97e242b0
RH
3863 {
3864 mask |= 1;
3865 if (grsave_prev == 0)
6fb5fa3c 3866 grsave = current_frame_info.r[reg_save_pr];
97e242b0
RH
3867 }
3868
738e7b39 3869 if (mask && TARGET_GNU_AS)
97e242b0
RH
3870 fprintf (file, "\t.prologue %d, %d\n", mask,
3871 ia64_dbx_register_number (grsave));
3872 else
3873 fputs ("\t.prologue\n", file);
3874
3875 /* Emit a .spill directive, if necessary, to relocate the base of
3876 the register spill area. */
3877 if (current_frame_info.spill_cfa_off != -16)
3878 fprintf (file, "\t.spill %ld\n",
3879 (long) (current_frame_info.spill_cfa_off
3880 + current_frame_info.spill_size));
c65ebc55
JW
3881}
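/* A sketch of the unwind directives this can produce (the values are
   hypothetical, not taken from any real function):

       .regstk 2, 3, 1, 0        // input, local, output, rotating counts
       .prologue 12, 35          // mask of saved regs, first GR used (GNU as only)
       .spill 16                 // relocated base of the register spill area

   The mask bits assembled above are: 8 = b0 saved in a GR, 4 = ar.pfs,
   2 = frame pointer, 1 = predicate block.  */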
3882
0186257f
JW
3883/* Emit the .body directive at the scheduled end of the prologue. */
3884
b4c25db2 3885static void
9c808aad 3886ia64_output_function_end_prologue (FILE *file)
0186257f 3887{
531073e7 3888 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
3889 return;
3890
3891 fputs ("\t.body\n", file);
3892}
3893
c65ebc55
JW
3894/* Emit the function epilogue. */
3895
08c148a8 3896static void
9c808aad
AJ
3897ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3898 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3899{
8a959ea5
RH
3900 int i;
3901
6fb5fa3c 3902 if (current_frame_info.r[reg_fp])
97e242b0
RH
3903 {
3904 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3905 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3906 = reg_names[current_frame_info.r[reg_fp]];
3907 reg_names[current_frame_info.r[reg_fp]] = tmp;
3908 reg_emitted (reg_fp);
97e242b0
RH
3909 }
3910 if (! TARGET_REG_NAMES)
3911 {
97e242b0
RH
3912 for (i = 0; i < current_frame_info.n_input_regs; i++)
3913 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3914 for (i = 0; i < current_frame_info.n_local_regs; i++)
3915 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3916 for (i = 0; i < current_frame_info.n_output_regs; i++)
3917 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3918 }
8a959ea5 3919
97e242b0
RH
3920 current_frame_info.initialized = 0;
3921}
c65ebc55
JW
3922
3923int
9c808aad 3924ia64_dbx_register_number (int regno)
c65ebc55 3925{
97e242b0
RH
3926 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3927 from its home at loc79 to something inside the register frame. We
3928 must perform the same renumbering here for the debug info. */
6fb5fa3c 3929 if (current_frame_info.r[reg_fp])
97e242b0
RH
3930 {
3931 if (regno == HARD_FRAME_POINTER_REGNUM)
6fb5fa3c
DB
3932 regno = current_frame_info.r[reg_fp];
3933 else if (regno == current_frame_info.r[reg_fp])
97e242b0
RH
3934 regno = HARD_FRAME_POINTER_REGNUM;
3935 }
3936
3937 if (IN_REGNO_P (regno))
3938 return 32 + regno - IN_REG (0);
3939 else if (LOC_REGNO_P (regno))
3940 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3941 else if (OUT_REGNO_P (regno))
3942 return (32 + current_frame_info.n_input_regs
3943 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3944 else
3945 return regno;
c65ebc55
JW
3946}
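/* Worked example (hypothetical frame): with n_input_regs == 2 and
   n_local_regs == 3, in1 maps to debug register 33 (32 + 1), loc0 to 34
   (32 + 2 + 0), and out0 to 37 (32 + 2 + 3 + 0), mirroring the in/loc/out
   layout of the register stack frame.  */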
3947
97e242b0 3948void
9c808aad 3949ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
97e242b0 3950{
f2972bf8 3951 rtx addr_reg, tramp, eight = GEN_INT (8);
97e242b0 3952
738e7b39
RK
3953 /* The Intel assembler requires that the global __ia64_trampoline symbol
3954 be declared explicitly */
3955 if (!TARGET_GNU_AS)
3956 {
3957 static bool declared_ia64_trampoline = false;
3958
3959 if (!declared_ia64_trampoline)
3960 {
3961 declared_ia64_trampoline = true;
b6a41a62
RK
3962 (*targetm.asm_out.globalize_label) (asm_out_file,
3963 "__ia64_trampoline");
738e7b39
RK
3964 }
3965 }
3966
5e89a381
SE
3967 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3968 addr = convert_memory_address (Pmode, addr);
3969 fnaddr = convert_memory_address (Pmode, fnaddr);
3970 static_chain = convert_memory_address (Pmode, static_chain);
3971
97e242b0
RH
3972 /* Load up our iterator. */
3973 addr_reg = gen_reg_rtx (Pmode);
3974 emit_move_insn (addr_reg, addr);
3975
3976 /* The first two words are the fake descriptor:
3977 __ia64_trampoline, ADDR+16. */
f2972bf8
DR
3978 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
3979 if (TARGET_ABI_OPEN_VMS)
3980 {
3981 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
3982 in the Macro-32 compiler) and changed the semantics of the LTOFF22
3983 relocation against function symbols to make it identical to the
3984 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
3985 strict ELF and dereference to get the bare code address. */
3986 rtx reg = gen_reg_rtx (Pmode);
3987 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
3988 emit_move_insn (reg, tramp);
3989 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
3990 tramp = reg;
3991 }
3992 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), tramp);
97e242b0
RH
3993 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3994
3995 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3996 copy_to_reg (plus_constant (addr, 16)));
3997 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3998
3999 /* The third word is the target descriptor. */
4000 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
4001 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4002
4003 /* The fourth word is the static chain. */
4004 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
4005}
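/* A sketch of the trampoline image built above (offsets relative to ADDR;
   illustrative, not an authoritative ABI description):

     ADDR +  0:  __ia64_trampoline    } fake function descriptor handed out
     ADDR +  8:  ADDR + 16            }   as the address of the trampoline
     ADDR + 16:  FNADDR               target function descriptor
     ADDR + 24:  STATIC_CHAIN         static chain value

   The __ia64_trampoline stub presumably locates FNADDR and STATIC_CHAIN
   through the gp value (ADDR + 16) stored in the fake descriptor.  */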
c65ebc55
JW
4006\f
4007/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
4008 for the last named argument which has type TYPE and mode MODE.
4009
4010 We generate the actual spill instructions during prologue generation. */
4011
351a758b
KH
4012static void
4013ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4014 tree type, int * pretend_size,
9c808aad 4015 int second_time ATTRIBUTE_UNUSED)
c65ebc55 4016{
351a758b
KH
4017 CUMULATIVE_ARGS next_cum = *cum;
4018
6c535c69 4019 /* Skip the current argument. */
351a758b 4020 ia64_function_arg_advance (&next_cum, mode, type, 1);
c65ebc55 4021
351a758b 4022 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 4023 {
351a758b 4024 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
4025 *pretend_size = n * UNITS_PER_WORD;
4026 cfun->machine->n_varargs = n;
4027 }
c65ebc55
JW
4028}
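/* Worked example (assuming the usual IA-64 values MAX_ARGUMENT_SLOTS == 8
   and UNITS_PER_WORD == 8): for a function such as

       int f (const char *fmt, ...);

   the single named argument leaves next_cum.words == 1, so n == 7 and
   *pretend_size == 56 -- the prologue will spill the seven remaining
   anonymous argument registers.  */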
4029
4030/* Check whether TYPE is a homogeneous floating point aggregate. If
4031 it is, return the mode of the floating point type that appears
 4032 in all leaves. If it is not, return VOIDmode.
4033
 4034 An aggregate is a homogeneous floating point aggregate if all
 4035 fields/elements in it have the same floating point type (e.g.,
3d6a9acd
RH
4036 SFmode). 128-bit quad-precision floats are excluded.
4037
4038 Variable sized aggregates should never arrive here, since we should
4039 have already decided to pass them by reference. Top-level zero-sized
4040 aggregates are excluded because our parallels crash the middle-end. */
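/* A few purely illustrative (hypothetical) examples, classified by the rules
   stated above:

       struct rgb   { float r, g, b; };       HFA, element mode SFmode
       struct span  { double lo, hi; };       HFA, element mode DFmode
       _Complex double z;                     element mode DFmode (complex FP counts)
       struct mixed { float x; double y; };   not an HFA -- element modes differ  */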
c65ebc55
JW
4041
4042static enum machine_mode
586de218 4043hfa_element_mode (const_tree type, bool nested)
c65ebc55
JW
4044{
4045 enum machine_mode element_mode = VOIDmode;
4046 enum machine_mode mode;
4047 enum tree_code code = TREE_CODE (type);
4048 int know_element_mode = 0;
4049 tree t;
4050
3d6a9acd
RH
4051 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4052 return VOIDmode;
4053
c65ebc55
JW
4054 switch (code)
4055 {
4056 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
0cc8f5c5 4057 case BOOLEAN_TYPE: case POINTER_TYPE:
c65ebc55 4058 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
5662a50d 4059 case LANG_TYPE: case FUNCTION_TYPE:
c65ebc55
JW
4060 return VOIDmode;
4061
4062 /* Fortran complex types are supposed to be HFAs, so we need to handle
4063 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4064 types though. */
4065 case COMPLEX_TYPE:
16448fd4 4066 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
4067 && TYPE_MODE (type) != TCmode)
4068 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
4069 else
4070 return VOIDmode;
4071
4072 case REAL_TYPE:
4073 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4074 mode if this is contained within an aggregate. */
02befdf4 4075 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
4076 return TYPE_MODE (type);
4077 else
4078 return VOIDmode;
4079
4080 case ARRAY_TYPE:
46399021 4081 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
4082
4083 case RECORD_TYPE:
4084 case UNION_TYPE:
4085 case QUAL_UNION_TYPE:
4086 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
4087 {
4088 if (TREE_CODE (t) != FIELD_DECL)
4089 continue;
4090
4091 mode = hfa_element_mode (TREE_TYPE (t), 1);
4092 if (know_element_mode)
4093 {
4094 if (mode != element_mode)
4095 return VOIDmode;
4096 }
4097 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4098 return VOIDmode;
4099 else
4100 {
4101 know_element_mode = 1;
4102 element_mode = mode;
4103 }
4104 }
4105 return element_mode;
4106
4107 default:
4108 /* If we reach here, we probably have some front-end specific type
4109 that the backend doesn't know about. This can happen via the
4110 aggregate_value_p call in init_function_start. All we can do is
4111 ignore unknown tree types. */
4112 return VOIDmode;
4113 }
4114
4115 return VOIDmode;
4116}
4117
f57fc998
ZW
4118/* Return the number of words required to hold a quantity of TYPE and MODE
4119 when passed as an argument. */
4120static int
4121ia64_function_arg_words (tree type, enum machine_mode mode)
4122{
4123 int words;
4124
4125 if (mode == BLKmode)
4126 words = int_size_in_bytes (type);
4127 else
4128 words = GET_MODE_SIZE (mode);
4129
4130 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4131}
4132
4133/* Return the number of registers that should be skipped so the current
4134 argument (described by TYPE and WORDS) will be properly aligned.
4135
4136 Integer and float arguments larger than 8 bytes start at the next
4137 even boundary. Aggregates larger than 8 bytes start at the next
4138 even boundary if the aggregate has 16 byte alignment. Note that
4139 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4140 but are still to be aligned in registers.
4141
4142 ??? The ABI does not specify how to handle aggregates with
4143 alignment from 9 to 15 bytes, or greater than 16. We handle them
4144 all as if they had 16 byte alignment. Such aggregates can occur
4145 only if gcc extensions are used. */
4146static int
4147ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4148{
f2972bf8
DR
4149 /* No registers are skipped on VMS. */
4150 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
f57fc998
ZW
4151 return 0;
4152
4153 if (type
4154 && TREE_CODE (type) != INTEGER_TYPE
4155 && TREE_CODE (type) != REAL_TYPE)
4156 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4157 else
4158 return words > 1;
4159}
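/* Worked example (hypothetical signature): for

       void f (int a, long double b);

   A occupies slot 0, so cum->words is odd (1) when B is laid out.  B is a
   REAL_TYPE wider than 8 bytes (words == 2), so the function above returns
   1 and B starts at the next even slot.  */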
4160
c65ebc55
JW
4161/* Return rtx for register where argument is passed, or zero if it is passed
4162 on the stack. */
c65ebc55
JW
4163/* ??? 128-bit quad-precision floats are always passed in general
4164 registers. */
4165
4166rtx
9c808aad
AJ
4167ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4168 int named, int incoming)
c65ebc55
JW
4169{
4170 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
4171 int words = ia64_function_arg_words (type, mode);
4172 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4173 enum machine_mode hfa_mode = VOIDmode;
4174
f2972bf8
DR
 4175 /* For OpenVMS, emit the instruction setting up the argument register here,
 4176 when we know it will be emitted together with the other argument-setup
 4177 insns. This is not conceptually the best place to do this, but it is
 4178 the easiest, since we have convenient access to the cumulative args info. */
4179
4180 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4181 && named == 1)
4182 {
4183 unsigned HOST_WIDE_INT regval = cum->words;
4184 int i;
4185
4186 for (i = 0; i < 8; i++)
4187 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4188
4189 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4190 GEN_INT (regval));
4191 }
4192
c65ebc55
JW
4193 /* If all argument slots are used, then it must go on the stack. */
4194 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4195 return 0;
4196
4197 /* Check for and handle homogeneous FP aggregates. */
4198 if (type)
4199 hfa_mode = hfa_element_mode (type, 0);
4200
4201 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4202 and unprototyped hfas are passed specially. */
4203 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4204 {
4205 rtx loc[16];
4206 int i = 0;
4207 int fp_regs = cum->fp_regs;
4208 int int_regs = cum->words + offset;
4209 int hfa_size = GET_MODE_SIZE (hfa_mode);
4210 int byte_size;
4211 int args_byte_size;
4212
4213 /* If prototyped, pass it in FR regs then GR regs.
4214 If not prototyped, pass it in both FR and GR regs.
4215
4216 If this is an SFmode aggregate, then it is possible to run out of
4217 FR regs while GR regs are still left. In that case, we pass the
4218 remaining part in the GR regs. */
4219
4220 /* Fill the FP regs. We do this always. We stop if we reach the end
4221 of the argument, the last FP register, or the last argument slot. */
4222
4223 byte_size = ((mode == BLKmode)
4224 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4225 args_byte_size = int_regs * UNITS_PER_WORD;
4226 offset = 0;
4227 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4228 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4229 {
4230 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4231 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4232 + fp_regs)),
4233 GEN_INT (offset));
c65ebc55
JW
4234 offset += hfa_size;
4235 args_byte_size += hfa_size;
4236 fp_regs++;
4237 }
4238
4239 /* If no prototype, then the whole thing must go in GR regs. */
4240 if (! cum->prototype)
4241 offset = 0;
4242 /* If this is an SFmode aggregate, then we might have some left over
4243 that needs to go in GR regs. */
4244 else if (byte_size != offset)
4245 int_regs += offset / UNITS_PER_WORD;
4246
4247 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4248
4249 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4250 {
4251 enum machine_mode gr_mode = DImode;
826b47cc 4252 unsigned int gr_size;
c65ebc55
JW
4253
4254 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4255 then this goes in a GR reg left adjusted/little endian, right
4256 adjusted/big endian. */
4257 /* ??? Currently this is handled wrong, because 4-byte hunks are
4258 always right adjusted/little endian. */
4259 if (offset & 0x4)
4260 gr_mode = SImode;
4261 /* If we have an even 4 byte hunk because the aggregate is a
4262 multiple of 4 bytes in size, then this goes in a GR reg right
4263 adjusted/little endian. */
4264 else if (byte_size - offset == 4)
4265 gr_mode = SImode;
4266
4267 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4268 gen_rtx_REG (gr_mode, (basereg
4269 + int_regs)),
4270 GEN_INT (offset));
826b47cc
ZW
4271
4272 gr_size = GET_MODE_SIZE (gr_mode);
4273 offset += gr_size;
4274 if (gr_size == UNITS_PER_WORD
4275 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4276 int_regs++;
4277 else if (gr_size > UNITS_PER_WORD)
4278 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55 4279 }
9dec91d4 4280 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4281 }
f2972bf8
DR
4282
4283 /* On OpenVMS variable argument is either in Rn or Fn. */
4284 else if (TARGET_ABI_OPEN_VMS && named == 0)
4285 {
4286 if (FLOAT_MODE_P (mode))
4287 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4288 else
4289 return gen_rtx_REG (mode, basereg + cum->words);
4290 }
c65ebc55
JW
4291
4292 /* Integral and aggregates go in general registers. If we have run out of
4293 FR registers, then FP values must also go in general registers. This can
4294 happen when we have a SFmode HFA. */
02befdf4
ZW
4295 else if (mode == TFmode || mode == TCmode
4296 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
4297 {
4298 int byte_size = ((mode == BLKmode)
4299 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4300 if (BYTES_BIG_ENDIAN
4301 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4302 && byte_size < UNITS_PER_WORD
4303 && byte_size > 0)
4304 {
4305 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4306 gen_rtx_REG (DImode,
4307 (basereg + cum->words
4308 + offset)),
4309 const0_rtx);
4310 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4311 }
4312 else
4313 return gen_rtx_REG (mode, basereg + cum->words + offset);
4314
4315 }
c65ebc55
JW
4316
4317 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4318 named, and in a GR register when unnamed. */
c65ebc55
JW
4319 else if (cum->prototype)
4320 {
f9c887ac 4321 if (named)
c65ebc55 4322 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
4323 /* In big-endian mode, an anonymous SFmode value must be represented
4324 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4325 the value into the high half of the general register. */
4326 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4327 return gen_rtx_PARALLEL (mode,
4328 gen_rtvec (1,
4329 gen_rtx_EXPR_LIST (VOIDmode,
4330 gen_rtx_REG (DImode, basereg + cum->words + offset),
4331 const0_rtx)));
4332 else
4333 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
4334 }
4335 /* If there is no prototype, then FP values go in both FR and GR
4336 registers. */
4337 else
4338 {
f9c887ac
ZW
4339 /* See comment above. */
4340 enum machine_mode inner_mode =
4341 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4342
c65ebc55
JW
4343 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4344 gen_rtx_REG (mode, (FR_ARG_FIRST
4345 + cum->fp_regs)),
4346 const0_rtx);
4347 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 4348 gen_rtx_REG (inner_mode,
c65ebc55
JW
4349 (basereg + cum->words
4350 + offset)),
4351 const0_rtx);
809d4ef1 4352
c65ebc55
JW
4353 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4354 }
4355}
4356
78a52f11 4357/* Return number of bytes, at the beginning of the argument, that must be
c65ebc55
JW
 4358 put in registers. 0 if the argument is entirely in registers or entirely
4359 in memory. */
4360
78a52f11
RH
4361static int
4362ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4363 tree type, bool named ATTRIBUTE_UNUSED)
c65ebc55 4364{
f57fc998
ZW
4365 int words = ia64_function_arg_words (type, mode);
4366 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4367
4368 /* If all argument slots are used, then it must go on the stack. */
4369 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4370 return 0;
4371
4372 /* It doesn't matter whether the argument goes in FR or GR regs. If
4373 it fits within the 8 argument slots, then it goes entirely in
4374 registers. If it extends past the last argument slot, then the rest
4375 goes on the stack. */
4376
4377 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4378 return 0;
4379
78a52f11 4380 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
c65ebc55
JW
4381}
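/* Worked example (hypothetical call, assuming the usual 8 slots of 8 bytes):
   a 32-byte aggregate (words == 4) arriving with cum->words == 6 and
   offset == 0 overflows the slot area, so this returns (8 - 6 - 0) * 8 == 16:
   the first 16 bytes travel in registers and the remaining 16 bytes go on
   the stack.  */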
4382
f2972bf8
DR
4383/* Return ivms_arg_type based on machine_mode. */
4384
4385static enum ivms_arg_type
4386ia64_arg_type (enum machine_mode mode)
4387{
4388 switch (mode)
4389 {
4390 case SFmode:
4391 return FS;
4392 case DFmode:
4393 return FT;
4394 default:
4395 return I64;
4396 }
4397}
4398
c65ebc55
JW
4399/* Update CUM to point after this argument. This is patterned after
4400 ia64_function_arg. */
4401
4402void
9c808aad
AJ
4403ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4404 tree type, int named)
c65ebc55 4405{
f57fc998
ZW
4406 int words = ia64_function_arg_words (type, mode);
4407 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4408 enum machine_mode hfa_mode = VOIDmode;
4409
4410 /* If all arg slots are already full, then there is nothing to do. */
4411 if (cum->words >= MAX_ARGUMENT_SLOTS)
f2972bf8
DR
4412 {
4413 cum->words += words + offset;
4414 return;
4415 }
c65ebc55 4416
f2972bf8 4417 cum->atypes[cum->words] = ia64_arg_type (mode);
c65ebc55
JW
4418 cum->words += words + offset;
4419
4420 /* Check for and handle homogeneous FP aggregates. */
4421 if (type)
4422 hfa_mode = hfa_element_mode (type, 0);
4423
4424 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4425 and unprototyped hfas are passed specially. */
4426 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4427 {
4428 int fp_regs = cum->fp_regs;
4429 /* This is the original value of cum->words + offset. */
4430 int int_regs = cum->words - words;
4431 int hfa_size = GET_MODE_SIZE (hfa_mode);
4432 int byte_size;
4433 int args_byte_size;
4434
4435 /* If prototyped, pass it in FR regs then GR regs.
4436 If not prototyped, pass it in both FR and GR regs.
4437
4438 If this is an SFmode aggregate, then it is possible to run out of
4439 FR regs while GR regs are still left. In that case, we pass the
4440 remaining part in the GR regs. */
4441
4442 /* Fill the FP regs. We do this always. We stop if we reach the end
4443 of the argument, the last FP register, or the last argument slot. */
4444
4445 byte_size = ((mode == BLKmode)
4446 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4447 args_byte_size = int_regs * UNITS_PER_WORD;
4448 offset = 0;
4449 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4450 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4451 {
c65ebc55
JW
4452 offset += hfa_size;
4453 args_byte_size += hfa_size;
4454 fp_regs++;
4455 }
4456
4457 cum->fp_regs = fp_regs;
4458 }
4459
f2972bf8
DR
4460 /* On OpenVMS variable argument is either in Rn or Fn. */
4461 else if (TARGET_ABI_OPEN_VMS && named == 0)
4462 {
4463 cum->int_regs = cum->words;
4464 cum->fp_regs = cum->words;
4465 }
4466
d13256a3
SE
4467 /* Integral and aggregates go in general registers. So do TFmode FP values.
4468 If we have run out of FR registers, then other FP values must also go in
4469 general registers. This can happen when we have a SFmode HFA. */
4470 else if (mode == TFmode || mode == TCmode
4471 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
648fe28b 4472 cum->int_regs = cum->words;
c65ebc55
JW
4473
4474 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4475 named, and in a GR register when unnamed. */
c65ebc55
JW
4476 else if (cum->prototype)
4477 {
4478 if (! named)
648fe28b 4479 cum->int_regs = cum->words;
c65ebc55
JW
4480 else
4481 /* ??? Complex types should not reach here. */
4482 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4483 }
4484 /* If there is no prototype, then FP values go in both FR and GR
4485 registers. */
4486 else
9c808aad 4487 {
648fe28b
RH
4488 /* ??? Complex types should not reach here. */
4489 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4490 cum->int_regs = cum->words;
4491 }
c65ebc55 4492}
51dcde6f 4493
d13256a3 4494/* Arguments with alignment larger than 8 bytes start at the next even
93348822 4495 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
d13256a3
SE
4496 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4497
4498int
4499ia64_function_arg_boundary (enum machine_mode mode, tree type)
4500{
4501
4502 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4503 return PARM_BOUNDARY * 2;
4504
4505 if (type)
4506 {
4507 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4508 return PARM_BOUNDARY * 2;
4509 else
4510 return PARM_BOUNDARY;
4511 }
4512
4513 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4514 return PARM_BOUNDARY * 2;
4515 else
4516 return PARM_BOUNDARY;
4517}
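/* On IA-64, PARM_BOUNDARY is 64 bits, so the cases above yield a 128-bit
   boundary for over-aligned or wider-than-8-byte arguments and the normal
   64-bit slot boundary otherwise (a restatement under that assumption).  */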
4518
599aedd9
RH
4519/* True if it is OK to do sibling call optimization for the specified
4520 call expression EXP. DECL will be the called function, or NULL if
4521 this is an indirect call. */
4522static bool
9c808aad 4523ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 4524{
097f3d48
JW
4525 /* We can't perform a sibcall if the current function has the syscall_linkage
4526 attribute. */
4527 if (lookup_attribute ("syscall_linkage",
4528 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4529 return false;
4530
b23ba0b8 4531 /* We must always return with our current GP. This means we can
c208436c
SE
4532 only sibcall to functions defined in the current module unless
4533 TARGET_CONST_GP is set to true. */
4534 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
599aedd9 4535}
c65ebc55 4536\f
c65ebc55
JW
4537
4538/* Implement va_arg. */
4539
23a60a04 4540static tree
726a989a
RB
4541ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4542 gimple_seq *post_p)
cd3ce9b4 4543{
cd3ce9b4 4544 /* Variable sized types are passed by reference. */
08b0dc1b 4545 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
cd3ce9b4 4546 {
23a60a04
JM
4547 tree ptrtype = build_pointer_type (type);
4548 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
c2433d7d 4549 return build_va_arg_indirect_ref (addr);
cd3ce9b4
JM
4550 }
4551
4552 /* Aggregate arguments with alignment larger than 8 bytes start at
4553 the next even boundary. Integer and floating point arguments
4554 do so if they are larger than 8 bytes, whether or not they are
4555 also aligned larger than 8 bytes. */
4556 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4557 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4558 {
5be014d5
AP
4559 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4560 size_int (2 * UNITS_PER_WORD - 1));
4561 t = fold_convert (sizetype, t);
47a25a46 4562 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5be014d5
AP
4563 size_int (-2 * UNITS_PER_WORD));
4564 t = fold_convert (TREE_TYPE (valist), t);
726a989a 4565 gimplify_assign (unshare_expr (valist), t, pre_p);
cd3ce9b4
JM
4566 }
4567
23a60a04 4568 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 4569}
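/* The alignment fix-up built above is, in effect (with the usual
   UNITS_PER_WORD of 8), the pointer arithmetic

       valist = (valist + 15) & -16;

   i.e. round VALIST up to the next 16-byte (even-slot) boundary before
   fetching an argument that requires it; this is only a restatement of the
   gimple constructed above.  */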
c65ebc55
JW
4570\f
 4571/* Return 1 if the function return value is returned in memory. Return 0 if it is
4572 in a register. */
4573
351a758b 4574static bool
586de218 4575ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
c65ebc55
JW
4576{
4577 enum machine_mode mode;
4578 enum machine_mode hfa_mode;
487b97e0 4579 HOST_WIDE_INT byte_size;
c65ebc55
JW
4580
4581 mode = TYPE_MODE (valtype);
487b97e0
RH
4582 byte_size = GET_MODE_SIZE (mode);
4583 if (mode == BLKmode)
4584 {
4585 byte_size = int_size_in_bytes (valtype);
4586 if (byte_size < 0)
351a758b 4587 return true;
487b97e0 4588 }
c65ebc55
JW
4589
 4590 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4591
4592 hfa_mode = hfa_element_mode (valtype, 0);
4593 if (hfa_mode != VOIDmode)
4594 {
4595 int hfa_size = GET_MODE_SIZE (hfa_mode);
4596
c65ebc55 4597 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 4598 return true;
c65ebc55 4599 else
351a758b 4600 return false;
c65ebc55 4601 }
c65ebc55 4602 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 4603 return true;
c65ebc55 4604 else
351a758b 4605 return false;
c65ebc55
JW
4606}
4607
4608/* Return rtx for register that holds the function return value. */
4609
4610rtx
f2972bf8 4611ia64_function_value (const_tree valtype, const_tree func)
c65ebc55
JW
4612{
4613 enum machine_mode mode;
4614 enum machine_mode hfa_mode;
f2972bf8 4615 int unsignedp;
c65ebc55
JW
4616
4617 mode = TYPE_MODE (valtype);
4618 hfa_mode = hfa_element_mode (valtype, 0);
4619
4620 if (hfa_mode != VOIDmode)
4621 {
4622 rtx loc[8];
4623 int i;
4624 int hfa_size;
4625 int byte_size;
4626 int offset;
4627
4628 hfa_size = GET_MODE_SIZE (hfa_mode);
4629 byte_size = ((mode == BLKmode)
4630 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4631 offset = 0;
4632 for (i = 0; offset < byte_size; i++)
4633 {
4634 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4635 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4636 GEN_INT (offset));
c65ebc55
JW
4637 offset += hfa_size;
4638 }
9dec91d4 4639 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4640 }
f57fc998 4641 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
4642 return gen_rtx_REG (mode, FR_ARG_FIRST);
4643 else
3870df96 4644 {
8c5cacfd
RH
4645 bool need_parallel = false;
4646
4647 /* In big-endian mode, we need to manage the layout of aggregates
4648 in the registers so that we get the bits properly aligned in
4649 the highpart of the registers. */
3870df96
SE
4650 if (BYTES_BIG_ENDIAN
4651 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
8c5cacfd
RH
4652 need_parallel = true;
4653
4654 /* Something like struct S { long double x; char a[0] } is not an
4655 HFA structure, and therefore doesn't go in fp registers. But
4656 the middle-end will give it XFmode anyway, and XFmode values
4657 don't normally fit in integer registers. So we need to smuggle
4658 the value inside a parallel. */
4de67c26 4659 else if (mode == XFmode || mode == XCmode || mode == RFmode)
8c5cacfd
RH
4660 need_parallel = true;
4661
4662 if (need_parallel)
3870df96
SE
4663 {
4664 rtx loc[8];
4665 int offset;
4666 int bytesize;
4667 int i;
4668
4669 offset = 0;
4670 bytesize = int_size_in_bytes (valtype);
543144ed
JM
4671 /* An empty PARALLEL is invalid here, but the return value
4672 doesn't matter for empty structs. */
4673 if (bytesize == 0)
4674 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96
SE
4675 for (i = 0; offset < bytesize; i++)
4676 {
4677 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4678 gen_rtx_REG (DImode,
4679 GR_RET_FIRST + i),
4680 GEN_INT (offset));
4681 offset += UNITS_PER_WORD;
4682 }
4683 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4684 }
8c5cacfd 4685
f2972bf8
DR
4686 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4687 func ? TREE_TYPE (func) : NULL_TREE,
4688 true);
4689
8c5cacfd 4690 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96 4691 }
c65ebc55
JW
4692}
4693
fdbe66f2 4694/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6b2300b3
JJ
4695 We need to emit DTP-relative relocations. */
4696
fdbe66f2 4697static void
9c808aad 4698ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3 4699{
6f3113ed
SE
4700 gcc_assert (size == 4 || size == 8);
4701 if (size == 4)
4702 fputs ("\tdata4.ua\t@dtprel(", file);
4703 else
4704 fputs ("\tdata8.ua\t@dtprel(", file);
6b2300b3
JJ
4705 output_addr_const (file, x);
4706 fputs (")", file);
4707}
4708
c65ebc55
JW
4709/* Print a memory address as an operand to reference that memory location. */
4710
4711/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4712 also call this from ia64_print_operand for memory addresses. */
4713
4714void
9c808aad
AJ
4715ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4716 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
4717{
4718}
4719
3569057d 4720/* Print an operand to an assembler instruction.
c65ebc55
JW
4721 C Swap and print a comparison operator.
4722 D Print an FP comparison operator.
4723 E Print 32 - constant, for SImode shifts as extract.
66db6b45 4724 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
4725 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4726 a floating point register emitted normally.
735b94a7 4727 G A floating point constant.
c65ebc55 4728 I Invert a predicate register by adding 1.
e5bde68a 4729 J Select the proper predicate register for a condition.
6b6c1201 4730 j Select the inverse predicate register for a condition.
c65ebc55
JW
4731 O Append .acq for volatile load.
4732 P Postincrement of a MEM.
4733 Q Append .rel for volatile store.
4883241c 4734 R Print .s .d or nothing for a single, double or no truncation.
c65ebc55
JW
4735 S Shift amount for shladd instruction.
4736 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4737 for Intel assembler.
4738 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4739 for Intel assembler.
a71aef0b 4740 X A pair of floating point registers.
c65ebc55 4741 r Print register name, or constant 0 as r0. HP compatibility for
f61134e8
RH
4742 Linux kernel.
4743 v Print vector constant value as an 8-byte integer value. */
4744
c65ebc55 4745void
9c808aad 4746ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 4747{
e57b9d65
RH
4748 const char *str;
4749
c65ebc55
JW
4750 switch (code)
4751 {
c65ebc55
JW
4752 case 0:
4753 /* Handled below. */
4754 break;
809d4ef1 4755
c65ebc55
JW
4756 case 'C':
4757 {
4758 enum rtx_code c = swap_condition (GET_CODE (x));
4759 fputs (GET_RTX_NAME (c), file);
4760 return;
4761 }
4762
4763 case 'D':
e57b9d65
RH
4764 switch (GET_CODE (x))
4765 {
4766 case NE:
4767 str = "neq";
4768 break;
4769 case UNORDERED:
4770 str = "unord";
4771 break;
4772 case ORDERED:
4773 str = "ord";
4774 break;
86ad1da0
SE
4775 case UNLT:
4776 str = "nge";
4777 break;
4778 case UNLE:
4779 str = "ngt";
4780 break;
4781 case UNGT:
4782 str = "nle";
4783 break;
4784 case UNGE:
4785 str = "nlt";
4786 break;
e57b9d65
RH
4787 default:
4788 str = GET_RTX_NAME (GET_CODE (x));
4789 break;
4790 }
4791 fputs (str, file);
c65ebc55
JW
4792 return;
4793
4794 case 'E':
4795 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4796 return;
4797
66db6b45
RH
4798 case 'e':
4799 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4800 return;
4801
c65ebc55
JW
4802 case 'F':
4803 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 4804 str = reg_names [FR_REG (0)];
c65ebc55 4805 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 4806 str = reg_names [FR_REG (1)];
c65ebc55 4807 else
e820471b
NS
4808 {
4809 gcc_assert (GET_CODE (x) == REG);
4810 str = reg_names [REGNO (x)];
4811 }
e57b9d65 4812 fputs (str, file);
c65ebc55
JW
4813 return;
4814
735b94a7
SE
4815 case 'G':
4816 {
4817 long val[4];
4818 REAL_VALUE_TYPE rv;
4819 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4820 real_to_target (val, &rv, GET_MODE (x));
4821 if (GET_MODE (x) == SFmode)
4822 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4823 else if (GET_MODE (x) == DFmode)
4824 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4825 & 0xffffffff,
4826 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4827 & 0xffffffff);
4828 else
4829 output_operand_lossage ("invalid %%G mode");
4830 }
4831 return;
4832
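/* Illustrative note, not part of the original ia64.c: for an SFmode
   constant such as 1.0, whose IEEE-754 single-precision image is
   0x3f800000, the '%G' case above emits "0x3f800000".  */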
c65ebc55
JW
4833 case 'I':
4834 fputs (reg_names [REGNO (x) + 1], file);
4835 return;
4836
e5bde68a 4837 case 'J':
6b6c1201
RH
4838 case 'j':
4839 {
4840 unsigned int regno = REGNO (XEXP (x, 0));
4841 if (GET_CODE (x) == EQ)
4842 regno += 1;
4843 if (code == 'j')
4844 regno ^= 1;
4845 fputs (reg_names [regno], file);
4846 }
e5bde68a
RH
4847 return;
4848
c65ebc55
JW
4849 case 'O':
4850 if (MEM_VOLATILE_P (x))
4851 fputs(".acq", file);
4852 return;
4853
4854 case 'P':
4855 {
4b983fdc 4856 HOST_WIDE_INT value;
c65ebc55 4857
4b983fdc
RH
4858 switch (GET_CODE (XEXP (x, 0)))
4859 {
4860 default:
4861 return;
4862
4863 case POST_MODIFY:
4864 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4865 if (GET_CODE (x) == CONST_INT)
08012cda 4866 value = INTVAL (x);
e820471b 4867 else
4b983fdc 4868 {
e820471b 4869 gcc_assert (GET_CODE (x) == REG);
08012cda 4870 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
4871 return;
4872 }
4b983fdc 4873 break;
c65ebc55 4874
4b983fdc
RH
4875 case POST_INC:
4876 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 4877 break;
c65ebc55 4878
4b983fdc 4879 case POST_DEC:
08012cda 4880 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
4881 break;
4882 }
809d4ef1 4883
4a0a75dd 4884 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
4885 return;
4886 }
4887
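/* Illustrative note, not part of the original ia64.c: for a DImode MEM
   whose address is a POST_INC, the '%P' case above appends ", 8" (the
   mode size); a POST_DEC appends ", -8", and a POST_MODIFY that adds a
   register appends ", rN" for that register.  */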
4888 case 'Q':
4889 if (MEM_VOLATILE_P (x))
4890 fputs(".rel", file);
4891 return;
4892
4883241c
SE
4893 case 'R':
4894 if (x == CONST0_RTX (GET_MODE (x)))
4895 fputs(".s", file);
4896 else if (x == CONST1_RTX (GET_MODE (x)))
4897 fputs(".d", file);
4898 else if (x == CONST2_RTX (GET_MODE (x)))
4899 ;
4900 else
4901 output_operand_lossage ("invalid %%R value");
4902 return;
4903
c65ebc55 4904 case 'S':
809d4ef1 4905 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
4906 return;
4907
4908 case 'T':
4909 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4910 {
809d4ef1 4911 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4912 return;
4913 }
4914 break;
4915
4916 case 'U':
4917 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4918 {
3b572406 4919 const char *prefix = "0x";
c65ebc55
JW
4920 if (INTVAL (x) & 0x80000000)
4921 {
4922 fprintf (file, "0xffffffff");
4923 prefix = "";
4924 }
809d4ef1 4925 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4926 return;
4927 }
4928 break;
809d4ef1 4929
a71aef0b
JB
4930 case 'X':
4931 {
4932 unsigned int regno = REGNO (x);
4933 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4934 }
4935 return;
4936
c65ebc55 4937 case 'r':
18a3c539
JW
4938 /* If this operand is the constant zero, write it as register zero.
4939 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
4940 if (GET_CODE (x) == REG)
4941 fputs (reg_names[REGNO (x)], file);
4942 else if (x == CONST0_RTX (GET_MODE (x)))
4943 fputs ("r0", file);
18a3c539
JW
4944 else if (GET_CODE (x) == CONST_INT)
4945 output_addr_const (file, x);
c65ebc55
JW
4946 else
4947 output_operand_lossage ("invalid %%r value");
4948 return;
4949
f61134e8
RH
4950 case 'v':
4951 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4952 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4953 break;
4954
85548039
RH
4955 case '+':
4956 {
4957 const char *which;
9c808aad 4958
85548039
RH
4959 /* For conditional branches, returns or calls, substitute
4960 sptk, dptk, dpnt, or spnt for %s. */
4961 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4962 if (x)
4963 {
4964 int pred_val = INTVAL (XEXP (x, 0));
4965
4966	    /* Guess top and bottom 2% statically predicted.  */
2c9e13f3
JH
4967 if (pred_val < REG_BR_PROB_BASE / 50
4968 && br_prob_note_reliable_p (x))
85548039
RH
4969 which = ".spnt";
4970 else if (pred_val < REG_BR_PROB_BASE / 2)
4971 which = ".dpnt";
2c9e13f3
JH
4972 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4973 || !br_prob_note_reliable_p (x))
85548039
RH
4974 which = ".dptk";
4975 else
4976 which = ".sptk";
4977 }
4978 else if (GET_CODE (current_output_insn) == CALL_INSN)
4979 which = ".sptk";
4980 else
4981 which = ".dptk";
4982
4983 fputs (which, file);
4984 return;
4985 }
4986
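/* Illustrative note, not part of the original ia64.c, assuming the
   usual REG_BR_PROB_BASE of 10000: a reliable REG_BR_PROB note of 150
   (1.5% taken) selects ".spnt", 4000 selects ".dpnt", 9500 selects
   ".dptk" and 9900 selects ".sptk"; an unreliable note in the upper
   half always falls back to ".dptk".  */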
6f8aa100
RH
4987 case ',':
4988 x = current_insn_predicate;
4989 if (x)
4990 {
4991 unsigned int regno = REGNO (XEXP (x, 0));
4992 if (GET_CODE (x) == EQ)
4993 regno += 1;
6f8aa100
RH
4994 fprintf (file, "(%s) ", reg_names [regno]);
4995 }
4996 return;
4997
c65ebc55
JW
4998 default:
4999 output_operand_lossage ("ia64_print_operand: unknown code");
5000 return;
5001 }
5002
5003 switch (GET_CODE (x))
5004 {
5005 /* This happens for the spill/restore instructions. */
5006 case POST_INC:
4b983fdc
RH
5007 case POST_DEC:
5008 case POST_MODIFY:
c65ebc55 5009 x = XEXP (x, 0);
ed168e45 5010 /* ... fall through ... */
c65ebc55
JW
5011
5012 case REG:
5013 fputs (reg_names [REGNO (x)], file);
5014 break;
5015
5016 case MEM:
5017 {
5018 rtx addr = XEXP (x, 0);
ec8e098d 5019 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
5020 addr = XEXP (addr, 0);
5021 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5022 break;
5023 }
809d4ef1 5024
c65ebc55
JW
5025 default:
5026 output_addr_const (file, x);
5027 break;
5028 }
5029
5030 return;
5031}
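/* A minimal stand-alone sketch, not part of ia64.c, of the masking done
   by the '%T' and '%U' cases above; a 64-bit long long stands in for
   HOST_WIDE_INT and stdout stands in for the assembler file.  It shows
   how the 8-bit sign-extended constant -1 is rendered as a 32-bit
   ("0xffffffff") or 64-bit ("0xffffffffffffffff") unsigned number for
   the Intel assembler.  */
#include <stdio.h>

static void
print_T (long long v)
{
  printf ("0x%x\n", (int) v & 0xffffffff);
}

static void
print_U (long long v)
{
  const char *prefix = "0x";
  if (v & 0x80000000)
    {
      printf ("0xffffffff");
      prefix = "";
    }
  printf ("%s%x\n", prefix, (int) v & 0xffffffff);
}

int
main (void)
{
  print_T (-1);	/* prints 0xffffffff */
  print_U (-1);	/* prints 0xffffffffffffffff */
  print_U (5);	/* prints 0x5 */
  return 0;
}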
c65ebc55 5032\f
3c50106f
RH
5033/* Compute a (partial) cost for rtx X. Return true if the complete
5034 cost has been computed, and false if subexpressions should be
5035 scanned. In either case, *TOTAL contains the cost result. */
5036/* ??? This is incomplete. */
5037
5038static bool
f40751dd
JH
5039ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5040 bool speed ATTRIBUTE_UNUSED)
3c50106f
RH
5041{
5042 switch (code)
5043 {
5044 case CONST_INT:
5045 switch (outer_code)
5046 {
5047 case SET:
13f70342 5048 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
3c50106f
RH
5049 return true;
5050 case PLUS:
13f70342 5051 if (satisfies_constraint_I (x))
3c50106f 5052 *total = 0;
13f70342 5053 else if (satisfies_constraint_J (x))
3c50106f
RH
5054 *total = 1;
5055 else
5056 *total = COSTS_N_INSNS (1);
5057 return true;
5058 default:
13f70342 5059 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
3c50106f
RH
5060 *total = 0;
5061 else
5062 *total = COSTS_N_INSNS (1);
5063 return true;
5064 }
5065
5066 case CONST_DOUBLE:
5067 *total = COSTS_N_INSNS (1);
5068 return true;
5069
5070 case CONST:
5071 case SYMBOL_REF:
5072 case LABEL_REF:
5073 *total = COSTS_N_INSNS (3);
5074 return true;
5075
5076 case MULT:
5077 /* For multiplies wider than HImode, we have to go to the FPU,
5078 which normally involves copies. Plus there's the latency
5079 of the multiply itself, and the latency of the instructions to
5080 transfer integer regs to FP regs. */
5081 /* ??? Check for FP mode. */
5082 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5083 *total = COSTS_N_INSNS (10);
5084 else
5085 *total = COSTS_N_INSNS (2);
5086 return true;
5087
5088 case PLUS:
5089 case MINUS:
5090 case ASHIFT:
5091 case ASHIFTRT:
5092 case LSHIFTRT:
5093 *total = COSTS_N_INSNS (1);
5094 return true;
5095
5096 case DIV:
5097 case UDIV:
5098 case MOD:
5099 case UMOD:
5100 /* We make divide expensive, so that divide-by-constant will be
5101 optimized to a multiply. */
5102 *total = COSTS_N_INSNS (60);
5103 return true;
5104
5105 default:
5106 return false;
5107 }
5108}
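/* Illustrative note, not part of the original ia64.c: under these costs
   a CONST_INT that satisfies constraint "J" is free as the source of a
   SET, a CONST, SYMBOL_REF or LABEL_REF is charged three insns, and any
   DIV/MOD is charged sixty so that division by a constant is rewritten
   as a multiplication.  */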
5109
9e4f94de 5110/* Calculate the cost of moving data from a register in class FROM to
7109d286 5111 one in class TO, using MODE. */
5527bf14
RH
5112
5113int
9c808aad
AJ
5114ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
5115 enum reg_class to)
5527bf14 5116{
7109d286
RH
5117 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5118 if (to == ADDL_REGS)
5119 to = GR_REGS;
5120 if (from == ADDL_REGS)
5121 from = GR_REGS;
5122
5123 /* All costs are symmetric, so reduce cases by putting the
5124 lower number class as the destination. */
5125 if (from < to)
5126 {
5127 enum reg_class tmp = to;
5128 to = from, from = tmp;
5129 }
5130
02befdf4 5131 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286
RH
5132 so that we get secondary memory reloads. Between FR_REGS,
5133 we have to make this at least as expensive as MEMORY_MOVE_COST
5134 to avoid spectacularly poor register class preferencing. */
4de67c26 5135 if (mode == XFmode || mode == RFmode)
7109d286
RH
5136 {
5137 if (to != GR_REGS || from != GR_REGS)
5138 return MEMORY_MOVE_COST (mode, to, 0);
5139 else
5140 return 3;
5141 }
5142
5143 switch (to)
5144 {
5145 case PR_REGS:
5146 /* Moving between PR registers takes two insns. */
5147 if (from == PR_REGS)
5148 return 3;
5149 /* Moving between PR and anything but GR is impossible. */
5150 if (from != GR_REGS)
5151 return MEMORY_MOVE_COST (mode, to, 0);
5152 break;
5153
5154 case BR_REGS:
5155 /* Moving between BR and anything but GR is impossible. */
5156 if (from != GR_REGS && from != GR_AND_BR_REGS)
5157 return MEMORY_MOVE_COST (mode, to, 0);
5158 break;
5159
5160 case AR_I_REGS:
5161 case AR_M_REGS:
5162 /* Moving between AR and anything but GR is impossible. */
5163 if (from != GR_REGS)
5164 return MEMORY_MOVE_COST (mode, to, 0);
5165 break;
5166
5167 case GR_REGS:
5168 case FR_REGS:
a71aef0b 5169 case FP_REGS:
7109d286
RH
5170 case GR_AND_FR_REGS:
5171 case GR_AND_BR_REGS:
5172 case ALL_REGS:
5173 break;
5174
5175 default:
e820471b 5176 gcc_unreachable ();
7109d286 5177 }
3f622353 5178
5527bf14
RH
5179 return 2;
5180}
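/* Illustrative note, not part of the original ia64.c: with the rules
   above a GR->GR or FR->FR DImode copy costs 2, a PR->PR copy costs 3,
   and an XFmode copy between FR_REGS and GR_REGS is charged the full
   MEMORY_MOVE_COST so that reload routes it through a stack slot.  */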
c65ebc55 5181
0a2aaacc 5182/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
f61134e8
RH
5183 to use when copying X into that class. */
5184
5185enum reg_class
0a2aaacc 5186ia64_preferred_reload_class (rtx x, enum reg_class rclass)
f61134e8 5187{
0a2aaacc 5188 switch (rclass)
f61134e8
RH
5189 {
5190 case FR_REGS:
a71aef0b 5191 case FP_REGS:
f61134e8
RH
5192 /* Don't allow volatile mem reloads into floating point registers.
5193 This is defined to force reload to choose the r/m case instead
5194 of the f/f case when reloading (set (reg fX) (mem/v)). */
5195 if (MEM_P (x) && MEM_VOLATILE_P (x))
5196 return NO_REGS;
5197
5198 /* Force all unrecognized constants into the constant pool. */
5199 if (CONSTANT_P (x))
5200 return NO_REGS;
5201 break;
5202
5203 case AR_M_REGS:
5204 case AR_I_REGS:
5205 if (!OBJECT_P (x))
5206 return NO_REGS;
5207 break;
5208
5209 default:
5210 break;
5211 }
5212
0a2aaacc 5213 return rclass;
f61134e8
RH
5214}
5215
c65ebc55 5216/* This function returns the register class required for a secondary
0a2aaacc 5217 register when copying between one of the registers in RCLASS, and X,
c65ebc55
JW
5218 using MODE. A return value of NO_REGS means that no secondary register
5219 is required. */
5220
5221enum reg_class
0a2aaacc 5222ia64_secondary_reload_class (enum reg_class rclass,
9c808aad 5223 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
5224{
5225 int regno = -1;
5226
5227 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5228 regno = true_regnum (x);
5229
0a2aaacc 5230 switch (rclass)
97e242b0
RH
5231 {
5232 case BR_REGS:
7109d286
RH
5233 case AR_M_REGS:
5234 case AR_I_REGS:
5235 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5236 interaction. We end up with two pseudos with overlapping lifetimes
5237	 both of which are equiv to the same constant, and both of which need
5238 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5239 changes depending on the path length, which means the qty_first_reg
5240 check in make_regs_eqv can give different answers at different times.
5241 At some point I'll probably need a reload_indi pattern to handle
5242 this.
5243
5244 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5245 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5246 non-general registers for good measure. */
5247 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
5248 return GR_REGS;
5249
5250 /* This is needed if a pseudo used as a call_operand gets spilled to a
5251 stack slot. */
5252 if (GET_CODE (x) == MEM)
5253 return GR_REGS;
5254 break;
5255
5256 case FR_REGS:
a71aef0b 5257 case FP_REGS:
c51e6d85 5258 /* Need to go through general registers to get to other class regs. */
7109d286
RH
5259 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5260 return GR_REGS;
9c808aad 5261
97e242b0
RH
5262 /* This can happen when a paradoxical subreg is an operand to the
5263 muldi3 pattern. */
5264 /* ??? This shouldn't be necessary after instruction scheduling is
5265 enabled, because paradoxical subregs are not accepted by
5266 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5267 stop the paradoxical subreg stupidity in the *_operand functions
5268 in recog.c. */
5269 if (GET_CODE (x) == MEM
5270 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5271 || GET_MODE (x) == QImode))
5272 return GR_REGS;
5273
5274 /* This can happen because of the ior/and/etc patterns that accept FP
5275 registers as operands. If the third operand is a constant, then it
5276 needs to be reloaded into a FP register. */
5277 if (GET_CODE (x) == CONST_INT)
5278 return GR_REGS;
5279
5280 /* This can happen because of register elimination in a muldi3 insn.
5281 E.g. `26107 * (unsigned long)&u'. */
5282 if (GET_CODE (x) == PLUS)
5283 return GR_REGS;
5284 break;
5285
5286 case PR_REGS:
f2f90c63 5287 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
5288 and the function has a nonlocal goto. This is because global
5289 does not allocate call crossing pseudos to hard registers when
e3b5732b 5290 crtl->has_nonlocal_goto is true. This is relatively
97e242b0
RH
5291 common for C++ programs that use exceptions. To reproduce,
5292 return NO_REGS and compile libstdc++. */
5293 if (GET_CODE (x) == MEM)
5294 return GR_REGS;
f2f90c63
RH
5295
5296 /* This can happen when we take a BImode subreg of a DImode value,
5297 and that DImode value winds up in some non-GR register. */
5298 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5299 return GR_REGS;
97e242b0
RH
5300 break;
5301
5302 default:
5303 break;
5304 }
c65ebc55
JW
5305
5306 return NO_REGS;
5307}
5308
215b063c
PB
5309\f
5310/* Implement targetm.unspec_may_trap_p hook. */
5311static int
5312ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5313{
5314 if (GET_CODE (x) == UNSPEC)
5315 {
5316 switch (XINT (x, 1))
5317 {
5318 case UNSPEC_LDA:
5319 case UNSPEC_LDS:
5320 case UNSPEC_LDSA:
5321 case UNSPEC_LDCCLR:
5322 case UNSPEC_CHKACLR:
5323 case UNSPEC_CHKS:
5324 /* These unspecs are just wrappers. */
5325 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5326 }
5327 }
5328
5329 return default_unspec_may_trap_p (x, flags);
5330}
5331
c65ebc55
JW
5332\f
5333/* Parse the -mfixed-range= option string. */
5334
5335static void
9c808aad 5336fix_range (const char *const_str)
c65ebc55
JW
5337{
5338 int i, first, last;
3b572406 5339 char *str, *dash, *comma;
c65ebc55
JW
5340
5341	  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5342 REG2 are either register names or register numbers. The effect
5343 of this option is to mark the registers in the range from REG1 to
5344 REG2 as ``fixed'' so they won't be used by the compiler. This is
5345 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5346
3b572406
RH
5347 i = strlen (const_str);
5348 str = (char *) alloca (i + 1);
5349 memcpy (str, const_str, i + 1);
5350
c65ebc55
JW
5351 while (1)
5352 {
5353 dash = strchr (str, '-');
5354 if (!dash)
5355 {
d4ee4d25 5356 warning (0, "value of -mfixed-range must have form REG1-REG2");
c65ebc55
JW
5357 return;
5358 }
5359 *dash = '\0';
5360
5361 comma = strchr (dash + 1, ',');
5362 if (comma)
5363 *comma = '\0';
5364
5365 first = decode_reg_name (str);
5366 if (first < 0)
5367 {
d4ee4d25 5368 warning (0, "unknown register name: %s", str);
c65ebc55
JW
5369 return;
5370 }
5371
5372 last = decode_reg_name (dash + 1);
5373 if (last < 0)
5374 {
d4ee4d25 5375 warning (0, "unknown register name: %s", dash + 1);
c65ebc55
JW
5376 return;
5377 }
5378
5379 *dash = '-';
5380
5381 if (first > last)
5382 {
d4ee4d25 5383 warning (0, "%s-%s is an empty range", str, dash + 1);
c65ebc55
JW
5384 return;
5385 }
5386
5387 for (i = first; i <= last; ++i)
5388 fixed_regs[i] = call_used_regs[i] = 1;
5389
5390 if (!comma)
5391 break;
5392
5393 *comma = ',';
5394 str = comma + 1;
5395 }
5396}
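/* A simplified stand-alone sketch, not part of ia64.c, of the
   "-mfixed-range=REG1-REG2{,REG1-REG2}" parsing loop above.  It only
   splits the argument into dash-separated pairs and prints them; the
   real code maps each name through decode_reg_name and marks the whole
   range as fixed.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
parse_fixed_ranges (const char *const_str)
{
  char *str = (char *) malloc (strlen (const_str) + 1);
  char *p = str;

  if (!str)
    return;
  strcpy (str, const_str);

  while (1)
    {
      char *dash = strchr (p, '-');
      char *comma;

      if (!dash)
	{
	  fprintf (stderr, "value must have form REG1-REG2\n");
	  break;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      printf ("range: %s .. %s\n", p, dash + 1);

      if (!comma)
	break;
      p = comma + 1;
    }

  free (str);
}

int
main (void)
{
  parse_fixed_ranges ("f32-f127,r10-r15");	/* two ranges */
  return 0;
}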
5397
dbdd120f
RH
5398/* Implement TARGET_HANDLE_OPTION. */
5399
5400static bool
55bea00a 5401ia64_handle_option (size_t code, const char *arg, int value)
37b15744 5402{
bbb6eae8
SB
5403 static bool warned_itanium1_deprecated;
5404
dbdd120f
RH
5405 switch (code)
5406 {
5407 case OPT_mfixed_range_:
5408 fix_range (arg);
5409 return true;
5410
5411 case OPT_mtls_size_:
55bea00a
RS
5412 if (value != 14 && value != 22 && value != 64)
5413 error ("bad value %<%s%> for -mtls-size= switch", arg);
5414 return true;
dbdd120f
RH
5415
5416 case OPT_mtune_:
5417 {
5418 static struct pta
5419 {
5420 const char *name; /* processor name or nickname. */
5421 enum processor_type processor;
5422 }
5423 const processor_alias_table[] =
5424 {
5425 {"itanium", PROCESSOR_ITANIUM},
5426 {"itanium1", PROCESSOR_ITANIUM},
5427 {"merced", PROCESSOR_ITANIUM},
5428 {"itanium2", PROCESSOR_ITANIUM2},
5429 {"mckinley", PROCESSOR_ITANIUM2},
5430 };
5431 int const pta_size = ARRAY_SIZE (processor_alias_table);
5432 int i;
5433
5434 for (i = 0; i < pta_size; i++)
5435 if (!strcmp (arg, processor_alias_table[i].name))
5436 {
5437 ia64_tune = processor_alias_table[i].processor;
bbb6eae8
SB
5438 if (ia64_tune == PROCESSOR_ITANIUM
5439 && ! warned_itanium1_deprecated)
5440 {
5441 inform (0,
5442 "value %<%s%> for -mtune= switch is deprecated",
5443 arg);
5444 inform (0, "GCC 4.4 is the last release with "
5445 "Itanium1 tuning support");
5446 warned_itanium1_deprecated = true;
5447 }
dbdd120f
RH
5448 break;
5449 }
5450 if (i == pta_size)
5451 error ("bad value %<%s%> for -mtune= switch", arg);
5452 return true;
5453 }
5454
5455 default:
5456 return true;
5457 }
37b15744 5458}
0c96007e 5459
bacf5b96 5460/* Implement OVERRIDE_OPTIONS. */
c65ebc55
JW
5461
5462void
9c808aad 5463ia64_override_options (void)
c65ebc55 5464{
59da9a7d
JW
5465 if (TARGET_AUTO_PIC)
5466 target_flags |= MASK_CONST_GP;
5467
faae4ae7
L
5468 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5469 flag_schedule_insns_after_reload = 0;
5470
388092d5
AB
5471 if (optimize >= 3
5472 && ! sel_sched_switch_set)
5473 {
5474 flag_selective_scheduling2 = 1;
5475 flag_sel_sched_pipelining = 1;
5476 }
5477 if (mflag_sched_control_spec == 2)
5478 {
5479 /* Control speculation is on by default for the selective scheduler,
5480 but not for the Haifa scheduler. */
5481 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5482 }
5483 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5484 {
5485	      /* FIXME: remove this when we implement breaking autoinsns as
5486 a transformation. */
5487 flag_auto_inc_dec = 0;
5488 }
5489
c65ebc55
JW
5490 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5491
0c96007e 5492 init_machine_status = ia64_init_machine_status;
388092d5
AB
5493
5494 if (align_functions <= 0)
5495 align_functions = 64;
5496 if (align_loops <= 0)
5497 align_loops = 32;
f2972bf8
DR
5498
5499 if (TARGET_ABI_OPEN_VMS)
5500 flag_no_common = 1;
c65ebc55 5501}
dbdd120f 5502
6fb5fa3c
DB
5503/* Initialize the record of emitted frame related registers. */
5504
5505void ia64_init_expanders (void)
5506{
5507 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5508}
5509
dbdd120f
RH
5510static struct machine_function *
5511ia64_init_machine_status (void)
5512{
5ead67f6 5513 return GGC_CNEW (struct machine_function);
dbdd120f 5514}
c65ebc55 5515\f
9c808aad
AJ
5516static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5517static enum attr_type ia64_safe_type (rtx);
2130b7fb 5518
2130b7fb 5519static enum attr_itanium_class
9c808aad 5520ia64_safe_itanium_class (rtx insn)
2130b7fb
BS
5521{
5522 if (recog_memoized (insn) >= 0)
5523 return get_attr_itanium_class (insn);
b5b8b0ac
AO
5524 else if (DEBUG_INSN_P (insn))
5525 return ITANIUM_CLASS_IGNORE;
2130b7fb
BS
5526 else
5527 return ITANIUM_CLASS_UNKNOWN;
5528}
5529
5530static enum attr_type
9c808aad 5531ia64_safe_type (rtx insn)
2130b7fb
BS
5532{
5533 if (recog_memoized (insn) >= 0)
5534 return get_attr_type (insn);
5535 else
5536 return TYPE_UNKNOWN;
5537}
5538\f
c65ebc55
JW
5539/* The following collection of routines emits instruction group stop bits as
5540 necessary to avoid dependencies. */
5541
5542/* Need to track some additional registers as far as serialization is
5543 concerned so we can properly handle br.call and br.ret. We could
5544 make these registers visible to gcc, but since these registers are
5545 never explicitly used in gcc generated code, it seems wasteful to
5546 do so (plus it would make the call and return patterns needlessly
5547 complex). */
c65ebc55 5548#define REG_RP (BR_REG (0))
c65ebc55 5549#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
5550/* This is used for volatile asms which may require a stop bit immediately
5551 before and after them. */
5527bf14 5552#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
5553#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5554#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 5555
f2f90c63
RH
5556/* For each register, we keep track of how it has been written in the
5557 current instruction group.
5558
5559 If a register is written unconditionally (no qualifying predicate),
5560 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5561
5562	 If a register is written only when its qualifying predicate P is true, we
5563 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5564 may be written again by the complement of P (P^1) and when this happens,
5565 WRITE_COUNT gets set to 2.
5566
5567 The result of this is that whenever an insn attempts to write a register
e03f5d43 5568 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
5569
5570 If a predicate register is written by a floating-point insn, we set
5571 WRITTEN_BY_FP to true.
5572
5573 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5574 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5575
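/* Illustrative example, not part of the original ia64.c, assuming p6
   and p7 form a complementary pair (the code below assumes P and P+1
   are complementary for even P): a "(p6) mov r4 = r5" leaves r4 with
   WRITE_COUNT 1 and FIRST_PRED p6; a following "(p7) mov r4 = r6" in
   the same group raises WRITE_COUNT to 2 but needs no stop bit, while
   a write to r4 under any other predicate, or an unpredicated write,
   forces a group barrier first.  */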
444a356a
JJ
5576#if GCC_VERSION >= 4000
5577#define RWS_FIELD_TYPE __extension__ unsigned short
5578#else
5579#define RWS_FIELD_TYPE unsigned int
5580#endif
c65ebc55
JW
5581struct reg_write_state
5582{
444a356a
JJ
5583 RWS_FIELD_TYPE write_count : 2;
5584 RWS_FIELD_TYPE first_pred : 10;
5585 RWS_FIELD_TYPE written_by_fp : 1;
5586 RWS_FIELD_TYPE written_by_and : 1;
5587 RWS_FIELD_TYPE written_by_or : 1;
c65ebc55
JW
5588};
5589
5590/* Cumulative info for the current instruction group. */
5591struct reg_write_state rws_sum[NUM_REGS];
444a356a
JJ
5592#ifdef ENABLE_CHECKING
5593/* Bitmap whether a register has been written in the current insn. */
5594HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5595 / HOST_BITS_PER_WIDEST_FAST_INT];
5596
5597static inline void
5598rws_insn_set (int regno)
5599{
5600 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5601 SET_HARD_REG_BIT (rws_insn, regno);
5602}
5603
5604static inline int
5605rws_insn_test (int regno)
5606{
5607 return TEST_HARD_REG_BIT (rws_insn, regno);
5608}
5609#else
5610/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5611unsigned char rws_insn[2];
5612
5613static inline void
5614rws_insn_set (int regno)
5615{
5616 if (regno == REG_AR_CFM)
5617 rws_insn[0] = 1;
5618 else if (regno == REG_VOLATILE)
5619 rws_insn[1] = 1;
5620}
5621
5622static inline int
5623rws_insn_test (int regno)
5624{
5625 if (regno == REG_AR_CFM)
5626 return rws_insn[0];
5627 if (regno == REG_VOLATILE)
5628 return rws_insn[1];
5629 return 0;
5630}
5631#endif
c65ebc55 5632
25250265 5633/* Indicates whether this is the first instruction after a stop bit,
e820471b
NS
5634 in which case we don't need another stop bit. Without this,
5635 ia64_variable_issue will die when scheduling an alloc. */
25250265
JW
5636static int first_instruction;
5637
c65ebc55
JW
5638/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5639 RTL for one instruction. */
5640struct reg_flags
5641{
5642 unsigned int is_write : 1; /* Is register being written? */
5643 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5644 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
5645 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5646 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 5647 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
5648};
5649
444a356a 5650static void rws_update (int, struct reg_flags, int);
9c808aad
AJ
5651static int rws_access_regno (int, struct reg_flags, int);
5652static int rws_access_reg (rtx, struct reg_flags, int);
c1bc6ca8
JW
5653static void update_set_flags (rtx, struct reg_flags *);
5654static int set_src_needs_barrier (rtx, struct reg_flags, int);
9c808aad
AJ
5655static int rtx_needs_barrier (rtx, struct reg_flags, int);
5656static void init_insn_group_barriers (void);
c1bc6ca8
JW
5657static int group_barrier_needed (rtx);
5658static int safe_group_barrier_needed (rtx);
444a356a 5659static int in_safe_group_barrier;
3b572406 5660
c65ebc55
JW
5661/* Update *RWS for REGNO, which is being written by the current instruction,
5662 with predicate PRED, and associated register flags in FLAGS. */
5663
5664static void
444a356a 5665rws_update (int regno, struct reg_flags flags, int pred)
c65ebc55 5666{
3e7c7805 5667 if (pred)
444a356a 5668 rws_sum[regno].write_count++;
3e7c7805 5669 else
444a356a
JJ
5670 rws_sum[regno].write_count = 2;
5671 rws_sum[regno].written_by_fp |= flags.is_fp;
f2f90c63 5672 /* ??? Not tracking and/or across differing predicates. */
444a356a
JJ
5673 rws_sum[regno].written_by_and = flags.is_and;
5674 rws_sum[regno].written_by_or = flags.is_or;
5675 rws_sum[regno].first_pred = pred;
c65ebc55
JW
5676}
5677
5678/* Handle an access to register REGNO of type FLAGS using predicate register
444a356a 5679 PRED. Update rws_sum array. Return 1 if this access creates
c65ebc55
JW
5680 a dependency with an earlier instruction in the same group. */
5681
5682static int
9c808aad 5683rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
5684{
5685 int need_barrier = 0;
c65ebc55 5686
e820471b 5687 gcc_assert (regno < NUM_REGS);
c65ebc55 5688
f2f90c63
RH
5689 if (! PR_REGNO_P (regno))
5690 flags.is_and = flags.is_or = 0;
5691
c65ebc55
JW
5692 if (flags.is_write)
5693 {
12c2c7aa
JW
5694 int write_count;
5695
444a356a 5696 rws_insn_set (regno);
12c2c7aa 5697 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
5698
5699 switch (write_count)
c65ebc55
JW
5700 {
5701 case 0:
5702 /* The register has not been written yet. */
444a356a
JJ
5703 if (!in_safe_group_barrier)
5704 rws_update (regno, flags, pred);
c65ebc55
JW
5705 break;
5706
5707 case 1:
5708 /* The register has been written via a predicate. If this is
5709 not a complementary predicate, then we need a barrier. */
5710 /* ??? This assumes that P and P+1 are always complementary
5711 predicates for P even. */
f2f90c63 5712 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 5713 ;
f2f90c63
RH
5714 else if (flags.is_or && rws_sum[regno].written_by_or)
5715 ;
5716 else if ((rws_sum[regno].first_pred ^ 1) != pred)
c65ebc55 5717 need_barrier = 1;
444a356a
JJ
5718 if (!in_safe_group_barrier)
5719 rws_update (regno, flags, pred);
c65ebc55
JW
5720 break;
5721
5722 case 2:
5723 /* The register has been unconditionally written already. We
5724 need a barrier. */
f2f90c63
RH
5725 if (flags.is_and && rws_sum[regno].written_by_and)
5726 ;
5727 else if (flags.is_or && rws_sum[regno].written_by_or)
5728 ;
5729 else
5730 need_barrier = 1;
444a356a
JJ
5731 if (!in_safe_group_barrier)
5732 {
5733 rws_sum[regno].written_by_and = flags.is_and;
5734 rws_sum[regno].written_by_or = flags.is_or;
5735 }
c65ebc55
JW
5736 break;
5737
5738 default:
e820471b 5739 gcc_unreachable ();
c65ebc55
JW
5740 }
5741 }
5742 else
5743 {
5744 if (flags.is_branch)
5745 {
5746	  /* Branches have several RAW exceptions that allow us to avoid
5747 barriers. */
5748
5527bf14 5749 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
5750 /* RAW dependencies on branch regs are permissible as long
5751 as the writer is a non-branch instruction. Since we
5752 never generate code that uses a branch register written
5753 by a branch instruction, handling this case is
5754 easy. */
5527bf14 5755 return 0;
c65ebc55
JW
5756
5757 if (REGNO_REG_CLASS (regno) == PR_REGS
5758 && ! rws_sum[regno].written_by_fp)
5759 /* The predicates of a branch are available within the
5760 same insn group as long as the predicate was written by
ed168e45 5761 something other than a floating-point instruction. */
c65ebc55
JW
5762 return 0;
5763 }
5764
f2f90c63
RH
5765 if (flags.is_and && rws_sum[regno].written_by_and)
5766 return 0;
5767 if (flags.is_or && rws_sum[regno].written_by_or)
5768 return 0;
5769
c65ebc55
JW
5770 switch (rws_sum[regno].write_count)
5771 {
5772 case 0:
5773 /* The register has not been written yet. */
5774 break;
5775
5776 case 1:
5777 /* The register has been written via a predicate. If this is
5778 not a complementary predicate, then we need a barrier. */
5779 /* ??? This assumes that P and P+1 are always complementary
5780 predicates for P even. */
5781 if ((rws_sum[regno].first_pred ^ 1) != pred)
5782 need_barrier = 1;
5783 break;
5784
5785 case 2:
5786 /* The register has been unconditionally written already. We
5787 need a barrier. */
5788 need_barrier = 1;
5789 break;
5790
5791 default:
e820471b 5792 gcc_unreachable ();
c65ebc55
JW
5793 }
5794 }
5795
5796 return need_barrier;
5797}
5798
97e242b0 5799static int
9c808aad 5800rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
5801{
5802 int regno = REGNO (reg);
5803 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5804
5805 if (n == 1)
5806 return rws_access_regno (regno, flags, pred);
5807 else
5808 {
5809 int need_barrier = 0;
5810 while (--n >= 0)
5811 need_barrier |= rws_access_regno (regno + n, flags, pred);
5812 return need_barrier;
5813 }
5814}
5815
112333d3
BS
5816/* Examine X, which is a SET rtx, and update the register flags
5817   stored in *PFLAGS accordingly.  */
5818
5819static void
c1bc6ca8 5820update_set_flags (rtx x, struct reg_flags *pflags)
112333d3
BS
5821{
5822 rtx src = SET_SRC (x);
5823
112333d3
BS
5824 switch (GET_CODE (src))
5825 {
5826 case CALL:
5827 return;
5828
5829 case IF_THEN_ELSE:
048d0d36 5830 /* There are four cases here:
c8d3810f
RH
5831 (1) The destination is (pc), in which case this is a branch,
5832 nothing here applies.
5833 (2) The destination is ar.lc, in which case this is a
5834 doloop_end_internal,
5835 (3) The destination is an fp register, in which case this is
5836 an fselect instruction.
048d0d36
MK
5837 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5838 this is a check load.
c8d3810f
RH
5839 In all cases, nothing we do in this function applies. */
5840 return;
112333d3
BS
5841
5842 default:
ec8e098d 5843 if (COMPARISON_P (src)
c8d3810f 5844 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
112333d3
BS
5845 /* Set pflags->is_fp to 1 so that we know we're dealing
5846 with a floating point comparison when processing the
5847 destination of the SET. */
5848 pflags->is_fp = 1;
5849
5850 /* Discover if this is a parallel comparison. We only handle
5851 and.orcm and or.andcm at present, since we must retain a
5852 strict inverse on the predicate pair. */
5853 else if (GET_CODE (src) == AND)
5854 pflags->is_and = 1;
5855 else if (GET_CODE (src) == IOR)
5856 pflags->is_or = 1;
5857
5858 break;
5859 }
5860}
5861
5862/* Subroutine of rtx_needs_barrier; this function determines whether the
5863 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5864   are as in rtx_needs_barrier.  */
9c808aad 5866
112333d3 5867static int
c1bc6ca8 5868set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
112333d3
BS
5869{
5870 int need_barrier = 0;
5871 rtx dst;
5872 rtx src = SET_SRC (x);
5873
5874 if (GET_CODE (src) == CALL)
5875 /* We don't need to worry about the result registers that
5876 get written by subroutine call. */
5877 return rtx_needs_barrier (src, flags, pred);
5878 else if (SET_DEST (x) == pc_rtx)
5879 {
5880 /* X is a conditional branch. */
5881 /* ??? This seems redundant, as the caller sets this bit for
5882 all JUMP_INSNs. */
048d0d36
MK
5883 if (!ia64_spec_check_src_p (src))
5884 flags.is_branch = 1;
112333d3
BS
5885 return rtx_needs_barrier (src, flags, pred);
5886 }
5887
048d0d36
MK
5888 if (ia64_spec_check_src_p (src))
5889 /* Avoid checking one register twice (in condition
5890 and in 'then' section) for ldc pattern. */
5891 {
5892 gcc_assert (REG_P (XEXP (src, 2)));
5893 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5894
5895 /* We process MEM below. */
5896 src = XEXP (src, 1);
5897 }
5898
5899 need_barrier |= rtx_needs_barrier (src, flags, pred);
112333d3 5900
112333d3
BS
5901 dst = SET_DEST (x);
5902 if (GET_CODE (dst) == ZERO_EXTRACT)
5903 {
5904 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5905 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
112333d3
BS
5906 }
5907 return need_barrier;
5908}
5909
b38ba463
ZW
5910/* Handle an access to rtx X of type FLAGS using predicate register
5911 PRED. Return 1 if this access creates a dependency with an earlier
5912 instruction in the same group. */
c65ebc55
JW
5913
5914static int
9c808aad 5915rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
5916{
5917 int i, j;
5918 int is_complemented = 0;
5919 int need_barrier = 0;
5920 const char *format_ptr;
5921 struct reg_flags new_flags;
c1bc6ca8 5922 rtx cond;
c65ebc55
JW
5923
5924 if (! x)
5925 return 0;
5926
5927 new_flags = flags;
5928
5929 switch (GET_CODE (x))
5930 {
9c808aad 5931 case SET:
c1bc6ca8
JW
5932 update_set_flags (x, &new_flags);
5933 need_barrier = set_src_needs_barrier (x, new_flags, pred);
112333d3 5934 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 5935 {
112333d3
BS
5936 new_flags.is_write = 1;
5937 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 5938 }
c65ebc55
JW
5939 break;
5940
5941 case CALL:
5942 new_flags.is_write = 0;
97e242b0 5943 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
5944
5945 /* Avoid multiple register writes, in case this is a pattern with
e820471b 5946 multiple CALL rtx. This avoids a failure in rws_access_reg. */
444a356a 5947 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
c65ebc55
JW
5948 {
5949 new_flags.is_write = 1;
97e242b0
RH
5950 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5951 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5952 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
5953 }
5954 break;
5955
e5bde68a
RH
5956 case COND_EXEC:
5957 /* X is a predicated instruction. */
5958
5959 cond = COND_EXEC_TEST (x);
e820471b 5960 gcc_assert (!pred);
e5bde68a
RH
5961 need_barrier = rtx_needs_barrier (cond, flags, 0);
5962
5963 if (GET_CODE (cond) == EQ)
5964 is_complemented = 1;
5965 cond = XEXP (cond, 0);
e820471b 5966 gcc_assert (GET_CODE (cond) == REG
c1bc6ca8 5967 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
e5bde68a
RH
5968 pred = REGNO (cond);
5969 if (is_complemented)
5970 ++pred;
5971
5972 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5973 return need_barrier;
5974
c65ebc55 5975 case CLOBBER:
c65ebc55 5976 case USE:
c65ebc55
JW
5977 /* Clobber & use are for earlier compiler-phases only. */
5978 break;
5979
5980 case ASM_OPERANDS:
5981 case ASM_INPUT:
5982 /* We always emit stop bits for traditional asms. We emit stop bits
5983 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5984 if (GET_CODE (x) != ASM_OPERANDS
5985 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5986 {
5987 /* Avoid writing the register multiple times if we have multiple
e820471b 5988 asm outputs. This avoids a failure in rws_access_reg. */
444a356a 5989 if (! rws_insn_test (REG_VOLATILE))
c65ebc55
JW
5990 {
5991 new_flags.is_write = 1;
97e242b0 5992 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
5993 }
5994 return 1;
5995 }
5996
5997 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
1e5f1716 5998 We cannot just fall through here since then we would be confused
c65ebc55
JW
5999	 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
6000	 usage, does not indicate a traditional asm.  */
6001
6002 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6003 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6004 need_barrier = 1;
6005 break;
6006
6007 case PARALLEL:
6008 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
6009 {
6010 rtx pat = XVECEXP (x, 0, i);
051d8245 6011 switch (GET_CODE (pat))
112333d3 6012 {
051d8245 6013 case SET:
c1bc6ca8
JW
6014 update_set_flags (pat, &new_flags);
6015 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
051d8245
RH
6016 break;
6017
6018 case USE:
6019 case CALL:
6020 case ASM_OPERANDS:
6021 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6022 break;
6023
6024 case CLOBBER:
6025 case RETURN:
6026 break;
6027
6028 default:
6029 gcc_unreachable ();
112333d3 6030 }
112333d3
BS
6031 }
6032 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6033 {
6034 rtx pat = XVECEXP (x, 0, i);
6035 if (GET_CODE (pat) == SET)
6036 {
6037 if (GET_CODE (SET_SRC (pat)) != CALL)
6038 {
6039 new_flags.is_write = 1;
6040 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6041 pred);
6042 }
6043 }
339cb12e 6044 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
6045 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6046 }
c65ebc55
JW
6047 break;
6048
6049 case SUBREG:
077bc924
JM
6050 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6051 break;
c65ebc55 6052 case REG:
870f9ec0
RH
6053 if (REGNO (x) == AR_UNAT_REGNUM)
6054 {
6055 for (i = 0; i < 64; ++i)
6056 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6057 }
6058 else
6059 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
6060 break;
6061
6062 case MEM:
6063 /* Find the regs used in memory address computation. */
6064 new_flags.is_write = 0;
6065 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6066 break;
6067
051d8245 6068 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
c65ebc55
JW
6069 case SYMBOL_REF: case LABEL_REF: case CONST:
6070 break;
6071
6072 /* Operators with side-effects. */
6073 case POST_INC: case POST_DEC:
e820471b 6074 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
c65ebc55
JW
6075
6076 new_flags.is_write = 0;
97e242b0 6077 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 6078 new_flags.is_write = 1;
97e242b0 6079 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6080 break;
6081
6082 case POST_MODIFY:
e820471b 6083 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
4b983fdc
RH
6084
6085 new_flags.is_write = 0;
97e242b0 6086 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6087 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6088 new_flags.is_write = 1;
97e242b0 6089 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
6090 break;
6091
6092 /* Handle common unary and binary ops for efficiency. */
6093 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6094 case MOD: case UDIV: case UMOD: case AND: case IOR:
6095 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6096 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6097 case NE: case EQ: case GE: case GT: case LE:
6098 case LT: case GEU: case GTU: case LEU: case LTU:
6099 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6100 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6101 break;
6102
6103 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6104 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6105 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 6106 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
6107 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6108 break;
6109
051d8245
RH
6110 case VEC_SELECT:
6111 /* VEC_SELECT's second argument is a PARALLEL with integers that
6112 describe the elements selected. On ia64, those integers are
6113 always constants. Avoid walking the PARALLEL so that we don't
e820471b 6114 get confused with "normal" parallels and then die. */
051d8245
RH
6115 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6116 break;
6117
c65ebc55
JW
6118 case UNSPEC:
6119 switch (XINT (x, 1))
6120 {
7b6e506e
RH
6121 case UNSPEC_LTOFF_DTPMOD:
6122 case UNSPEC_LTOFF_DTPREL:
6123 case UNSPEC_DTPREL:
6124 case UNSPEC_LTOFF_TPREL:
6125 case UNSPEC_TPREL:
6126 case UNSPEC_PRED_REL_MUTEX:
6127 case UNSPEC_PIC_CALL:
6128 case UNSPEC_MF:
6129 case UNSPEC_FETCHADD_ACQ:
6130 case UNSPEC_BSP_VALUE:
6131 case UNSPEC_FLUSHRS:
6132 case UNSPEC_BUNDLE_SELECTOR:
6133 break;
6134
086c0f96
RH
6135 case UNSPEC_GR_SPILL:
6136 case UNSPEC_GR_RESTORE:
870f9ec0
RH
6137 {
6138 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6139 HOST_WIDE_INT bit = (offset >> 3) & 63;
6140
6141 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
83338d15 6142 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
870f9ec0
RH
6143 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6144 new_flags, pred);
6145 break;
6146 }
9c808aad 6147
086c0f96
RH
6148 case UNSPEC_FR_SPILL:
6149 case UNSPEC_FR_RESTORE:
c407570a 6150 case UNSPEC_GETF_EXP:
b38ba463 6151 case UNSPEC_SETF_EXP:
086c0f96 6152 case UNSPEC_ADDP4:
b38ba463 6153 case UNSPEC_FR_SQRT_RECIP_APPROX:
07acc7b3 6154 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
048d0d36
MK
6155 case UNSPEC_LDA:
6156 case UNSPEC_LDS:
388092d5 6157 case UNSPEC_LDS_A:
048d0d36
MK
6158 case UNSPEC_LDSA:
6159 case UNSPEC_CHKACLR:
6160 case UNSPEC_CHKS:
6dd12198
SE
6161 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6162 break;
6163
086c0f96 6164 case UNSPEC_FR_RECIP_APPROX:
f526a3c8 6165 case UNSPEC_SHRP:
046625fa 6166 case UNSPEC_COPYSIGN:
1def9c3f 6167 case UNSPEC_FR_RECIP_APPROX_RES:
655f2eb9
RH
6168 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6169 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6170 break;
6171
086c0f96 6172 case UNSPEC_CMPXCHG_ACQ:
0551c32d
RH
6173 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6174 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6175 break;
6176
c65ebc55 6177 default:
e820471b 6178 gcc_unreachable ();
c65ebc55
JW
6179 }
6180 break;
6181
6182 case UNSPEC_VOLATILE:
6183 switch (XINT (x, 1))
6184 {
086c0f96 6185 case UNSPECV_ALLOC:
25250265
JW
6186 /* Alloc must always be the first instruction of a group.
6187 We force this by always returning true. */
6188 /* ??? We might get better scheduling if we explicitly check for
6189 input/local/output register dependencies, and modify the
6190 scheduler so that alloc is always reordered to the start of
6191 the current group. We could then eliminate all of the
6192 first_instruction code. */
6193 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6194
6195 new_flags.is_write = 1;
25250265
JW
6196 rws_access_regno (REG_AR_CFM, new_flags, pred);
6197 return 1;
c65ebc55 6198
086c0f96 6199 case UNSPECV_SET_BSP:
3b572406
RH
6200 need_barrier = 1;
6201 break;
6202
086c0f96
RH
6203 case UNSPECV_BLOCKAGE:
6204 case UNSPECV_INSN_GROUP_BARRIER:
6205 case UNSPECV_BREAK:
6206 case UNSPECV_PSAC_ALL:
6207 case UNSPECV_PSAC_NORMAL:
3b572406 6208 return 0;
0c96007e 6209
c65ebc55 6210 default:
e820471b 6211 gcc_unreachable ();
c65ebc55
JW
6212 }
6213 break;
6214
6215 case RETURN:
6216 new_flags.is_write = 0;
97e242b0
RH
6217 need_barrier = rws_access_regno (REG_RP, flags, pred);
6218 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6219
6220 new_flags.is_write = 1;
97e242b0
RH
6221 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6222 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6223 break;
6224
6225 default:
6226 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6227 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6228 switch (format_ptr[i])
6229 {
6230 case '0': /* unused field */
6231 case 'i': /* integer */
6232 case 'n': /* note */
6233 case 'w': /* wide integer */
6234 case 's': /* pointer to string */
6235 case 'S': /* optional pointer to string */
6236 break;
6237
6238 case 'e':
6239 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6240 need_barrier = 1;
6241 break;
6242
6243 case 'E':
6244 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6245 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6246 need_barrier = 1;
6247 break;
6248
6249 default:
e820471b 6250 gcc_unreachable ();
c65ebc55 6251 }
2ed4af6f 6252 break;
c65ebc55
JW
6253 }
6254 return need_barrier;
6255}
6256
c1bc6ca8 6257/* Clear out the state for group_barrier_needed at the start of a
2130b7fb
BS
6258 sequence of insns. */
6259
6260static void
9c808aad 6261init_insn_group_barriers (void)
2130b7fb
BS
6262{
6263 memset (rws_sum, 0, sizeof (rws_sum));
25250265 6264 first_instruction = 1;
2130b7fb
BS
6265}
6266
c1bc6ca8
JW
6267/* Given the current state, determine whether a group barrier (a stop bit) is
6268 necessary before INSN. Return nonzero if so. This modifies the state to
6269 include the effects of INSN as a side-effect. */
2130b7fb
BS
6270
6271static int
c1bc6ca8 6272group_barrier_needed (rtx insn)
2130b7fb
BS
6273{
6274 rtx pat;
6275 int need_barrier = 0;
6276 struct reg_flags flags;
6277
6278 memset (&flags, 0, sizeof (flags));
6279 switch (GET_CODE (insn))
6280 {
6281 case NOTE:
b5b8b0ac 6282 case DEBUG_INSN:
2130b7fb
BS
6283 break;
6284
6285 case BARRIER:
6286 /* A barrier doesn't imply an instruction group boundary. */
6287 break;
6288
6289 case CODE_LABEL:
6290 memset (rws_insn, 0, sizeof (rws_insn));
6291 return 1;
6292
6293 case CALL_INSN:
6294 flags.is_branch = 1;
6295 flags.is_sibcall = SIBLING_CALL_P (insn);
6296 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
6297
6298 /* Don't bundle a call following another call. */
6299 if ((pat = prev_active_insn (insn))
6300 && GET_CODE (pat) == CALL_INSN)
6301 {
6302 need_barrier = 1;
6303 break;
6304 }
6305
2130b7fb
BS
6306 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6307 break;
6308
6309 case JUMP_INSN:
048d0d36
MK
6310 if (!ia64_spec_check_p (insn))
6311 flags.is_branch = 1;
f12f25a7
RH
6312
6313 /* Don't bundle a jump following a call. */
6314 if ((pat = prev_active_insn (insn))
6315 && GET_CODE (pat) == CALL_INSN)
6316 {
6317 need_barrier = 1;
6318 break;
6319 }
5efb1046 6320 /* FALLTHRU */
2130b7fb
BS
6321
6322 case INSN:
6323 if (GET_CODE (PATTERN (insn)) == USE
6324 || GET_CODE (PATTERN (insn)) == CLOBBER)
6325 /* Don't care about USE and CLOBBER "insns"---those are used to
6326 indicate to the optimizer that it shouldn't get rid of
6327 certain operations. */
6328 break;
6329
6330 pat = PATTERN (insn);
6331
6332 /* Ug. Hack hacks hacked elsewhere. */
6333 switch (recog_memoized (insn))
6334 {
6335 /* We play dependency tricks with the epilogue in order
6336 to get proper schedules. Undo this for dv analysis. */
6337 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 6338 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
6339 pat = XVECEXP (pat, 0, 0);
6340 break;
6341
6342 /* The pattern we use for br.cloop confuses the code above.
6343 The second element of the vector is representative. */
6344 case CODE_FOR_doloop_end_internal:
6345 pat = XVECEXP (pat, 0, 1);
6346 break;
6347
6348 /* Doesn't generate code. */
6349 case CODE_FOR_pred_rel_mutex:
d0e82870 6350 case CODE_FOR_prologue_use:
2130b7fb
BS
6351 return 0;
6352
6353 default:
6354 break;
6355 }
6356
6357 memset (rws_insn, 0, sizeof (rws_insn));
6358 need_barrier = rtx_needs_barrier (pat, flags, 0);
6359
6360 /* Check to see if the previous instruction was a volatile
6361 asm. */
6362 if (! need_barrier)
6363 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
388092d5 6364
2130b7fb
BS
6365 break;
6366
6367 default:
e820471b 6368 gcc_unreachable ();
2130b7fb 6369 }
25250265 6370
30028c85
VM
6371 if (first_instruction && INSN_P (insn)
6372 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6373 && GET_CODE (PATTERN (insn)) != USE
6374 && GET_CODE (PATTERN (insn)) != CLOBBER)
25250265
JW
6375 {
6376 need_barrier = 0;
6377 first_instruction = 0;
6378 }
6379
2130b7fb
BS
6380 return need_barrier;
6381}
6382
c1bc6ca8 6383/* Like group_barrier_needed, but do not clobber the current state. */
2130b7fb
BS
6384
6385static int
c1bc6ca8 6386safe_group_barrier_needed (rtx insn)
2130b7fb 6387{
25250265 6388 int saved_first_instruction;
2130b7fb 6389 int t;
25250265 6390
25250265 6391 saved_first_instruction = first_instruction;
444a356a 6392 in_safe_group_barrier = 1;
25250265 6393
c1bc6ca8 6394 t = group_barrier_needed (insn);
25250265 6395
25250265 6396 first_instruction = saved_first_instruction;
444a356a 6397 in_safe_group_barrier = 0;
25250265 6398
2130b7fb
BS
6399 return t;
6400}
6401
18dbd950
RS
6402/* Scan the current function and insert stop bits as necessary to
6403 eliminate dependencies. This function assumes that a final
6404 instruction scheduling pass has been run which has already
6405 inserted most of the necessary stop bits. This function only
6406 inserts new ones at basic block boundaries, since these are
6407 invisible to the scheduler. */
2130b7fb
BS
6408
6409static void
9c808aad 6410emit_insn_group_barriers (FILE *dump)
2130b7fb
BS
6411{
6412 rtx insn;
6413 rtx last_label = 0;
6414 int insns_since_last_label = 0;
6415
6416 init_insn_group_barriers ();
6417
18dbd950 6418 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb
BS
6419 {
6420 if (GET_CODE (insn) == CODE_LABEL)
6421 {
6422 if (insns_since_last_label)
6423 last_label = insn;
6424 insns_since_last_label = 0;
6425 }
6426 else if (GET_CODE (insn) == NOTE
a38e7aa5 6427 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
2130b7fb
BS
6428 {
6429 if (insns_since_last_label)
6430 last_label = insn;
6431 insns_since_last_label = 0;
6432 }
6433 else if (GET_CODE (insn) == INSN
6434 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 6435 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
6436 {
6437 init_insn_group_barriers ();
6438 last_label = 0;
6439 }
b5b8b0ac 6440 else if (NONDEBUG_INSN_P (insn))
2130b7fb
BS
6441 {
6442 insns_since_last_label = 1;
6443
c1bc6ca8 6444 if (group_barrier_needed (insn))
2130b7fb
BS
6445 {
6446 if (last_label)
6447 {
6448 if (dump)
6449 fprintf (dump, "Emitting stop before label %d\n",
6450 INSN_UID (last_label));
6451 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6452 insn = last_label;
112333d3
BS
6453
6454 init_insn_group_barriers ();
6455 last_label = 0;
2130b7fb 6456 }
2130b7fb
BS
6457 }
6458 }
6459 }
6460}
f4d578da
BS
6461
6462/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6463 This function has to emit all necessary group barriers. */
6464
6465static void
9c808aad 6466emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da
BS
6467{
6468 rtx insn;
6469
6470 init_insn_group_barriers ();
6471
18dbd950 6472 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 6473 {
bd7b9a0f
RH
6474 if (GET_CODE (insn) == BARRIER)
6475 {
6476 rtx last = prev_active_insn (insn);
6477
6478 if (! last)
6479 continue;
6480 if (GET_CODE (last) == JUMP_INSN
6481 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6482 last = prev_active_insn (last);
6483 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6484 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6485
6486 init_insn_group_barriers ();
6487 }
b5b8b0ac 6488 else if (NONDEBUG_INSN_P (insn))
f4d578da 6489 {
bd7b9a0f
RH
6490 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6491 init_insn_group_barriers ();
c1bc6ca8 6492 else if (group_barrier_needed (insn))
f4d578da
BS
6493 {
6494 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6495 init_insn_group_barriers ();
c1bc6ca8 6496 group_barrier_needed (insn);
f4d578da
BS
6497 }
6498 }
6499 }
6500}
30028c85 6501
2130b7fb 6502\f
2130b7fb 6503
30028c85 6504/* Instruction scheduling support. */
2130b7fb
BS
6505
6506#define NR_BUNDLES 10
6507
30028c85 6508/* A list of names of all available bundles. */
2130b7fb 6509
30028c85 6510static const char *bundle_name [NR_BUNDLES] =
2130b7fb 6511{
30028c85
VM
6512 ".mii",
6513 ".mmi",
6514 ".mfi",
6515 ".mmf",
2130b7fb 6516#if NR_BUNDLES == 10
30028c85
VM
6517 ".bbb",
6518 ".mbb",
2130b7fb 6519#endif
30028c85
VM
6520 ".mib",
6521 ".mmb",
6522 ".mfb",
6523 ".mlx"
2130b7fb
BS
6524};
6525
30028c85 6526/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 6527
30028c85 6528int ia64_final_schedule = 0;
2130b7fb 6529
35fd3193 6530/* Codes of the corresponding queried units: */
2130b7fb 6531
30028c85
VM
6532static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6533static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 6534
30028c85
VM
6535static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6536static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 6537
30028c85
VM
6538static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6539
6540/* The following variable value is an insn group barrier. */
6541
6542static rtx dfa_stop_insn;
6543
6544/* The following variable value is the last issued insn. */
6545
6546static rtx last_scheduled_insn;
6547
30028c85
VM
6548/* The following variable value is pointer to a DFA state used as
6549 temporary variable. */
6550
6551static state_t temp_dfa_state = NULL;
6552
6553/* The following variable value is DFA state after issuing the last
6554 insn. */
6555
6556static state_t prev_cycle_state = NULL;
6557
6558/* The following array element values are TRUE if the corresponding
9e4f94de 6559   insn requires stop bits to be added before it.  */
30028c85 6560
048d0d36
MK
6561static char *stops_p = NULL;
6562
30028c85
VM
6563/* The following variable is used to set up the mentioned above array. */
6564
6565static int stop_before_p = 0;
6566
6567/* The following variable value is length of the arrays `clocks' and
6568 `add_cycles'. */
6569
6570static int clocks_length;
6571
6572/* The following array element values are cycles on which the
6573 corresponding insn will be issued. The array is used only for
6574 Itanium1. */
6575
6576static int *clocks;
6577
6578/* The following array element values are numbers of cycles that should be
6579 added to improve insn scheduling for MM_insns for Itanium1. */
6580
6581static int *add_cycles;
2130b7fb 6582
048d0d36
MK
6583/* The following variable value is number of data speculations in progress. */
6584static int pending_data_specs = 0;
6585
388092d5
AB
6586/* Number of memory references on current and three future processor cycles. */
6587static char mem_ops_in_group[4];
6588
6589/* Number of current processor cycle (from scheduler's point of view). */
6590static int current_cycle;
6591
9c808aad
AJ
6592static rtx ia64_single_set (rtx);
6593static void ia64_emit_insn_before (rtx, rtx);
2130b7fb
BS
6594
6595/* Map a bundle number to its pseudo-op. */
6596
6597const char *
9c808aad 6598get_bundle_name (int b)
2130b7fb 6599{
30028c85 6600 return bundle_name[b];
2130b7fb
BS
6601}
6602
2130b7fb
BS
6603
6604/* Return the maximum number of instructions a cpu can issue. */
6605
c237e94a 6606static int
9c808aad 6607ia64_issue_rate (void)
2130b7fb
BS
6608{
6609 return 6;
6610}
6611
6612/* Helper function - like single_set, but look inside COND_EXEC. */
6613
6614static rtx
9c808aad 6615ia64_single_set (rtx insn)
2130b7fb 6616{
30fa7e33 6617 rtx x = PATTERN (insn), ret;
2130b7fb
BS
6618 if (GET_CODE (x) == COND_EXEC)
6619 x = COND_EXEC_CODE (x);
6620 if (GET_CODE (x) == SET)
6621 return x;
bdbe5b8d
RH
6622
6623 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6624 Although they are not classical single set, the second set is there just
6625 to protect it from moving past FP-relative stack accesses. */
6626 switch (recog_memoized (insn))
30fa7e33 6627 {
bdbe5b8d
RH
6628 case CODE_FOR_prologue_allocate_stack:
6629 case CODE_FOR_epilogue_deallocate_stack:
6630 ret = XVECEXP (x, 0, 0);
6631 break;
6632
6633 default:
6634 ret = single_set_2 (insn, x);
6635 break;
30fa7e33 6636 }
bdbe5b8d 6637
30fa7e33 6638 return ret;
2130b7fb
BS
6639}
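/* A minimal illustration of the COND_EXEC handling above (the predicate
   and register numbers are hypothetical, chosen only for exposition):
   for a predicated move such as

     (cond_exec (ne (reg:BI p6) (const_int 0))
                (set (reg:DI r14) (reg:DI r15)))

   ia64_single_set strips the COND_EXEC wrapper and returns the inner SET,
   which is exactly the "look inside COND_EXEC" behavior promised in the
   comment above.  */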
6640
388092d5
AB
6641/* Adjust the cost of a scheduling dependency.
6642 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
6643 COST is the current cost, DW is dependency weakness. */
c237e94a 6644static int
388092d5 6645ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
2130b7fb 6646{
388092d5 6647 enum reg_note dep_type = (enum reg_note) dep_type1;
2130b7fb
BS
6648 enum attr_itanium_class dep_class;
6649 enum attr_itanium_class insn_class;
2130b7fb 6650
2130b7fb 6651 insn_class = ia64_safe_itanium_class (insn);
30028c85 6652 dep_class = ia64_safe_itanium_class (dep_insn);
388092d5
AB
6653
6654 /* Treat true memory dependencies separately. Ignore apparent true
 6655 dependence between a store and a call (the call contains a MEM of a SYMBOL_REF). */
6656 if (dep_type == REG_DEP_TRUE
6657 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6658 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6659 return 0;
6660
6661 if (dw == MIN_DEP_WEAK)
6662 /* Store and load are likely to alias, use higher cost to avoid stall. */
6663 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6664 else if (dw > MIN_DEP_WEAK)
6665 {
6666 /* Store and load are less likely to alias. */
6667 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6668 /* Assume there will be no cache conflict for floating-point data.
6669 For integer data, L1 conflict penalty is huge (17 cycles), so we
6670 never assume it will not cause a conflict. */
6671 return 0;
6672 else
6673 return cost;
6674 }
6675
6676 if (dep_type != REG_DEP_OUTPUT)
6677 return cost;
6678
30028c85
VM
6679 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6680 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
6681 return 0;
6682
2130b7fb
BS
6683 return cost;
6684}
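/* Worked example of the cost adjustment above (values illustrative only):
   for a store feeding a load through a true memory dependence, a weakness
   DW == MIN_DEP_WEAK (the accesses are likely to alias) yields
   PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST), while DW > MIN_DEP_WEAK
   keeps COST unchanged unless the producer is an STF and
   mflag_sched_fp_mem_deps_zero_cost is set, in which case the cost drops
   to 0 as the code above shows.  */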
6685
14d118d6
DM
6686/* Like emit_insn_before, but skip cycle_display notes.
6687 ??? When cycle display notes are implemented, update this. */
6688
6689static void
9c808aad 6690ia64_emit_insn_before (rtx insn, rtx before)
14d118d6
DM
6691{
6692 emit_insn_before (insn, before);
6693}
6694
30028c85
VM
6695/* The following function marks insns that produce addresses for load
6696 and store insns. Such insns will be placed into M slots because that
6697 decreases latency time for Itanium1 (see function
6698 `ia64_produce_address_p' and the DFA descriptions). */
2130b7fb
BS
6699
6700static void
9c808aad 6701ia64_dependencies_evaluation_hook (rtx head, rtx tail)
2130b7fb 6702{
b198261f 6703 rtx insn, next, next_tail;
9c808aad 6704
f12b785d
RH
6705 /* Before reload, which_alternative is not set, which means that
6706 ia64_safe_itanium_class will produce wrong results for (at least)
6707 move instructions. */
6708 if (!reload_completed)
6709 return;
6710
30028c85
VM
6711 next_tail = NEXT_INSN (tail);
6712 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6713 if (INSN_P (insn))
6714 insn->call = 0;
6715 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6716 if (INSN_P (insn)
6717 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6718 {
e2f6ff94
MK
6719 sd_iterator_def sd_it;
6720 dep_t dep;
6721 bool has_mem_op_consumer_p = false;
b198261f 6722
e2f6ff94 6723 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
30028c85 6724 {
a71aef0b
JB
6725 enum attr_itanium_class c;
6726
e2f6ff94 6727 if (DEP_TYPE (dep) != REG_DEP_TRUE)
f12b785d 6728 continue;
b198261f 6729
e2f6ff94 6730 next = DEP_CON (dep);
a71aef0b
JB
6731 c = ia64_safe_itanium_class (next);
6732 if ((c == ITANIUM_CLASS_ST
6733 || c == ITANIUM_CLASS_STF)
30028c85 6734 && ia64_st_address_bypass_p (insn, next))
e2f6ff94
MK
6735 {
6736 has_mem_op_consumer_p = true;
6737 break;
6738 }
a71aef0b
JB
6739 else if ((c == ITANIUM_CLASS_LD
6740 || c == ITANIUM_CLASS_FLD
6741 || c == ITANIUM_CLASS_FLDP)
30028c85 6742 && ia64_ld_address_bypass_p (insn, next))
e2f6ff94
MK
6743 {
6744 has_mem_op_consumer_p = true;
6745 break;
6746 }
30028c85 6747 }
e2f6ff94
MK
6748
6749 insn->call = has_mem_op_consumer_p;
30028c85
VM
6750 }
6751}
2130b7fb 6752
30028c85 6753/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 6754
30028c85 6755static void
9c808aad
AJ
6756ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6757 int sched_verbose ATTRIBUTE_UNUSED,
6758 int max_ready ATTRIBUTE_UNUSED)
30028c85
VM
6759{
6760#ifdef ENABLE_CHECKING
6761 rtx insn;
9c808aad 6762
388092d5 6763 if (!sel_sched_p () && reload_completed)
30028c85
VM
6764 for (insn = NEXT_INSN (current_sched_info->prev_head);
6765 insn != current_sched_info->next_tail;
6766 insn = NEXT_INSN (insn))
e820471b 6767 gcc_assert (!SCHED_GROUP_P (insn));
30028c85
VM
6768#endif
6769 last_scheduled_insn = NULL_RTX;
6770 init_insn_group_barriers ();
388092d5
AB
6771
6772 current_cycle = 0;
6773 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
2130b7fb
BS
6774}
6775
048d0d36
MK
6776/* We're beginning a scheduling pass. Check assertion. */
6777
6778static void
6779ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6780 int sched_verbose ATTRIBUTE_UNUSED,
6781 int max_ready ATTRIBUTE_UNUSED)
6782{
388092d5 6783 gcc_assert (pending_data_specs == 0);
048d0d36
MK
6784}
6785
6786/* Scheduling pass is now finished. Free/reset static variable. */
6787static void
6788ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6789 int sched_verbose ATTRIBUTE_UNUSED)
6790{
388092d5
AB
6791 gcc_assert (pending_data_specs == 0);
6792}
6793
6794/* Return TRUE if INSN is a load (either normal or speculative, but not a
6795 speculation check), FALSE otherwise. */
6796static bool
6797is_load_p (rtx insn)
6798{
6799 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6800
6801 return
6802 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6803 && get_attr_check_load (insn) == CHECK_LOAD_NO);
6804}
6805
6806/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
 6807 (taking into account the 3-cycle cache reference postponing for stores: Intel
6808 Itanium 2 Reference Manual for Software Development and Optimization,
6809 6.7.3.1). */
6810static void
6811record_memory_reference (rtx insn)
6812{
6813 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6814
6815 switch (insn_class) {
6816 case ITANIUM_CLASS_FLD:
6817 case ITANIUM_CLASS_LD:
6818 mem_ops_in_group[current_cycle % 4]++;
6819 break;
6820 case ITANIUM_CLASS_STF:
6821 case ITANIUM_CLASS_ST:
6822 mem_ops_in_group[(current_cycle + 3) % 4]++;
6823 break;
6824 default:;
6825 }
048d0d36
MK
6826}
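/* Illustration of the indexing above (cycle number chosen arbitrarily):
   with current_cycle == 5 a load is counted in its own cycle's slot,
   mem_ops_in_group[5 % 4] == mem_ops_in_group[1], while a store is charged
   three cycles later, mem_ops_in_group[(5 + 3) % 4] == mem_ops_in_group[0],
   which implements the store postponing described in the comment before
   this function.  */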
6827
30028c85
VM
6828/* We are about to begin issuing insns for this clock cycle.
6829 Override the default sort algorithm to better slot instructions. */
2130b7fb 6830
30028c85 6831static int
9c808aad 6832ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
388092d5 6833 int *pn_ready, int clock_var,
9c808aad 6834 int reorder_type)
2130b7fb 6835{
30028c85
VM
6836 int n_asms;
6837 int n_ready = *pn_ready;
6838 rtx *e_ready = ready + n_ready;
6839 rtx *insnp;
2130b7fb 6840
30028c85
VM
6841 if (sched_verbose)
6842 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 6843
30028c85 6844 if (reorder_type == 0)
2130b7fb 6845 {
30028c85
VM
6846 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6847 n_asms = 0;
6848 for (insnp = ready; insnp < e_ready; insnp++)
6849 if (insnp < e_ready)
6850 {
6851 rtx insn = *insnp;
6852 enum attr_type t = ia64_safe_type (insn);
6853 if (t == TYPE_UNKNOWN)
6854 {
6855 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6856 || asm_noperands (PATTERN (insn)) >= 0)
6857 {
6858 rtx lowest = ready[n_asms];
6859 ready[n_asms] = insn;
6860 *insnp = lowest;
6861 n_asms++;
6862 }
6863 else
6864 {
6865 rtx highest = ready[n_ready - 1];
6866 ready[n_ready - 1] = insn;
6867 *insnp = highest;
6868 return 1;
6869 }
6870 }
6871 }
98d2b17e 6872
30028c85 6873 if (n_asms < n_ready)
98d2b17e 6874 {
30028c85
VM
6875 /* Some normal insns to process. Skip the asms. */
6876 ready += n_asms;
6877 n_ready -= n_asms;
98d2b17e 6878 }
30028c85
VM
6879 else if (n_ready > 0)
6880 return 1;
2130b7fb
BS
6881 }
6882
30028c85 6883 if (ia64_final_schedule)
2130b7fb 6884 {
30028c85
VM
6885 int deleted = 0;
6886 int nr_need_stop = 0;
6887
6888 for (insnp = ready; insnp < e_ready; insnp++)
c1bc6ca8 6889 if (safe_group_barrier_needed (*insnp))
30028c85 6890 nr_need_stop++;
9c808aad 6891
30028c85
VM
6892 if (reorder_type == 1 && n_ready == nr_need_stop)
6893 return 0;
6894 if (reorder_type == 0)
6895 return 1;
6896 insnp = e_ready;
6897 /* Move down everything that needs a stop bit, preserving
6898 relative order. */
6899 while (insnp-- > ready + deleted)
6900 while (insnp >= ready + deleted)
6901 {
6902 rtx insn = *insnp;
c1bc6ca8 6903 if (! safe_group_barrier_needed (insn))
30028c85
VM
6904 break;
6905 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6906 *ready = insn;
6907 deleted++;
6908 }
6909 n_ready -= deleted;
6910 ready += deleted;
2130b7fb 6911 }
2130b7fb 6912
388092d5
AB
6913 current_cycle = clock_var;
6914 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
6915 {
6916 int moved = 0;
6917
6918 insnp = e_ready;
6919 /* Move down loads/stores, preserving relative order. */
6920 while (insnp-- > ready + moved)
6921 while (insnp >= ready + moved)
6922 {
6923 rtx insn = *insnp;
6924 if (! is_load_p (insn))
6925 break;
6926 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6927 *ready = insn;
6928 moved++;
6929 }
6930 n_ready -= moved;
6931 ready += moved;
6932 }
6933
30028c85 6934 return 1;
2130b7fb 6935}
6b6c1201 6936
30028c85
VM
6937/* We are about to begin issuing insns for this clock cycle. Override
6938 the default sort algorithm to better slot instructions. */
c65ebc55 6939
30028c85 6940static int
9c808aad
AJ
6941ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6942 int clock_var)
2130b7fb 6943{
30028c85
VM
6944 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6945 pn_ready, clock_var, 0);
2130b7fb
BS
6946}
6947
30028c85
VM
6948/* Like ia64_sched_reorder, but called after issuing each insn.
6949 Override the default sort algorithm to better slot instructions. */
2130b7fb 6950
30028c85 6951static int
9c808aad
AJ
6952ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6953 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6954 int *pn_ready, int clock_var)
30028c85
VM
6955{
6956 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6957 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6958 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6959 clock_var, 1);
2130b7fb
BS
6960}
6961
30028c85
VM
6962/* We are about to issue INSN. Return the number of insns left on the
6963 ready queue that can be issued this cycle. */
2130b7fb 6964
30028c85 6965static int
9c808aad
AJ
6966ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6967 int sched_verbose ATTRIBUTE_UNUSED,
6968 rtx insn ATTRIBUTE_UNUSED,
6969 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 6970{
388092d5 6971 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
048d0d36 6972 /* Modulo scheduling does not extend h_i_d when emitting
388092d5 6973 new instructions. Don't use h_i_d if we don't have to. */
048d0d36
MK
6974 {
6975 if (DONE_SPEC (insn) & BEGIN_DATA)
6976 pending_data_specs++;
6977 if (CHECK_SPEC (insn) & BEGIN_DATA)
6978 pending_data_specs--;
6979 }
6980
b5b8b0ac
AO
6981 if (DEBUG_INSN_P (insn))
6982 return 1;
6983
30028c85
VM
6984 last_scheduled_insn = insn;
6985 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6986 if (reload_completed)
2130b7fb 6987 {
c1bc6ca8 6988 int needed = group_barrier_needed (insn);
e820471b
NS
6989
6990 gcc_assert (!needed);
30028c85
VM
6991 if (GET_CODE (insn) == CALL_INSN)
6992 init_insn_group_barriers ();
6993 stops_p [INSN_UID (insn)] = stop_before_p;
6994 stop_before_p = 0;
388092d5
AB
6995
6996 record_memory_reference (insn);
2130b7fb 6997 }
30028c85
VM
6998 return 1;
6999}
c65ebc55 7000
30028c85
VM
7001/* We are choosing insn from the ready queue. Return nonzero if INSN
7002 can be chosen. */
c65ebc55 7003
30028c85 7004static int
9c808aad 7005ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
30028c85 7006{
388092d5 7007 gcc_assert (insn && INSN_P (insn));
048d0d36
MK
7008 return ((!reload_completed
7009 || !safe_group_barrier_needed (insn))
388092d5
AB
7010 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7011 && (!mflag_sched_mem_insns_hard_limit
7012 || !is_load_p (insn)
7013 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
048d0d36
MK
7014}
7015
7016/* We are choosing insn from the ready queue. Return nonzero if INSN
7017 can be chosen. */
7018
7019static bool
3101faab 7020ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
048d0d36
MK
7021{
7022 gcc_assert (insn && INSN_P (insn));
 7023 /* The ALAT has 32 entries. Since we perform conservative data speculation,
 7024 we keep the ALAT half empty. */
7025 return (pending_data_specs < 16
7026 || !(TODO_SPEC (insn) & BEGIN_DATA));
2130b7fb
BS
7027}
7028
30028c85
VM
7029/* The following variable value is a pseudo-insn used by the DFA insn
7030 scheduler to change the DFA state when the simulated clock is
7031 increased. */
2130b7fb 7032
30028c85 7033static rtx dfa_pre_cycle_insn;
2130b7fb 7034
388092d5
AB
7035/* Returns 1 when a meaningful insn was scheduled between the last group
7036 barrier and LAST. */
7037static int
7038scheduled_good_insn (rtx last)
7039{
7040 if (last && recog_memoized (last) >= 0)
7041 return 1;
7042
7043 for ( ;
7044 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7045 && !stops_p[INSN_UID (last)];
7046 last = PREV_INSN (last))
7047 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7048 the ebb we're scheduling. */
7049 if (INSN_P (last) && recog_memoized (last) >= 0)
7050 return 1;
7051
7052 return 0;
7053}
7054
1e5f1716 7055/* We are about to begin issuing INSN. Return nonzero if we cannot
30028c85
VM
 7056 issue it on the given cycle CLOCK and return zero if we should not sort
7057 the ready queue on the next clock start. */
2130b7fb
BS
7058
7059static int
9c808aad
AJ
7060ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7061 int clock, int *sort_p)
2130b7fb 7062{
30028c85 7063 int setup_clocks_p = FALSE;
2130b7fb 7064
e820471b 7065 gcc_assert (insn && INSN_P (insn));
b5b8b0ac
AO
7066
7067 if (DEBUG_INSN_P (insn))
7068 return 0;
7069
388092d5
AB
7070 /* When a group barrier is needed for insn, last_scheduled_insn
7071 should be set. */
7072 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7073 || last_scheduled_insn);
7074
7075 if ((reload_completed
7076 && (safe_group_barrier_needed (insn)
7077 || (mflag_sched_stop_bits_after_every_cycle
7078 && last_clock != clock
7079 && last_scheduled_insn
7080 && scheduled_good_insn (last_scheduled_insn))))
30028c85
VM
7081 || (last_scheduled_insn
7082 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7083 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7084 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
2130b7fb 7085 {
30028c85 7086 init_insn_group_barriers ();
388092d5 7087
30028c85
VM
7088 if (verbose && dump)
7089 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7090 last_clock == clock ? " + cycle advance" : "");
388092d5 7091
30028c85 7092 stop_before_p = 1;
388092d5
AB
7093 current_cycle = clock;
7094 mem_ops_in_group[current_cycle % 4] = 0;
7095
30028c85 7096 if (last_clock == clock)
2130b7fb 7097 {
30028c85
VM
7098 state_transition (curr_state, dfa_stop_insn);
7099 if (TARGET_EARLY_STOP_BITS)
7100 *sort_p = (last_scheduled_insn == NULL_RTX
7101 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7102 else
7103 *sort_p = 0;
7104 return 1;
7105 }
7106 else if (reload_completed)
7107 setup_clocks_p = TRUE;
388092d5
AB
7108
7109 if (last_scheduled_insn)
25069b42 7110 {
388092d5
AB
7111 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7112 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7113 state_reset (curr_state);
7114 else
7115 {
7116 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7117 state_transition (curr_state, dfa_stop_insn);
7118 state_transition (curr_state, dfa_pre_cycle_insn);
7119 state_transition (curr_state, NULL);
7120 }
25069b42 7121 }
30028c85
VM
7122 }
7123 else if (reload_completed)
7124 setup_clocks_p = TRUE;
388092d5 7125
f75ce96a
VM
7126 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
7127 && GET_CODE (PATTERN (insn)) != ASM_INPUT
2d8f9759 7128 && asm_noperands (PATTERN (insn)) < 0)
30028c85
VM
7129 {
7130 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
9c808aad 7131
30028c85
VM
7132 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
7133 {
e2f6ff94
MK
7134 sd_iterator_def sd_it;
7135 dep_t dep;
30028c85 7136 int d = -1;
9c808aad 7137
e2f6ff94
MK
7138 FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep)
7139 if (DEP_TYPE (dep) == REG_DEP_TRUE)
30028c85
VM
7140 {
7141 enum attr_itanium_class dep_class;
e2f6ff94 7142 rtx dep_insn = DEP_PRO (dep);
9c808aad 7143
30028c85
VM
7144 dep_class = ia64_safe_itanium_class (dep_insn);
7145 if ((dep_class == ITANIUM_CLASS_MMMUL
7146 || dep_class == ITANIUM_CLASS_MMSHF)
7147 && last_clock - clocks [INSN_UID (dep_insn)] < 4
7148 && (d < 0
7149 || last_clock - clocks [INSN_UID (dep_insn)] < d))
7150 d = last_clock - clocks [INSN_UID (dep_insn)];
7151 }
7152 if (d >= 0)
7153 add_cycles [INSN_UID (insn)] = 3 - d;
2130b7fb
BS
7154 }
7155 }
388092d5 7156
30028c85 7157 return 0;
2130b7fb
BS
7158}
7159
048d0d36
MK
7160/* Implement targetm.sched.h_i_d_extended hook.
7161 Extend internal data structures. */
7162static void
7163ia64_h_i_d_extended (void)
7164{
048d0d36
MK
7165 if (stops_p != NULL)
7166 {
388092d5 7167 int new_clocks_length = get_max_uid () * 3 / 2;
048d0d36 7168
5ead67f6 7169 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
048d0d36
MK
7170
7171 if (ia64_tune == PROCESSOR_ITANIUM)
7172 {
5ead67f6
KG
7173 clocks = (int *) xrecalloc (clocks, new_clocks_length, clocks_length,
7174 sizeof (int));
7175 add_cycles = (int *) xrecalloc (add_cycles, new_clocks_length,
7176 clocks_length, sizeof (int));
048d0d36
MK
7177 }
7178
7179 clocks_length = new_clocks_length;
7180 }
7181}
388092d5
AB
7182\f
7183
7184/* This structure describes the data used by the backend to guide scheduling.
7185 When the current scheduling point is switched, this data should be saved
7186 and restored later, if the scheduler returns to this point. */
7187struct _ia64_sched_context
7188{
7189 state_t prev_cycle_state;
7190 rtx last_scheduled_insn;
7191 struct reg_write_state rws_sum[NUM_REGS];
7192 struct reg_write_state rws_insn[NUM_REGS];
7193 int first_instruction;
7194 int pending_data_specs;
7195 int current_cycle;
7196 char mem_ops_in_group[4];
7197};
7198typedef struct _ia64_sched_context *ia64_sched_context_t;
7199
7200/* Allocates a scheduling context. */
7201static void *
7202ia64_alloc_sched_context (void)
7203{
7204 return xmalloc (sizeof (struct _ia64_sched_context));
7205}
7206
7207/* Initializes the _SC context with clean data, if CLEAN_P, and from
7208 the global context otherwise. */
7209static void
7210ia64_init_sched_context (void *_sc, bool clean_p)
7211{
7212 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7213
7214 sc->prev_cycle_state = xmalloc (dfa_state_size);
7215 if (clean_p)
7216 {
7217 state_reset (sc->prev_cycle_state);
7218 sc->last_scheduled_insn = NULL_RTX;
7219 memset (sc->rws_sum, 0, sizeof (rws_sum));
7220 memset (sc->rws_insn, 0, sizeof (rws_insn));
7221 sc->first_instruction = 1;
7222 sc->pending_data_specs = 0;
7223 sc->current_cycle = 0;
7224 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7225 }
7226 else
7227 {
7228 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7229 sc->last_scheduled_insn = last_scheduled_insn;
7230 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7231 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7232 sc->first_instruction = first_instruction;
7233 sc->pending_data_specs = pending_data_specs;
7234 sc->current_cycle = current_cycle;
7235 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7236 }
7237}
7238
7239/* Sets the global scheduling context to the one pointed to by _SC. */
7240static void
7241ia64_set_sched_context (void *_sc)
7242{
7243 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7244
7245 gcc_assert (sc != NULL);
7246
7247 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7248 last_scheduled_insn = sc->last_scheduled_insn;
7249 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7250 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7251 first_instruction = sc->first_instruction;
7252 pending_data_specs = sc->pending_data_specs;
7253 current_cycle = sc->current_cycle;
7254 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7255}
7256
7257/* Clears the data in the _SC scheduling context. */
7258static void
7259ia64_clear_sched_context (void *_sc)
7260{
7261 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7262
7263 free (sc->prev_cycle_state);
7264 sc->prev_cycle_state = NULL;
7265}
7266
7267/* Frees the _SC scheduling context. */
7268static void
7269ia64_free_sched_context (void *_sc)
7270{
7271 gcc_assert (_sc != NULL);
7272
7273 free (_sc);
7274}
7275
7276typedef rtx (* gen_func_t) (rtx, rtx);
7277
7278/* Return a function that will generate a load of mode MODE_NO
7279 with speculation types TS. */
7280static gen_func_t
7281get_spec_load_gen_function (ds_t ts, int mode_no)
7282{
7283 static gen_func_t gen_ld_[] = {
7284 gen_movbi,
7285 gen_movqi_internal,
7286 gen_movhi_internal,
7287 gen_movsi_internal,
7288 gen_movdi_internal,
7289 gen_movsf_internal,
7290 gen_movdf_internal,
7291 gen_movxf_internal,
7292 gen_movti_internal,
7293 gen_zero_extendqidi2,
7294 gen_zero_extendhidi2,
7295 gen_zero_extendsidi2,
7296 };
7297
7298 static gen_func_t gen_ld_a[] = {
7299 gen_movbi_advanced,
7300 gen_movqi_advanced,
7301 gen_movhi_advanced,
7302 gen_movsi_advanced,
7303 gen_movdi_advanced,
7304 gen_movsf_advanced,
7305 gen_movdf_advanced,
7306 gen_movxf_advanced,
7307 gen_movti_advanced,
7308 gen_zero_extendqidi2_advanced,
7309 gen_zero_extendhidi2_advanced,
7310 gen_zero_extendsidi2_advanced,
7311 };
7312 static gen_func_t gen_ld_s[] = {
7313 gen_movbi_speculative,
7314 gen_movqi_speculative,
7315 gen_movhi_speculative,
7316 gen_movsi_speculative,
7317 gen_movdi_speculative,
7318 gen_movsf_speculative,
7319 gen_movdf_speculative,
7320 gen_movxf_speculative,
7321 gen_movti_speculative,
7322 gen_zero_extendqidi2_speculative,
7323 gen_zero_extendhidi2_speculative,
7324 gen_zero_extendsidi2_speculative,
7325 };
7326 static gen_func_t gen_ld_sa[] = {
7327 gen_movbi_speculative_advanced,
7328 gen_movqi_speculative_advanced,
7329 gen_movhi_speculative_advanced,
7330 gen_movsi_speculative_advanced,
7331 gen_movdi_speculative_advanced,
7332 gen_movsf_speculative_advanced,
7333 gen_movdf_speculative_advanced,
7334 gen_movxf_speculative_advanced,
7335 gen_movti_speculative_advanced,
7336 gen_zero_extendqidi2_speculative_advanced,
7337 gen_zero_extendhidi2_speculative_advanced,
7338 gen_zero_extendsidi2_speculative_advanced,
7339 };
7340 static gen_func_t gen_ld_s_a[] = {
7341 gen_movbi_speculative_a,
7342 gen_movqi_speculative_a,
7343 gen_movhi_speculative_a,
7344 gen_movsi_speculative_a,
7345 gen_movdi_speculative_a,
7346 gen_movsf_speculative_a,
7347 gen_movdf_speculative_a,
7348 gen_movxf_speculative_a,
7349 gen_movti_speculative_a,
7350 gen_zero_extendqidi2_speculative_a,
7351 gen_zero_extendhidi2_speculative_a,
7352 gen_zero_extendsidi2_speculative_a,
7353 };
7354
7355 gen_func_t *gen_ld;
7356
7357 if (ts & BEGIN_DATA)
7358 {
7359 if (ts & BEGIN_CONTROL)
7360 gen_ld = gen_ld_sa;
7361 else
7362 gen_ld = gen_ld_a;
7363 }
7364 else if (ts & BEGIN_CONTROL)
7365 {
7366 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7367 || ia64_needs_block_p (ts))
7368 gen_ld = gen_ld_s;
7369 else
7370 gen_ld = gen_ld_s_a;
7371 }
7372 else if (ts == 0)
7373 gen_ld = gen_ld_;
7374 else
7375 gcc_unreachable ();
7376
7377 return gen_ld[mode_no];
7378}
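/* Example of the table selection above (the concrete load is hypothetical):
   for TS with both BEGIN_DATA and BEGIN_CONTROL set and a DImode access
   (mode_no == 4, see ia64_mode_to_int below), the function returns
   gen_movdi_speculative_advanced from gen_ld_sa, i.e. the pattern for a
   load that is both data- and control-speculative.  */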
048d0d36
MK
7379
7380/* Constants that help mapping 'enum machine_mode' to int. */
7381enum SPEC_MODES
7382 {
7383 SPEC_MODE_INVALID = -1,
7384 SPEC_MODE_FIRST = 0,
7385 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7386 SPEC_MODE_FOR_EXTEND_LAST = 3,
7387 SPEC_MODE_LAST = 8
7388 };
7389
388092d5
AB
7390enum
7391 {
7392 /* Offset to reach ZERO_EXTEND patterns. */
7393 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7394 };
7395
048d0d36
MK
7396/* Return index of the MODE. */
7397static int
7398ia64_mode_to_int (enum machine_mode mode)
7399{
7400 switch (mode)
7401 {
7402 case BImode: return 0; /* SPEC_MODE_FIRST */
7403 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7404 case HImode: return 2;
7405 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7406 case DImode: return 4;
7407 case SFmode: return 5;
7408 case DFmode: return 6;
7409 case XFmode: return 7;
7410 case TImode:
7411 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7412 mentioned in itanium[12].md. Predicate fp_register_operand also
7413 needs to be defined. Bottom line: better disable for now. */
7414 return SPEC_MODE_INVALID;
7415 default: return SPEC_MODE_INVALID;
7416 }
7417}
7418
7419/* Provide information about speculation capabilities. */
7420static void
7421ia64_set_sched_flags (spec_info_t spec_info)
7422{
7423 unsigned int *flags = &(current_sched_info->flags);
7424
7425 if (*flags & SCHED_RGN
388092d5
AB
7426 || *flags & SCHED_EBB
7427 || *flags & SEL_SCHED)
048d0d36
MK
7428 {
7429 int mask = 0;
7430
a57aee2a 7431 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
388092d5 7432 || (mflag_sched_ar_data_spec && reload_completed))
048d0d36
MK
7433 {
7434 mask |= BEGIN_DATA;
388092d5
AB
7435
7436 if (!sel_sched_p ()
7437 && ((mflag_sched_br_in_data_spec && !reload_completed)
7438 || (mflag_sched_ar_in_data_spec && reload_completed)))
048d0d36
MK
7439 mask |= BE_IN_DATA;
7440 }
7441
388092d5
AB
7442 if (mflag_sched_control_spec
7443 && (!sel_sched_p ()
7444 || reload_completed))
048d0d36
MK
7445 {
7446 mask |= BEGIN_CONTROL;
7447
388092d5 7448 if (!sel_sched_p () && mflag_sched_in_control_spec)
048d0d36
MK
7449 mask |= BE_IN_CONTROL;
7450 }
7451
7ab5df48
AB
7452 spec_info->mask = mask;
7453
048d0d36
MK
7454 if (mask)
7455 {
6fb5fa3c
DB
7456 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7457
7458 if (mask & BE_IN_SPEC)
7459 *flags |= NEW_BBS;
048d0d36 7460
048d0d36
MK
7461 spec_info->flags = 0;
7462
7463 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7464 spec_info->flags |= PREFER_NON_DATA_SPEC;
7465
388092d5 7466 if (mask & CONTROL_SPEC)
048d0d36 7467 {
388092d5
AB
7468 if (mflag_sched_prefer_non_control_spec_insns)
7469 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7470
7471 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7472 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
048d0d36 7473 }
388092d5
AB
7474
7475 if (sched_verbose >= 1)
7476 spec_info->dump = sched_dump;
048d0d36
MK
7477 else
7478 spec_info->dump = 0;
7479
7480 if (mflag_sched_count_spec_in_critical_path)
7481 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7482 }
7483 }
cd510f15
AM
7484 else
7485 spec_info->mask = 0;
048d0d36
MK
7486}
7487
388092d5
AB
7488/* If INSN is an appropriate load, return its mode.
7489 Return -1 otherwise. */
048d0d36 7490static int
388092d5
AB
7491get_mode_no_for_insn (rtx insn)
7492{
7493 rtx reg, mem, mode_rtx;
7494 int mode_no;
048d0d36 7495 bool extend_p;
048d0d36 7496
388092d5 7497 extract_insn_cached (insn);
048d0d36 7498
388092d5
AB
7499 /* We use WHICH_ALTERNATIVE only after reload. This will
7500 guarantee that reload won't touch a speculative insn. */
f6ec1d11 7501
388092d5 7502 if (recog_data.n_operands != 2)
048d0d36
MK
7503 return -1;
7504
388092d5
AB
7505 reg = recog_data.operand[0];
7506 mem = recog_data.operand[1];
f6ec1d11 7507
388092d5
AB
7508 /* We should use MEM's mode since REG's mode in presence of
7509 ZERO_EXTEND will always be DImode. */
7510 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7511 /* Process non-speculative ld. */
7512 {
7513 if (!reload_completed)
7514 {
7515 /* Do not speculate into regs like ar.lc. */
7516 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7517 return -1;
7518
7519 if (!MEM_P (mem))
7520 return -1;
7521
7522 {
7523 rtx mem_reg = XEXP (mem, 0);
7524
7525 if (!REG_P (mem_reg))
7526 return -1;
7527 }
7528
7529 mode_rtx = mem;
7530 }
7531 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7532 {
7533 gcc_assert (REG_P (reg) && MEM_P (mem));
7534 mode_rtx = mem;
7535 }
7536 else
7537 return -1;
7538 }
7539 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7540 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7541 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7542 /* Process speculative ld or ld.c. */
048d0d36 7543 {
388092d5
AB
7544 gcc_assert (REG_P (reg) && MEM_P (mem));
7545 mode_rtx = mem;
048d0d36
MK
7546 }
7547 else
048d0d36 7548 {
388092d5 7549 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
048d0d36 7550
388092d5
AB
7551 if (attr_class == ITANIUM_CLASS_CHK_A
7552 || attr_class == ITANIUM_CLASS_CHK_S_I
7553 || attr_class == ITANIUM_CLASS_CHK_S_F)
7554 /* Process chk. */
7555 mode_rtx = reg;
7556 else
7557 return -1;
048d0d36 7558 }
f6ec1d11 7559
388092d5 7560 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
f6ec1d11 7561
388092d5 7562 if (mode_no == SPEC_MODE_INVALID)
048d0d36
MK
7563 return -1;
7564
388092d5
AB
7565 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7566
7567 if (extend_p)
7568 {
7569 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7570 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7571 return -1;
f6ec1d11 7572
388092d5
AB
7573 mode_no += SPEC_GEN_EXTEND_OFFSET;
7574 }
048d0d36 7575
388092d5 7576 return mode_no;
048d0d36
MK
7577}
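/* Worked example of the mode numbering above: a QImode memory access gives
   mode_no == 1; if the destination register mode differs (DImode because of
   ZERO_EXTEND), EXTEND_P is true and mode_no becomes
   1 + SPEC_GEN_EXTEND_OFFSET == 9, which selects the gen_zero_extendqidi2*
   entries of the generator tables in get_spec_load_gen_function.  */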
7578
388092d5
AB
7579/* If X is an unspec part of a speculative load, return its code.
7580 Return -1 otherwise. */
7581static int
7582get_spec_unspec_code (const_rtx x)
7583{
7584 if (GET_CODE (x) != UNSPEC)
7585 return -1;
048d0d36 7586
048d0d36 7587 {
388092d5 7588 int code;
048d0d36 7589
388092d5 7590 code = XINT (x, 1);
048d0d36 7591
388092d5
AB
7592 switch (code)
7593 {
7594 case UNSPEC_LDA:
7595 case UNSPEC_LDS:
7596 case UNSPEC_LDS_A:
7597 case UNSPEC_LDSA:
7598 return code;
048d0d36 7599
388092d5
AB
7600 default:
7601 return -1;
7602 }
7603 }
7604}
048d0d36 7605
388092d5
AB
7606/* Implement skip_rtx_p hook. */
7607static bool
7608ia64_skip_rtx_p (const_rtx x)
7609{
7610 return get_spec_unspec_code (x) != -1;
7611}
048d0d36 7612
388092d5
AB
7613/* If INSN is a speculative load, return its UNSPEC code.
7614 Return -1 otherwise. */
7615static int
7616get_insn_spec_code (const_rtx insn)
7617{
7618 rtx pat, reg, mem;
048d0d36 7619
388092d5 7620 pat = PATTERN (insn);
048d0d36 7621
388092d5
AB
7622 if (GET_CODE (pat) == COND_EXEC)
7623 pat = COND_EXEC_CODE (pat);
048d0d36 7624
388092d5
AB
7625 if (GET_CODE (pat) != SET)
7626 return -1;
7627
7628 reg = SET_DEST (pat);
7629 if (!REG_P (reg))
7630 return -1;
7631
7632 mem = SET_SRC (pat);
7633 if (GET_CODE (mem) == ZERO_EXTEND)
7634 mem = XEXP (mem, 0);
7635
7636 return get_spec_unspec_code (mem);
7637}
7638
7639/* If INSN is a speculative load, return a ds with the speculation types.
7640 Otherwise [if INSN is a normal instruction] return 0. */
7641static ds_t
7642ia64_get_insn_spec_ds (rtx insn)
7643{
7644 int code = get_insn_spec_code (insn);
7645
7646 switch (code)
048d0d36 7647 {
388092d5
AB
7648 case UNSPEC_LDA:
7649 return BEGIN_DATA;
048d0d36 7650
388092d5
AB
7651 case UNSPEC_LDS:
7652 case UNSPEC_LDS_A:
7653 return BEGIN_CONTROL;
048d0d36 7654
388092d5
AB
7655 case UNSPEC_LDSA:
7656 return BEGIN_DATA | BEGIN_CONTROL;
048d0d36 7657
388092d5
AB
7658 default:
7659 return 0;
048d0d36 7660 }
388092d5
AB
7661}
7662
7663/* If INSN is a speculative load return a ds with the speculation types that
7664 will be checked.
7665 Otherwise [if INSN is a normal instruction] return 0. */
7666static ds_t
7667ia64_get_insn_checked_ds (rtx insn)
7668{
7669 int code = get_insn_spec_code (insn);
7670
7671 switch (code)
048d0d36 7672 {
388092d5
AB
7673 case UNSPEC_LDA:
7674 return BEGIN_DATA | BEGIN_CONTROL;
7675
7676 case UNSPEC_LDS:
7677 return BEGIN_CONTROL;
7678
7679 case UNSPEC_LDS_A:
7680 case UNSPEC_LDSA:
7681 return BEGIN_DATA | BEGIN_CONTROL;
7682
7683 default:
7684 return 0;
048d0d36 7685 }
388092d5 7686}
048d0d36 7687
388092d5
AB
7688/* Return a speculative pattern for INSN with speculation types TS and
7689 machine mode index MODE_NO, built from the insn's operands as recorded
7690 in recog_data. */
7692static rtx
7693ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7694{
7695 rtx pat, new_pat;
7696 gen_func_t gen_load;
048d0d36 7697
388092d5 7698 gen_load = get_spec_load_gen_function (ts, mode_no);
048d0d36 7699
388092d5
AB
7700 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7701 copy_rtx (recog_data.operand[1]));
048d0d36
MK
7702
7703 pat = PATTERN (insn);
7704 if (GET_CODE (pat) == COND_EXEC)
388092d5
AB
7705 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7706 new_pat);
048d0d36
MK
7707
7708 return new_pat;
7709}
7710
048d0d36 7711static bool
388092d5
AB
7712insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7713 ds_t ds ATTRIBUTE_UNUSED)
048d0d36 7714{
388092d5
AB
7715 return false;
7716}
048d0d36 7717
388092d5
AB
7718/* Implement targetm.sched.speculate_insn hook.
7719 Check if the INSN can be TS speculative.
7720 If 'no' - return -1.
7721 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7722 If current pattern of the INSN already provides TS speculation,
7723 return 0. */
7724static int
7725ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7726{
7727 int mode_no;
7728 int res;
7729
7730 gcc_assert (!(ts & ~SPECULATIVE));
048d0d36 7731
388092d5
AB
7732 if (ia64_spec_check_p (insn))
7733 return -1;
048d0d36 7734
388092d5
AB
7735 if ((ts & BE_IN_SPEC)
7736 && !insn_can_be_in_speculative_p (insn, ts))
7737 return -1;
048d0d36 7738
388092d5 7739 mode_no = get_mode_no_for_insn (insn);
048d0d36 7740
388092d5
AB
7741 if (mode_no != SPEC_MODE_INVALID)
7742 {
7743 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7744 res = 0;
7745 else
7746 {
7747 res = 1;
7748 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7749 }
7750 }
7751 else
7752 res = -1;
048d0d36 7753
388092d5
AB
7754 return res;
7755}
048d0d36 7756
388092d5
AB
7757/* Return a function that will generate a check for speculation TS with mode
7758 MODE_NO.
7759 If simple check is needed, pass true for SIMPLE_CHECK_P.
7760 If clearing check is needed, pass true for CLEARING_CHECK_P. */
7761static gen_func_t
7762get_spec_check_gen_function (ds_t ts, int mode_no,
7763 bool simple_check_p, bool clearing_check_p)
7764{
7765 static gen_func_t gen_ld_c_clr[] = {
048d0d36
MK
7766 gen_movbi_clr,
7767 gen_movqi_clr,
7768 gen_movhi_clr,
7769 gen_movsi_clr,
7770 gen_movdi_clr,
7771 gen_movsf_clr,
7772 gen_movdf_clr,
7773 gen_movxf_clr,
7774 gen_movti_clr,
7775 gen_zero_extendqidi2_clr,
7776 gen_zero_extendhidi2_clr,
7777 gen_zero_extendsidi2_clr,
388092d5
AB
7778 };
7779 static gen_func_t gen_ld_c_nc[] = {
7780 gen_movbi_nc,
7781 gen_movqi_nc,
7782 gen_movhi_nc,
7783 gen_movsi_nc,
7784 gen_movdi_nc,
7785 gen_movsf_nc,
7786 gen_movdf_nc,
7787 gen_movxf_nc,
7788 gen_movti_nc,
7789 gen_zero_extendqidi2_nc,
7790 gen_zero_extendhidi2_nc,
7791 gen_zero_extendsidi2_nc,
7792 };
7793 static gen_func_t gen_chk_a_clr[] = {
048d0d36
MK
7794 gen_advanced_load_check_clr_bi,
7795 gen_advanced_load_check_clr_qi,
7796 gen_advanced_load_check_clr_hi,
7797 gen_advanced_load_check_clr_si,
7798 gen_advanced_load_check_clr_di,
7799 gen_advanced_load_check_clr_sf,
7800 gen_advanced_load_check_clr_df,
7801 gen_advanced_load_check_clr_xf,
7802 gen_advanced_load_check_clr_ti,
7803 gen_advanced_load_check_clr_di,
7804 gen_advanced_load_check_clr_di,
7805 gen_advanced_load_check_clr_di,
388092d5
AB
7806 };
7807 static gen_func_t gen_chk_a_nc[] = {
7808 gen_advanced_load_check_nc_bi,
7809 gen_advanced_load_check_nc_qi,
7810 gen_advanced_load_check_nc_hi,
7811 gen_advanced_load_check_nc_si,
7812 gen_advanced_load_check_nc_di,
7813 gen_advanced_load_check_nc_sf,
7814 gen_advanced_load_check_nc_df,
7815 gen_advanced_load_check_nc_xf,
7816 gen_advanced_load_check_nc_ti,
7817 gen_advanced_load_check_nc_di,
7818 gen_advanced_load_check_nc_di,
7819 gen_advanced_load_check_nc_di,
7820 };
7821 static gen_func_t gen_chk_s[] = {
048d0d36
MK
7822 gen_speculation_check_bi,
7823 gen_speculation_check_qi,
7824 gen_speculation_check_hi,
7825 gen_speculation_check_si,
7826 gen_speculation_check_di,
7827 gen_speculation_check_sf,
7828 gen_speculation_check_df,
7829 gen_speculation_check_xf,
7830 gen_speculation_check_ti,
7831 gen_speculation_check_di,
7832 gen_speculation_check_di,
388092d5 7833 gen_speculation_check_di,
048d0d36
MK
7834 };
7835
388092d5 7836 gen_func_t *gen_check;
048d0d36 7837
388092d5 7838 if (ts & BEGIN_DATA)
048d0d36 7839 {
388092d5
AB
 7840 /* We don't need recovery because even if this is ld.sa,
 7841 an ALAT entry will be allocated only if the NAT bit is set to zero.
7842 So it is enough to use ld.c here. */
7843
7844 if (simple_check_p)
7845 {
7846 gcc_assert (mflag_sched_spec_ldc);
7847
7848 if (clearing_check_p)
7849 gen_check = gen_ld_c_clr;
7850 else
7851 gen_check = gen_ld_c_nc;
7852 }
7853 else
7854 {
7855 if (clearing_check_p)
7856 gen_check = gen_chk_a_clr;
7857 else
7858 gen_check = gen_chk_a_nc;
7859 }
048d0d36 7860 }
388092d5 7861 else if (ts & BEGIN_CONTROL)
048d0d36 7862 {
388092d5
AB
7863 if (simple_check_p)
7864 /* We might want to use ld.sa -> ld.c instead of
7865 ld.s -> chk.s. */
048d0d36 7866 {
388092d5 7867 gcc_assert (!ia64_needs_block_p (ts));
048d0d36 7868
388092d5
AB
7869 if (clearing_check_p)
7870 gen_check = gen_ld_c_clr;
7871 else
7872 gen_check = gen_ld_c_nc;
7873 }
7874 else
7875 {
7876 gen_check = gen_chk_s;
048d0d36 7877 }
388092d5
AB
7878 }
7879 else
7880 gcc_unreachable ();
7881
7882 gcc_assert (mode_no >= 0);
7883 return gen_check[mode_no];
7884}
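/* Example of the check selection above (mode chosen for illustration):
   for a data-speculative DImode load (TS & BEGIN_DATA, mode_no == 4),
   a simple clearing check yields gen_movdi_clr from gen_ld_c_clr (an
   ld.c-style check), while a non-simple clearing check yields
   gen_advanced_load_check_clr_di from gen_chk_a_clr (a chk.a-style
   check).  */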
7885
7886/* Return nonzero if speculation TS needs a branchy recovery check. */
7887static bool
7888ia64_needs_block_p (ds_t ts)
7889{
7890 if (ts & BEGIN_DATA)
7891 return !mflag_sched_spec_ldc;
7892
7893 gcc_assert ((ts & BEGIN_CONTROL) != 0);
048d0d36 7894
388092d5
AB
7895 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7896}
7897
7898/* Generate a recovery check for INSN with speculation types DS.
7899 If LABEL is nonzero, generate a branchy recovery check.
7900 Otherwise, generate a simple check. */
7901static rtx
7902ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7903{
7904 rtx op1, pat, check_pat;
7905 gen_func_t gen_check;
7906 int mode_no;
7907
7908 mode_no = get_mode_no_for_insn (insn);
7909 gcc_assert (mode_no >= 0);
7910
7911 if (label)
7912 op1 = label;
7913 else
7914 {
7915 gcc_assert (!ia64_needs_block_p (ds));
7916 op1 = copy_rtx (recog_data.operand[1]);
048d0d36 7917 }
388092d5
AB
7918
7919 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
7920 true);
048d0d36 7921
388092d5 7922 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
048d0d36
MK
7923
7924 pat = PATTERN (insn);
7925 if (GET_CODE (pat) == COND_EXEC)
7926 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7927 check_pat);
7928
7929 return check_pat;
7930}
7931
7932/* Return nonzero if X is a branchy recovery check. */
7933static int
7934ia64_spec_check_p (rtx x)
7935{
7936 x = PATTERN (x);
7937 if (GET_CODE (x) == COND_EXEC)
7938 x = COND_EXEC_CODE (x);
7939 if (GET_CODE (x) == SET)
7940 return ia64_spec_check_src_p (SET_SRC (x));
7941 return 0;
7942}
7943
7944/* Return nonzero if SRC belongs to a recovery check. */
7945static int
7946ia64_spec_check_src_p (rtx src)
7947{
7948 if (GET_CODE (src) == IF_THEN_ELSE)
7949 {
7950 rtx t;
7951
7952 t = XEXP (src, 0);
7953 if (GET_CODE (t) == NE)
7954 {
7955 t = XEXP (t, 0);
7956
7957 if (GET_CODE (t) == UNSPEC)
7958 {
7959 int code;
7960
7961 code = XINT (t, 1);
7962
388092d5
AB
7963 if (code == UNSPEC_LDCCLR
7964 || code == UNSPEC_LDCNC
7965 || code == UNSPEC_CHKACLR
7966 || code == UNSPEC_CHKANC
7967 || code == UNSPEC_CHKS)
048d0d36
MK
7968 {
7969 gcc_assert (code != 0);
7970 return code;
7971 }
7972 }
7973 }
7974 }
7975 return 0;
7976}
30028c85 7977\f
2130b7fb 7978
30028c85
VM
7979/* The following page contains abstract data `bundle states' which are
7980 used for bundling insns (inserting nops and template generation). */
7981
7982/* The following describes the state of insn bundling. */
7983
7984struct bundle_state
7985{
7986 /* Unique bundle state number to identify them in the debugging
7987 output */
7988 int unique_num;
7989 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
 7990 /* number of nops before and after the insn */
7991 short before_nops_num, after_nops_num;
 7992 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
 7993 insn) */
7994 int cost; /* cost of the state in cycles */
7995 int accumulated_insns_num; /* number of all previous insns including
7996 nops. L is considered as 2 insns */
7997 int branch_deviation; /* deviation of previous branches from 3rd slots */
388092d5 7998 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
30028c85
VM
7999 struct bundle_state *next; /* next state with the same insn_num */
8000 struct bundle_state *originator; /* originator (previous insn state) */
8001 /* All bundle states are in the following chain. */
8002 struct bundle_state *allocated_states_chain;
8003 /* The DFA State after issuing the insn and the nops. */
8004 state_t dfa_state;
8005};
2130b7fb 8006
30028c85 8007/* The following maps an insn number to the corresponding bundle state. */
2130b7fb 8008
30028c85 8009static struct bundle_state **index_to_bundle_states;
2130b7fb 8010
30028c85 8011/* The unique number of the next bundle state. */
2130b7fb 8012
30028c85 8013static int bundle_states_num;
2130b7fb 8014
30028c85 8015/* All allocated bundle states are in the following chain. */
2130b7fb 8016
30028c85 8017static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 8018
30028c85
VM
8019/* All allocated but not used bundle states are in the following
8020 chain. */
870f9ec0 8021
30028c85 8022static struct bundle_state *free_bundle_state_chain;
2130b7fb 8023
2130b7fb 8024
30028c85 8025/* The following function returns a free bundle state. */
2130b7fb 8026
30028c85 8027static struct bundle_state *
9c808aad 8028get_free_bundle_state (void)
30028c85
VM
8029{
8030 struct bundle_state *result;
2130b7fb 8031
30028c85 8032 if (free_bundle_state_chain != NULL)
2130b7fb 8033 {
30028c85
VM
8034 result = free_bundle_state_chain;
8035 free_bundle_state_chain = result->next;
2130b7fb 8036 }
30028c85 8037 else
2130b7fb 8038 {
5ead67f6 8039 result = XNEW (struct bundle_state);
30028c85
VM
8040 result->dfa_state = xmalloc (dfa_state_size);
8041 result->allocated_states_chain = allocated_bundle_states_chain;
8042 allocated_bundle_states_chain = result;
2130b7fb 8043 }
30028c85
VM
8044 result->unique_num = bundle_states_num++;
8045 return result;
9c808aad 8046
30028c85 8047}
2130b7fb 8048
30028c85 8049/* The following function frees the given bundle state. */
2130b7fb 8050
30028c85 8051static void
9c808aad 8052free_bundle_state (struct bundle_state *state)
30028c85
VM
8053{
8054 state->next = free_bundle_state_chain;
8055 free_bundle_state_chain = state;
8056}
2130b7fb 8057
30028c85 8058/* Start work with abstract data `bundle states'. */
2130b7fb 8059
30028c85 8060static void
9c808aad 8061initiate_bundle_states (void)
30028c85
VM
8062{
8063 bundle_states_num = 0;
8064 free_bundle_state_chain = NULL;
8065 allocated_bundle_states_chain = NULL;
2130b7fb
BS
8066}
8067
30028c85 8068/* Finish work with abstract data `bundle states'. */
2130b7fb
BS
8069
8070static void
9c808aad 8071finish_bundle_states (void)
2130b7fb 8072{
30028c85
VM
8073 struct bundle_state *curr_state, *next_state;
8074
8075 for (curr_state = allocated_bundle_states_chain;
8076 curr_state != NULL;
8077 curr_state = next_state)
2130b7fb 8078 {
30028c85
VM
8079 next_state = curr_state->allocated_states_chain;
8080 free (curr_state->dfa_state);
8081 free (curr_state);
2130b7fb 8082 }
2130b7fb
BS
8083}
8084
30028c85
VM
8085/* Hash table of the bundle states. The key is dfa_state and insn_num
8086 of the bundle states. */
2130b7fb 8087
30028c85 8088static htab_t bundle_state_table;
2130b7fb 8089
30028c85 8090/* The function returns hash of BUNDLE_STATE. */
2130b7fb 8091
30028c85 8092static unsigned
9c808aad 8093bundle_state_hash (const void *bundle_state)
30028c85 8094{
586de218
KG
8095 const struct bundle_state *const state
8096 = (const struct bundle_state *) bundle_state;
30028c85 8097 unsigned result, i;
2130b7fb 8098
30028c85
VM
8099 for (result = i = 0; i < dfa_state_size; i++)
8100 result += (((unsigned char *) state->dfa_state) [i]
8101 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8102 return result + state->insn_num;
8103}
2130b7fb 8104
30028c85 8105/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 8106
30028c85 8107static int
9c808aad 8108bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
30028c85 8109{
586de218
KG
8110 const struct bundle_state *const state1
8111 = (const struct bundle_state *) bundle_state_1;
8112 const struct bundle_state *const state2
8113 = (const struct bundle_state *) bundle_state_2;
2130b7fb 8114
30028c85
VM
8115 return (state1->insn_num == state2->insn_num
8116 && memcmp (state1->dfa_state, state2->dfa_state,
8117 dfa_state_size) == 0);
8118}
2130b7fb 8119
30028c85
VM
8120/* The function inserts the BUNDLE_STATE into the hash table. The
8121 function returns nonzero if the bundle has been inserted into the
8122 table. The table contains the best bundle state with given key. */
2130b7fb 8123
30028c85 8124static int
9c808aad 8125insert_bundle_state (struct bundle_state *bundle_state)
30028c85
VM
8126{
8127 void **entry_ptr;
2130b7fb 8128
bbbbb16a 8129 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
30028c85
VM
8130 if (*entry_ptr == NULL)
8131 {
8132 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8133 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8134 *entry_ptr = (void *) bundle_state;
8135 return TRUE;
2130b7fb 8136 }
30028c85
VM
8137 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8138 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8139 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8140 > bundle_state->accumulated_insns_num
8141 || (((struct bundle_state *)
8142 *entry_ptr)->accumulated_insns_num
8143 == bundle_state->accumulated_insns_num
388092d5
AB
8144 && (((struct bundle_state *)
8145 *entry_ptr)->branch_deviation
8146 > bundle_state->branch_deviation
8147 || (((struct bundle_state *)
8148 *entry_ptr)->branch_deviation
8149 == bundle_state->branch_deviation
8150 && ((struct bundle_state *)
8151 *entry_ptr)->middle_bundle_stops
8152 > bundle_state->middle_bundle_stops))))))
9c808aad 8153
2130b7fb 8154 {
30028c85
VM
8155 struct bundle_state temp;
8156
8157 temp = *(struct bundle_state *) *entry_ptr;
8158 *(struct bundle_state *) *entry_ptr = *bundle_state;
8159 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8160 *bundle_state = temp;
2130b7fb 8161 }
30028c85
VM
8162 return FALSE;
8163}
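/* The replacement test above is a lexicographic "is the new state better"
   comparison for states sharing the same dfa_state/insn_num key: first by
   cost, then by accumulated_insns_num, then by branch_deviation, and
   finally by middle_bundle_stops, each smaller-is-better; only a strictly
   better state overwrites the one already stored in the hash table.  */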
2130b7fb 8164
30028c85
VM
8165/* Start work with the hash table. */
8166
8167static void
9c808aad 8168initiate_bundle_state_table (void)
30028c85
VM
8169{
8170 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8171 (htab_del) 0);
2130b7fb
BS
8172}
8173
30028c85 8174/* Finish work with the hash table. */
e4027dab
BS
8175
8176static void
9c808aad 8177finish_bundle_state_table (void)
e4027dab 8178{
30028c85 8179 htab_delete (bundle_state_table);
e4027dab
BS
8180}
8181
30028c85 8182\f
a0a7b566 8183
30028c85
VM
8184/* The following variable is an insn `nop' used to check bundle states
8185 with different numbers of inserted nops. */
a0a7b566 8186
30028c85 8187static rtx ia64_nop;
a0a7b566 8188
30028c85
VM
8189/* The following function tries to issue NOPS_NUM nops for the current
 8190 state without advancing the processor cycle. If it fails, the
8191 function returns FALSE and frees the current state. */
8192
8193static int
9c808aad 8194try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 8195{
30028c85 8196 int i;
a0a7b566 8197
30028c85
VM
8198 for (i = 0; i < nops_num; i++)
8199 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8200 {
8201 free_bundle_state (curr_state);
8202 return FALSE;
8203 }
8204 return TRUE;
8205}
a0a7b566 8206
30028c85
VM
8207/* The following function tries to issue INSN for the current
 8208 state without advancing the processor cycle. If it fails, the
8209 function returns FALSE and frees the current state. */
a0a7b566 8210
30028c85 8211static int
9c808aad 8212try_issue_insn (struct bundle_state *curr_state, rtx insn)
30028c85
VM
8213{
8214 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8215 {
8216 free_bundle_state (curr_state);
8217 return FALSE;
8218 }
8219 return TRUE;
8220}
a0a7b566 8221
30028c85
VM
8222/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8223 starting with ORIGINATOR without advancing processor cycle. If
f32360c7
VM
8224 TRY_BUNDLE_END_P is TRUE, the function also/only (if
 8225 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
 8226 If successful, the function creates a new bundle state and inserts it
 8227 into the hash table and into `index_to_bundle_states'. */
a0a7b566 8228
30028c85 8229static void
9c808aad
AJ
8230issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8231 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
30028c85
VM
8232{
8233 struct bundle_state *curr_state;
8234
8235 curr_state = get_free_bundle_state ();
8236 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8237 curr_state->insn = insn;
8238 curr_state->insn_num = originator->insn_num + 1;
8239 curr_state->cost = originator->cost;
8240 curr_state->originator = originator;
8241 curr_state->before_nops_num = before_nops_num;
8242 curr_state->after_nops_num = 0;
8243 curr_state->accumulated_insns_num
8244 = originator->accumulated_insns_num + before_nops_num;
8245 curr_state->branch_deviation = originator->branch_deviation;
388092d5 8246 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
e820471b
NS
8247 gcc_assert (insn);
8248 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
30028c85 8249 {
e820471b 8250 gcc_assert (GET_MODE (insn) != TImode);
30028c85
VM
8251 if (!try_issue_nops (curr_state, before_nops_num))
8252 return;
8253 if (!try_issue_insn (curr_state, insn))
8254 return;
8255 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
388092d5
AB
8256 if (curr_state->accumulated_insns_num % 3 != 0)
8257 curr_state->middle_bundle_stops++;
30028c85
VM
8258 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8259 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 8260 {
30028c85
VM
8261 free_bundle_state (curr_state);
8262 return;
a0a7b566 8263 }
a0a7b566 8264 }
30028c85 8265 else if (GET_MODE (insn) != TImode)
a0a7b566 8266 {
30028c85
VM
8267 if (!try_issue_nops (curr_state, before_nops_num))
8268 return;
8269 if (!try_issue_insn (curr_state, insn))
8270 return;
f32360c7 8271 curr_state->accumulated_insns_num++;
e820471b
NS
8272 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8273 && asm_noperands (PATTERN (insn)) < 0);
8274
30028c85
VM
8275 if (ia64_safe_type (insn) == TYPE_L)
8276 curr_state->accumulated_insns_num++;
8277 }
8278 else
8279 {
68e11b42
JW
8280 /* If this is an insn that must be first in a group, then don't allow
8281 nops to be emitted before it. Currently, alloc is the only such
8282 supported instruction. */
8283 /* ??? The bundling automatons should handle this for us, but they do
8284 not yet have support for the first_insn attribute. */
8285 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8286 {
8287 free_bundle_state (curr_state);
8288 return;
8289 }
8290
30028c85
VM
8291 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8292 state_transition (curr_state->dfa_state, NULL);
8293 curr_state->cost++;
8294 if (!try_issue_nops (curr_state, before_nops_num))
8295 return;
8296 if (!try_issue_insn (curr_state, insn))
8297 return;
f32360c7
VM
8298 curr_state->accumulated_insns_num++;
8299 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8300 || asm_noperands (PATTERN (insn)) >= 0)
8301 {
8302 /* Finish bundle containing asm insn. */
8303 curr_state->after_nops_num
8304 = 3 - curr_state->accumulated_insns_num % 3;
8305 curr_state->accumulated_insns_num
8306 += 3 - curr_state->accumulated_insns_num % 3;
8307 }
8308 else if (ia64_safe_type (insn) == TYPE_L)
30028c85
VM
8309 curr_state->accumulated_insns_num++;
8310 }
8311 if (ia64_safe_type (insn) == TYPE_B)
8312 curr_state->branch_deviation
8313 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8314 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8315 {
f32360c7 8316 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 8317 {
30028c85
VM
8318 state_t dfa_state;
8319 struct bundle_state *curr_state1;
8320 struct bundle_state *allocated_states_chain;
8321
8322 curr_state1 = get_free_bundle_state ();
8323 dfa_state = curr_state1->dfa_state;
8324 allocated_states_chain = curr_state1->allocated_states_chain;
8325 *curr_state1 = *curr_state;
8326 curr_state1->dfa_state = dfa_state;
8327 curr_state1->allocated_states_chain = allocated_states_chain;
8328 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8329 dfa_state_size);
8330 curr_state = curr_state1;
a0a7b566 8331 }
30028c85
VM
8332 if (!try_issue_nops (curr_state,
8333 3 - curr_state->accumulated_insns_num % 3))
8334 return;
8335 curr_state->after_nops_num
8336 = 3 - curr_state->accumulated_insns_num % 3;
8337 curr_state->accumulated_insns_num
8338 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 8339 }
30028c85
VM
8340 if (!insert_bundle_state (curr_state))
8341 free_bundle_state (curr_state);
8342 return;
8343}
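/* Note on the slot accounting above: accumulated_insns_num counts bundle
   slots rather than insns, so a TYPE_L insn (a long-immediate move) is
   counted twice, and an asm statement is padded with after_nops_num so
   that it always ends on a bundle boundary; both follow directly from the
   TYPE_L and ASM_INPUT branches above.  */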
e013f3c7 8344
30028c85
VM
8345/* The following function returns the position in the two-bundle window
8346 for the given STATE. */
8347
8348static int
9c808aad 8349get_max_pos (state_t state)
30028c85
VM
8350{
8351 if (cpu_unit_reservation_p (state, pos_6))
8352 return 6;
8353 else if (cpu_unit_reservation_p (state, pos_5))
8354 return 5;
8355 else if (cpu_unit_reservation_p (state, pos_4))
8356 return 4;
8357 else if (cpu_unit_reservation_p (state, pos_3))
8358 return 3;
8359 else if (cpu_unit_reservation_p (state, pos_2))
8360 return 2;
8361 else if (cpu_unit_reservation_p (state, pos_1))
8362 return 1;
8363 else
8364 return 0;
a0a7b566
BS
8365}
8366
30028c85
VM
8367/* The function returns the code of a possible template for the given position
8368 and state. The function should be called only with 2 values of
96ddf8ef
VM
8369 position equal to 3 or 6. We avoid generating F NOPs by putting
8370 templates containing F insns at the end of the template search
 8371 because of an undocumented anomaly in McKinley-derived cores which can
8372 cause stalls if an F-unit insn (including a NOP) is issued within a
8373 six-cycle window after reading certain application registers (such
 8374 as ar.bsp). Furthermore, power considerations also argue against
8375 the use of F-unit instructions unless they're really needed. */
2130b7fb 8376
c237e94a 8377static int
9c808aad 8378get_template (state_t state, int pos)
2130b7fb 8379{
30028c85 8380 switch (pos)
2130b7fb 8381 {
30028c85 8382 case 3:
96ddf8ef 8383 if (cpu_unit_reservation_p (state, _0mmi_))
30028c85 8384 return 1;
96ddf8ef
VM
8385 else if (cpu_unit_reservation_p (state, _0mii_))
8386 return 0;
30028c85
VM
8387 else if (cpu_unit_reservation_p (state, _0mmb_))
8388 return 7;
96ddf8ef
VM
8389 else if (cpu_unit_reservation_p (state, _0mib_))
8390 return 6;
8391 else if (cpu_unit_reservation_p (state, _0mbb_))
8392 return 5;
8393 else if (cpu_unit_reservation_p (state, _0bbb_))
8394 return 4;
8395 else if (cpu_unit_reservation_p (state, _0mmf_))
8396 return 3;
8397 else if (cpu_unit_reservation_p (state, _0mfi_))
8398 return 2;
8399 else if (cpu_unit_reservation_p (state, _0mfb_))
8400 return 8;
8401 else if (cpu_unit_reservation_p (state, _0mlx_))
8402 return 9;
8403 else
e820471b 8404 gcc_unreachable ();
30028c85 8405 case 6:
96ddf8ef 8406 if (cpu_unit_reservation_p (state, _1mmi_))
30028c85 8407 return 1;
8408 else if (cpu_unit_reservation_p (state, _1mii_))
8409 return 0;
8410 else if (cpu_unit_reservation_p (state, _1mmb_))
8411 return 7;
8412 else if (cpu_unit_reservation_p (state, _1mib_))
8413 return 6;
8414 else if (cpu_unit_reservation_p (state, _1mbb_))
8415 return 5;
8416 else if (cpu_unit_reservation_p (state, _1bbb_))
8417 return 4;
8418 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8419 return 3;
8420 else if (cpu_unit_reservation_p (state, _1mfi_))
8421 return 2;
8422 else if (cpu_unit_reservation_p (state, _1mfb_))
8423 return 8;
8424 else if (cpu_unit_reservation_p (state, _1mlx_))
8425 return 9;
8426 else
e820471b 8427 gcc_unreachable ();
30028c85 8428 default:
e820471b 8429 gcc_unreachable ();
2130b7fb 8430 }
30028c85 8431}
2130b7fb 8432
8433/* True when INSN is important for bundling. */
8434static bool
8435important_for_bundling_p (rtx insn)
8436{
8437 return (INSN_P (insn)
8438 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8439 && GET_CODE (PATTERN (insn)) != USE
8440 && GET_CODE (PATTERN (insn)) != CLOBBER);
8441}
8442
8443/* The following function returns the first insn important for insn
8444   bundling at or after INSN and before TAIL.  */
a0a7b566 8445
30028c85 8446static rtx
9c808aad 8447get_next_important_insn (rtx insn, rtx tail)
8448{
8449 for (; insn && insn != tail; insn = NEXT_INSN (insn))
388092d5 8450 if (important_for_bundling_p (insn))
8451 return insn;
8452 return NULL_RTX;
8453}
8454
8455/* Add a bundle selector TEMPLATE0 before INSN. */
8456
8457static void
8458ia64_add_bundle_selector_before (int template0, rtx insn)
8459{
8460 rtx b = gen_bundle_selector (GEN_INT (template0));
8461
8462 ia64_emit_insn_before (b, insn);
8463#if NR_BUNDLES == 10
8464 if ((template0 == 4 || template0 == 5)
8465 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8466 {
8467 int i;
8468 rtx note = NULL_RTX;
8469
8470 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8471 first or second slot. If it is and has REG_EH_NOTE set, copy it
8472 to following nops, as br.call sets rp to the address of following
8473 bundle and therefore an EH region end must be on a bundle
8474 boundary. */
8475 insn = PREV_INSN (insn);
8476 for (i = 0; i < 3; i++)
8477 {
8478 do
8479 insn = next_active_insn (insn);
8480 while (GET_CODE (insn) == INSN
8481 && get_attr_empty (insn) == EMPTY_YES);
8482 if (GET_CODE (insn) == CALL_INSN)
8483 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8484 else if (note)
8485 {
8486 int code;
8487
8488 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8489 || code == CODE_FOR_nop_b);
8490 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8491 note = NULL_RTX;
8492 else
bbbbb16a 8493 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8494 }
8495 }
8496 }
8497#endif
8498}
8499
8500/* The following function does insn bundling.  Bundling means
8501   inserting templates and nop insns to fit insn groups into permitted
8502   templates.  Instruction scheduling uses an NDFA (non-deterministic
8503   finite automaton) encoding information about the templates and the
8504   inserted nops.  The nondeterminism of the automaton permits
8505   following all possible insn sequences very quickly.
8506
8507   Unfortunately it is not possible to get information about the
8508   inserted nop insns and used templates from the automaton states.
8509   The automaton only says that we can issue an insn, possibly
8510   inserting some nops before it and using some template.  Therefore
8511   insn bundling in this function is implemented by using a DFA
8512   (deterministic finite automaton).  We follow all possible insn
8513   sequences by inserting 0-2 nops (that is what the NDFA describes
8514   for insn scheduling) before/after each insn being bundled.  We know
8515   the start of a simulated processor cycle from insn scheduling (an
8516   insn starting a new cycle has TImode).
8517
8518   A simple implementation of insn bundling would create an enormous
8519   number of possible insn sequences satisfying the information about
8520   new cycle ticks taken from the insn scheduling.  To make the
8521   algorithm practical we use dynamic programming.  Each decision
8522   (about inserting nops and implicitly about previous decisions) is
8523   described by structure bundle_state (see above).  If we generate
8524   the same bundle state (the key is the automaton state after issuing
8525   the insns and nops for it), we reuse the already generated one.  As
8526   a consequence we reject some decisions which cannot improve the
8527   solution, and we reduce the memory used by the algorithm.
8528
8529   When we reach the end of an EBB (extended basic block), we choose
8530   the best sequence and then, moving back through the EBB, insert
8531   templates for the best alternative.  The templates are taken by
8532   querying the automaton state for each insn in the chosen bundle states.
8533
8534   So the algorithm makes two (forward and backward) passes through
8535   the EBB.  There is an additional forward pass through the EBB for
8536   the Itanium1 processor.  This pass inserts more nops to make the
8537   dependency between a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
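/* As a concrete illustration of the forward pass (see the three
   issue_nops_and_insn calls below): from every bundle state reached
   after the previous insn, up to three successor states are generated
   for the current insn -- issue it directly, issue one nop first, or
   issue two nops first -- and states with the same automaton state and
   insn number are merged by insert_bundle_state, keeping the better of
   the two.  */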
a0a7b566 8538
30028c85 8539static void
9c808aad 8540bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8541{
8542 struct bundle_state *curr_state, *next_state, *best_state;
8543 rtx insn, next_insn;
8544 int insn_num;
f32360c7 8545 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 8546 int pos = 0, max_pos, template0, template1;
8547 rtx b;
8548 rtx nop;
8549 enum attr_type type;
2d1b811d 8550
30028c85 8551 insn_num = 0;
c856f536 8552 /* Count insns in the EBB. */
8553 for (insn = NEXT_INSN (prev_head_insn);
8554 insn && insn != tail;
8555 insn = NEXT_INSN (insn))
8556 if (INSN_P (insn))
8557 insn_num++;
8558 if (insn_num == 0)
8559 return;
8560 bundling_p = 1;
8561 dfa_clean_insn_cache ();
8562 initiate_bundle_state_table ();
5ead67f6 8563 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
ff482c8d 8564 /* First (forward) pass -- generation of bundle states. */
8565 curr_state = get_free_bundle_state ();
8566 curr_state->insn = NULL;
8567 curr_state->before_nops_num = 0;
8568 curr_state->after_nops_num = 0;
8569 curr_state->insn_num = 0;
8570 curr_state->cost = 0;
8571 curr_state->accumulated_insns_num = 0;
8572 curr_state->branch_deviation = 0;
388092d5 8573 curr_state->middle_bundle_stops = 0;
8574 curr_state->next = NULL;
8575 curr_state->originator = NULL;
8576 state_reset (curr_state->dfa_state);
8577 index_to_bundle_states [0] = curr_state;
8578 insn_num = 0;
c856f536 8579 /* Shift cycle mark if it is put on insn which could be ignored. */
8580 for (insn = NEXT_INSN (prev_head_insn);
8581 insn != tail;
8582 insn = NEXT_INSN (insn))
8583 if (INSN_P (insn)
8584 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8585 || GET_CODE (PATTERN (insn)) == USE
8586 || GET_CODE (PATTERN (insn)) == CLOBBER)
8587 && GET_MODE (insn) == TImode)
2130b7fb 8588 {
8589 PUT_MODE (insn, VOIDmode);
8590 for (next_insn = NEXT_INSN (insn);
8591 next_insn != tail;
8592 next_insn = NEXT_INSN (next_insn))
8593 if (INSN_P (next_insn)
8594 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8595 && GET_CODE (PATTERN (next_insn)) != USE
8596 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8597 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8598 {
8599 PUT_MODE (next_insn, TImode);
8600 break;
8601 }
2130b7fb 8602 }
048d0d36 8603 /* Forward pass: generation of bundle states. */
8604 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8605 insn != NULL_RTX;
8606 insn = next_insn)
1ad72cef 8607 {
8608 gcc_assert (INSN_P (insn)
8609 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8610 && GET_CODE (PATTERN (insn)) != USE
8611 && GET_CODE (PATTERN (insn)) != CLOBBER);
f32360c7 8612 type = ia64_safe_type (insn);
8613 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8614 insn_num++;
8615 index_to_bundle_states [insn_num] = NULL;
8616 for (curr_state = index_to_bundle_states [insn_num - 1];
8617 curr_state != NULL;
8618 curr_state = next_state)
f83594c4 8619 {
30028c85 8620 pos = curr_state->accumulated_insns_num % 3;
30028c85 8621 next_state = curr_state->next;
8622	  /* We must fill up the current bundle in order to start a
8623	     subsequent asm insn in a new bundle.  An asm insn is always
8624	     placed in a separate bundle.  */
8625 only_bundle_end_p
8626 = (next_insn != NULL_RTX
8627 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8628 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8629 /* We may fill up the current bundle if it is the cycle end
8630 without a group barrier. */
30028c85 8631 bundle_end_p
f32360c7 8632 = (only_bundle_end_p || next_insn == NULL_RTX
8633 || (GET_MODE (next_insn) == TImode
8634 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8635 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8636 || type == TYPE_S
8637 /* We need to insert 2 nops for cases like M_MII. To
8638 guarantee issuing all insns on the same cycle for
8639 Itanium 1, we need to issue 2 nops after the first M
8640 insn (MnnMII where n is a nop insn). */
8641 || ((type == TYPE_M || type == TYPE_A)
8642 && ia64_tune == PROCESSOR_ITANIUM
30028c85 8643 && !bundle_end_p && pos == 1))
8644 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8645 only_bundle_end_p);
8646 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8647 only_bundle_end_p);
8648 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8649 only_bundle_end_p);
f83594c4 8650 }
e820471b 8651 gcc_assert (index_to_bundle_states [insn_num]);
8652 for (curr_state = index_to_bundle_states [insn_num];
8653 curr_state != NULL;
8654 curr_state = curr_state->next)
8655 if (verbose >= 2 && dump)
8656 {
8657 /* This structure is taken from generated code of the
8658 pipeline hazard recognizer (see file insn-attrtab.c).
8659 Please don't forget to change the structure if a new
8660 automaton is added to .md file. */
8661 struct DFA_chip
8662 {
8663 unsigned short one_automaton_state;
8664 unsigned short oneb_automaton_state;
8665 unsigned short two_automaton_state;
8666 unsigned short twob_automaton_state;
8667 };
9c808aad 8668
8669 fprintf
8670 (dump,
388092d5 8671 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8672 curr_state->unique_num,
8673 (curr_state->originator == NULL
8674 ? -1 : curr_state->originator->unique_num),
8675 curr_state->cost,
8676 curr_state->before_nops_num, curr_state->after_nops_num,
8677 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 8678 curr_state->middle_bundle_stops,
8679 (ia64_tune == PROCESSOR_ITANIUM
8680 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
8681 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
8682 INSN_UID (insn));
8683 }
1ad72cef 8684 }
8685
8686 /* We should find a solution because the 2nd insn scheduling has
8687 found one. */
8688 gcc_assert (index_to_bundle_states [insn_num]);
c856f536 8689 /* Find a state corresponding to the best insn sequence. */
8690 best_state = NULL;
8691 for (curr_state = index_to_bundle_states [insn_num];
8692 curr_state != NULL;
8693 curr_state = curr_state->next)
8694    /* We are just looking at the states with a fully filled up last
8695       bundle.  First we prefer insn sequences with minimal cost, then
8696       those with minimal inserted nops, then those with branch insns in
8697       the 3rd slots, and finally those with fewer mid-bundle stops.  */
8698 if (curr_state->accumulated_insns_num % 3 == 0
8699 && (best_state == NULL || best_state->cost > curr_state->cost
8700 || (best_state->cost == curr_state->cost
8701 && (curr_state->accumulated_insns_num
8702 < best_state->accumulated_insns_num
8703 || (curr_state->accumulated_insns_num
8704 == best_state->accumulated_insns_num
8705 && (curr_state->branch_deviation
8706 < best_state->branch_deviation
8707 || (curr_state->branch_deviation
8708 == best_state->branch_deviation
8709 && curr_state->middle_bundle_stops
8710 < best_state->middle_bundle_stops)))))))
30028c85 8711 best_state = curr_state;
c856f536 8712 /* Second (backward) pass: adding nops and templates. */
388092d5 8713 gcc_assert (best_state);
8714 insn_num = best_state->before_nops_num;
8715 template0 = template1 = -1;
8716 for (curr_state = best_state;
8717 curr_state->originator != NULL;
8718 curr_state = curr_state->originator)
8719 {
8720 insn = curr_state->insn;
8721 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8722 || asm_noperands (PATTERN (insn)) >= 0);
8723 insn_num++;
8724 if (verbose >= 2 && dump)
2130b7fb 8725 {
8726 struct DFA_chip
8727 {
8728 unsigned short one_automaton_state;
8729 unsigned short oneb_automaton_state;
8730 unsigned short two_automaton_state;
8731 unsigned short twob_automaton_state;
8732 };
9c808aad 8733
8734 fprintf
8735 (dump,
388092d5 8736 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8737 curr_state->unique_num,
8738 (curr_state->originator == NULL
8739 ? -1 : curr_state->originator->unique_num),
8740 curr_state->cost,
8741 curr_state->before_nops_num, curr_state->after_nops_num,
8742 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 8743 curr_state->middle_bundle_stops,
8744 (ia64_tune == PROCESSOR_ITANIUM
8745 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
8746 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
8747 INSN_UID (insn));
2130b7fb 8748 }
8749      /* Find the position in the current bundle window.  The window can
8750	 contain at most two bundles.  A two-bundle window means that
8751	 the processor will make two bundle rotations.  */
30028c85 8752 max_pos = get_max_pos (curr_state->dfa_state);
8753 if (max_pos == 6
8754 /* The following (negative template number) means that the
8755 processor did one bundle rotation. */
8756 || (max_pos == 3 && template0 < 0))
2130b7fb 8757 {
8758 /* We are at the end of the window -- find template(s) for
8759 its bundle(s). */
8760 pos = max_pos;
8761 if (max_pos == 3)
8762 template0 = get_template (curr_state->dfa_state, 3);
8763 else
8764 {
8765 template1 = get_template (curr_state->dfa_state, 3);
8766 template0 = get_template (curr_state->dfa_state, 6);
8767 }
8768 }
8769 if (max_pos > 3 && template1 < 0)
c856f536 8770 /* It may happen when we have the stop inside a bundle. */
30028c85 8771 {
e820471b 8772 gcc_assert (pos <= 3);
8773 template1 = get_template (curr_state->dfa_state, 3);
8774 pos += 3;
8775 }
f32360c7 8776 if (!asm_p)
c856f536 8777 /* Emit nops after the current insn. */
8778 for (i = 0; i < curr_state->after_nops_num; i++)
8779 {
8780 nop = gen_nop ();
8781 emit_insn_after (nop, insn);
8782 pos--;
e820471b 8783 gcc_assert (pos >= 0);
8784 if (pos % 3 == 0)
8785 {
8786 /* We are at the start of a bundle: emit the template
8787 (it should be defined). */
e820471b 8788 gcc_assert (template0 >= 0);
4a4cd49c 8789 ia64_add_bundle_selector_before (template0, nop);
8790		  /* If we have a two-bundle window, we make one bundle
8791		     rotation.  Otherwise template0 will be undefined
8792		     (negative value).  */
8793 template0 = template1;
8794 template1 = -1;
8795 }
8796 }
8797      /* Move the position backward in the window.  A group barrier has
8798	 no slot.  An asm insn takes a whole bundle.  */
8799 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8800 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8801 && asm_noperands (PATTERN (insn)) < 0)
8802 pos--;
c856f536 8803 /* Long insn takes 2 slots. */
8804 if (ia64_safe_type (insn) == TYPE_L)
8805 pos--;
e820471b 8806 gcc_assert (pos >= 0);
8807 if (pos % 3 == 0
8808 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8809 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8810 && asm_noperands (PATTERN (insn)) < 0)
8811 {
8812 /* The current insn is at the bundle start: emit the
8813 template. */
e820471b 8814 gcc_assert (template0 >= 0);
4a4cd49c 8815 ia64_add_bundle_selector_before (template0, insn);
8816 b = PREV_INSN (insn);
8817 insn = b;
68776c43 8818 /* See comment above in analogous place for emitting nops
c856f536 8819 after the insn. */
8820 template0 = template1;
8821 template1 = -1;
8822 }
8823      /* Emit nops before the current insn.  */
8824 for (i = 0; i < curr_state->before_nops_num; i++)
8825 {
8826 nop = gen_nop ();
8827 ia64_emit_insn_before (nop, insn);
8828 nop = PREV_INSN (insn);
8829 insn = nop;
8830 pos--;
e820471b 8831 gcc_assert (pos >= 0);
8832 if (pos % 3 == 0)
8833 {
68776c43 8834 /* See comment above in analogous place for emitting nops
c856f536 8835 after the insn. */
e820471b 8836 gcc_assert (template0 >= 0);
4a4cd49c 8837 ia64_add_bundle_selector_before (template0, insn);
8838 b = PREV_INSN (insn);
8839 insn = b;
8840 template0 = template1;
8841 template1 = -1;
8842 }
8843 }
8844 }
30028c85 8845 if (ia64_tune == PROCESSOR_ITANIUM)
8846    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8847       Itanium1 has a strange design: if the distance between an insn
8848       and a dependent MM-insn is less than 4 cycles, we get an
8849       additional 6-cycle stall.  So we make the distance equal to
8850       4 cycles if it is less.  */
8851 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8852 insn != NULL_RTX;
8853 insn = next_insn)
8854 {
8855 gcc_assert (INSN_P (insn)
8856 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8857 && GET_CODE (PATTERN (insn)) != USE
8858 && GET_CODE (PATTERN (insn)) != CLOBBER);
8859 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8860 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
c856f536 8861 /* We found a MM-insn which needs additional cycles. */
8862 {
8863 rtx last;
8864 int i, j, n;
8865 int pred_stop_p;
9c808aad 8866
8867	  /* Now we are searching for the template of the bundle in
8868	     which the MM-insn is placed and for the position of the
8869	     insn in the bundle (0, 1, 2).  We also check whether there
8870	     is a stop before the insn.  */
8871 last = prev_active_insn (insn);
8872 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8873 if (pred_stop_p)
8874 last = prev_active_insn (last);
8875 n = 0;
8876 for (;; last = prev_active_insn (last))
8877 if (recog_memoized (last) == CODE_FOR_bundle_selector)
8878 {
8879 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8880 if (template0 == 9)
8881		/* The insn is in an MLX bundle.  Change the template
8882		   to MFI because we will add nops before the
8883		   insn.  It simplifies subsequent code a lot.  */
30028c85 8884 PATTERN (last)
a556fd39 8885 = gen_bundle_selector (const2_rtx); /* -> MFI */
8886 break;
8887 }
8888 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8889 && (ia64_safe_itanium_class (last)
8890 != ITANIUM_CLASS_IGNORE))
30028c85 8891 n++;
8892	  /* Some sanity checks: the stop is not at the bundle start,
8893	     there are no more than 3 insns in the bundle, and the
8894	     MM-insn is not at the start of a bundle with template
8895	     MLX.  */
8896 gcc_assert ((!pred_stop_p || n)
8897 && n <= 2
8898 && (template0 != 9 || !n));
c856f536 8899 /* Put nops after the insn in the bundle. */
8900 for (j = 3 - n; j > 0; j --)
8901 ia64_emit_insn_before (gen_nop (), insn);
8902	  /* This takes into account that we will add N more nops
8903	     before the insn later -- please see the code below.  */
8904 add_cycles [INSN_UID (insn)]--;
8905 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8906 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8907 insn);
8908 if (pred_stop_p)
8909 add_cycles [INSN_UID (insn)]--;
8910 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8911 {
c856f536 8912 /* Insert "MII;" template. */
a556fd39 8913 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
8914 insn);
8915 ia64_emit_insn_before (gen_nop (), insn);
8916 ia64_emit_insn_before (gen_nop (), insn);
8917 if (i > 1)
8918 {
8919 /* To decrease code size, we use "MI;I;"
8920 template. */
8921 ia64_emit_insn_before
8922 (gen_insn_group_barrier (GEN_INT (3)), insn);
8923 i--;
8924 }
8925 ia64_emit_insn_before (gen_nop (), insn);
8926 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8927 insn);
8928 }
8929 /* Put the MM-insn in the same slot of a bundle with the
8930 same template as the original one. */
4a4cd49c 8931 ia64_add_bundle_selector_before (template0, insn);
8932 /* To put the insn in the same slot, add necessary number
8933 of nops. */
8934 for (j = n; j > 0; j --)
8935 ia64_emit_insn_before (gen_nop (), insn);
c856f536 8936 /* Put the stop if the original bundle had it. */
8937 if (pred_stop_p)
8938 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8939 insn);
8940 }
8941 }
8942
8943#ifdef ENABLE_CHECKING
8944 {
8945 /* Assert right calculation of middle_bundle_stops. */
8946 int num = best_state->middle_bundle_stops;
8947 bool start_bundle = true, end_bundle = false;
8948
8949 for (insn = NEXT_INSN (prev_head_insn);
8950 insn && insn != tail;
8951 insn = NEXT_INSN (insn))
8952 {
8953 if (!INSN_P (insn))
8954 continue;
8955 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8956 start_bundle = true;
8957 else
8958 {
8959 rtx next_insn;
8960
8961 for (next_insn = NEXT_INSN (insn);
8962 next_insn && next_insn != tail;
8963 next_insn = NEXT_INSN (next_insn))
8964 if (INSN_P (next_insn)
8965 && (ia64_safe_itanium_class (next_insn)
8966 != ITANIUM_CLASS_IGNORE
8967 || recog_memoized (next_insn)
8968 == CODE_FOR_bundle_selector)
8969 && GET_CODE (PATTERN (next_insn)) != USE
8970 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8971 break;
8972
8973 end_bundle = next_insn == NULL_RTX
8974 || next_insn == tail
8975 || (INSN_P (next_insn)
8976 && recog_memoized (next_insn)
8977 == CODE_FOR_bundle_selector);
8978 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8979 && !start_bundle && !end_bundle
8980 && next_insn
8981 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8982 && asm_noperands (PATTERN (next_insn)) < 0)
8983 num--;
8984
8985 start_bundle = false;
8986 }
8987 }
8988
8989 gcc_assert (num == 0);
8990 }
8991#endif
8992
8993 free (index_to_bundle_states);
8994 finish_bundle_state_table ();
8995 bundling_p = 0;
8996 dfa_clean_insn_cache ();
2130b7fb 8997}
c65ebc55 8998
8999/* The following function is called at the end of scheduling BB or
9000 EBB. After reload, it inserts stop bits and does insn bundling. */
9001
9002static void
9c808aad 9003ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 9004{
9005 if (sched_verbose)
9006 fprintf (dump, "// Finishing schedule.\n");
9007 if (!reload_completed)
9008 return;
9009 if (reload_completed)
9010 {
9011 final_emit_insn_group_barriers (dump);
9012 bundling (dump, sched_verbose, current_sched_info->prev_head,
9013 current_sched_info->next_tail);
9014 if (sched_verbose && dump)
9015 fprintf (dump, "// finishing %d-%d\n",
9016 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9017 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 9018
9019 return;
9020 }
9021}
9022
30028c85 9023/* The following function inserts stop bits in scheduled BB or EBB. */
2130b7fb 9024
30028c85 9025static void
9c808aad 9026final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 9027{
9028 rtx insn;
9029 int need_barrier_p = 0;
388092d5 9030 int seen_good_insn = 0;
30028c85 9031 rtx prev_insn = NULL_RTX;
2130b7fb 9032
30028c85 9033 init_insn_group_barriers ();
2130b7fb 9034
9035 for (insn = NEXT_INSN (current_sched_info->prev_head);
9036 insn != current_sched_info->next_tail;
9037 insn = NEXT_INSN (insn))
9038 {
9039 if (GET_CODE (insn) == BARRIER)
b395ddbe 9040 {
30028c85 9041 rtx last = prev_active_insn (insn);
14d118d6 9042
30028c85 9043 if (! last)
b395ddbe 9044 continue;
9045 if (GET_CODE (last) == JUMP_INSN
9046 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9047 last = prev_active_insn (last);
9048 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9049 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 9050
30028c85 9051 init_insn_group_barriers ();
388092d5 9052 seen_good_insn = 0;
9053 need_barrier_p = 0;
9054 prev_insn = NULL_RTX;
b395ddbe 9055 }
b5b8b0ac 9056 else if (NONDEBUG_INSN_P (insn))
2130b7fb 9057 {
30028c85 9058 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 9059 {
30028c85 9060 init_insn_group_barriers ();
388092d5 9061 seen_good_insn = 0;
9062 need_barrier_p = 0;
9063 prev_insn = NULL_RTX;
c65ebc55 9064 }
9065 else if (need_barrier_p || group_barrier_needed (insn)
9066 || (mflag_sched_stop_bits_after_every_cycle
9067 && GET_MODE (insn) == TImode
9068 && seen_good_insn))
2130b7fb 9069 {
9070 if (TARGET_EARLY_STOP_BITS)
9071 {
9072 rtx last;
9c808aad 9073
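		  /* With early stop bits, try to place the stop bit at the
		     start of the current cycle (just after the last TImode
		     insn that already has a stop recorded) instead of right
		     before INSN, then recompute the group-barrier state for
		     the insns in between.  */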
9074 for (last = insn;
9075 last != current_sched_info->prev_head;
9076 last = PREV_INSN (last))
9077 if (INSN_P (last) && GET_MODE (last) == TImode
9078 && stops_p [INSN_UID (last)])
9079 break;
9080 if (last == current_sched_info->prev_head)
9081 last = insn;
9082 last = prev_active_insn (last);
9083 if (last
9084 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9085 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9086 last);
9087 init_insn_group_barriers ();
9088 for (last = NEXT_INSN (last);
9089 last != insn;
9090 last = NEXT_INSN (last))
9091 if (INSN_P (last))
9092 {
9093 group_barrier_needed (last);
9094 if (recog_memoized (last) >= 0
9095 && important_for_bundling_p (last))
9096 seen_good_insn = 1;
9097 }
9098 }
9099 else
9100 {
9101 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9102 insn);
9103 init_insn_group_barriers ();
388092d5 9104 seen_good_insn = 0;
30028c85 9105 }
c1bc6ca8 9106 group_barrier_needed (insn);
9107 if (recog_memoized (insn) >= 0
9108 && important_for_bundling_p (insn))
9109 seen_good_insn = 1;
30028c85 9110 prev_insn = NULL_RTX;
2130b7fb 9111 }
9112 else if (recog_memoized (insn) >= 0
9113 && important_for_bundling_p (insn))
9114 {
9115 prev_insn = insn;
9116 seen_good_insn = 1;
9117 }
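	  /* A call or an asm forces a stop bit before the next real insn;
	     remember that here.  */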
9118 need_barrier_p = (GET_CODE (insn) == CALL_INSN
9119 || GET_CODE (PATTERN (insn)) == ASM_INPUT
9120 || asm_noperands (PATTERN (insn)) >= 0);
c65ebc55 9121 }
2130b7fb 9122 }
30028c85 9123}
2130b7fb 9124
30028c85 9125\f
2130b7fb 9126
a4d05547 9127/* If the following function returns TRUE, we will use the DFA
30028c85 9128 insn scheduler. */
2130b7fb 9129
c237e94a 9130static int
9c808aad 9131ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 9132{
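  /* Presumably 6 corresponds to the two-bundle (six slot) issue window
     used by the bundling pass after reload, while a smaller lookahead
     suffices before reload.  */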
9133 return (reload_completed ? 6 : 4);
9134}
2130b7fb 9135
30028c85 9136/* The following function initiates variable `dfa_pre_cycle_insn'. */
2130b7fb 9137
30028c85 9138static void
9c808aad 9139ia64_init_dfa_pre_cycle_insn (void)
9140{
9141 if (temp_dfa_state == NULL)
2130b7fb 9142 {
9143 dfa_state_size = state_size ();
9144 temp_dfa_state = xmalloc (dfa_state_size);
9145 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 9146 }
9147 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9148 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9149 recog_memoized (dfa_pre_cycle_insn);
9150 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9151 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9152 recog_memoized (dfa_stop_insn);
9153}
2130b7fb 9154
9155/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9156 used by the DFA insn scheduler. */
2130b7fb 9157
30028c85 9158static rtx
9c808aad 9159ia64_dfa_pre_cycle_insn (void)
9160{
9161 return dfa_pre_cycle_insn;
9162}
2130b7fb 9163
9164/* The following function returns TRUE if PRODUCER (of type ilog or
9165 ld) produces address for CONSUMER (of type st or stf). */
2130b7fb 9166
30028c85 9167int
9c808aad 9168ia64_st_address_bypass_p (rtx producer, rtx consumer)
9169{
9170 rtx dest, reg, mem;
2130b7fb 9171
e820471b 9172 gcc_assert (producer && consumer);
30028c85 9173 dest = ia64_single_set (producer);
9174 gcc_assert (dest);
9175 reg = SET_DEST (dest);
9176 gcc_assert (reg);
9177 if (GET_CODE (reg) == SUBREG)
9178 reg = SUBREG_REG (reg);
9179 gcc_assert (GET_CODE (reg) == REG);
9180
30028c85 9181 dest = ia64_single_set (consumer);
9182 gcc_assert (dest);
9183 mem = SET_DEST (dest);
9184 gcc_assert (mem && GET_CODE (mem) == MEM);
30028c85 9185 return reg_mentioned_p (reg, mem);
9186}
9187
9188/* The following function returns TRUE if PRODUCER (of type ilog or
9189 ld) produces address for CONSUMER (of type ld or fld). */
2130b7fb 9190
30028c85 9191int
9c808aad 9192ia64_ld_address_bypass_p (rtx producer, rtx consumer)
2130b7fb 9193{
9194 rtx dest, src, reg, mem;
9195
e820471b 9196 gcc_assert (producer && consumer);
30028c85 9197 dest = ia64_single_set (producer);
9198 gcc_assert (dest);
9199 reg = SET_DEST (dest);
9200 gcc_assert (reg);
9201 if (GET_CODE (reg) == SUBREG)
9202 reg = SUBREG_REG (reg);
9203 gcc_assert (GET_CODE (reg) == REG);
9204
30028c85 9205 src = ia64_single_set (consumer);
9206 gcc_assert (src);
9207 mem = SET_SRC (src);
9208 gcc_assert (mem);
048d0d36 9209
9210 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9211 mem = XVECEXP (mem, 0, 0);
048d0d36 9212 else if (GET_CODE (mem) == IF_THEN_ELSE)
917f1b7e 9213 /* ??? Is this bypass necessary for ld.c? */
9214 {
9215 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9216 mem = XEXP (mem, 1);
9217 }
9218
9219 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9220 mem = XEXP (mem, 0);
ef1ecf87 9221
9222 if (GET_CODE (mem) == UNSPEC)
9223 {
9224 int c = XINT (mem, 1);
9225
9226 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9227 || c == UNSPEC_LDSA);
9228 mem = XVECEXP (mem, 0, 0);
9229 }
9230
ef1ecf87 9231 /* Note that LO_SUM is used for GOT loads. */
e820471b 9232 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
ef1ecf87 9233
9234 return reg_mentioned_p (reg, mem);
9235}
9236
9237/* The following function returns TRUE if INSN produces an address
9238   for a load/store insn.  We will place such insns into an M slot
9239   because it decreases their latency.  */
9240
9241int
9c808aad 9242ia64_produce_address_p (rtx insn)
9243{
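  /* The insn's CALL flag is not about calls here: it appears to be
     reused as a marker set during dependence analysis (see
     ia64_dependencies_evaluation_hook earlier in this file) for insns
     whose result is used as the address of a later load or store.  */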
9244 return insn->call;
2130b7fb 9245}
30028c85 9246
2130b7fb 9247\f
3b572406
RH
9248/* Emit pseudo-ops for the assembler to describe predicate relations.
9249 At present this assumes that we only consider predicate pairs to
9250 be mutex, and that the assembler can deduce proper values from
9251 straight-line code. */
9252
9253static void
9c808aad 9254emit_predicate_relation_info (void)
3b572406 9255{
e0082a72 9256 basic_block bb;
3b572406 9257
e0082a72 9258 FOR_EACH_BB_REVERSE (bb)
3b572406 9259 {
3b572406 9260 int r;
a813c111 9261 rtx head = BB_HEAD (bb);
3b572406
RH
9262
9263 /* We only need such notes at code labels. */
9264 if (GET_CODE (head) != CODE_LABEL)
9265 continue;
740aeb38 9266 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
3b572406
RH
9267 head = NEXT_INSN (head);
9268
9f3b8452
RH
9269 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9270 grabbing the entire block of predicate registers. */
9271 for (r = PR_REG (2); r < PR_REG (64); r += 2)
6fb5fa3c 9272 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
3b572406 9273 {
f2f90c63 9274 rtx p = gen_rtx_REG (BImode, r);
054451ea 9275 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111
SB
9276 if (head == BB_END (bb))
9277 BB_END (bb) = n;
3b572406
RH
9278 head = n;
9279 }
9280 }
ca3920ad
JW
9281
9282 /* Look for conditional calls that do not return, and protect predicate
9283 relations around them. Otherwise the assembler will assume the call
9284 returns, and complain about uses of call-clobbered predicates after
9285 the call. */
e0082a72 9286 FOR_EACH_BB_REVERSE (bb)
ca3920ad 9287 {
a813c111 9288 rtx insn = BB_HEAD (bb);
9c808aad 9289
ca3920ad
JW
9290 while (1)
9291 {
9292 if (GET_CODE (insn) == CALL_INSN
9293 && GET_CODE (PATTERN (insn)) == COND_EXEC
9294 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9295 {
9296 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9297 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111
SB
9298 if (BB_HEAD (bb) == insn)
9299 BB_HEAD (bb) = b;
9300 if (BB_END (bb) == insn)
9301 BB_END (bb) = a;
ca3920ad 9302 }
9c808aad 9303
a813c111 9304 if (insn == BB_END (bb))
ca3920ad
JW
9305 break;
9306 insn = NEXT_INSN (insn);
9307 }
9308 }
3b572406
RH
9309}
9310
c65ebc55
JW
9311/* Perform machine dependent operations on the rtl chain INSNS. */
9312
18dbd950 9313static void
9c808aad 9314ia64_reorg (void)
c65ebc55 9315{
1e3881c2
JH
9316 /* We are freeing block_for_insn in the toplev to keep compatibility
9317 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 9318 compute_bb_for_insn ();
a00fe19f
RH
9319
9320 /* If optimizing, we'll have split before scheduling. */
9321 if (optimize == 0)
6fb5fa3c 9322 split_all_insns ();
2130b7fb 9323
388092d5
AB
9324 if (optimize && ia64_flag_schedule_insns2
9325 && dbg_cnt (ia64_sched2))
f4d578da 9326 {
eced69b5 9327 timevar_push (TV_SCHED2);
f4d578da 9328 ia64_final_schedule = 1;
30028c85
VM
9329
9330 initiate_bundle_states ();
9331 ia64_nop = make_insn_raw (gen_nop ());
9332 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9333 recog_memoized (ia64_nop);
9334 clocks_length = get_max_uid () + 1;
5ead67f6 9335 stops_p = XCNEWVEC (char, clocks_length);
30028c85
VM
9336 if (ia64_tune == PROCESSOR_ITANIUM)
9337 {
5ead67f6
KG
9338 clocks = XCNEWVEC (int, clocks_length);
9339 add_cycles = XCNEWVEC (int, clocks_length);
30028c85
VM
9340 }
9341 if (ia64_tune == PROCESSOR_ITANIUM2)
9342 {
9343 pos_1 = get_cpu_unit_code ("2_1");
9344 pos_2 = get_cpu_unit_code ("2_2");
9345 pos_3 = get_cpu_unit_code ("2_3");
9346 pos_4 = get_cpu_unit_code ("2_4");
9347 pos_5 = get_cpu_unit_code ("2_5");
9348 pos_6 = get_cpu_unit_code ("2_6");
9349 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9350 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9351 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9352 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9353 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9354 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9355 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9356 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9357 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9358 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9359 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9360 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9361 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9362 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9363 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9364 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9365 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9366 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9367 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9368 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9369 }
9370 else
9371 {
9372 pos_1 = get_cpu_unit_code ("1_1");
9373 pos_2 = get_cpu_unit_code ("1_2");
9374 pos_3 = get_cpu_unit_code ("1_3");
9375 pos_4 = get_cpu_unit_code ("1_4");
9376 pos_5 = get_cpu_unit_code ("1_5");
9377 pos_6 = get_cpu_unit_code ("1_6");
9378 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9379 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9380 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9381 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9382 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9383 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9384 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9385 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9386 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9387 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9388 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9389 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9390 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9391 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9392 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9393 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9394 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9395 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9396 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9397 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9398 }
388092d5
AB
9399
9400 if (flag_selective_scheduling2
9401 && !maybe_skip_selective_scheduling ())
9402 run_selective_scheduling ();
9403 else
9404 schedule_ebbs ();
9405
9406 /* Redo alignment computation, as it might gone wrong. */
9407 compute_alignments ();
9408
6fb5fa3c
DB
9409 /* We cannot reuse this one because it has been corrupted by the
9410 evil glat. */
30028c85
VM
9411 finish_bundle_states ();
9412 if (ia64_tune == PROCESSOR_ITANIUM)
9413 {
9414 free (add_cycles);
9415 free (clocks);
9416 }
9417 free (stops_p);
048d0d36 9418 stops_p = NULL;
c263766c 9419 emit_insn_group_barriers (dump_file);
30028c85 9420
f4d578da 9421 ia64_final_schedule = 0;
eced69b5 9422 timevar_pop (TV_SCHED2);
f4d578da
BS
9423 }
9424 else
c263766c 9425 emit_all_insn_group_barriers (dump_file);
f2f90c63 9426
6fb5fa3c
DB
9427 df_analyze ();
9428
f12f25a7
RH
9429 /* A call must not be the last instruction in a function, so that the
9430 return address is still within the function, so that unwinding works
9431 properly. Note that IA-64 differs from dwarf2 on this point. */
9432 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9433 {
9434 rtx insn;
9435 int saw_stop = 0;
9436
9437 insn = get_last_insn ();
9438 if (! INSN_P (insn))
9439 insn = prev_active_insn (insn);
2ca57608 9440 if (insn)
f12f25a7 9441 {
2ca57608
L
9442 /* Skip over insns that expand to nothing. */
9443 while (GET_CODE (insn) == INSN
9444 && get_attr_empty (insn) == EMPTY_YES)
9445 {
9446 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9447 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9448 saw_stop = 1;
9449 insn = prev_active_insn (insn);
9450 }
9451 if (GET_CODE (insn) == CALL_INSN)
9452 {
9453 if (! saw_stop)
9454 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9455 emit_insn (gen_break_f ());
9456 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9457 }
f12f25a7
RH
9458 }
9459 }
9460
f2f90c63 9461 emit_predicate_relation_info ();
014a1138
JZ
9462
9463 if (ia64_flag_var_tracking)
9464 {
9465 timevar_push (TV_VAR_TRACKING);
9466 variable_tracking_main ();
9467 timevar_pop (TV_VAR_TRACKING);
9468 }
0d475361 9469 df_finish_pass (false);
c65ebc55
JW
9470}
9471\f
9472/* Return true if REGNO is used by the epilogue. */
9473
9474int
9c808aad 9475ia64_epilogue_uses (int regno)
c65ebc55 9476{
6ca3c22f
RH
9477 switch (regno)
9478 {
9479 case R_GR (1):
b23ba0b8
RH
9480 /* With a call to a function in another module, we will write a new
9481 value to "gp". After returning from such a call, we need to make
9482 sure the function restores the original gp-value, even if the
9483 function itself does not use the gp anymore. */
9484 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
9485
9486 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9487 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9488 /* For functions defined with the syscall_linkage attribute, all
9489 input registers are marked as live at all function exits. This
9490 prevents the register allocator from using the input registers,
9491 which in turn makes it possible to restart a system call after
9492 an interrupt without having to save/restore the input registers.
9493 This also prevents kernel data from leaking to application code. */
9494 return lookup_attribute ("syscall_linkage",
9495 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9496
9497 case R_BR (0):
9498 /* Conditional return patterns can't represent the use of `b0' as
9499 the return address, so we force the value live this way. */
9500 return 1;
6b6c1201 9501
6ca3c22f
RH
9502 case AR_PFS_REGNUM:
9503 /* Likewise for ar.pfs, which is used by br.ret. */
9504 return 1;
5527bf14 9505
6ca3c22f
RH
9506 default:
9507 return 0;
9508 }
c65ebc55 9509}
15b5aef3
RH
9510
9511/* Return true if REGNO is used by the frame unwinder. */
9512
9513int
9c808aad 9514ia64_eh_uses (int regno)
15b5aef3 9515{
09639a83 9516 unsigned int r;
6fb5fa3c 9517
15b5aef3
RH
9518 if (! reload_completed)
9519 return 0;
9520
6fb5fa3c
DB
9521 if (regno == 0)
9522 return 0;
9523
9524 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9525 if (regno == current_frame_info.r[r]
9526 || regno == emitted_frame_related_regs[r])
9527 return 1;
15b5aef3
RH
9528
9529 return 0;
9530}
c65ebc55 9531\f
1cdbd630 9532/* Return true if this goes in small data/bss. */
c65ebc55
JW
9533
9534/* ??? We could also support our own long data here.  Generating movl/add/ld8
9535 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9536 code faster because there is one less load. This also includes incomplete
9537 types which can't go in sdata/sbss. */
9538
ae46c4e0 9539static bool
3101faab 9540ia64_in_small_data_p (const_tree exp)
ae46c4e0
RH
9541{
9542 if (TARGET_NO_SDATA)
9543 return false;
9544
3907500b
RH
9545 /* We want to merge strings, so we never consider them small data. */
9546 if (TREE_CODE (exp) == STRING_CST)
9547 return false;
9548
4c494a15
ZW
9549 /* Functions are never small data. */
9550 if (TREE_CODE (exp) == FUNCTION_DECL)
9551 return false;
9552
ae46c4e0
RH
9553 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9554 {
9555 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
826eb7ed 9556
ae46c4e0 9557 if (strcmp (section, ".sdata") == 0
826eb7ed
JB
9558 || strncmp (section, ".sdata.", 7) == 0
9559 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9560 || strcmp (section, ".sbss") == 0
9561 || strncmp (section, ".sbss.", 6) == 0
9562 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
ae46c4e0
RH
9563 return true;
9564 }
9565 else
9566 {
9567 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9568
9569 /* If this is an incomplete type with size 0, then we can't put it
9570 in sdata because it might be too big when completed. */
9571 if (size > 0 && size <= ia64_section_threshold)
9572 return true;
9573 }
9574
9575 return false;
9576}
0c96007e 9577\f
ad0fc698
JW
9578/* Output assembly directives for prologue regions. */
9579
9580/* The current basic block number. */
9581
e0082a72 9582static bool last_block;
ad0fc698
JW
9583
9584/* True if we need a copy_state command at the start of the next block. */
9585
e0082a72 9586static bool need_copy_state;
ad0fc698 9587
658f32fd
AO
9588#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9589# define MAX_ARTIFICIAL_LABEL_BYTES 30
9590#endif
9591
9592/* Emit a debugging label after a call-frame-related insn. We'd
9593 rather output the label right away, but we'd have to output it
9594 after, not before, the instruction, and the instruction has not
9595 been output yet. So we emit the label after the insn, delete it to
9596 avoid introducing basic blocks, and mark it as preserved, such that
9597 it is still output, given that it is referenced in debug info. */
9598
9599static const char *
9600ia64_emit_deleted_label_after_insn (rtx insn)
9601{
9602 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9603 rtx lb = gen_label_rtx ();
9604 rtx label_insn = emit_label_after (lb, insn);
9605
9606 LABEL_PRESERVE_P (lb) = 1;
9607
9608 delete_insn (label_insn);
9609
9610 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9611
9612 return xstrdup (label);
9613}
9614
9615/* Define the CFA after INSN with the steady-state definition. */
9616
9617static void
b5b8b0ac 9618ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
658f32fd
AO
9619{
9620 rtx fp = frame_pointer_needed
9621 ? hard_frame_pointer_rtx
9622 : stack_pointer_rtx;
b5b8b0ac
AO
9623 const char *label = ia64_emit_deleted_label_after_insn (insn);
9624
9625 if (!frame)
9626 return;
658f32fd
AO
9627
9628 dwarf2out_def_cfa
b5b8b0ac 9629 (label, REGNO (fp),
658f32fd
AO
9630 ia64_initial_elimination_offset
9631 (REGNO (arg_pointer_rtx), REGNO (fp))
9632 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9633}
9634
9635/* The generic dwarf2 frame debug info generator does not define a
9636 separate region for the very end of the epilogue, so refrain from
9637 doing so in the IA64-specific code as well. */
9638
9639#define IA64_CHANGE_CFA_IN_EPILOGUE 0
9640
ad0fc698
JW
9641/* The function emits unwind directives for the start of an epilogue. */
9642
9643static void
658f32fd 9644process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
ad0fc698
JW
9645{
9646 /* If this isn't the last block of the function, then we need to label the
9647 current state, and copy it back in at the start of the next block. */
9648
e0082a72 9649 if (!last_block)
ad0fc698 9650 {
658f32fd
AO
9651 if (unwind)
9652 fprintf (asm_out_file, "\t.label_state %d\n",
9653 ++cfun->machine->state_num);
e0082a72 9654 need_copy_state = true;
ad0fc698
JW
9655 }
9656
658f32fd
AO
9657 if (unwind)
9658 fprintf (asm_out_file, "\t.restore sp\n");
9659 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9660 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9661 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
ad0fc698 9662}
0c96007e 9663
0c96007e
AM
9664/* This function processes a SET pattern looking for specific patterns
9665 which result in emitting an assembly directive required for unwinding. */
97e242b0 9666
0c96007e 9667static int
658f32fd 9668process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
0c96007e
AM
9669{
9670 rtx src = SET_SRC (pat);
9671 rtx dest = SET_DEST (pat);
97e242b0 9672 int src_regno, dest_regno;
0c96007e 9673
97e242b0
RH
9674 /* Look for the ALLOC insn. */
9675 if (GET_CODE (src) == UNSPEC_VOLATILE
086c0f96 9676 && XINT (src, 1) == UNSPECV_ALLOC
97e242b0 9677 && GET_CODE (dest) == REG)
0c96007e 9678 {
97e242b0
RH
9679 dest_regno = REGNO (dest);
9680
a8f5224e
DM
9681 /* If this is the final destination for ar.pfs, then this must
9682 be the alloc in the prologue. */
6fb5fa3c 9683 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
658f32fd
AO
9684 {
9685 if (unwind)
9686 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9687 ia64_dbx_register_number (dest_regno));
9688 }
a8f5224e
DM
9689 else
9690 {
9691 /* This must be an alloc before a sibcall. We must drop the
9692 old frame info. The easiest way to drop the old frame
9693 info is to ensure we had a ".restore sp" directive
9694 followed by a new prologue. If the procedure doesn't
9695 have a memory-stack frame, we'll issue a dummy ".restore
9696 sp" now. */
b1eae416 9697 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
a8f5224e 9698 /* if haven't done process_epilogue() yet, do it now */
658f32fd
AO
9699 process_epilogue (asm_out_file, insn, unwind, frame);
9700 if (unwind)
9701 fprintf (asm_out_file, "\t.prologue\n");
a8f5224e 9702 }
0c96007e
AM
9703 return 1;
9704 }
9705
ed168e45 9706 /* Look for SP = .... */
0c96007e
AM
9707 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9708 {
9709 if (GET_CODE (src) == PLUS)
9710 {
9711 rtx op0 = XEXP (src, 0);
9712 rtx op1 = XEXP (src, 1);
e820471b
NS
9713
9714 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9715
9716 if (INTVAL (op1) < 0)
658f32fd
AO
9717 {
9718 gcc_assert (!frame_pointer_needed);
9719 if (unwind)
9720 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9721 -INTVAL (op1));
b5b8b0ac 9722 ia64_dwarf2out_def_steady_cfa (insn, frame);
658f32fd 9723 }
0186257f 9724 else
658f32fd 9725 process_epilogue (asm_out_file, insn, unwind, frame);
0c96007e 9726 }
0186257f 9727 else
e820471b
NS
9728 {
9729 gcc_assert (GET_CODE (src) == REG
9730 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
658f32fd 9731 process_epilogue (asm_out_file, insn, unwind, frame);
e820471b 9732 }
0186257f
JW
9733
9734 return 1;
0c96007e 9735 }
0c96007e
AM
9736
9737 /* Register move we need to look at. */
9738 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9739 {
97e242b0
RH
9740 src_regno = REGNO (src);
9741 dest_regno = REGNO (dest);
9742
9743 switch (src_regno)
9744 {
9745 case BR_REG (0):
0c96007e 9746 /* Saving return address pointer. */
6fb5fa3c 9747 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
658f32fd
AO
9748 if (unwind)
9749 fprintf (asm_out_file, "\t.save rp, r%d\n",
9750 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9751 return 1;
9752
9753 case PR_REG (0):
6fb5fa3c 9754 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
658f32fd
AO
9755 if (unwind)
9756 fprintf (asm_out_file, "\t.save pr, r%d\n",
9757 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9758 return 1;
9759
9760 case AR_UNAT_REGNUM:
6fb5fa3c 9761 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
9762 if (unwind)
9763 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9764 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9765 return 1;
9766
9767 case AR_LC_REGNUM:
6fb5fa3c 9768 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
9769 if (unwind)
9770 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9771 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9772 return 1;
9773
9774 case STACK_POINTER_REGNUM:
e820471b
NS
9775 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9776 && frame_pointer_needed);
658f32fd
AO
9777 if (unwind)
9778 fprintf (asm_out_file, "\t.vframe r%d\n",
9779 ia64_dbx_register_number (dest_regno));
b5b8b0ac 9780 ia64_dwarf2out_def_steady_cfa (insn, frame);
97e242b0
RH
9781 return 1;
9782
9783 default:
9784 /* Everything else should indicate being stored to memory. */
e820471b 9785 gcc_unreachable ();
0c96007e
AM
9786 }
9787 }
97e242b0
RH
9788
9789 /* Memory store we need to look at. */
9790 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 9791 {
97e242b0
RH
9792 long off;
9793 rtx base;
9794 const char *saveop;
9795
9796 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 9797 {
97e242b0
RH
9798 base = XEXP (dest, 0);
9799 off = 0;
0c96007e 9800 }
e820471b 9801 else
0c96007e 9802 {
e820471b
NS
9803 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9804 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
97e242b0
RH
9805 base = XEXP (XEXP (dest, 0), 0);
9806 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 9807 }
0c96007e 9808
97e242b0
RH
9809 if (base == hard_frame_pointer_rtx)
9810 {
9811 saveop = ".savepsp";
9812 off = - off;
9813 }
97e242b0 9814 else
e820471b
NS
9815 {
9816 gcc_assert (base == stack_pointer_rtx);
9817 saveop = ".savesp";
9818 }
97e242b0
RH
9819
9820 src_regno = REGNO (src);
9821 switch (src_regno)
9822 {
9823 case BR_REG (0):
6fb5fa3c 9824 gcc_assert (!current_frame_info.r[reg_save_b0]);
658f32fd
AO
9825 if (unwind)
9826 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
97e242b0
RH
9827 return 1;
9828
9829 case PR_REG (0):
6fb5fa3c 9830 gcc_assert (!current_frame_info.r[reg_save_pr]);
658f32fd
AO
9831 if (unwind)
9832 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
97e242b0
RH
9833 return 1;
9834
9835 case AR_LC_REGNUM:
6fb5fa3c 9836 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
9837 if (unwind)
9838 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
97e242b0
RH
9839 return 1;
9840
9841 case AR_PFS_REGNUM:
6fb5fa3c 9842 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
658f32fd
AO
9843 if (unwind)
9844 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
97e242b0
RH
9845 return 1;
9846
9847 case AR_UNAT_REGNUM:
6fb5fa3c 9848 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
9849 if (unwind)
9850 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
97e242b0
RH
9851 return 1;
9852
9853 case GR_REG (4):
9854 case GR_REG (5):
9855 case GR_REG (6):
9856 case GR_REG (7):
658f32fd
AO
9857 if (unwind)
9858 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9859 1 << (src_regno - GR_REG (4)));
97e242b0
RH
9860 return 1;
9861
9862 case BR_REG (1):
9863 case BR_REG (2):
9864 case BR_REG (3):
9865 case BR_REG (4):
9866 case BR_REG (5):
658f32fd
AO
9867 if (unwind)
9868 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9869 1 << (src_regno - BR_REG (1)));
0c96007e 9870 return 1;
97e242b0
RH
9871
9872 case FR_REG (2):
9873 case FR_REG (3):
9874 case FR_REG (4):
9875 case FR_REG (5):
658f32fd
AO
9876 if (unwind)
9877 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9878 1 << (src_regno - FR_REG (2)));
97e242b0
RH
9879 return 1;
9880
9881 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9882 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9883 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9884 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
658f32fd
AO
9885 if (unwind)
9886 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9887 1 << (src_regno - FR_REG (12)));
97e242b0
RH
9888 return 1;
9889
9890 default:
9891 return 0;
0c96007e
AM
9892 }
9893 }
97e242b0 9894
0c96007e
AM
9895 return 0;
9896}
9897
9898
9899/* This function looks at a single insn and emits any directives
9900 required to unwind this insn. */
9901void
9c808aad 9902process_for_unwind_directive (FILE *asm_out_file, rtx insn)
0c96007e 9903{
658f32fd
AO
9904 bool unwind = (flag_unwind_tables
9905 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9906 bool frame = dwarf2out_do_frame ();
9907
9908 if (unwind || frame)
0c96007e 9909 {
97e242b0
RH
9910 rtx pat;
9911
740aeb38 9912 if (NOTE_INSN_BASIC_BLOCK_P (insn))
ad0fc698 9913 {
e0082a72 9914 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
ad0fc698
JW
9915
9916 /* Restore unwind state from immediately before the epilogue. */
9917 if (need_copy_state)
9918 {
658f32fd
AO
9919 if (unwind)
9920 {
9921 fprintf (asm_out_file, "\t.body\n");
9922 fprintf (asm_out_file, "\t.copy_state %d\n",
9923 cfun->machine->state_num);
9924 }
b5b8b0ac
AO
9925 if (IA64_CHANGE_CFA_IN_EPILOGUE)
9926 ia64_dwarf2out_def_steady_cfa (insn, frame);
e0082a72 9927 need_copy_state = false;
ad0fc698
JW
9928 }
9929 }
9930
5a63e069 9931 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
ad0fc698
JW
9932 return;
9933
97e242b0
RH
9934 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9935 if (pat)
9936 pat = XEXP (pat, 0);
9937 else
9938 pat = PATTERN (insn);
0c96007e
AM
9939
9940 switch (GET_CODE (pat))
9941 {
809d4ef1 9942 case SET:
658f32fd 9943 process_set (asm_out_file, pat, insn, unwind, frame);
809d4ef1
RH
9944 break;
9945
9946 case PARALLEL:
9947 {
9948 int par_index;
9949 int limit = XVECLEN (pat, 0);
9950 for (par_index = 0; par_index < limit; par_index++)
9951 {
9952 rtx x = XVECEXP (pat, 0, par_index);
9953 if (GET_CODE (x) == SET)
658f32fd 9954 process_set (asm_out_file, x, insn, unwind, frame);
809d4ef1
RH
9955 }
9956 break;
9957 }
9958
9959 default:
e820471b 9960 gcc_unreachable ();
0c96007e
AM
9961 }
9962 }
9963}
c65ebc55 9964
0551c32d 9965\f
af795c3c
RH
9966enum ia64_builtins
9967{
9968 IA64_BUILTIN_BSP,
c252db20
L
9969 IA64_BUILTIN_COPYSIGNQ,
9970 IA64_BUILTIN_FABSQ,
9971 IA64_BUILTIN_FLUSHRS,
fcb82ab0
UB
9972 IA64_BUILTIN_INFQ,
9973 IA64_BUILTIN_HUGE_VALQ
af795c3c
RH
9974};
9975
c65ebc55 9976void
9c808aad 9977ia64_init_builtins (void)
c65ebc55 9978{
9649812a 9979 tree fpreg_type;
bf9ab6b6 9980 tree float80_type;
9649812a
MM
9981
9982 /* The __fpreg type. */
9983 fpreg_type = make_node (REAL_TYPE);
4de67c26 9984 TYPE_PRECISION (fpreg_type) = 82;
9649812a
MM
9985 layout_type (fpreg_type);
9986 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9987
9988 /* The __float80 type. */
bf9ab6b6 9989 float80_type = make_node (REAL_TYPE);
968a7562 9990 TYPE_PRECISION (float80_type) = 80;
bf9ab6b6
MM
9991 layout_type (float80_type);
9992 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
9993
9994 /* The __float128 type. */
02befdf4 9995 if (!TARGET_HPUX)
9649812a 9996 {
c252db20 9997 tree ftype, decl;
9649812a 9998 tree float128_type = make_node (REAL_TYPE);
c252db20 9999
9649812a
MM
10000 TYPE_PRECISION (float128_type) = 128;
10001 layout_type (float128_type);
10002 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
c252db20
L
10003
10004 /* TFmode support builtins. */
10005 ftype = build_function_type (float128_type, void_list_node);
10006 add_builtin_function ("__builtin_infq", ftype,
10007 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10008 NULL, NULL_TREE);
10009
fcb82ab0
UB
10010 add_builtin_function ("__builtin_huge_valq", ftype,
10011 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10012 NULL, NULL_TREE);
10013
c252db20
L
10014 ftype = build_function_type_list (float128_type,
10015 float128_type,
10016 NULL_TREE);
10017 decl = add_builtin_function ("__builtin_fabsq", ftype,
10018 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10019 "__fabstf2", NULL_TREE);
10020 TREE_READONLY (decl) = 1;
10021
10022 ftype = build_function_type_list (float128_type,
10023 float128_type,
10024 float128_type,
10025 NULL_TREE);
10026 decl = add_builtin_function ("__builtin_copysignq", ftype,
10027 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10028 "__copysigntf3", NULL_TREE);
10029 TREE_READONLY (decl) = 1;
9649812a
MM
10030 }
10031 else
02befdf4 10032 /* Under HPUX, this is a synonym for "long double". */
9649812a
MM
10033 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10034 "__float128");
10035
f2972bf8
DR
10036 /* Fwrite on VMS is non-standard. */
10037 if (TARGET_ABI_OPEN_VMS)
10038 {
10039 implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
10040 implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
10041 }
10042
6e34d3a3 10043#define def_builtin(name, type, code) \
c79efc4d
RÁE
10044 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10045 NULL, NULL_TREE)
0551c32d 10046
3b572406 10047 def_builtin ("__builtin_ia64_bsp",
b4de2f7d 10048 build_function_type (ptr_type_node, void_list_node),
3b572406 10049 IA64_BUILTIN_BSP);
ce152ef8 10050
9c808aad
AJ
10051 def_builtin ("__builtin_ia64_flushrs",
10052 build_function_type (void_type_node, void_list_node),
ce152ef8
AM
10053 IA64_BUILTIN_FLUSHRS);
10054
0551c32d 10055#undef def_builtin
7d522000
SE
10056
10057 if (TARGET_HPUX)
10058 {
10059 if (built_in_decls [BUILT_IN_FINITE])
10060 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
10061 "_Isfinite");
10062 if (built_in_decls [BUILT_IN_FINITEF])
10063 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
10064 "_Isfinitef");
10065 if (built_in_decls [BUILT_IN_FINITEL])
10066 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
10067 "_Isfinitef128");
10068 }
c65ebc55
JW
10069}
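/* A minimal usage sketch for the types and builtins registered above,
   assuming a non-HP-UX target where __float128 is a distinct 128-bit type
   (on HP-UX it is a synonym for "long double").  __builtin_fabsq and
   __builtin_copysignq expand to calls to __fabstf2 and __copysigntf3:

     __float80  x = 1.0;
     __fpreg    r;
     __float128 q = __builtin_infq ();
     __float128 h = __builtin_huge_valq ();
     __float128 a = __builtin_fabsq (q);
     __float128 c = __builtin_copysignq (a, h);

   The variable names and initializers are illustrative only.  */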
10070
c65ebc55 10071rtx
9c808aad
AJ
10072ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10073 enum machine_mode mode ATTRIBUTE_UNUSED,
10074 int ignore ATTRIBUTE_UNUSED)
c65ebc55 10075{
767fad4c 10076 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
97e242b0 10077 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
c65ebc55
JW
10078
10079 switch (fcode)
10080 {
ce152ef8 10081 case IA64_BUILTIN_BSP:
0551c32d
RH
10082 if (! target || ! register_operand (target, DImode))
10083 target = gen_reg_rtx (DImode);
10084 emit_insn (gen_bsp_value (target));
8419b675
RK
10085#ifdef POINTERS_EXTEND_UNSIGNED
10086 target = convert_memory_address (ptr_mode, target);
10087#endif
0551c32d 10088 return target;
ce152ef8
AM
10089
10090 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
10091 emit_insn (gen_flushrs ());
10092 return const0_rtx;
ce152ef8 10093
c252db20 10094 case IA64_BUILTIN_INFQ:
fcb82ab0 10095 case IA64_BUILTIN_HUGE_VALQ:
c252db20
L
10096 {
10097 REAL_VALUE_TYPE inf;
10098 rtx tmp;
10099
10100 real_inf (&inf);
10101 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
10102
10103 tmp = validize_mem (force_const_mem (mode, tmp));
10104
10105 if (target == 0)
10106 target = gen_reg_rtx (mode);
10107
10108 emit_move_insn (target, tmp);
10109 return target;
10110 }
10111
10112 case IA64_BUILTIN_FABSQ:
10113 case IA64_BUILTIN_COPYSIGNQ:
10114 return expand_call (exp, target, ignore);
10115
c65ebc55 10116 default:
c252db20 10117 gcc_unreachable ();
c65ebc55
JW
10118 }
10119
0551c32d 10120 return NULL_RTX;
c65ebc55 10121}
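/* Hedged usage sketch for the two register-stack builtins expanded above:
   __builtin_ia64_bsp returns the current backing-store pointer and
   __builtin_ia64_flushrs flushes the dirty register stack to memory, so a
   garbage collector or unwinder might scan the backing store like this:

     void scan_backing_store (void (*scan) (void *lo, void *hi), void *base)
     {
       void *bsp;
       __builtin_ia64_flushrs ();
       bsp = __builtin_ia64_bsp ();
       scan (base, bsp);
     }

   "scan_backing_store", "scan" and "base" are hypothetical; only the
   builtin names come from the code above.  */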
0d7839da
SE
10122
10123/* For HP-UX IA64, aggregate parameters are passed in the most
10124   significant bits of the stack slot. */
10125
10126enum direction
586de218 10127ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
0d7839da 10128{
ed168e45 10129 /* Exception to normal case for structures/unions/etc. */
0d7839da
SE
10130
10131 if (type && AGGREGATE_TYPE_P (type)
10132 && int_size_in_bytes (type) < UNITS_PER_WORD)
10133 return upward;
10134
d3704c46
KH
10135 /* Fall back to the default. */
10136 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
0d7839da 10137}
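/* Illustrative example of the rule above, under the stated HP-UX convention:
   a 3-byte aggregate such as

     struct rgb { unsigned char r, g, b; };

   is smaller than UNITS_PER_WORD, so when passed on the stack it is padded
   "upward", i.e. placed in the most significant bytes of its slot, while
   scalars fall back to the default padding.  The struct is illustrative
   only.  */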
686f3bf0 10138
c47c29c8
L
10139/* Emit text to declare externally defined variables and functions, because
10140 the Intel assembler does not support undefined externals. */
686f3bf0 10141
c47c29c8
L
10142void
10143ia64_asm_output_external (FILE *file, tree decl, const char *name)
686f3bf0 10144{
c47c29c8
L
10145 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10146 set in order to avoid putting out names that are never really
10147 used. */
10148 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
686f3bf0 10149 {
c47c29c8 10150 /* maybe_assemble_visibility will return 1 if the assembler
2e226e66 10151 visibility directive is output. */
c47c29c8
L
10152 int need_visibility = ((*targetm.binds_local_p) (decl)
10153 && maybe_assemble_visibility (decl));
57d4f65c 10154
f2972bf8
DR
10155#ifdef DO_CRTL_NAMES
10156 DO_CRTL_NAMES;
10157#endif
10158
c47c29c8
L
10159 /* GNU as does not need anything here, but the HP linker does
10160 need something for external functions. */
10161 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10162 && TREE_CODE (decl) == FUNCTION_DECL)
812b587e 10163 (*targetm.asm_out.globalize_decl_name) (file, decl);
c47c29c8
L
10164 else if (need_visibility && !TARGET_GNU_AS)
10165 (*targetm.asm_out.globalize_label) (file, name);
686f3bf0
SE
10166 }
10167}
10168
1f7aa7cd 10169/* Set SImode div/mod functions; init_integral_libfuncs only initializes
6bc709c1
L
10170 modes of word_mode and larger. Rename the TFmode libfuncs using the
10171 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10172 backward compatibility. */
1f7aa7cd
SE
10173
10174static void
10175ia64_init_libfuncs (void)
10176{
10177 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10178 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10179 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10180 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
6bc709c1
L
10181
10182 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10183 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10184 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10185 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10186 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10187
10188 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10189 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10190 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10191 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10192 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10193 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10194
10195 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10196 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
4a73d865 10197 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
6bc709c1
L
10198 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10199 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10200
10201 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10202 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
4a73d865 10203 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
2a3ebe77
JM
10204 /* HP-UX 11.23 libc does not have a function for unsigned
10205 SImode-to-TFmode conversion. */
10206 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
1f7aa7cd
SE
10207}
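/* A hedged sketch of what the renaming above means for generated code: a
   TFmode multiply (__float128 on GNU/Linux, "long double" on HP-UX) such as

     __float128 mul (__float128 a, __float128 b) { return a * b; }

   is emitted as a call to _U_Qfmpy rather than to libgcc's default __multf3
   name, and a widening conversion from double uses _U_Qfcnvff_dbl_to_quad.
   The function "mul" is illustrative only.  */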
10208
c15c90bb 10209/* Rename all the TFmode libfuncs using the HPUX conventions. */
738e7b39 10210
c15c90bb
ZW
10211static void
10212ia64_hpux_init_libfuncs (void)
10213{
1f7aa7cd
SE
10214 ia64_init_libfuncs ();
10215
bdbba3c2
SE
10216 /* The HP SI millicode division and mod functions expect DI arguments.
10217 By turning them off completely we avoid using both libgcc and the
10218 non-standard millicode routines and use the HP DI millicode routines
10219 instead. */
10220
10221 set_optab_libfunc (sdiv_optab, SImode, 0);
10222 set_optab_libfunc (udiv_optab, SImode, 0);
10223 set_optab_libfunc (smod_optab, SImode, 0);
10224 set_optab_libfunc (umod_optab, SImode, 0);
10225
10226 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10227 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10228 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10229 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10230
10231 /* HP-UX libc has TF min/max/abs routines in it. */
c15c90bb
ZW
10232 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10233 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10234 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
c15c90bb 10235
24ea7948
ZW
10236 /* ia64_expand_compare uses this. */
10237 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10238
10239 /* These should never be used. */
10240 set_optab_libfunc (eq_optab, TFmode, 0);
10241 set_optab_libfunc (ne_optab, TFmode, 0);
10242 set_optab_libfunc (gt_optab, TFmode, 0);
10243 set_optab_libfunc (ge_optab, TFmode, 0);
10244 set_optab_libfunc (lt_optab, TFmode, 0);
10245 set_optab_libfunc (le_optab, TFmode, 0);
c15c90bb 10246}
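/* Hedged consequence of the SImode overrides above: with the SImode optab
   libfuncs cleared, a 32-bit division such as

     int idiv (int a, int b) { return a / b; }

   is widened and ends up calling the HP DImode millicode routine
   __milli_divI instead of libgcc's __divsi3.  "idiv" is illustrative
   only.  */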
738e7b39
RK
10247
10248/* Rename the division and modulus functions in VMS. */
10249
10250static void
10251ia64_vms_init_libfuncs (void)
10252{
10253 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10254 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10255 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10256 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10257 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10258 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10259 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10260 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
f2972bf8
DR
10261 abort_libfunc = init_one_libfunc ("decc$abort");
10262 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10263#ifdef MEM_LIBFUNCS_INIT
10264 MEM_LIBFUNCS_INIT;
10265#endif
738e7b39 10266}
6bc709c1
L
10267
10268/* Rename the TFmode libfuncs available from soft-fp in glibc using
10269 the HPUX conventions. */
10270
10271static void
10272ia64_sysv4_init_libfuncs (void)
10273{
10274 ia64_init_libfuncs ();
10275
10276 /* These functions are not part of the HPUX TFmode interface. We
10277 use them instead of _U_Qfcmp, which doesn't work the way we
10278 expect. */
10279 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10280 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10281 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10282 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10283 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10284 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10285
10286 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10287 glibc doesn't have them. */
10288}
c252db20
L
10289
10290/* Use soft-fp. */
10291
10292static void
10293ia64_soft_fp_init_libfuncs (void)
10294{
10295}
f2972bf8
DR
10296
10297static bool
10298ia64_vms_valid_pointer_mode (enum machine_mode mode)
10299{
10300 return (mode == SImode || mode == DImode);
10301}
ae46c4e0 10302\f
9b580a0b
RH
10303/* For HPUX, it is illegal to have relocations in shared segments. */
10304
10305static int
10306ia64_hpux_reloc_rw_mask (void)
10307{
10308 return 3;
10309}
10310
10311/* For others, relax this so that relocations to local data go in
10312 read-only segments, but we still cannot allow global relocations
10313 in read-only segments. */
10314
10315static int
10316ia64_reloc_rw_mask (void)
10317{
10318 return flag_pic ? 3 : 2;
10319}
10320
d6b5193b
RS
10321/* Return the section to use for X. The only special thing we do here
10322 is to honor small data. */
b64a1b53 10323
d6b5193b 10324static section *
9c808aad
AJ
10325ia64_select_rtx_section (enum machine_mode mode, rtx x,
10326 unsigned HOST_WIDE_INT align)
b64a1b53
RH
10327{
10328 if (GET_MODE_SIZE (mode) > 0
1f4a2e84
SE
10329 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10330 && !TARGET_NO_SDATA)
d6b5193b 10331 return sdata_section;
b64a1b53 10332 else
d6b5193b 10333 return default_elf_select_rtx_section (mode, x, align);
b64a1b53
RH
10334}
10335
1e1bd14e 10336static unsigned int
abb8b19a
AM
10337ia64_section_type_flags (tree decl, const char *name, int reloc)
10338{
10339 unsigned int flags = 0;
10340
10341 if (strcmp (name, ".sdata") == 0
10342 || strncmp (name, ".sdata.", 7) == 0
10343 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10344 || strncmp (name, ".sdata2.", 8) == 0
10345 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10346 || strcmp (name, ".sbss") == 0
10347 || strncmp (name, ".sbss.", 6) == 0
10348 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10349 flags = SECTION_SMALL;
10350
30ed9d3d
TG
10351#if TARGET_ABI_OPEN_VMS
10352 if (decl && DECL_ATTRIBUTES (decl)
10353 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10354 flags |= SECTION_VMS_OVERLAY;
10355#endif
10356
9b580a0b 10357 flags |= default_section_type_flags (decl, name, reloc);
abb8b19a 10358 return flags;
1e1bd14e
RH
10359}
10360
57782ad8
MM
10361/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10362 structure type and that the address of that type should be passed
10363 in out0, rather than in r8. */
10364
10365static bool
10366ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10367{
10368 tree ret_type = TREE_TYPE (fntype);
10369
10370 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10371 as the structure return address parameter, if the return value
10372 type has a non-trivial copy constructor or destructor. It is not
10373 clear if this same convention should be used for other
10374 programming languages. Until G++ 3.4, we incorrectly used r8 for
10375 these return values. */
10376 return (abi_version_at_least (2)
10377 && ret_type
10378 && TYPE_MODE (ret_type) == BLKmode
10379 && TREE_ADDRESSABLE (ret_type)
10380 && strcmp (lang_hooks.name, "GNU C++") == 0);
10381}
1e1bd14e 10382
5f13cfc6
RH
10383/* Output the assembler code for a thunk function. THUNK_DECL is the
10384 declaration for the thunk function itself, FUNCTION is the decl for
10385 the target function. DELTA is an immediate constant offset to be
272d0bee 10386 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
10387 *(*this + vcall_offset) should be added to THIS. */
10388
c590b625 10389static void
9c808aad
AJ
10390ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10391 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10392 tree function)
483ab821 10393{
0a2aaacc 10394 rtx this_rtx, insn, funexp;
57782ad8
MM
10395 unsigned int this_parmno;
10396 unsigned int this_regno;
13f70342 10397 rtx delta_rtx;
5f13cfc6 10398
599aedd9 10399 reload_completed = 1;
fe3ad572 10400 epilogue_completed = 1;
599aedd9 10401
5f13cfc6
RH
10402 /* Set things up as ia64_expand_prologue might. */
10403 last_scratch_gr_reg = 15;
10404
10405 memset (&current_frame_info, 0, sizeof (current_frame_info));
10406 current_frame_info.spill_cfa_off = -16;
10407 current_frame_info.n_input_regs = 1;
10408 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10409
5f13cfc6 10410 /* Mark the end of the (empty) prologue. */
2e040219 10411 emit_note (NOTE_INSN_PROLOGUE_END);
5f13cfc6 10412
57782ad8
MM
10413 /* Figure out whether "this" will be the first parameter (the
10414 typical case) or the second parameter (as happens when the
10415 virtual function returns certain class objects). */
10416 this_parmno
10417 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10418 ? 1 : 0);
10419 this_regno = IN_REG (this_parmno);
10420 if (!TARGET_REG_NAMES)
10421 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10422
0a2aaacc 10423 this_rtx = gen_rtx_REG (Pmode, this_regno);
13f70342
RH
10424
10425 /* Apply the constant offset, if required. */
10426 delta_rtx = GEN_INT (delta);
36c216e5
MM
10427 if (TARGET_ILP32)
10428 {
57782ad8 10429 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
36c216e5 10430 REG_POINTER (tmp) = 1;
13f70342 10431 if (delta && satisfies_constraint_I (delta_rtx))
36c216e5 10432 {
0a2aaacc 10433 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
36c216e5
MM
10434 delta = 0;
10435 }
10436 else
0a2aaacc 10437 emit_insn (gen_ptr_extend (this_rtx, tmp));
36c216e5 10438 }
5f13cfc6
RH
10439 if (delta)
10440 {
13f70342 10441 if (!satisfies_constraint_I (delta_rtx))
5f13cfc6
RH
10442 {
10443 rtx tmp = gen_rtx_REG (Pmode, 2);
10444 emit_move_insn (tmp, delta_rtx);
10445 delta_rtx = tmp;
10446 }
0a2aaacc 10447 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
5f13cfc6
RH
10448 }
10449
10450 /* Apply the offset from the vtable, if required. */
10451 if (vcall_offset)
10452 {
10453 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10454 rtx tmp = gen_rtx_REG (Pmode, 2);
10455
36c216e5
MM
10456 if (TARGET_ILP32)
10457 {
10458 rtx t = gen_rtx_REG (ptr_mode, 2);
10459 REG_POINTER (t) = 1;
0a2aaacc 10460 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
13f70342 10461 if (satisfies_constraint_I (vcall_offset_rtx))
36c216e5 10462 {
13f70342 10463 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
36c216e5
MM
10464 vcall_offset = 0;
10465 }
10466 else
10467 emit_insn (gen_ptr_extend (tmp, t));
10468 }
10469 else
0a2aaacc 10470 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
5f13cfc6 10471
36c216e5 10472 if (vcall_offset)
5f13cfc6 10473 {
13f70342 10474 if (!satisfies_constraint_J (vcall_offset_rtx))
36c216e5
MM
10475 {
10476 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10477 emit_move_insn (tmp2, vcall_offset_rtx);
10478 vcall_offset_rtx = tmp2;
10479 }
10480 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
5f13cfc6 10481 }
5f13cfc6 10482
36c216e5 10483 if (TARGET_ILP32)
13f70342 10484 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
36c216e5
MM
10485 else
10486 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
5f13cfc6 10487
0a2aaacc 10488 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
5f13cfc6
RH
10489 }
10490
10491 /* Generate a tail call to the target function. */
10492 if (! TREE_USED (function))
10493 {
10494 assemble_external (function);
10495 TREE_USED (function) = 1;
10496 }
10497 funexp = XEXP (DECL_RTL (function), 0);
10498 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10499 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10500 insn = get_last_insn ();
10501 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
10502
10503 /* Code generation for calls relies on splitting. */
10504 reload_completed = 1;
fe3ad572 10505 epilogue_completed = 1;
599aedd9
RH
10506 try_split (PATTERN (insn), insn, 0);
10507
5f13cfc6
RH
10508 emit_barrier ();
10509
10510 /* Run just enough of rest_of_compilation to get the insns emitted.
10511 There's not really enough bulk here to make other passes such as
10512    instruction scheduling worthwhile.  Note that use_thunk calls
10513 assemble_start_function and assemble_end_function. */
599aedd9 10514
55e092c4 10515 insn_locators_alloc ();
18dbd950 10516 emit_all_insn_group_barriers (NULL);
5f13cfc6 10517 insn = get_insns ();
5f13cfc6
RH
10518 shorten_branches (insn);
10519 final_start_function (insn, file, 1);
c9d691e9 10520 final (insn, file, 1);
5f13cfc6 10521 final_end_function ();
599aedd9
RH
10522
10523 reload_completed = 0;
fe3ad572 10524 epilogue_completed = 0;
483ab821
MM
10525}
10526
351a758b
KH
10527/* Worker function for TARGET_STRUCT_VALUE_RTX. */
10528
10529static rtx
57782ad8 10530ia64_struct_value_rtx (tree fntype,
351a758b
KH
10531 int incoming ATTRIBUTE_UNUSED)
10532{
f2972bf8
DR
10533  if (TARGET_ABI_OPEN_VMS
10534      || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
57782ad8 10535 return NULL_RTX;
351a758b
KH
10536 return gen_rtx_REG (Pmode, GR_REG (8));
10537}
10538
88ed5ef5
SE
10539static bool
10540ia64_scalar_mode_supported_p (enum machine_mode mode)
10541{
10542 switch (mode)
10543 {
10544 case QImode:
10545 case HImode:
10546 case SImode:
10547 case DImode:
10548 case TImode:
10549 return true;
10550
10551 case SFmode:
10552 case DFmode:
10553 case XFmode:
4de67c26 10554 case RFmode:
88ed5ef5
SE
10555 return true;
10556
10557 case TFmode:
c252db20 10558 return true;
88ed5ef5
SE
10559
10560 default:
10561 return false;
10562 }
10563}
10564
f61134e8
RH
10565static bool
10566ia64_vector_mode_supported_p (enum machine_mode mode)
10567{
10568 switch (mode)
10569 {
10570 case V8QImode:
10571 case V4HImode:
10572 case V2SImode:
10573 return true;
10574
10575 case V2SFmode:
10576 return true;
10577
10578 default:
10579 return false;
10580 }
10581}
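/* Hedged illustration of the vector modes accepted above, using GCC's
   generic vector_size attribute; each typedef maps to one of the 8-byte
   modes reported as supported (V8QImode, V4HImode, V2SImode, V2SFmode):

     typedef char  v8qi __attribute__ ((vector_size (8)));
     typedef short v4hi __attribute__ ((vector_size (8)));
     typedef int   v2si __attribute__ ((vector_size (8)));
     typedef float v2sf __attribute__ ((vector_size (8)));

     v4hi vadd (v4hi a, v4hi b) { return a + b; }

   The typedef and function names are illustrative only.  */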
10582
694a2f6e
EB
10583/* Implement the FUNCTION_PROFILER macro. */
10584
2b4f149b
RH
10585void
10586ia64_output_function_profiler (FILE *file, int labelno)
10587{
694a2f6e
EB
10588 bool indirect_call;
10589
10590 /* If the function needs a static chain and the static chain
10591 register is r15, we use an indirect call so as to bypass
10592 the PLT stub in case the executable is dynamically linked,
10593 because the stub clobbers r15 as per 5.3.6 of the psABI.
10594    We don't need to do that in non-canonical PIC mode. */
10595
10596 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10597 {
10598 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10599 indirect_call = true;
10600 }
10601 else
10602 indirect_call = false;
10603
2b4f149b
RH
10604 if (TARGET_GNU_AS)
10605 fputs ("\t.prologue 4, r40\n", file);
10606 else
10607 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10608 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
bd8633a3
RH
10609
10610 if (NO_PROFILE_COUNTERS)
694a2f6e 10611 fputs ("\tmov out3 = r0\n", file);
bd8633a3
RH
10612 else
10613 {
10614 char buf[20];
10615 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10616
10617 if (TARGET_AUTO_PIC)
10618 fputs ("\tmovl out3 = @gprel(", file);
10619 else
10620 fputs ("\taddl out3 = @ltoff(", file);
10621 assemble_name (file, buf);
10622 if (TARGET_AUTO_PIC)
694a2f6e 10623 fputs (")\n", file);
bd8633a3 10624 else
694a2f6e 10625 fputs ("), r1\n", file);
bd8633a3
RH
10626 }
10627
694a2f6e
EB
10628 if (indirect_call)
10629 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10630 fputs ("\t;;\n", file);
10631
2b4f149b 10632 fputs ("\t.save rp, r42\n", file);
bd8633a3 10633 fputs ("\tmov out2 = b0\n", file);
694a2f6e
EB
10634 if (indirect_call)
10635 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2b4f149b 10636 fputs ("\t.body\n", file);
2b4f149b 10637 fputs ("\tmov out1 = r1\n", file);
694a2f6e
EB
10638 if (indirect_call)
10639 {
10640 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10641 fputs ("\tmov b6 = r16\n", file);
10642 fputs ("\tld8 r1 = [r14]\n", file);
10643 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10644 }
10645 else
10646 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2b4f149b
RH
10647}
10648
d26afa4f
SE
10649static GTY(()) rtx mcount_func_rtx;
10650static rtx
10651gen_mcount_func_rtx (void)
10652{
10653 if (!mcount_func_rtx)
10654 mcount_func_rtx = init_one_libfunc ("_mcount");
10655 return mcount_func_rtx;
10656}
10657
10658void
10659ia64_profile_hook (int labelno)
10660{
10661 rtx label, ip;
10662
10663 if (NO_PROFILE_COUNTERS)
10664 label = const0_rtx;
10665 else
10666 {
10667 char buf[30];
10668 const char *label_name;
10669 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10670 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10671 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10672 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10673 }
10674 ip = gen_reg_rtx (Pmode);
10675 emit_insn (gen_ip_value (ip));
10676 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10677 VOIDmode, 3,
10678 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10679 ip, Pmode,
10680 label, Pmode);
10681}
10682
cac24f06
JM
10683/* Return the mangling of TYPE if it is an extended fundamental type. */
10684
10685static const char *
3101faab 10686ia64_mangle_type (const_tree type)
cac24f06 10687{
608063c3
JB
10688 type = TYPE_MAIN_VARIANT (type);
10689
10690 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10691 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10692 return NULL;
10693
cac24f06
JM
10694 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10695 mangled as "e". */
10696 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10697 return "g";
10698 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10699 an extended mangling. Elsewhere, "e" is available since long
10700 double is 80 bits. */
10701 if (TYPE_MODE (type) == XFmode)
10702 return TARGET_HPUX ? "u9__float80" : "e";
4de67c26
JM
10703 if (TYPE_MODE (type) == RFmode)
10704 return "u7__fpreg";
10705 return NULL;
10706}
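/* Worked example of the manglings above (hedged; the exact symbol depends
   on the C++ front end): with Itanium C++ mangling, a declaration like

     void f (__fpreg);

   uses the vendor-extended code "u7__fpreg", giving roughly _Z1fu7__fpreg,
   while __float128 mangles as "g" on non-HP-UX targets and __float80 as
   "u9__float80" on HP-UX.  */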
10707
10708/* Return the diagnostic message string if conversion from FROMTYPE to
10709 TOTYPE is not allowed, NULL otherwise. */
10710static const char *
3101faab 10711ia64_invalid_conversion (const_tree fromtype, const_tree totype)
4de67c26
JM
10712{
10713 /* Reject nontrivial conversion to or from __fpreg. */
10714 if (TYPE_MODE (fromtype) == RFmode
10715 && TYPE_MODE (totype) != RFmode
10716 && TYPE_MODE (totype) != VOIDmode)
10717 return N_("invalid conversion from %<__fpreg%>");
10718 if (TYPE_MODE (totype) == RFmode
10719 && TYPE_MODE (fromtype) != RFmode)
10720 return N_("invalid conversion to %<__fpreg%>");
10721 return NULL;
10722}
10723
10724/* Return the diagnostic message string if the unary operation OP is
10725 not permitted on TYPE, NULL otherwise. */
10726static const char *
3101faab 10727ia64_invalid_unary_op (int op, const_tree type)
4de67c26
JM
10728{
10729 /* Reject operations on __fpreg other than unary + or &. */
10730 if (TYPE_MODE (type) == RFmode
10731 && op != CONVERT_EXPR
10732 && op != ADDR_EXPR)
10733 return N_("invalid operation on %<__fpreg%>");
10734 return NULL;
10735}
10736
10737/* Return the diagnostic message string if the binary operation OP is
10738 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10739static const char *
3101faab 10740ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
4de67c26
JM
10741{
10742 /* Reject operations on __fpreg. */
10743 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10744 return N_("invalid operation on %<__fpreg%>");
cac24f06
JM
10745 return NULL;
10746}
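/* Hedged example of the diagnostics above; given

     __fpreg r;
     double  d = r;        rejected: invalid conversion from __fpreg
     __fpreg s = r + r;    rejected: invalid operation on __fpreg
     __fpreg *p = &r;      accepted: unary & is allowed

   only trivial conversions and unary + or & are permitted on __fpreg.
   The variable names are illustrative only.  */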
10747
bb83aa4b
MK
10748/* Implement overriding of the optimization options. */
10749void
10750ia64_optimization_options (int level ATTRIBUTE_UNUSED,
10751 int size ATTRIBUTE_UNUSED)
10752{
10753 /* Let the scheduler form additional regions. */
10754 set_param_value ("max-sched-extend-regions-iters", 2);
47eb5b32
ZD
10755
10756 /* Set the default values for cache-related parameters. */
10757 set_param_value ("simultaneous-prefetches", 6);
10758 set_param_value ("l1-cache-line-size", 32);
10759
388092d5 10760  set_param_value ("sched-mem-true-dep-cost", 4);
bb83aa4b
MK
10761}
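/* The defaults set above correspond to these command-line parameters, which
   a user can still override explicitly (a hedged equivalence, not a tuning
   recommendation):

     --param max-sched-extend-regions-iters=2
     --param simultaneous-prefetches=6
     --param l1-cache-line-size=32
     --param sched-mem-true-dep-cost=4
*/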
10762
812b587e
SE
10763/* HP-UX version_id attribute.
10764   For object foo, if the version_id is set to 1234, put out an alias
10765   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
10766 other than an alias statement because it is an illegal symbol name. */
10767
10768static tree
10769ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10770 tree name ATTRIBUTE_UNUSED,
10771 tree args,
10772 int flags ATTRIBUTE_UNUSED,
10773 bool *no_add_attrs)
10774{
10775 tree arg = TREE_VALUE (args);
10776
10777 if (TREE_CODE (arg) != STRING_CST)
10778 {
10779 error("version attribute is not a string");
10780 *no_add_attrs = true;
10781 return NULL_TREE;
10782 }
10783 return NULL_TREE;
10784}
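/* Hedged usage sketch for the HP-UX attribute handled above; the argument
   must be a string constant or the error below is issued:

     extern int foo (void) __attribute__ ((version_id ("20040821")));

   "foo" and the version string are illustrative only.  */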
10785
a31fa2e0
SE
10786/* Target hook for c_mode_for_suffix. */
10787
10788static enum machine_mode
10789ia64_c_mode_for_suffix (char suffix)
10790{
10791 if (suffix == 'q')
10792 return TFmode;
10793 if (suffix == 'w')
10794 return XFmode;
10795
10796 return VOIDmode;
10797}
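/* Effect of the suffix mapping above on C constants (hedged sketch): with
   the GNU extensions enabled, a 'q' suffix yields a TFmode (__float128)
   constant and a 'w' suffix an XFmode (__float80) constant:

     __float128 q = 1.1q;
     __float80  w = 1.1w;
*/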
10798
f2972bf8
DR
10799static enum machine_mode
10800ia64_promote_function_mode (const_tree type,
10801 enum machine_mode mode,
10802 int *punsignedp,
c3313412
SE
10803 const_tree funtype,
10804 int for_return)
f2972bf8
DR
10805{
10806 /* Special processing required for OpenVMS ... */
10807
10808 if (!TARGET_ABI_OPEN_VMS)
c3313412
SE
10809 return default_promote_function_mode(type, mode, punsignedp, funtype,
10810 for_return);
f2972bf8
DR
10811
10812 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10813 HP OpenVMS I64 Version 8.2EFT,
10814 chapter 4 "OpenVMS I64 Conventions"
10815 section 4.7 "Procedure Linkage"
10816 subsection 4.7.5.2, "Normal Register Parameters"
10817
10818 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10819 values passed in registers are zero-filled; signed integral values as
10820 well as unsigned 32-bit integral values are sign-extended to 64 bits.
10821 For all other types passed in the general registers, unused bits are
10822 undefined." */
10823
10824 if (!AGGREGATE_TYPE_P (type)
10825 && GET_MODE_CLASS (mode) == MODE_INT
10826 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
10827 {
10828 if (mode == SImode)
10829 *punsignedp = 0;
10830 return DImode;
10831 }
10832 else
10833 return promote_mode (type, mode, punsignedp);
10834}
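/* Hedged illustration of the VMS promotion rule above: for a prototype like

     void g (unsigned int u, unsigned short s);

   both arguments are widened to DImode registers; the 32-bit unsigned int
   is sign-extended (punsignedp is cleared for SImode) while the narrower
   unsigned short remains zero-filled, matching the quoted calling
   standard.  "g" is illustrative only.  */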
10835
f3a83111
SE
10836static GTY(()) rtx ia64_dconst_0_5_rtx;
10837
10838rtx
10839ia64_dconst_0_5 (void)
10840{
10841 if (! ia64_dconst_0_5_rtx)
10842 {
10843 REAL_VALUE_TYPE rv;
10844 real_from_string (&rv, "0.5");
10845 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
10846 }
10847 return ia64_dconst_0_5_rtx;
10848}
10849
10850static GTY(()) rtx ia64_dconst_0_375_rtx;
10851
10852rtx
10853ia64_dconst_0_375 (void)
10854{
10855 if (! ia64_dconst_0_375_rtx)
10856 {
10857 REAL_VALUE_TYPE rv;
10858 real_from_string (&rv, "0.375");
10859 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
10860 }
10861 return ia64_dconst_0_375_rtx;
10862}
10863
10864
e2500fed 10865#include "gt-ia64.h"