/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009, 2010
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

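/* Registers that may need a prologue save slot or long-term scratch;
   used to index ia64_frame_info.r and emitted_frame_related_regs below.  */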
enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;       /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;              /* mask of saved registers.  */
  unsigned int gr_used_mask;      /* mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;               /* number of input registers used.  */
  int n_local_regs;               /* number of local registers used.  */
  int n_output_regs;              /* number of output registers used.  */
  int n_rotate_regs;              /* number of rotating registers used.  */

  char need_regstk;               /* true if a .regstk directive needed.  */
  char initialized;               /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
\f
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);
static int process_set (FILE *, rtx, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
                                         unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
                                                     enum machine_mode,
                                                     int *,
                                                     const_tree,
                                                     int);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
#if TARGET_ABI_OPEN_VMS
  { "common_object", 1, 1, true, false, false, ia64_vms_common_object_attribute},
#endif
  { "version_id", 1, 1, true, false, false,
    ia64_handle_version_id_attribute },
  { NULL, 0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

struct gcc_target targetm = TARGET_INITIALIZER;
\f
typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

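/* Create, on first use, the identifier nodes used to recognize the
   arguments of the "model" attribute.  */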
static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

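/* Handle a "model" attribute; arguments as in
   struct attribute_spec.handler.  An illustrative use is
   int x __attribute__ ((model ("small")));  */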
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
               name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error_at (DECL_SOURCE_LOCATION (decl),
                    "an address area attribute cannot be specified for "
                    "local variables");
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("address area of %q+D conflicts with previous "
                 "declaration", decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
                "address area attribute cannot be specified for "
                "functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
               name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree decl = *node;
  tree id, val;
  if (! DECL_P (decl))
    abort ();

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
               "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                     unsigned HOST_WIDE_INT size,
                                     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  /* As common_object attribute set DECL_SECTION_NAME check it before
     looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);
  else
    attr = NULL_TREE;

  if (!attr)
    {
      /* Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
               size, align / BITS_PER_UNIT);
    }
  else
    {
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
    }
}

/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */

void
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  if (!(flags & SECTION_VMS_OVERLAY))
    {
      default_elf_asm_named_section (name, flags, decl);
      return;
    }
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
    abort ();

  if (flags & SECTION_DECLARED)
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
}

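/* Record the address area chosen for DECL in the SYMBOL_REF_FLAGS of
   SYMBOL, DECL's rtl.  */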
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

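/* Implement TARGET_ENCODE_SECTION_INFO.  */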
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

        if (GET_CODE (adjust) != CONST_INT
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
          return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

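/* Return nonzero if exactly one of OP1 and OP2 satisfies basereg_operand,
   i.e. when rewriting the addition as an addp4 is worthwhile.  */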
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
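/* For example, a mask of 0xff0 combined with a shift count of 4 describes
   an 8-bit field, since (0xff0 >> 4) + 1 == 0x100 == 1 << 8.  */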

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
          || GET_MODE (x) == DFmode)
        return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
         match the code in ia64_expand_move and move_operand, even though they
         are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
        {
          HOST_WIDE_INT addend = 0;
          rtx op = x;

          if (GET_CODE (op) == CONST
              && GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
            {
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
              op = XEXP (XEXP (op, 0), 0);
            }

          if (any_offset_symbol_operand (op, GET_MODE (op))
              || function_operand (op, GET_MODE (op)))
            return true;
          if (aligned_offset_symbol_operand (op, GET_MODE (op)))
            return (addend & 0x3fff) == 0;
          return false;
        }
      return false;

    case CONST_VECTOR:
      {
        enum machine_mode mode = GET_MODE (x);

        if (mode == V2SFmode)
          return satisfies_constraint_Y (x);

        return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
                && GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (rtx x)
{
  if (GET_MODE (x) == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
        {
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
          hi = hi - lo;

          if (lo != 0)
            {
              addend = lo;
              src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
            }
        }

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
        {
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
        }
    }

  return true;
}

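/* Lazily created, cached rtxes for the __tls_get_addr libfunc and for the
   thread pointer register (r13), shared by the TLS expanders below.  */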
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

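/* Expand a TLS reference of model TLS_KIND to symbol OP1 plus ADDEND
   (ORIG_OP1 is the original combined operand) into OP0.  Return the value
   still to be moved into the original destination, or NULL_RTX if the
   move has already been emitted in full.  */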
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
                         rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
        }
      else
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
                               orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

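/* Expand a move into OP0 of the source operand OP1.  Return the operand
   that should actually be moved (possibly rewritten), or NULL_RTX if the
   move has already been emitted in full.  */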
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
        {
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
          sym = XEXP (XEXP (op1, 0), 0);
        }

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
        addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
        {
          HOST_WIDE_INT addend_lo, addend_hi;

          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
          addend_hi = addend - addend_lo;

          if (addend_lo != 0)
            {
              op1 = plus_constant (sym, addend_hi);
              addend = addend_lo;
            }
          else
            addend = 0;
        }
      else
        op1 = sym;

      if (reload_completed)
        {
          /* We really should have taken care of this offset earlier.  */
          gcc_assert (addend == 0);
          if (ia64_expand_load_address (op0, op1))
            return NULL_RTX;
        }

      if (addend)
        {
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

          op1 = expand_simple_binop (mode, PLUS, subtarget,
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
          if (op0 == op1)
            return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
              p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            gcc_assert (!reversed && !dead);

            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            gcc_assert (!reversed && !dead);

            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
               8);
            break;

          case POST_MODIFY:
            gcc_assert (!reversed && !dead);

            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else
              {
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
                  {
                    /* Again the postmodify cannot be made to match,
                       but in this case it's more efficient to get rid
                       of the postmodify entirely and fix up with an
                       add insn.  */
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
                    fixup = gen_adddi3
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
                  }
                else
                  {
                    /* Combined offset still fits in the displacement field.
                       (We cannot overflow it at the high end.)  */
                    out[1] = adjust_automodify_address
                      (in, DImode, gen_rtx_POST_MODIFY
                       (Pmode, base, gen_rtx_PLUS
                        (Pmode, base,
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                       8);
                  }
              }
            break;

          default:
            gcc_unreachable ();
          }
        break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
  if (GET_CODE (EXP) == MEM                                             \
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                     \
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}

/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
          || (GET_CODE (operands[1]) == REG
              && GR_REGNO_P (REGNO (operands[1]))))
        {
          rtx op1 = operands[1];

          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          else
            op1 = gen_rtx_REG (TImode, REGNO (op1));

          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
          return true;
        }

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
        {
          /* Don't word-swap when reading in the constant.  */
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
                                           0, mode));
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
                                           0, mode));
          return true;
        }

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
         If op0 is a register, then we spill op1, so that we now have a
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
         to force the spill.  */
      if (register_operand (operands[0], mode))
        {
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
          op1 = gen_rtx_SUBREG (mode, op1, 0);
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
        }

      else
        {
          rtx in[2];

          gcc_assert (GET_CODE (operands[0]) == MEM);

          /* Don't word-swap when writing out the value.  */
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
          return true;
        }
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
        {
          rtx memt, memx, in = operands[1];
          if (CONSTANT_P (in))
            in = validize_mem (force_const_mem (mode, in));
          if (GET_CODE (in) == MEM)
            memt = adjust_address (in, TImode, 0);
          else
            {
              memt = assign_stack_temp (TImode, 16, 0);
              memx = adjust_address (memt, mode, 0);
              emit_move_insn (memx, in);
            }
          emit_move_insn (op0, memt);
          return true;
        }

      if (!ia64_move_ok (operands[0], operands[1]))
        operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}

f90b7a5a
PB
1599/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1600 with the expression that holds the compare result (in VOIDmode). */
f2f90c63 1601
24ea7948
ZW
1602static GTY(()) rtx cmptf_libfunc;
1603
f90b7a5a
PB
1604void
1605ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
f2f90c63 1606{
f90b7a5a 1607 enum rtx_code code = GET_CODE (*expr);
f2f90c63
RH
1608 rtx cmp;
1609
1610 /* If we have a BImode input, then we already have a compare result, and
1611 do not need to emit another comparison. */
f90b7a5a 1612 if (GET_MODE (*op0) == BImode)
f2f90c63 1613 {
f90b7a5a
PB
1614 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1615 cmp = *op0;
f2f90c63 1616 }
24ea7948
ZW
1617 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1618	 magic number as its third argument, which indicates what to do.
1619 The return value is an integer to be compared against zero. */
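  /* For example, under this mapping a <= b is emitted as the call
     _U_Qfcmp (a, b, QCMP_LT|QCMP_EQ|QCMP_INV), and the DImode result is
     then tested NE against zero, per the switch below.  */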
f90b7a5a 1620 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
24ea7948
ZW
1621 {
1622 enum qfcmp_magic {
1623 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1624 QCMP_UNORD = 2,
1625 QCMP_EQ = 4,
1626 QCMP_LT = 8,
1627 QCMP_GT = 16
32e8bb8e
ILT
1628 };
1629 int magic;
24ea7948
ZW
1630 enum rtx_code ncode;
1631 rtx ret, insns;
e820471b 1632
f90b7a5a 1633 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
24ea7948
ZW
1634 switch (code)
1635 {
1636 /* 1 = equal, 0 = not equal. Equality operators do
1637 not raise FP_INVALID when given an SNaN operand. */
1638 case EQ: magic = QCMP_EQ; ncode = NE; break;
1639 case NE: magic = QCMP_EQ; ncode = EQ; break;
1640 /* isunordered() from C99. */
1641 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
b1346fa3 1642 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
24ea7948
ZW
1643 /* Relational operators raise FP_INVALID when given
1644 an SNaN operand. */
1645 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1646 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1647 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1648 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1649 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1650	     Expanders for buneq etc. would have to be added to ia64.md
1651 for this to be useful. */
e820471b 1652 default: gcc_unreachable ();
24ea7948
ZW
1653 }
1654
1655 start_sequence ();
1656
1657 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
f90b7a5a 1658 *op0, TFmode, *op1, TFmode,
24ea7948
ZW
1659 GEN_INT (magic), DImode);
1660 cmp = gen_reg_rtx (BImode);
1661 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1662 gen_rtx_fmt_ee (ncode, BImode,
1663 ret, const0_rtx)));
1664
1665 insns = get_insns ();
1666 end_sequence ();
1667
1668 emit_libcall_block (insns, cmp, cmp,
f90b7a5a 1669 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
24ea7948
ZW
1670 code = NE;
1671 }
f2f90c63
RH
1672 else
1673 {
1674 cmp = gen_reg_rtx (BImode);
1675 emit_insn (gen_rtx_SET (VOIDmode, cmp,
f90b7a5a 1676 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
f2f90c63
RH
1677 code = NE;
1678 }
1679
f90b7a5a
PB
1680 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1681 *op0 = cmp;
1682 *op1 = const0_rtx;
f2f90c63 1683}
2ed4af6f 1684
e934ca47
RH
1685/* Generate an integral vector comparison. Return true if the condition has
1686 been reversed, and so the sense of the comparison should be inverted. */
f61134e8
RH
1687
1688static bool
1689ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1690 rtx dest, rtx op0, rtx op1)
1691{
1692 bool negate = false;
1693 rtx x;
1694
e934ca47 1695 /* Canonicalize the comparison to EQ, GT, GTU. */
f61134e8
RH
1696 switch (code)
1697 {
1698 case EQ:
1699 case GT:
e934ca47 1700 case GTU:
f61134e8
RH
1701 break;
1702
1703 case NE:
f61134e8 1704 case LE:
e934ca47
RH
1705 case LEU:
1706 code = reverse_condition (code);
f61134e8
RH
1707 negate = true;
1708 break;
1709
1710 case GE:
e934ca47
RH
1711 case GEU:
1712 code = reverse_condition (code);
f61134e8
RH
1713 negate = true;
1714 /* FALLTHRU */
1715
1716 case LT:
f61134e8 1717 case LTU:
e934ca47
RH
1718 code = swap_condition (code);
1719 x = op0, op0 = op1, op1 = x;
1720 break;
f61134e8 1721
e934ca47
RH
1722 default:
1723 gcc_unreachable ();
1724 }
f61134e8 1725
e934ca47 1726 /* Unsigned parallel compare is not supported by the hardware. Play some
6283ba26 1727 tricks to turn this into a signed comparison against 0. */
e934ca47
RH
1728 if (code == GTU)
1729 {
1730 switch (mode)
1731 {
1732 case V2SImode:
f61134e8 1733 {
e934ca47
RH
1734 rtx t1, t2, mask;
1735
9540f5ef
SE
1736 /* Subtract (-(INT MAX) - 1) from both operands to make
1737 them signed. */
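	    /* The usual bias trick: x >u y if and only if
	       (x - 0x80000000) >s (y - 0x80000000), because subtracting the
	       sign-bit constant maps the unsigned range onto the signed
	       range monotonically.  */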
1738 mask = GEN_INT (0x80000000);
e934ca47 1739 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
9540f5ef
SE
1740 mask = force_reg (mode, mask);
1741 t1 = gen_reg_rtx (mode);
1742 emit_insn (gen_subv2si3 (t1, op0, mask));
1743 t2 = gen_reg_rtx (mode);
1744 emit_insn (gen_subv2si3 (t2, op1, mask));
1745 op0 = t1;
1746 op1 = t2;
6283ba26 1747 code = GT;
f61134e8 1748 }
e934ca47
RH
1749 break;
1750
1751 case V8QImode:
1752 case V4HImode:
1753 /* Perform a parallel unsigned saturating subtraction. */
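	  /* For unsigned x and y, the saturating difference x - y is nonzero
	     exactly when x >u y, so GTU falls out of comparing that
	     difference EQ to zero and flipping NEGATE below.  */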
1754 x = gen_reg_rtx (mode);
1755 emit_insn (gen_rtx_SET (VOIDmode, x,
1756 gen_rtx_US_MINUS (mode, op0, op1)));
6283ba26
RH
1757
1758 code = EQ;
1759 op0 = x;
1760 op1 = CONST0_RTX (mode);
1761 negate = !negate;
e934ca47
RH
1762 break;
1763
1764 default:
1765 gcc_unreachable ();
1766 }
f61134e8
RH
1767 }
1768
1769 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1770 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1771
1772 return negate;
1773}
1774
f61134e8
RH
1775/* Emit an integral vector conditional move. */
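/* The vector compare leaves each element either all-ones or all-zeros, so
   the conditional move can be built bitwise as
   (cmp & op_true) | (~cmp & op_false); the special cases below drop one
   of the ANDs when either arm is the zero vector.  */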
1776
1777void
1778ia64_expand_vecint_cmov (rtx operands[])
1779{
1780 enum machine_mode mode = GET_MODE (operands[0]);
1781 enum rtx_code code = GET_CODE (operands[3]);
1782 bool negate;
1783 rtx cmp, x, ot, of;
1784
f61134e8
RH
1785 cmp = gen_reg_rtx (mode);
1786 negate = ia64_expand_vecint_compare (code, mode, cmp,
1787 operands[4], operands[5]);
1788
1789 ot = operands[1+negate];
1790 of = operands[2-negate];
1791
1792 if (ot == CONST0_RTX (mode))
1793 {
1794 if (of == CONST0_RTX (mode))
1795 {
1796 emit_move_insn (operands[0], ot);
1797 return;
1798 }
1799
1800 x = gen_rtx_NOT (mode, cmp);
1801 x = gen_rtx_AND (mode, x, of);
1802 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1803 }
1804 else if (of == CONST0_RTX (mode))
1805 {
1806 x = gen_rtx_AND (mode, cmp, ot);
1807 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1808 }
1809 else
1810 {
1811 rtx t, f;
1812
1813 t = gen_reg_rtx (mode);
1814 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1815 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1816
1817 f = gen_reg_rtx (mode);
1818 x = gen_rtx_NOT (mode, cmp);
1819 x = gen_rtx_AND (mode, x, operands[2-negate]);
1820 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1821
1822 x = gen_rtx_IOR (mode, t, f);
1823 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1824 }
1825}
1826
1827/* Emit an integral vector min or max operation. Return true if all done. */
1828
1829bool
1830ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1831 rtx operands[])
1832{
cabddb23 1833 rtx xops[6];
f61134e8
RH
1834
1835 /* These four combinations are supported directly. */
1836 if (mode == V8QImode && (code == UMIN || code == UMAX))
1837 return false;
1838 if (mode == V4HImode && (code == SMIN || code == SMAX))
1839 return false;
1840
93b4080b
RH
1841 /* This combination can be implemented with only saturating subtraction. */
1842 if (mode == V4HImode && code == UMAX)
1843 {
1844 rtx x, tmp = gen_reg_rtx (mode);
1845
1846 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1847 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1848
1849 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1850 return true;
1851 }
1852
f61134e8
RH
 1853  /* Everything else is implemented via vector comparisons.  */
1854 xops[0] = operands[0];
1855 xops[4] = xops[1] = operands[1];
1856 xops[5] = xops[2] = operands[2];
1857
1858 switch (code)
1859 {
1860 case UMIN:
1861 code = LTU;
1862 break;
1863 case UMAX:
1864 code = GTU;
1865 break;
1866 case SMIN:
1867 code = LT;
1868 break;
1869 case SMAX:
1870 code = GT;
1871 break;
1872 default:
e820471b 1873 gcc_unreachable ();
f61134e8
RH
1874 }
1875 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1876
1877 ia64_expand_vecint_cmov (xops);
1878 return true;
1879}
1880
e898620c
RH
 1881/* Emit an integral vector widening sum operation.  */
1882
1883void
1884ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1885{
1886 rtx l, h, x, s;
1887 enum machine_mode wmode, mode;
1888 rtx (*unpack_l) (rtx, rtx, rtx);
1889 rtx (*unpack_h) (rtx, rtx, rtx);
1890 rtx (*plus) (rtx, rtx, rtx);
1891
1892 wmode = GET_MODE (operands[0]);
1893 mode = GET_MODE (operands[1]);
1894
1895 switch (mode)
1896 {
1897 case V8QImode:
1898 unpack_l = gen_unpack1_l;
1899 unpack_h = gen_unpack1_h;
1900 plus = gen_addv4hi3;
1901 break;
1902 case V4HImode:
1903 unpack_l = gen_unpack2_l;
1904 unpack_h = gen_unpack2_h;
1905 plus = gen_addv2si3;
1906 break;
1907 default:
1908 gcc_unreachable ();
1909 }
1910
1911 /* Fill in x with the sign extension of each element in op1. */
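  /* X provides the high half of each widened element: the zero vector for
     an unsigned widening, or the all-ones mask produced by op1 < 0 for a
     signed widening, which is exactly the sign extension.  */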
1912 if (unsignedp)
1913 x = CONST0_RTX (mode);
1914 else
1915 {
1916 bool neg;
1917
1918 x = gen_reg_rtx (mode);
1919
1920 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1921 CONST0_RTX (mode));
1922 gcc_assert (!neg);
1923 }
1924
1925 l = gen_reg_rtx (wmode);
1926 h = gen_reg_rtx (wmode);
1927 s = gen_reg_rtx (wmode);
1928
1929 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1930 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1931 emit_insn (plus (s, l, operands[2]));
1932 emit_insn (plus (operands[0], h, s));
1933}
1934
1935/* Emit a signed or unsigned V8QI dot product operation. */
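/* The expansion unpacks each V8QI operand into low and high V4HI halves
   (X1/X2 supply the zero or sign extension), forms four V2SI partial
   products with pmpy2, and then adds the partial products together with
   the accumulator in operands[3].  */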
1936
1937void
1938ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1939{
1940 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1941
1942 /* Fill in x1 and x2 with the sign extension of each element. */
1943 if (unsignedp)
1944 x1 = x2 = CONST0_RTX (V8QImode);
1945 else
1946 {
1947 bool neg;
1948
1949 x1 = gen_reg_rtx (V8QImode);
1950 x2 = gen_reg_rtx (V8QImode);
1951
1952 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1953 CONST0_RTX (V8QImode));
1954 gcc_assert (!neg);
1955 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1956 CONST0_RTX (V8QImode));
1957 gcc_assert (!neg);
1958 }
1959
1960 l1 = gen_reg_rtx (V4HImode);
1961 l2 = gen_reg_rtx (V4HImode);
1962 h1 = gen_reg_rtx (V4HImode);
1963 h2 = gen_reg_rtx (V4HImode);
1964
1965 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1966 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1967 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1968 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1969
1970 p1 = gen_reg_rtx (V2SImode);
1971 p2 = gen_reg_rtx (V2SImode);
1972 p3 = gen_reg_rtx (V2SImode);
1973 p4 = gen_reg_rtx (V2SImode);
1974 emit_insn (gen_pmpy2_r (p1, l1, l2));
1975 emit_insn (gen_pmpy2_l (p2, l1, l2));
1976 emit_insn (gen_pmpy2_r (p3, h1, h2));
1977 emit_insn (gen_pmpy2_l (p4, h1, h2));
1978
1979 s1 = gen_reg_rtx (V2SImode);
1980 s2 = gen_reg_rtx (V2SImode);
1981 s3 = gen_reg_rtx (V2SImode);
1982 emit_insn (gen_addv2si3 (s1, p1, p2));
1983 emit_insn (gen_addv2si3 (s2, p3, p4));
1984 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1985 emit_insn (gen_addv2si3 (operands[0], s2, s3));
1986}
1987
2ed4af6f
RH
1988/* Emit the appropriate sequence for a call. */
1989
1990void
9c808aad
AJ
1991ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1992 int sibcall_p)
2ed4af6f 1993{
599aedd9 1994 rtx insn, b0;
2ed4af6f
RH
1995
1996 addr = XEXP (addr, 0);
c8083186 1997 addr = convert_memory_address (DImode, addr);
2ed4af6f 1998 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 1999
599aedd9 2000 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
2001 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2002 {
2003 if (sibcall_p)
599aedd9 2004 insn = gen_sibcall_nogp (addr);
2ed4af6f 2005 else if (! retval)
599aedd9 2006 insn = gen_call_nogp (addr, b0);
2ed4af6f 2007 else
599aedd9
RH
2008 insn = gen_call_value_nogp (retval, addr, b0);
2009 insn = emit_call_insn (insn);
2ed4af6f 2010 }
2ed4af6f 2011 else
599aedd9
RH
2012 {
2013 if (sibcall_p)
2014 insn = gen_sibcall_gp (addr);
2015 else if (! retval)
2016 insn = gen_call_gp (addr, b0);
2017 else
2018 insn = gen_call_value_gp (retval, addr, b0);
2019 insn = emit_call_insn (insn);
2ed4af6f 2020
599aedd9
RH
2021 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2022 }
6dad5a56 2023
599aedd9 2024 if (sibcall_p)
4e14f1f9 2025 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
f2972bf8
DR
2026
2027 if (TARGET_ABI_OPEN_VMS)
2028 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2029 gen_rtx_REG (DImode, GR_REG (25)));
599aedd9
RH
2030}
2031
6fb5fa3c
DB
2032static void
2033reg_emitted (enum ia64_frame_regs r)
2034{
2035 if (emitted_frame_related_regs[r] == 0)
2036 emitted_frame_related_regs[r] = current_frame_info.r[r];
2037 else
2038 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2039}
2040
2041static int
2042get_reg (enum ia64_frame_regs r)
2043{
2044 reg_emitted (r);
2045 return current_frame_info.r[r];
2046}
2047
2048static bool
2049is_emitted (int regno)
2050{
09639a83 2051 unsigned int r;
6fb5fa3c
DB
2052
2053 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2054 if (emitted_frame_related_regs[r] == regno)
2055 return true;
2056 return false;
2057}
2058
599aedd9 2059void
9c808aad 2060ia64_reload_gp (void)
599aedd9
RH
2061{
2062 rtx tmp;
2063
6fb5fa3c
DB
2064 if (current_frame_info.r[reg_save_gp])
2065 {
2066 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2067 }
2ed4af6f 2068 else
599aedd9
RH
2069 {
2070 HOST_WIDE_INT offset;
13f70342 2071 rtx offset_r;
599aedd9
RH
2072
2073 offset = (current_frame_info.spill_cfa_off
2074 + current_frame_info.spill_size);
2075 if (frame_pointer_needed)
2076 {
2077 tmp = hard_frame_pointer_rtx;
2078 offset = -offset;
2079 }
2080 else
2081 {
2082 tmp = stack_pointer_rtx;
2083 offset = current_frame_info.total_size - offset;
2084 }
2085
13f70342
RH
2086 offset_r = GEN_INT (offset);
2087 if (satisfies_constraint_I (offset_r))
2088 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
599aedd9
RH
2089 else
2090 {
13f70342 2091 emit_move_insn (pic_offset_table_rtx, offset_r);
599aedd9
RH
2092 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2093 pic_offset_table_rtx, tmp));
2094 }
2095
2096 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2097 }
2098
2099 emit_move_insn (pic_offset_table_rtx, tmp);
2100}
2101
2102void
9c808aad
AJ
2103ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2104 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
2105{
2106 rtx insn;
2107 bool is_desc = false;
2108
2109 /* If we find we're calling through a register, then we're actually
2110 calling through a descriptor, so load up the values. */
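  /* A function descriptor is a pair of words: the entry address followed
     by the callee's GP value.  The code below reads the entry address via
     a post-increment and then the GP word, undoing the increment with a
     POST_DEC when ADDR must be left unchanged.  */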
4e14f1f9 2111 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
2112 {
2113 rtx tmp;
2114 bool addr_dead_p;
2115
2116 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 2117 we can legitimately change the global lifetime of the GP
9c808aad 2118 (in the form of killing where previously live). This is
599aedd9
RH
2119 because a call through a descriptor doesn't use the previous
2120 value of the GP, while a direct call does, and we do not
2121 commit to either form until the split here.
2122
2123 That said, this means that we lack precise life info for
2124 whether ADDR is dead after this call. This is not terribly
2125 important, since we can fix things up essentially for free
2126 with the POST_DEC below, but it's nice to not use it when we
2127 can immediately tell it's not necessary. */
2128 addr_dead_p = ((noreturn_p || sibcall_p
2129 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2130 REGNO (addr)))
2131 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2132
2133 /* Load the code address into scratch_b. */
2134 tmp = gen_rtx_POST_INC (Pmode, addr);
2135 tmp = gen_rtx_MEM (Pmode, tmp);
2136 emit_move_insn (scratch_r, tmp);
2137 emit_move_insn (scratch_b, scratch_r);
2138
2139 /* Load the GP address. If ADDR is not dead here, then we must
2140 revert the change made above via the POST_INCREMENT. */
2141 if (!addr_dead_p)
2142 tmp = gen_rtx_POST_DEC (Pmode, addr);
2143 else
2144 tmp = addr;
2145 tmp = gen_rtx_MEM (Pmode, tmp);
2146 emit_move_insn (pic_offset_table_rtx, tmp);
2147
2148 is_desc = true;
2149 addr = scratch_b;
2150 }
2ed4af6f 2151
6dad5a56 2152 if (sibcall_p)
599aedd9
RH
2153 insn = gen_sibcall_nogp (addr);
2154 else if (retval)
2155 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 2156 else
599aedd9 2157 insn = gen_call_nogp (addr, retaddr);
6dad5a56 2158 emit_call_insn (insn);
2ed4af6f 2159
599aedd9
RH
2160 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2161 ia64_reload_gp ();
2ed4af6f 2162}
16df4ee6
RH
2163
2164/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2165
2166 This differs from the generic code in that we know about the zero-extending
2167 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2168 also know that ld.acq+cmpxchg.rel equals a full barrier.
2169
2170 The loop we want to generate looks like
2171
2172 cmp_reg = mem;
2173 label:
2174 old_reg = cmp_reg;
2175 new_reg = cmp_reg op val;
2176 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2177 if (cmp_reg != old_reg)
2178 goto label;
2179
2180 Note that we only do the plain load from memory once. Subsequent
2181 iterations use the value loaded by the compare-and-swap pattern. */
2182
2183void
2184ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2185 rtx old_dst, rtx new_dst)
2186{
2187 enum machine_mode mode = GET_MODE (mem);
2188 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2189 enum insn_code icode;
2190
2191 /* Special case for using fetchadd. */
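  /* fetchadd_operand accepts only the few immediate addends the fetchadd
     instruction can encode; MINUS is handled by negating VAL and reusing
     the additive form.  */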
dca13767
JJ
2192 if ((mode == SImode || mode == DImode)
2193 && (code == PLUS || code == MINUS)
2194 && fetchadd_operand (val, mode))
16df4ee6 2195 {
dca13767
JJ
2196 if (code == MINUS)
2197 val = GEN_INT (-INTVAL (val));
2198
16df4ee6
RH
2199 if (!old_dst)
2200 old_dst = gen_reg_rtx (mode);
2201
2202 emit_insn (gen_memory_barrier ());
2203
2204 if (mode == SImode)
2205 icode = CODE_FOR_fetchadd_acq_si;
2206 else
2207 icode = CODE_FOR_fetchadd_acq_di;
2208 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2209
2210 if (new_dst)
2211 {
2212 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2213 true, OPTAB_WIDEN);
2214 if (new_reg != new_dst)
2215 emit_move_insn (new_dst, new_reg);
2216 }
2217 return;
2218 }
2219
2220 /* Because of the volatile mem read, we get an ld.acq, which is the
2221 front half of the full barrier. The end half is the cmpxchg.rel. */
2222 gcc_assert (MEM_VOLATILE_P (mem));
2223
2224 old_reg = gen_reg_rtx (DImode);
2225 cmp_reg = gen_reg_rtx (DImode);
2226 label = gen_label_rtx ();
2227
2228 if (mode != DImode)
2229 {
2230 val = simplify_gen_subreg (DImode, val, mode, 0);
2231 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2232 }
2233 else
2234 emit_move_insn (cmp_reg, mem);
2235
2236 emit_label (label);
2237
2238 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2239 emit_move_insn (old_reg, cmp_reg);
2240 emit_move_insn (ar_ccv, cmp_reg);
2241
2242 if (old_dst)
2243 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2244
2245 new_reg = cmp_reg;
2246 if (code == NOT)
2247 {
974920dc
UB
2248 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2249 true, OPTAB_DIRECT);
2250 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
16df4ee6 2251 }
974920dc
UB
2252 else
2253 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2254 true, OPTAB_DIRECT);
16df4ee6
RH
2255
2256 if (mode != DImode)
2257 new_reg = gen_lowpart (mode, new_reg);
2258 if (new_dst)
2259 emit_move_insn (new_dst, new_reg);
2260
2261 switch (mode)
2262 {
2263 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2264 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2265 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2266 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2267 default:
2268 gcc_unreachable ();
2269 }
2270
2271 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2272
6819a463 2273 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
16df4ee6 2274}
809d4ef1 2275\f
3b572406
RH
2276/* Begin the assembly file. */
2277
1bc7c5b6 2278static void
9c808aad 2279ia64_file_start (void)
1bc7c5b6 2280{
0f666d6e
JJ
2281 /* Variable tracking should be run after all optimizations which change order
2282 of insns. It also needs a valid CFG. This can't be done in
ee30710d 2283 ia64_override_options, because flag_var_tracking is finalized after
0f666d6e
JJ
2284 that. */
2285 ia64_flag_var_tracking = flag_var_tracking;
2286 flag_var_tracking = 0;
2287
1bc7c5b6
ZW
2288 default_file_start ();
2289 emit_safe_across_calls ();
2290}
2291
3b572406 2292void
9c808aad 2293emit_safe_across_calls (void)
3b572406
RH
2294{
2295 unsigned int rs, re;
2296 int out_state;
2297
2298 rs = 1;
2299 out_state = 0;
2300 while (1)
2301 {
2302 while (rs < 64 && call_used_regs[PR_REG (rs)])
2303 rs++;
2304 if (rs >= 64)
2305 break;
2306 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2307 continue;
2308 if (out_state == 0)
2309 {
1bc7c5b6 2310 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
2311 out_state = 1;
2312 }
2313 else
1bc7c5b6 2314 fputc (',', asm_out_file);
3b572406 2315 if (re == rs + 1)
1bc7c5b6 2316 fprintf (asm_out_file, "p%u", rs);
3b572406 2317 else
1bc7c5b6 2318 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
2319 rs = re + 1;
2320 }
2321 if (out_state)
1bc7c5b6 2322 fputc ('\n', asm_out_file);
3b572406
RH
2323}
2324
812b587e
SE
2325/* Globalize a declaration. */
2326
2327static void
2328ia64_globalize_decl_name (FILE * stream, tree decl)
2329{
2330 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2331 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2332 if (version_attr)
2333 {
2334 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2335 const char *p = TREE_STRING_POINTER (v);
2336 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2337 }
2338 targetm.asm_out.globalize_label (stream, name);
2339 if (TREE_CODE (decl) == FUNCTION_DECL)
2340 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2341}
2342
97e242b0
RH
2343/* Helper function for ia64_compute_frame_size: find an appropriate general
2344 register to spill some special register to. SPECIAL_SPILL_MASK contains
2345 bits in GR0 to GR31 that have already been allocated by this routine.
2346 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 2347
97e242b0 2348static int
6fb5fa3c 2349find_gr_spill (enum ia64_frame_regs r, int try_locals)
97e242b0
RH
2350{
2351 int regno;
2352
6fb5fa3c
DB
2353 if (emitted_frame_related_regs[r] != 0)
2354 {
2355 regno = emitted_frame_related_regs[r];
2951f79b
JJ
2356 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2357 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
6fb5fa3c
DB
2358 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2359 else if (current_function_is_leaf
2360 && regno >= GR_REG (1) && regno <= GR_REG (31))
2361 current_frame_info.gr_used_mask |= 1 << regno;
2362
2363 return regno;
2364 }
2365
97e242b0
RH
2366 /* If this is a leaf function, first try an otherwise unused
2367 call-clobbered register. */
2368 if (current_function_is_leaf)
2369 {
2370 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2371 if (! df_regs_ever_live_p (regno)
97e242b0
RH
2372 && call_used_regs[regno]
2373 && ! fixed_regs[regno]
2374 && ! global_regs[regno]
6fb5fa3c
DB
2375 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2376 && ! is_emitted (regno))
97e242b0
RH
2377 {
2378 current_frame_info.gr_used_mask |= 1 << regno;
2379 return regno;
2380 }
2381 }
2382
2383 if (try_locals)
2384 {
2385 regno = current_frame_info.n_local_regs;
9502c558
JW
2386 /* If there is a frame pointer, then we can't use loc79, because
2387 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2388 reg_name switching code in ia64_expand_prologue. */
2951f79b
JJ
2389 while (regno < (80 - frame_pointer_needed))
2390 if (! is_emitted (LOC_REG (regno++)))
2391 {
2392 current_frame_info.n_local_regs = regno;
2393 return LOC_REG (regno - 1);
2394 }
97e242b0
RH
2395 }
2396
2397 /* Failed to find a general register to spill to. Must use stack. */
2398 return 0;
2399}
2400
2401/* In order to make for nice schedules, we try to allocate every temporary
2402 to a different register. We must of course stay away from call-saved,
2403 fixed, and global registers. We must also stay away from registers
2404 allocated in current_frame_info.gr_used_mask, since those include regs
2405 used all through the prologue.
2406
2407 Any register allocated here must be used immediately. The idea is to
2408 aid scheduling, not to solve data flow problems. */
2409
2410static int last_scratch_gr_reg;
2411
2412static int
9c808aad 2413next_scratch_gr_reg (void)
97e242b0
RH
2414{
2415 int i, regno;
2416
2417 for (i = 0; i < 32; ++i)
2418 {
2419 regno = (last_scratch_gr_reg + i + 1) & 31;
2420 if (call_used_regs[regno]
2421 && ! fixed_regs[regno]
2422 && ! global_regs[regno]
2423 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2424 {
2425 last_scratch_gr_reg = regno;
2426 return regno;
2427 }
2428 }
2429
2430 /* There must be _something_ available. */
e820471b 2431 gcc_unreachable ();
97e242b0
RH
2432}
2433
2434/* Helper function for ia64_compute_frame_size, called through
2435 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2436
2437static void
9c808aad 2438mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2439{
97e242b0
RH
2440 unsigned int regno = REGNO (reg);
2441 if (regno < 32)
f95e79cc 2442 {
c8b622ff 2443 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
f95e79cc
RH
2444 for (i = 0; i < n; ++i)
2445 current_frame_info.gr_used_mask |= 1 << (regno + i);
2446 }
c65ebc55
JW
2447}
2448
6fb5fa3c 2449
c65ebc55
JW
2450/* Returns the number of bytes offset between the frame pointer and the stack
2451 pointer for the current function. SIZE is the number of bytes of space
2452 needed for local variables. */
97e242b0
RH
2453
2454static void
9c808aad 2455ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2456{
97e242b0
RH
2457 HOST_WIDE_INT total_size;
2458 HOST_WIDE_INT spill_size = 0;
2459 HOST_WIDE_INT extra_spill_size = 0;
2460 HOST_WIDE_INT pretend_args_size;
c65ebc55 2461 HARD_REG_SET mask;
97e242b0
RH
2462 int n_spilled = 0;
2463 int spilled_gr_p = 0;
2464 int spilled_fr_p = 0;
2465 unsigned int regno;
2951f79b
JJ
2466 int min_regno;
2467 int max_regno;
97e242b0 2468 int i;
c65ebc55 2469
97e242b0
RH
2470 if (current_frame_info.initialized)
2471 return;
294dac80 2472
97e242b0 2473 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2474 CLEAR_HARD_REG_SET (mask);
2475
97e242b0
RH
2476 /* Don't allocate scratches to the return register. */
2477 diddle_return_value (mark_reg_gr_used_mask, NULL);
2478
2479 /* Don't allocate scratches to the EH scratch registers. */
2480 if (cfun->machine->ia64_eh_epilogue_sp)
2481 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2482 if (cfun->machine->ia64_eh_epilogue_bsp)
2483 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2484
97e242b0
RH
2485 /* Find the size of the register stack frame. We have only 80 local
2486 registers, because we reserve 8 for the inputs and 8 for the
2487 outputs. */
2488
2489 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2490 since we'll be adjusting that down later. */
2491 regno = LOC_REG (78) + ! frame_pointer_needed;
2492 for (; regno >= LOC_REG (0); regno--)
6fb5fa3c 2493 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
97e242b0
RH
2494 break;
2495 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2496
3f67ac08
DM
2497 /* For functions marked with the syscall_linkage attribute, we must mark
2498 all eight input registers as in use, so that locals aren't visible to
2499 the caller. */
2500
2501 if (cfun->machine->n_varargs > 0
2502 || lookup_attribute ("syscall_linkage",
2503 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2504 current_frame_info.n_input_regs = 8;
2505 else
2506 {
2507 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
6fb5fa3c 2508 if (df_regs_ever_live_p (regno))
97e242b0
RH
2509 break;
2510 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2511 }
2512
2513 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
6fb5fa3c 2514 if (df_regs_ever_live_p (regno))
97e242b0
RH
2515 break;
2516 i = regno - OUT_REG (0) + 1;
2517
d26afa4f 2518#ifndef PROFILE_HOOK
97e242b0 2519 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2520 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2521 profiling, we need two output registers for the two bb_init_trace_func
2522 arguments. */
e3b5732b 2523 if (crtl->profile)
97e242b0 2524 i = MAX (i, 1);
d26afa4f 2525#endif
97e242b0
RH
2526 current_frame_info.n_output_regs = i;
2527
2528 /* ??? No rotating register support yet. */
2529 current_frame_info.n_rotate_regs = 0;
2530
2531 /* Discover which registers need spilling, and how much room that
9c808aad 2532 will take. Begin with floating point and general registers,
97e242b0
RH
2533 which will always wind up on the stack. */
2534
2535 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
6fb5fa3c 2536 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2537 {
2538 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2539 spill_size += 16;
2540 n_spilled += 1;
2541 spilled_fr_p = 1;
c65ebc55
JW
2542 }
2543
97e242b0 2544 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2545 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2546 {
2547 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2548 spill_size += 8;
2549 n_spilled += 1;
2550 spilled_gr_p = 1;
c65ebc55
JW
2551 }
2552
97e242b0 2553 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
6fb5fa3c 2554 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2555 {
2556 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2557 spill_size += 8;
2558 n_spilled += 1;
c65ebc55
JW
2559 }
2560
97e242b0
RH
2561 /* Now come all special registers that might get saved in other
2562 general registers. */
9c808aad 2563
97e242b0
RH
2564 if (frame_pointer_needed)
2565 {
6fb5fa3c 2566 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
0c35f902
JW
2567 /* If we did not get a register, then we take LOC79. This is guaranteed
2568 to be free, even if regs_ever_live is already set, because this is
2569 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2570 as we don't count loc79 above. */
6fb5fa3c 2571 if (current_frame_info.r[reg_fp] == 0)
0c35f902 2572 {
6fb5fa3c
DB
2573 current_frame_info.r[reg_fp] = LOC_REG (79);
2574 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
0c35f902 2575 }
97e242b0
RH
2576 }
2577
2578 if (! current_function_is_leaf)
c65ebc55 2579 {
97e242b0
RH
2580 /* Emit a save of BR0 if we call other functions. Do this even
2581 if this function doesn't return, as EH depends on this to be
2582 able to unwind the stack. */
2583 SET_HARD_REG_BIT (mask, BR_REG (0));
2584
6fb5fa3c
DB
2585 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2586 if (current_frame_info.r[reg_save_b0] == 0)
97e242b0 2587 {
ae1e2d4c 2588 extra_spill_size += 8;
97e242b0
RH
2589 n_spilled += 1;
2590 }
2591
2592 /* Similarly for ar.pfs. */
2593 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2594 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2595 if (current_frame_info.r[reg_save_ar_pfs] == 0)
97e242b0
RH
2596 {
2597 extra_spill_size += 8;
2598 n_spilled += 1;
2599 }
599aedd9
RH
2600
2601 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2602 registers are clobbered, so we fall back to the stack. */
6fb5fa3c 2603 current_frame_info.r[reg_save_gp]
e3b5732b 2604 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
6fb5fa3c 2605 if (current_frame_info.r[reg_save_gp] == 0)
599aedd9
RH
2606 {
2607 SET_HARD_REG_BIT (mask, GR_REG (1));
2608 spill_size += 8;
2609 n_spilled += 1;
2610 }
c65ebc55
JW
2611 }
2612 else
97e242b0 2613 {
6fb5fa3c 2614 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
97e242b0
RH
2615 {
2616 SET_HARD_REG_BIT (mask, BR_REG (0));
ae1e2d4c 2617 extra_spill_size += 8;
97e242b0
RH
2618 n_spilled += 1;
2619 }
f5bdba44 2620
6fb5fa3c 2621 if (df_regs_ever_live_p (AR_PFS_REGNUM))
f5bdba44
RH
2622 {
2623 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2624 current_frame_info.r[reg_save_ar_pfs]
2625 = find_gr_spill (reg_save_ar_pfs, 1);
2626 if (current_frame_info.r[reg_save_ar_pfs] == 0)
f5bdba44
RH
2627 {
2628 extra_spill_size += 8;
2629 n_spilled += 1;
2630 }
2631 }
97e242b0 2632 }
c65ebc55 2633
97e242b0
RH
2634 /* Unwind descriptor hackery: things are most efficient if we allocate
2635 consecutive GR save registers for RP, PFS, FP in that order. However,
2636 it is absolutely critical that FP get the only hard register that's
2637 guaranteed to be free, so we allocated it first. If all three did
2638 happen to be allocated hard regs, and are consecutive, rearrange them
6fb5fa3c
DB
2639 into the preferred order now.
2640
2641 If we have already emitted code for any of those registers,
2642 then it's already too late to change. */
2951f79b
JJ
2643 min_regno = MIN (current_frame_info.r[reg_fp],
2644 MIN (current_frame_info.r[reg_save_b0],
2645 current_frame_info.r[reg_save_ar_pfs]));
2646 max_regno = MAX (current_frame_info.r[reg_fp],
2647 MAX (current_frame_info.r[reg_save_b0],
2648 current_frame_info.r[reg_save_ar_pfs]));
2649 if (min_regno > 0
2650 && min_regno + 2 == max_regno
2651 && (current_frame_info.r[reg_fp] == min_regno + 1
2652 || current_frame_info.r[reg_save_b0] == min_regno + 1
2653 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2654 && (emitted_frame_related_regs[reg_save_b0] == 0
2655 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2656 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2657 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2658 && (emitted_frame_related_regs[reg_fp] == 0
2659 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
5527bf14 2660 {
2951f79b
JJ
2661 current_frame_info.r[reg_save_b0] = min_regno;
2662 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2663 current_frame_info.r[reg_fp] = min_regno + 2;
5527bf14
RH
2664 }
2665
97e242b0
RH
2666 /* See if we need to store the predicate register block. */
2667 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2668 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
97e242b0
RH
2669 break;
2670 if (regno <= PR_REG (63))
c65ebc55 2671 {
97e242b0 2672 SET_HARD_REG_BIT (mask, PR_REG (0));
6fb5fa3c
DB
2673 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2674 if (current_frame_info.r[reg_save_pr] == 0)
97e242b0
RH
2675 {
2676 extra_spill_size += 8;
2677 n_spilled += 1;
2678 }
2679
2680 /* ??? Mark them all as used so that register renaming and such
2681 are free to use them. */
2682 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2683 df_set_regs_ever_live (regno, true);
c65ebc55
JW
2684 }
2685
97e242b0 2686 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2687 ar.unat as well. The check for existing liveness allows inline asm
2688 to touch ar.unat. */
2689 if (spilled_gr_p || cfun->machine->n_varargs
6fb5fa3c 2690 || df_regs_ever_live_p (AR_UNAT_REGNUM))
97e242b0 2691 {
6fb5fa3c 2692 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
97e242b0 2693 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
6fb5fa3c
DB
2694 current_frame_info.r[reg_save_ar_unat]
2695 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2696 if (current_frame_info.r[reg_save_ar_unat] == 0)
97e242b0
RH
2697 {
2698 extra_spill_size += 8;
2699 n_spilled += 1;
2700 }
2701 }
2702
6fb5fa3c 2703 if (df_regs_ever_live_p (AR_LC_REGNUM))
97e242b0
RH
2704 {
2705 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
6fb5fa3c
DB
2706 current_frame_info.r[reg_save_ar_lc]
2707 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2708 if (current_frame_info.r[reg_save_ar_lc] == 0)
97e242b0
RH
2709 {
2710 extra_spill_size += 8;
2711 n_spilled += 1;
2712 }
2713 }
2714
2715 /* If we have an odd number of words of pretend arguments written to
2716 the stack, then the FR save area will be unaligned. We round the
2717 size of this area up to keep things 16 byte aligned. */
2718 if (spilled_fr_p)
38173d38 2719 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
97e242b0 2720 else
38173d38 2721 pretend_args_size = crtl->args.pretend_args_size;
97e242b0
RH
2722
2723 total_size = (spill_size + extra_spill_size + size + pretend_args_size
38173d38 2724 + crtl->outgoing_args_size);
97e242b0
RH
2725 total_size = IA64_STACK_ALIGN (total_size);
2726
2727 /* We always use the 16-byte scratch area provided by the caller, but
2728 if we are a leaf function, there's no one to which we need to provide
2729 a scratch area. */
2730 if (current_function_is_leaf)
2731 total_size = MAX (0, total_size - 16);
2732
c65ebc55 2733 current_frame_info.total_size = total_size;
97e242b0
RH
2734 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2735 current_frame_info.spill_size = spill_size;
2736 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2737 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2738 current_frame_info.n_spilled = n_spilled;
c65ebc55 2739 current_frame_info.initialized = reload_completed;
97e242b0
RH
2740}
2741
7b5cbb57
AS
2742/* Worker function for TARGET_CAN_ELIMINATE. */
2743
2744bool
2745ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2746{
2747 return (to == BR_REG (0) ? current_function_is_leaf : true);
2748}
2749
97e242b0
RH
2750/* Compute the initial difference between the specified pair of registers. */
2751
2752HOST_WIDE_INT
9c808aad 2753ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2754{
2755 HOST_WIDE_INT offset;
2756
2757 ia64_compute_frame_size (get_frame_size ());
2758 switch (from)
2759 {
2760 case FRAME_POINTER_REGNUM:
e820471b 2761 switch (to)
97e242b0 2762 {
e820471b 2763 case HARD_FRAME_POINTER_REGNUM:
97e242b0
RH
2764 if (current_function_is_leaf)
2765 offset = -current_frame_info.total_size;
2766 else
2767 offset = -(current_frame_info.total_size
38173d38 2768 - crtl->outgoing_args_size - 16);
e820471b
NS
2769 break;
2770
2771 case STACK_POINTER_REGNUM:
97e242b0
RH
2772 if (current_function_is_leaf)
2773 offset = 0;
2774 else
38173d38 2775 offset = 16 + crtl->outgoing_args_size;
e820471b
NS
2776 break;
2777
2778 default:
2779 gcc_unreachable ();
97e242b0 2780 }
97e242b0 2781 break;
c65ebc55 2782
97e242b0
RH
2783 case ARG_POINTER_REGNUM:
2784 /* Arguments start above the 16 byte save area, unless stdarg
2785 in which case we store through the 16 byte save area. */
e820471b
NS
2786 switch (to)
2787 {
2788 case HARD_FRAME_POINTER_REGNUM:
38173d38 2789 offset = 16 - crtl->args.pretend_args_size;
e820471b
NS
2790 break;
2791
2792 case STACK_POINTER_REGNUM:
2793 offset = (current_frame_info.total_size
38173d38 2794 + 16 - crtl->args.pretend_args_size);
e820471b
NS
2795 break;
2796
2797 default:
2798 gcc_unreachable ();
2799 }
97e242b0
RH
2800 break;
2801
97e242b0 2802 default:
e820471b 2803 gcc_unreachable ();
97e242b0
RH
2804 }
2805
2806 return offset;
c65ebc55
JW
2807}
2808
97e242b0
RH
2809/* If there are more than a trivial number of register spills, we use
2810 two interleaved iterators so that we can get two memory references
2811 per insn group.
2812
2813 In order to simplify things in the prologue and epilogue expanders,
2814 we use helper functions to fix up the memory references after the
2815 fact with the appropriate offsets to a POST_MODIFY memory mode.
2816 The following data structure tracks the state of the two iterators
2817 while insns are being emitted. */
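/* For instance, with more than two spills n_iter becomes 2 and successive
   spill stores alternate between the two iterator base registers, letting
   a pair of memory references issue in the same insn group.  */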
2818
2819struct spill_fill_data
c65ebc55 2820{
d6a7951f 2821 rtx init_after; /* point at which to emit initializations */
97e242b0
RH
2822 rtx init_reg[2]; /* initial base register */
2823 rtx iter_reg[2]; /* the iterator registers */
2824 rtx *prev_addr[2]; /* address of last memory use */
703cf211 2825 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
2826 HOST_WIDE_INT prev_off[2]; /* last offset */
2827 int n_iter; /* number of iterators in use */
2828 int next_iter; /* next iterator to use */
2829 unsigned int save_gr_used_mask;
2830};
2831
2832static struct spill_fill_data spill_fill_data;
c65ebc55 2833
97e242b0 2834static void
9c808aad 2835setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2836{
2837 int i;
2838
2839 spill_fill_data.init_after = get_last_insn ();
2840 spill_fill_data.init_reg[0] = init_reg;
2841 spill_fill_data.init_reg[1] = init_reg;
2842 spill_fill_data.prev_addr[0] = NULL;
2843 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
2844 spill_fill_data.prev_insn[0] = NULL;
2845 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
2846 spill_fill_data.prev_off[0] = cfa_off;
2847 spill_fill_data.prev_off[1] = cfa_off;
2848 spill_fill_data.next_iter = 0;
2849 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2850
2851 spill_fill_data.n_iter = 1 + (n_spills > 2);
2852 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 2853 {
97e242b0
RH
2854 int regno = next_scratch_gr_reg ();
2855 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2856 current_frame_info.gr_used_mask |= 1 << regno;
2857 }
2858}
2859
2860static void
9c808aad 2861finish_spill_pointers (void)
97e242b0
RH
2862{
2863 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2864}
c65ebc55 2865
97e242b0 2866static rtx
9c808aad 2867spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2868{
2869 int iter = spill_fill_data.next_iter;
2870 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2871 rtx disp_rtx = GEN_INT (disp);
2872 rtx mem;
2873
2874 if (spill_fill_data.prev_addr[iter])
2875 {
13f70342 2876 if (satisfies_constraint_N (disp_rtx))
703cf211
BS
2877 {
2878 *spill_fill_data.prev_addr[iter]
2879 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2880 gen_rtx_PLUS (DImode,
2881 spill_fill_data.iter_reg[iter],
2882 disp_rtx));
bbbbb16a
ILT
2883 add_reg_note (spill_fill_data.prev_insn[iter],
2884 REG_INC, spill_fill_data.iter_reg[iter]);
703cf211 2885 }
c65ebc55
JW
2886 else
2887 {
97e242b0 2888 /* ??? Could use register post_modify for loads. */
13f70342 2889 if (!satisfies_constraint_I (disp_rtx))
97e242b0
RH
2890 {
2891 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2892 emit_move_insn (tmp, disp_rtx);
2893 disp_rtx = tmp;
2894 }
2895 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2896 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 2897 }
97e242b0
RH
2898 }
2899 /* Micro-optimization: if we've created a frame pointer, it's at
2900 CFA 0, which may allow the real iterator to be initialized lower,
2901 slightly increasing parallelism. Also, if there are few saves
2902 it may eliminate the iterator entirely. */
2903 else if (disp == 0
2904 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2905 && frame_pointer_needed)
2906 {
2907 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 2908 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
2909 return mem;
2910 }
2911 else
2912 {
892a4e60 2913 rtx seq, insn;
809d4ef1 2914
97e242b0
RH
2915 if (disp == 0)
2916 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2917 spill_fill_data.init_reg[iter]);
2918 else
c65ebc55 2919 {
97e242b0
RH
2920 start_sequence ();
2921
13f70342 2922 if (!satisfies_constraint_I (disp_rtx))
c65ebc55 2923 {
97e242b0
RH
2924 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2925 emit_move_insn (tmp, disp_rtx);
2926 disp_rtx = tmp;
c65ebc55 2927 }
97e242b0
RH
2928
2929 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2930 spill_fill_data.init_reg[iter],
2931 disp_rtx));
2932
2f937369 2933 seq = get_insns ();
97e242b0 2934 end_sequence ();
c65ebc55 2935 }
809d4ef1 2936
97e242b0
RH
2937 /* Careful for being the first insn in a sequence. */
2938 if (spill_fill_data.init_after)
892a4e60 2939 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 2940 else
bc08aefe
RH
2941 {
2942 rtx first = get_insns ();
2943 if (first)
892a4e60 2944 insn = emit_insn_before (seq, first);
bc08aefe 2945 else
892a4e60 2946 insn = emit_insn (seq);
bc08aefe 2947 }
892a4e60 2948 spill_fill_data.init_after = insn;
97e242b0 2949 }
c65ebc55 2950
97e242b0 2951 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 2952
97e242b0
RH
2953 /* ??? Not all of the spills are for varargs, but some of them are.
2954 The rest of the spills belong in an alias set of their own. But
2955 it doesn't actually hurt to include them here. */
ba4828e0 2956 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 2957
97e242b0
RH
2958 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2959 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 2960
97e242b0
RH
2961 if (++iter >= spill_fill_data.n_iter)
2962 iter = 0;
2963 spill_fill_data.next_iter = iter;
c65ebc55 2964
97e242b0
RH
2965 return mem;
2966}
5527bf14 2967
97e242b0 2968static void
9c808aad
AJ
2969do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2970 rtx frame_reg)
97e242b0 2971{
703cf211 2972 int iter = spill_fill_data.next_iter;
97e242b0 2973 rtx mem, insn;
5527bf14 2974
97e242b0 2975 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 2976 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 2977 spill_fill_data.prev_insn[iter] = insn;
5527bf14 2978
97e242b0
RH
2979 if (frame_reg)
2980 {
2981 rtx base;
2982 HOST_WIDE_INT off;
2983
2984 RTX_FRAME_RELATED_P (insn) = 1;
2985
9c808aad 2986 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
2987 through a pair of interleaved post_modify iterators. Just
2988 provide the correct answer. */
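      /* The attached note therefore describes the save as a plain store to
	 BASE + OFF, rebuilt from CFA_OFF: relative to the hard frame
	 pointer when one exists, otherwise relative to the stack pointer
	 adjusted by the total frame size.  */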
2989
2990 if (frame_pointer_needed)
2991 {
2992 base = hard_frame_pointer_rtx;
2993 off = - cfa_off;
5527bf14 2994 }
97e242b0
RH
2995 else
2996 {
2997 base = stack_pointer_rtx;
2998 off = current_frame_info.total_size - cfa_off;
2999 }
3000
bbbbb16a
ILT
3001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3002 gen_rtx_SET (VOIDmode,
3003 gen_rtx_MEM (GET_MODE (reg),
3004 plus_constant (base, off)),
3005 frame_reg));
c65ebc55
JW
3006 }
3007}
3008
97e242b0 3009static void
9c808aad 3010do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 3011{
703cf211
BS
3012 int iter = spill_fill_data.next_iter;
3013 rtx insn;
3014
3015 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3016 GEN_INT (cfa_off)));
3017 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
3018}
3019
870f9ec0
RH
 3020/* Wrapper functions that discard the CONST_INT spill offset.  These
3021 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 3022 use a consistent function interface. */
870f9ec0
RH
3023
3024static rtx
9c808aad 3025gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3026{
3027 return gen_movdi (dest, src);
3028}
3029
3030static rtx
9c808aad 3031gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3032{
3033 return gen_fr_spill (dest, src);
3034}
3035
3036static rtx
9c808aad 3037gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3038{
3039 return gen_fr_restore (dest, src);
3040}
c65ebc55
JW
3041
3042/* Called after register allocation to add any instructions needed for the
3043 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 3044 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
3045 to intermix instructions with the saves of the caller saved registers. In
3046 some cases, it might be necessary to emit a barrier instruction as the last
3047 insn to prevent such scheduling.
3048
3049 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
3050 so that the debug info generation code can handle them properly.
3051
 3052   The register save area is laid out like so:
3053 cfa+16
3054 [ varargs spill area ]
3055 [ fr register spill area ]
3056 [ br register spill area ]
3057 [ ar register spill area ]
3058 [ pr register spill area ]
3059 [ gr register spill area ] */
c65ebc55
JW
3060
3061/* ??? Get inefficient code when the frame size is larger than can fit in an
3062 adds instruction. */
3063
c65ebc55 3064void
9c808aad 3065ia64_expand_prologue (void)
c65ebc55 3066{
97e242b0
RH
3067 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3068 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3069 rtx reg, alt_reg;
3070
3071 ia64_compute_frame_size (get_frame_size ());
3072 last_scratch_gr_reg = 15;
3073
6fb5fa3c
DB
3074 if (dump_file)
3075 {
3076 fprintf (dump_file, "ia64 frame related registers "
3077 "recorded in current_frame_info.r[]:\n");
3078#define PRINTREG(a) if (current_frame_info.r[a]) \
3079 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3080 PRINTREG(reg_fp);
3081 PRINTREG(reg_save_b0);
3082 PRINTREG(reg_save_pr);
3083 PRINTREG(reg_save_ar_pfs);
3084 PRINTREG(reg_save_ar_unat);
3085 PRINTREG(reg_save_ar_lc);
3086 PRINTREG(reg_save_gp);
3087#undef PRINTREG
3088 }
3089
97e242b0
RH
3090 /* If there is no epilogue, then we don't need some prologue insns.
3091 We need to avoid emitting the dead prologue insns, because flow
3092 will complain about them. */
c65ebc55
JW
3093 if (optimize)
3094 {
97e242b0 3095 edge e;
9924d7d8 3096 edge_iterator ei;
97e242b0 3097
628f6a4e 3098 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
c65ebc55
JW
3099 if ((e->flags & EDGE_FAKE) == 0
3100 && (e->flags & EDGE_FALLTHRU) != 0)
3101 break;
3102 epilogue_p = (e != NULL);
3103 }
3104 else
3105 epilogue_p = 1;
3106
97e242b0
RH
3107 /* Set the local, input, and output register names. We need to do this
3108 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3109 half. If we use in/loc/out register names, then we get assembler errors
3110 in crtn.S because there is no alloc insn or regstk directive in there. */
3111 if (! TARGET_REG_NAMES)
3112 {
3113 int inputs = current_frame_info.n_input_regs;
3114 int locals = current_frame_info.n_local_regs;
3115 int outputs = current_frame_info.n_output_regs;
3116
3117 for (i = 0; i < inputs; i++)
3118 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3119 for (i = 0; i < locals; i++)
3120 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3121 for (i = 0; i < outputs; i++)
3122 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3123 }
c65ebc55 3124
97e242b0
RH
3125 /* Set the frame pointer register name. The regnum is logically loc79,
3126 but of course we'll not have allocated that many locals. Rather than
3127 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
3128 /* ??? This code means that we can never use one local register when
3129 there is a frame pointer. loc79 gets wasted in this case, as it is
3130 renamed to a register that will never be used. See also the try_locals
3131 code in find_gr_spill. */
6fb5fa3c 3132 if (current_frame_info.r[reg_fp])
97e242b0
RH
3133 {
3134 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3135 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3136 = reg_names[current_frame_info.r[reg_fp]];
3137 reg_names[current_frame_info.r[reg_fp]] = tmp;
97e242b0 3138 }
c65ebc55 3139
97e242b0
RH
3140 /* We don't need an alloc instruction if we've used no outputs or locals. */
3141 if (current_frame_info.n_local_regs == 0
2ed4af6f 3142 && current_frame_info.n_output_regs == 0
38173d38 3143 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
f5bdba44 3144 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
3145 {
3146 /* If there is no alloc, but there are input registers used, then we
3147 need a .regstk directive. */
3148 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3149 ar_pfs_save_reg = NULL_RTX;
3150 }
3151 else
3152 {
3153 current_frame_info.need_regstk = 0;
c65ebc55 3154
6fb5fa3c
DB
3155 if (current_frame_info.r[reg_save_ar_pfs])
3156 {
3157 regno = current_frame_info.r[reg_save_ar_pfs];
3158 reg_emitted (reg_save_ar_pfs);
3159 }
97e242b0
RH
3160 else
3161 regno = next_scratch_gr_reg ();
3162 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3163
9c808aad 3164 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
3165 GEN_INT (current_frame_info.n_input_regs),
3166 GEN_INT (current_frame_info.n_local_regs),
3167 GEN_INT (current_frame_info.n_output_regs),
3168 GEN_INT (current_frame_info.n_rotate_regs)));
6fb5fa3c 3169 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
97e242b0 3170 }
c65ebc55 3171
97e242b0 3172 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 3173
26a110f5 3174 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
3175 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3176 stack_pointer_rtx, 0);
c65ebc55 3177
97e242b0
RH
3178 if (frame_pointer_needed)
3179 {
3180 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3181 RTX_FRAME_RELATED_P (insn) = 1;
3182 }
c65ebc55 3183
97e242b0
RH
3184 if (current_frame_info.total_size != 0)
3185 {
3186 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3187 rtx offset;
c65ebc55 3188
13f70342 3189 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3190 offset = frame_size_rtx;
3191 else
3192 {
3193 regno = next_scratch_gr_reg ();
9c808aad 3194 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
3195 emit_move_insn (offset, frame_size_rtx);
3196 }
c65ebc55 3197
97e242b0
RH
3198 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3199 stack_pointer_rtx, offset));
c65ebc55 3200
97e242b0
RH
3201 if (! frame_pointer_needed)
3202 {
3203 RTX_FRAME_RELATED_P (insn) = 1;
3204 if (GET_CODE (offset) != CONST_INT)
bbbbb16a
ILT
3205 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3206 gen_rtx_SET (VOIDmode,
3207 stack_pointer_rtx,
3208 gen_rtx_PLUS (DImode,
3209 stack_pointer_rtx,
3210 frame_size_rtx)));
97e242b0 3211 }
c65ebc55 3212
97e242b0
RH
3213 /* ??? At this point we must generate a magic insn that appears to
3214 modify the stack pointer, the frame pointer, and all spill
3215 iterators. This would allow the most scheduling freedom. For
3216 now, just hard stop. */
3217 emit_insn (gen_blockage ());
3218 }
c65ebc55 3219
97e242b0
RH
3220 /* Must copy out ar.unat before doing any integer spills. */
3221 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 3222 {
6fb5fa3c
DB
3223 if (current_frame_info.r[reg_save_ar_unat])
3224 {
3225 ar_unat_save_reg
3226 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3227 reg_emitted (reg_save_ar_unat);
3228 }
97e242b0 3229 else
c65ebc55 3230 {
97e242b0
RH
3231 alt_regno = next_scratch_gr_reg ();
3232 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3233 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 3234 }
c65ebc55 3235
97e242b0
RH
3236 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3237 insn = emit_move_insn (ar_unat_save_reg, reg);
6fb5fa3c 3238 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
97e242b0
RH
3239
3240 /* Even if we're not going to generate an epilogue, we still
3241 need to save the register so that EH works. */
6fb5fa3c 3242 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
d0e82870 3243 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
3244 }
3245 else
97e242b0
RH
3246 ar_unat_save_reg = NULL_RTX;
3247
3248 /* Spill all varargs registers. Do this before spilling any GR registers,
3249 since we want the UNAT bits for the GR registers to override the UNAT
3250 bits from varargs, which we don't care about. */
c65ebc55 3251
97e242b0
RH
3252 cfa_off = -16;
3253 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 3254 {
97e242b0 3255 reg = gen_rtx_REG (DImode, regno);
870f9ec0 3256 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 3257 }
c65ebc55 3258
97e242b0
RH
3259 /* Locate the bottom of the register save area. */
3260 cfa_off = (current_frame_info.spill_cfa_off
3261 + current_frame_info.spill_size
3262 + current_frame_info.extra_spill_size);
c65ebc55 3263
97e242b0
RH
3264 /* Save the predicate register block either in a register or in memory. */
3265 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3266 {
3267 reg = gen_rtx_REG (DImode, PR_REG (0));
6fb5fa3c 3268 if (current_frame_info.r[reg_save_pr] != 0)
1ff5b671 3269 {
6fb5fa3c
DB
3270 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3271 reg_emitted (reg_save_pr);
97e242b0 3272 insn = emit_move_insn (alt_reg, reg);
1ff5b671 3273
97e242b0
RH
3274 /* ??? Denote pr spill/fill by a DImode move that modifies all
3275 64 hard registers. */
1ff5b671 3276 RTX_FRAME_RELATED_P (insn) = 1;
bbbbb16a
ILT
3277 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3278 gen_rtx_SET (VOIDmode, alt_reg, reg));
46327bc5 3279
97e242b0
RH
3280 /* Even if we're not going to generate an epilogue, we still
3281 need to save the register so that EH works. */
3282 if (! epilogue_p)
d0e82870 3283 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
3284 }
3285 else
97e242b0
RH
3286 {
3287 alt_regno = next_scratch_gr_reg ();
3288 alt_reg = gen_rtx_REG (DImode, alt_regno);
3289 insn = emit_move_insn (alt_reg, reg);
870f9ec0 3290 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3291 cfa_off -= 8;
3292 }
c65ebc55
JW
3293 }
3294
97e242b0
RH
3295 /* Handle AR regs in numerical order. All of them get special handling. */
3296 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6fb5fa3c 3297 && current_frame_info.r[reg_save_ar_unat] == 0)
c65ebc55 3298 {
97e242b0 3299 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 3300 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 3301 cfa_off -= 8;
c65ebc55 3302 }
97e242b0
RH
3303
3304 /* The alloc insn already copied ar.pfs into a general register. The
3305 only thing we have to do now is copy that register to a stack slot
3306 if we'd not allocated a local register for the job. */
f5bdba44 3307 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
6fb5fa3c 3308 && current_frame_info.r[reg_save_ar_pfs] == 0)
c65ebc55 3309 {
97e242b0 3310 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 3311 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
3312 cfa_off -= 8;
3313 }
3314
3315 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3316 {
3317 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
6fb5fa3c 3318 if (current_frame_info.r[reg_save_ar_lc] != 0)
97e242b0 3319 {
6fb5fa3c
DB
3320 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3321 reg_emitted (reg_save_ar_lc);
97e242b0
RH
3322 insn = emit_move_insn (alt_reg, reg);
3323 RTX_FRAME_RELATED_P (insn) = 1;
3324
3325 /* Even if we're not going to generate an epilogue, we still
3326 need to save the register so that EH works. */
3327 if (! epilogue_p)
d0e82870 3328 emit_insn (gen_prologue_use (alt_reg));
97e242b0 3329 }
c65ebc55
JW
3330 else
3331 {
97e242b0
RH
3332 alt_regno = next_scratch_gr_reg ();
3333 alt_reg = gen_rtx_REG (DImode, alt_regno);
3334 emit_move_insn (alt_reg, reg);
870f9ec0 3335 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3336 cfa_off -= 8;
3337 }
3338 }
3339
ae1e2d4c
AS
3340 /* Save the return pointer. */
3341 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3342 {
3343 reg = gen_rtx_REG (DImode, BR_REG (0));
6fb5fa3c 3344 if (current_frame_info.r[reg_save_b0] != 0)
ae1e2d4c 3345 {
6fb5fa3c
DB
3346 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3347 reg_emitted (reg_save_b0);
ae1e2d4c
AS
3348 insn = emit_move_insn (alt_reg, reg);
3349 RTX_FRAME_RELATED_P (insn) = 1;
3350
3351 /* Even if we're not going to generate an epilogue, we still
3352 need to save the register so that EH works. */
3353 if (! epilogue_p)
3354 emit_insn (gen_prologue_use (alt_reg));
3355 }
3356 else
3357 {
3358 alt_regno = next_scratch_gr_reg ();
3359 alt_reg = gen_rtx_REG (DImode, alt_regno);
3360 emit_move_insn (alt_reg, reg);
3361 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3362 cfa_off -= 8;
3363 }
3364 }
3365
6fb5fa3c 3366 if (current_frame_info.r[reg_save_gp])
599aedd9 3367 {
6fb5fa3c 3368 reg_emitted (reg_save_gp);
599aedd9 3369 insn = emit_move_insn (gen_rtx_REG (DImode,
6fb5fa3c 3370 current_frame_info.r[reg_save_gp]),
599aedd9 3371 pic_offset_table_rtx);
599aedd9
RH
3372 }
3373
97e242b0 3374 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3375 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3376 + current_frame_info.spill_size));
97e242b0
RH
3377
3378 /* Spill all general registers. */
3379 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3380 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3381 {
3382 reg = gen_rtx_REG (DImode, regno);
3383 do_spill (gen_gr_spill, reg, cfa_off, reg);
3384 cfa_off -= 8;
3385 }
3386
97e242b0
RH
3387 /* Spill the rest of the BR registers. */
3388 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3389 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3390 {
3391 alt_regno = next_scratch_gr_reg ();
3392 alt_reg = gen_rtx_REG (DImode, alt_regno);
3393 reg = gen_rtx_REG (DImode, regno);
3394 emit_move_insn (alt_reg, reg);
870f9ec0 3395 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3396 cfa_off -= 8;
3397 }
3398
3399 /* Align the frame and spill all FR registers. */
3400 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3401 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3402 {
e820471b 3403 gcc_assert (!(cfa_off & 15));
02befdf4 3404 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3405 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
3406 cfa_off -= 16;
3407 }
3408
e820471b 3409 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3410
3411 finish_spill_pointers ();
c65ebc55
JW
3412}
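/* A worked illustration of the cfa_off bookkeeping above, using a purely
   hypothetical frame (the numbers are not taken from any real compilation):
   with spill_cfa_off == -16, spill_size == 32 and extra_spill_size == 16,
   the save loops start at cfa_off = -16 + 32 + 16 = 32.  The PR/AR/B0
   saves that go to memory consume the 16 bytes of extra_spill_size, which
   is what the first gcc_assert checks, and the GR/BR/FR spills then consume
   the remaining 32 bytes (8 per GR/BR slot, 16 per FR slot), so cfa_off
   ends back at spill_cfa_off == -16, matching the final assert.  */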
3413
3414/* Called after register allocation to add any instructions needed for the
5519a4f9 3415 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 3416 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
3417 to intermix instructions with the restores of the caller-saved registers. In
3418 some cases, it might be necessary to emit a barrier instruction as the last
3419 insn to prevent such scheduling. */
3420
3421void
9c808aad 3422ia64_expand_epilogue (int sibcall_p)
c65ebc55 3423{
97e242b0
RH
3424 rtx insn, reg, alt_reg, ar_unat_save_reg;
3425 int regno, alt_regno, cfa_off;
3426
3427 ia64_compute_frame_size (get_frame_size ());
3428
3429 /* If there is a frame pointer, then we use it instead of the stack
3430 pointer, so that the stack pointer does not need to be valid when
3431 the epilogue starts. See EXIT_IGNORE_STACK. */
3432 if (frame_pointer_needed)
3433 setup_spill_pointers (current_frame_info.n_spilled,
3434 hard_frame_pointer_rtx, 0);
3435 else
9c808aad 3436 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
3437 current_frame_info.total_size);
3438
3439 if (current_frame_info.total_size != 0)
3440 {
3441 /* ??? At this point we must generate a magic insn that appears to
3442 modify the spill iterators and the frame pointer. This would
3443 allow the most scheduling freedom. For now, just hard stop. */
3444 emit_insn (gen_blockage ());
3445 }
3446
3447 /* Locate the bottom of the register save area. */
3448 cfa_off = (current_frame_info.spill_cfa_off
3449 + current_frame_info.spill_size
3450 + current_frame_info.extra_spill_size);
3451
3452 /* Restore the predicate registers. */
3453 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3454 {
6fb5fa3c
DB
3455 if (current_frame_info.r[reg_save_pr] != 0)
3456 {
3457 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3458 reg_emitted (reg_save_pr);
3459 }
97e242b0
RH
3460 else
3461 {
3462 alt_regno = next_scratch_gr_reg ();
3463 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3464 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3465 cfa_off -= 8;
3466 }
3467 reg = gen_rtx_REG (DImode, PR_REG (0));
3468 emit_move_insn (reg, alt_reg);
3469 }
3470
3471 /* Restore the application registers. */
3472
3473 /* Load the saved unat from the stack, but do not restore it until
3474 after the GRs have been restored. */
3475 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3476 {
6fb5fa3c
DB
3477 if (current_frame_info.r[reg_save_ar_unat] != 0)
3478 {
3479 ar_unat_save_reg
3480 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3481 reg_emitted (reg_save_ar_unat);
3482 }
97e242b0
RH
3483 else
3484 {
3485 alt_regno = next_scratch_gr_reg ();
3486 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3487 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3488 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3489 cfa_off -= 8;
3490 }
3491 }
3492 else
3493 ar_unat_save_reg = NULL_RTX;
9c808aad 3494
6fb5fa3c 3495 if (current_frame_info.r[reg_save_ar_pfs] != 0)
97e242b0 3496 {
6fb5fa3c
DB
3497 reg_emitted (reg_save_ar_pfs);
3498 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
97e242b0
RH
3499 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3500 emit_move_insn (reg, alt_reg);
3501 }
4e14f1f9 3502 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3503 {
97e242b0
RH
3504 alt_regno = next_scratch_gr_reg ();
3505 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3506 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3507 cfa_off -= 8;
3508 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3509 emit_move_insn (reg, alt_reg);
3510 }
3511
3512 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3513 {
6fb5fa3c
DB
3514 if (current_frame_info.r[reg_save_ar_lc] != 0)
3515 {
3516 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3517 reg_emitted (reg_save_ar_lc);
3518 }
97e242b0
RH
3519 else
3520 {
3521 alt_regno = next_scratch_gr_reg ();
3522 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3523 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3524 cfa_off -= 8;
3525 }
3526 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3527 emit_move_insn (reg, alt_reg);
3528 }
3529
ae1e2d4c
AS
3530 /* Restore the return pointer. */
3531 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3532 {
6fb5fa3c
DB
3533 if (current_frame_info.r[reg_save_b0] != 0)
3534 {
3535 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3536 reg_emitted (reg_save_b0);
3537 }
ae1e2d4c
AS
3538 else
3539 {
3540 alt_regno = next_scratch_gr_reg ();
3541 alt_reg = gen_rtx_REG (DImode, alt_regno);
3542 do_restore (gen_movdi_x, alt_reg, cfa_off);
3543 cfa_off -= 8;
3544 }
3545 reg = gen_rtx_REG (DImode, BR_REG (0));
3546 emit_move_insn (reg, alt_reg);
3547 }
3548
97e242b0 3549 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3550 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3551 + current_frame_info.spill_size));
97e242b0 3552
599aedd9
RH
3553 /* The GP may be stored on the stack in the prologue, but it's
3554 never restored in the epilogue. Skip the stack slot. */
3555 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3556 cfa_off -= 8;
3557
97e242b0 3558 /* Restore all general registers. */
599aedd9 3559 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 3560 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3561 {
97e242b0
RH
3562 reg = gen_rtx_REG (DImode, regno);
3563 do_restore (gen_gr_restore, reg, cfa_off);
3564 cfa_off -= 8;
0c96007e 3565 }
9c808aad 3566
ae1e2d4c 3567 /* Restore the branch registers. */
97e242b0
RH
3568 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3569 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3570 {
97e242b0
RH
3571 alt_regno = next_scratch_gr_reg ();
3572 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3573 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3574 cfa_off -= 8;
3575 reg = gen_rtx_REG (DImode, regno);
3576 emit_move_insn (reg, alt_reg);
3577 }
c65ebc55 3578
97e242b0
RH
3579 /* Restore floating point registers. */
3580 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3581 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3582 {
e820471b 3583 gcc_assert (!(cfa_off & 15));
02befdf4 3584 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3585 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 3586 cfa_off -= 16;
0c96007e 3587 }
97e242b0
RH
3588
3589 /* Restore ar.unat for real. */
3590 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3591 {
3592 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3593 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
3594 }
3595
e820471b 3596 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3597
3598 finish_spill_pointers ();
c65ebc55 3599
c93646bd
JJ
3600 if (current_frame_info.total_size
3601 || cfun->machine->ia64_eh_epilogue_sp
3602 || frame_pointer_needed)
97e242b0
RH
3603 {
3604 /* ??? At this point we must generate a magic insn that appears to
3605 modify the spill iterators, the stack pointer, and the frame
3606 pointer. This would allow the most scheduling freedom. For now,
3607 just hard stop. */
3608 emit_insn (gen_blockage ());
3609 }
c65ebc55 3610
97e242b0
RH
3611 if (cfun->machine->ia64_eh_epilogue_sp)
3612 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3613 else if (frame_pointer_needed)
3614 {
3615 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3616 RTX_FRAME_RELATED_P (insn) = 1;
3617 }
3618 else if (current_frame_info.total_size)
0c96007e 3619 {
97e242b0
RH
3620 rtx offset, frame_size_rtx;
3621
3622 frame_size_rtx = GEN_INT (current_frame_info.total_size);
13f70342 3623 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3624 offset = frame_size_rtx;
3625 else
3626 {
3627 regno = next_scratch_gr_reg ();
3628 offset = gen_rtx_REG (DImode, regno);
3629 emit_move_insn (offset, frame_size_rtx);
3630 }
3631
3632 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3633 offset));
3634
3635 RTX_FRAME_RELATED_P (insn) = 1;
3636 if (GET_CODE (offset) != CONST_INT)
bbbbb16a
ILT
3637 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3638 gen_rtx_SET (VOIDmode,
3639 stack_pointer_rtx,
3640 gen_rtx_PLUS (DImode,
3641 stack_pointer_rtx,
3642 frame_size_rtx)));
0c96007e 3643 }
97e242b0
RH
3644
3645 if (cfun->machine->ia64_eh_epilogue_bsp)
3646 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 3647
2ed4af6f
RH
3648 if (! sibcall_p)
3649 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 3650 else
8206fc89
AM
3651 {
3652 int fp = GR_REG (2);
3653 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
9c808aad
AJ
3654 first available call-clobbered register. If there was a frame_pointer
3655 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
8206fc89 3656 so we have to make sure we're using the string "r2" when emitting
9e4f94de 3657 the register name for the assembler. */
6fb5fa3c
DB
3658 if (current_frame_info.r[reg_fp]
3659 && current_frame_info.r[reg_fp] == GR_REG (2))
8206fc89
AM
3660 fp = HARD_FRAME_POINTER_REGNUM;
3661
3662 /* We must emit an alloc to force the input registers to become output
3663 registers. Otherwise, if the callee tries to pass its parameters
3664 through to another call without an intervening alloc, then these
3665 values get lost. */
3666 /* ??? We don't need to preserve all input registers. We only need to
3667 preserve those input registers used as arguments to the sibling call.
3668 It is unclear how to compute that number here. */
3669 if (current_frame_info.n_input_regs != 0)
a8f5224e
DM
3670 {
3671 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3672 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3673 const0_rtx, const0_rtx,
3674 n_inputs, const0_rtx));
3675 RTX_FRAME_RELATED_P (insn) = 1;
3676 }
8206fc89 3677 }
c65ebc55
JW
3678}
3679
97e242b0
RH
3680/* Return 1 if br.ret can do all the work required to return from a
3681 function. */
3682
3683int
9c808aad 3684ia64_direct_return (void)
97e242b0
RH
3685{
3686 if (reload_completed && ! frame_pointer_needed)
3687 {
3688 ia64_compute_frame_size (get_frame_size ());
3689
3690 return (current_frame_info.total_size == 0
3691 && current_frame_info.n_spilled == 0
6fb5fa3c
DB
3692 && current_frame_info.r[reg_save_b0] == 0
3693 && current_frame_info.r[reg_save_pr] == 0
3694 && current_frame_info.r[reg_save_ar_pfs] == 0
3695 && current_frame_info.r[reg_save_ar_unat] == 0
3696 && current_frame_info.r[reg_save_ar_lc] == 0);
97e242b0
RH
3697 }
3698 return 0;
3699}
3700
af1e5518
RH
3701/* Return the magic cookie that we use to hold the return address
3702 during early compilation. */
3703
3704rtx
9c808aad 3705ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
3706{
3707 if (count != 0)
3708 return NULL;
3709 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3710}
3711
3712/* Split this value after reload, now that we know where the return
3713 address is saved. */
3714
3715void
9c808aad 3716ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
3717{
3718 rtx src;
3719
3720 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3721 {
6fb5fa3c
DB
3722 if (current_frame_info.r[reg_save_b0] != 0)
3723 {
3724 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3725 reg_emitted (reg_save_b0);
3726 }
af1e5518
RH
3727 else
3728 {
3729 HOST_WIDE_INT off;
3730 unsigned int regno;
13f70342 3731 rtx off_r;
af1e5518
RH
3732
3733 /* Compute offset from CFA for BR0. */
3734 /* ??? Must be kept in sync with ia64_expand_prologue. */
3735 off = (current_frame_info.spill_cfa_off
3736 + current_frame_info.spill_size);
3737 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3738 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3739 off -= 8;
3740
3741 /* Convert CFA offset to a register based offset. */
3742 if (frame_pointer_needed)
3743 src = hard_frame_pointer_rtx;
3744 else
3745 {
3746 src = stack_pointer_rtx;
3747 off += current_frame_info.total_size;
3748 }
3749
3750 /* Load address into scratch register. */
13f70342
RH
3751 off_r = GEN_INT (off);
3752 if (satisfies_constraint_I (off_r))
3753 emit_insn (gen_adddi3 (dest, src, off_r));
af1e5518
RH
3754 else
3755 {
13f70342 3756 emit_move_insn (dest, off_r);
af1e5518
RH
3757 emit_insn (gen_adddi3 (dest, src, dest));
3758 }
3759
3760 src = gen_rtx_MEM (Pmode, dest);
3761 }
3762 }
3763 else
3764 src = gen_rtx_REG (DImode, BR_REG (0));
3765
3766 emit_move_insn (dest, src);
3767}
3768
10c9f189 3769int
9c808aad 3770ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
3771{
3772 /* Don't clobber any of the registers we reserved for the prologue. */
09639a83 3773 unsigned int r;
10c9f189 3774
6fb5fa3c
DB
3775 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3776 if (to == current_frame_info.r[r]
3777 || from == current_frame_info.r[r]
3778 || to == emitted_frame_related_regs[r]
3779 || from == emitted_frame_related_regs[r])
3780 return 0;
2130b7fb 3781
10c9f189
RH
3782 /* Don't use output registers outside the register frame. */
3783 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3784 return 0;
3785
3786 /* Retain even/oddness on predicate register pairs. */
3787 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3788 return (from & 1) == (to & 1);
3789
3790 return 1;
3791}
3792
301d03af
RS
3793/* Target hook for assembling integer objects. Handle word-sized
3794 aligned objects and detect the cases when @fptr is needed. */
3795
3796static bool
9c808aad 3797ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3798{
b6a41a62 3799 if (size == POINTER_SIZE / BITS_PER_UNIT
301d03af
RS
3800 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3801 && GET_CODE (x) == SYMBOL_REF
1cdbd630 3802 && SYMBOL_REF_FUNCTION_P (x))
301d03af 3803 {
1b79dc38
DM
3804 static const char * const directive[2][2] = {
3805 /* 64-bit pointer */ /* 32-bit pointer */
3806 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3807 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3808 };
3809 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
301d03af
RS
3810 output_addr_const (asm_out_file, x);
3811 fputs (")\n", asm_out_file);
3812 return true;
3813 }
3814 return default_assemble_integer (x, size, aligned_p);
3815}
3816
c65ebc55
JW
3817/* Emit the function prologue. */
3818
08c148a8 3819static void
9c808aad 3820ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3821{
97e242b0
RH
3822 int mask, grsave, grsave_prev;
3823
3824 if (current_frame_info.need_regstk)
3825 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3826 current_frame_info.n_input_regs,
3827 current_frame_info.n_local_regs,
3828 current_frame_info.n_output_regs,
3829 current_frame_info.n_rotate_regs);
c65ebc55 3830
531073e7 3831 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
3832 return;
3833
97e242b0 3834 /* Emit the .prologue directive. */
809d4ef1 3835
97e242b0
RH
3836 mask = 0;
3837 grsave = grsave_prev = 0;
6fb5fa3c 3838 if (current_frame_info.r[reg_save_b0] != 0)
0c96007e 3839 {
97e242b0 3840 mask |= 8;
6fb5fa3c 3841 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
97e242b0 3842 }
6fb5fa3c 3843 if (current_frame_info.r[reg_save_ar_pfs] != 0
97e242b0 3844 && (grsave_prev == 0
6fb5fa3c 3845 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
97e242b0
RH
3846 {
3847 mask |= 4;
3848 if (grsave_prev == 0)
6fb5fa3c
DB
3849 grsave = current_frame_info.r[reg_save_ar_pfs];
3850 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
0c96007e 3851 }
6fb5fa3c 3852 if (current_frame_info.r[reg_fp] != 0
97e242b0 3853 && (grsave_prev == 0
6fb5fa3c 3854 || current_frame_info.r[reg_fp] == grsave_prev + 1))
97e242b0
RH
3855 {
3856 mask |= 2;
3857 if (grsave_prev == 0)
3858 grsave = HARD_FRAME_POINTER_REGNUM;
6fb5fa3c 3859 grsave_prev = current_frame_info.r[reg_fp];
97e242b0 3860 }
6fb5fa3c 3861 if (current_frame_info.r[reg_save_pr] != 0
97e242b0 3862 && (grsave_prev == 0
6fb5fa3c 3863 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
97e242b0
RH
3864 {
3865 mask |= 1;
3866 if (grsave_prev == 0)
6fb5fa3c 3867 grsave = current_frame_info.r[reg_save_pr];
97e242b0
RH
3868 }
3869
738e7b39 3870 if (mask && TARGET_GNU_AS)
97e242b0
RH
3871 fprintf (file, "\t.prologue %d, %d\n", mask,
3872 ia64_dbx_register_number (grsave));
3873 else
3874 fputs ("\t.prologue\n", file);
3875
3876 /* Emit a .spill directive, if necessary, to relocate the base of
3877 the register spill area. */
3878 if (current_frame_info.spill_cfa_off != -16)
3879 fprintf (file, "\t.spill %ld\n",
3880 (long) (current_frame_info.spill_cfa_off
3881 + current_frame_info.spill_size));
c65ebc55
JW
3882}
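/* For illustration: per the mask computation above, bit 8 stands for b0
   (the return pointer), 4 for ar.pfs, 2 for the frame pointer and 1 for pr,
   and the saves must sit in consecutive GRs starting at grsave.  A
   hypothetical layout with b0 saved in r35 and ar.pfs in r36 (fp and pr not
   saved in GRs) gives mask = 8 | 4 = 12 and grsave = r35, so with the GNU
   assembler this emits ".prologue 12, <debug number of r35>".  */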
3883
0186257f
JW
3884/* Emit the .body directive at the scheduled end of the prologue. */
3885
b4c25db2 3886static void
9c808aad 3887ia64_output_function_end_prologue (FILE *file)
0186257f 3888{
531073e7 3889 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
3890 return;
3891
3892 fputs ("\t.body\n", file);
3893}
3894
c65ebc55
JW
3895/* Emit the function epilogue. */
3896
08c148a8 3897static void
9c808aad
AJ
3898ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3899 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3900{
8a959ea5
RH
3901 int i;
3902
6fb5fa3c 3903 if (current_frame_info.r[reg_fp])
97e242b0
RH
3904 {
3905 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3906 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3907 = reg_names[current_frame_info.r[reg_fp]];
3908 reg_names[current_frame_info.r[reg_fp]] = tmp;
3909 reg_emitted (reg_fp);
97e242b0
RH
3910 }
3911 if (! TARGET_REG_NAMES)
3912 {
97e242b0
RH
3913 for (i = 0; i < current_frame_info.n_input_regs; i++)
3914 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3915 for (i = 0; i < current_frame_info.n_local_regs; i++)
3916 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3917 for (i = 0; i < current_frame_info.n_output_regs; i++)
3918 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3919 }
8a959ea5 3920
97e242b0
RH
3921 current_frame_info.initialized = 0;
3922}
c65ebc55
JW
3923
3924int
9c808aad 3925ia64_dbx_register_number (int regno)
c65ebc55 3926{
97e242b0
RH
3927 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3928 from its home at loc79 to something inside the register frame. We
3929 must perform the same renumbering here for the debug info. */
6fb5fa3c 3930 if (current_frame_info.r[reg_fp])
97e242b0
RH
3931 {
3932 if (regno == HARD_FRAME_POINTER_REGNUM)
6fb5fa3c
DB
3933 regno = current_frame_info.r[reg_fp];
3934 else if (regno == current_frame_info.r[reg_fp])
97e242b0
RH
3935 regno = HARD_FRAME_POINTER_REGNUM;
3936 }
3937
3938 if (IN_REGNO_P (regno))
3939 return 32 + regno - IN_REG (0);
3940 else if (LOC_REGNO_P (regno))
3941 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3942 else if (OUT_REGNO_P (regno))
3943 return (32 + current_frame_info.n_input_regs
3944 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3945 else
3946 return regno;
c65ebc55
JW
3947}
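/* A small worked example of the renumbering above, assuming a hypothetical
   frame with n_input_regs == 2 and n_local_regs == 4: IN_REG (1) maps to
   debug register 32 + 1 = 33, LOC_REG (0) to 32 + 2 + 0 = 34, and
   OUT_REG (0) to 32 + 2 + 4 + 0 = 38; any other register keeps its hard
   register number.  */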
3948
2a1211e5
RH
3949/* Implement TARGET_TRAMPOLINE_INIT.
3950
3951 The trampoline should set the static chain pointer to value placed
3952 into the trampoline and should branch to the specified routine.
3953 To make the normal indirect-subroutine calling convention work,
3954 the trampoline must look like a function descriptor; the first
3955 word being the target address and the second being the target's
3956 global pointer.
3957
3958 We abuse the concept of a global pointer by arranging for it
3959 to point to the data we need to load. The complete trampoline
3960 has the following form:
3961
3962 +-------------------+ \
3963 TRAMP: | __ia64_trampoline | |
3964 +-------------------+ > fake function descriptor
3965 | TRAMP+16 | |
3966 +-------------------+ /
3967 | target descriptor |
3968 +-------------------+
3969 | static link |
3970 +-------------------+
3971*/
3972
3973static void
3974ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
97e242b0 3975{
2a1211e5
RH
3976 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3977 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
97e242b0 3978
738e7b39
RK
3979 /* The Intel assembler requires that the global __ia64_trampoline symbol
3980 be declared explicitly. */
3981 if (!TARGET_GNU_AS)
3982 {
3983 static bool declared_ia64_trampoline = false;
3984
3985 if (!declared_ia64_trampoline)
3986 {
3987 declared_ia64_trampoline = true;
b6a41a62
RK
3988 (*targetm.asm_out.globalize_label) (asm_out_file,
3989 "__ia64_trampoline");
738e7b39
RK
3990 }
3991 }
3992
5e89a381 3993 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2a1211e5 3994 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
5e89a381
SE
3995 fnaddr = convert_memory_address (Pmode, fnaddr);
3996 static_chain = convert_memory_address (Pmode, static_chain);
3997
97e242b0 3998 /* Load up our iterator. */
2a1211e5
RH
3999 addr_reg = copy_to_reg (addr);
4000 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
97e242b0
RH
4001
4002 /* The first two words are the fake descriptor:
4003 __ia64_trampoline, ADDR+16. */
f2972bf8
DR
4004 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4005 if (TARGET_ABI_OPEN_VMS)
4006 {
4007 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4008 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4009 relocation against function symbols to make it identical to the
4010 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4011 strict ELF and dereference to get the bare code address. */
4012 rtx reg = gen_reg_rtx (Pmode);
4013 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4014 emit_move_insn (reg, tramp);
4015 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4016 tramp = reg;
4017 }
2a1211e5 4018 emit_move_insn (m_tramp, tramp);
97e242b0 4019 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4020 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0 4021
2a1211e5 4022 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
97e242b0 4023 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4024 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4025
4026 /* The third word is the target descriptor. */
2a1211e5 4027 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
97e242b0 4028 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4029 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4030
4031 /* The fourth word is the static chain. */
2a1211e5 4032 emit_move_insn (m_tramp, static_chain);
97e242b0 4033}
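/* The net effect, for a hypothetical trampoline placed at address T, is the
   32-byte image sketched in the diagram above: the word at T holds the
   address of __ia64_trampoline, T+8 holds T+16, T+16 holds the target's
   function descriptor address, and T+24 holds the static chain.  (On VMS
   the first word instead holds the dereferenced code address of
   __ia64_trampoline, as arranged just above.)  */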
c65ebc55
JW
4034\f
4035/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
4036 for the last named argument which has type TYPE and mode MODE.
4037
4038 We generate the actual spill instructions during prologue generation. */
4039
351a758b
KH
4040static void
4041ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4042 tree type, int * pretend_size,
9c808aad 4043 int second_time ATTRIBUTE_UNUSED)
c65ebc55 4044{
351a758b
KH
4045 CUMULATIVE_ARGS next_cum = *cum;
4046
6c535c69 4047 /* Skip the current argument. */
351a758b 4048 ia64_function_arg_advance (&next_cum, mode, type, 1);
c65ebc55 4049
351a758b 4050 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 4051 {
351a758b 4052 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
4053 *pretend_size = n * UNITS_PER_WORD;
4054 cfun->machine->n_varargs = n;
4055 }
c65ebc55
JW
4056}
4057
4058/* Check whether TYPE is a homogeneous floating point aggregate. If
4059 it is, return the mode of the floating point type that appears
4060 in all leaves. If it is not, return VOIDmode.
4061
4062 An aggregate is a homogeneous floating point aggregate if all
4063 fields/elements in it have the same floating point type (e.g.,
3d6a9acd
RH
4064 SFmode). 128-bit quad-precision floats are excluded.
4065
4066 Variable sized aggregates should never arrive here, since we should
4067 have already decided to pass them by reference. Top-level zero-sized
4068 aggregates are excluded because our parallels crash the middle-end. */
c65ebc55
JW
4069
4070static enum machine_mode
586de218 4071hfa_element_mode (const_tree type, bool nested)
c65ebc55
JW
4072{
4073 enum machine_mode element_mode = VOIDmode;
4074 enum machine_mode mode;
4075 enum tree_code code = TREE_CODE (type);
4076 int know_element_mode = 0;
4077 tree t;
4078
3d6a9acd
RH
4079 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4080 return VOIDmode;
4081
c65ebc55
JW
4082 switch (code)
4083 {
4084 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
0cc8f5c5 4085 case BOOLEAN_TYPE: case POINTER_TYPE:
c65ebc55 4086 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
5662a50d 4087 case LANG_TYPE: case FUNCTION_TYPE:
c65ebc55
JW
4088 return VOIDmode;
4089
4090 /* Fortran complex types are supposed to be HFAs, so we need to handle
4091 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4092 types though. */
4093 case COMPLEX_TYPE:
16448fd4 4094 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
4095 && TYPE_MODE (type) != TCmode)
4096 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
4097 else
4098 return VOIDmode;
4099
4100 case REAL_TYPE:
4101 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4102 mode if this is contained within an aggregate. */
02befdf4 4103 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
4104 return TYPE_MODE (type);
4105 else
4106 return VOIDmode;
4107
4108 case ARRAY_TYPE:
46399021 4109 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
4110
4111 case RECORD_TYPE:
4112 case UNION_TYPE:
4113 case QUAL_UNION_TYPE:
4114 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
4115 {
4116 if (TREE_CODE (t) != FIELD_DECL)
4117 continue;
4118
4119 mode = hfa_element_mode (TREE_TYPE (t), 1);
4120 if (know_element_mode)
4121 {
4122 if (mode != element_mode)
4123 return VOIDmode;
4124 }
4125 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4126 return VOIDmode;
4127 else
4128 {
4129 know_element_mode = 1;
4130 element_mode = mode;
4131 }
4132 }
4133 return element_mode;
4134
4135 default:
4136 /* If we reach here, we probably have some front-end specific type
4137 that the backend doesn't know about. This can happen via the
4138 aggregate_value_p call in init_function_start. All we can do is
4139 ignore unknown tree types. */
4140 return VOIDmode;
4141 }
4142
4143 return VOIDmode;
4144}
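/* Illustrative cases for the walk above (not an exhaustive list): a struct
   of three floats yields SFmode; a struct containing a float and a double
   yields VOIDmode because the element modes differ; a non-integral
   _Complex double yields DFmode via the COMPLEX_TYPE case; and a lone
   REAL_TYPE at the top level (nested is false) yields VOIDmode, since only
   fields inside an aggregate count.  */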
4145
f57fc998
ZW
4146/* Return the number of words required to hold a quantity of TYPE and MODE
4147 when passed as an argument. */
4148static int
4149ia64_function_arg_words (tree type, enum machine_mode mode)
4150{
4151 int words;
4152
4153 if (mode == BLKmode)
4154 words = int_size_in_bytes (type);
4155 else
4156 words = GET_MODE_SIZE (mode);
4157
4158 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4159}
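/* For instance, assuming UNITS_PER_WORD == 8 as on this target: a 12-byte
   BLKmode aggregate needs (12 + 7) / 8 = 2 argument slots, a DImode value
   needs exactly 1, and a 16-byte TFmode value needs 2.  */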
4160
4161/* Return the number of registers that should be skipped so the current
4162 argument (described by TYPE and WORDS) will be properly aligned.
4163
4164 Integer and float arguments larger than 8 bytes start at the next
4165 even boundary. Aggregates larger than 8 bytes start at the next
4166 even boundary if the aggregate has 16 byte alignment. Note that
4167 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4168 but are still to be aligned in registers.
4169
4170 ??? The ABI does not specify how to handle aggregates with
4171 alignment from 9 to 15 bytes, or greater than 16. We handle them
4172 all as if they had 16 byte alignment. Such aggregates can occur
4173 only if gcc extensions are used. */
4174static int
4175ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4176{
f2972bf8
DR
4177 /* No registers are skipped on VMS. */
4178 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
f57fc998
ZW
4179 return 0;
4180
4181 if (type
4182 && TREE_CODE (type) != INTEGER_TYPE
4183 && TREE_CODE (type) != REAL_TYPE)
4184 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4185 else
4186 return words > 1;
4187}
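/* Example of the skip rule above: with cum->words == 3 (an odd slot), a
   16-byte-aligned aggregate returns 1, so the argument starts in slot 4; a
   TFmode scalar (words == 2) likewise returns 1; a single-word integer
   returns 0 and is packed into slot 3 with no padding.  */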
4188
c65ebc55
JW
4189/* Return rtx for register where argument is passed, or zero if it is passed
4190 on the stack. */
c65ebc55
JW
4191/* ??? 128-bit quad-precision floats are always passed in general
4192 registers. */
4193
4194rtx
9c808aad
AJ
4195ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4196 int named, int incoming)
c65ebc55
JW
4197{
4198 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
4199 int words = ia64_function_arg_words (type, mode);
4200 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4201 enum machine_mode hfa_mode = VOIDmode;
4202
f2972bf8
DR
4203 /* For OpenVMS, emit the instruction setting up the argument register here,
4204 when we know it will be emitted together with the other argument setup
4205 insns. This is not conceptually the best place to do this, but it is
4206 the easiest, as we have convenient access to the cumulative args info. */
4207
4208 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4209 && named == 1)
4210 {
4211 unsigned HOST_WIDE_INT regval = cum->words;
4212 int i;
4213
4214 for (i = 0; i < 8; i++)
4215 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4216
4217 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4218 GEN_INT (regval));
4219 }
4220
c65ebc55
JW
4221 /* If all argument slots are used, then it must go on the stack. */
4222 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4223 return 0;
4224
4225 /* Check for and handle homogeneous FP aggregates. */
4226 if (type)
4227 hfa_mode = hfa_element_mode (type, 0);
4228
4229 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4230 and unprototyped hfas are passed specially. */
4231 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4232 {
4233 rtx loc[16];
4234 int i = 0;
4235 int fp_regs = cum->fp_regs;
4236 int int_regs = cum->words + offset;
4237 int hfa_size = GET_MODE_SIZE (hfa_mode);
4238 int byte_size;
4239 int args_byte_size;
4240
4241 /* If prototyped, pass it in FR regs then GR regs.
4242 If not prototyped, pass it in both FR and GR regs.
4243
4244 If this is an SFmode aggregate, then it is possible to run out of
4245 FR regs while GR regs are still left. In that case, we pass the
4246 remaining part in the GR regs. */
4247
4248 /* Fill the FP regs. We do this always. We stop if we reach the end
4249 of the argument, the last FP register, or the last argument slot. */
4250
4251 byte_size = ((mode == BLKmode)
4252 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4253 args_byte_size = int_regs * UNITS_PER_WORD;
4254 offset = 0;
4255 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4256 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4257 {
4258 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4259 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4260 + fp_regs)),
4261 GEN_INT (offset));
c65ebc55
JW
4262 offset += hfa_size;
4263 args_byte_size += hfa_size;
4264 fp_regs++;
4265 }
4266
4267 /* If no prototype, then the whole thing must go in GR regs. */
4268 if (! cum->prototype)
4269 offset = 0;
4270 /* If this is an SFmode aggregate, then we might have some left over
4271 that needs to go in GR regs. */
4272 else if (byte_size != offset)
4273 int_regs += offset / UNITS_PER_WORD;
4274
4275 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4276
4277 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4278 {
4279 enum machine_mode gr_mode = DImode;
826b47cc 4280 unsigned int gr_size;
c65ebc55
JW
4281
4282 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4283 then this goes in a GR reg left adjusted/little endian, right
4284 adjusted/big endian. */
4285 /* ??? Currently this is handled wrong, because 4-byte hunks are
4286 always right adjusted/little endian. */
4287 if (offset & 0x4)
4288 gr_mode = SImode;
4289 /* If we have an even 4 byte hunk because the aggregate is a
4290 multiple of 4 bytes in size, then this goes in a GR reg right
4291 adjusted/little endian. */
4292 else if (byte_size - offset == 4)
4293 gr_mode = SImode;
4294
4295 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4296 gen_rtx_REG (gr_mode, (basereg
4297 + int_regs)),
4298 GEN_INT (offset));
826b47cc
ZW
4299
4300 gr_size = GET_MODE_SIZE (gr_mode);
4301 offset += gr_size;
4302 if (gr_size == UNITS_PER_WORD
4303 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4304 int_regs++;
4305 else if (gr_size > UNITS_PER_WORD)
4306 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55 4307 }
9dec91d4 4308 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4309 }
f2972bf8
DR
4310
4311 /* On OpenVMS variable argument is either in Rn or Fn. */
4312 else if (TARGET_ABI_OPEN_VMS && named == 0)
4313 {
4314 if (FLOAT_MODE_P (mode))
4315 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4316 else
4317 return gen_rtx_REG (mode, basereg + cum->words);
4318 }
c65ebc55
JW
4319
4320 /* Integral values and aggregates go in general registers. If we have run out of
4321 FR registers, then FP values must also go in general registers. This can
4322 happen when we have an SFmode HFA. */
02befdf4
ZW
4323 else if (mode == TFmode || mode == TCmode
4324 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
4325 {
4326 int byte_size = ((mode == BLKmode)
4327 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4328 if (BYTES_BIG_ENDIAN
4329 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4330 && byte_size < UNITS_PER_WORD
4331 && byte_size > 0)
4332 {
4333 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4334 gen_rtx_REG (DImode,
4335 (basereg + cum->words
4336 + offset)),
4337 const0_rtx);
4338 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4339 }
4340 else
4341 return gen_rtx_REG (mode, basereg + cum->words + offset);
4342
4343 }
c65ebc55
JW
4344
4345 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4346 named, and in a GR register when unnamed. */
c65ebc55
JW
4347 else if (cum->prototype)
4348 {
f9c887ac 4349 if (named)
c65ebc55 4350 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
4351 /* In big-endian mode, an anonymous SFmode value must be represented
4352 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4353 the value into the high half of the general register. */
4354 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4355 return gen_rtx_PARALLEL (mode,
4356 gen_rtvec (1,
4357 gen_rtx_EXPR_LIST (VOIDmode,
4358 gen_rtx_REG (DImode, basereg + cum->words + offset),
4359 const0_rtx)));
4360 else
4361 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
4362 }
4363 /* If there is no prototype, then FP values go in both FR and GR
4364 registers. */
4365 else
4366 {
f9c887ac
ZW
4367 /* See comment above. */
4368 enum machine_mode inner_mode =
4369 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4370
c65ebc55
JW
4371 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4372 gen_rtx_REG (mode, (FR_ARG_FIRST
4373 + cum->fp_regs)),
4374 const0_rtx);
4375 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 4376 gen_rtx_REG (inner_mode,
c65ebc55
JW
4377 (basereg + cum->words
4378 + offset)),
4379 const0_rtx);
809d4ef1 4380
c65ebc55
JW
4381 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4382 }
4383}
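/* A concrete (hypothetical) example of the HFA path above: a named,
   prototyped struct of four floats arriving with cum->fp_regs == 0 and
   cum->words == 0 yields a PARALLEL of four SFmode pieces in f8..f11 at
   byte offsets 0, 4, 8 and 12; since all 16 bytes fit in FR registers, no
   GR piece is appended.  */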
4384
78a52f11 4385/* Return number of bytes, at the beginning of the argument, that must be
c65ebc55
JW
4386 put in registers. 0 if the argument is entirely in registers or entirely
4387 in memory. */
4388
78a52f11
RH
4389static int
4390ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4391 tree type, bool named ATTRIBUTE_UNUSED)
c65ebc55 4392{
f57fc998
ZW
4393 int words = ia64_function_arg_words (type, mode);
4394 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4395
4396 /* If all argument slots are used, then it must go on the stack. */
4397 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4398 return 0;
4399
4400 /* It doesn't matter whether the argument goes in FR or GR regs. If
4401 it fits within the 8 argument slots, then it goes entirely in
4402 registers. If it extends past the last argument slot, then the rest
4403 goes on the stack. */
4404
4405 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4406 return 0;
4407
78a52f11 4408 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
c65ebc55
JW
4409}
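/* Worked example for the split above (hypothetical call site): an argument
   occupying 3 slots that starts at cum->words == 6 with no alignment skip
   overflows the 8 available slots, so (8 - 6) * 8 = 16 bytes go in
   registers and the remaining 8 bytes go on the stack.  */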
4410
f2972bf8
DR
4411/* Return ivms_arg_type based on machine_mode. */
4412
4413static enum ivms_arg_type
4414ia64_arg_type (enum machine_mode mode)
4415{
4416 switch (mode)
4417 {
4418 case SFmode:
4419 return FS;
4420 case DFmode:
4421 return FT;
4422 default:
4423 return I64;
4424 }
4425}
4426
c65ebc55
JW
4427/* Update CUM to point after this argument. This is patterned after
4428 ia64_function_arg. */
4429
4430void
9c808aad
AJ
4431ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4432 tree type, int named)
c65ebc55 4433{
f57fc998
ZW
4434 int words = ia64_function_arg_words (type, mode);
4435 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4436 enum machine_mode hfa_mode = VOIDmode;
4437
4438 /* If all arg slots are already full, then there is nothing to do. */
4439 if (cum->words >= MAX_ARGUMENT_SLOTS)
f2972bf8
DR
4440 {
4441 cum->words += words + offset;
4442 return;
4443 }
c65ebc55 4444
f2972bf8 4445 cum->atypes[cum->words] = ia64_arg_type (mode);
c65ebc55
JW
4446 cum->words += words + offset;
4447
4448 /* Check for and handle homogeneous FP aggregates. */
4449 if (type)
4450 hfa_mode = hfa_element_mode (type, 0);
4451
4452 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4453 and unprototyped hfas are passed specially. */
4454 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4455 {
4456 int fp_regs = cum->fp_regs;
4457 /* This is the original value of cum->words + offset. */
4458 int int_regs = cum->words - words;
4459 int hfa_size = GET_MODE_SIZE (hfa_mode);
4460 int byte_size;
4461 int args_byte_size;
4462
4463 /* If prototyped, pass it in FR regs then GR regs.
4464 If not prototyped, pass it in both FR and GR regs.
4465
4466 If this is an SFmode aggregate, then it is possible to run out of
4467 FR regs while GR regs are still left. In that case, we pass the
4468 remaining part in the GR regs. */
4469
4470 /* Fill the FP regs. We do this always. We stop if we reach the end
4471 of the argument, the last FP register, or the last argument slot. */
4472
4473 byte_size = ((mode == BLKmode)
4474 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4475 args_byte_size = int_regs * UNITS_PER_WORD;
4476 offset = 0;
4477 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4478 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4479 {
c65ebc55
JW
4480 offset += hfa_size;
4481 args_byte_size += hfa_size;
4482 fp_regs++;
4483 }
4484
4485 cum->fp_regs = fp_regs;
4486 }
4487
f2972bf8
DR
4488 /* On OpenVMS variable argument is either in Rn or Fn. */
4489 else if (TARGET_ABI_OPEN_VMS && named == 0)
4490 {
4491 cum->int_regs = cum->words;
4492 cum->fp_regs = cum->words;
4493 }
4494
d13256a3
SE
4495 /* Integral values and aggregates go in general registers. So do TFmode FP values.
4496 If we have run out of FR registers, then other FP values must also go in
4497 general registers. This can happen when we have an SFmode HFA. */
4498 else if (mode == TFmode || mode == TCmode
4499 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
648fe28b 4500 cum->int_regs = cum->words;
c65ebc55
JW
4501
4502 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4503 named, and in a GR register when unnamed. */
c65ebc55
JW
4504 else if (cum->prototype)
4505 {
4506 if (! named)
648fe28b 4507 cum->int_regs = cum->words;
c65ebc55
JW
4508 else
4509 /* ??? Complex types should not reach here. */
4510 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4511 }
4512 /* If there is no prototype, then FP values go in both FR and GR
4513 registers. */
4514 else
9c808aad 4515 {
648fe28b
RH
4516 /* ??? Complex types should not reach here. */
4517 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4518 cum->int_regs = cum->words;
4519 }
c65ebc55 4520}
51dcde6f 4521
d13256a3 4522/* Arguments with alignment larger than 8 bytes start at the next even
93348822 4523 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
d13256a3
SE
4524 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4525
4526int
4527ia64_function_arg_boundary (enum machine_mode mode, tree type)
4528{
4529
4530 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4531 return PARM_BOUNDARY * 2;
4532
4533 if (type)
4534 {
4535 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4536 return PARM_BOUNDARY * 2;
4537 else
4538 return PARM_BOUNDARY;
4539 }
4540
4541 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4542 return PARM_BOUNDARY * 2;
4543 else
4544 return PARM_BOUNDARY;
4545}
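/* Examples of the rules above: a TFmode argument on ILP32 HP-UX gets
   2 * PARM_BOUNDARY, as does any type whose alignment exceeds PARM_BOUNDARY
   and, when no type is available, any mode wider than PARM_BOUNDARY
   (TImode, for instance); everything else, such as a plain DImode
   argument, gets PARM_BOUNDARY.  */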
4546
599aedd9
RH
4547/* True if it is OK to do sibling call optimization for the specified
4548 call expression EXP. DECL will be the called function, or NULL if
4549 this is an indirect call. */
4550static bool
9c808aad 4551ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 4552{
097f3d48
JW
4553 /* We can't perform a sibcall if the current function has the syscall_linkage
4554 attribute. */
4555 if (lookup_attribute ("syscall_linkage",
4556 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4557 return false;
4558
b23ba0b8 4559 /* We must always return with our current GP. This means we can
c208436c
SE
4560 only sibcall to functions defined in the current module unless
4561 TARGET_CONST_GP is set to true. */
4562 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
599aedd9 4563}
c65ebc55 4564\f
c65ebc55
JW
4565
4566/* Implement va_arg. */
4567
23a60a04 4568static tree
726a989a
RB
4569ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4570 gimple_seq *post_p)
cd3ce9b4 4571{
cd3ce9b4 4572 /* Variable sized types are passed by reference. */
08b0dc1b 4573 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
cd3ce9b4 4574 {
23a60a04
JM
4575 tree ptrtype = build_pointer_type (type);
4576 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
c2433d7d 4577 return build_va_arg_indirect_ref (addr);
cd3ce9b4
JM
4578 }
4579
4580 /* Aggregate arguments with alignment larger than 8 bytes start at
4581 the next even boundary. Integer and floating point arguments
4582 do so if they are larger than 8 bytes, whether or not they are
4583 also aligned larger than 8 bytes. */
4584 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4585 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4586 {
5be014d5
AP
4587 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4588 size_int (2 * UNITS_PER_WORD - 1));
4589 t = fold_convert (sizetype, t);
47a25a46 4590 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5be014d5
AP
4591 size_int (-2 * UNITS_PER_WORD));
4592 t = fold_convert (TREE_TYPE (valist), t);
726a989a 4593 gimplify_assign (unshare_expr (valist), t, pre_p);
cd3ce9b4
JM
4594 }
4595
23a60a04 4596 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 4597}
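/* The POINTER_PLUS/BIT_AND pair built above is the usual round-up idiom:
   with UNITS_PER_WORD == 8 it computes valist = (valist + 15) & -16, so a
   hypothetical ap of 0x7fff0028 is bumped to 0x7fff0030 before the
   oversized or over-aligned argument is fetched.  */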
c65ebc55
JW
4598\f
4599/* Return 1 if the function return value is returned in memory. Return 0 if it is
4600 in a register. */
4601
351a758b 4602static bool
586de218 4603ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
c65ebc55
JW
4604{
4605 enum machine_mode mode;
4606 enum machine_mode hfa_mode;
487b97e0 4607 HOST_WIDE_INT byte_size;
c65ebc55
JW
4608
4609 mode = TYPE_MODE (valtype);
487b97e0
RH
4610 byte_size = GET_MODE_SIZE (mode);
4611 if (mode == BLKmode)
4612 {
4613 byte_size = int_size_in_bytes (valtype);
4614 if (byte_size < 0)
351a758b 4615 return true;
487b97e0 4616 }
c65ebc55
JW
4617
4618 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4619
4620 hfa_mode = hfa_element_mode (valtype, 0);
4621 if (hfa_mode != VOIDmode)
4622 {
4623 int hfa_size = GET_MODE_SIZE (hfa_mode);
4624
c65ebc55 4625 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 4626 return true;
c65ebc55 4627 else
351a758b 4628 return false;
c65ebc55 4629 }
c65ebc55 4630 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 4631 return true;
c65ebc55 4632 else
351a758b 4633 return false;
c65ebc55
JW
4634}
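/* Examples of the rule above: an HFA of eight doubles (64 bytes, eight
   elements) still comes back in FP registers, while nine doubles exceed
   MAX_ARGUMENT_SLOTS and force a memory return; a non-HFA value is returned
   in memory once it is larger than UNITS_PER_WORD * MAX_INT_RETURN_SLOTS
   bytes.  */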
4635
4636/* Return rtx for register that holds the function return value. */
4637
4638rtx
f2972bf8 4639ia64_function_value (const_tree valtype, const_tree func)
c65ebc55
JW
4640{
4641 enum machine_mode mode;
4642 enum machine_mode hfa_mode;
f2972bf8 4643 int unsignedp;
c65ebc55
JW
4644
4645 mode = TYPE_MODE (valtype);
4646 hfa_mode = hfa_element_mode (valtype, 0);
4647
4648 if (hfa_mode != VOIDmode)
4649 {
4650 rtx loc[8];
4651 int i;
4652 int hfa_size;
4653 int byte_size;
4654 int offset;
4655
4656 hfa_size = GET_MODE_SIZE (hfa_mode);
4657 byte_size = ((mode == BLKmode)
4658 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4659 offset = 0;
4660 for (i = 0; offset < byte_size; i++)
4661 {
4662 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4663 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4664 GEN_INT (offset));
c65ebc55
JW
4665 offset += hfa_size;
4666 }
9dec91d4 4667 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4668 }
f57fc998 4669 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
4670 return gen_rtx_REG (mode, FR_ARG_FIRST);
4671 else
3870df96 4672 {
8c5cacfd
RH
4673 bool need_parallel = false;
4674
4675 /* In big-endian mode, we need to manage the layout of aggregates
4676 in the registers so that we get the bits properly aligned in
4677 the highpart of the registers. */
3870df96
SE
4678 if (BYTES_BIG_ENDIAN
4679 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
8c5cacfd
RH
4680 need_parallel = true;
4681
4682 /* Something like struct S { long double x; char a[0] } is not an
4683 HFA structure, and therefore doesn't go in fp registers. But
4684 the middle-end will give it XFmode anyway, and XFmode values
4685 don't normally fit in integer registers. So we need to smuggle
4686 the value inside a parallel. */
4de67c26 4687 else if (mode == XFmode || mode == XCmode || mode == RFmode)
8c5cacfd
RH
4688 need_parallel = true;
4689
4690 if (need_parallel)
3870df96
SE
4691 {
4692 rtx loc[8];
4693 int offset;
4694 int bytesize;
4695 int i;
4696
4697 offset = 0;
4698 bytesize = int_size_in_bytes (valtype);
543144ed
JM
4699 /* An empty PARALLEL is invalid here, but the return value
4700 doesn't matter for empty structs. */
4701 if (bytesize == 0)
4702 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96
SE
4703 for (i = 0; offset < bytesize; i++)
4704 {
4705 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4706 gen_rtx_REG (DImode,
4707 GR_RET_FIRST + i),
4708 GEN_INT (offset));
4709 offset += UNITS_PER_WORD;
4710 }
4711 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4712 }
8c5cacfd 4713
f2972bf8
DR
4714 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4715 func ? TREE_TYPE (func) : NULL_TREE,
4716 true);
4717
8c5cacfd 4718 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96 4719 }
c65ebc55
JW
4720}
4721
fdbe66f2 4722/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6b2300b3
JJ
4723 We need to emit DTP-relative relocations. */
4724
fdbe66f2 4725static void
9c808aad 4726ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3 4727{
6f3113ed
SE
4728 gcc_assert (size == 4 || size == 8);
4729 if (size == 4)
4730 fputs ("\tdata4.ua\t@dtprel(", file);
4731 else
4732 fputs ("\tdata8.ua\t@dtprel(", file);
6b2300b3
JJ
4733 output_addr_const (file, x);
4734 fputs (")", file);
4735}
4736
c65ebc55
JW
4737/* Print a memory address as an operand to reference that memory location. */
4738
4739/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4740 also call this from ia64_print_operand for memory addresses. */
4741
4742void
9c808aad
AJ
4743ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4744 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
4745{
4746}
4747
3569057d 4748/* Print an operand to an assembler instruction.
c65ebc55
JW
4749 C Swap and print a comparison operator.
4750 D Print an FP comparison operator.
4751 E Print 32 - constant, for SImode shifts as extract.
66db6b45 4752 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
4753 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4754 a floating point register emitted normally.
735b94a7 4755 G A floating point constant.
c65ebc55 4756 I Invert a predicate register by adding 1.
e5bde68a 4757 J Select the proper predicate register for a condition.
6b6c1201 4758 j Select the inverse predicate register for a condition.
c65ebc55
JW
4759 O Append .acq for volatile load.
4760 P Postincrement of a MEM.
4761 Q Append .rel for volatile store.
4883241c 4762 R Print .s .d or nothing for a single, double or no truncation.
c65ebc55
JW
4763 S Shift amount for shladd instruction.
4764 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4765 for Intel assembler.
4766 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4767 for Intel assembler.
a71aef0b 4768 X A pair of floating point registers.
c65ebc55 4769 r Print register name, or constant 0 as r0. HP compatibility for
f61134e8
RH
4770 Linux kernel.
4771 v Print vector constant value as an 8-byte integer value. */
4772
c65ebc55 4773void
9c808aad 4774ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 4775{
e57b9d65
RH
4776 const char *str;
4777
c65ebc55
JW
4778 switch (code)
4779 {
c65ebc55
JW
4780 case 0:
4781 /* Handled below. */
4782 break;
809d4ef1 4783
c65ebc55
JW
4784 case 'C':
4785 {
4786 enum rtx_code c = swap_condition (GET_CODE (x));
4787 fputs (GET_RTX_NAME (c), file);
4788 return;
4789 }
4790
4791 case 'D':
e57b9d65
RH
4792 switch (GET_CODE (x))
4793 {
4794 case NE:
4795 str = "neq";
4796 break;
4797 case UNORDERED:
4798 str = "unord";
4799 break;
4800 case ORDERED:
4801 str = "ord";
4802 break;
86ad1da0
SE
4803 case UNLT:
4804 str = "nge";
4805 break;
4806 case UNLE:
4807 str = "ngt";
4808 break;
4809 case UNGT:
4810 str = "nle";
4811 break;
4812 case UNGE:
4813 str = "nlt";
4814 break;
e57b9d65
RH
4815 default:
4816 str = GET_RTX_NAME (GET_CODE (x));
4817 break;
4818 }
4819 fputs (str, file);
c65ebc55
JW
4820 return;
4821
4822 case 'E':
4823 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4824 return;
4825
66db6b45
RH
4826 case 'e':
4827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4828 return;
4829
c65ebc55
JW
4830 case 'F':
4831 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 4832 str = reg_names [FR_REG (0)];
c65ebc55 4833 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 4834 str = reg_names [FR_REG (1)];
c65ebc55 4835 else
e820471b
NS
4836 {
4837 gcc_assert (GET_CODE (x) == REG);
4838 str = reg_names [REGNO (x)];
4839 }
e57b9d65 4840 fputs (str, file);
c65ebc55
JW
4841 return;
4842
735b94a7
SE
4843 case 'G':
4844 {
4845 long val[4];
4846 REAL_VALUE_TYPE rv;
4847 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4848 real_to_target (val, &rv, GET_MODE (x));
4849 if (GET_MODE (x) == SFmode)
4850 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4851 else if (GET_MODE (x) == DFmode)
4852 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4853 & 0xffffffff,
4854 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4855 & 0xffffffff);
4856 else
4857 output_operand_lossage ("invalid %%G mode");
4858 }
4859 return;
4860
c65ebc55
JW
4861 case 'I':
4862 fputs (reg_names [REGNO (x) + 1], file);
4863 return;
4864
e5bde68a 4865 case 'J':
6b6c1201
RH
4866 case 'j':
4867 {
4868 unsigned int regno = REGNO (XEXP (x, 0));
4869 if (GET_CODE (x) == EQ)
4870 regno += 1;
4871 if (code == 'j')
4872 regno ^= 1;
4873 fputs (reg_names [regno], file);
4874 }
e5bde68a
RH
4875 return;
4876
c65ebc55
JW
4877 case 'O':
4878 if (MEM_VOLATILE_P (x))
4879 fputs(".acq", file);
4880 return;
4881
4882 case 'P':
4883 {
4b983fdc 4884 HOST_WIDE_INT value;
c65ebc55 4885
4b983fdc
RH
4886 switch (GET_CODE (XEXP (x, 0)))
4887 {
4888 default:
4889 return;
4890
4891 case POST_MODIFY:
4892 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4893 if (GET_CODE (x) == CONST_INT)
08012cda 4894 value = INTVAL (x);
e820471b 4895 else
4b983fdc 4896 {
e820471b 4897 gcc_assert (GET_CODE (x) == REG);
08012cda 4898 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
4899 return;
4900 }
4b983fdc 4901 break;
c65ebc55 4902
4b983fdc
RH
4903 case POST_INC:
4904 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 4905 break;
c65ebc55 4906
4b983fdc 4907 case POST_DEC:
08012cda 4908 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
4909 break;
4910 }
809d4ef1 4911
4a0a75dd 4912 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
4913 return;
4914 }
4915
4916 case 'Q':
4917 if (MEM_VOLATILE_P (x))
4918 fputs(".rel", file);
4919 return;
4920
4883241c
SE
4921 case 'R':
4922 if (x == CONST0_RTX (GET_MODE (x)))
4923 fputs(".s", file);
4924 else if (x == CONST1_RTX (GET_MODE (x)))
4925 fputs(".d", file);
4926 else if (x == CONST2_RTX (GET_MODE (x)))
4927 ;
4928 else
4929 output_operand_lossage ("invalid %%R value");
4930 return;
4931
c65ebc55 4932 case 'S':
809d4ef1 4933 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
4934 return;
4935
4936 case 'T':
4937 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4938 {
809d4ef1 4939 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4940 return;
4941 }
4942 break;
4943
4944 case 'U':
4945 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4946 {
3b572406 4947 const char *prefix = "0x";
c65ebc55
JW
4948 if (INTVAL (x) & 0x80000000)
4949 {
4950 fprintf (file, "0xffffffff");
4951 prefix = "";
4952 }
809d4ef1 4953 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4954 return;
4955 }
4956 break;
809d4ef1 4957
a71aef0b
JB
4958 case 'X':
4959 {
4960 unsigned int regno = REGNO (x);
4961 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4962 }
4963 return;
4964
c65ebc55 4965 case 'r':
18a3c539
JW
4966 /* If this operand is the constant zero, write it as register zero.
4967 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
4968 if (GET_CODE (x) == REG)
4969 fputs (reg_names[REGNO (x)], file);
4970 else if (x == CONST0_RTX (GET_MODE (x)))
4971 fputs ("r0", file);
18a3c539
JW
4972 else if (GET_CODE (x) == CONST_INT)
4973 output_addr_const (file, x);
c65ebc55
JW
4974 else
4975 output_operand_lossage ("invalid %%r value");
4976 return;
4977
f61134e8
RH
4978 case 'v':
4979 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4980 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4981 break;
4982
85548039
RH
4983 case '+':
4984 {
4985 const char *which;
9c808aad 4986
85548039
RH
4987 /* For conditional branches, returns or calls, substitute
4988 sptk, dptk, dpnt, or spnt for %s. */
4989 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4990 if (x)
4991 {
4992 int pred_val = INTVAL (XEXP (x, 0));
4993
 4994	      /* Guess top and bottom 2% statically predicted.  */
2c9e13f3
JH
4995 if (pred_val < REG_BR_PROB_BASE / 50
4996 && br_prob_note_reliable_p (x))
85548039
RH
4997 which = ".spnt";
4998 else if (pred_val < REG_BR_PROB_BASE / 2)
4999 which = ".dpnt";
2c9e13f3
JH
5000 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5001 || !br_prob_note_reliable_p (x))
85548039
RH
5002 which = ".dptk";
5003 else
5004 which = ".sptk";
5005 }
5006 else if (GET_CODE (current_output_insn) == CALL_INSN)
5007 which = ".sptk";
5008 else
5009 which = ".dptk";
5010
5011 fputs (which, file);
5012 return;
5013 }
5014
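	/* Editorial illustration (not from the original source): with
	   REG_BR_PROB_BASE of 10000, a reliable REG_BR_PROB note of 150
	   (1.5%) selects ".spnt" above, one of 9900 (99%) selects
	   ".sptk", and intermediate probabilities fall back to the
	   dynamic hints ".dpnt" or ".dptk"; the sample probabilities are
	   hypothetical.  */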
6f8aa100
RH
5015 case ',':
5016 x = current_insn_predicate;
5017 if (x)
5018 {
5019 unsigned int regno = REGNO (XEXP (x, 0));
5020 if (GET_CODE (x) == EQ)
5021 regno += 1;
6f8aa100
RH
5022 fprintf (file, "(%s) ", reg_names [regno]);
5023 }
5024 return;
5025
c65ebc55
JW
5026 default:
5027 output_operand_lossage ("ia64_print_operand: unknown code");
5028 return;
5029 }
5030
5031 switch (GET_CODE (x))
5032 {
5033 /* This happens for the spill/restore instructions. */
5034 case POST_INC:
4b983fdc
RH
5035 case POST_DEC:
5036 case POST_MODIFY:
c65ebc55 5037 x = XEXP (x, 0);
ed168e45 5038 /* ... fall through ... */
c65ebc55
JW
5039
5040 case REG:
5041 fputs (reg_names [REGNO (x)], file);
5042 break;
5043
5044 case MEM:
5045 {
5046 rtx addr = XEXP (x, 0);
ec8e098d 5047 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
5048 addr = XEXP (addr, 0);
5049 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5050 break;
5051 }
809d4ef1 5052
c65ebc55
JW
5053 default:
5054 output_addr_const (file, x);
5055 break;
5056 }
5057
5058 return;
5059}
c65ebc55 5060\f
3c50106f
RH
5061/* Compute a (partial) cost for rtx X. Return true if the complete
5062 cost has been computed, and false if subexpressions should be
5063 scanned. In either case, *TOTAL contains the cost result. */
5064/* ??? This is incomplete. */
5065
5066static bool
f40751dd
JH
5067ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5068 bool speed ATTRIBUTE_UNUSED)
3c50106f
RH
5069{
5070 switch (code)
5071 {
5072 case CONST_INT:
5073 switch (outer_code)
5074 {
5075 case SET:
13f70342 5076 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
3c50106f
RH
5077 return true;
5078 case PLUS:
13f70342 5079 if (satisfies_constraint_I (x))
3c50106f 5080 *total = 0;
13f70342 5081 else if (satisfies_constraint_J (x))
3c50106f
RH
5082 *total = 1;
5083 else
5084 *total = COSTS_N_INSNS (1);
5085 return true;
5086 default:
13f70342 5087 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
3c50106f
RH
5088 *total = 0;
5089 else
5090 *total = COSTS_N_INSNS (1);
5091 return true;
5092 }
5093
5094 case CONST_DOUBLE:
5095 *total = COSTS_N_INSNS (1);
5096 return true;
5097
5098 case CONST:
5099 case SYMBOL_REF:
5100 case LABEL_REF:
5101 *total = COSTS_N_INSNS (3);
5102 return true;
5103
5104 case MULT:
5105 /* For multiplies wider than HImode, we have to go to the FPU,
5106 which normally involves copies. Plus there's the latency
5107 of the multiply itself, and the latency of the instructions to
5108 transfer integer regs to FP regs. */
5109 /* ??? Check for FP mode. */
5110 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5111 *total = COSTS_N_INSNS (10);
5112 else
5113 *total = COSTS_N_INSNS (2);
5114 return true;
5115
5116 case PLUS:
5117 case MINUS:
5118 case ASHIFT:
5119 case ASHIFTRT:
5120 case LSHIFTRT:
5121 *total = COSTS_N_INSNS (1);
5122 return true;
5123
5124 case DIV:
5125 case UDIV:
5126 case MOD:
5127 case UMOD:
5128 /* We make divide expensive, so that divide-by-constant will be
5129 optimized to a multiply. */
5130 *total = COSTS_N_INSNS (60);
5131 return true;
5132
5133 default:
5134 return false;
5135 }
5136}
5137
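/* Editorial note (an assumption-based example, not from the original
   source): assuming the usual ia64 meanings of the immediate
   constraints ('I' a 14-bit add immediate, 'J' a 22-bit addl
   immediate), a CONST_INT used as a PLUS operand is costed 0 if it
   fits 14 bits, 1 if it only fits 22 bits, and a full instruction
   otherwise, which biases the cost model against materializing large
   constants.  */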
9e4f94de 5138/* Calculate the cost of moving data from a register in class FROM to
7109d286 5139 one in class TO, using MODE. */
5527bf14
RH
5140
5141int
9c808aad
AJ
5142ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
5143 enum reg_class to)
5527bf14 5144{
7109d286
RH
5145 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5146 if (to == ADDL_REGS)
5147 to = GR_REGS;
5148 if (from == ADDL_REGS)
5149 from = GR_REGS;
5150
5151 /* All costs are symmetric, so reduce cases by putting the
5152 lower number class as the destination. */
5153 if (from < to)
5154 {
5155 enum reg_class tmp = to;
5156 to = from, from = tmp;
5157 }
5158
02befdf4 5159 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286
RH
5160 so that we get secondary memory reloads. Between FR_REGS,
5161 we have to make this at least as expensive as MEMORY_MOVE_COST
5162 to avoid spectacularly poor register class preferencing. */
4de67c26 5163 if (mode == XFmode || mode == RFmode)
7109d286
RH
5164 {
5165 if (to != GR_REGS || from != GR_REGS)
5166 return MEMORY_MOVE_COST (mode, to, 0);
5167 else
5168 return 3;
5169 }
5170
5171 switch (to)
5172 {
5173 case PR_REGS:
5174 /* Moving between PR registers takes two insns. */
5175 if (from == PR_REGS)
5176 return 3;
5177 /* Moving between PR and anything but GR is impossible. */
5178 if (from != GR_REGS)
5179 return MEMORY_MOVE_COST (mode, to, 0);
5180 break;
5181
5182 case BR_REGS:
5183 /* Moving between BR and anything but GR is impossible. */
5184 if (from != GR_REGS && from != GR_AND_BR_REGS)
5185 return MEMORY_MOVE_COST (mode, to, 0);
5186 break;
5187
5188 case AR_I_REGS:
5189 case AR_M_REGS:
5190 /* Moving between AR and anything but GR is impossible. */
5191 if (from != GR_REGS)
5192 return MEMORY_MOVE_COST (mode, to, 0);
5193 break;
5194
5195 case GR_REGS:
5196 case FR_REGS:
a71aef0b 5197 case FP_REGS:
7109d286
RH
5198 case GR_AND_FR_REGS:
5199 case GR_AND_BR_REGS:
5200 case ALL_REGS:
5201 break;
5202
5203 default:
e820471b 5204 gcc_unreachable ();
7109d286 5205 }
3f622353 5206
5527bf14
RH
5207 return 2;
5208}
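/* Editorial note (not from the original source): with the rules above,
   for example, a GR<->BR or GR<->AR copy costs 2, a PR<->PR copy 3,
   and an XFmode FR<->GR copy is charged MEMORY_MOVE_COST so that
   reload prefers routing such moves through memory.  */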
c65ebc55 5209
0a2aaacc 5210/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
f61134e8
RH
5211 to use when copying X into that class. */
5212
5213enum reg_class
0a2aaacc 5214ia64_preferred_reload_class (rtx x, enum reg_class rclass)
f61134e8 5215{
0a2aaacc 5216 switch (rclass)
f61134e8
RH
5217 {
5218 case FR_REGS:
a71aef0b 5219 case FP_REGS:
f61134e8
RH
5220 /* Don't allow volatile mem reloads into floating point registers.
5221 This is defined to force reload to choose the r/m case instead
5222 of the f/f case when reloading (set (reg fX) (mem/v)). */
5223 if (MEM_P (x) && MEM_VOLATILE_P (x))
5224 return NO_REGS;
5225
5226 /* Force all unrecognized constants into the constant pool. */
5227 if (CONSTANT_P (x))
5228 return NO_REGS;
5229 break;
5230
5231 case AR_M_REGS:
5232 case AR_I_REGS:
5233 if (!OBJECT_P (x))
5234 return NO_REGS;
5235 break;
5236
5237 default:
5238 break;
5239 }
5240
0a2aaacc 5241 return rclass;
f61134e8
RH
5242}
5243
c65ebc55 5244/* This function returns the register class required for a secondary
0a2aaacc 5245 register when copying between one of the registers in RCLASS, and X,
c65ebc55
JW
5246 using MODE. A return value of NO_REGS means that no secondary register
5247 is required. */
5248
5249enum reg_class
0a2aaacc 5250ia64_secondary_reload_class (enum reg_class rclass,
9c808aad 5251 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
5252{
5253 int regno = -1;
5254
5255 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5256 regno = true_regnum (x);
5257
0a2aaacc 5258 switch (rclass)
97e242b0
RH
5259 {
5260 case BR_REGS:
7109d286
RH
5261 case AR_M_REGS:
5262 case AR_I_REGS:
5263 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5264 interaction. We end up with two pseudos with overlapping lifetimes
5265 both of which are equiv to the same constant, and both which need
5266 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5267 changes depending on the path length, which means the qty_first_reg
5268 check in make_regs_eqv can give different answers at different times.
5269 At some point I'll probably need a reload_indi pattern to handle
5270 this.
5271
5272 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5273 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5274 non-general registers for good measure. */
5275 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
5276 return GR_REGS;
5277
5278 /* This is needed if a pseudo used as a call_operand gets spilled to a
5279 stack slot. */
5280 if (GET_CODE (x) == MEM)
5281 return GR_REGS;
5282 break;
5283
5284 case FR_REGS:
a71aef0b 5285 case FP_REGS:
c51e6d85 5286 /* Need to go through general registers to get to other class regs. */
7109d286
RH
5287 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5288 return GR_REGS;
9c808aad 5289
97e242b0
RH
5290 /* This can happen when a paradoxical subreg is an operand to the
5291 muldi3 pattern. */
5292 /* ??? This shouldn't be necessary after instruction scheduling is
5293 enabled, because paradoxical subregs are not accepted by
5294 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5295 stop the paradoxical subreg stupidity in the *_operand functions
5296 in recog.c. */
5297 if (GET_CODE (x) == MEM
5298 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5299 || GET_MODE (x) == QImode))
5300 return GR_REGS;
5301
5302 /* This can happen because of the ior/and/etc patterns that accept FP
5303 registers as operands. If the third operand is a constant, then it
5304 needs to be reloaded into a FP register. */
5305 if (GET_CODE (x) == CONST_INT)
5306 return GR_REGS;
5307
5308 /* This can happen because of register elimination in a muldi3 insn.
5309 E.g. `26107 * (unsigned long)&u'. */
5310 if (GET_CODE (x) == PLUS)
5311 return GR_REGS;
5312 break;
5313
5314 case PR_REGS:
f2f90c63 5315 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
5316 and the function has a nonlocal goto. This is because global
5317 does not allocate call crossing pseudos to hard registers when
e3b5732b 5318 crtl->has_nonlocal_goto is true. This is relatively
97e242b0
RH
5319 common for C++ programs that use exceptions. To reproduce,
5320 return NO_REGS and compile libstdc++. */
5321 if (GET_CODE (x) == MEM)
5322 return GR_REGS;
f2f90c63
RH
5323
5324 /* This can happen when we take a BImode subreg of a DImode value,
5325 and that DImode value winds up in some non-GR register. */
5326 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5327 return GR_REGS;
97e242b0
RH
5328 break;
5329
5330 default:
5331 break;
5332 }
c65ebc55
JW
5333
5334 return NO_REGS;
5335}
5336
215b063c
PB
5337\f
5338/* Implement targetm.unspec_may_trap_p hook. */
5339static int
5340ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5341{
5342 if (GET_CODE (x) == UNSPEC)
5343 {
5344 switch (XINT (x, 1))
5345 {
5346 case UNSPEC_LDA:
5347 case UNSPEC_LDS:
5348 case UNSPEC_LDSA:
5349 case UNSPEC_LDCCLR:
5350 case UNSPEC_CHKACLR:
5351 case UNSPEC_CHKS:
5352 /* These unspecs are just wrappers. */
5353 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5354 }
5355 }
5356
5357 return default_unspec_may_trap_p (x, flags);
5358}
5359
c65ebc55
JW
5360\f
5361/* Parse the -mfixed-range= option string. */
5362
5363static void
9c808aad 5364fix_range (const char *const_str)
c65ebc55
JW
5365{
5366 int i, first, last;
3b572406 5367 char *str, *dash, *comma;
c65ebc55
JW
5368
 5369	  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5370 REG2 are either register names or register numbers. The effect
5371 of this option is to mark the registers in the range from REG1 to
5372 REG2 as ``fixed'' so they won't be used by the compiler. This is
5373 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
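     /* For example, "-mfixed-range=f32-f127" reserves the upper floating
	point registers; several ranges can be combined, as in the purely
	illustrative "-mfixed-range=f12-f15,f32-f127".  */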
5374
3b572406
RH
5375 i = strlen (const_str);
5376 str = (char *) alloca (i + 1);
5377 memcpy (str, const_str, i + 1);
5378
c65ebc55
JW
5379 while (1)
5380 {
5381 dash = strchr (str, '-');
5382 if (!dash)
5383 {
d4ee4d25 5384 warning (0, "value of -mfixed-range must have form REG1-REG2");
c65ebc55
JW
5385 return;
5386 }
5387 *dash = '\0';
5388
5389 comma = strchr (dash + 1, ',');
5390 if (comma)
5391 *comma = '\0';
5392
5393 first = decode_reg_name (str);
5394 if (first < 0)
5395 {
d4ee4d25 5396 warning (0, "unknown register name: %s", str);
c65ebc55
JW
5397 return;
5398 }
5399
5400 last = decode_reg_name (dash + 1);
5401 if (last < 0)
5402 {
d4ee4d25 5403 warning (0, "unknown register name: %s", dash + 1);
c65ebc55
JW
5404 return;
5405 }
5406
5407 *dash = '-';
5408
5409 if (first > last)
5410 {
d4ee4d25 5411 warning (0, "%s-%s is an empty range", str, dash + 1);
c65ebc55
JW
5412 return;
5413 }
5414
5415 for (i = first; i <= last; ++i)
5416 fixed_regs[i] = call_used_regs[i] = 1;
5417
5418 if (!comma)
5419 break;
5420
5421 *comma = ',';
5422 str = comma + 1;
5423 }
5424}
5425
dbdd120f
RH
5426/* Implement TARGET_HANDLE_OPTION. */
5427
5428static bool
55bea00a 5429ia64_handle_option (size_t code, const char *arg, int value)
37b15744 5430{
dbdd120f
RH
5431 switch (code)
5432 {
5433 case OPT_mfixed_range_:
5434 fix_range (arg);
5435 return true;
5436
5437 case OPT_mtls_size_:
55bea00a
RS
5438 if (value != 14 && value != 22 && value != 64)
5439 error ("bad value %<%s%> for -mtls-size= switch", arg);
5440 return true;
dbdd120f
RH
5441
5442 case OPT_mtune_:
5443 {
5444 static struct pta
5445 {
5446 const char *name; /* processor name or nickname. */
5447 enum processor_type processor;
5448 }
5449 const processor_alias_table[] =
5450 {
dbdd120f
RH
5451 {"itanium2", PROCESSOR_ITANIUM2},
5452 {"mckinley", PROCESSOR_ITANIUM2},
5453 };
5454 int const pta_size = ARRAY_SIZE (processor_alias_table);
5455 int i;
5456
5457 for (i = 0; i < pta_size; i++)
5458 if (!strcmp (arg, processor_alias_table[i].name))
5459 {
5460 ia64_tune = processor_alias_table[i].processor;
5461 break;
5462 }
5463 if (i == pta_size)
5464 error ("bad value %<%s%> for -mtune= switch", arg);
5465 return true;
5466 }
5467
5468 default:
5469 return true;
5470 }
37b15744 5471}
0c96007e 5472
bacf5b96 5473/* Implement OVERRIDE_OPTIONS. */
c65ebc55
JW
5474
5475void
9c808aad 5476ia64_override_options (void)
c65ebc55 5477{
59da9a7d
JW
5478 if (TARGET_AUTO_PIC)
5479 target_flags |= MASK_CONST_GP;
5480
7e1e7d4c
VM
 5481	  /* Numerous experiments show that IRA-based loop pressure
 5482	     calculation works better for RTL loop invariant motion on targets
 5483	     with enough (>= 32) registers.  It is an expensive optimization,
 5484	     so it is enabled only when optimizing for peak performance.  */
5485 if (optimize >= 3)
5486 flag_ira_loop_pressure = 1;
5487
5488
2b7e2984
SE
5489 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5490
5491 init_machine_status = ia64_init_machine_status;
5492
5493 if (align_functions <= 0)
5494 align_functions = 64;
5495 if (align_loops <= 0)
5496 align_loops = 32;
5497 if (TARGET_ABI_OPEN_VMS)
5498 flag_no_common = 1;
5499
5500 ia64_override_options_after_change();
5501}
5502
5503/* Implement targetm.override_options_after_change. */
5504
5505static void
5506ia64_override_options_after_change (void)
5507{
faae4ae7
L
5508 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5509 flag_schedule_insns_after_reload = 0;
5510
388092d5
AB
5511 if (optimize >= 3
5512 && ! sel_sched_switch_set)
5513 {
5514 flag_selective_scheduling2 = 1;
5515 flag_sel_sched_pipelining = 1;
5516 }
5517 if (mflag_sched_control_spec == 2)
5518 {
5519 /* Control speculation is on by default for the selective scheduler,
5520 but not for the Haifa scheduler. */
5521 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5522 }
5523 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5524 {
 5525	      /* FIXME: remove this once breaking auto-inc insns apart is
 5526		 implemented as a transformation.  */
5527 flag_auto_inc_dec = 0;
5528 }
c65ebc55 5529}
dbdd120f 5530
6fb5fa3c
DB
5531/* Initialize the record of emitted frame related registers. */
5532
5533void ia64_init_expanders (void)
5534{
5535 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5536}
5537
dbdd120f
RH
5538static struct machine_function *
5539ia64_init_machine_status (void)
5540{
5ead67f6 5541 return GGC_CNEW (struct machine_function);
dbdd120f 5542}
c65ebc55 5543\f
9c808aad
AJ
5544static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5545static enum attr_type ia64_safe_type (rtx);
2130b7fb 5546
2130b7fb 5547static enum attr_itanium_class
9c808aad 5548ia64_safe_itanium_class (rtx insn)
2130b7fb
BS
5549{
5550 if (recog_memoized (insn) >= 0)
5551 return get_attr_itanium_class (insn);
b5b8b0ac
AO
5552 else if (DEBUG_INSN_P (insn))
5553 return ITANIUM_CLASS_IGNORE;
2130b7fb
BS
5554 else
5555 return ITANIUM_CLASS_UNKNOWN;
5556}
5557
5558static enum attr_type
9c808aad 5559ia64_safe_type (rtx insn)
2130b7fb
BS
5560{
5561 if (recog_memoized (insn) >= 0)
5562 return get_attr_type (insn);
5563 else
5564 return TYPE_UNKNOWN;
5565}
5566\f
c65ebc55
JW
5567/* The following collection of routines emit instruction group stop bits as
5568 necessary to avoid dependencies. */
5569
5570/* Need to track some additional registers as far as serialization is
5571 concerned so we can properly handle br.call and br.ret. We could
5572 make these registers visible to gcc, but since these registers are
5573 never explicitly used in gcc generated code, it seems wasteful to
5574 do so (plus it would make the call and return patterns needlessly
5575 complex). */
c65ebc55 5576#define REG_RP (BR_REG (0))
c65ebc55 5577#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
5578/* This is used for volatile asms which may require a stop bit immediately
5579 before and after them. */
5527bf14 5580#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
5581#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5582#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 5583
f2f90c63
RH
5584/* For each register, we keep track of how it has been written in the
5585 current instruction group.
5586
5587 If a register is written unconditionally (no qualifying predicate),
5588 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5589
5590 If a register is written if its qualifying predicate P is true, we
5591 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5592 may be written again by the complement of P (P^1) and when this happens,
5593 WRITE_COUNT gets set to 2.
5594
5595 The result of this is that whenever an insn attempts to write a register
e03f5d43 5596 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
5597
5598 If a predicate register is written by a floating-point insn, we set
5599 WRITTEN_BY_FP to true.
5600
5601 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5602 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5603
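/* Editorial illustration (not from the original source): within one
   instruction group,

	(p6) mov r4 = r5
	(p7) mov r4 = r6

   is accepted without a stop bit because p6/p7 are treated as a
   complementary predicate pair (WRITE_COUNT for r4 ends up at 2); a
   further, unpredicated write of r4 in the same group would then
   require an insn group barrier before it.  */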
444a356a
JJ
5604#if GCC_VERSION >= 4000
5605#define RWS_FIELD_TYPE __extension__ unsigned short
5606#else
5607#define RWS_FIELD_TYPE unsigned int
5608#endif
c65ebc55
JW
5609struct reg_write_state
5610{
444a356a
JJ
5611 RWS_FIELD_TYPE write_count : 2;
5612 RWS_FIELD_TYPE first_pred : 10;
5613 RWS_FIELD_TYPE written_by_fp : 1;
5614 RWS_FIELD_TYPE written_by_and : 1;
5615 RWS_FIELD_TYPE written_by_or : 1;
c65ebc55
JW
5616};
5617
5618/* Cumulative info for the current instruction group. */
5619struct reg_write_state rws_sum[NUM_REGS];
444a356a
JJ
5620#ifdef ENABLE_CHECKING
5621/* Bitmap whether a register has been written in the current insn. */
5622HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5623 / HOST_BITS_PER_WIDEST_FAST_INT];
5624
5625static inline void
5626rws_insn_set (int regno)
5627{
5628 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5629 SET_HARD_REG_BIT (rws_insn, regno);
5630}
5631
5632static inline int
5633rws_insn_test (int regno)
5634{
5635 return TEST_HARD_REG_BIT (rws_insn, regno);
5636}
5637#else
5638/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5639unsigned char rws_insn[2];
5640
5641static inline void
5642rws_insn_set (int regno)
5643{
5644 if (regno == REG_AR_CFM)
5645 rws_insn[0] = 1;
5646 else if (regno == REG_VOLATILE)
5647 rws_insn[1] = 1;
5648}
5649
5650static inline int
5651rws_insn_test (int regno)
5652{
5653 if (regno == REG_AR_CFM)
5654 return rws_insn[0];
5655 if (regno == REG_VOLATILE)
5656 return rws_insn[1];
5657 return 0;
5658}
5659#endif
c65ebc55 5660
25250265 5661/* Indicates whether this is the first instruction after a stop bit,
e820471b
NS
5662 in which case we don't need another stop bit. Without this,
5663 ia64_variable_issue will die when scheduling an alloc. */
25250265
JW
5664static int first_instruction;
5665
c65ebc55
JW
5666/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5667 RTL for one instruction. */
5668struct reg_flags
5669{
5670 unsigned int is_write : 1; /* Is register being written? */
5671 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5672 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
5673 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5674 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 5675 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
5676};
5677
444a356a 5678static void rws_update (int, struct reg_flags, int);
9c808aad
AJ
5679static int rws_access_regno (int, struct reg_flags, int);
5680static int rws_access_reg (rtx, struct reg_flags, int);
c1bc6ca8
JW
5681static void update_set_flags (rtx, struct reg_flags *);
5682static int set_src_needs_barrier (rtx, struct reg_flags, int);
9c808aad
AJ
5683static int rtx_needs_barrier (rtx, struct reg_flags, int);
5684static void init_insn_group_barriers (void);
c1bc6ca8
JW
5685static int group_barrier_needed (rtx);
5686static int safe_group_barrier_needed (rtx);
444a356a 5687static int in_safe_group_barrier;
3b572406 5688
c65ebc55
JW
5689/* Update *RWS for REGNO, which is being written by the current instruction,
5690 with predicate PRED, and associated register flags in FLAGS. */
5691
5692static void
444a356a 5693rws_update (int regno, struct reg_flags flags, int pred)
c65ebc55 5694{
3e7c7805 5695 if (pred)
444a356a 5696 rws_sum[regno].write_count++;
3e7c7805 5697 else
444a356a
JJ
5698 rws_sum[regno].write_count = 2;
5699 rws_sum[regno].written_by_fp |= flags.is_fp;
f2f90c63 5700 /* ??? Not tracking and/or across differing predicates. */
444a356a
JJ
5701 rws_sum[regno].written_by_and = flags.is_and;
5702 rws_sum[regno].written_by_or = flags.is_or;
5703 rws_sum[regno].first_pred = pred;
c65ebc55
JW
5704}
5705
5706/* Handle an access to register REGNO of type FLAGS using predicate register
444a356a 5707 PRED. Update rws_sum array. Return 1 if this access creates
c65ebc55
JW
5708 a dependency with an earlier instruction in the same group. */
5709
5710static int
9c808aad 5711rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
5712{
5713 int need_barrier = 0;
c65ebc55 5714
e820471b 5715 gcc_assert (regno < NUM_REGS);
c65ebc55 5716
f2f90c63
RH
5717 if (! PR_REGNO_P (regno))
5718 flags.is_and = flags.is_or = 0;
5719
c65ebc55
JW
5720 if (flags.is_write)
5721 {
12c2c7aa
JW
5722 int write_count;
5723
444a356a 5724 rws_insn_set (regno);
12c2c7aa 5725 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
5726
5727 switch (write_count)
c65ebc55
JW
5728 {
5729 case 0:
5730 /* The register has not been written yet. */
444a356a
JJ
5731 if (!in_safe_group_barrier)
5732 rws_update (regno, flags, pred);
c65ebc55
JW
5733 break;
5734
5735 case 1:
5736 /* The register has been written via a predicate. If this is
5737 not a complementary predicate, then we need a barrier. */
5738 /* ??? This assumes that P and P+1 are always complementary
5739 predicates for P even. */
f2f90c63 5740 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 5741 ;
f2f90c63
RH
5742 else if (flags.is_or && rws_sum[regno].written_by_or)
5743 ;
5744 else if ((rws_sum[regno].first_pred ^ 1) != pred)
c65ebc55 5745 need_barrier = 1;
444a356a
JJ
5746 if (!in_safe_group_barrier)
5747 rws_update (regno, flags, pred);
c65ebc55
JW
5748 break;
5749
5750 case 2:
5751 /* The register has been unconditionally written already. We
5752 need a barrier. */
f2f90c63
RH
5753 if (flags.is_and && rws_sum[regno].written_by_and)
5754 ;
5755 else if (flags.is_or && rws_sum[regno].written_by_or)
5756 ;
5757 else
5758 need_barrier = 1;
444a356a
JJ
5759 if (!in_safe_group_barrier)
5760 {
5761 rws_sum[regno].written_by_and = flags.is_and;
5762 rws_sum[regno].written_by_or = flags.is_or;
5763 }
c65ebc55
JW
5764 break;
5765
5766 default:
e820471b 5767 gcc_unreachable ();
c65ebc55
JW
5768 }
5769 }
5770 else
5771 {
5772 if (flags.is_branch)
5773 {
 5774	  /* Branches have several RAW exceptions that allow us to avoid
5775 barriers. */
5776
5527bf14 5777 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
5778 /* RAW dependencies on branch regs are permissible as long
5779 as the writer is a non-branch instruction. Since we
5780 never generate code that uses a branch register written
5781 by a branch instruction, handling this case is
5782 easy. */
5527bf14 5783 return 0;
c65ebc55
JW
5784
5785 if (REGNO_REG_CLASS (regno) == PR_REGS
5786 && ! rws_sum[regno].written_by_fp)
5787 /* The predicates of a branch are available within the
5788 same insn group as long as the predicate was written by
ed168e45 5789 something other than a floating-point instruction. */
c65ebc55
JW
5790 return 0;
5791 }
5792
f2f90c63
RH
5793 if (flags.is_and && rws_sum[regno].written_by_and)
5794 return 0;
5795 if (flags.is_or && rws_sum[regno].written_by_or)
5796 return 0;
5797
c65ebc55
JW
5798 switch (rws_sum[regno].write_count)
5799 {
5800 case 0:
5801 /* The register has not been written yet. */
5802 break;
5803
5804 case 1:
5805 /* The register has been written via a predicate. If this is
5806 not a complementary predicate, then we need a barrier. */
5807 /* ??? This assumes that P and P+1 are always complementary
5808 predicates for P even. */
5809 if ((rws_sum[regno].first_pred ^ 1) != pred)
5810 need_barrier = 1;
5811 break;
5812
5813 case 2:
5814 /* The register has been unconditionally written already. We
5815 need a barrier. */
5816 need_barrier = 1;
5817 break;
5818
5819 default:
e820471b 5820 gcc_unreachable ();
c65ebc55
JW
5821 }
5822 }
5823
5824 return need_barrier;
5825}
5826
97e242b0 5827static int
9c808aad 5828rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
5829{
5830 int regno = REGNO (reg);
5831 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5832
5833 if (n == 1)
5834 return rws_access_regno (regno, flags, pred);
5835 else
5836 {
5837 int need_barrier = 0;
5838 while (--n >= 0)
5839 need_barrier |= rws_access_regno (regno + n, flags, pred);
5840 return need_barrier;
5841 }
5842}
5843
112333d3
BS
 5844/* Examine X, which is a SET rtx, and update the register flags
 5845   stored in *PFLAGS.  */
5846
5847static void
c1bc6ca8 5848update_set_flags (rtx x, struct reg_flags *pflags)
112333d3
BS
5849{
5850 rtx src = SET_SRC (x);
5851
112333d3
BS
5852 switch (GET_CODE (src))
5853 {
5854 case CALL:
5855 return;
5856
5857 case IF_THEN_ELSE:
048d0d36 5858 /* There are four cases here:
c8d3810f
RH
5859 (1) The destination is (pc), in which case this is a branch,
5860 nothing here applies.
5861 (2) The destination is ar.lc, in which case this is a
5862 doloop_end_internal,
5863 (3) The destination is an fp register, in which case this is
5864 an fselect instruction.
048d0d36
MK
5865 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5866 this is a check load.
c8d3810f
RH
5867 In all cases, nothing we do in this function applies. */
5868 return;
112333d3
BS
5869
5870 default:
ec8e098d 5871 if (COMPARISON_P (src)
c8d3810f 5872 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
112333d3
BS
5873 /* Set pflags->is_fp to 1 so that we know we're dealing
5874 with a floating point comparison when processing the
5875 destination of the SET. */
5876 pflags->is_fp = 1;
5877
5878 /* Discover if this is a parallel comparison. We only handle
5879 and.orcm and or.andcm at present, since we must retain a
5880 strict inverse on the predicate pair. */
5881 else if (GET_CODE (src) == AND)
5882 pflags->is_and = 1;
5883 else if (GET_CODE (src) == IOR)
5884 pflags->is_or = 1;
5885
5886 break;
5887 }
5888}
5889
 5890/* Subroutine of rtx_needs_barrier; this function determines whether the
 5891   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
 5892   are as in rtx_needs_barrier.  */
9c808aad 5894
112333d3 5895static int
c1bc6ca8 5896set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
112333d3
BS
5897{
5898 int need_barrier = 0;
5899 rtx dst;
5900 rtx src = SET_SRC (x);
5901
5902 if (GET_CODE (src) == CALL)
5903 /* We don't need to worry about the result registers that
5904 get written by subroutine call. */
5905 return rtx_needs_barrier (src, flags, pred);
5906 else if (SET_DEST (x) == pc_rtx)
5907 {
5908 /* X is a conditional branch. */
5909 /* ??? This seems redundant, as the caller sets this bit for
5910 all JUMP_INSNs. */
048d0d36
MK
5911 if (!ia64_spec_check_src_p (src))
5912 flags.is_branch = 1;
112333d3
BS
5913 return rtx_needs_barrier (src, flags, pred);
5914 }
5915
048d0d36
MK
5916 if (ia64_spec_check_src_p (src))
5917 /* Avoid checking one register twice (in condition
5918 and in 'then' section) for ldc pattern. */
5919 {
5920 gcc_assert (REG_P (XEXP (src, 2)));
5921 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5922
5923 /* We process MEM below. */
5924 src = XEXP (src, 1);
5925 }
5926
5927 need_barrier |= rtx_needs_barrier (src, flags, pred);
112333d3 5928
112333d3
BS
5929 dst = SET_DEST (x);
5930 if (GET_CODE (dst) == ZERO_EXTRACT)
5931 {
5932 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5933 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
112333d3
BS
5934 }
5935 return need_barrier;
5936}
5937
b38ba463
ZW
5938/* Handle an access to rtx X of type FLAGS using predicate register
5939 PRED. Return 1 if this access creates a dependency with an earlier
5940 instruction in the same group. */
c65ebc55
JW
5941
5942static int
9c808aad 5943rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
5944{
5945 int i, j;
5946 int is_complemented = 0;
5947 int need_barrier = 0;
5948 const char *format_ptr;
5949 struct reg_flags new_flags;
c1bc6ca8 5950 rtx cond;
c65ebc55
JW
5951
5952 if (! x)
5953 return 0;
5954
5955 new_flags = flags;
5956
5957 switch (GET_CODE (x))
5958 {
9c808aad 5959 case SET:
c1bc6ca8
JW
5960 update_set_flags (x, &new_flags);
5961 need_barrier = set_src_needs_barrier (x, new_flags, pred);
112333d3 5962 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 5963 {
112333d3
BS
5964 new_flags.is_write = 1;
5965 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 5966 }
c65ebc55
JW
5967 break;
5968
5969 case CALL:
5970 new_flags.is_write = 0;
97e242b0 5971 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
5972
5973 /* Avoid multiple register writes, in case this is a pattern with
e820471b 5974 multiple CALL rtx. This avoids a failure in rws_access_reg. */
444a356a 5975 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
c65ebc55
JW
5976 {
5977 new_flags.is_write = 1;
97e242b0
RH
5978 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5979 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5980 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
5981 }
5982 break;
5983
e5bde68a
RH
5984 case COND_EXEC:
5985 /* X is a predicated instruction. */
5986
5987 cond = COND_EXEC_TEST (x);
e820471b 5988 gcc_assert (!pred);
e5bde68a
RH
5989 need_barrier = rtx_needs_barrier (cond, flags, 0);
5990
5991 if (GET_CODE (cond) == EQ)
5992 is_complemented = 1;
5993 cond = XEXP (cond, 0);
e820471b 5994 gcc_assert (GET_CODE (cond) == REG
c1bc6ca8 5995 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
e5bde68a
RH
5996 pred = REGNO (cond);
5997 if (is_complemented)
5998 ++pred;
5999
6000 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6001 return need_barrier;
6002
c65ebc55 6003 case CLOBBER:
c65ebc55 6004 case USE:
c65ebc55
JW
6005 /* Clobber & use are for earlier compiler-phases only. */
6006 break;
6007
6008 case ASM_OPERANDS:
6009 case ASM_INPUT:
6010 /* We always emit stop bits for traditional asms. We emit stop bits
6011 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6012 if (GET_CODE (x) != ASM_OPERANDS
6013 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6014 {
6015 /* Avoid writing the register multiple times if we have multiple
e820471b 6016 asm outputs. This avoids a failure in rws_access_reg. */
444a356a 6017 if (! rws_insn_test (REG_VOLATILE))
c65ebc55
JW
6018 {
6019 new_flags.is_write = 1;
97e242b0 6020 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
6021 }
6022 return 1;
6023 }
6024
6025 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
1e5f1716 6026 We cannot just fall through here since then we would be confused
c65ebc55
JW
 6027	 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
 6028	 usage, does not indicate a traditional asm.  */
6029
6030 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6031 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6032 need_barrier = 1;
6033 break;
6034
6035 case PARALLEL:
6036 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
6037 {
6038 rtx pat = XVECEXP (x, 0, i);
051d8245 6039 switch (GET_CODE (pat))
112333d3 6040 {
051d8245 6041 case SET:
c1bc6ca8
JW
6042 update_set_flags (pat, &new_flags);
6043 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
051d8245
RH
6044 break;
6045
6046 case USE:
6047 case CALL:
6048 case ASM_OPERANDS:
6049 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6050 break;
6051
6052 case CLOBBER:
6053 case RETURN:
6054 break;
6055
6056 default:
6057 gcc_unreachable ();
112333d3 6058 }
112333d3
BS
6059 }
6060 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6061 {
6062 rtx pat = XVECEXP (x, 0, i);
6063 if (GET_CODE (pat) == SET)
6064 {
6065 if (GET_CODE (SET_SRC (pat)) != CALL)
6066 {
6067 new_flags.is_write = 1;
6068 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6069 pred);
6070 }
6071 }
339cb12e 6072 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
6073 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6074 }
c65ebc55
JW
6075 break;
6076
6077 case SUBREG:
077bc924
JM
6078 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6079 break;
c65ebc55 6080 case REG:
870f9ec0
RH
6081 if (REGNO (x) == AR_UNAT_REGNUM)
6082 {
6083 for (i = 0; i < 64; ++i)
6084 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6085 }
6086 else
6087 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
6088 break;
6089
6090 case MEM:
6091 /* Find the regs used in memory address computation. */
6092 new_flags.is_write = 0;
6093 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6094 break;
6095
051d8245 6096 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
c65ebc55
JW
6097 case SYMBOL_REF: case LABEL_REF: case CONST:
6098 break;
6099
6100 /* Operators with side-effects. */
6101 case POST_INC: case POST_DEC:
e820471b 6102 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
c65ebc55
JW
6103
6104 new_flags.is_write = 0;
97e242b0 6105 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 6106 new_flags.is_write = 1;
97e242b0 6107 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6108 break;
6109
6110 case POST_MODIFY:
e820471b 6111 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
4b983fdc
RH
6112
6113 new_flags.is_write = 0;
97e242b0 6114 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6115 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6116 new_flags.is_write = 1;
97e242b0 6117 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
6118 break;
6119
6120 /* Handle common unary and binary ops for efficiency. */
6121 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6122 case MOD: case UDIV: case UMOD: case AND: case IOR:
6123 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6124 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6125 case NE: case EQ: case GE: case GT: case LE:
6126 case LT: case GEU: case GTU: case LEU: case LTU:
6127 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6128 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6129 break;
6130
6131 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6132 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6133 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 6134 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
6135 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6136 break;
6137
051d8245
RH
6138 case VEC_SELECT:
6139 /* VEC_SELECT's second argument is a PARALLEL with integers that
6140 describe the elements selected. On ia64, those integers are
6141 always constants. Avoid walking the PARALLEL so that we don't
e820471b 6142 get confused with "normal" parallels and then die. */
051d8245
RH
6143 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6144 break;
6145
c65ebc55
JW
6146 case UNSPEC:
6147 switch (XINT (x, 1))
6148 {
7b6e506e
RH
6149 case UNSPEC_LTOFF_DTPMOD:
6150 case UNSPEC_LTOFF_DTPREL:
6151 case UNSPEC_DTPREL:
6152 case UNSPEC_LTOFF_TPREL:
6153 case UNSPEC_TPREL:
6154 case UNSPEC_PRED_REL_MUTEX:
6155 case UNSPEC_PIC_CALL:
6156 case UNSPEC_MF:
6157 case UNSPEC_FETCHADD_ACQ:
6158 case UNSPEC_BSP_VALUE:
6159 case UNSPEC_FLUSHRS:
6160 case UNSPEC_BUNDLE_SELECTOR:
6161 break;
6162
086c0f96
RH
6163 case UNSPEC_GR_SPILL:
6164 case UNSPEC_GR_RESTORE:
870f9ec0
RH
6165 {
6166 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6167 HOST_WIDE_INT bit = (offset >> 3) & 63;
6168
6169 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
83338d15 6170 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
870f9ec0
RH
6171 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6172 new_flags, pred);
6173 break;
6174 }
9c808aad 6175
086c0f96
RH
6176 case UNSPEC_FR_SPILL:
6177 case UNSPEC_FR_RESTORE:
c407570a 6178 case UNSPEC_GETF_EXP:
b38ba463 6179 case UNSPEC_SETF_EXP:
086c0f96 6180 case UNSPEC_ADDP4:
b38ba463 6181 case UNSPEC_FR_SQRT_RECIP_APPROX:
07acc7b3 6182 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
048d0d36
MK
6183 case UNSPEC_LDA:
6184 case UNSPEC_LDS:
388092d5 6185 case UNSPEC_LDS_A:
048d0d36
MK
6186 case UNSPEC_LDSA:
6187 case UNSPEC_CHKACLR:
6188 case UNSPEC_CHKS:
6dd12198
SE
6189 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6190 break;
6191
086c0f96 6192 case UNSPEC_FR_RECIP_APPROX:
f526a3c8 6193 case UNSPEC_SHRP:
046625fa 6194 case UNSPEC_COPYSIGN:
1def9c3f 6195 case UNSPEC_FR_RECIP_APPROX_RES:
655f2eb9
RH
6196 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6197 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6198 break;
6199
086c0f96 6200 case UNSPEC_CMPXCHG_ACQ:
0551c32d
RH
6201 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6202 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6203 break;
6204
c65ebc55 6205 default:
e820471b 6206 gcc_unreachable ();
c65ebc55
JW
6207 }
6208 break;
6209
6210 case UNSPEC_VOLATILE:
6211 switch (XINT (x, 1))
6212 {
086c0f96 6213 case UNSPECV_ALLOC:
25250265
JW
6214 /* Alloc must always be the first instruction of a group.
6215 We force this by always returning true. */
6216 /* ??? We might get better scheduling if we explicitly check for
6217 input/local/output register dependencies, and modify the
6218 scheduler so that alloc is always reordered to the start of
6219 the current group. We could then eliminate all of the
6220 first_instruction code. */
6221 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6222
6223 new_flags.is_write = 1;
25250265
JW
6224 rws_access_regno (REG_AR_CFM, new_flags, pred);
6225 return 1;
c65ebc55 6226
086c0f96 6227 case UNSPECV_SET_BSP:
3b572406
RH
6228 need_barrier = 1;
6229 break;
6230
086c0f96
RH
6231 case UNSPECV_BLOCKAGE:
6232 case UNSPECV_INSN_GROUP_BARRIER:
6233 case UNSPECV_BREAK:
6234 case UNSPECV_PSAC_ALL:
6235 case UNSPECV_PSAC_NORMAL:
3b572406 6236 return 0;
0c96007e 6237
c65ebc55 6238 default:
e820471b 6239 gcc_unreachable ();
c65ebc55
JW
6240 }
6241 break;
6242
6243 case RETURN:
6244 new_flags.is_write = 0;
97e242b0
RH
6245 need_barrier = rws_access_regno (REG_RP, flags, pred);
6246 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6247
6248 new_flags.is_write = 1;
97e242b0
RH
6249 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6250 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6251 break;
6252
6253 default:
6254 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6255 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6256 switch (format_ptr[i])
6257 {
6258 case '0': /* unused field */
6259 case 'i': /* integer */
6260 case 'n': /* note */
6261 case 'w': /* wide integer */
6262 case 's': /* pointer to string */
6263 case 'S': /* optional pointer to string */
6264 break;
6265
6266 case 'e':
6267 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6268 need_barrier = 1;
6269 break;
6270
6271 case 'E':
6272 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6273 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6274 need_barrier = 1;
6275 break;
6276
6277 default:
e820471b 6278 gcc_unreachable ();
c65ebc55 6279 }
2ed4af6f 6280 break;
c65ebc55
JW
6281 }
6282 return need_barrier;
6283}
6284
c1bc6ca8 6285/* Clear out the state for group_barrier_needed at the start of a
2130b7fb
BS
6286 sequence of insns. */
6287
6288static void
9c808aad 6289init_insn_group_barriers (void)
2130b7fb
BS
6290{
6291 memset (rws_sum, 0, sizeof (rws_sum));
25250265 6292 first_instruction = 1;
2130b7fb
BS
6293}
6294
c1bc6ca8
JW
6295/* Given the current state, determine whether a group barrier (a stop bit) is
6296 necessary before INSN. Return nonzero if so. This modifies the state to
6297 include the effects of INSN as a side-effect. */
2130b7fb
BS
6298
6299static int
c1bc6ca8 6300group_barrier_needed (rtx insn)
2130b7fb
BS
6301{
6302 rtx pat;
6303 int need_barrier = 0;
6304 struct reg_flags flags;
6305
6306 memset (&flags, 0, sizeof (flags));
6307 switch (GET_CODE (insn))
6308 {
6309 case NOTE:
b5b8b0ac 6310 case DEBUG_INSN:
2130b7fb
BS
6311 break;
6312
6313 case BARRIER:
6314 /* A barrier doesn't imply an instruction group boundary. */
6315 break;
6316
6317 case CODE_LABEL:
6318 memset (rws_insn, 0, sizeof (rws_insn));
6319 return 1;
6320
6321 case CALL_INSN:
6322 flags.is_branch = 1;
6323 flags.is_sibcall = SIBLING_CALL_P (insn);
6324 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
6325
6326 /* Don't bundle a call following another call. */
6327 if ((pat = prev_active_insn (insn))
6328 && GET_CODE (pat) == CALL_INSN)
6329 {
6330 need_barrier = 1;
6331 break;
6332 }
6333
2130b7fb
BS
6334 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6335 break;
6336
6337 case JUMP_INSN:
048d0d36
MK
6338 if (!ia64_spec_check_p (insn))
6339 flags.is_branch = 1;
f12f25a7
RH
6340
6341 /* Don't bundle a jump following a call. */
6342 if ((pat = prev_active_insn (insn))
6343 && GET_CODE (pat) == CALL_INSN)
6344 {
6345 need_barrier = 1;
6346 break;
6347 }
5efb1046 6348 /* FALLTHRU */
2130b7fb
BS
6349
6350 case INSN:
6351 if (GET_CODE (PATTERN (insn)) == USE
6352 || GET_CODE (PATTERN (insn)) == CLOBBER)
6353 /* Don't care about USE and CLOBBER "insns"---those are used to
6354 indicate to the optimizer that it shouldn't get rid of
6355 certain operations. */
6356 break;
6357
6358 pat = PATTERN (insn);
6359
6360 /* Ug. Hack hacks hacked elsewhere. */
6361 switch (recog_memoized (insn))
6362 {
6363 /* We play dependency tricks with the epilogue in order
6364 to get proper schedules. Undo this for dv analysis. */
6365 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 6366 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
6367 pat = XVECEXP (pat, 0, 0);
6368 break;
6369
6370 /* The pattern we use for br.cloop confuses the code above.
6371 The second element of the vector is representative. */
6372 case CODE_FOR_doloop_end_internal:
6373 pat = XVECEXP (pat, 0, 1);
6374 break;
6375
6376 /* Doesn't generate code. */
6377 case CODE_FOR_pred_rel_mutex:
d0e82870 6378 case CODE_FOR_prologue_use:
2130b7fb
BS
6379 return 0;
6380
6381 default:
6382 break;
6383 }
6384
6385 memset (rws_insn, 0, sizeof (rws_insn));
6386 need_barrier = rtx_needs_barrier (pat, flags, 0);
6387
6388 /* Check to see if the previous instruction was a volatile
6389 asm. */
6390 if (! need_barrier)
6391 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
388092d5 6392
2130b7fb
BS
6393 break;
6394
6395 default:
e820471b 6396 gcc_unreachable ();
2130b7fb 6397 }
25250265 6398
30028c85
VM
6399 if (first_instruction && INSN_P (insn)
6400 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6401 && GET_CODE (PATTERN (insn)) != USE
6402 && GET_CODE (PATTERN (insn)) != CLOBBER)
25250265
JW
6403 {
6404 need_barrier = 0;
6405 first_instruction = 0;
6406 }
6407
2130b7fb
BS
6408 return need_barrier;
6409}
6410
c1bc6ca8 6411/* Like group_barrier_needed, but do not clobber the current state. */
2130b7fb
BS
6412
6413static int
c1bc6ca8 6414safe_group_barrier_needed (rtx insn)
2130b7fb 6415{
25250265 6416 int saved_first_instruction;
2130b7fb 6417 int t;
25250265 6418
25250265 6419 saved_first_instruction = first_instruction;
444a356a 6420 in_safe_group_barrier = 1;
25250265 6421
c1bc6ca8 6422 t = group_barrier_needed (insn);
25250265 6423
25250265 6424 first_instruction = saved_first_instruction;
444a356a 6425 in_safe_group_barrier = 0;
25250265 6426
2130b7fb
BS
6427 return t;
6428}
6429
18dbd950
RS
6430/* Scan the current function and insert stop bits as necessary to
6431 eliminate dependencies. This function assumes that a final
6432 instruction scheduling pass has been run which has already
6433 inserted most of the necessary stop bits. This function only
6434 inserts new ones at basic block boundaries, since these are
6435 invisible to the scheduler. */
2130b7fb
BS
6436
6437static void
9c808aad 6438emit_insn_group_barriers (FILE *dump)
2130b7fb
BS
6439{
6440 rtx insn;
6441 rtx last_label = 0;
6442 int insns_since_last_label = 0;
6443
6444 init_insn_group_barriers ();
6445
18dbd950 6446 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb
BS
6447 {
6448 if (GET_CODE (insn) == CODE_LABEL)
6449 {
6450 if (insns_since_last_label)
6451 last_label = insn;
6452 insns_since_last_label = 0;
6453 }
6454 else if (GET_CODE (insn) == NOTE
a38e7aa5 6455 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
2130b7fb
BS
6456 {
6457 if (insns_since_last_label)
6458 last_label = insn;
6459 insns_since_last_label = 0;
6460 }
6461 else if (GET_CODE (insn) == INSN
6462 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 6463 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
6464 {
6465 init_insn_group_barriers ();
6466 last_label = 0;
6467 }
b5b8b0ac 6468 else if (NONDEBUG_INSN_P (insn))
2130b7fb
BS
6469 {
6470 insns_since_last_label = 1;
6471
c1bc6ca8 6472 if (group_barrier_needed (insn))
2130b7fb
BS
6473 {
6474 if (last_label)
6475 {
6476 if (dump)
6477 fprintf (dump, "Emitting stop before label %d\n",
6478 INSN_UID (last_label));
6479 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6480 insn = last_label;
112333d3
BS
6481
6482 init_insn_group_barriers ();
6483 last_label = 0;
2130b7fb 6484 }
2130b7fb
BS
6485 }
6486 }
6487 }
6488}
f4d578da
BS
6489
6490/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6491 This function has to emit all necessary group barriers. */
6492
6493static void
9c808aad 6494emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da
BS
6495{
6496 rtx insn;
6497
6498 init_insn_group_barriers ();
6499
18dbd950 6500 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 6501 {
bd7b9a0f
RH
6502 if (GET_CODE (insn) == BARRIER)
6503 {
6504 rtx last = prev_active_insn (insn);
6505
6506 if (! last)
6507 continue;
6508 if (GET_CODE (last) == JUMP_INSN
6509 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6510 last = prev_active_insn (last);
6511 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6512 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6513
6514 init_insn_group_barriers ();
6515 }
b5b8b0ac 6516 else if (NONDEBUG_INSN_P (insn))
f4d578da 6517 {
bd7b9a0f
RH
6518 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6519 init_insn_group_barriers ();
c1bc6ca8 6520 else if (group_barrier_needed (insn))
f4d578da
BS
6521 {
6522 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6523 init_insn_group_barriers ();
c1bc6ca8 6524 group_barrier_needed (insn);
f4d578da
BS
6525 }
6526 }
6527 }
6528}
30028c85 6529
2130b7fb 6530\f
2130b7fb 6531
30028c85 6532/* Instruction scheduling support. */
2130b7fb
BS
6533
6534#define NR_BUNDLES 10
6535
30028c85 6536/* A list of names of all available bundles. */
2130b7fb 6537
30028c85 6538static const char *bundle_name [NR_BUNDLES] =
2130b7fb 6539{
30028c85
VM
6540 ".mii",
6541 ".mmi",
6542 ".mfi",
6543 ".mmf",
2130b7fb 6544#if NR_BUNDLES == 10
30028c85
VM
6545 ".bbb",
6546 ".mbb",
2130b7fb 6547#endif
30028c85
VM
6548 ".mib",
6549 ".mmb",
6550 ".mfb",
6551 ".mlx"
2130b7fb
BS
6552};
6553
30028c85 6554/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 6555
30028c85 6556int ia64_final_schedule = 0;
2130b7fb 6557
35fd3193 6558/* Codes of the corresponding queried units: */
2130b7fb 6559
30028c85
VM
6560static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6561static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 6562
30028c85
VM
6563static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6564static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 6565
30028c85
VM
6566static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6567
6568/* The following variable value is an insn group barrier. */
6569
6570static rtx dfa_stop_insn;
6571
6572/* The following variable value is the last issued insn. */
6573
6574static rtx last_scheduled_insn;
6575
30028c85
VM
 6576/* The following variable value is a pointer to a DFA state used as
 6577   a temporary variable.  */
6578
6579static state_t temp_dfa_state = NULL;
6580
6581/* The following variable value is DFA state after issuing the last
6582 insn. */
6583
6584static state_t prev_cycle_state = NULL;
6585
6586/* The following array element values are TRUE if the corresponding
9e4f94de 6587   insn requires stop bits to be added before it.  */
30028c85 6588
048d0d36
MK
6589static char *stops_p = NULL;
6590
30028c85
VM
 6591/* The following variable is used to set up the array mentioned above.  */
6592
6593static int stop_before_p = 0;
6594
 6595/* The following variable value is the length of the arrays `clocks' and
6596 `add_cycles'. */
6597
6598static int clocks_length;
6599
048d0d36
MK
 6600/* The following variable value is the number of data speculations in progress.  */
6601static int pending_data_specs = 0;
6602
388092d5
AB
 6603/* Number of memory references in the current and the three following processor cycles.  */
6604static char mem_ops_in_group[4];
6605
 6606/* Number of the current processor cycle (from the scheduler's point of view).  */
6607static int current_cycle;
6608
9c808aad
AJ
6609static rtx ia64_single_set (rtx);
6610static void ia64_emit_insn_before (rtx, rtx);
2130b7fb
BS
6611
6612/* Map a bundle number to its pseudo-op. */
6613
6614const char *
9c808aad 6615get_bundle_name (int b)
2130b7fb 6616{
30028c85 6617 return bundle_name[b];
2130b7fb
BS
6618}
6619
2130b7fb
BS
6620
6621/* Return the maximum number of instructions a cpu can issue. */
6622
c237e94a 6623static int
9c808aad 6624ia64_issue_rate (void)
2130b7fb
BS
6625{
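  /* An Itanium core can issue at most two three-insn bundles per clock,
     hence the value 6 below.  */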
6626 return 6;
6627}
6628
6629/* Helper function - like single_set, but look inside COND_EXEC. */
6630
6631static rtx
9c808aad 6632ia64_single_set (rtx insn)
2130b7fb 6633{
30fa7e33 6634 rtx x = PATTERN (insn), ret;
2130b7fb
BS
6635 if (GET_CODE (x) == COND_EXEC)
6636 x = COND_EXEC_CODE (x);
6637 if (GET_CODE (x) == SET)
6638 return x;
bdbe5b8d
RH
6639
6640 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6641 Although they are not classical single set, the second set is there just
6642 to protect it from moving past FP-relative stack accesses. */
6643 switch (recog_memoized (insn))
30fa7e33 6644 {
bdbe5b8d
RH
6645 case CODE_FOR_prologue_allocate_stack:
6646 case CODE_FOR_epilogue_deallocate_stack:
6647 ret = XVECEXP (x, 0, 0);
6648 break;
6649
6650 default:
6651 ret = single_set_2 (insn, x);
6652 break;
30fa7e33 6653 }
bdbe5b8d 6654
30fa7e33 6655 return ret;
2130b7fb
BS
6656}
6657
388092d5
AB
6658/* Adjust the cost of a scheduling dependency.
6659 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6660 COST is the current cost, DW is the dependency weakness. */
c237e94a 6661static int
388092d5 6662ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
2130b7fb 6663{
388092d5 6664 enum reg_note dep_type = (enum reg_note) dep_type1;
2130b7fb
BS
6665 enum attr_itanium_class dep_class;
6666 enum attr_itanium_class insn_class;
2130b7fb 6667
2130b7fb 6668 insn_class = ia64_safe_itanium_class (insn);
30028c85 6669 dep_class = ia64_safe_itanium_class (dep_insn);
388092d5
AB
6670
6671 /* Treat true memory dependencies separately. Ignore apparent true
6672 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6673 if (dep_type == REG_DEP_TRUE
6674 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6675 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6676 return 0;
6677
6678 if (dw == MIN_DEP_WEAK)
6679 /* Store and load are likely to alias, use higher cost to avoid stall. */
6680 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6681 else if (dw > MIN_DEP_WEAK)
6682 {
6683 /* Store and load are less likely to alias. */
6684 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6685 /* Assume there will be no cache conflict for floating-point data.
6686 For integer data, L1 conflict penalty is huge (17 cycles), so we
6687 never assume it will not cause a conflict. */
6688 return 0;
6689 else
6690 return cost;
6691 }
6692
6693 if (dep_type != REG_DEP_OUTPUT)
6694 return cost;
6695
30028c85
VM
6696 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6697 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
6698 return 0;
6699
2130b7fb
BS
6700 return cost;
6701}
6702
14d118d6
DM
6703/* Like emit_insn_before, but skip cycle_display notes.
6704 ??? When cycle display notes are implemented, update this. */
6705
6706static void
9c808aad 6707ia64_emit_insn_before (rtx insn, rtx before)
14d118d6
DM
6708{
6709 emit_insn_before (insn, before);
6710}
6711
30028c85
VM
6712/* The following function marks insns that produce addresses for load
6713 and store insns. Such insns will be placed into M slots because this
6714 decreases latency time for Itanium1 (see function
6715 `ia64_produce_address_p' and the DFA descriptions). */
2130b7fb
BS
6716
6717static void
9c808aad 6718ia64_dependencies_evaluation_hook (rtx head, rtx tail)
2130b7fb 6719{
b198261f 6720 rtx insn, next, next_tail;
9c808aad 6721
f12b785d
RH
6722 /* Before reload, which_alternative is not set, which means that
6723 ia64_safe_itanium_class will produce wrong results for (at least)
6724 move instructions. */
6725 if (!reload_completed)
6726 return;
6727
30028c85
VM
6728 next_tail = NEXT_INSN (tail);
6729 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6730 if (INSN_P (insn))
6731 insn->call = 0;
6732 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6733 if (INSN_P (insn)
6734 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6735 {
e2f6ff94
MK
6736 sd_iterator_def sd_it;
6737 dep_t dep;
6738 bool has_mem_op_consumer_p = false;
b198261f 6739
e2f6ff94 6740 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
30028c85 6741 {
a71aef0b
JB
6742 enum attr_itanium_class c;
6743
e2f6ff94 6744 if (DEP_TYPE (dep) != REG_DEP_TRUE)
f12b785d 6745 continue;
b198261f 6746
e2f6ff94 6747 next = DEP_CON (dep);
a71aef0b
JB
6748 c = ia64_safe_itanium_class (next);
6749 if ((c == ITANIUM_CLASS_ST
6750 || c == ITANIUM_CLASS_STF)
30028c85 6751 && ia64_st_address_bypass_p (insn, next))
e2f6ff94
MK
6752 {
6753 has_mem_op_consumer_p = true;
6754 break;
6755 }
a71aef0b
JB
6756 else if ((c == ITANIUM_CLASS_LD
6757 || c == ITANIUM_CLASS_FLD
6758 || c == ITANIUM_CLASS_FLDP)
30028c85 6759 && ia64_ld_address_bypass_p (insn, next))
e2f6ff94
MK
6760 {
6761 has_mem_op_consumer_p = true;
6762 break;
6763 }
30028c85 6764 }
e2f6ff94
MK
6765
6766 insn->call = has_mem_op_consumer_p;
30028c85
VM
6767 }
6768}
2130b7fb 6769
30028c85 6770/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 6771
30028c85 6772static void
9c808aad
AJ
6773ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6774 int sched_verbose ATTRIBUTE_UNUSED,
6775 int max_ready ATTRIBUTE_UNUSED)
30028c85
VM
6776{
6777#ifdef ENABLE_CHECKING
6778 rtx insn;
9c808aad 6779
388092d5 6780 if (!sel_sched_p () && reload_completed)
30028c85
VM
6781 for (insn = NEXT_INSN (current_sched_info->prev_head);
6782 insn != current_sched_info->next_tail;
6783 insn = NEXT_INSN (insn))
e820471b 6784 gcc_assert (!SCHED_GROUP_P (insn));
30028c85
VM
6785#endif
6786 last_scheduled_insn = NULL_RTX;
6787 init_insn_group_barriers ();
388092d5
AB
6788
6789 current_cycle = 0;
6790 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
2130b7fb
BS
6791}
6792
048d0d36
MK
6793/* We're beginning a scheduling pass. Check assertion. */
6794
6795static void
6796ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6797 int sched_verbose ATTRIBUTE_UNUSED,
6798 int max_ready ATTRIBUTE_UNUSED)
6799{
388092d5 6800 gcc_assert (pending_data_specs == 0);
048d0d36
MK
6801}
6802
6803/* Scheduling pass is now finished. Free/reset static variable. */
6804static void
6805ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6806 int sched_verbose ATTRIBUTE_UNUSED)
6807{
388092d5
AB
6808 gcc_assert (pending_data_specs == 0);
6809}
6810
6811/* Return TRUE if INSN is a load (either normal or speculative, but not a
6812 speculation check), FALSE otherwise. */
6813static bool
6814is_load_p (rtx insn)
6815{
6816 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6817
6818 return
6819 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6820 && get_attr_check_load (insn) == CHECK_LOAD_NO);
6821}
6822
6823/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
6824 array (taking into account the 3-cycle cache reference postponing for stores: Intel
6825 Itanium 2 Reference Manual for Software Development and Optimization,
6826 6.7.3.1). */
6827static void
6828record_memory_reference (rtx insn)
6829{
6830 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6831
6832 switch (insn_class) {
6833 case ITANIUM_CLASS_FLD:
6834 case ITANIUM_CLASS_LD:
6835 mem_ops_in_group[current_cycle % 4]++;
6836 break;
6837 case ITANIUM_CLASS_STF:
6838 case ITANIUM_CLASS_ST:
6839 mem_ops_in_group[(current_cycle + 3) % 4]++;
6840 break;
6841 default:;
6842 }
048d0d36
MK
6843}
6844
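/* For example, a load issued on cycle 10 bumps mem_ops_in_group[10 % 4],
   i.e. slot 2, while a store issued on the same cycle is charged three
   cycles later, to mem_ops_in_group[(10 + 3) % 4], i.e. slot 1.  */
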
30028c85
VM
6845/* We are about to begin issuing insns for this clock cycle.
6846 Override the default sort algorithm to better slot instructions. */
2130b7fb 6847
30028c85 6848static int
9c808aad 6849ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
388092d5 6850 int *pn_ready, int clock_var,
9c808aad 6851 int reorder_type)
2130b7fb 6852{
30028c85
VM
6853 int n_asms;
6854 int n_ready = *pn_ready;
6855 rtx *e_ready = ready + n_ready;
6856 rtx *insnp;
2130b7fb 6857
30028c85
VM
6858 if (sched_verbose)
6859 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 6860
30028c85 6861 if (reorder_type == 0)
2130b7fb 6862 {
30028c85
VM
6863 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6864 n_asms = 0;
6865 for (insnp = ready; insnp < e_ready; insnp++)
6866 if (insnp < e_ready)
6867 {
6868 rtx insn = *insnp;
6869 enum attr_type t = ia64_safe_type (insn);
6870 if (t == TYPE_UNKNOWN)
6871 {
6872 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6873 || asm_noperands (PATTERN (insn)) >= 0)
6874 {
6875 rtx lowest = ready[n_asms];
6876 ready[n_asms] = insn;
6877 *insnp = lowest;
6878 n_asms++;
6879 }
6880 else
6881 {
6882 rtx highest = ready[n_ready - 1];
6883 ready[n_ready - 1] = insn;
6884 *insnp = highest;
6885 return 1;
6886 }
6887 }
6888 }
98d2b17e 6889
30028c85 6890 if (n_asms < n_ready)
98d2b17e 6891 {
30028c85
VM
6892 /* Some normal insns to process. Skip the asms. */
6893 ready += n_asms;
6894 n_ready -= n_asms;
98d2b17e 6895 }
30028c85
VM
6896 else if (n_ready > 0)
6897 return 1;
2130b7fb
BS
6898 }
6899
30028c85 6900 if (ia64_final_schedule)
2130b7fb 6901 {
30028c85
VM
6902 int deleted = 0;
6903 int nr_need_stop = 0;
6904
6905 for (insnp = ready; insnp < e_ready; insnp++)
c1bc6ca8 6906 if (safe_group_barrier_needed (*insnp))
30028c85 6907 nr_need_stop++;
9c808aad 6908
30028c85
VM
6909 if (reorder_type == 1 && n_ready == nr_need_stop)
6910 return 0;
6911 if (reorder_type == 0)
6912 return 1;
6913 insnp = e_ready;
6914 /* Move down everything that needs a stop bit, preserving
6915 relative order. */
6916 while (insnp-- > ready + deleted)
6917 while (insnp >= ready + deleted)
6918 {
6919 rtx insn = *insnp;
c1bc6ca8 6920 if (! safe_group_barrier_needed (insn))
30028c85
VM
6921 break;
6922 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6923 *ready = insn;
6924 deleted++;
6925 }
6926 n_ready -= deleted;
6927 ready += deleted;
2130b7fb 6928 }
2130b7fb 6929
388092d5
AB
6930 current_cycle = clock_var;
6931 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
6932 {
6933 int moved = 0;
6934
6935 insnp = e_ready;
6936 /* Move down loads/stores, preserving relative order. */
6937 while (insnp-- > ready + moved)
6938 while (insnp >= ready + moved)
6939 {
6940 rtx insn = *insnp;
6941 if (! is_load_p (insn))
6942 break;
6943 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6944 *ready = insn;
6945 moved++;
6946 }
6947 n_ready -= moved;
6948 ready += moved;
6949 }
6950
30028c85 6951 return 1;
2130b7fb 6952}
6b6c1201 6953
30028c85
VM
6954/* We are about to begin issuing insns for this clock cycle. Override
6955 the default sort algorithm to better slot instructions. */
c65ebc55 6956
30028c85 6957static int
9c808aad
AJ
6958ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6959 int clock_var)
2130b7fb 6960{
30028c85
VM
6961 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6962 pn_ready, clock_var, 0);
2130b7fb
BS
6963}
6964
30028c85
VM
6965/* Like ia64_sched_reorder, but called after issuing each insn.
6966 Override the default sort algorithm to better slot instructions. */
2130b7fb 6967
30028c85 6968static int
9c808aad
AJ
6969ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6970 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6971 int *pn_ready, int clock_var)
30028c85 6972{
30028c85
VM
6973 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6974 clock_var, 1);
2130b7fb
BS
6975}
6976
30028c85
VM
6977/* We are about to issue INSN. Return the number of insns left on the
6978 ready queue that can be issued this cycle. */
2130b7fb 6979
30028c85 6980static int
9c808aad
AJ
6981ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6982 int sched_verbose ATTRIBUTE_UNUSED,
6983 rtx insn ATTRIBUTE_UNUSED,
6984 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 6985{
388092d5 6986 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
048d0d36 6987 /* Modulo scheduling does not extend h_i_d when emitting
388092d5 6988 new instructions. Don't use h_i_d, if we don't have to. */
048d0d36
MK
6989 {
6990 if (DONE_SPEC (insn) & BEGIN_DATA)
6991 pending_data_specs++;
6992 if (CHECK_SPEC (insn) & BEGIN_DATA)
6993 pending_data_specs--;
6994 }
6995
b5b8b0ac
AO
6996 if (DEBUG_INSN_P (insn))
6997 return 1;
6998
30028c85
VM
6999 last_scheduled_insn = insn;
7000 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7001 if (reload_completed)
2130b7fb 7002 {
c1bc6ca8 7003 int needed = group_barrier_needed (insn);
e820471b
NS
7004
7005 gcc_assert (!needed);
30028c85
VM
7006 if (GET_CODE (insn) == CALL_INSN)
7007 init_insn_group_barriers ();
7008 stops_p [INSN_UID (insn)] = stop_before_p;
7009 stop_before_p = 0;
388092d5
AB
7010
7011 record_memory_reference (insn);
2130b7fb 7012 }
30028c85
VM
7013 return 1;
7014}
c65ebc55 7015
30028c85
VM
7016/* We are choosing an insn from the ready queue. Return nonzero if INSN
7017 can be chosen. */
c65ebc55 7018
30028c85 7019static int
9c808aad 7020ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
30028c85 7021{
388092d5 7022 gcc_assert (insn && INSN_P (insn));
048d0d36
MK
7023 return ((!reload_completed
7024 || !safe_group_barrier_needed (insn))
388092d5
AB
7025 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7026 && (!mflag_sched_mem_insns_hard_limit
7027 || !is_load_p (insn)
7028 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
048d0d36
MK
7029}
7030
7031/* We are choosing an insn from the ready queue. Return nonzero if INSN
7032 can be chosen. */
7033
7034static bool
3101faab 7035ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
048d0d36
MK
7036{
7037 gcc_assert (insn && INSN_P (insn));
7038 /* Size of ALAT is 32. Since we perform conservative data speculation,
7039 we keep ALAT half-empty. */
7040 return (pending_data_specs < 16
7041 || !(TODO_SPEC (insn) & BEGIN_DATA));
2130b7fb
BS
7042}
7043
30028c85
VM
7044/* The following variable value is a pseudo-insn used by the DFA insn
7045 scheduler to change the DFA state when the simulated clock is
7046 increased. */
2130b7fb 7047
30028c85 7048static rtx dfa_pre_cycle_insn;
2130b7fb 7049
388092d5
AB
7050/* Returns 1 when a meaningful insn was scheduled between the last group
7051 barrier and LAST. */
7052static int
7053scheduled_good_insn (rtx last)
7054{
7055 if (last && recog_memoized (last) >= 0)
7056 return 1;
7057
7058 for ( ;
7059 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7060 && !stops_p[INSN_UID (last)];
7061 last = PREV_INSN (last))
7062 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7063 the ebb we're scheduling. */
7064 if (INSN_P (last) && recog_memoized (last) >= 0)
7065 return 1;
7066
7067 return 0;
7068}
7069
1e5f1716 7070/* We are about to begin issuing INSN. Return nonzero if we cannot
30028c85
VM
7071 issue it on given cycle CLOCK and return zero if we should not sort
7072 the ready queue on the next clock start. */
2130b7fb
BS
7073
7074static int
9c808aad
AJ
7075ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7076 int clock, int *sort_p)
2130b7fb 7077{
30028c85 7078 int setup_clocks_p = FALSE;
2130b7fb 7079
e820471b 7080 gcc_assert (insn && INSN_P (insn));
b5b8b0ac
AO
7081
7082 if (DEBUG_INSN_P (insn))
7083 return 0;
7084
388092d5
AB
7085 /* When a group barrier is needed for insn, last_scheduled_insn
7086 should be set. */
7087 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7088 || last_scheduled_insn);
7089
7090 if ((reload_completed
7091 && (safe_group_barrier_needed (insn)
7092 || (mflag_sched_stop_bits_after_every_cycle
7093 && last_clock != clock
7094 && last_scheduled_insn
7095 && scheduled_good_insn (last_scheduled_insn))))
30028c85
VM
7096 || (last_scheduled_insn
7097 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7098 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7099 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
2130b7fb 7100 {
30028c85 7101 init_insn_group_barriers ();
388092d5 7102
30028c85
VM
7103 if (verbose && dump)
7104 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7105 last_clock == clock ? " + cycle advance" : "");
388092d5 7106
30028c85 7107 stop_before_p = 1;
388092d5
AB
7108 current_cycle = clock;
7109 mem_ops_in_group[current_cycle % 4] = 0;
7110
30028c85 7111 if (last_clock == clock)
2130b7fb 7112 {
30028c85
VM
7113 state_transition (curr_state, dfa_stop_insn);
7114 if (TARGET_EARLY_STOP_BITS)
7115 *sort_p = (last_scheduled_insn == NULL_RTX
7116 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7117 else
7118 *sort_p = 0;
7119 return 1;
7120 }
7121 else if (reload_completed)
7122 setup_clocks_p = TRUE;
388092d5
AB
7123
7124 if (last_scheduled_insn)
25069b42 7125 {
388092d5
AB
7126 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7127 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7128 state_reset (curr_state);
7129 else
7130 {
7131 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7132 state_transition (curr_state, dfa_stop_insn);
7133 state_transition (curr_state, dfa_pre_cycle_insn);
7134 state_transition (curr_state, NULL);
7135 }
25069b42 7136 }
30028c85
VM
7137 }
7138 else if (reload_completed)
7139 setup_clocks_p = TRUE;
388092d5 7140
30028c85 7141 return 0;
2130b7fb
BS
7142}
7143
048d0d36
MK
7144/* Implement targetm.sched.h_i_d_extended hook.
7145 Extend internal data structures. */
7146static void
7147ia64_h_i_d_extended (void)
7148{
048d0d36
MK
7149 if (stops_p != NULL)
7150 {
388092d5 7151 int new_clocks_length = get_max_uid () * 3 / 2;
5ead67f6 7152 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
048d0d36
MK
7153 clocks_length = new_clocks_length;
7154 }
7155}
388092d5
AB
7156\f
7157
7158/* This structure describes the data used by the backend to guide scheduling.
7159 When the current scheduling point is switched, this data should be saved
7160 and restored later, if the scheduler returns to this point. */
7161struct _ia64_sched_context
7162{
7163 state_t prev_cycle_state;
7164 rtx last_scheduled_insn;
7165 struct reg_write_state rws_sum[NUM_REGS];
7166 struct reg_write_state rws_insn[NUM_REGS];
7167 int first_instruction;
7168 int pending_data_specs;
7169 int current_cycle;
7170 char mem_ops_in_group[4];
7171};
7172typedef struct _ia64_sched_context *ia64_sched_context_t;
7173
7174/* Allocates a scheduling context. */
7175static void *
7176ia64_alloc_sched_context (void)
7177{
7178 return xmalloc (sizeof (struct _ia64_sched_context));
7179}
7180
7181/* Initializes the _SC context with clean data, if CLEAN_P, and from
7182 the global context otherwise. */
7183static void
7184ia64_init_sched_context (void *_sc, bool clean_p)
7185{
7186 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7187
7188 sc->prev_cycle_state = xmalloc (dfa_state_size);
7189 if (clean_p)
7190 {
7191 state_reset (sc->prev_cycle_state);
7192 sc->last_scheduled_insn = NULL_RTX;
7193 memset (sc->rws_sum, 0, sizeof (rws_sum));
7194 memset (sc->rws_insn, 0, sizeof (rws_insn));
7195 sc->first_instruction = 1;
7196 sc->pending_data_specs = 0;
7197 sc->current_cycle = 0;
7198 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7199 }
7200 else
7201 {
7202 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7203 sc->last_scheduled_insn = last_scheduled_insn;
7204 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7205 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7206 sc->first_instruction = first_instruction;
7207 sc->pending_data_specs = pending_data_specs;
7208 sc->current_cycle = current_cycle;
7209 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7210 }
7211}
7212
7213/* Sets the global scheduling context to the one pointed to by _SC. */
7214static void
7215ia64_set_sched_context (void *_sc)
7216{
7217 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7218
7219 gcc_assert (sc != NULL);
7220
7221 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7222 last_scheduled_insn = sc->last_scheduled_insn;
7223 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7224 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7225 first_instruction = sc->first_instruction;
7226 pending_data_specs = sc->pending_data_specs;
7227 current_cycle = sc->current_cycle;
7228 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7229}
7230
7231/* Clears the data in the _SC scheduling context. */
7232static void
7233ia64_clear_sched_context (void *_sc)
7234{
7235 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7236
7237 free (sc->prev_cycle_state);
7238 sc->prev_cycle_state = NULL;
7239}
7240
7241/* Frees the _SC scheduling context. */
7242static void
7243ia64_free_sched_context (void *_sc)
7244{
7245 gcc_assert (_sc != NULL);
7246
7247 free (_sc);
7248}
7249
7250typedef rtx (* gen_func_t) (rtx, rtx);
7251
7252/* Return a function that will generate a load of mode MODE_NO
7253 with speculation types TS. */
7254static gen_func_t
7255get_spec_load_gen_function (ds_t ts, int mode_no)
7256{
7257 static gen_func_t gen_ld_[] = {
7258 gen_movbi,
7259 gen_movqi_internal,
7260 gen_movhi_internal,
7261 gen_movsi_internal,
7262 gen_movdi_internal,
7263 gen_movsf_internal,
7264 gen_movdf_internal,
7265 gen_movxf_internal,
7266 gen_movti_internal,
7267 gen_zero_extendqidi2,
7268 gen_zero_extendhidi2,
7269 gen_zero_extendsidi2,
7270 };
7271
7272 static gen_func_t gen_ld_a[] = {
7273 gen_movbi_advanced,
7274 gen_movqi_advanced,
7275 gen_movhi_advanced,
7276 gen_movsi_advanced,
7277 gen_movdi_advanced,
7278 gen_movsf_advanced,
7279 gen_movdf_advanced,
7280 gen_movxf_advanced,
7281 gen_movti_advanced,
7282 gen_zero_extendqidi2_advanced,
7283 gen_zero_extendhidi2_advanced,
7284 gen_zero_extendsidi2_advanced,
7285 };
7286 static gen_func_t gen_ld_s[] = {
7287 gen_movbi_speculative,
7288 gen_movqi_speculative,
7289 gen_movhi_speculative,
7290 gen_movsi_speculative,
7291 gen_movdi_speculative,
7292 gen_movsf_speculative,
7293 gen_movdf_speculative,
7294 gen_movxf_speculative,
7295 gen_movti_speculative,
7296 gen_zero_extendqidi2_speculative,
7297 gen_zero_extendhidi2_speculative,
7298 gen_zero_extendsidi2_speculative,
7299 };
7300 static gen_func_t gen_ld_sa[] = {
7301 gen_movbi_speculative_advanced,
7302 gen_movqi_speculative_advanced,
7303 gen_movhi_speculative_advanced,
7304 gen_movsi_speculative_advanced,
7305 gen_movdi_speculative_advanced,
7306 gen_movsf_speculative_advanced,
7307 gen_movdf_speculative_advanced,
7308 gen_movxf_speculative_advanced,
7309 gen_movti_speculative_advanced,
7310 gen_zero_extendqidi2_speculative_advanced,
7311 gen_zero_extendhidi2_speculative_advanced,
7312 gen_zero_extendsidi2_speculative_advanced,
7313 };
7314 static gen_func_t gen_ld_s_a[] = {
7315 gen_movbi_speculative_a,
7316 gen_movqi_speculative_a,
7317 gen_movhi_speculative_a,
7318 gen_movsi_speculative_a,
7319 gen_movdi_speculative_a,
7320 gen_movsf_speculative_a,
7321 gen_movdf_speculative_a,
7322 gen_movxf_speculative_a,
7323 gen_movti_speculative_a,
7324 gen_zero_extendqidi2_speculative_a,
7325 gen_zero_extendhidi2_speculative_a,
7326 gen_zero_extendsidi2_speculative_a,
7327 };
7328
7329 gen_func_t *gen_ld;
7330
7331 if (ts & BEGIN_DATA)
7332 {
7333 if (ts & BEGIN_CONTROL)
7334 gen_ld = gen_ld_sa;
7335 else
7336 gen_ld = gen_ld_a;
7337 }
7338 else if (ts & BEGIN_CONTROL)
7339 {
7340 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7341 || ia64_needs_block_p (ts))
7342 gen_ld = gen_ld_s;
7343 else
7344 gen_ld = gen_ld_s_a;
7345 }
7346 else if (ts == 0)
7347 gen_ld = gen_ld_;
7348 else
7349 gcc_unreachable ();
7350
7351 return gen_ld[mode_no];
7352}
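
/* For instance, get_spec_load_gen_function (BEGIN_DATA, 4) selects
   gen_movdi_advanced from the tables above, i.e. the generator for a
   DImode advanced load (an ld8.a); adding BEGIN_CONTROL to TS would
   select gen_movdi_speculative_advanced (an ld8.sa) instead.  */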
048d0d36
MK
7353
7354/* Constants that help mapping 'enum machine_mode' to int. */
7355enum SPEC_MODES
7356 {
7357 SPEC_MODE_INVALID = -1,
7358 SPEC_MODE_FIRST = 0,
7359 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7360 SPEC_MODE_FOR_EXTEND_LAST = 3,
7361 SPEC_MODE_LAST = 8
7362 };
7363
388092d5
AB
7364enum
7365 {
7366 /* Offset to reach ZERO_EXTEND patterns. */
7367 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7368 };
7369
048d0d36
MK
7370/* Return index of the MODE. */
7371static int
7372ia64_mode_to_int (enum machine_mode mode)
7373{
7374 switch (mode)
7375 {
7376 case BImode: return 0; /* SPEC_MODE_FIRST */
7377 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7378 case HImode: return 2;
7379 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7380 case DImode: return 4;
7381 case SFmode: return 5;
7382 case DFmode: return 6;
7383 case XFmode: return 7;
7384 case TImode:
7385 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7386 mentioned in itanium[12].md. Predicate fp_register_operand also
7387 needs to be defined. Bottom line: better disable for now. */
7388 return SPEC_MODE_INVALID;
7389 default: return SPEC_MODE_INVALID;
7390 }
7391}
7392
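/* A DImode load therefore maps to 4, while a QImode load that is
   zero-extended to DImode maps to 1 and later has SPEC_GEN_EXTEND_OFFSET
   (i.e. 8) added to it, selecting the gen_zero_extendqidi2* entries
   (index 9) in the generator tables above.  */
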
7393/* Provide information about speculation capabilities. */
7394static void
7395ia64_set_sched_flags (spec_info_t spec_info)
7396{
7397 unsigned int *flags = &(current_sched_info->flags);
7398
7399 if (*flags & SCHED_RGN
388092d5
AB
7400 || *flags & SCHED_EBB
7401 || *flags & SEL_SCHED)
048d0d36
MK
7402 {
7403 int mask = 0;
7404
a57aee2a 7405 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
388092d5 7406 || (mflag_sched_ar_data_spec && reload_completed))
048d0d36
MK
7407 {
7408 mask |= BEGIN_DATA;
388092d5
AB
7409
7410 if (!sel_sched_p ()
7411 && ((mflag_sched_br_in_data_spec && !reload_completed)
7412 || (mflag_sched_ar_in_data_spec && reload_completed)))
048d0d36
MK
7413 mask |= BE_IN_DATA;
7414 }
7415
388092d5
AB
7416 if (mflag_sched_control_spec
7417 && (!sel_sched_p ()
7418 || reload_completed))
048d0d36
MK
7419 {
7420 mask |= BEGIN_CONTROL;
7421
388092d5 7422 if (!sel_sched_p () && mflag_sched_in_control_spec)
048d0d36
MK
7423 mask |= BE_IN_CONTROL;
7424 }
7425
7ab5df48
AB
7426 spec_info->mask = mask;
7427
048d0d36
MK
7428 if (mask)
7429 {
6fb5fa3c
DB
7430 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7431
7432 if (mask & BE_IN_SPEC)
7433 *flags |= NEW_BBS;
048d0d36 7434
048d0d36
MK
7435 spec_info->flags = 0;
7436
7437 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7438 spec_info->flags |= PREFER_NON_DATA_SPEC;
7439
388092d5 7440 if (mask & CONTROL_SPEC)
048d0d36 7441 {
388092d5
AB
7442 if (mflag_sched_prefer_non_control_spec_insns)
7443 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7444
7445 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7446 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
048d0d36 7447 }
388092d5
AB
7448
7449 if (sched_verbose >= 1)
7450 spec_info->dump = sched_dump;
048d0d36
MK
7451 else
7452 spec_info->dump = 0;
7453
7454 if (mflag_sched_count_spec_in_critical_path)
7455 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7456 }
7457 }
cd510f15
AM
7458 else
7459 spec_info->mask = 0;
048d0d36
MK
7460}
7461
388092d5
AB
7462/* If INSN is an appropriate load return its mode.
7463 Return -1 otherwise. */
048d0d36 7464static int
388092d5
AB
7465get_mode_no_for_insn (rtx insn)
7466{
7467 rtx reg, mem, mode_rtx;
7468 int mode_no;
048d0d36 7469 bool extend_p;
048d0d36 7470
388092d5 7471 extract_insn_cached (insn);
048d0d36 7472
388092d5
AB
7473 /* We use WHICH_ALTERNATIVE only after reload. This will
7474 guarantee that reload won't touch a speculative insn. */
f6ec1d11 7475
388092d5 7476 if (recog_data.n_operands != 2)
048d0d36
MK
7477 return -1;
7478
388092d5
AB
7479 reg = recog_data.operand[0];
7480 mem = recog_data.operand[1];
f6ec1d11 7481
388092d5
AB
7482 /* We should use MEM's mode since REG's mode in the presence of
7483 ZERO_EXTEND will always be DImode. */
7484 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7485 /* Process non-speculative ld. */
7486 {
7487 if (!reload_completed)
7488 {
7489 /* Do not speculate into regs like ar.lc. */
7490 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7491 return -1;
7492
7493 if (!MEM_P (mem))
7494 return -1;
7495
7496 {
7497 rtx mem_reg = XEXP (mem, 0);
7498
7499 if (!REG_P (mem_reg))
7500 return -1;
7501 }
7502
7503 mode_rtx = mem;
7504 }
7505 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7506 {
7507 gcc_assert (REG_P (reg) && MEM_P (mem));
7508 mode_rtx = mem;
7509 }
7510 else
7511 return -1;
7512 }
7513 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7514 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7515 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7516 /* Process speculative ld or ld.c. */
048d0d36 7517 {
388092d5
AB
7518 gcc_assert (REG_P (reg) && MEM_P (mem));
7519 mode_rtx = mem;
048d0d36
MK
7520 }
7521 else
048d0d36 7522 {
388092d5 7523 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
048d0d36 7524
388092d5
AB
7525 if (attr_class == ITANIUM_CLASS_CHK_A
7526 || attr_class == ITANIUM_CLASS_CHK_S_I
7527 || attr_class == ITANIUM_CLASS_CHK_S_F)
7528 /* Process chk. */
7529 mode_rtx = reg;
7530 else
7531 return -1;
048d0d36 7532 }
f6ec1d11 7533
388092d5 7534 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
f6ec1d11 7535
388092d5 7536 if (mode_no == SPEC_MODE_INVALID)
048d0d36
MK
7537 return -1;
7538
388092d5
AB
7539 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7540
7541 if (extend_p)
7542 {
7543 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7544 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7545 return -1;
f6ec1d11 7546
388092d5
AB
7547 mode_no += SPEC_GEN_EXTEND_OFFSET;
7548 }
048d0d36 7549
388092d5 7550 return mode_no;
048d0d36
MK
7551}
7552
388092d5
AB
7553/* If X is an unspec part of a speculative load, return its code.
7554 Return -1 otherwise. */
7555static int
7556get_spec_unspec_code (const_rtx x)
7557{
7558 if (GET_CODE (x) != UNSPEC)
7559 return -1;
048d0d36 7560
048d0d36 7561 {
388092d5 7562 int code;
048d0d36 7563
388092d5 7564 code = XINT (x, 1);
048d0d36 7565
388092d5
AB
7566 switch (code)
7567 {
7568 case UNSPEC_LDA:
7569 case UNSPEC_LDS:
7570 case UNSPEC_LDS_A:
7571 case UNSPEC_LDSA:
7572 return code;
048d0d36 7573
388092d5
AB
7574 default:
7575 return -1;
7576 }
7577 }
7578}
048d0d36 7579
388092d5
AB
7580/* Implement skip_rtx_p hook. */
7581static bool
7582ia64_skip_rtx_p (const_rtx x)
7583{
7584 return get_spec_unspec_code (x) != -1;
7585}
048d0d36 7586
388092d5
AB
7587/* If INSN is a speculative load, return its UNSPEC code.
7588 Return -1 otherwise. */
7589static int
7590get_insn_spec_code (const_rtx insn)
7591{
7592 rtx pat, reg, mem;
048d0d36 7593
388092d5 7594 pat = PATTERN (insn);
048d0d36 7595
388092d5
AB
7596 if (GET_CODE (pat) == COND_EXEC)
7597 pat = COND_EXEC_CODE (pat);
048d0d36 7598
388092d5
AB
7599 if (GET_CODE (pat) != SET)
7600 return -1;
7601
7602 reg = SET_DEST (pat);
7603 if (!REG_P (reg))
7604 return -1;
7605
7606 mem = SET_SRC (pat);
7607 if (GET_CODE (mem) == ZERO_EXTEND)
7608 mem = XEXP (mem, 0);
7609
7610 return get_spec_unspec_code (mem);
7611}
7612
7613/* If INSN is a speculative load, return a ds with the speculation types.
7614 Otherwise [if INSN is a normal instruction] return 0. */
7615static ds_t
7616ia64_get_insn_spec_ds (rtx insn)
7617{
7618 int code = get_insn_spec_code (insn);
7619
7620 switch (code)
048d0d36 7621 {
388092d5
AB
7622 case UNSPEC_LDA:
7623 return BEGIN_DATA;
048d0d36 7624
388092d5
AB
7625 case UNSPEC_LDS:
7626 case UNSPEC_LDS_A:
7627 return BEGIN_CONTROL;
048d0d36 7628
388092d5
AB
7629 case UNSPEC_LDSA:
7630 return BEGIN_DATA | BEGIN_CONTROL;
048d0d36 7631
388092d5
AB
7632 default:
7633 return 0;
048d0d36 7634 }
388092d5
AB
7635}
7636
7637/* If INSN is a speculative load return a ds with the speculation types that
7638 will be checked.
7639 Otherwise [if INSN is a normal instruction] return 0. */
7640static ds_t
7641ia64_get_insn_checked_ds (rtx insn)
7642{
7643 int code = get_insn_spec_code (insn);
7644
7645 switch (code)
048d0d36 7646 {
388092d5
AB
7647 case UNSPEC_LDA:
7648 return BEGIN_DATA | BEGIN_CONTROL;
7649
7650 case UNSPEC_LDS:
7651 return BEGIN_CONTROL;
7652
7653 case UNSPEC_LDS_A:
7654 case UNSPEC_LDSA:
7655 return BEGIN_DATA | BEGIN_CONTROL;
7656
7657 default:
7658 return 0;
048d0d36 7659 }
388092d5 7660}
048d0d36 7661
388092d5
AB
7662/* Return the speculative pattern for INSN with speculative mode TS and
7663 machine mode MODE_NO.  The pattern is built by the generator selected
7664 with get_spec_load_gen_function and preserves any COND_EXEC wrapper
7665 of the original pattern. */
7666static rtx
7667ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7668{
7669 rtx pat, new_pat;
7670 gen_func_t gen_load;
048d0d36 7671
388092d5 7672 gen_load = get_spec_load_gen_function (ts, mode_no);
048d0d36 7673
388092d5
AB
7674 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7675 copy_rtx (recog_data.operand[1]));
048d0d36
MK
7676
7677 pat = PATTERN (insn);
7678 if (GET_CODE (pat) == COND_EXEC)
388092d5
AB
7679 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7680 new_pat);
048d0d36
MK
7681
7682 return new_pat;
7683}
7684
048d0d36 7685static bool
388092d5
AB
7686insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7687 ds_t ds ATTRIBUTE_UNUSED)
048d0d36 7688{
388092d5
AB
7689 return false;
7690}
048d0d36 7691
388092d5
AB
7692/* Implement targetm.sched.speculate_insn hook.
7693 Check if the INSN can be TS speculative.
7694 If 'no' - return -1.
7695 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7696 If current pattern of the INSN already provides TS speculation,
7697 return 0. */
7698static int
7699ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7700{
7701 int mode_no;
7702 int res;
7703
7704 gcc_assert (!(ts & ~SPECULATIVE));
048d0d36 7705
388092d5
AB
7706 if (ia64_spec_check_p (insn))
7707 return -1;
048d0d36 7708
388092d5
AB
7709 if ((ts & BE_IN_SPEC)
7710 && !insn_can_be_in_speculative_p (insn, ts))
7711 return -1;
048d0d36 7712
388092d5 7713 mode_no = get_mode_no_for_insn (insn);
048d0d36 7714
388092d5
AB
7715 if (mode_no != SPEC_MODE_INVALID)
7716 {
7717 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7718 res = 0;
7719 else
7720 {
7721 res = 1;
7722 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7723 }
7724 }
7725 else
7726 res = -1;
048d0d36 7727
388092d5
AB
7728 return res;
7729}
048d0d36 7730
388092d5
AB
7731/* Return a function that will generate a check for speculation TS with mode
7732 MODE_NO.
7733 If a simple check is needed, pass true for SIMPLE_CHECK_P.
7734 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
7735static gen_func_t
7736get_spec_check_gen_function (ds_t ts, int mode_no,
7737 bool simple_check_p, bool clearing_check_p)
7738{
7739 static gen_func_t gen_ld_c_clr[] = {
048d0d36
MK
7740 gen_movbi_clr,
7741 gen_movqi_clr,
7742 gen_movhi_clr,
7743 gen_movsi_clr,
7744 gen_movdi_clr,
7745 gen_movsf_clr,
7746 gen_movdf_clr,
7747 gen_movxf_clr,
7748 gen_movti_clr,
7749 gen_zero_extendqidi2_clr,
7750 gen_zero_extendhidi2_clr,
7751 gen_zero_extendsidi2_clr,
388092d5
AB
7752 };
7753 static gen_func_t gen_ld_c_nc[] = {
7754 gen_movbi_nc,
7755 gen_movqi_nc,
7756 gen_movhi_nc,
7757 gen_movsi_nc,
7758 gen_movdi_nc,
7759 gen_movsf_nc,
7760 gen_movdf_nc,
7761 gen_movxf_nc,
7762 gen_movti_nc,
7763 gen_zero_extendqidi2_nc,
7764 gen_zero_extendhidi2_nc,
7765 gen_zero_extendsidi2_nc,
7766 };
7767 static gen_func_t gen_chk_a_clr[] = {
048d0d36
MK
7768 gen_advanced_load_check_clr_bi,
7769 gen_advanced_load_check_clr_qi,
7770 gen_advanced_load_check_clr_hi,
7771 gen_advanced_load_check_clr_si,
7772 gen_advanced_load_check_clr_di,
7773 gen_advanced_load_check_clr_sf,
7774 gen_advanced_load_check_clr_df,
7775 gen_advanced_load_check_clr_xf,
7776 gen_advanced_load_check_clr_ti,
7777 gen_advanced_load_check_clr_di,
7778 gen_advanced_load_check_clr_di,
7779 gen_advanced_load_check_clr_di,
388092d5
AB
7780 };
7781 static gen_func_t gen_chk_a_nc[] = {
7782 gen_advanced_load_check_nc_bi,
7783 gen_advanced_load_check_nc_qi,
7784 gen_advanced_load_check_nc_hi,
7785 gen_advanced_load_check_nc_si,
7786 gen_advanced_load_check_nc_di,
7787 gen_advanced_load_check_nc_sf,
7788 gen_advanced_load_check_nc_df,
7789 gen_advanced_load_check_nc_xf,
7790 gen_advanced_load_check_nc_ti,
7791 gen_advanced_load_check_nc_di,
7792 gen_advanced_load_check_nc_di,
7793 gen_advanced_load_check_nc_di,
7794 };
7795 static gen_func_t gen_chk_s[] = {
048d0d36
MK
7796 gen_speculation_check_bi,
7797 gen_speculation_check_qi,
7798 gen_speculation_check_hi,
7799 gen_speculation_check_si,
7800 gen_speculation_check_di,
7801 gen_speculation_check_sf,
7802 gen_speculation_check_df,
7803 gen_speculation_check_xf,
7804 gen_speculation_check_ti,
7805 gen_speculation_check_di,
7806 gen_speculation_check_di,
388092d5 7807 gen_speculation_check_di,
048d0d36
MK
7808 };
7809
388092d5 7810 gen_func_t *gen_check;
048d0d36 7811
388092d5 7812 if (ts & BEGIN_DATA)
048d0d36 7813 {
388092d5
AB
7814 /* We don't need recovery because even if this is ld.sa,
7815 the ALAT entry will be allocated only if the NAT bit is set to zero.
7816 So it is enough to use ld.c here. */
7817
7818 if (simple_check_p)
7819 {
7820 gcc_assert (mflag_sched_spec_ldc);
7821
7822 if (clearing_check_p)
7823 gen_check = gen_ld_c_clr;
7824 else
7825 gen_check = gen_ld_c_nc;
7826 }
7827 else
7828 {
7829 if (clearing_check_p)
7830 gen_check = gen_chk_a_clr;
7831 else
7832 gen_check = gen_chk_a_nc;
7833 }
048d0d36 7834 }
388092d5 7835 else if (ts & BEGIN_CONTROL)
048d0d36 7836 {
388092d5
AB
7837 if (simple_check_p)
7838 /* We might want to use ld.sa -> ld.c instead of
7839 ld.s -> chk.s. */
048d0d36 7840 {
388092d5 7841 gcc_assert (!ia64_needs_block_p (ts));
048d0d36 7842
388092d5
AB
7843 if (clearing_check_p)
7844 gen_check = gen_ld_c_clr;
7845 else
7846 gen_check = gen_ld_c_nc;
7847 }
7848 else
7849 {
7850 gen_check = gen_chk_s;
048d0d36 7851 }
388092d5
AB
7852 }
7853 else
7854 gcc_unreachable ();
7855
7856 gcc_assert (mode_no >= 0);
7857 return gen_check[mode_no];
7858}
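
/* E.g. for a DImode advanced load, get_spec_check_gen_function
   (BEGIN_DATA, 4, true, true) returns gen_movdi_clr, a simple clearing
   check (ld8.c.clr), whereas passing false for SIMPLE_CHECK_P returns
   gen_advanced_load_check_clr_di (chk.a.clr).  */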
7859
7860/* Return nonzero if speculation TS needs a branchy recovery check. */
7861static bool
7862ia64_needs_block_p (ds_t ts)
7863{
7864 if (ts & BEGIN_DATA)
7865 return !mflag_sched_spec_ldc;
7866
7867 gcc_assert ((ts & BEGIN_CONTROL) != 0);
048d0d36 7868
388092d5
AB
7869 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7870}
7871
7872/* Generate a recovery check for INSN.
7873 If LABEL is nonzero, generate a branchy recovery check.
7874 Otherwise, generate a simple check. */
7875static rtx
7876ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7877{
7878 rtx op1, pat, check_pat;
7879 gen_func_t gen_check;
7880 int mode_no;
7881
7882 mode_no = get_mode_no_for_insn (insn);
7883 gcc_assert (mode_no >= 0);
7884
7885 if (label)
7886 op1 = label;
7887 else
7888 {
7889 gcc_assert (!ia64_needs_block_p (ds));
7890 op1 = copy_rtx (recog_data.operand[1]);
048d0d36 7891 }
388092d5
AB
7892
7893 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
7894 true);
048d0d36 7895
388092d5 7896 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
048d0d36
MK
7897
7898 pat = PATTERN (insn);
7899 if (GET_CODE (pat) == COND_EXEC)
7900 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7901 check_pat);
7902
7903 return check_pat;
7904}
7905
7906/* Return nonzero if X is a recovery check. */
7907static int
7908ia64_spec_check_p (rtx x)
7909{
7910 x = PATTERN (x);
7911 if (GET_CODE (x) == COND_EXEC)
7912 x = COND_EXEC_CODE (x);
7913 if (GET_CODE (x) == SET)
7914 return ia64_spec_check_src_p (SET_SRC (x));
7915 return 0;
7916}
7917
7918/* Return nonzero if SRC belongs to a recovery check. */
7919static int
7920ia64_spec_check_src_p (rtx src)
7921{
7922 if (GET_CODE (src) == IF_THEN_ELSE)
7923 {
7924 rtx t;
7925
7926 t = XEXP (src, 0);
7927 if (GET_CODE (t) == NE)
7928 {
7929 t = XEXP (t, 0);
7930
7931 if (GET_CODE (t) == UNSPEC)
7932 {
7933 int code;
7934
7935 code = XINT (t, 1);
7936
388092d5
AB
7937 if (code == UNSPEC_LDCCLR
7938 || code == UNSPEC_LDCNC
7939 || code == UNSPEC_CHKACLR
7940 || code == UNSPEC_CHKANC
7941 || code == UNSPEC_CHKS)
048d0d36
MK
7942 {
7943 gcc_assert (code != 0);
7944 return code;
7945 }
7946 }
7947 }
7948 }
7949 return 0;
7950}
30028c85 7951\f
2130b7fb 7952
30028c85
VM
7953/* The following page contains abstract data `bundle states' which are
7954 used for bundling insns (inserting nops and template generation). */
7955
7956/* The following describes state of insn bundling. */
7957
7958struct bundle_state
7959{
7960 /* Unique bundle state number to identify them in the debugging
7961 output */
7962 int unique_num;
7963 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
7964 /* number of nops before and after the insn */
7965 short before_nops_num, after_nops_num;
7966 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
7967 insn) */
7968 int cost; /* cost of the state in cycles */
7969 int accumulated_insns_num; /* number of all previous insns including
7970 nops. L is considered as 2 insns */
7971 int branch_deviation; /* deviation of previous branches from 3rd slots */
388092d5 7972 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
30028c85
VM
7973 struct bundle_state *next; /* next state with the same insn_num */
7974 struct bundle_state *originator; /* originator (previous insn state) */
7975 /* All bundle states are in the following chain. */
7976 struct bundle_state *allocated_states_chain;
7977 /* The DFA State after issuing the insn and the nops. */
7978 state_t dfa_state;
7979};
2130b7fb 7980
30028c85 7981/* The following maps an insn number to the corresponding bundle state. */
2130b7fb 7982
30028c85 7983static struct bundle_state **index_to_bundle_states;
2130b7fb 7984
30028c85 7985/* The unique number of next bundle state. */
2130b7fb 7986
30028c85 7987static int bundle_states_num;
2130b7fb 7988
30028c85 7989/* All allocated bundle states are in the following chain. */
2130b7fb 7990
30028c85 7991static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 7992
30028c85
VM
7993/* All allocated but not used bundle states are in the following
7994 chain. */
870f9ec0 7995
30028c85 7996static struct bundle_state *free_bundle_state_chain;
2130b7fb 7997
2130b7fb 7998
30028c85 7999/* The following function returns a free bundle state. */
2130b7fb 8000
30028c85 8001static struct bundle_state *
9c808aad 8002get_free_bundle_state (void)
30028c85
VM
8003{
8004 struct bundle_state *result;
2130b7fb 8005
30028c85 8006 if (free_bundle_state_chain != NULL)
2130b7fb 8007 {
30028c85
VM
8008 result = free_bundle_state_chain;
8009 free_bundle_state_chain = result->next;
2130b7fb 8010 }
30028c85 8011 else
2130b7fb 8012 {
5ead67f6 8013 result = XNEW (struct bundle_state);
30028c85
VM
8014 result->dfa_state = xmalloc (dfa_state_size);
8015 result->allocated_states_chain = allocated_bundle_states_chain;
8016 allocated_bundle_states_chain = result;
2130b7fb 8017 }
30028c85
VM
8018 result->unique_num = bundle_states_num++;
8019 return result;
9c808aad 8020
30028c85 8021}
2130b7fb 8022
30028c85 8023/* The following function frees given bundle state. */
2130b7fb 8024
30028c85 8025static void
9c808aad 8026free_bundle_state (struct bundle_state *state)
30028c85
VM
8027{
8028 state->next = free_bundle_state_chain;
8029 free_bundle_state_chain = state;
8030}
2130b7fb 8031
30028c85 8032/* Start work with abstract data `bundle states'. */
2130b7fb 8033
30028c85 8034static void
9c808aad 8035initiate_bundle_states (void)
30028c85
VM
8036{
8037 bundle_states_num = 0;
8038 free_bundle_state_chain = NULL;
8039 allocated_bundle_states_chain = NULL;
2130b7fb
BS
8040}
8041
30028c85 8042/* Finish work with abstract data `bundle states'. */
2130b7fb
BS
8043
8044static void
9c808aad 8045finish_bundle_states (void)
2130b7fb 8046{
30028c85
VM
8047 struct bundle_state *curr_state, *next_state;
8048
8049 for (curr_state = allocated_bundle_states_chain;
8050 curr_state != NULL;
8051 curr_state = next_state)
2130b7fb 8052 {
30028c85
VM
8053 next_state = curr_state->allocated_states_chain;
8054 free (curr_state->dfa_state);
8055 free (curr_state);
2130b7fb 8056 }
2130b7fb
BS
8057}
8058
30028c85
VM
8059/* Hash table of the bundle states. The key is dfa_state and insn_num
8060 of the bundle states. */
2130b7fb 8061
30028c85 8062static htab_t bundle_state_table;
2130b7fb 8063
30028c85 8064/* The function returns hash of BUNDLE_STATE. */
2130b7fb 8065
30028c85 8066static unsigned
9c808aad 8067bundle_state_hash (const void *bundle_state)
30028c85 8068{
586de218
KG
8069 const struct bundle_state *const state
8070 = (const struct bundle_state *) bundle_state;
30028c85 8071 unsigned result, i;
2130b7fb 8072
30028c85
VM
8073 for (result = i = 0; i < dfa_state_size; i++)
8074 result += (((unsigned char *) state->dfa_state) [i]
8075 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8076 return result + state->insn_num;
8077}
2130b7fb 8078
30028c85 8079/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 8080
30028c85 8081static int
9c808aad 8082bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
30028c85 8083{
586de218
KG
8084 const struct bundle_state *const state1
8085 = (const struct bundle_state *) bundle_state_1;
8086 const struct bundle_state *const state2
8087 = (const struct bundle_state *) bundle_state_2;
2130b7fb 8088
30028c85
VM
8089 return (state1->insn_num == state2->insn_num
8090 && memcmp (state1->dfa_state, state2->dfa_state,
8091 dfa_state_size) == 0);
8092}
2130b7fb 8093
30028c85
VM
8094/* The function inserts the BUNDLE_STATE into the hash table. The
8095 function returns nonzero if the bundle has been inserted into the
8096 table. The table contains the best bundle state with given key. */
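/* Here "best" is decided lexicographically: smaller cost first, then
   fewer accumulated insns (nops included), then smaller branch
   deviation, then fewer stop bits in the middle of bundles.  */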
2130b7fb 8097
30028c85 8098static int
9c808aad 8099insert_bundle_state (struct bundle_state *bundle_state)
30028c85
VM
8100{
8101 void **entry_ptr;
2130b7fb 8102
bbbbb16a 8103 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
30028c85
VM
8104 if (*entry_ptr == NULL)
8105 {
8106 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8107 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8108 *entry_ptr = (void *) bundle_state;
8109 return TRUE;
2130b7fb 8110 }
30028c85
VM
8111 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8112 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8113 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8114 > bundle_state->accumulated_insns_num
8115 || (((struct bundle_state *)
8116 *entry_ptr)->accumulated_insns_num
8117 == bundle_state->accumulated_insns_num
388092d5
AB
8118 && (((struct bundle_state *)
8119 *entry_ptr)->branch_deviation
8120 > bundle_state->branch_deviation
8121 || (((struct bundle_state *)
8122 *entry_ptr)->branch_deviation
8123 == bundle_state->branch_deviation
8124 && ((struct bundle_state *)
8125 *entry_ptr)->middle_bundle_stops
8126 > bundle_state->middle_bundle_stops))))))
9c808aad 8127
2130b7fb 8128 {
30028c85
VM
8129 struct bundle_state temp;
8130
8131 temp = *(struct bundle_state *) *entry_ptr;
8132 *(struct bundle_state *) *entry_ptr = *bundle_state;
8133 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8134 *bundle_state = temp;
2130b7fb 8135 }
30028c85
VM
8136 return FALSE;
8137}
2130b7fb 8138
30028c85
VM
8139/* Start work with the hash table. */
8140
8141static void
9c808aad 8142initiate_bundle_state_table (void)
30028c85
VM
8143{
8144 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8145 (htab_del) 0);
2130b7fb
BS
8146}
8147
30028c85 8148/* Finish work with the hash table. */
e4027dab
BS
8149
8150static void
9c808aad 8151finish_bundle_state_table (void)
e4027dab 8152{
30028c85 8153 htab_delete (bundle_state_table);
e4027dab
BS
8154}
8155
30028c85 8156\f
a0a7b566 8157
30028c85
VM
8158/* The following variable is an insn `nop' used to check bundle states
8159 with different numbers of inserted nops. */
a0a7b566 8160
30028c85 8161static rtx ia64_nop;
a0a7b566 8162
30028c85
VM
8163/* The following function tries to issue NOPS_NUM nops for the current
8164 state without advancing the processor cycle. If it fails, the
8165 function returns FALSE and frees the current state. */
8166
8167static int
9c808aad 8168try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 8169{
30028c85 8170 int i;
a0a7b566 8171
30028c85
VM
8172 for (i = 0; i < nops_num; i++)
8173 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8174 {
8175 free_bundle_state (curr_state);
8176 return FALSE;
8177 }
8178 return TRUE;
8179}
a0a7b566 8180
30028c85
VM
8181/* The following function tries to issue INSN for the current
8182 state without advancing the processor cycle. If it fails, the
8183 function returns FALSE and frees the current state. */
a0a7b566 8184
30028c85 8185static int
9c808aad 8186try_issue_insn (struct bundle_state *curr_state, rtx insn)
30028c85
VM
8187{
8188 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8189 {
8190 free_bundle_state (curr_state);
8191 return FALSE;
8192 }
8193 return TRUE;
8194}
a0a7b566 8195
30028c85
VM
8196/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8197 starting with ORIGINATOR without advancing the processor cycle. If
f32360c7
VM
8198 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8199 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8200 If it was successful, the function creates a new bundle state and
8201 inserts it into the hash table and into `index_to_bundle_states'. */
a0a7b566 8202
30028c85 8203static void
9c808aad
AJ
8204issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8205 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
30028c85
VM
8206{
8207 struct bundle_state *curr_state;
8208
8209 curr_state = get_free_bundle_state ();
8210 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8211 curr_state->insn = insn;
8212 curr_state->insn_num = originator->insn_num + 1;
8213 curr_state->cost = originator->cost;
8214 curr_state->originator = originator;
8215 curr_state->before_nops_num = before_nops_num;
8216 curr_state->after_nops_num = 0;
8217 curr_state->accumulated_insns_num
8218 = originator->accumulated_insns_num + before_nops_num;
8219 curr_state->branch_deviation = originator->branch_deviation;
388092d5 8220 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
e820471b
NS
8221 gcc_assert (insn);
8222 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
30028c85 8223 {
e820471b 8224 gcc_assert (GET_MODE (insn) != TImode);
30028c85
VM
8225 if (!try_issue_nops (curr_state, before_nops_num))
8226 return;
8227 if (!try_issue_insn (curr_state, insn))
8228 return;
8229 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
388092d5
AB
8230 if (curr_state->accumulated_insns_num % 3 != 0)
8231 curr_state->middle_bundle_stops++;
30028c85
VM
8232 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8233 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 8234 {
30028c85
VM
8235 free_bundle_state (curr_state);
8236 return;
a0a7b566 8237 }
a0a7b566 8238 }
30028c85 8239 else if (GET_MODE (insn) != TImode)
a0a7b566 8240 {
30028c85
VM
8241 if (!try_issue_nops (curr_state, before_nops_num))
8242 return;
8243 if (!try_issue_insn (curr_state, insn))
8244 return;
f32360c7 8245 curr_state->accumulated_insns_num++;
e820471b
NS
8246 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8247 && asm_noperands (PATTERN (insn)) < 0);
8248
30028c85
VM
8249 if (ia64_safe_type (insn) == TYPE_L)
8250 curr_state->accumulated_insns_num++;
8251 }
8252 else
8253 {
68e11b42
JW
8254 /* If this is an insn that must be first in a group, then don't allow
8255 nops to be emitted before it. Currently, alloc is the only such
8256 supported instruction. */
8257 /* ??? The bundling automatons should handle this for us, but they do
8258 not yet have support for the first_insn attribute. */
8259 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8260 {
8261 free_bundle_state (curr_state);
8262 return;
8263 }
8264
30028c85
VM
8265 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8266 state_transition (curr_state->dfa_state, NULL);
8267 curr_state->cost++;
8268 if (!try_issue_nops (curr_state, before_nops_num))
8269 return;
8270 if (!try_issue_insn (curr_state, insn))
8271 return;
f32360c7
VM
8272 curr_state->accumulated_insns_num++;
8273 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8274 || asm_noperands (PATTERN (insn)) >= 0)
8275 {
8276 /* Finish bundle containing asm insn. */
8277 curr_state->after_nops_num
8278 = 3 - curr_state->accumulated_insns_num % 3;
8279 curr_state->accumulated_insns_num
8280 += 3 - curr_state->accumulated_insns_num % 3;
8281 }
8282 else if (ia64_safe_type (insn) == TYPE_L)
30028c85
VM
8283 curr_state->accumulated_insns_num++;
8284 }
8285 if (ia64_safe_type (insn) == TYPE_B)
8286 curr_state->branch_deviation
8287 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8288 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8289 {
f32360c7 8290 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 8291 {
30028c85
VM
8292 state_t dfa_state;
8293 struct bundle_state *curr_state1;
8294 struct bundle_state *allocated_states_chain;
8295
8296 curr_state1 = get_free_bundle_state ();
8297 dfa_state = curr_state1->dfa_state;
8298 allocated_states_chain = curr_state1->allocated_states_chain;
8299 *curr_state1 = *curr_state;
8300 curr_state1->dfa_state = dfa_state;
8301 curr_state1->allocated_states_chain = allocated_states_chain;
8302 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8303 dfa_state_size);
8304 curr_state = curr_state1;
a0a7b566 8305 }
30028c85
VM
8306 if (!try_issue_nops (curr_state,
8307 3 - curr_state->accumulated_insns_num % 3))
8308 return;
8309 curr_state->after_nops_num
8310 = 3 - curr_state->accumulated_insns_num % 3;
8311 curr_state->accumulated_insns_num
8312 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 8313 }
30028c85
VM
8314 if (!insert_bundle_state (curr_state))
8315 free_bundle_state (curr_state);
8316 return;
8317}
e013f3c7 8318
30028c85
VM
8319/* The following function returns the position in the two-bundle window
8320 for the given STATE. */
8321
8322static int
9c808aad 8323get_max_pos (state_t state)
30028c85
VM
8324{
8325 if (cpu_unit_reservation_p (state, pos_6))
8326 return 6;
8327 else if (cpu_unit_reservation_p (state, pos_5))
8328 return 5;
8329 else if (cpu_unit_reservation_p (state, pos_4))
8330 return 4;
8331 else if (cpu_unit_reservation_p (state, pos_3))
8332 return 3;
8333 else if (cpu_unit_reservation_p (state, pos_2))
8334 return 2;
8335 else if (cpu_unit_reservation_p (state, pos_1))
8336 return 1;
8337 else
8338 return 0;
a0a7b566
BS
8339}
8340
30028c85
VM
8341/* The function returns the code of a possible template for the given
8342 position and state. The function should be called only with 2 values of
96ddf8ef
VM
8343 position equal to 3 or 6. We avoid generating F NOPs by putting
8344 templates containing F insns at the end of the template search
8345 because of an undocumented anomaly in McKinley-derived cores which can
8346 cause stalls if an F-unit insn (including a NOP) is issued within a
8347 six-cycle window after reading certain application registers (such
8348 as ar.bsp). Furthermore, power considerations also argue against
8349 the use of F-unit instructions unless they're really needed. */
2130b7fb 8350
c237e94a 8351static int
9c808aad 8352get_template (state_t state, int pos)
2130b7fb 8353{
30028c85 8354 switch (pos)
2130b7fb 8355 {
30028c85 8356 case 3:
96ddf8ef 8357 if (cpu_unit_reservation_p (state, _0mmi_))
30028c85 8358 return 1;
96ddf8ef
VM
8359 else if (cpu_unit_reservation_p (state, _0mii_))
8360 return 0;
30028c85
VM
8361 else if (cpu_unit_reservation_p (state, _0mmb_))
8362 return 7;
96ddf8ef
VM
8363 else if (cpu_unit_reservation_p (state, _0mib_))
8364 return 6;
8365 else if (cpu_unit_reservation_p (state, _0mbb_))
8366 return 5;
8367 else if (cpu_unit_reservation_p (state, _0bbb_))
8368 return 4;
8369 else if (cpu_unit_reservation_p (state, _0mmf_))
8370 return 3;
8371 else if (cpu_unit_reservation_p (state, _0mfi_))
8372 return 2;
30028c85
VM
8373 else if (cpu_unit_reservation_p (state, _0mfb_))
8374 return 8;
8375 else if (cpu_unit_reservation_p (state, _0mlx_))
8376 return 9;
8377 else
e820471b 8378 gcc_unreachable ();
30028c85 8379 case 6:
96ddf8ef 8380 if (cpu_unit_reservation_p (state, _1mmi_))
30028c85 8381 return 1;
96ddf8ef
VM
8382 else if (cpu_unit_reservation_p (state, _1mii_))
8383 return 0;
30028c85
VM
8384 else if (cpu_unit_reservation_p (state, _1mmb_))
8385 return 7;
96ddf8ef
VM
8386 else if (cpu_unit_reservation_p (state, _1mib_))
8387 return 6;
8388 else if (cpu_unit_reservation_p (state, _1mbb_))
8389 return 5;
8390 else if (cpu_unit_reservation_p (state, _1bbb_))
8391 return 4;
8392 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8393 return 3;
8394 else if (cpu_unit_reservation_p (state, _1mfi_))
8395 return 2;
30028c85
VM
8396 else if (cpu_unit_reservation_p (state, _1mfb_))
8397 return 8;
8398 else if (cpu_unit_reservation_p (state, _1mlx_))
8399 return 9;
8400 else
e820471b 8401 gcc_unreachable ();
30028c85 8402 default:
e820471b 8403 gcc_unreachable ();
2130b7fb 8404 }
30028c85 8405}
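/* Note (added for clarity): the codes returned above index the bundle
   templates defined by bundle_selector in ia64.md; assuming the usual
   ordering there, they correspond to .mii (0), .mmi (1), .mfi (2),
   .mmf (3), .bbb (4), .mbb (5), .mib (6), .mmb (7), .mfb (8) and
   .mlx (9).  */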
2130b7fb 8406
388092d5
AB
8407/* True when INSN is important for bundling. */
8408static bool
8409important_for_bundling_p (rtx insn)
8410{
8411 return (INSN_P (insn)
8412 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8413 && GET_CODE (PATTERN (insn)) != USE
8414 && GET_CODE (PATTERN (insn)) != CLOBBER);
8415}
8416
30028c85
VM
 8417/* The following function returns the first insn important for insn
 8418 bundling, starting at INSN and before TAIL. */
a0a7b566 8419
30028c85 8420static rtx
9c808aad 8421get_next_important_insn (rtx insn, rtx tail)
30028c85
VM
8422{
8423 for (; insn && insn != tail; insn = NEXT_INSN (insn))
388092d5 8424 if (important_for_bundling_p (insn))
30028c85
VM
8425 return insn;
8426 return NULL_RTX;
8427}
8428
4a4cd49c
JJ
8429/* Add a bundle selector TEMPLATE0 before INSN. */
8430
8431static void
8432ia64_add_bundle_selector_before (int template0, rtx insn)
8433{
8434 rtx b = gen_bundle_selector (GEN_INT (template0));
8435
8436 ia64_emit_insn_before (b, insn);
8437#if NR_BUNDLES == 10
8438 if ((template0 == 4 || template0 == 5)
8439 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8440 {
8441 int i;
8442 rtx note = NULL_RTX;
8443
 8444 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
 8445 first or second slot. If it is and has a REG_EH_REGION note, copy
 8446 the note to the following nops, as br.call sets rp to the address of
 8447 the following bundle and therefore an EH region end must be on a
 8448 bundle boundary. */
8449 insn = PREV_INSN (insn);
8450 for (i = 0; i < 3; i++)
8451 {
8452 do
8453 insn = next_active_insn (insn);
8454 while (GET_CODE (insn) == INSN
8455 && get_attr_empty (insn) == EMPTY_YES);
8456 if (GET_CODE (insn) == CALL_INSN)
8457 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8458 else if (note)
8459 {
8460 int code;
8461
8462 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8463 || code == CODE_FOR_nop_b);
8464 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8465 note = NULL_RTX;
8466 else
bbbbb16a 8467 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
4a4cd49c
JJ
8468 }
8469 }
8470 }
8471#endif
8472}
8473
c856f536
VM
8474/* The following function does insn bundling. Bundling means
8475 inserting templates and nop insns to fit insn groups into permitted
 8476 templates. Instruction scheduling uses an NDFA (non-deterministic
 8477 finite automaton) encoding information about the templates and the
 8478 inserted nops. Nondeterminism of the automaton permits following
 8479 all possible insn sequences very quickly.
8480
8481 Unfortunately it is not possible to get information about inserting
 8482 nop insns and used templates from the automaton states. The
 8483 automaton only says that we can issue an insn possibly inserting
8484 some nops before it and using some template. Therefore insn
 8485 bundling in this function is implemented by using a DFA
 048d0d36 8486 (deterministic finite automaton). We follow all possible insn
c856f536
VM
 8487 sequences by inserting 0-2 nops (that is what the NDFA describes for
 8488 insn scheduling) before/after each insn being bundled. We know the
 8489 start of a simulated processor cycle from insn scheduling (an insn
8490 starting a new cycle has TImode).
8491
 8492 A simple implementation of insn bundling would create an enormous
 8493 number of possible insn sequences satisfying information about new
 8494 cycle ticks taken from the insn scheduling. To make the algorithm
 8495 practical we use dynamic programming. Each decision (about
 8496 inserting nops and implicitly about previous decisions) is described
 8497 by the structure bundle_state (see above). If we generate the same
 8498 bundle state (the key is the automaton state after issuing the insns
 8499 and nops for it), we reuse the already generated one. As a consequence we
1e5f1716 8500 reject some decisions which cannot improve the solution and
c856f536
VM
8501 reduce memory for the algorithm.
8502
 8503 When we reach the end of the EBB (extended basic block), we choose the
 8504 best sequence and then, moving backward through the EBB, insert templates
 8505 for the best alternative. The templates are obtained by querying the
 8506 automaton state for each insn in the chosen bundle states.
8507
8508 So the algorithm makes two (forward and backward) passes through
7400e46b 8509 the EBB. */
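/* Schematically (illustrative only, not actual compiler output, and
   assuming the usual template numbering), a cycle-ending pair of insns
   might end up in the insn stream as

	(bundle_selector 1)	e.g. an .mmi template
	load insn		M slot
	nop			filler emitted by this pass
	add insn		I slot
	(insn_group_barrier 3)	stop bit

   The forward pass below only enumerates bundle_state alternatives;
   the bundle_selector and nop insns themselves are emitted during the
   backward pass.  */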
a0a7b566 8510
30028c85 8511static void
9c808aad 8512bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
30028c85
VM
8513{
8514 struct bundle_state *curr_state, *next_state, *best_state;
8515 rtx insn, next_insn;
8516 int insn_num;
f32360c7 8517 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 8518 int pos = 0, max_pos, template0, template1;
30028c85
VM
8519 rtx b;
8520 rtx nop;
8521 enum attr_type type;
2d1b811d 8522
30028c85 8523 insn_num = 0;
c856f536 8524 /* Count insns in the EBB. */
30028c85
VM
8525 for (insn = NEXT_INSN (prev_head_insn);
8526 insn && insn != tail;
8527 insn = NEXT_INSN (insn))
8528 if (INSN_P (insn))
8529 insn_num++;
8530 if (insn_num == 0)
8531 return;
8532 bundling_p = 1;
8533 dfa_clean_insn_cache ();
8534 initiate_bundle_state_table ();
5ead67f6 8535 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
ff482c8d 8536 /* First (forward) pass -- generation of bundle states. */
30028c85
VM
8537 curr_state = get_free_bundle_state ();
8538 curr_state->insn = NULL;
8539 curr_state->before_nops_num = 0;
8540 curr_state->after_nops_num = 0;
8541 curr_state->insn_num = 0;
8542 curr_state->cost = 0;
8543 curr_state->accumulated_insns_num = 0;
8544 curr_state->branch_deviation = 0;
388092d5 8545 curr_state->middle_bundle_stops = 0;
30028c85
VM
8546 curr_state->next = NULL;
8547 curr_state->originator = NULL;
8548 state_reset (curr_state->dfa_state);
8549 index_to_bundle_states [0] = curr_state;
8550 insn_num = 0;
c856f536 8551 /* Shift the cycle mark if it is put on an insn which could be ignored. */
30028c85
VM
8552 for (insn = NEXT_INSN (prev_head_insn);
8553 insn != tail;
8554 insn = NEXT_INSN (insn))
8555 if (INSN_P (insn)
8556 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8557 || GET_CODE (PATTERN (insn)) == USE
8558 || GET_CODE (PATTERN (insn)) == CLOBBER)
8559 && GET_MODE (insn) == TImode)
2130b7fb 8560 {
30028c85
VM
8561 PUT_MODE (insn, VOIDmode);
8562 for (next_insn = NEXT_INSN (insn);
8563 next_insn != tail;
8564 next_insn = NEXT_INSN (next_insn))
8565 if (INSN_P (next_insn)
8566 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8567 && GET_CODE (PATTERN (next_insn)) != USE
388092d5
AB
8568 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8569 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
30028c85
VM
8570 {
8571 PUT_MODE (next_insn, TImode);
8572 break;
8573 }
2130b7fb 8574 }
048d0d36 8575 /* Forward pass: generation of bundle states. */
30028c85
VM
8576 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8577 insn != NULL_RTX;
8578 insn = next_insn)
1ad72cef 8579 {
e820471b
NS
8580 gcc_assert (INSN_P (insn)
8581 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8582 && GET_CODE (PATTERN (insn)) != USE
8583 && GET_CODE (PATTERN (insn)) != CLOBBER);
f32360c7 8584 type = ia64_safe_type (insn);
30028c85
VM
8585 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8586 insn_num++;
8587 index_to_bundle_states [insn_num] = NULL;
8588 for (curr_state = index_to_bundle_states [insn_num - 1];
8589 curr_state != NULL;
8590 curr_state = next_state)
f83594c4 8591 {
30028c85 8592 pos = curr_state->accumulated_insns_num % 3;
30028c85 8593 next_state = curr_state->next;
c856f536
VM
8594 /* We must fill up the current bundle in order to start a
 8595 subsequent asm insn in a new bundle. An asm insn is always
 8596 placed in a separate bundle. */
f32360c7
VM
8597 only_bundle_end_p
8598 = (next_insn != NULL_RTX
8599 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8600 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
c856f536
VM
8601 /* We may fill up the current bundle if it is the cycle end
8602 without a group barrier. */
30028c85 8603 bundle_end_p
f32360c7 8604 = (only_bundle_end_p || next_insn == NULL_RTX
30028c85
VM
8605 || (GET_MODE (next_insn) == TImode
8606 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8607 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7400e46b 8608 || type == TYPE_S)
f32360c7
VM
8609 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8610 only_bundle_end_p);
8611 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8612 only_bundle_end_p);
8613 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8614 only_bundle_end_p);
f83594c4 8615 }
e820471b 8616 gcc_assert (index_to_bundle_states [insn_num]);
30028c85
VM
8617 for (curr_state = index_to_bundle_states [insn_num];
8618 curr_state != NULL;
8619 curr_state = curr_state->next)
8620 if (verbose >= 2 && dump)
8621 {
c856f536
VM
 8622 /* This structure is taken from the generated code of the
 8623 pipeline hazard recognizer (see file insn-attrtab.c).
 8624 Please don't forget to change the structure if a new
 8625 automaton is added to the .md file. */
30028c85
VM
8626 struct DFA_chip
8627 {
8628 unsigned short one_automaton_state;
8629 unsigned short oneb_automaton_state;
8630 unsigned short two_automaton_state;
8631 unsigned short twob_automaton_state;
8632 };
9c808aad 8633
30028c85
VM
8634 fprintf
8635 (dump,
388092d5 8636 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
30028c85
VM
8637 curr_state->unique_num,
8638 (curr_state->originator == NULL
8639 ? -1 : curr_state->originator->unique_num),
8640 curr_state->cost,
8641 curr_state->before_nops_num, curr_state->after_nops_num,
8642 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 8643 curr_state->middle_bundle_stops,
7400e46b 8644 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85
VM
8645 INSN_UID (insn));
8646 }
1ad72cef 8647 }
e820471b
NS
8648
8649 /* We should find a solution because the 2nd insn scheduling has
8650 found one. */
8651 gcc_assert (index_to_bundle_states [insn_num]);
c856f536 8652 /* Find a state corresponding to the best insn sequence. */
30028c85
VM
8653 best_state = NULL;
8654 for (curr_state = index_to_bundle_states [insn_num];
8655 curr_state != NULL;
8656 curr_state = curr_state->next)
c856f536
VM
 8657 /* We only look at the states with a fully filled up last
 8658 bundle. First we prefer insn sequences with minimal cost,
 8659 then with minimal inserted nops, and finally with branch insns
 8660 placed in the 3rd slots. */
30028c85
VM
8661 if (curr_state->accumulated_insns_num % 3 == 0
8662 && (best_state == NULL || best_state->cost > curr_state->cost
8663 || (best_state->cost == curr_state->cost
8664 && (curr_state->accumulated_insns_num
8665 < best_state->accumulated_insns_num
8666 || (curr_state->accumulated_insns_num
8667 == best_state->accumulated_insns_num
388092d5
AB
8668 && (curr_state->branch_deviation
8669 < best_state->branch_deviation
8670 || (curr_state->branch_deviation
8671 == best_state->branch_deviation
8672 && curr_state->middle_bundle_stops
8673 < best_state->middle_bundle_stops)))))))
30028c85 8674 best_state = curr_state;
c856f536 8675 /* Second (backward) pass: adding nops and templates. */
388092d5 8676 gcc_assert (best_state);
30028c85
VM
8677 insn_num = best_state->before_nops_num;
8678 template0 = template1 = -1;
8679 for (curr_state = best_state;
8680 curr_state->originator != NULL;
8681 curr_state = curr_state->originator)
8682 {
8683 insn = curr_state->insn;
f32360c7
VM
8684 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8685 || asm_noperands (PATTERN (insn)) >= 0);
30028c85
VM
8686 insn_num++;
8687 if (verbose >= 2 && dump)
2130b7fb 8688 {
30028c85
VM
8689 struct DFA_chip
8690 {
8691 unsigned short one_automaton_state;
8692 unsigned short oneb_automaton_state;
8693 unsigned short two_automaton_state;
8694 unsigned short twob_automaton_state;
8695 };
9c808aad 8696
30028c85
VM
8697 fprintf
8698 (dump,
388092d5 8699 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
30028c85
VM
8700 curr_state->unique_num,
8701 (curr_state->originator == NULL
8702 ? -1 : curr_state->originator->unique_num),
8703 curr_state->cost,
8704 curr_state->before_nops_num, curr_state->after_nops_num,
8705 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 8706 curr_state->middle_bundle_stops,
7400e46b 8707 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85 8708 INSN_UID (insn));
2130b7fb 8709 }
c856f536
VM
8710 /* Find the position in the current bundle window. The window can
 8711 contain at most two bundles. A two-bundle window means that
 8712 the processor will make two bundle rotations. */
30028c85 8713 max_pos = get_max_pos (curr_state->dfa_state);
c856f536
VM
8714 if (max_pos == 6
8715 /* The following (negative template number) means that the
8716 processor did one bundle rotation. */
8717 || (max_pos == 3 && template0 < 0))
2130b7fb 8718 {
c856f536
VM
8719 /* We are at the end of the window -- find template(s) for
8720 its bundle(s). */
30028c85
VM
8721 pos = max_pos;
8722 if (max_pos == 3)
8723 template0 = get_template (curr_state->dfa_state, 3);
8724 else
8725 {
8726 template1 = get_template (curr_state->dfa_state, 3);
8727 template0 = get_template (curr_state->dfa_state, 6);
8728 }
8729 }
8730 if (max_pos > 3 && template1 < 0)
c856f536 8731 /* This may happen when we have a stop inside a bundle. */
30028c85 8732 {
e820471b 8733 gcc_assert (pos <= 3);
30028c85
VM
8734 template1 = get_template (curr_state->dfa_state, 3);
8735 pos += 3;
8736 }
f32360c7 8737 if (!asm_p)
c856f536 8738 /* Emit nops after the current insn. */
f32360c7
VM
8739 for (i = 0; i < curr_state->after_nops_num; i++)
8740 {
8741 nop = gen_nop ();
8742 emit_insn_after (nop, insn);
8743 pos--;
e820471b 8744 gcc_assert (pos >= 0);
f32360c7
VM
8745 if (pos % 3 == 0)
8746 {
c856f536
VM
8747 /* We are at the start of a bundle: emit the template
8748 (it should be defined). */
e820471b 8749 gcc_assert (template0 >= 0);
4a4cd49c 8750 ia64_add_bundle_selector_before (template0, nop);
c856f536
VM
 8751 /* If we have a two-bundle window, we make one bundle
 8752 rotation. Otherwise template0 will be undefined
 8753 (a negative value). */
f32360c7
VM
8754 template0 = template1;
8755 template1 = -1;
8756 }
8757 }
c856f536
VM
 8758 /* Move the position backward in the window. A group barrier has
 8759 no slot. An asm insn takes a whole bundle. */
30028c85
VM
8760 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8761 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8762 && asm_noperands (PATTERN (insn)) < 0)
8763 pos--;
c856f536 8764 /* Long insn takes 2 slots. */
30028c85
VM
8765 if (ia64_safe_type (insn) == TYPE_L)
8766 pos--;
e820471b 8767 gcc_assert (pos >= 0);
30028c85
VM
8768 if (pos % 3 == 0
8769 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8770 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8771 && asm_noperands (PATTERN (insn)) < 0)
8772 {
c856f536
VM
8773 /* The current insn is at the bundle start: emit the
8774 template. */
e820471b 8775 gcc_assert (template0 >= 0);
4a4cd49c 8776 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
8777 b = PREV_INSN (insn);
8778 insn = b;
68776c43 8779 /* See comment above in analogous place for emitting nops
c856f536 8780 after the insn. */
30028c85
VM
8781 template0 = template1;
8782 template1 = -1;
8783 }
c856f536 8784 /* Emit nops before the current insn. */
30028c85
VM
8785 for (i = 0; i < curr_state->before_nops_num; i++)
8786 {
8787 nop = gen_nop ();
8788 ia64_emit_insn_before (nop, insn);
8789 nop = PREV_INSN (insn);
8790 insn = nop;
8791 pos--;
e820471b 8792 gcc_assert (pos >= 0);
30028c85
VM
8793 if (pos % 3 == 0)
8794 {
68776c43 8795 /* See comment above in analogous place for emitting nops
c856f536 8796 after the insn. */
e820471b 8797 gcc_assert (template0 >= 0);
4a4cd49c 8798 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
8799 b = PREV_INSN (insn);
8800 insn = b;
8801 template0 = template1;
8802 template1 = -1;
8803 }
2130b7fb
BS
8804 }
8805 }
388092d5
AB
8806
8807#ifdef ENABLE_CHECKING
8808 {
8809 /* Assert right calculation of middle_bundle_stops. */
8810 int num = best_state->middle_bundle_stops;
8811 bool start_bundle = true, end_bundle = false;
8812
8813 for (insn = NEXT_INSN (prev_head_insn);
8814 insn && insn != tail;
8815 insn = NEXT_INSN (insn))
8816 {
8817 if (!INSN_P (insn))
8818 continue;
8819 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8820 start_bundle = true;
8821 else
8822 {
8823 rtx next_insn;
8824
8825 for (next_insn = NEXT_INSN (insn);
8826 next_insn && next_insn != tail;
8827 next_insn = NEXT_INSN (next_insn))
8828 if (INSN_P (next_insn)
8829 && (ia64_safe_itanium_class (next_insn)
8830 != ITANIUM_CLASS_IGNORE
8831 || recog_memoized (next_insn)
8832 == CODE_FOR_bundle_selector)
8833 && GET_CODE (PATTERN (next_insn)) != USE
8834 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8835 break;
8836
8837 end_bundle = next_insn == NULL_RTX
8838 || next_insn == tail
8839 || (INSN_P (next_insn)
8840 && recog_memoized (next_insn)
8841 == CODE_FOR_bundle_selector);
8842 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8843 && !start_bundle && !end_bundle
8844 && next_insn
8845 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8846 && asm_noperands (PATTERN (next_insn)) < 0)
8847 num--;
8848
8849 start_bundle = false;
8850 }
8851 }
8852
8853 gcc_assert (num == 0);
8854 }
8855#endif
8856
30028c85
VM
8857 free (index_to_bundle_states);
8858 finish_bundle_state_table ();
8859 bundling_p = 0;
8860 dfa_clean_insn_cache ();
2130b7fb 8861}
c65ebc55 8862
30028c85
VM
8863/* The following function is called at the end of scheduling BB or
8864 EBB. After reload, it inserts stop bits and does insn bundling. */
8865
8866static void
9c808aad 8867ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 8868{
30028c85
VM
8869 if (sched_verbose)
8870 fprintf (dump, "// Finishing schedule.\n");
8871 if (!reload_completed)
8872 return;
8873 if (reload_completed)
8874 {
8875 final_emit_insn_group_barriers (dump);
8876 bundling (dump, sched_verbose, current_sched_info->prev_head,
8877 current_sched_info->next_tail);
8878 if (sched_verbose && dump)
8879 fprintf (dump, "// finishing %d-%d\n",
8880 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8881 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 8882
30028c85
VM
8883 return;
8884 }
c237e94a
ZW
8885}
8886
30028c85 8887/* The following function inserts stop bits in scheduled BB or EBB. */
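/* Note (added for clarity): each stop bit is an insn_group_barrier
   insn, which this back end prints as the ";;" instruction-group
   separator in the assembly output.  */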
2130b7fb 8888
30028c85 8889static void
9c808aad 8890final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 8891{
30028c85
VM
8892 rtx insn;
8893 int need_barrier_p = 0;
388092d5 8894 int seen_good_insn = 0;
30028c85 8895 rtx prev_insn = NULL_RTX;
2130b7fb 8896
30028c85 8897 init_insn_group_barriers ();
2130b7fb 8898
30028c85
VM
8899 for (insn = NEXT_INSN (current_sched_info->prev_head);
8900 insn != current_sched_info->next_tail;
8901 insn = NEXT_INSN (insn))
8902 {
8903 if (GET_CODE (insn) == BARRIER)
b395ddbe 8904 {
30028c85 8905 rtx last = prev_active_insn (insn);
14d118d6 8906
30028c85 8907 if (! last)
b395ddbe 8908 continue;
30028c85
VM
8909 if (GET_CODE (last) == JUMP_INSN
8910 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8911 last = prev_active_insn (last);
8912 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8913 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 8914
30028c85 8915 init_insn_group_barriers ();
388092d5 8916 seen_good_insn = 0;
30028c85
VM
8917 need_barrier_p = 0;
8918 prev_insn = NULL_RTX;
b395ddbe 8919 }
b5b8b0ac 8920 else if (NONDEBUG_INSN_P (insn))
2130b7fb 8921 {
30028c85 8922 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 8923 {
30028c85 8924 init_insn_group_barriers ();
388092d5 8925 seen_good_insn = 0;
30028c85
VM
8926 need_barrier_p = 0;
8927 prev_insn = NULL_RTX;
c65ebc55 8928 }
388092d5
AB
8929 else if (need_barrier_p || group_barrier_needed (insn)
8930 || (mflag_sched_stop_bits_after_every_cycle
8931 && GET_MODE (insn) == TImode
8932 && seen_good_insn))
2130b7fb 8933 {
30028c85
VM
8934 if (TARGET_EARLY_STOP_BITS)
8935 {
8936 rtx last;
9c808aad 8937
30028c85
VM
8938 for (last = insn;
8939 last != current_sched_info->prev_head;
8940 last = PREV_INSN (last))
8941 if (INSN_P (last) && GET_MODE (last) == TImode
8942 && stops_p [INSN_UID (last)])
8943 break;
8944 if (last == current_sched_info->prev_head)
8945 last = insn;
8946 last = prev_active_insn (last);
8947 if (last
8948 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8949 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8950 last);
8951 init_insn_group_barriers ();
8952 for (last = NEXT_INSN (last);
8953 last != insn;
8954 last = NEXT_INSN (last))
8955 if (INSN_P (last))
388092d5
AB
8956 {
8957 group_barrier_needed (last);
8958 if (recog_memoized (last) >= 0
8959 && important_for_bundling_p (last))
8960 seen_good_insn = 1;
8961 }
30028c85
VM
8962 }
8963 else
8964 {
8965 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8966 insn);
8967 init_insn_group_barriers ();
388092d5 8968 seen_good_insn = 0;
30028c85 8969 }
c1bc6ca8 8970 group_barrier_needed (insn);
388092d5
AB
8971 if (recog_memoized (insn) >= 0
8972 && important_for_bundling_p (insn))
8973 seen_good_insn = 1;
30028c85 8974 prev_insn = NULL_RTX;
2130b7fb 8975 }
388092d5
AB
8976 else if (recog_memoized (insn) >= 0
8977 && important_for_bundling_p (insn))
8978 {
8979 prev_insn = insn;
8980 seen_good_insn = 1;
8981 }
30028c85
VM
8982 need_barrier_p = (GET_CODE (insn) == CALL_INSN
8983 || GET_CODE (PATTERN (insn)) == ASM_INPUT
8984 || asm_noperands (PATTERN (insn)) >= 0);
c65ebc55 8985 }
2130b7fb 8986 }
30028c85 8987}
2130b7fb 8988
30028c85 8989\f
2130b7fb 8990
a4d05547 8991/* The following function returns the lookahead depth used by the DFA
30028c85 8992 insn scheduler. */
2130b7fb 8993
c237e94a 8994static int
9c808aad 8995ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 8996{
30028c85
VM
8997 return (reload_completed ? 6 : 4);
8998}
2130b7fb 8999
30028c85 9000/* The following function initiates variable `dfa_pre_cycle_insn'. */
2130b7fb 9001
30028c85 9002static void
9c808aad 9003ia64_init_dfa_pre_cycle_insn (void)
30028c85
VM
9004{
9005 if (temp_dfa_state == NULL)
2130b7fb 9006 {
30028c85
VM
9007 dfa_state_size = state_size ();
9008 temp_dfa_state = xmalloc (dfa_state_size);
9009 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 9010 }
30028c85
VM
9011 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9012 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9013 recog_memoized (dfa_pre_cycle_insn);
9014 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9015 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9016 recog_memoized (dfa_stop_insn);
9017}
2130b7fb 9018
30028c85
VM
9019/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9020 used by the DFA insn scheduler. */
2130b7fb 9021
30028c85 9022static rtx
9c808aad 9023ia64_dfa_pre_cycle_insn (void)
30028c85
VM
9024{
9025 return dfa_pre_cycle_insn;
9026}
2130b7fb 9027
30028c85
VM
9028/* The following function returns TRUE if PRODUCER (of type ilog or
 9029 ld) produces an address for CONSUMER (of type st or stf). */
2130b7fb 9030
30028c85 9031int
9c808aad 9032ia64_st_address_bypass_p (rtx producer, rtx consumer)
30028c85
VM
9033{
9034 rtx dest, reg, mem;
2130b7fb 9035
e820471b 9036 gcc_assert (producer && consumer);
30028c85 9037 dest = ia64_single_set (producer);
e820471b
NS
9038 gcc_assert (dest);
9039 reg = SET_DEST (dest);
9040 gcc_assert (reg);
30028c85
VM
9041 if (GET_CODE (reg) == SUBREG)
9042 reg = SUBREG_REG (reg);
e820471b
NS
9043 gcc_assert (GET_CODE (reg) == REG);
9044
30028c85 9045 dest = ia64_single_set (consumer);
e820471b
NS
9046 gcc_assert (dest);
9047 mem = SET_DEST (dest);
9048 gcc_assert (mem && GET_CODE (mem) == MEM);
30028c85 9049 return reg_mentioned_p (reg, mem);
2130b7fb
BS
9050}
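/* For example (illustrative assembly, not taken from generated output):

	add r14 = r32, r33	the producer computes the address
	;;
	st8 [r14] = r35		the consumer stores through it

   Here the add feeds the address of the store, so this bypass holds.  */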
9051
30028c85
VM
9052/* The following function returns TRUE if PRODUCER (of type ilog or
 9053 ld) produces an address for CONSUMER (of type ld or fld). */
2130b7fb 9054
30028c85 9055int
9c808aad 9056ia64_ld_address_bypass_p (rtx producer, rtx consumer)
2130b7fb 9057{
30028c85
VM
9058 rtx dest, src, reg, mem;
9059
e820471b 9060 gcc_assert (producer && consumer);
30028c85 9061 dest = ia64_single_set (producer);
e820471b
NS
9062 gcc_assert (dest);
9063 reg = SET_DEST (dest);
9064 gcc_assert (reg);
30028c85
VM
9065 if (GET_CODE (reg) == SUBREG)
9066 reg = SUBREG_REG (reg);
e820471b
NS
9067 gcc_assert (GET_CODE (reg) == REG);
9068
30028c85 9069 src = ia64_single_set (consumer);
e820471b
NS
9070 gcc_assert (src);
9071 mem = SET_SRC (src);
9072 gcc_assert (mem);
048d0d36 9073
30028c85
VM
9074 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9075 mem = XVECEXP (mem, 0, 0);
048d0d36 9076 else if (GET_CODE (mem) == IF_THEN_ELSE)
917f1b7e 9077 /* ??? Is this bypass necessary for ld.c? */
048d0d36
MK
9078 {
9079 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9080 mem = XEXP (mem, 1);
9081 }
9082
30028c85
VM
9083 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9084 mem = XEXP (mem, 0);
ef1ecf87 9085
048d0d36
MK
9086 if (GET_CODE (mem) == UNSPEC)
9087 {
9088 int c = XINT (mem, 1);
9089
388092d5
AB
9090 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9091 || c == UNSPEC_LDSA);
048d0d36
MK
9092 mem = XVECEXP (mem, 0, 0);
9093 }
9094
ef1ecf87 9095 /* Note that LO_SUM is used for GOT loads. */
e820471b 9096 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
ef1ecf87 9097
30028c85
VM
9098 return reg_mentioned_p (reg, mem);
9099}
9100
 9101/* The following function returns TRUE if INSN produces an address for a
 9102 load/store insn. We will place such insns into an M slot because that
ff482c8d 9103 decreases their latency. */
30028c85
VM
9104
9105int
9c808aad 9106ia64_produce_address_p (rtx insn)
30028c85
VM
9107{
9108 return insn->call;
2130b7fb 9109}
30028c85 9110
2130b7fb 9111\f
3b572406
RH
9112/* Emit pseudo-ops for the assembler to describe predicate relations.
9113 At present this assumes that we only consider predicate pairs to
9114 be mutex, and that the assembler can deduce proper values from
9115 straight-line code. */
9116
9117static void
9c808aad 9118emit_predicate_relation_info (void)
3b572406 9119{
e0082a72 9120 basic_block bb;
3b572406 9121
e0082a72 9122 FOR_EACH_BB_REVERSE (bb)
3b572406 9123 {
3b572406 9124 int r;
a813c111 9125 rtx head = BB_HEAD (bb);
3b572406
RH
9126
9127 /* We only need such notes at code labels. */
9128 if (GET_CODE (head) != CODE_LABEL)
9129 continue;
740aeb38 9130 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
3b572406
RH
9131 head = NEXT_INSN (head);
9132
9f3b8452
RH
9133 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9134 grabbing the entire block of predicate registers. */
9135 for (r = PR_REG (2); r < PR_REG (64); r += 2)
6fb5fa3c 9136 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
3b572406 9137 {
f2f90c63 9138 rtx p = gen_rtx_REG (BImode, r);
054451ea 9139 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111
SB
9140 if (head == BB_END (bb))
9141 BB_END (bb) = n;
3b572406
RH
9142 head = n;
9143 }
9144 }
ca3920ad
JW
9145
9146 /* Look for conditional calls that do not return, and protect predicate
9147 relations around them. Otherwise the assembler will assume the call
9148 returns, and complain about uses of call-clobbered predicates after
9149 the call. */
e0082a72 9150 FOR_EACH_BB_REVERSE (bb)
ca3920ad 9151 {
a813c111 9152 rtx insn = BB_HEAD (bb);
9c808aad 9153
ca3920ad
JW
9154 while (1)
9155 {
9156 if (GET_CODE (insn) == CALL_INSN
9157 && GET_CODE (PATTERN (insn)) == COND_EXEC
9158 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9159 {
9160 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9161 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111
SB
9162 if (BB_HEAD (bb) == insn)
9163 BB_HEAD (bb) = b;
9164 if (BB_END (bb) == insn)
9165 BB_END (bb) = a;
ca3920ad 9166 }
9c808aad 9167
a813c111 9168 if (insn == BB_END (bb))
ca3920ad
JW
9169 break;
9170 insn = NEXT_INSN (insn);
9171 }
9172 }
3b572406
RH
9173}
9174
c65ebc55
JW
9175/* Perform machine dependent operations on the rtl chain INSNS. */
9176
18dbd950 9177static void
9c808aad 9178ia64_reorg (void)
c65ebc55 9179{
1e3881c2
JH
9180 /* We are freeing block_for_insn in the toplev to keep compatibility
9181 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 9182 compute_bb_for_insn ();
a00fe19f
RH
9183
9184 /* If optimizing, we'll have split before scheduling. */
9185 if (optimize == 0)
6fb5fa3c 9186 split_all_insns ();
2130b7fb 9187
388092d5
AB
9188 if (optimize && ia64_flag_schedule_insns2
9189 && dbg_cnt (ia64_sched2))
f4d578da 9190 {
eced69b5 9191 timevar_push (TV_SCHED2);
f4d578da 9192 ia64_final_schedule = 1;
30028c85
VM
9193
9194 initiate_bundle_states ();
9195 ia64_nop = make_insn_raw (gen_nop ());
9196 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9197 recog_memoized (ia64_nop);
9198 clocks_length = get_max_uid () + 1;
5ead67f6 9199 stops_p = XCNEWVEC (char, clocks_length);
7400e46b 9200
30028c85
VM
9201 if (ia64_tune == PROCESSOR_ITANIUM2)
9202 {
9203 pos_1 = get_cpu_unit_code ("2_1");
9204 pos_2 = get_cpu_unit_code ("2_2");
9205 pos_3 = get_cpu_unit_code ("2_3");
9206 pos_4 = get_cpu_unit_code ("2_4");
9207 pos_5 = get_cpu_unit_code ("2_5");
9208 pos_6 = get_cpu_unit_code ("2_6");
9209 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9210 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9211 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9212 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9213 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9214 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9215 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9216 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9217 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9218 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9219 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9220 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9221 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9222 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9223 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9224 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9225 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9226 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9227 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9228 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9229 }
9230 else
9231 {
9232 pos_1 = get_cpu_unit_code ("1_1");
9233 pos_2 = get_cpu_unit_code ("1_2");
9234 pos_3 = get_cpu_unit_code ("1_3");
9235 pos_4 = get_cpu_unit_code ("1_4");
9236 pos_5 = get_cpu_unit_code ("1_5");
9237 pos_6 = get_cpu_unit_code ("1_6");
9238 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9239 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9240 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9241 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9242 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9243 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9244 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9245 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9246 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9247 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9248 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9249 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9250 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9251 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9252 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9253 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9254 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9255 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9256 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9257 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9258 }
388092d5
AB
9259
9260 if (flag_selective_scheduling2
9261 && !maybe_skip_selective_scheduling ())
9262 run_selective_scheduling ();
9263 else
9264 schedule_ebbs ();
9265
 9266 /* Redo alignment computation, as it might have gone wrong. */
9267 compute_alignments ();
9268
6fb5fa3c
DB
9269 /* We cannot reuse this one because it has been corrupted by the
9270 evil glat. */
30028c85 9271 finish_bundle_states ();
30028c85 9272 free (stops_p);
048d0d36 9273 stops_p = NULL;
c263766c 9274 emit_insn_group_barriers (dump_file);
30028c85 9275
f4d578da 9276 ia64_final_schedule = 0;
eced69b5 9277 timevar_pop (TV_SCHED2);
f4d578da
BS
9278 }
9279 else
c263766c 9280 emit_all_insn_group_barriers (dump_file);
f2f90c63 9281
6fb5fa3c
DB
9282 df_analyze ();
9283
f12f25a7
RH
9284 /* A call must not be the last instruction in a function, so that the
9285 return address is still within the function, so that unwinding works
9286 properly. Note that IA-64 differs from dwarf2 on this point. */
9287 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9288 {
9289 rtx insn;
9290 int saw_stop = 0;
9291
9292 insn = get_last_insn ();
9293 if (! INSN_P (insn))
9294 insn = prev_active_insn (insn);
2ca57608 9295 if (insn)
f12f25a7 9296 {
2ca57608
L
9297 /* Skip over insns that expand to nothing. */
9298 while (GET_CODE (insn) == INSN
9299 && get_attr_empty (insn) == EMPTY_YES)
9300 {
9301 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9302 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9303 saw_stop = 1;
9304 insn = prev_active_insn (insn);
9305 }
9306 if (GET_CODE (insn) == CALL_INSN)
9307 {
9308 if (! saw_stop)
9309 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9310 emit_insn (gen_break_f ());
9311 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9312 }
f12f25a7
RH
9313 }
9314 }
9315
f2f90c63 9316 emit_predicate_relation_info ();
014a1138
JZ
9317
9318 if (ia64_flag_var_tracking)
9319 {
9320 timevar_push (TV_VAR_TRACKING);
9321 variable_tracking_main ();
9322 timevar_pop (TV_VAR_TRACKING);
9323 }
0d475361 9324 df_finish_pass (false);
c65ebc55
JW
9325}
9326\f
9327/* Return true if REGNO is used by the epilogue. */
9328
9329int
9c808aad 9330ia64_epilogue_uses (int regno)
c65ebc55 9331{
6ca3c22f
RH
9332 switch (regno)
9333 {
9334 case R_GR (1):
b23ba0b8
RH
9335 /* With a call to a function in another module, we will write a new
9336 value to "gp". After returning from such a call, we need to make
9337 sure the function restores the original gp-value, even if the
9338 function itself does not use the gp anymore. */
9339 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
9340
9341 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9342 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9343 /* For functions defined with the syscall_linkage attribute, all
9344 input registers are marked as live at all function exits. This
9345 prevents the register allocator from using the input registers,
9346 which in turn makes it possible to restart a system call after
9347 an interrupt without having to save/restore the input registers.
9348 This also prevents kernel data from leaking to application code. */
9349 return lookup_attribute ("syscall_linkage",
9350 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9351
9352 case R_BR (0):
9353 /* Conditional return patterns can't represent the use of `b0' as
9354 the return address, so we force the value live this way. */
9355 return 1;
6b6c1201 9356
6ca3c22f
RH
9357 case AR_PFS_REGNUM:
9358 /* Likewise for ar.pfs, which is used by br.ret. */
9359 return 1;
5527bf14 9360
6ca3c22f
RH
9361 default:
9362 return 0;
9363 }
c65ebc55 9364}
15b5aef3
RH
9365
9366/* Return true if REGNO is used by the frame unwinder. */
9367
9368int
9c808aad 9369ia64_eh_uses (int regno)
15b5aef3 9370{
09639a83 9371 unsigned int r;
6fb5fa3c 9372
15b5aef3
RH
9373 if (! reload_completed)
9374 return 0;
9375
6fb5fa3c
DB
9376 if (regno == 0)
9377 return 0;
9378
9379 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9380 if (regno == current_frame_info.r[r]
9381 || regno == emitted_frame_related_regs[r])
9382 return 1;
15b5aef3
RH
9383
9384 return 0;
9385}
c65ebc55 9386\f
1cdbd630 9387/* Return true if this goes in small data/bss. */
c65ebc55
JW
9388
9389/* ??? We could also support own long data here. Generating movl/add/ld8
9390 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9391 code faster because there is one less load. This also includes incomplete
9392 types which can't go in sdata/sbss. */
9393
ae46c4e0 9394static bool
3101faab 9395ia64_in_small_data_p (const_tree exp)
ae46c4e0
RH
9396{
9397 if (TARGET_NO_SDATA)
9398 return false;
9399
3907500b
RH
9400 /* We want to merge strings, so we never consider them small data. */
9401 if (TREE_CODE (exp) == STRING_CST)
9402 return false;
9403
4c494a15
ZW
9404 /* Functions are never small data. */
9405 if (TREE_CODE (exp) == FUNCTION_DECL)
9406 return false;
9407
ae46c4e0
RH
9408 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9409 {
9410 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
826eb7ed 9411
ae46c4e0 9412 if (strcmp (section, ".sdata") == 0
826eb7ed
JB
9413 || strncmp (section, ".sdata.", 7) == 0
9414 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9415 || strcmp (section, ".sbss") == 0
9416 || strncmp (section, ".sbss.", 6) == 0
9417 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
ae46c4e0
RH
9418 return true;
9419 }
9420 else
9421 {
9422 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9423
9424 /* If this is an incomplete type with size 0, then we can't put it
9425 in sdata because it might be too big when completed. */
9426 if (size > 0 && size <= ia64_section_threshold)
9427 return true;
9428 }
9429
9430 return false;
9431}
0c96007e 9432\f
ad0fc698
JW
9433/* Output assembly directives for prologue regions. */
9434
9435/* The current basic block number. */
9436
e0082a72 9437static bool last_block;
ad0fc698
JW
9438
9439/* True if we need a copy_state command at the start of the next block. */
9440
e0082a72 9441static bool need_copy_state;
ad0fc698 9442
658f32fd
AO
9443#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9444# define MAX_ARTIFICIAL_LABEL_BYTES 30
9445#endif
9446
9447/* Emit a debugging label after a call-frame-related insn. We'd
9448 rather output the label right away, but we'd have to output it
9449 after, not before, the instruction, and the instruction has not
9450 been output yet. So we emit the label after the insn, delete it to
9451 avoid introducing basic blocks, and mark it as preserved, such that
9452 it is still output, given that it is referenced in debug info. */
9453
9454static const char *
9455ia64_emit_deleted_label_after_insn (rtx insn)
9456{
9457 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9458 rtx lb = gen_label_rtx ();
9459 rtx label_insn = emit_label_after (lb, insn);
9460
9461 LABEL_PRESERVE_P (lb) = 1;
9462
9463 delete_insn (label_insn);
9464
9465 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9466
9467 return xstrdup (label);
9468}
9469
9470/* Define the CFA after INSN with the steady-state definition. */
9471
9472static void
b5b8b0ac 9473ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
658f32fd
AO
9474{
9475 rtx fp = frame_pointer_needed
9476 ? hard_frame_pointer_rtx
9477 : stack_pointer_rtx;
b5b8b0ac
AO
9478 const char *label = ia64_emit_deleted_label_after_insn (insn);
9479
9480 if (!frame)
9481 return;
658f32fd
AO
9482
9483 dwarf2out_def_cfa
b5b8b0ac 9484 (label, REGNO (fp),
658f32fd
AO
9485 ia64_initial_elimination_offset
9486 (REGNO (arg_pointer_rtx), REGNO (fp))
9487 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9488}
9489
9490/* The generic dwarf2 frame debug info generator does not define a
9491 separate region for the very end of the epilogue, so refrain from
9492 doing so in the IA64-specific code as well. */
9493
9494#define IA64_CHANGE_CFA_IN_EPILOGUE 0
9495
ad0fc698
JW
9496/* The function emits unwind directives for the start of an epilogue. */
9497
9498static void
658f32fd 9499process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
ad0fc698
JW
9500{
9501 /* If this isn't the last block of the function, then we need to label the
9502 current state, and copy it back in at the start of the next block. */
9503
e0082a72 9504 if (!last_block)
ad0fc698 9505 {
658f32fd
AO
9506 if (unwind)
9507 fprintf (asm_out_file, "\t.label_state %d\n",
9508 ++cfun->machine->state_num);
e0082a72 9509 need_copy_state = true;
ad0fc698
JW
9510 }
9511
658f32fd
AO
9512 if (unwind)
9513 fprintf (asm_out_file, "\t.restore sp\n");
9514 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9515 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9516 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
ad0fc698 9517}
0c96007e 9518
0c96007e
AM
9519/* This function processes a SET pattern looking for specific patterns
9520 which result in emitting an assembly directive required for unwinding. */
97e242b0 9521
0c96007e 9522static int
658f32fd 9523process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
0c96007e
AM
9524{
9525 rtx src = SET_SRC (pat);
9526 rtx dest = SET_DEST (pat);
97e242b0 9527 int src_regno, dest_regno;
0c96007e 9528
97e242b0
RH
9529 /* Look for the ALLOC insn. */
9530 if (GET_CODE (src) == UNSPEC_VOLATILE
086c0f96 9531 && XINT (src, 1) == UNSPECV_ALLOC
97e242b0 9532 && GET_CODE (dest) == REG)
0c96007e 9533 {
97e242b0
RH
9534 dest_regno = REGNO (dest);
9535
a8f5224e
DM
9536 /* If this is the final destination for ar.pfs, then this must
9537 be the alloc in the prologue. */
6fb5fa3c 9538 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
658f32fd
AO
9539 {
9540 if (unwind)
9541 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9542 ia64_dbx_register_number (dest_regno));
9543 }
a8f5224e
DM
9544 else
9545 {
9546 /* This must be an alloc before a sibcall. We must drop the
9547 old frame info. The easiest way to drop the old frame
9548 info is to ensure we had a ".restore sp" directive
9549 followed by a new prologue. If the procedure doesn't
9550 have a memory-stack frame, we'll issue a dummy ".restore
9551 sp" now. */
b1eae416 9552 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
a8f5224e 9553 /* if haven't done process_epilogue() yet, do it now */
658f32fd
AO
9554 process_epilogue (asm_out_file, insn, unwind, frame);
9555 if (unwind)
9556 fprintf (asm_out_file, "\t.prologue\n");
a8f5224e 9557 }
0c96007e
AM
9558 return 1;
9559 }
9560
ed168e45 9561 /* Look for SP = .... */
0c96007e
AM
9562 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9563 {
9564 if (GET_CODE (src) == PLUS)
9565 {
9566 rtx op0 = XEXP (src, 0);
9567 rtx op1 = XEXP (src, 1);
e820471b
NS
9568
9569 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9570
9571 if (INTVAL (op1) < 0)
658f32fd
AO
9572 {
9573 gcc_assert (!frame_pointer_needed);
9574 if (unwind)
9575 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9576 -INTVAL (op1));
b5b8b0ac 9577 ia64_dwarf2out_def_steady_cfa (insn, frame);
658f32fd 9578 }
0186257f 9579 else
658f32fd 9580 process_epilogue (asm_out_file, insn, unwind, frame);
0c96007e 9581 }
0186257f 9582 else
e820471b
NS
9583 {
9584 gcc_assert (GET_CODE (src) == REG
9585 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
658f32fd 9586 process_epilogue (asm_out_file, insn, unwind, frame);
e820471b 9587 }
0186257f
JW
9588
9589 return 1;
0c96007e 9590 }
0c96007e
AM
9591
9592 /* Register move we need to look at. */
9593 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9594 {
97e242b0
RH
9595 src_regno = REGNO (src);
9596 dest_regno = REGNO (dest);
9597
9598 switch (src_regno)
9599 {
9600 case BR_REG (0):
0c96007e 9601 /* Saving return address pointer. */
6fb5fa3c 9602 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
658f32fd
AO
9603 if (unwind)
9604 fprintf (asm_out_file, "\t.save rp, r%d\n",
9605 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9606 return 1;
9607
9608 case PR_REG (0):
6fb5fa3c 9609 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
658f32fd
AO
9610 if (unwind)
9611 fprintf (asm_out_file, "\t.save pr, r%d\n",
9612 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9613 return 1;
9614
9615 case AR_UNAT_REGNUM:
6fb5fa3c 9616 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
9617 if (unwind)
9618 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9619 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9620 return 1;
9621
9622 case AR_LC_REGNUM:
6fb5fa3c 9623 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
9624 if (unwind)
9625 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9626 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9627 return 1;
9628
9629 case STACK_POINTER_REGNUM:
e820471b
NS
9630 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9631 && frame_pointer_needed);
658f32fd
AO
9632 if (unwind)
9633 fprintf (asm_out_file, "\t.vframe r%d\n",
9634 ia64_dbx_register_number (dest_regno));
b5b8b0ac 9635 ia64_dwarf2out_def_steady_cfa (insn, frame);
97e242b0
RH
9636 return 1;
9637
9638 default:
9639 /* Everything else should indicate being stored to memory. */
e820471b 9640 gcc_unreachable ();
0c96007e
AM
9641 }
9642 }
97e242b0
RH
9643
9644 /* Memory store we need to look at. */
9645 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 9646 {
97e242b0
RH
9647 long off;
9648 rtx base;
9649 const char *saveop;
9650
9651 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 9652 {
97e242b0
RH
9653 base = XEXP (dest, 0);
9654 off = 0;
0c96007e 9655 }
e820471b 9656 else
0c96007e 9657 {
e820471b
NS
9658 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9659 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
97e242b0
RH
9660 base = XEXP (XEXP (dest, 0), 0);
9661 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 9662 }
0c96007e 9663
97e242b0
RH
9664 if (base == hard_frame_pointer_rtx)
9665 {
9666 saveop = ".savepsp";
9667 off = - off;
9668 }
97e242b0 9669 else
e820471b
NS
9670 {
9671 gcc_assert (base == stack_pointer_rtx);
9672 saveop = ".savesp";
9673 }
97e242b0
RH
9674
9675 src_regno = REGNO (src);
9676 switch (src_regno)
9677 {
9678 case BR_REG (0):
6fb5fa3c 9679 gcc_assert (!current_frame_info.r[reg_save_b0]);
658f32fd
AO
9680 if (unwind)
9681 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
97e242b0
RH
9682 return 1;
9683
9684 case PR_REG (0):
6fb5fa3c 9685 gcc_assert (!current_frame_info.r[reg_save_pr]);
658f32fd
AO
9686 if (unwind)
9687 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
97e242b0
RH
9688 return 1;
9689
9690 case AR_LC_REGNUM:
6fb5fa3c 9691 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
9692 if (unwind)
9693 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
97e242b0
RH
9694 return 1;
9695
9696 case AR_PFS_REGNUM:
6fb5fa3c 9697 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
658f32fd
AO
9698 if (unwind)
9699 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
97e242b0
RH
9700 return 1;
9701
9702 case AR_UNAT_REGNUM:
6fb5fa3c 9703 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
9704 if (unwind)
9705 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
97e242b0
RH
9706 return 1;
9707
9708 case GR_REG (4):
9709 case GR_REG (5):
9710 case GR_REG (6):
9711 case GR_REG (7):
658f32fd
AO
9712 if (unwind)
9713 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9714 1 << (src_regno - GR_REG (4)));
97e242b0
RH
9715 return 1;
9716
9717 case BR_REG (1):
9718 case BR_REG (2):
9719 case BR_REG (3):
9720 case BR_REG (4):
9721 case BR_REG (5):
658f32fd
AO
9722 if (unwind)
9723 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9724 1 << (src_regno - BR_REG (1)));
0c96007e 9725 return 1;
97e242b0
RH
9726
9727 case FR_REG (2):
9728 case FR_REG (3):
9729 case FR_REG (4):
9730 case FR_REG (5):
658f32fd
AO
9731 if (unwind)
9732 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9733 1 << (src_regno - FR_REG (2)));
97e242b0
RH
9734 return 1;
9735
9736 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9737 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9738 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9739 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
658f32fd
AO
9740 if (unwind)
9741 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9742 1 << (src_regno - FR_REG (12)));
97e242b0
RH
9743 return 1;
9744
9745 default:
9746 return 0;
0c96007e
AM
9747 }
9748 }
97e242b0 9749
0c96007e
AM
9750 return 0;
9751}
9752
9753
9754/* This function looks at a single insn and emits any directives
9755 required to unwind this insn. */
9756void
9c808aad 9757process_for_unwind_directive (FILE *asm_out_file, rtx insn)
0c96007e 9758{
658f32fd
AO
9759 bool unwind = (flag_unwind_tables
9760 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9761 bool frame = dwarf2out_do_frame ();
9762
9763 if (unwind || frame)
0c96007e 9764 {
97e242b0
RH
9765 rtx pat;
9766
740aeb38 9767 if (NOTE_INSN_BASIC_BLOCK_P (insn))
ad0fc698 9768 {
e0082a72 9769 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
ad0fc698
JW
9770
9771 /* Restore unwind state from immediately before the epilogue. */
9772 if (need_copy_state)
9773 {
658f32fd
AO
9774 if (unwind)
9775 {
9776 fprintf (asm_out_file, "\t.body\n");
9777 fprintf (asm_out_file, "\t.copy_state %d\n",
9778 cfun->machine->state_num);
9779 }
b5b8b0ac
AO
9780 if (IA64_CHANGE_CFA_IN_EPILOGUE)
9781 ia64_dwarf2out_def_steady_cfa (insn, frame);
e0082a72 9782 need_copy_state = false;
ad0fc698
JW
9783 }
9784 }
9785
5a63e069 9786 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
ad0fc698
JW
9787 return;
9788
97e242b0
RH
9789 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9790 if (pat)
9791 pat = XEXP (pat, 0);
9792 else
9793 pat = PATTERN (insn);
0c96007e
AM
9794
9795 switch (GET_CODE (pat))
9796 {
809d4ef1 9797 case SET:
658f32fd 9798 process_set (asm_out_file, pat, insn, unwind, frame);
809d4ef1
RH
9799 break;
9800
9801 case PARALLEL:
9802 {
9803 int par_index;
9804 int limit = XVECLEN (pat, 0);
9805 for (par_index = 0; par_index < limit; par_index++)
9806 {
9807 rtx x = XVECEXP (pat, 0, par_index);
9808 if (GET_CODE (x) == SET)
658f32fd 9809 process_set (asm_out_file, x, insn, unwind, frame);
809d4ef1
RH
9810 }
9811 break;
9812 }
9813
9814 default:
e820471b 9815 gcc_unreachable ();
0c96007e
AM
9816 }
9817 }
9818}
c65ebc55 9819
0551c32d 9820\f
af795c3c
RH
9821enum ia64_builtins
9822{
9823 IA64_BUILTIN_BSP,
c252db20
L
9824 IA64_BUILTIN_COPYSIGNQ,
9825 IA64_BUILTIN_FABSQ,
9826 IA64_BUILTIN_FLUSHRS,
fcb82ab0
UB
9827 IA64_BUILTIN_INFQ,
9828 IA64_BUILTIN_HUGE_VALQ
af795c3c
RH
9829};
9830
c65ebc55 9831void
9c808aad 9832ia64_init_builtins (void)
c65ebc55 9833{
9649812a 9834 tree fpreg_type;
bf9ab6b6 9835 tree float80_type;
9649812a
MM
9836
9837 /* The __fpreg type. */
9838 fpreg_type = make_node (REAL_TYPE);
4de67c26 9839 TYPE_PRECISION (fpreg_type) = 82;
9649812a
MM
9840 layout_type (fpreg_type);
9841 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9842
9843 /* The __float80 type. */
bf9ab6b6 9844 float80_type = make_node (REAL_TYPE);
968a7562 9845 TYPE_PRECISION (float80_type) = 80;
bf9ab6b6
MM
9846 layout_type (float80_type);
9847 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
9848
9849 /* The __float128 type. */
02befdf4 9850 if (!TARGET_HPUX)
9649812a 9851 {
c252db20 9852 tree ftype, decl;
9649812a 9853 tree float128_type = make_node (REAL_TYPE);
c252db20 9854
9649812a
MM
9855 TYPE_PRECISION (float128_type) = 128;
9856 layout_type (float128_type);
9857 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
c252db20
L
9858
9859 /* TFmode support builtins. */
9860 ftype = build_function_type (float128_type, void_list_node);
9861 add_builtin_function ("__builtin_infq", ftype,
9862 IA64_BUILTIN_INFQ, BUILT_IN_MD,
9863 NULL, NULL_TREE);
9864
fcb82ab0
UB
9865 add_builtin_function ("__builtin_huge_valq", ftype,
9866 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
9867 NULL, NULL_TREE);
9868
c252db20
L
9869 ftype = build_function_type_list (float128_type,
9870 float128_type,
9871 NULL_TREE);
9872 decl = add_builtin_function ("__builtin_fabsq", ftype,
9873 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
9874 "__fabstf2", NULL_TREE);
9875 TREE_READONLY (decl) = 1;
9876
9877 ftype = build_function_type_list (float128_type,
9878 float128_type,
9879 float128_type,
9880 NULL_TREE);
9881 decl = add_builtin_function ("__builtin_copysignq", ftype,
9882 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
9883 "__copysigntf3", NULL_TREE);
9884 TREE_READONLY (decl) = 1;
9649812a
MM
9885 }
9886 else
02befdf4 9887 /* Under HPUX, this is a synonym for "long double". */
9649812a
MM
9888 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9889 "__float128");
9890
f2972bf8
DR
9891 /* Fwrite on VMS is non-standard. */
9892 if (TARGET_ABI_OPEN_VMS)
9893 {
9894 implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
9895 implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
9896 }
9897
6e34d3a3 9898#define def_builtin(name, type, code) \
c79efc4d
RÁE
9899 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
9900 NULL, NULL_TREE)
0551c32d 9901
3b572406 9902 def_builtin ("__builtin_ia64_bsp",
b4de2f7d 9903 build_function_type (ptr_type_node, void_list_node),
3b572406 9904 IA64_BUILTIN_BSP);
ce152ef8 9905
9c808aad
AJ
9906 def_builtin ("__builtin_ia64_flushrs",
9907 build_function_type (void_type_node, void_list_node),
ce152ef8
AM
9908 IA64_BUILTIN_FLUSHRS);
9909
0551c32d 9910#undef def_builtin
7d522000
SE
9911
9912 if (TARGET_HPUX)
9913 {
9914 if (built_in_decls [BUILT_IN_FINITE])
9915 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
9916 "_Isfinite");
9917 if (built_in_decls [BUILT_IN_FINITEF])
9918 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
9919 "_Isfinitef");
9920 if (built_in_decls [BUILT_IN_FINITEL])
9921 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
9922 "_Isfinitef128");
9923 }
c65ebc55
JW
9924}
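/* Example use of the target builtins defined above (user code,
   illustrative only): __builtin_ia64_bsp () reads the current register
   stack engine backing store pointer (ar.bsp), and
   __builtin_ia64_flushrs () flushes the register stack to the backing
   store, e.g.

	void *bsp = __builtin_ia64_bsp ();
	__builtin_ia64_flushrs ();
 */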
9925
c65ebc55 9926rtx
9c808aad
AJ
9927ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9928 enum machine_mode mode ATTRIBUTE_UNUSED,
9929 int ignore ATTRIBUTE_UNUSED)
c65ebc55 9930{
767fad4c 9931 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
97e242b0 9932 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
c65ebc55
JW
9933
9934 switch (fcode)
9935 {
ce152ef8 9936 case IA64_BUILTIN_BSP:
0551c32d
RH
9937 if (! target || ! register_operand (target, DImode))
9938 target = gen_reg_rtx (DImode);
9939 emit_insn (gen_bsp_value (target));
8419b675
RK
9940#ifdef POINTERS_EXTEND_UNSIGNED
9941 target = convert_memory_address (ptr_mode, target);
9942#endif
0551c32d 9943 return target;
ce152ef8
AM
9944
9945 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
9946 emit_insn (gen_flushrs ());
9947 return const0_rtx;
ce152ef8 9948
c252db20 9949 case IA64_BUILTIN_INFQ:
fcb82ab0 9950 case IA64_BUILTIN_HUGE_VALQ:
c252db20
L
9951 {
9952 REAL_VALUE_TYPE inf;
9953 rtx tmp;
9954
9955 real_inf (&inf);
9956 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
9957
9958 tmp = validize_mem (force_const_mem (mode, tmp));
9959
9960 if (target == 0)
9961 target = gen_reg_rtx (mode);
9962
9963 emit_move_insn (target, tmp);
9964 return target;
9965 }
9966
9967 case IA64_BUILTIN_FABSQ:
9968 case IA64_BUILTIN_COPYSIGNQ:
9969 return expand_call (exp, target, ignore);
9970
c65ebc55 9971 default:
c252db20 9972 gcc_unreachable ();
c65ebc55
JW
9973 }
9974
0551c32d 9975 return NULL_RTX;
c65ebc55 9976}
0d7839da
SE
9977
 9978/* On HP-UX IA64, aggregate parameters are passed stored in the
 9979 most significant bits of the stack slot. */
9980
9981enum direction
586de218 9982ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
0d7839da 9983{
ed168e45 9984 /* Exception to normal case for structures/unions/etc. */
0d7839da
SE
9985
9986 if (type && AGGREGATE_TYPE_P (type)
9987 && int_size_in_bytes (type) < UNITS_PER_WORD)
9988 return upward;
9989
d3704c46
KH
9990 /* Fall back to the default. */
9991 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
0d7839da 9992}
686f3bf0 9993
c47c29c8
L
9994/* Emit text to declare externally defined variables and functions, because
9995 the Intel assembler does not support undefined externals. */
686f3bf0 9996
c47c29c8
L
9997void
9998ia64_asm_output_external (FILE *file, tree decl, const char *name)
686f3bf0 9999{
c47c29c8
L
10000 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10001 set in order to avoid putting out names that are never really
10002 used. */
10003 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
686f3bf0 10004 {
c47c29c8 10005 /* maybe_assemble_visibility will return 1 if the assembler
2e226e66 10006 visibility directive is output. */
c47c29c8
L
10007 int need_visibility = ((*targetm.binds_local_p) (decl)
10008 && maybe_assemble_visibility (decl));
57d4f65c 10009
f2972bf8
DR
10010#ifdef DO_CRTL_NAMES
10011 DO_CRTL_NAMES;
10012#endif
10013
c47c29c8
L
10014 /* GNU as does not need anything here, but the HP linker does
10015 need something for external functions. */
10016 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10017 && TREE_CODE (decl) == FUNCTION_DECL)
812b587e 10018 (*targetm.asm_out.globalize_decl_name) (file, decl);
c47c29c8
L
10019 else if (need_visibility && !TARGET_GNU_AS)
10020 (*targetm.asm_out.globalize_label) (file, name);
686f3bf0
SE
10021 }
10022}
10023
1f7aa7cd 10024/* Set SImode div/mod functions; init_integral_libfuncs only initializes
6bc709c1
L
 10025 modes of word_mode and larger.  Rename the TFmode libfuncs using the
 10026 HPUX conventions.  __divtf3 is used for XFmode; we need to keep it for
 10027 backward compatibility. */
1f7aa7cd
SE
10028
10029static void
10030ia64_init_libfuncs (void)
10031{
10032 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10033 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10034 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10035 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
6bc709c1
L
10036
10037 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10038 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10039 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10040 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10041 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10042
10043 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10044 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10045 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10046 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10047 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10048 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10049
10050 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10051 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
4a73d865 10052 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
6bc709c1
L
10053 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10054 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10055
10056 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10057 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
4a73d865 10058 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
2a3ebe77
JM
10059 /* HP-UX 11.23 libc does not have a function for unsigned
10060 SImode-to-TFmode conversion. */
10061 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
1f7aa7cd
SE
10062}
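/* Illustrative sketch, not part of this file: with the optab mappings
   above, a TFmode division (e.g. __float128 on GNU/Linux, long double on
   HP-UX) is emitted as a call to the HP-UX-style libfunc rather than to
   libgcc's __divtf3.  The function name is hypothetical.  */
#if 0
__float128
quad_div (__float128 a, __float128 b)
{
  return a / b;   /* becomes a call to _U_Qfdiv (sdiv_optab, TFmode) */
}
#endif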
10063
c15c90bb 10064/* Rename all the TFmode libfuncs using the HPUX conventions. */
738e7b39 10065
c15c90bb
ZW
10066static void
10067ia64_hpux_init_libfuncs (void)
10068{
1f7aa7cd
SE
10069 ia64_init_libfuncs ();
10070
bdbba3c2
SE
10071 /* The HP SI millicode division and mod functions expect DI arguments.
10072 By turning them off completely we avoid using both libgcc and the
10073 non-standard millicode routines and use the HP DI millicode routines
10074 instead. */
10075
10076 set_optab_libfunc (sdiv_optab, SImode, 0);
10077 set_optab_libfunc (udiv_optab, SImode, 0);
10078 set_optab_libfunc (smod_optab, SImode, 0);
10079 set_optab_libfunc (umod_optab, SImode, 0);
10080
10081 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10082 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10083 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10084 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10085
10086 /* HP-UX libc has TF min/max/abs routines in it. */
c15c90bb
ZW
10087 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10088 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10089 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
c15c90bb 10090
24ea7948
ZW
10091 /* ia64_expand_compare uses this. */
10092 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10093
10094 /* These should never be used. */
10095 set_optab_libfunc (eq_optab, TFmode, 0);
10096 set_optab_libfunc (ne_optab, TFmode, 0);
10097 set_optab_libfunc (gt_optab, TFmode, 0);
10098 set_optab_libfunc (ge_optab, TFmode, 0);
10099 set_optab_libfunc (lt_optab, TFmode, 0);
10100 set_optab_libfunc (le_optab, TFmode, 0);
c15c90bb 10101}
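/* Illustrative sketch, not part of this file: the practical effect of
   clearing the SImode optab entries above.  Unless GCC inlines the
   division, a 32-bit divide on HP-UX is widened and routed to the DImode
   millicode routine instead of __divsi3.  The function name is
   hypothetical.  */
#if 0
int
div32 (int a, int b)
{
  return a / b;   /* calls __milli_divI with DImode operands */
}
#endif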
738e7b39
RK
10102
10103/* Rename the division and modulus functions in VMS. */
10104
10105static void
10106ia64_vms_init_libfuncs (void)
10107{
10108 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10109 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10110 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10111 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10112 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10113 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10114 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10115 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
f2972bf8
DR
10116 abort_libfunc = init_one_libfunc ("decc$abort");
10117 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10118#ifdef MEM_LIBFUNCS_INIT
10119 MEM_LIBFUNCS_INIT;
10120#endif
738e7b39 10121}
6bc709c1
L
10122
10123/* Rename the TFmode libfuncs available from soft-fp in glibc using
10124 the HPUX conventions. */
10125
10126static void
10127ia64_sysv4_init_libfuncs (void)
10128{
10129 ia64_init_libfuncs ();
10130
10131 /* These functions are not part of the HPUX TFmode interface. We
10132 use them instead of _U_Qfcmp, which doesn't work the way we
10133 expect. */
10134 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10135 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10136 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10137 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10138 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10139 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10140
10141 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10142 glibc doesn't have them. */
10143}
c252db20
L
10144
10145/* Use soft-fp. */
10146
10147static void
10148ia64_soft_fp_init_libfuncs (void)
10149{
10150}
f2972bf8
DR
10151
10152static bool
10153ia64_vms_valid_pointer_mode (enum machine_mode mode)
10154{
10155 return (mode == SImode || mode == DImode);
10156}
ae46c4e0 10157\f
9b580a0b
RH
10158/* For HPUX, it is illegal to have relocations in shared segments. */
10159
10160static int
10161ia64_hpux_reloc_rw_mask (void)
10162{
10163 return 3;
10164}
10165
10166/* For others, relax this so that relocations to local data go in
10167 read-only segments, but we still cannot allow global relocations
10168 in read-only segments. */
10169
10170static int
10171ia64_reloc_rw_mask (void)
10172{
10173 return flag_pic ? 3 : 2;
10174}
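/* Illustrative sketch, not part of this file: how the masks above affect
   data placement.  A relocation against a global symbol keeps the object
   out of read-only sections; with -fpic, or always on HP-UX, relocations
   against local symbols do as well.  The variable names are hypothetical.  */
#if 0
extern int shared_counter;
int *const needs_global_reloc = &shared_counter;  /* never in a read-only section */
const int plain_constant = 42;                    /* no relocations: read-only */
#endif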
10175
d6b5193b
RS
10176/* Return the section to use for X. The only special thing we do here
10177 is to honor small data. */
b64a1b53 10178
d6b5193b 10179static section *
9c808aad
AJ
10180ia64_select_rtx_section (enum machine_mode mode, rtx x,
10181 unsigned HOST_WIDE_INT align)
b64a1b53
RH
10182{
10183 if (GET_MODE_SIZE (mode) > 0
1f4a2e84
SE
10184 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10185 && !TARGET_NO_SDATA)
d6b5193b 10186 return sdata_section;
b64a1b53 10187 else
d6b5193b 10188 return default_elf_select_rtx_section (mode, x, align);
b64a1b53
RH
10189}
10190
1e1bd14e 10191static unsigned int
abb8b19a
AM
10192ia64_section_type_flags (tree decl, const char *name, int reloc)
10193{
10194 unsigned int flags = 0;
10195
10196 if (strcmp (name, ".sdata") == 0
10197 || strncmp (name, ".sdata.", 7) == 0
10198 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10199 || strncmp (name, ".sdata2.", 8) == 0
10200 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10201 || strcmp (name, ".sbss") == 0
10202 || strncmp (name, ".sbss.", 6) == 0
10203 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10204 flags = SECTION_SMALL;
10205
30ed9d3d
TG
10206#if TARGET_ABI_OPEN_VMS
10207 if (decl && DECL_ATTRIBUTES (decl)
10208 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10209 flags |= SECTION_VMS_OVERLAY;
10210#endif
10211
9b580a0b 10212 flags |= default_section_type_flags (decl, name, reloc);
abb8b19a 10213 return flags;
1e1bd14e
RH
10214}
10215
57782ad8
MM
10216/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10217 structure type and that the address of that type should be passed
10218 in out0, rather than in r8. */
10219
10220static bool
10221ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10222{
10223 tree ret_type = TREE_TYPE (fntype);
10224
10225 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10226 as the structure return address parameter, if the return value
10227 type has a non-trivial copy constructor or destructor. It is not
10228 clear if this same convention should be used for other
10229 programming languages. Until G++ 3.4, we incorrectly used r8 for
10230 these return values. */
10231 return (abi_version_at_least (2)
10232 && ret_type
10233 && TYPE_MODE (ret_type) == BLKmode
10234 && TREE_ADDRESSABLE (ret_type)
10235 && strcmp (lang_hooks.name, "GNU C++") == 0);
10236}
1e1bd14e 10237
5f13cfc6
RH
10238/* Output the assembler code for a thunk function. THUNK_DECL is the
10239 declaration for the thunk function itself, FUNCTION is the decl for
10240 the target function. DELTA is an immediate constant offset to be
272d0bee 10241 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
10242 *(*this + vcall_offset) should be added to THIS. */
10243
c590b625 10244static void
9c808aad
AJ
10245ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10246 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10247 tree function)
483ab821 10248{
0a2aaacc 10249 rtx this_rtx, insn, funexp;
57782ad8
MM
10250 unsigned int this_parmno;
10251 unsigned int this_regno;
13f70342 10252 rtx delta_rtx;
5f13cfc6 10253
599aedd9 10254 reload_completed = 1;
fe3ad572 10255 epilogue_completed = 1;
599aedd9 10256
5f13cfc6
RH
10257 /* Set things up as ia64_expand_prologue might. */
10258 last_scratch_gr_reg = 15;
10259
10260 memset (&current_frame_info, 0, sizeof (current_frame_info));
10261 current_frame_info.spill_cfa_off = -16;
10262 current_frame_info.n_input_regs = 1;
10263 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10264
5f13cfc6 10265 /* Mark the end of the (empty) prologue. */
2e040219 10266 emit_note (NOTE_INSN_PROLOGUE_END);
5f13cfc6 10267
57782ad8
MM
10268 /* Figure out whether "this" will be the first parameter (the
10269 typical case) or the second parameter (as happens when the
10270 virtual function returns certain class objects). */
10271 this_parmno
10272 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10273 ? 1 : 0);
10274 this_regno = IN_REG (this_parmno);
10275 if (!TARGET_REG_NAMES)
10276 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10277
0a2aaacc 10278 this_rtx = gen_rtx_REG (Pmode, this_regno);
13f70342
RH
10279
10280 /* Apply the constant offset, if required. */
10281 delta_rtx = GEN_INT (delta);
36c216e5
MM
10282 if (TARGET_ILP32)
10283 {
57782ad8 10284 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
36c216e5 10285 REG_POINTER (tmp) = 1;
13f70342 10286 if (delta && satisfies_constraint_I (delta_rtx))
36c216e5 10287 {
0a2aaacc 10288 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
36c216e5
MM
10289 delta = 0;
10290 }
10291 else
0a2aaacc 10292 emit_insn (gen_ptr_extend (this_rtx, tmp));
36c216e5 10293 }
5f13cfc6
RH
10294 if (delta)
10295 {
13f70342 10296 if (!satisfies_constraint_I (delta_rtx))
5f13cfc6
RH
10297 {
10298 rtx tmp = gen_rtx_REG (Pmode, 2);
10299 emit_move_insn (tmp, delta_rtx);
10300 delta_rtx = tmp;
10301 }
0a2aaacc 10302 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
5f13cfc6
RH
10303 }
10304
10305 /* Apply the offset from the vtable, if required. */
10306 if (vcall_offset)
10307 {
10308 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10309 rtx tmp = gen_rtx_REG (Pmode, 2);
10310
36c216e5
MM
10311 if (TARGET_ILP32)
10312 {
10313 rtx t = gen_rtx_REG (ptr_mode, 2);
10314 REG_POINTER (t) = 1;
0a2aaacc 10315 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
13f70342 10316 if (satisfies_constraint_I (vcall_offset_rtx))
36c216e5 10317 {
13f70342 10318 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
36c216e5
MM
10319 vcall_offset = 0;
10320 }
10321 else
10322 emit_insn (gen_ptr_extend (tmp, t));
10323 }
10324 else
0a2aaacc 10325 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
5f13cfc6 10326
36c216e5 10327 if (vcall_offset)
5f13cfc6 10328 {
13f70342 10329 if (!satisfies_constraint_J (vcall_offset_rtx))
36c216e5
MM
10330 {
10331 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10332 emit_move_insn (tmp2, vcall_offset_rtx);
10333 vcall_offset_rtx = tmp2;
10334 }
10335 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
5f13cfc6 10336 }
5f13cfc6 10337
36c216e5 10338 if (TARGET_ILP32)
13f70342 10339 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
36c216e5
MM
10340 else
10341 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
5f13cfc6 10342
0a2aaacc 10343 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
5f13cfc6
RH
10344 }
10345
10346 /* Generate a tail call to the target function. */
10347 if (! TREE_USED (function))
10348 {
10349 assemble_external (function);
10350 TREE_USED (function) = 1;
10351 }
10352 funexp = XEXP (DECL_RTL (function), 0);
10353 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10354 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10355 insn = get_last_insn ();
10356 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
10357
10358 /* Code generation for calls relies on splitting. */
10359 reload_completed = 1;
fe3ad572 10360 epilogue_completed = 1;
599aedd9
RH
10361 try_split (PATTERN (insn), insn, 0);
10362
5f13cfc6
RH
10363 emit_barrier ();
10364
10365 /* Run just enough of rest_of_compilation to get the insns emitted.
10366 There's not really enough bulk here to make other passes such as
10367 instruction scheduling worthwhile.  Note that use_thunk calls
10368 assemble_start_function and assemble_end_function. */
599aedd9 10369
55e092c4 10370 insn_locators_alloc ();
18dbd950 10371 emit_all_insn_group_barriers (NULL);
5f13cfc6 10372 insn = get_insns ();
5f13cfc6
RH
10373 shorten_branches (insn);
10374 final_start_function (insn, file, 1);
c9d691e9 10375 final (insn, file, 1);
5f13cfc6 10376 final_end_function ();
599aedd9
RH
10377
10378 reload_completed = 0;
fe3ad572 10379 epilogue_completed = 0;
483ab821
MM
10380}
10381
351a758b
KH
10382/* Worker function for TARGET_STRUCT_VALUE_RTX. */
10383
10384static rtx
57782ad8 10385ia64_struct_value_rtx (tree fntype,
351a758b
KH
10386 int incoming ATTRIBUTE_UNUSED)
10387{
f2972bf8
DR
10388 if (TARGET_ABI_OPEN_VMS
10389       || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
57782ad8 10390 return NULL_RTX;
351a758b
KH
10391 return gen_rtx_REG (Pmode, GR_REG (8));
10392}
10393
88ed5ef5
SE
10394static bool
10395ia64_scalar_mode_supported_p (enum machine_mode mode)
10396{
10397 switch (mode)
10398 {
10399 case QImode:
10400 case HImode:
10401 case SImode:
10402 case DImode:
10403 case TImode:
10404 return true;
10405
10406 case SFmode:
10407 case DFmode:
10408 case XFmode:
4de67c26 10409 case RFmode:
88ed5ef5
SE
10410 return true;
10411
10412 case TFmode:
c252db20 10413 return true;
88ed5ef5
SE
10414
10415 default:
10416 return false;
10417 }
10418}
10419
f61134e8
RH
10420static bool
10421ia64_vector_mode_supported_p (enum machine_mode mode)
10422{
10423 switch (mode)
10424 {
10425 case V8QImode:
10426 case V4HImode:
10427 case V2SImode:
10428 return true;
10429
10430 case V2SFmode:
10431 return true;
10432
10433 default:
10434 return false;
10435 }
10436}
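/* Illustrative sketch, not part of this file: generic vector types that map
   onto the modes accepted above and can therefore stay in vector form
   instead of being lowered to scalar code.  The typedef and function names
   are hypothetical.  */
#if 0
typedef short v4hi __attribute__ ((vector_size (8)));   /* V4HImode */
typedef float v2sf __attribute__ ((vector_size (8)));   /* V2SFmode */

v4hi
add4 (v4hi a, v4hi b)
{
  return a + b;   /* a single V4HImode addition */
}
#endif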
10437
694a2f6e
EB
10438/* Implement the FUNCTION_PROFILER macro. */
10439
2b4f149b
RH
10440void
10441ia64_output_function_profiler (FILE *file, int labelno)
10442{
694a2f6e
EB
10443 bool indirect_call;
10444
10445 /* If the function needs a static chain and the static chain
10446 register is r15, we use an indirect call so as to bypass
10447 the PLT stub in case the executable is dynamically linked,
10448 because the stub clobbers r15 as per 5.3.6 of the psABI.
10449 We don't need to do that in non-canonical PIC mode. */
10450
10451 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10452 {
10453 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10454 indirect_call = true;
10455 }
10456 else
10457 indirect_call = false;
10458
2b4f149b
RH
10459 if (TARGET_GNU_AS)
10460 fputs ("\t.prologue 4, r40\n", file);
10461 else
10462 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10463 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
bd8633a3
RH
10464
10465 if (NO_PROFILE_COUNTERS)
694a2f6e 10466 fputs ("\tmov out3 = r0\n", file);
bd8633a3
RH
10467 else
10468 {
10469 char buf[20];
10470 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10471
10472 if (TARGET_AUTO_PIC)
10473 fputs ("\tmovl out3 = @gprel(", file);
10474 else
10475 fputs ("\taddl out3 = @ltoff(", file);
10476 assemble_name (file, buf);
10477 if (TARGET_AUTO_PIC)
694a2f6e 10478 fputs (")\n", file);
bd8633a3 10479 else
694a2f6e 10480 fputs ("), r1\n", file);
bd8633a3
RH
10481 }
10482
694a2f6e
EB
10483 if (indirect_call)
10484 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10485 fputs ("\t;;\n", file);
10486
2b4f149b 10487 fputs ("\t.save rp, r42\n", file);
bd8633a3 10488 fputs ("\tmov out2 = b0\n", file);
694a2f6e
EB
10489 if (indirect_call)
10490 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2b4f149b 10491 fputs ("\t.body\n", file);
2b4f149b 10492 fputs ("\tmov out1 = r1\n", file);
694a2f6e
EB
10493 if (indirect_call)
10494 {
10495 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10496 fputs ("\tmov b6 = r16\n", file);
10497 fputs ("\tld8 r1 = [r14]\n", file);
10498 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10499 }
10500 else
10501 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2b4f149b
RH
10502}
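/* For reference, the text emitted by the code above in the common case
   (GNU as, no static chain in r15, NO_PROFILE_COUNTERS) reads:

	.prologue 4, r40
	alloc out0 = ar.pfs, 8, 0, 4, 0
	mov out3 = r0
	;;
	.save rp, r42
	mov out2 = b0
	.body
	mov out1 = r1
	br.call.sptk.many b0 = _mcount
	;;
*/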
10503
d26afa4f
SE
10504static GTY(()) rtx mcount_func_rtx;
10505static rtx
10506gen_mcount_func_rtx (void)
10507{
10508 if (!mcount_func_rtx)
10509 mcount_func_rtx = init_one_libfunc ("_mcount");
10510 return mcount_func_rtx;
10511}
10512
10513void
10514ia64_profile_hook (int labelno)
10515{
10516 rtx label, ip;
10517
10518 if (NO_PROFILE_COUNTERS)
10519 label = const0_rtx;
10520 else
10521 {
10522 char buf[30];
10523 const char *label_name;
10524 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10525 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10526 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10527 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10528 }
10529 ip = gen_reg_rtx (Pmode);
10530 emit_insn (gen_ip_value (ip));
10531 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10532 VOIDmode, 3,
10533 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10534 ip, Pmode,
10535 label, Pmode);
10536}
10537
cac24f06
JM
10538/* Return the mangling of TYPE if it is an extended fundamental type. */
10539
10540static const char *
3101faab 10541ia64_mangle_type (const_tree type)
cac24f06 10542{
608063c3
JB
10543 type = TYPE_MAIN_VARIANT (type);
10544
10545 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10546 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10547 return NULL;
10548
cac24f06
JM
10549 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10550 mangled as "e". */
10551 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10552 return "g";
10553 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10554 an extended mangling. Elsewhere, "e" is available since long
10555 double is 80 bits. */
10556 if (TYPE_MODE (type) == XFmode)
10557 return TARGET_HPUX ? "u9__float80" : "e";
4de67c26
JM
10558 if (TYPE_MODE (type) == RFmode)
10559 return "u7__fpreg";
10560 return NULL;
10561}
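/* Illustrative sketch, not part of this file: manglings produced under the
   Itanium C++ ABI with the hook above.  The declarations are hypothetical.

     void f (__float80);    ->  _Z1fe            (ELF, where "e" is free)
                                _Z1fu9__float80  (HP-UX)
     void g (__float128);   ->  _Z1gg            (ELF; "e" is long double on HP-UX)
     void h (__fpreg);      ->  _Z1hu7__fpreg
*/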
10562
10563/* Return the diagnostic message string if conversion from FROMTYPE to
10564 TOTYPE is not allowed, NULL otherwise. */
10565static const char *
3101faab 10566ia64_invalid_conversion (const_tree fromtype, const_tree totype)
4de67c26
JM
10567{
10568 /* Reject nontrivial conversion to or from __fpreg. */
10569 if (TYPE_MODE (fromtype) == RFmode
10570 && TYPE_MODE (totype) != RFmode
10571 && TYPE_MODE (totype) != VOIDmode)
10572 return N_("invalid conversion from %<__fpreg%>");
10573 if (TYPE_MODE (totype) == RFmode
10574 && TYPE_MODE (fromtype) != RFmode)
10575 return N_("invalid conversion to %<__fpreg%>");
10576 return NULL;
10577}
10578
10579/* Return the diagnostic message string if the unary operation OP is
10580 not permitted on TYPE, NULL otherwise. */
10581static const char *
3101faab 10582ia64_invalid_unary_op (int op, const_tree type)
4de67c26
JM
10583{
10584 /* Reject operations on __fpreg other than unary + or &. */
10585 if (TYPE_MODE (type) == RFmode
10586 && op != CONVERT_EXPR
10587 && op != ADDR_EXPR)
10588 return N_("invalid operation on %<__fpreg%>");
10589 return NULL;
10590}
10591
10592/* Return the diagnostic message string if the binary operation OP is
10593 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10594static const char *
3101faab 10595ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
4de67c26
JM
10596{
10597 /* Reject operations on __fpreg. */
10598 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10599 return N_("invalid operation on %<__fpreg%>");
cac24f06
JM
10600 return NULL;
10601}
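/* Illustrative sketch, not part of this file: the diagnostics the three
   hooks above produce for the __fpreg type.  The variable names are
   hypothetical.  */
#if 0
__fpreg r;
double d   = r;        /* error: invalid conversion from '__fpreg' */
__fpreg r2 = 1.0;      /* error: invalid conversion to '__fpreg' */
__fpreg n  = -r;       /* error: invalid operation on '__fpreg' */
__fpreg *p = &r;       /* OK: unary '&' (and unary '+') are allowed */
#endif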
10602
bb83aa4b
MK
10603/* Implement overriding of the optimization options. */
10604void
10605ia64_optimization_options (int level ATTRIBUTE_UNUSED,
10606 int size ATTRIBUTE_UNUSED)
10607{
10608 /* Let the scheduler form additional regions. */
10609 set_param_value ("max-sched-extend-regions-iters", 2);
47eb5b32
ZD
10610
10611 /* Set the default values for cache-related parameters. */
10612 set_param_value ("simultaneous-prefetches", 6);
10613 set_param_value ("l1-cache-line-size", 32);
10614
388092d5 10615 set_param_value ("sched-mem-true-dep-cost", 4);
bb83aa4b
MK
10616}
10617
812b587e
SE
10618/* HP-UX version_id attribute.
10619 For object foo, if the version_id is set to 1234, put out an alias
10620 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
10621 other than an alias statement because it is an illegal symbol name. */
10622
10623static tree
10624ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10625 tree name ATTRIBUTE_UNUSED,
10626 tree args,
10627 int flags ATTRIBUTE_UNUSED,
10628 bool *no_add_attrs)
10629{
10630 tree arg = TREE_VALUE (args);
10631
10632 if (TREE_CODE (arg) != STRING_CST)
10633 {
10634 error ("version attribute is not a string");
10635 *no_add_attrs = true;
10636 return NULL_TREE;
10637 }
10638 return NULL_TREE;
10639}
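/* Illustrative sketch, not part of this file: how the HP-UX version_id
   attribute handled above is typically written.  The function name and
   version string are hypothetical; references to the symbol are then
   emitted through the '.alias' form described in the comment above.  */
#if 0
extern int get_limit (void) __attribute__ ((version_id ("20040821")));
#endif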
10640
a31fa2e0
SE
10641/* Target hook for c_mode_for_suffix. */
10642
10643static enum machine_mode
10644ia64_c_mode_for_suffix (char suffix)
10645{
10646 if (suffix == 'q')
10647 return TFmode;
10648 if (suffix == 'w')
10649 return XFmode;
10650
10651 return VOIDmode;
10652}
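/* Illustrative sketch, not part of this file: the constant suffixes mapped
   above.  The variable names are hypothetical.  */
#if 0
__float128 q = 1.5q;   /* 'q' suffix -> TFmode constant */
__float80  w = 2.5w;   /* 'w' suffix -> XFmode constant */
#endif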
10653
f2972bf8
DR
10654static enum machine_mode
10655ia64_promote_function_mode (const_tree type,
10656 enum machine_mode mode,
10657 int *punsignedp,
c3313412
SE
10658 const_tree funtype,
10659 int for_return)
f2972bf8
DR
10660{
10661 /* Special processing required for OpenVMS ... */
10662
10663 if (!TARGET_ABI_OPEN_VMS)
c3313412
SE
10664 return default_promote_function_mode (type, mode, punsignedp, funtype,
10665 for_return);
f2972bf8
DR
10666
10667 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10668 HP OpenVMS I64 Version 8.2EFT,
10669 chapter 4 "OpenVMS I64 Conventions"
10670 section 4.7 "Procedure Linkage"
10671 subsection 4.7.5.2, "Normal Register Parameters"
10672
10673 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10674 values passed in registers are zero-filled; signed integral values as
10675 well as unsigned 32-bit integral values are sign-extended to 64 bits.
10676 For all other types passed in the general registers, unused bits are
10677 undefined." */
10678
10679 if (!AGGREGATE_TYPE_P (type)
10680 && GET_MODE_CLASS (mode) == MODE_INT
10681 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
10682 {
10683 if (mode == SImode)
10684 *punsignedp = 0;
10685 return DImode;
10686 }
10687 else
10688 return promote_mode (type, mode, punsignedp);
10689}
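/* Illustrative sketch, not part of this file: the effect of the OpenVMS
   rule above.  For a callee such as

     extern void record (unsigned int slot, unsigned long len);

   SLOT is promoted to DImode with *punsignedp cleared, i.e. it arrives
   sign-extended to 64 bits, whereas the ELF ABIs keep the result of
   promote_mode.  The declaration is hypothetical.  */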
10690
f3a83111
SE
10691static GTY(()) rtx ia64_dconst_0_5_rtx;
10692
10693rtx
10694ia64_dconst_0_5 (void)
10695{
10696 if (! ia64_dconst_0_5_rtx)
10697 {
10698 REAL_VALUE_TYPE rv;
10699 real_from_string (&rv, "0.5");
10700 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
10701 }
10702 return ia64_dconst_0_5_rtx;
10703}
10704
10705static GTY(()) rtx ia64_dconst_0_375_rtx;
10706
10707rtx
10708ia64_dconst_0_375 (void)
10709{
10710 if (! ia64_dconst_0_375_rtx)
10711 {
10712 REAL_VALUE_TYPE rv;
10713 real_from_string (&rv, "0.375");
10714 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
10715 }
10716 return ia64_dconst_0_375_rtx;
10717}
10718
10719
e2500fed 10720#include "gt-ia64.h"