/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;       /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;              /* mask of saved registers.  */
  unsigned int gr_used_mask;      /* mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;               /* number of input registers used.  */
  int n_local_regs;               /* number of local registers used.  */
  int n_output_regs;              /* number of output registers used.  */
  int n_rotate_regs;              /* number of rotating registers used.  */

  char need_regstk;               /* true if a .regstk directive needed.  */
  char initialized;               /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
static bool ia64_needs_block_p (rtx);
static rtx ia64_gen_check (rtx, rtx, bool);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (FILE *, rtx, bool, bool);
static int process_set (FILE *, rtx, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
                                         unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (tree);
static const char *ia64_invalid_conversion (tree, tree);
static const char *ia64_invalid_unary_op (int, tree);
static const char *ia64_invalid_binary_op (int, tree, tree);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_CHECK
#define TARGET_SCHED_GEN_CHECK ia64_gen_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

struct gcc_target targetm = TARGET_INITIALIZER;

typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
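
/* As an illustration (hypothetical user code, not part of this file),
   a declaration along the lines of

       static int counter __attribute__ ((model (small)));

   carries a "model" attribute whose argument matches small_ident1, so
   ia64_get_addr_area returns ADDR_AREA_SMALL for it and the symbol's
   address can later be formed with a single "addl".  */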

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qs attribute",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error ("%Jan address area attribute cannot be specified for "
                 "local variables", decl);
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("address area of %q+D conflicts with previous "
                 "declaration", decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
             decl);
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qs attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and must be either 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

        if (GET_CODE (adjust) != CONST_INT
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
          return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

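/* A worked example with assumed constant operands: for rop = 0xff0 and
   rshift = 4, op becomes 0xff after the shift, op + 1 is 0x100, and
   exact_log2 returns 8, i.e. an 8-bit field (deposited at position 4 by
   the corresponding dep.z).  For a non-contiguous mask such as 0xf0f0
   with rshift = 4, op + 1 is 0xf10, which is not a power of two, so the
   function returns -1.  */
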
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = 0;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
         match the code in ia64_expand_move and move_operand, even though they
         are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
        {
          HOST_WIDE_INT addend = 0;
          rtx op = x;

          if (GET_CODE (op) == CONST
              && GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
            {
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
              op = XEXP (XEXP (op, 0), 0);
            }

          if (any_offset_symbol_operand (op, GET_MODE (op))
              || function_operand (op, GET_MODE (op)))
            return true;
          if (aligned_offset_symbol_operand (op, GET_MODE (op)))
            return (addend & 0x3fff) == 0;
          return false;
        }
      return false;

    case CONST_VECTOR:
      {
        enum machine_mode mode = GET_MODE (x);

        if (mode == V2SFmode)
          return satisfies_constraint_Y (x);

        return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
                && GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (rtx x)
{
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
        {
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
          hi = hi - lo;

          if (lo != 0)
            {
              addend = lo;
              src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
            }
        }

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
        {
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
        }
    }

  return true;
}
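
/* A worked example of the hi/lo split above, with an assumed offset:
   for SYM + 0x3000, lo = ((0x3000 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1000
   and hi becomes 0x4000, so the HIGH/LO_SUM pair computes SYM + 0x4000
   and the remaining -0x1000 is applied with a separate add.  This keeps
   the part folded into the relocation 16KB-aligned and the leftover
   addend within a signed 14-bit range.  */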

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
                         rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
        }
      else
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
                               orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
        {
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
          sym = XEXP (XEXP (op1, 0), 0);
        }

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
        addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
        {
          HOST_WIDE_INT addend_lo, addend_hi;

          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
          addend_hi = addend - addend_lo;

          if (addend_lo != 0)
            {
              op1 = plus_constant (sym, addend_hi);
              addend = addend_lo;
            }
          else
            addend = 0;
        }
      else
        op1 = sym;

      if (reload_completed)
        {
          /* We really should have taken care of this offset earlier.  */
          gcc_assert (addend == 0);
          if (ia64_expand_load_address (op0, op1))
            return NULL_RTX;
        }

      if (addend)
        {
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

          op1 = expand_simple_binop (mode, PLUS, subtarget,
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
          if (op0 == op1)
            return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
              p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            gcc_assert (!reversed && !dead);

            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            gcc_assert (!reversed && !dead);

            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
               8);
            break;

          case POST_MODIFY:
            gcc_assert (!reversed && !dead);

            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else
              {
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
                  {
                    /* Again the postmodify cannot be made to match,
                       but in this case it's more efficient to get rid
                       of the postmodify entirely and fix up with an
                       add insn.  */
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
                    fixup = gen_adddi3
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
                  }
                else
                  {
                    /* Combined offset still fits in the displacement field.
                       (We cannot overflow it at the high end.)  */
                    out[1] = adjust_automodify_address
                      (in, DImode, gen_rtx_POST_MODIFY
                       (Pmode, base, gen_rtx_PLUS
                        (Pmode, base,
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                       8);
                  }
              }
            break;

          default:
            gcc_unreachable ();
          }
        break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
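
/* An illustrative case, with an assumed register pair: a reference such
   as (mem:TI (reg r14)) is split into a first DImode access through
   (post_inc r14) and a second one through (post_dec r14), leaving r14
   unchanged afterwards; when DEAD is set the second access is simply
   (mem:DI (reg r14)) with no postmodify, since the pointer value no
   longer matters.  */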

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
  if (GET_CODE (EXP) == MEM                                             \
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                     \
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,                      \
                                          XEXP (XEXP (EXP, 0), 0),      \
                                          REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}

/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
          || (GET_CODE (operands[1]) == REG
              && GR_REGNO_P (REGNO (operands[1]))))
        {
          rtx op1 = operands[1];

          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          else
            op1 = gen_rtx_REG (TImode, REGNO (op1));

          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
          return true;
        }

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
        {
          /* Don't word-swap when reading in the constant.  */
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
                                           0, mode));
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
                                           0, mode));
          return true;
        }

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
         If op0 is a register, then we spill op1, so that we now have a
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
         to force the spill.  */
      if (register_operand (operands[0], mode))
        {
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
          op1 = gen_rtx_SUBREG (mode, op1, 0);
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
        }

      else
        {
          rtx in[2];

          gcc_assert (GET_CODE (operands[0]) == MEM);

          /* Don't word-swap when writing out the value.  */
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
          return true;
        }
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
        {
          rtx memt, memx, in = operands[1];
          if (CONSTANT_P (in))
            in = validize_mem (force_const_mem (mode, in));
          if (GET_CODE (in) == MEM)
            memt = adjust_address (in, TImode, 0);
          else
            {
              memt = assign_stack_temp (TImode, 16, 0);
              memx = adjust_address (memt, mode, 0);
              emit_move_insn (memx, in);
            }
          emit_move_insn (op0, memt);
          return true;
        }

      if (!ia64_move_ok (operands[0], operands[1]))
        operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
      cmp = op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
        QCMP_UNORD = 2,
        QCMP_EQ = 4,
        QCMP_LT = 8,
        QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
      switch (code)
        {
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given an SNaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
          /* Relational operators raise FP_INVALID when given
             an SNaN operand.  */
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
             Expanders for buneq etc. would have to be added to ia64.md
             for this to be useful.  */
        default: gcc_unreachable ();
        }

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                     op0, TFmode, op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (ncode, BImode,
                                              ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
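
/* For instance, an LE comparison of two TFmode values is emitted as a
   call to the _U_Qfcmp routine with the magic argument
   QCMP_LT|QCMP_EQ|QCMP_INV (i.e. 13), and the BImode result is the
   returned integer tested with NE against zero.  */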
2ed4af6f 1533
e934ca47
RH
1534/* Generate an integral vector comparison. Return true if the condition has
1535 been reversed, and so the sense of the comparison should be inverted. */
f61134e8
RH
1536
1537static bool
1538ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1539 rtx dest, rtx op0, rtx op1)
1540{
1541 bool negate = false;
1542 rtx x;
1543
e934ca47 1544 /* Canonicalize the comparison to EQ, GT, GTU. */
f61134e8
RH
1545 switch (code)
1546 {
1547 case EQ:
1548 case GT:
e934ca47 1549 case GTU:
f61134e8
RH
1550 break;
1551
1552 case NE:
f61134e8 1553 case LE:
e934ca47
RH
1554 case LEU:
1555 code = reverse_condition (code);
f61134e8
RH
1556 negate = true;
1557 break;
1558
1559 case GE:
e934ca47
RH
1560 case GEU:
1561 code = reverse_condition (code);
f61134e8
RH
1562 negate = true;
1563 /* FALLTHRU */
1564
1565 case LT:
f61134e8 1566 case LTU:
e934ca47
RH
1567 code = swap_condition (code);
1568 x = op0, op0 = op1, op1 = x;
1569 break;
f61134e8 1570
e934ca47
RH
1571 default:
1572 gcc_unreachable ();
1573 }
f61134e8 1574
e934ca47 1575 /* Unsigned parallel compare is not supported by the hardware. Play some
6283ba26 1576 tricks to turn this into a signed comparison against 0. */
e934ca47
RH
1577 if (code == GTU)
1578 {
1579 switch (mode)
1580 {
1581 case V2SImode:
f61134e8 1582 {
e934ca47
RH
1583 rtx t1, t2, mask;
1584
1585 /* Perform a parallel modulo subtraction. */
1586 t1 = gen_reg_rtx (V2SImode);
1587 emit_insn (gen_subv2si3 (t1, op0, op1));
1588
1589 /* Extract the original sign bit of op0. */
1590 mask = GEN_INT (-0x80000000);
1591 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1592 mask = force_reg (V2SImode, mask);
1593 t2 = gen_reg_rtx (V2SImode);
1594 emit_insn (gen_andv2si3 (t2, op0, mask));
1595
1596 /* XOR it back into the result of the subtraction. This results
1597 in the sign bit set iff we saw unsigned underflow. */
1598 x = gen_reg_rtx (V2SImode);
1599 emit_insn (gen_xorv2si3 (x, t1, t2));
6283ba26
RH
1600
1601 code = GT;
1602 op0 = x;
1603 op1 = CONST0_RTX (mode);
f61134e8 1604 }
e934ca47
RH
1605 break;
1606
1607 case V8QImode:
1608 case V4HImode:
1609 /* Perform a parallel unsigned saturating subtraction. */
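	    /* Added note (not in the original sources): with unsigned
	       saturating subtraction, us_minus (a, b) is zero exactly
	       when a <= b, so

		 a >u b   <=>   us_minus (a, b) != 0.

	       The EQ test against zero below computes the complement,
	       which is why NEGATE is flipped.  */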
1610 x = gen_reg_rtx (mode);
1611 emit_insn (gen_rtx_SET (VOIDmode, x,
1612 gen_rtx_US_MINUS (mode, op0, op1)));
6283ba26
RH
1613
1614 code = EQ;
1615 op0 = x;
1616 op1 = CONST0_RTX (mode);
1617 negate = !negate;
e934ca47
RH
1618 break;
1619
1620 default:
1621 gcc_unreachable ();
1622 }
f61134e8
RH
1623 }
1624
1625 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1626 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1627
1628 return negate;
1629}
1630
f61134e8
RH
1631/* Emit an integral vector conditional move. */
1632
1633void
1634ia64_expand_vecint_cmov (rtx operands[])
1635{
1636 enum machine_mode mode = GET_MODE (operands[0]);
1637 enum rtx_code code = GET_CODE (operands[3]);
1638 bool negate;
1639 rtx cmp, x, ot, of;
1640
f61134e8
RH
1641 cmp = gen_reg_rtx (mode);
1642 negate = ia64_expand_vecint_compare (code, mode, cmp,
1643 operands[4], operands[5]);
1644
1645 ot = operands[1+negate];
1646 of = operands[2-negate];
1647
1648 if (ot == CONST0_RTX (mode))
1649 {
1650 if (of == CONST0_RTX (mode))
1651 {
1652 emit_move_insn (operands[0], ot);
1653 return;
1654 }
1655
1656 x = gen_rtx_NOT (mode, cmp);
1657 x = gen_rtx_AND (mode, x, of);
1658 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1659 }
1660 else if (of == CONST0_RTX (mode))
1661 {
1662 x = gen_rtx_AND (mode, cmp, ot);
1663 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1664 }
1665 else
1666 {
1667 rtx t, f;
1668
1669 t = gen_reg_rtx (mode);
1670 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1671 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1672
1673 f = gen_reg_rtx (mode);
1674 x = gen_rtx_NOT (mode, cmp);
1675 x = gen_rtx_AND (mode, x, operands[2-negate]);
1676 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1677
1678 x = gen_rtx_IOR (mode, t, f);
1679 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1680 }
1681}
1682
1683/* Emit an integral vector min or max operation. Return true if all done. */
1684
1685bool
1686ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1687 rtx operands[])
1688{
cabddb23 1689 rtx xops[6];
f61134e8
RH
1690
1691 /* These four combinations are supported directly. */
1692 if (mode == V8QImode && (code == UMIN || code == UMAX))
1693 return false;
1694 if (mode == V4HImode && (code == SMIN || code == SMAX))
1695 return false;
1696
93b4080b
RH
1697 /* This combination can be implemented with only saturating subtraction. */
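  /* Added illustration of the identity used here: with unsigned
     saturating subtraction,

	umax (a, b) = us_minus (a, b) + b

     because the right-hand side is (a - b) + b = a when a >= b,
     and 0 + b = b otherwise.  */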
1698 if (mode == V4HImode && code == UMAX)
1699 {
1700 rtx x, tmp = gen_reg_rtx (mode);
1701
1702 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1703 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1704
1705 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1706 return true;
1707 }
1708
f61134e8
RH
1709 /* Everything else implemented via vector comparisons. */
1710 xops[0] = operands[0];
1711 xops[4] = xops[1] = operands[1];
1712 xops[5] = xops[2] = operands[2];
1713
1714 switch (code)
1715 {
1716 case UMIN:
1717 code = LTU;
1718 break;
1719 case UMAX:
1720 code = GTU;
1721 break;
1722 case SMIN:
1723 code = LT;
1724 break;
1725 case SMAX:
1726 code = GT;
1727 break;
1728 default:
e820471b 1729 gcc_unreachable ();
f61134e8
RH
1730 }
1731 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1732
1733 ia64_expand_vecint_cmov (xops);
1734 return true;
1735}
1736
e898620c
RH
1737/* Emit an integral vector widening sum operation. */
1738
1739void
1740ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1741{
1742 rtx l, h, x, s;
1743 enum machine_mode wmode, mode;
1744 rtx (*unpack_l) (rtx, rtx, rtx);
1745 rtx (*unpack_h) (rtx, rtx, rtx);
1746 rtx (*plus) (rtx, rtx, rtx);
1747
1748 wmode = GET_MODE (operands[0]);
1749 mode = GET_MODE (operands[1]);
1750
1751 switch (mode)
1752 {
1753 case V8QImode:
1754 unpack_l = gen_unpack1_l;
1755 unpack_h = gen_unpack1_h;
1756 plus = gen_addv4hi3;
1757 break;
1758 case V4HImode:
1759 unpack_l = gen_unpack2_l;
1760 unpack_h = gen_unpack2_h;
1761 plus = gen_addv2si3;
1762 break;
1763 default:
1764 gcc_unreachable ();
1765 }
1766
1767 /* Fill in x with the sign extension of each element in op1. */
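  /* Added note: in the signed case, the per-element LT comparison
     against zero below produces an all-ones mask for negative elements
     and zero otherwise, which is exactly the high half that the unpack
     insns need when interleaving op1 with X.  In the unsigned case a
     zero vector plays the same role.  */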
1768 if (unsignedp)
1769 x = CONST0_RTX (mode);
1770 else
1771 {
1772 bool neg;
1773
1774 x = gen_reg_rtx (mode);
1775
1776 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1777 CONST0_RTX (mode));
1778 gcc_assert (!neg);
1779 }
1780
1781 l = gen_reg_rtx (wmode);
1782 h = gen_reg_rtx (wmode);
1783 s = gen_reg_rtx (wmode);
1784
1785 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1786 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1787 emit_insn (plus (s, l, operands[2]));
1788 emit_insn (plus (operands[0], h, s));
1789}
1790
1791/* Emit a signed or unsigned V8QI dot product operation. */
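/* Added summary of the expansion below: each V8QI operand is widened
   into low and high V4HI halves (the unpack insns interleave the
   operand with X1/X2, which hold zeros or per-element sign masks),
   the four halves are multiplied pairwise into V2SI partial products,
   and those partial products are folded into the V2SI accumulator
   passed in operands[3].  */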
1792
1793void
1794ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1795{
1796 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1797
1798 /* Fill in x1 and x2 with the sign extension of each element. */
1799 if (unsignedp)
1800 x1 = x2 = CONST0_RTX (V8QImode);
1801 else
1802 {
1803 bool neg;
1804
1805 x1 = gen_reg_rtx (V8QImode);
1806 x2 = gen_reg_rtx (V8QImode);
1807
1808 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1809 CONST0_RTX (V8QImode));
1810 gcc_assert (!neg);
1811 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1812 CONST0_RTX (V8QImode));
1813 gcc_assert (!neg);
1814 }
1815
1816 l1 = gen_reg_rtx (V4HImode);
1817 l2 = gen_reg_rtx (V4HImode);
1818 h1 = gen_reg_rtx (V4HImode);
1819 h2 = gen_reg_rtx (V4HImode);
1820
1821 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1822 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1823 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1824 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1825
1826 p1 = gen_reg_rtx (V2SImode);
1827 p2 = gen_reg_rtx (V2SImode);
1828 p3 = gen_reg_rtx (V2SImode);
1829 p4 = gen_reg_rtx (V2SImode);
1830 emit_insn (gen_pmpy2_r (p1, l1, l2));
1831 emit_insn (gen_pmpy2_l (p2, l1, l2));
1832 emit_insn (gen_pmpy2_r (p3, h1, h2));
1833 emit_insn (gen_pmpy2_l (p4, h1, h2));
1834
1835 s1 = gen_reg_rtx (V2SImode);
1836 s2 = gen_reg_rtx (V2SImode);
1837 s3 = gen_reg_rtx (V2SImode);
1838 emit_insn (gen_addv2si3 (s1, p1, p2));
1839 emit_insn (gen_addv2si3 (s2, p3, p4));
1840 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1841 emit_insn (gen_addv2si3 (operands[0], s2, s3));
1842}
1843
2ed4af6f
RH
1844/* Emit the appropriate sequence for a call. */
1845
1846void
9c808aad
AJ
1847ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1848 int sibcall_p)
2ed4af6f 1849{
599aedd9 1850 rtx insn, b0;
2ed4af6f
RH
1851
1852 addr = XEXP (addr, 0);
c8083186 1853 addr = convert_memory_address (DImode, addr);
2ed4af6f 1854 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 1855
599aedd9 1856 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
1857 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1858 {
1859 if (sibcall_p)
599aedd9 1860 insn = gen_sibcall_nogp (addr);
2ed4af6f 1861 else if (! retval)
599aedd9 1862 insn = gen_call_nogp (addr, b0);
2ed4af6f 1863 else
599aedd9
RH
1864 insn = gen_call_value_nogp (retval, addr, b0);
1865 insn = emit_call_insn (insn);
2ed4af6f 1866 }
2ed4af6f 1867 else
599aedd9
RH
1868 {
1869 if (sibcall_p)
1870 insn = gen_sibcall_gp (addr);
1871 else if (! retval)
1872 insn = gen_call_gp (addr, b0);
1873 else
1874 insn = gen_call_value_gp (retval, addr, b0);
1875 insn = emit_call_insn (insn);
2ed4af6f 1876
599aedd9
RH
1877 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1878 }
6dad5a56 1879
599aedd9 1880 if (sibcall_p)
4e14f1f9 1881 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
599aedd9
RH
1882}
1883
6fb5fa3c
DB
1884static void
1885reg_emitted (enum ia64_frame_regs r)
1886{
1887 if (emitted_frame_related_regs[r] == 0)
1888 emitted_frame_related_regs[r] = current_frame_info.r[r];
1889 else
1890 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
1891}
1892
1893static int
1894get_reg (enum ia64_frame_regs r)
1895{
1896 reg_emitted (r);
1897 return current_frame_info.r[r];
1898}
1899
1900static bool
1901is_emitted (int regno)
1902{
1903 enum ia64_frame_regs r;
1904
1905 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
1906 if (emitted_frame_related_regs[r] == regno)
1907 return true;
1908 return false;
1909}
1910
599aedd9 1911void
9c808aad 1912ia64_reload_gp (void)
599aedd9
RH
1913{
1914 rtx tmp;
1915
6fb5fa3c
DB
1916 if (current_frame_info.r[reg_save_gp])
1917 {
1918 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
1919 }
2ed4af6f 1920 else
599aedd9
RH
1921 {
1922 HOST_WIDE_INT offset;
13f70342 1923 rtx offset_r;
599aedd9
RH
1924
1925 offset = (current_frame_info.spill_cfa_off
1926 + current_frame_info.spill_size);
1927 if (frame_pointer_needed)
1928 {
1929 tmp = hard_frame_pointer_rtx;
1930 offset = -offset;
1931 }
1932 else
1933 {
1934 tmp = stack_pointer_rtx;
1935 offset = current_frame_info.total_size - offset;
1936 }
1937
13f70342
RH
1938 offset_r = GEN_INT (offset);
1939 if (satisfies_constraint_I (offset_r))
1940 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
599aedd9
RH
1941 else
1942 {
13f70342 1943 emit_move_insn (pic_offset_table_rtx, offset_r);
599aedd9
RH
1944 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1945 pic_offset_table_rtx, tmp));
1946 }
1947
1948 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1949 }
1950
1951 emit_move_insn (pic_offset_table_rtx, tmp);
1952}
1953
1954void
9c808aad
AJ
1955ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1956 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
1957{
1958 rtx insn;
1959 bool is_desc = false;
1960
1961 /* If we find we're calling through a register, then we're actually
1962 calling through a descriptor, so load up the values. */
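  /* Added background note (see the IA-64 software conventions for the
     authoritative layout): an indirect function "pointer" in this ABI
     normally addresses a two-word function descriptor,

	word 0: entry point of the callee
	word 1: gp value the callee expects,

     which is why both words are loaded below before branching.  */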
4e14f1f9 1963 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
1964 {
1965 rtx tmp;
1966 bool addr_dead_p;
1967
1968 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 1969 we can legitimately change the global lifetime of the GP
9c808aad 1970 (in the form of killing where previously live). This is
599aedd9
RH
1971 because a call through a descriptor doesn't use the previous
1972 value of the GP, while a direct call does, and we do not
1973 commit to either form until the split here.
1974
1975 That said, this means that we lack precise life info for
1976 whether ADDR is dead after this call. This is not terribly
1977 important, since we can fix things up essentially for free
1978 with the POST_DEC below, but it's nice to not use it when we
1979 can immediately tell it's not necessary. */
1980 addr_dead_p = ((noreturn_p || sibcall_p
1981 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1982 REGNO (addr)))
1983 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1984
1985 /* Load the code address into scratch_b. */
1986 tmp = gen_rtx_POST_INC (Pmode, addr);
1987 tmp = gen_rtx_MEM (Pmode, tmp);
1988 emit_move_insn (scratch_r, tmp);
1989 emit_move_insn (scratch_b, scratch_r);
1990
1991 /* Load the GP address. If ADDR is not dead here, then we must
1992 revert the change made above via the POST_INCREMENT. */
1993 if (!addr_dead_p)
1994 tmp = gen_rtx_POST_DEC (Pmode, addr);
1995 else
1996 tmp = addr;
1997 tmp = gen_rtx_MEM (Pmode, tmp);
1998 emit_move_insn (pic_offset_table_rtx, tmp);
1999
2000 is_desc = true;
2001 addr = scratch_b;
2002 }
2ed4af6f 2003
6dad5a56 2004 if (sibcall_p)
599aedd9
RH
2005 insn = gen_sibcall_nogp (addr);
2006 else if (retval)
2007 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 2008 else
599aedd9 2009 insn = gen_call_nogp (addr, retaddr);
6dad5a56 2010 emit_call_insn (insn);
2ed4af6f 2011
599aedd9
RH
2012 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2013 ia64_reload_gp ();
2ed4af6f 2014}
16df4ee6
RH
2015
2016/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2017
2018 This differs from the generic code in that we know about the zero-extending
2019 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2020 also know that ld.acq+cmpxchg.rel equals a full barrier.
2021
2022 The loop we want to generate looks like
2023
2024 cmp_reg = mem;
2025 label:
2026 old_reg = cmp_reg;
2027 new_reg = cmp_reg op val;
2028 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2029 if (cmp_reg != old_reg)
2030 goto label;
2031
2032 Note that we only do the plain load from memory once. Subsequent
2033 iterations use the value loaded by the compare-and-swap pattern. */
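/* Added usage note: this is the loop shape needed by the __sync_*
   style builtins (e.g. a fetch-and-or) when the operation cannot be
   done with a single instruction; only PLUS/MINUS of the handful of
   immediates accepted by fetchadd_operand take the fetchadd shortcut
   in the special case below.  */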
2034
2035void
2036ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2037 rtx old_dst, rtx new_dst)
2038{
2039 enum machine_mode mode = GET_MODE (mem);
2040 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2041 enum insn_code icode;
2042
2043 /* Special case for using fetchadd. */
dca13767
JJ
2044 if ((mode == SImode || mode == DImode)
2045 && (code == PLUS || code == MINUS)
2046 && fetchadd_operand (val, mode))
16df4ee6 2047 {
dca13767
JJ
2048 if (code == MINUS)
2049 val = GEN_INT (-INTVAL (val));
2050
16df4ee6
RH
2051 if (!old_dst)
2052 old_dst = gen_reg_rtx (mode);
2053
2054 emit_insn (gen_memory_barrier ());
2055
2056 if (mode == SImode)
2057 icode = CODE_FOR_fetchadd_acq_si;
2058 else
2059 icode = CODE_FOR_fetchadd_acq_di;
2060 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2061
2062 if (new_dst)
2063 {
2064 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2065 true, OPTAB_WIDEN);
2066 if (new_reg != new_dst)
2067 emit_move_insn (new_dst, new_reg);
2068 }
2069 return;
2070 }
2071
2072 /* Because of the volatile mem read, we get an ld.acq, which is the
2073 front half of the full barrier. The end half is the cmpxchg.rel. */
2074 gcc_assert (MEM_VOLATILE_P (mem));
2075
2076 old_reg = gen_reg_rtx (DImode);
2077 cmp_reg = gen_reg_rtx (DImode);
2078 label = gen_label_rtx ();
2079
2080 if (mode != DImode)
2081 {
2082 val = simplify_gen_subreg (DImode, val, mode, 0);
2083 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2084 }
2085 else
2086 emit_move_insn (cmp_reg, mem);
2087
2088 emit_label (label);
2089
2090 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2091 emit_move_insn (old_reg, cmp_reg);
2092 emit_move_insn (ar_ccv, cmp_reg);
2093
2094 if (old_dst)
2095 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2096
2097 new_reg = cmp_reg;
2098 if (code == NOT)
2099 {
2100 new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2101 code = AND;
2102 }
2103 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2104 true, OPTAB_DIRECT);
2105
2106 if (mode != DImode)
2107 new_reg = gen_lowpart (mode, new_reg);
2108 if (new_dst)
2109 emit_move_insn (new_dst, new_reg);
2110
2111 switch (mode)
2112 {
2113 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2114 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2115 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2116 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2117 default:
2118 gcc_unreachable ();
2119 }
2120
2121 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2122
6819a463 2123 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
16df4ee6 2124}
809d4ef1 2125\f
3b572406
RH
2126/* Begin the assembly file. */
2127
1bc7c5b6 2128static void
9c808aad 2129ia64_file_start (void)
1bc7c5b6 2130{
0f666d6e
JJ
2131 /* Variable tracking should be run after all optimizations which change order
2132 of insns. It also needs a valid CFG. This can't be done in
ee30710d 2133 ia64_override_options, because flag_var_tracking is finalized after
0f666d6e
JJ
2134 that. */
2135 ia64_flag_var_tracking = flag_var_tracking;
2136 flag_var_tracking = 0;
2137
1bc7c5b6
ZW
2138 default_file_start ();
2139 emit_safe_across_calls ();
2140}
2141
3b572406 2142void
9c808aad 2143emit_safe_across_calls (void)
3b572406
RH
2144{
2145 unsigned int rs, re;
2146 int out_state;
2147
2148 rs = 1;
2149 out_state = 0;
2150 while (1)
2151 {
2152 while (rs < 64 && call_used_regs[PR_REG (rs)])
2153 rs++;
2154 if (rs >= 64)
2155 break;
2156 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2157 continue;
2158 if (out_state == 0)
2159 {
1bc7c5b6 2160 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
2161 out_state = 1;
2162 }
2163 else
1bc7c5b6 2164 fputc (',', asm_out_file);
3b572406 2165 if (re == rs + 1)
1bc7c5b6 2166 fprintf (asm_out_file, "p%u", rs);
3b572406 2167 else
1bc7c5b6 2168 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
2169 rs = re + 1;
2170 }
2171 if (out_state)
1bc7c5b6 2172 fputc ('\n', asm_out_file);
3b572406
RH
2173}
2174
812b587e
SE
2175/* Globalize a declaration. */
2176
2177static void
2178ia64_globalize_decl_name (FILE * stream, tree decl)
2179{
2180 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2181 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2182 if (version_attr)
2183 {
2184 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2185 const char *p = TREE_STRING_POINTER (v);
2186 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2187 }
2188 targetm.asm_out.globalize_label (stream, name);
2189 if (TREE_CODE (decl) == FUNCTION_DECL)
2190 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2191}
2192
97e242b0
RH
2193/* Helper function for ia64_compute_frame_size: find an appropriate general
2194 register to spill some special register to. SPECIAL_SPILL_MASK contains
2195 bits in GR0 to GR31 that have already been allocated by this routine.
2196 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 2197
97e242b0 2198static int
6fb5fa3c 2199find_gr_spill (enum ia64_frame_regs r, int try_locals)
97e242b0
RH
2200{
2201 int regno;
2202
6fb5fa3c
DB
2203 if (emitted_frame_related_regs[r] != 0)
2204 {
2205 regno = emitted_frame_related_regs[r];
2206 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed))
2207 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2208 else if (current_function_is_leaf
2209 && regno >= GR_REG (1) && regno <= GR_REG (31))
2210 current_frame_info.gr_used_mask |= 1 << regno;
2211
2212 return regno;
2213 }
2214
97e242b0
RH
2215 /* If this is a leaf function, first try an otherwise unused
2216 call-clobbered register. */
2217 if (current_function_is_leaf)
2218 {
2219 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2220 if (! df_regs_ever_live_p (regno)
97e242b0
RH
2221 && call_used_regs[regno]
2222 && ! fixed_regs[regno]
2223 && ! global_regs[regno]
6fb5fa3c
DB
2224 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2225 && ! is_emitted (regno))
97e242b0
RH
2226 {
2227 current_frame_info.gr_used_mask |= 1 << regno;
2228 return regno;
2229 }
2230 }
2231
2232 if (try_locals)
2233 {
2234 regno = current_frame_info.n_local_regs;
9502c558
JW
2235 /* If there is a frame pointer, then we can't use loc79, because
2236 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2237 reg_name switching code in ia64_expand_prologue. */
2238 if (regno < (80 - frame_pointer_needed))
97e242b0
RH
2239 {
2240 current_frame_info.n_local_regs = regno + 1;
2241 return LOC_REG (0) + regno;
2242 }
2243 }
2244
2245 /* Failed to find a general register to spill to. Must use stack. */
2246 return 0;
2247}
2248
2249/* In order to make for nice schedules, we try to allocate every temporary
2250 to a different register. We must of course stay away from call-saved,
2251 fixed, and global registers. We must also stay away from registers
2252 allocated in current_frame_info.gr_used_mask, since those include regs
2253 used all through the prologue.
2254
2255 Any register allocated here must be used immediately. The idea is to
2256 aid scheduling, not to solve data flow problems. */
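/* Added worked example: with last_scratch_gr_reg == 15, the loop below
   considers r16, r17, ... r31, then wraps around to r0 ... r15, and
   returns the first register that is call-clobbered, not fixed or
   global, and not yet claimed in current_frame_info.gr_used_mask.  */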
2257
2258static int last_scratch_gr_reg;
2259
2260static int
9c808aad 2261next_scratch_gr_reg (void)
97e242b0
RH
2262{
2263 int i, regno;
2264
2265 for (i = 0; i < 32; ++i)
2266 {
2267 regno = (last_scratch_gr_reg + i + 1) & 31;
2268 if (call_used_regs[regno]
2269 && ! fixed_regs[regno]
2270 && ! global_regs[regno]
2271 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2272 {
2273 last_scratch_gr_reg = regno;
2274 return regno;
2275 }
2276 }
2277
2278 /* There must be _something_ available. */
e820471b 2279 gcc_unreachable ();
97e242b0
RH
2280}
2281
2282/* Helper function for ia64_compute_frame_size, called through
2283 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2284
2285static void
9c808aad 2286mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2287{
97e242b0
RH
2288 unsigned int regno = REGNO (reg);
2289 if (regno < 32)
f95e79cc 2290 {
c8b622ff 2291 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
f95e79cc
RH
2292 for (i = 0; i < n; ++i)
2293 current_frame_info.gr_used_mask |= 1 << (regno + i);
2294 }
c65ebc55
JW
2295}
2296
6fb5fa3c 2297
c65ebc55
JW
2298/* Returns the number of bytes offset between the frame pointer and the stack
2299 pointer for the current function. SIZE is the number of bytes of space
2300 needed for local variables. */
97e242b0
RH
2301
2302static void
9c808aad 2303ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2304{
97e242b0
RH
2305 HOST_WIDE_INT total_size;
2306 HOST_WIDE_INT spill_size = 0;
2307 HOST_WIDE_INT extra_spill_size = 0;
2308 HOST_WIDE_INT pretend_args_size;
c65ebc55 2309 HARD_REG_SET mask;
97e242b0
RH
2310 int n_spilled = 0;
2311 int spilled_gr_p = 0;
2312 int spilled_fr_p = 0;
2313 unsigned int regno;
2314 int i;
c65ebc55 2315
97e242b0
RH
2316 if (current_frame_info.initialized)
2317 return;
294dac80 2318
97e242b0 2319 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2320 CLEAR_HARD_REG_SET (mask);
2321
97e242b0
RH
2322 /* Don't allocate scratches to the return register. */
2323 diddle_return_value (mark_reg_gr_used_mask, NULL);
2324
2325 /* Don't allocate scratches to the EH scratch registers. */
2326 if (cfun->machine->ia64_eh_epilogue_sp)
2327 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2328 if (cfun->machine->ia64_eh_epilogue_bsp)
2329 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2330
97e242b0
RH
2331 /* Find the size of the register stack frame. We have only 80 local
2332 registers, because we reserve 8 for the inputs and 8 for the
2333 outputs. */
2334
2335 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2336 since we'll be adjusting that down later. */
2337 regno = LOC_REG (78) + ! frame_pointer_needed;
2338 for (; regno >= LOC_REG (0); regno--)
6fb5fa3c 2339 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
97e242b0
RH
2340 break;
2341 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2342
3f67ac08
DM
2343 /* For functions marked with the syscall_linkage attribute, we must mark
2344 all eight input registers as in use, so that locals aren't visible to
2345 the caller. */
2346
2347 if (cfun->machine->n_varargs > 0
2348 || lookup_attribute ("syscall_linkage",
2349 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2350 current_frame_info.n_input_regs = 8;
2351 else
2352 {
2353 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
6fb5fa3c 2354 if (df_regs_ever_live_p (regno))
97e242b0
RH
2355 break;
2356 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2357 }
2358
2359 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
6fb5fa3c 2360 if (df_regs_ever_live_p (regno))
97e242b0
RH
2361 break;
2362 i = regno - OUT_REG (0) + 1;
2363
d26afa4f 2364#ifndef PROFILE_HOOK
97e242b0 2365 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2366 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2367 profiling, we need two output registers for the two bb_init_trace_func
2368 arguments. */
70f4f91c 2369 if (current_function_profile)
97e242b0 2370 i = MAX (i, 1);
d26afa4f 2371#endif
97e242b0
RH
2372 current_frame_info.n_output_regs = i;
2373
2374 /* ??? No rotating register support yet. */
2375 current_frame_info.n_rotate_regs = 0;
2376
2377 /* Discover which registers need spilling, and how much room that
9c808aad 2378 will take. Begin with floating point and general registers,
97e242b0
RH
2379 which will always wind up on the stack. */
2380
2381 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
6fb5fa3c 2382 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2383 {
2384 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2385 spill_size += 16;
2386 n_spilled += 1;
2387 spilled_fr_p = 1;
c65ebc55
JW
2388 }
2389
97e242b0 2390 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2391 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2392 {
2393 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2394 spill_size += 8;
2395 n_spilled += 1;
2396 spilled_gr_p = 1;
c65ebc55
JW
2397 }
2398
97e242b0 2399 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
6fb5fa3c 2400 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2401 {
2402 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2403 spill_size += 8;
2404 n_spilled += 1;
c65ebc55
JW
2405 }
2406
97e242b0
RH
2407 /* Now come all special registers that might get saved in other
2408 general registers. */
9c808aad 2409
97e242b0
RH
2410 if (frame_pointer_needed)
2411 {
6fb5fa3c 2412 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
0c35f902
JW
2413 /* If we did not get a register, then we take LOC79. This is guaranteed
2414 to be free, even if regs_ever_live is already set, because this is
2415 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2416 as we don't count loc79 above. */
6fb5fa3c 2417 if (current_frame_info.r[reg_fp] == 0)
0c35f902 2418 {
6fb5fa3c
DB
2419 current_frame_info.r[reg_fp] = LOC_REG (79);
2420 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
0c35f902 2421 }
97e242b0
RH
2422 }
2423
2424 if (! current_function_is_leaf)
c65ebc55 2425 {
97e242b0
RH
2426 /* Emit a save of BR0 if we call other functions. Do this even
2427 if this function doesn't return, as EH depends on this to be
2428 able to unwind the stack. */
2429 SET_HARD_REG_BIT (mask, BR_REG (0));
2430
6fb5fa3c
DB
2431 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2432 if (current_frame_info.r[reg_save_b0] == 0)
97e242b0 2433 {
ae1e2d4c 2434 extra_spill_size += 8;
97e242b0
RH
2435 n_spilled += 1;
2436 }
2437
2438 /* Similarly for ar.pfs. */
2439 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2440 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2441 if (current_frame_info.r[reg_save_ar_pfs] == 0)
97e242b0
RH
2442 {
2443 extra_spill_size += 8;
2444 n_spilled += 1;
2445 }
599aedd9
RH
2446
2447 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2448 registers are clobbered, so we fall back to the stack. */
6fb5fa3c
DB
2449 current_frame_info.r[reg_save_gp]
2450 = (current_function_calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2451 if (current_frame_info.r[reg_save_gp] == 0)
599aedd9
RH
2452 {
2453 SET_HARD_REG_BIT (mask, GR_REG (1));
2454 spill_size += 8;
2455 n_spilled += 1;
2456 }
c65ebc55
JW
2457 }
2458 else
97e242b0 2459 {
6fb5fa3c 2460 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
97e242b0
RH
2461 {
2462 SET_HARD_REG_BIT (mask, BR_REG (0));
ae1e2d4c 2463 extra_spill_size += 8;
97e242b0
RH
2464 n_spilled += 1;
2465 }
f5bdba44 2466
6fb5fa3c 2467 if (df_regs_ever_live_p (AR_PFS_REGNUM))
f5bdba44
RH
2468 {
2469 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2470 current_frame_info.r[reg_save_ar_pfs]
2471 = find_gr_spill (reg_save_ar_pfs, 1);
2472 if (current_frame_info.r[reg_save_ar_pfs] == 0)
f5bdba44
RH
2473 {
2474 extra_spill_size += 8;
2475 n_spilled += 1;
2476 }
2477 }
97e242b0 2478 }
c65ebc55 2479
97e242b0
RH
2480 /* Unwind descriptor hackery: things are most efficient if we allocate
2481 consecutive GR save registers for RP, PFS, FP in that order. However,
2482 it is absolutely critical that FP get the only hard register that's
2483 guaranteed to be free, so we allocated it first. If all three did
2484 happen to be allocated hard regs, and are consecutive, rearrange them
6fb5fa3c
DB
2485 into the preferred order now.
2486
2487 If we have already emitted code for any of those registers,
2488 then it's already too late to change. */
2489 if (current_frame_info.r[reg_fp] != 0
2490 && current_frame_info.r[reg_save_b0] == current_frame_info.r[reg_fp] + 1
2491 && current_frame_info.r[reg_save_ar_pfs] == current_frame_info.r[reg_fp] + 2
2492 && emitted_frame_related_regs[reg_save_b0] == 0
2493 && emitted_frame_related_regs[reg_save_ar_pfs] == 0
2494 && emitted_frame_related_regs[reg_fp] == 0)
5527bf14 2495 {
6fb5fa3c
DB
2496 current_frame_info.r[reg_save_b0] = current_frame_info.r[reg_fp];
2497 current_frame_info.r[reg_save_ar_pfs] = current_frame_info.r[reg_fp] + 1;
2498 current_frame_info.r[reg_fp] = current_frame_info.r[reg_fp] + 2;
5527bf14
RH
2499 }
2500
97e242b0
RH
2501 /* See if we need to store the predicate register block. */
2502 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2503 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
97e242b0
RH
2504 break;
2505 if (regno <= PR_REG (63))
c65ebc55 2506 {
97e242b0 2507 SET_HARD_REG_BIT (mask, PR_REG (0));
6fb5fa3c
DB
2508 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2509 if (current_frame_info.r[reg_save_pr] == 0)
97e242b0
RH
2510 {
2511 extra_spill_size += 8;
2512 n_spilled += 1;
2513 }
2514
2515 /* ??? Mark them all as used so that register renaming and such
2516 are free to use them. */
2517 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2518 df_set_regs_ever_live (regno, true);
c65ebc55
JW
2519 }
2520
97e242b0 2521 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2522 ar.unat as well. The check for existing liveness allows inline asm
2523 to touch ar.unat. */
2524 if (spilled_gr_p || cfun->machine->n_varargs
6fb5fa3c 2525 || df_regs_ever_live_p (AR_UNAT_REGNUM))
97e242b0 2526 {
6fb5fa3c 2527 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
97e242b0 2528 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
6fb5fa3c
DB
2529 current_frame_info.r[reg_save_ar_unat]
2530 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2531 if (current_frame_info.r[reg_save_ar_unat] == 0)
97e242b0
RH
2532 {
2533 extra_spill_size += 8;
2534 n_spilled += 1;
2535 }
2536 }
2537
6fb5fa3c 2538 if (df_regs_ever_live_p (AR_LC_REGNUM))
97e242b0
RH
2539 {
2540 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
6fb5fa3c
DB
2541 current_frame_info.r[reg_save_ar_lc]
2542 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2543 if (current_frame_info.r[reg_save_ar_lc] == 0)
97e242b0
RH
2544 {
2545 extra_spill_size += 8;
2546 n_spilled += 1;
2547 }
2548 }
2549
2550 /* If we have an odd number of words of pretend arguments written to
2551 the stack, then the FR save area will be unaligned. We round the
2552 size of this area up to keep things 16 byte aligned. */
2553 if (spilled_fr_p)
2554 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2555 else
2556 pretend_args_size = current_function_pretend_args_size;
2557
2558 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2559 + current_function_outgoing_args_size);
2560 total_size = IA64_STACK_ALIGN (total_size);
2561
2562 /* We always use the 16-byte scratch area provided by the caller, but
2563 if we are a leaf function, there's no one to which we need to provide
2564 a scratch area. */
2565 if (current_function_is_leaf)
2566 total_size = MAX (0, total_size - 16);
2567
c65ebc55 2568 current_frame_info.total_size = total_size;
97e242b0
RH
2569 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2570 current_frame_info.spill_size = spill_size;
2571 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2572 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2573 current_frame_info.n_spilled = n_spilled;
c65ebc55 2574 current_frame_info.initialized = reload_completed;
97e242b0
RH
2575}
2576
2577/* Compute the initial difference between the specified pair of registers. */
2578
2579HOST_WIDE_INT
9c808aad 2580ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2581{
2582 HOST_WIDE_INT offset;
2583
2584 ia64_compute_frame_size (get_frame_size ());
2585 switch (from)
2586 {
2587 case FRAME_POINTER_REGNUM:
e820471b 2588 switch (to)
97e242b0 2589 {
e820471b 2590 case HARD_FRAME_POINTER_REGNUM:
97e242b0
RH
2591 if (current_function_is_leaf)
2592 offset = -current_frame_info.total_size;
2593 else
2594 offset = -(current_frame_info.total_size
2595 - current_function_outgoing_args_size - 16);
e820471b
NS
2596 break;
2597
2598 case STACK_POINTER_REGNUM:
97e242b0
RH
2599 if (current_function_is_leaf)
2600 offset = 0;
2601 else
2602 offset = 16 + current_function_outgoing_args_size;
e820471b
NS
2603 break;
2604
2605 default:
2606 gcc_unreachable ();
97e242b0 2607 }
97e242b0 2608 break;
c65ebc55 2609
97e242b0
RH
2610 case ARG_POINTER_REGNUM:
2611 /* Arguments start above the 16 byte save area, unless stdarg
2612 in which case we store through the 16 byte save area. */
e820471b
NS
2613 switch (to)
2614 {
2615 case HARD_FRAME_POINTER_REGNUM:
2616 offset = 16 - current_function_pretend_args_size;
2617 break;
2618
2619 case STACK_POINTER_REGNUM:
2620 offset = (current_frame_info.total_size
2621 + 16 - current_function_pretend_args_size);
2622 break;
2623
2624 default:
2625 gcc_unreachable ();
2626 }
97e242b0
RH
2627 break;
2628
97e242b0 2629 default:
e820471b 2630 gcc_unreachable ();
97e242b0
RH
2631 }
2632
2633 return offset;
c65ebc55
JW
2634}
2635
97e242b0
RH
2636/* If there are more than a trivial number of register spills, we use
2637 two interleaved iterators so that we can get two memory references
2638 per insn group.
2639
2640 In order to simplify things in the prologue and epilogue expanders,
2641 we use helper functions to fix up the memory references after the
2642 fact with the appropriate offsets to a POST_MODIFY memory mode.
2643 The following data structure tracks the state of the two iterators
2644 while insns are being emitted. */
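/* Added illustration: with two iterators the spill stream alternates
   between two base registers, and spill_restore_mem later rewrites each
   reference into a POST_MODIFY so the address update travels with the
   access itself, conceptually

	st8.spill [iter0] = rA, 8	;; iter0 advances as part of the store
	st8.spill [iter1] = rB, 8

   which lets an insn group issue two memory references at once.  */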
2645
2646struct spill_fill_data
c65ebc55 2647{
d6a7951f 2648 rtx init_after; /* point at which to emit initializations */
97e242b0
RH
2649 rtx init_reg[2]; /* initial base register */
2650 rtx iter_reg[2]; /* the iterator registers */
2651 rtx *prev_addr[2]; /* address of last memory use */
703cf211 2652 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
2653 HOST_WIDE_INT prev_off[2]; /* last offset */
2654 int n_iter; /* number of iterators in use */
2655 int next_iter; /* next iterator to use */
2656 unsigned int save_gr_used_mask;
2657};
2658
2659static struct spill_fill_data spill_fill_data;
c65ebc55 2660
97e242b0 2661static void
9c808aad 2662setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2663{
2664 int i;
2665
2666 spill_fill_data.init_after = get_last_insn ();
2667 spill_fill_data.init_reg[0] = init_reg;
2668 spill_fill_data.init_reg[1] = init_reg;
2669 spill_fill_data.prev_addr[0] = NULL;
2670 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
2671 spill_fill_data.prev_insn[0] = NULL;
2672 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
2673 spill_fill_data.prev_off[0] = cfa_off;
2674 spill_fill_data.prev_off[1] = cfa_off;
2675 spill_fill_data.next_iter = 0;
2676 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2677
2678 spill_fill_data.n_iter = 1 + (n_spills > 2);
2679 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 2680 {
97e242b0
RH
2681 int regno = next_scratch_gr_reg ();
2682 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2683 current_frame_info.gr_used_mask |= 1 << regno;
2684 }
2685}
2686
2687static void
9c808aad 2688finish_spill_pointers (void)
97e242b0
RH
2689{
2690 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2691}
c65ebc55 2692
97e242b0 2693static rtx
9c808aad 2694spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2695{
2696 int iter = spill_fill_data.next_iter;
2697 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2698 rtx disp_rtx = GEN_INT (disp);
2699 rtx mem;
2700
2701 if (spill_fill_data.prev_addr[iter])
2702 {
13f70342 2703 if (satisfies_constraint_N (disp_rtx))
703cf211
BS
2704 {
2705 *spill_fill_data.prev_addr[iter]
2706 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2707 gen_rtx_PLUS (DImode,
2708 spill_fill_data.iter_reg[iter],
2709 disp_rtx));
2710 REG_NOTES (spill_fill_data.prev_insn[iter])
2711 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2712 REG_NOTES (spill_fill_data.prev_insn[iter]));
2713 }
c65ebc55
JW
2714 else
2715 {
97e242b0 2716 /* ??? Could use register post_modify for loads. */
13f70342 2717 if (!satisfies_constraint_I (disp_rtx))
97e242b0
RH
2718 {
2719 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2720 emit_move_insn (tmp, disp_rtx);
2721 disp_rtx = tmp;
2722 }
2723 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2724 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 2725 }
97e242b0
RH
2726 }
2727 /* Micro-optimization: if we've created a frame pointer, it's at
2728 CFA 0, which may allow the real iterator to be initialized lower,
2729 slightly increasing parallelism. Also, if there are few saves
2730 it may eliminate the iterator entirely. */
2731 else if (disp == 0
2732 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2733 && frame_pointer_needed)
2734 {
2735 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 2736 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
2737 return mem;
2738 }
2739 else
2740 {
892a4e60 2741 rtx seq, insn;
809d4ef1 2742
97e242b0
RH
2743 if (disp == 0)
2744 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2745 spill_fill_data.init_reg[iter]);
2746 else
c65ebc55 2747 {
97e242b0
RH
2748 start_sequence ();
2749
13f70342 2750 if (!satisfies_constraint_I (disp_rtx))
c65ebc55 2751 {
97e242b0
RH
2752 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2753 emit_move_insn (tmp, disp_rtx);
2754 disp_rtx = tmp;
c65ebc55 2755 }
97e242b0
RH
2756
2757 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2758 spill_fill_data.init_reg[iter],
2759 disp_rtx));
2760
2f937369 2761 seq = get_insns ();
97e242b0 2762 end_sequence ();
c65ebc55 2763 }
809d4ef1 2764
97e242b0
RH
2765 /* Careful for being the first insn in a sequence. */
2766 if (spill_fill_data.init_after)
892a4e60 2767 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 2768 else
bc08aefe
RH
2769 {
2770 rtx first = get_insns ();
2771 if (first)
892a4e60 2772 insn = emit_insn_before (seq, first);
bc08aefe 2773 else
892a4e60 2774 insn = emit_insn (seq);
bc08aefe 2775 }
892a4e60 2776 spill_fill_data.init_after = insn;
97e242b0 2777 }
c65ebc55 2778
97e242b0 2779 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 2780
97e242b0
RH
2781 /* ??? Not all of the spills are for varargs, but some of them are.
2782 The rest of the spills belong in an alias set of their own. But
2783 it doesn't actually hurt to include them here. */
ba4828e0 2784 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 2785
97e242b0
RH
2786 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2787 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 2788
97e242b0
RH
2789 if (++iter >= spill_fill_data.n_iter)
2790 iter = 0;
2791 spill_fill_data.next_iter = iter;
c65ebc55 2792
97e242b0
RH
2793 return mem;
2794}
5527bf14 2795
97e242b0 2796static void
9c808aad
AJ
2797do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2798 rtx frame_reg)
97e242b0 2799{
703cf211 2800 int iter = spill_fill_data.next_iter;
97e242b0 2801 rtx mem, insn;
5527bf14 2802
97e242b0 2803 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 2804 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 2805 spill_fill_data.prev_insn[iter] = insn;
5527bf14 2806
97e242b0
RH
2807 if (frame_reg)
2808 {
2809 rtx base;
2810 HOST_WIDE_INT off;
2811
2812 RTX_FRAME_RELATED_P (insn) = 1;
2813
9c808aad 2814 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
2815 through a pair of interleaved post_modify iterators. Just
2816 provide the correct answer. */
2817
2818 if (frame_pointer_needed)
2819 {
2820 base = hard_frame_pointer_rtx;
2821 off = - cfa_off;
5527bf14 2822 }
97e242b0
RH
2823 else
2824 {
2825 base = stack_pointer_rtx;
2826 off = current_frame_info.total_size - cfa_off;
2827 }
2828
2829 REG_NOTES (insn)
2830 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2831 gen_rtx_SET (VOIDmode,
2832 gen_rtx_MEM (GET_MODE (reg),
2833 plus_constant (base, off)),
2834 frame_reg),
2835 REG_NOTES (insn));
c65ebc55
JW
2836 }
2837}
2838
97e242b0 2839static void
9c808aad 2840do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 2841{
703cf211
BS
2842 int iter = spill_fill_data.next_iter;
2843 rtx insn;
2844
2845 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2846 GEN_INT (cfa_off)));
2847 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
2848}
2849
870f9ec0
RH
2850/* Wrapper functions that discard the CONST_INT spill offset. These
2851 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 2852 use a consistent function interface. */
870f9ec0
RH
2853
2854static rtx
9c808aad 2855gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
2856{
2857 return gen_movdi (dest, src);
2858}
2859
2860static rtx
9c808aad 2861gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
2862{
2863 return gen_fr_spill (dest, src);
2864}
2865
2866static rtx
9c808aad 2867gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
2868{
2869 return gen_fr_restore (dest, src);
2870}
c65ebc55
JW
2871
2872/* Called after register allocation to add any instructions needed for the
2873 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 2874 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
2875 to intermix instructions with the saves of the caller saved registers. In
2876 some cases, it might be necessary to emit a barrier instruction as the last
2877 insn to prevent such scheduling.
2878
2879 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
2880 so that the debug info generation code can handle them properly.
2881
2882 The register save area is laid out like so:
2883 cfa+16
2884 [ varargs spill area ]
2885 [ fr register spill area ]
2886 [ br register spill area ]
2887 [ ar register spill area ]
2888 [ pr register spill area ]
2889 [ gr register spill area ] */
c65ebc55
JW
2890
2891/* ??? Get inefficient code when the frame size is larger than can fit in an
2892 adds instruction. */
2893
c65ebc55 2894void
9c808aad 2895ia64_expand_prologue (void)
c65ebc55 2896{
97e242b0
RH
2897 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2898 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2899 rtx reg, alt_reg;
2900
2901 ia64_compute_frame_size (get_frame_size ());
2902 last_scratch_gr_reg = 15;
2903
6fb5fa3c
DB
2904 if (dump_file)
2905 {
2906 fprintf (dump_file, "ia64 frame related registers "
2907 "recorded in current_frame_info.r[]:\n");
2908#define PRINTREG(a) if (current_frame_info.r[a]) \
2909 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
2910 PRINTREG(reg_fp);
2911 PRINTREG(reg_save_b0);
2912 PRINTREG(reg_save_pr);
2913 PRINTREG(reg_save_ar_pfs);
2914 PRINTREG(reg_save_ar_unat);
2915 PRINTREG(reg_save_ar_lc);
2916 PRINTREG(reg_save_gp);
2917#undef PRINTREG
2918 }
2919
97e242b0
RH
2920 /* If there is no epilogue, then we don't need some prologue insns.
2921 We need to avoid emitting the dead prologue insns, because flow
2922 will complain about them. */
c65ebc55
JW
2923 if (optimize)
2924 {
97e242b0 2925 edge e;
9924d7d8 2926 edge_iterator ei;
97e242b0 2927
628f6a4e 2928 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
c65ebc55
JW
2929 if ((e->flags & EDGE_FAKE) == 0
2930 && (e->flags & EDGE_FALLTHRU) != 0)
2931 break;
2932 epilogue_p = (e != NULL);
2933 }
2934 else
2935 epilogue_p = 1;
2936
97e242b0
RH
2937 /* Set the local, input, and output register names. We need to do this
2938 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2939 half. If we use in/loc/out register names, then we get assembler errors
2940 in crtn.S because there is no alloc insn or regstk directive in there. */
2941 if (! TARGET_REG_NAMES)
2942 {
2943 int inputs = current_frame_info.n_input_regs;
2944 int locals = current_frame_info.n_local_regs;
2945 int outputs = current_frame_info.n_output_regs;
2946
2947 for (i = 0; i < inputs; i++)
2948 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2949 for (i = 0; i < locals; i++)
2950 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2951 for (i = 0; i < outputs; i++)
2952 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2953 }
c65ebc55 2954
97e242b0
RH
2955 /* Set the frame pointer register name. The regnum is logically loc79,
2956 but of course we'll not have allocated that many locals. Rather than
2957 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
2958 /* ??? This code means that we can never use one local register when
2959 there is a frame pointer. loc79 gets wasted in this case, as it is
2960 renamed to a register that will never be used. See also the try_locals
2961 code in find_gr_spill. */
6fb5fa3c 2962 if (current_frame_info.r[reg_fp])
97e242b0
RH
2963 {
2964 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2965 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
2966 = reg_names[current_frame_info.r[reg_fp]];
2967 reg_names[current_frame_info.r[reg_fp]] = tmp;
97e242b0 2968 }
c65ebc55 2969
97e242b0
RH
2970 /* We don't need an alloc instruction if we've used no outputs or locals. */
2971 if (current_frame_info.n_local_regs == 0
2ed4af6f 2972 && current_frame_info.n_output_regs == 0
f5bdba44
RH
2973 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2974 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
2975 {
2976 /* If there is no alloc, but there are input registers used, then we
2977 need a .regstk directive. */
2978 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2979 ar_pfs_save_reg = NULL_RTX;
2980 }
2981 else
2982 {
2983 current_frame_info.need_regstk = 0;
c65ebc55 2984
6fb5fa3c
DB
2985 if (current_frame_info.r[reg_save_ar_pfs])
2986 {
2987 regno = current_frame_info.r[reg_save_ar_pfs];
2988 reg_emitted (reg_save_ar_pfs);
2989 }
97e242b0
RH
2990 else
2991 regno = next_scratch_gr_reg ();
2992 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2993
9c808aad 2994 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
2995 GEN_INT (current_frame_info.n_input_regs),
2996 GEN_INT (current_frame_info.n_local_regs),
2997 GEN_INT (current_frame_info.n_output_regs),
2998 GEN_INT (current_frame_info.n_rotate_regs)));
6fb5fa3c 2999 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
97e242b0 3000 }
c65ebc55 3001
97e242b0 3002 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 3003
26a110f5 3004 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
3005 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3006 stack_pointer_rtx, 0);
c65ebc55 3007
97e242b0
RH
3008 if (frame_pointer_needed)
3009 {
3010 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3011 RTX_FRAME_RELATED_P (insn) = 1;
3012 }
c65ebc55 3013
97e242b0
RH
3014 if (current_frame_info.total_size != 0)
3015 {
3016 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3017 rtx offset;
c65ebc55 3018
13f70342 3019 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3020 offset = frame_size_rtx;
3021 else
3022 {
3023 regno = next_scratch_gr_reg ();
9c808aad 3024 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
3025 emit_move_insn (offset, frame_size_rtx);
3026 }
c65ebc55 3027
97e242b0
RH
3028 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3029 stack_pointer_rtx, offset));
c65ebc55 3030
97e242b0
RH
3031 if (! frame_pointer_needed)
3032 {
3033 RTX_FRAME_RELATED_P (insn) = 1;
3034 if (GET_CODE (offset) != CONST_INT)
3035 {
3036 REG_NOTES (insn)
3037 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3038 gen_rtx_SET (VOIDmode,
3039 stack_pointer_rtx,
3040 gen_rtx_PLUS (DImode,
3041 stack_pointer_rtx,
3042 frame_size_rtx)),
3043 REG_NOTES (insn));
3044 }
3045 }
c65ebc55 3046
97e242b0
RH
3047 /* ??? At this point we must generate a magic insn that appears to
3048 modify the stack pointer, the frame pointer, and all spill
3049 iterators. This would allow the most scheduling freedom. For
3050 now, just hard stop. */
3051 emit_insn (gen_blockage ());
3052 }
c65ebc55 3053
97e242b0
RH
3054 /* Must copy out ar.unat before doing any integer spills. */
3055 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 3056 {
6fb5fa3c
DB
3057 if (current_frame_info.r[reg_save_ar_unat])
3058 {
3059 ar_unat_save_reg
3060 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3061 reg_emitted (reg_save_ar_unat);
3062 }
97e242b0 3063 else
c65ebc55 3064 {
97e242b0
RH
3065 alt_regno = next_scratch_gr_reg ();
3066 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3067 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 3068 }
c65ebc55 3069
97e242b0
RH
3070 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3071 insn = emit_move_insn (ar_unat_save_reg, reg);
6fb5fa3c 3072 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
97e242b0
RH
3073
3074 /* Even if we're not going to generate an epilogue, we still
3075 need to save the register so that EH works. */
6fb5fa3c 3076 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
d0e82870 3077 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
3078 }
3079 else
97e242b0
RH
3080 ar_unat_save_reg = NULL_RTX;
3081
3082 /* Spill all varargs registers. Do this before spilling any GR registers,
3083 since we want the UNAT bits for the GR registers to override the UNAT
3084 bits from varargs, which we don't care about. */
c65ebc55 3085
97e242b0
RH
3086 cfa_off = -16;
3087 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 3088 {
97e242b0 3089 reg = gen_rtx_REG (DImode, regno);
870f9ec0 3090 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 3091 }
c65ebc55 3092
97e242b0
RH
3093 /* Locate the bottom of the register save area. */
3094 cfa_off = (current_frame_info.spill_cfa_off
3095 + current_frame_info.spill_size
3096 + current_frame_info.extra_spill_size);
c65ebc55 3097
97e242b0
RH
3098 /* Save the predicate register block either in a register or in memory. */
3099 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3100 {
3101 reg = gen_rtx_REG (DImode, PR_REG (0));
6fb5fa3c 3102 if (current_frame_info.r[reg_save_pr] != 0)
1ff5b671 3103 {
6fb5fa3c
DB
3104 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3105 reg_emitted (reg_save_pr);
97e242b0 3106 insn = emit_move_insn (alt_reg, reg);
1ff5b671 3107
97e242b0
RH
3108 /* ??? Denote pr spill/fill by a DImode move that modifies all
3109 64 hard registers. */
1ff5b671 3110 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
3111 REG_NOTES (insn)
3112 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3113 gen_rtx_SET (VOIDmode, alt_reg, reg),
3114 REG_NOTES (insn));
46327bc5 3115
97e242b0
RH
3116 /* Even if we're not going to generate an epilogue, we still
3117 need to save the register so that EH works. */
3118 if (! epilogue_p)
d0e82870 3119 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
3120 }
3121 else
97e242b0
RH
3122 {
3123 alt_regno = next_scratch_gr_reg ();
3124 alt_reg = gen_rtx_REG (DImode, alt_regno);
3125 insn = emit_move_insn (alt_reg, reg);
870f9ec0 3126 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3127 cfa_off -= 8;
3128 }
c65ebc55
JW
3129 }
3130
97e242b0
RH
3131 /* Handle AR regs in numerical order. All of them get special handling. */
3132 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6fb5fa3c 3133 && current_frame_info.r[reg_save_ar_unat] == 0)
c65ebc55 3134 {
97e242b0 3135 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 3136 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 3137 cfa_off -= 8;
c65ebc55 3138 }
97e242b0
RH
3139
3140 /* The alloc insn already copied ar.pfs into a general register. The
3141 only thing we have to do now is copy that register to a stack slot
3142 if we'd not allocated a local register for the job. */
f5bdba44 3143 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
6fb5fa3c 3144 && current_frame_info.r[reg_save_ar_pfs] == 0)
c65ebc55 3145 {
97e242b0 3146 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 3147 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
3148 cfa_off -= 8;
3149 }
3150
3151 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3152 {
3153 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
6fb5fa3c 3154 if (current_frame_info.r[reg_save_ar_lc] != 0)
97e242b0 3155 {
6fb5fa3c
DB
3156 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3157 reg_emitted (reg_save_ar_lc);
97e242b0
RH
3158 insn = emit_move_insn (alt_reg, reg);
3159 RTX_FRAME_RELATED_P (insn) = 1;
3160
3161 /* Even if we're not going to generate an epilogue, we still
3162 need to save the register so that EH works. */
3163 if (! epilogue_p)
d0e82870 3164 emit_insn (gen_prologue_use (alt_reg));
97e242b0 3165 }
c65ebc55
JW
3166 else
3167 {
97e242b0
RH
3168 alt_regno = next_scratch_gr_reg ();
3169 alt_reg = gen_rtx_REG (DImode, alt_regno);
3170 emit_move_insn (alt_reg, reg);
870f9ec0 3171 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3172 cfa_off -= 8;
3173 }
3174 }
3175
ae1e2d4c
AS
3176 /* Save the return pointer. */
3177 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3178 {
3179 reg = gen_rtx_REG (DImode, BR_REG (0));
6fb5fa3c 3180 if (current_frame_info.r[reg_save_b0] != 0)
ae1e2d4c 3181 {
6fb5fa3c
DB
3182 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3183 reg_emitted (reg_save_b0);
ae1e2d4c
AS
3184 insn = emit_move_insn (alt_reg, reg);
3185 RTX_FRAME_RELATED_P (insn) = 1;
3186
3187 /* Even if we're not going to generate an epilogue, we still
3188 need to save the register so that EH works. */
3189 if (! epilogue_p)
3190 emit_insn (gen_prologue_use (alt_reg));
3191 }
3192 else
3193 {
3194 alt_regno = next_scratch_gr_reg ();
3195 alt_reg = gen_rtx_REG (DImode, alt_regno);
3196 emit_move_insn (alt_reg, reg);
3197 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3198 cfa_off -= 8;
3199 }
3200 }
3201
6fb5fa3c 3202 if (current_frame_info.r[reg_save_gp])
599aedd9 3203 {
6fb5fa3c 3204 reg_emitted (reg_save_gp);
599aedd9 3205 insn = emit_move_insn (gen_rtx_REG (DImode,
6fb5fa3c 3206 current_frame_info.r[reg_save_gp]),
599aedd9 3207 pic_offset_table_rtx);
599aedd9
RH
3208 }
3209
97e242b0 3210 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3211 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3212 + current_frame_info.spill_size));
97e242b0
RH
3213
3214 /* Spill all general registers. */
3215 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3216 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3217 {
3218 reg = gen_rtx_REG (DImode, regno);
3219 do_spill (gen_gr_spill, reg, cfa_off, reg);
3220 cfa_off -= 8;
3221 }
3222
97e242b0
RH
3223 /* Spill the rest of the BR registers. */
3224 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3225 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3226 {
3227 alt_regno = next_scratch_gr_reg ();
3228 alt_reg = gen_rtx_REG (DImode, alt_regno);
3229 reg = gen_rtx_REG (DImode, regno);
3230 emit_move_insn (alt_reg, reg);
870f9ec0 3231 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3232 cfa_off -= 8;
3233 }
3234
3235 /* Align the frame and spill all FR registers. */
3236 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3237 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3238 {
e820471b 3239 gcc_assert (!(cfa_off & 15));
02befdf4 3240 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3241 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
3242 cfa_off -= 16;
3243 }
3244
e820471b 3245 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3246
3247 finish_spill_pointers ();
c65ebc55
JW
3248}
3249
3250/* Called after register allocation to add any instructions needed for the
5519a4f9 3251 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 3252 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
 3253 to intermix instructions with the restores of the caller saved registers. In
3254 some cases, it might be necessary to emit a barrier instruction as the last
3255 insn to prevent such scheduling. */
3256
3257void
9c808aad 3258ia64_expand_epilogue (int sibcall_p)
c65ebc55 3259{
97e242b0
RH
3260 rtx insn, reg, alt_reg, ar_unat_save_reg;
3261 int regno, alt_regno, cfa_off;
3262
3263 ia64_compute_frame_size (get_frame_size ());
3264
3265 /* If there is a frame pointer, then we use it instead of the stack
3266 pointer, so that the stack pointer does not need to be valid when
3267 the epilogue starts. See EXIT_IGNORE_STACK. */
3268 if (frame_pointer_needed)
3269 setup_spill_pointers (current_frame_info.n_spilled,
3270 hard_frame_pointer_rtx, 0);
3271 else
9c808aad 3272 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
3273 current_frame_info.total_size);
3274
3275 if (current_frame_info.total_size != 0)
3276 {
3277 /* ??? At this point we must generate a magic insn that appears to
3278 modify the spill iterators and the frame pointer. This would
3279 allow the most scheduling freedom. For now, just hard stop. */
3280 emit_insn (gen_blockage ());
3281 }
3282
3283 /* Locate the bottom of the register save area. */
3284 cfa_off = (current_frame_info.spill_cfa_off
3285 + current_frame_info.spill_size
3286 + current_frame_info.extra_spill_size);
3287
3288 /* Restore the predicate registers. */
3289 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3290 {
6fb5fa3c
DB
3291 if (current_frame_info.r[reg_save_pr] != 0)
3292 {
3293 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3294 reg_emitted (reg_save_pr);
3295 }
97e242b0
RH
3296 else
3297 {
3298 alt_regno = next_scratch_gr_reg ();
3299 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3300 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3301 cfa_off -= 8;
3302 }
3303 reg = gen_rtx_REG (DImode, PR_REG (0));
3304 emit_move_insn (reg, alt_reg);
3305 }
3306
3307 /* Restore the application registers. */
3308
3309 /* Load the saved unat from the stack, but do not restore it until
3310 after the GRs have been restored. */
3311 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3312 {
6fb5fa3c
DB
3313 if (current_frame_info.r[reg_save_ar_unat] != 0)
3314 {
3315 ar_unat_save_reg
3316 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3317 reg_emitted (reg_save_ar_unat);
3318 }
97e242b0
RH
3319 else
3320 {
3321 alt_regno = next_scratch_gr_reg ();
3322 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3323 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3324 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3325 cfa_off -= 8;
3326 }
3327 }
3328 else
3329 ar_unat_save_reg = NULL_RTX;
9c808aad 3330
6fb5fa3c 3331 if (current_frame_info.r[reg_save_ar_pfs] != 0)
97e242b0 3332 {
6fb5fa3c
DB
3333 reg_emitted (reg_save_ar_pfs);
3334 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
97e242b0
RH
3335 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3336 emit_move_insn (reg, alt_reg);
3337 }
4e14f1f9 3338 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3339 {
97e242b0
RH
3340 alt_regno = next_scratch_gr_reg ();
3341 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3342 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3343 cfa_off -= 8;
3344 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3345 emit_move_insn (reg, alt_reg);
3346 }
3347
3348 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3349 {
6fb5fa3c
DB
3350 if (current_frame_info.r[reg_save_ar_lc] != 0)
3351 {
3352 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3353 reg_emitted (reg_save_ar_lc);
3354 }
97e242b0
RH
3355 else
3356 {
3357 alt_regno = next_scratch_gr_reg ();
3358 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3359 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3360 cfa_off -= 8;
3361 }
3362 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3363 emit_move_insn (reg, alt_reg);
3364 }
3365
ae1e2d4c
AS
3366 /* Restore the return pointer. */
3367 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3368 {
6fb5fa3c
DB
3369 if (current_frame_info.r[reg_save_b0] != 0)
3370 {
3371 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3372 reg_emitted (reg_save_b0);
3373 }
ae1e2d4c
AS
3374 else
3375 {
3376 alt_regno = next_scratch_gr_reg ();
3377 alt_reg = gen_rtx_REG (DImode, alt_regno);
3378 do_restore (gen_movdi_x, alt_reg, cfa_off);
3379 cfa_off -= 8;
3380 }
3381 reg = gen_rtx_REG (DImode, BR_REG (0));
3382 emit_move_insn (reg, alt_reg);
3383 }
3384
97e242b0 3385 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3386 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3387 + current_frame_info.spill_size));
97e242b0 3388
599aedd9
RH
3389 /* The GP may be stored on the stack in the prologue, but it's
3390 never restored in the epilogue. Skip the stack slot. */
3391 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3392 cfa_off -= 8;
3393
97e242b0 3394 /* Restore all general registers. */
599aedd9 3395 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 3396 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3397 {
97e242b0
RH
3398 reg = gen_rtx_REG (DImode, regno);
3399 do_restore (gen_gr_restore, reg, cfa_off);
3400 cfa_off -= 8;
0c96007e 3401 }
9c808aad 3402
ae1e2d4c 3403 /* Restore the branch registers. */
97e242b0
RH
3404 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3405 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3406 {
97e242b0
RH
3407 alt_regno = next_scratch_gr_reg ();
3408 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3409 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3410 cfa_off -= 8;
3411 reg = gen_rtx_REG (DImode, regno);
3412 emit_move_insn (reg, alt_reg);
3413 }
c65ebc55 3414
97e242b0
RH
3415 /* Restore floating point registers. */
3416 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3417 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3418 {
e820471b 3419 gcc_assert (!(cfa_off & 15));
02befdf4 3420 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3421 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 3422 cfa_off -= 16;
0c96007e 3423 }
97e242b0
RH
3424
3425 /* Restore ar.unat for real. */
3426 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3427 {
3428 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3429 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
3430 }
3431
e820471b 3432 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3433
3434 finish_spill_pointers ();
c65ebc55 3435
97e242b0
RH
3436 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3437 {
3438 /* ??? At this point we must generate a magic insn that appears to
3439 modify the spill iterators, the stack pointer, and the frame
3440 pointer. This would allow the most scheduling freedom. For now,
3441 just hard stop. */
3442 emit_insn (gen_blockage ());
3443 }
c65ebc55 3444
97e242b0
RH
3445 if (cfun->machine->ia64_eh_epilogue_sp)
3446 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3447 else if (frame_pointer_needed)
3448 {
3449 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3450 RTX_FRAME_RELATED_P (insn) = 1;
3451 }
3452 else if (current_frame_info.total_size)
0c96007e 3453 {
97e242b0
RH
3454 rtx offset, frame_size_rtx;
3455
3456 frame_size_rtx = GEN_INT (current_frame_info.total_size);
13f70342 3457 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3458 offset = frame_size_rtx;
3459 else
3460 {
3461 regno = next_scratch_gr_reg ();
3462 offset = gen_rtx_REG (DImode, regno);
3463 emit_move_insn (offset, frame_size_rtx);
3464 }
3465
3466 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3467 offset));
3468
3469 RTX_FRAME_RELATED_P (insn) = 1;
3470 if (GET_CODE (offset) != CONST_INT)
3471 {
3472 REG_NOTES (insn)
3473 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3474 gen_rtx_SET (VOIDmode,
3475 stack_pointer_rtx,
3476 gen_rtx_PLUS (DImode,
3477 stack_pointer_rtx,
3478 frame_size_rtx)),
3479 REG_NOTES (insn));
3480 }
0c96007e 3481 }
97e242b0
RH
3482
3483 if (cfun->machine->ia64_eh_epilogue_bsp)
3484 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 3485
2ed4af6f
RH
3486 if (! sibcall_p)
3487 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 3488 else
8206fc89
AM
3489 {
3490 int fp = GR_REG (2);
 3491 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
9c808aad
AJ
3492 first available call clobbered register. If there was a frame_pointer
3493 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
8206fc89 3494 so we have to make sure we're using the string "r2" when emitting
9e4f94de 3495 the register name for the assembler. */
6fb5fa3c
DB
3496 if (current_frame_info.r[reg_fp]
3497 && current_frame_info.r[reg_fp] == GR_REG (2))
8206fc89
AM
3498 fp = HARD_FRAME_POINTER_REGNUM;
3499
3500 /* We must emit an alloc to force the input registers to become output
3501 registers. Otherwise, if the callee tries to pass its parameters
3502 through to another call without an intervening alloc, then these
3503 values get lost. */
3504 /* ??? We don't need to preserve all input registers. We only need to
3505 preserve those input registers used as arguments to the sibling call.
3506 It is unclear how to compute that number here. */
3507 if (current_frame_info.n_input_regs != 0)
a8f5224e
DM
3508 {
3509 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3510 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3511 const0_rtx, const0_rtx,
3512 n_inputs, const0_rtx));
3513 RTX_FRAME_RELATED_P (insn) = 1;
3514 }
8206fc89 3515 }
c65ebc55
JW
3516}
3517
97e242b0
RH
3518/* Return 1 if br.ret can do all the work required to return from a
3519 function. */
3520
3521int
9c808aad 3522ia64_direct_return (void)
97e242b0
RH
3523{
3524 if (reload_completed && ! frame_pointer_needed)
3525 {
3526 ia64_compute_frame_size (get_frame_size ());
3527
3528 return (current_frame_info.total_size == 0
3529 && current_frame_info.n_spilled == 0
6fb5fa3c
DB
3530 && current_frame_info.r[reg_save_b0] == 0
3531 && current_frame_info.r[reg_save_pr] == 0
3532 && current_frame_info.r[reg_save_ar_pfs] == 0
3533 && current_frame_info.r[reg_save_ar_unat] == 0
3534 && current_frame_info.r[reg_save_ar_lc] == 0);
97e242b0
RH
3535 }
3536 return 0;
3537}
3538
af1e5518
RH
3539/* Return the magic cookie that we use to hold the return address
3540 during early compilation. */
3541
3542rtx
9c808aad 3543ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
3544{
3545 if (count != 0)
3546 return NULL;
3547 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3548}
3549
3550/* Split this value after reload, now that we know where the return
3551 address is saved. */
3552
3553void
9c808aad 3554ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
3555{
3556 rtx src;
3557
3558 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3559 {
6fb5fa3c
DB
3560 if (current_frame_info.r[reg_save_b0] != 0)
3561 {
3562 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3563 reg_emitted (reg_save_b0);
3564 }
af1e5518
RH
3565 else
3566 {
3567 HOST_WIDE_INT off;
3568 unsigned int regno;
13f70342 3569 rtx off_r;
af1e5518
RH
3570
3571 /* Compute offset from CFA for BR0. */
3572 /* ??? Must be kept in sync with ia64_expand_prologue. */
3573 off = (current_frame_info.spill_cfa_off
3574 + current_frame_info.spill_size);
3575 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3576 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3577 off -= 8;
3578
3579 /* Convert CFA offset to a register based offset. */
3580 if (frame_pointer_needed)
3581 src = hard_frame_pointer_rtx;
3582 else
3583 {
3584 src = stack_pointer_rtx;
3585 off += current_frame_info.total_size;
3586 }
3587
3588 /* Load address into scratch register. */
13f70342
RH
3589 off_r = GEN_INT (off);
3590 if (satisfies_constraint_I (off_r))
3591 emit_insn (gen_adddi3 (dest, src, off_r));
af1e5518
RH
3592 else
3593 {
13f70342 3594 emit_move_insn (dest, off_r);
af1e5518
RH
3595 emit_insn (gen_adddi3 (dest, src, dest));
3596 }
3597
3598 src = gen_rtx_MEM (Pmode, dest);
3599 }
3600 }
3601 else
3602 src = gen_rtx_REG (DImode, BR_REG (0));
3603
3604 emit_move_insn (dest, src);
3605}
3606
10c9f189 3607int
9c808aad 3608ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
3609{
3610 /* Don't clobber any of the registers we reserved for the prologue. */
6fb5fa3c 3611 enum ia64_frame_regs r;
10c9f189 3612
6fb5fa3c
DB
3613 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3614 if (to == current_frame_info.r[r]
3615 || from == current_frame_info.r[r]
3616 || to == emitted_frame_related_regs[r]
3617 || from == emitted_frame_related_regs[r])
3618 return 0;
2130b7fb 3619
10c9f189
RH
3620 /* Don't use output registers outside the register frame. */
3621 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3622 return 0;
3623
3624 /* Retain even/oddness on predicate register pairs. */
3625 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3626 return (from & 1) == (to & 1);
3627
3628 return 1;
3629}
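/* Editorial note (not part of GCC): a worked example of the predicate test
   above.  Renaming p6 to p8 is allowed (both even); renaming p6 to p7 is
   rejected, keeping even/odd predicate pairs -- such as the two results of
   a compare -- intact across renaming.  */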
3630
301d03af
RS
3631/* Target hook for assembling integer objects. Handle word-sized
3632 aligned objects and detect the cases when @fptr is needed. */
3633
3634static bool
9c808aad 3635ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3636{
b6a41a62 3637 if (size == POINTER_SIZE / BITS_PER_UNIT
301d03af
RS
3638 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3639 && GET_CODE (x) == SYMBOL_REF
1cdbd630 3640 && SYMBOL_REF_FUNCTION_P (x))
301d03af 3641 {
1b79dc38
DM
3642 static const char * const directive[2][2] = {
3643 /* 64-bit pointer */ /* 32-bit pointer */
3644 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3645 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3646 };
3647 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
301d03af
RS
3648 output_addr_const (asm_out_file, x);
3649 fputs (")\n", asm_out_file);
3650 return true;
3651 }
3652 return default_assemble_integer (x, size, aligned_p);
3653}
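/* Editorial example (not part of GCC): for an aligned 8-byte pointer to a
   function symbol, say "foo", the hook above prints

       data8   @fptr(foo)

   so a function descriptor is materialized; data pointers and
   TARGET_NO_PIC/TARGET_AUTO_PIC builds fall through to
   default_assemble_integer.  The symbol name is illustrative.  */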
3654
c65ebc55
JW
3655/* Emit the function prologue. */
3656
08c148a8 3657static void
9c808aad 3658ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3659{
97e242b0
RH
3660 int mask, grsave, grsave_prev;
3661
3662 if (current_frame_info.need_regstk)
3663 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3664 current_frame_info.n_input_regs,
3665 current_frame_info.n_local_regs,
3666 current_frame_info.n_output_regs,
3667 current_frame_info.n_rotate_regs);
c65ebc55 3668
531073e7 3669 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
3670 return;
3671
97e242b0 3672 /* Emit the .prologue directive. */
809d4ef1 3673
97e242b0
RH
3674 mask = 0;
3675 grsave = grsave_prev = 0;
6fb5fa3c 3676 if (current_frame_info.r[reg_save_b0] != 0)
0c96007e 3677 {
97e242b0 3678 mask |= 8;
6fb5fa3c 3679 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
97e242b0 3680 }
6fb5fa3c 3681 if (current_frame_info.r[reg_save_ar_pfs] != 0
97e242b0 3682 && (grsave_prev == 0
6fb5fa3c 3683 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
97e242b0
RH
3684 {
3685 mask |= 4;
3686 if (grsave_prev == 0)
6fb5fa3c
DB
3687 grsave = current_frame_info.r[reg_save_ar_pfs];
3688 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
0c96007e 3689 }
6fb5fa3c 3690 if (current_frame_info.r[reg_fp] != 0
97e242b0 3691 && (grsave_prev == 0
6fb5fa3c 3692 || current_frame_info.r[reg_fp] == grsave_prev + 1))
97e242b0
RH
3693 {
3694 mask |= 2;
3695 if (grsave_prev == 0)
3696 grsave = HARD_FRAME_POINTER_REGNUM;
6fb5fa3c 3697 grsave_prev = current_frame_info.r[reg_fp];
97e242b0 3698 }
6fb5fa3c 3699 if (current_frame_info.r[reg_save_pr] != 0
97e242b0 3700 && (grsave_prev == 0
6fb5fa3c 3701 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
97e242b0
RH
3702 {
3703 mask |= 1;
3704 if (grsave_prev == 0)
6fb5fa3c 3705 grsave = current_frame_info.r[reg_save_pr];
97e242b0
RH
3706 }
3707
738e7b39 3708 if (mask && TARGET_GNU_AS)
97e242b0
RH
3709 fprintf (file, "\t.prologue %d, %d\n", mask,
3710 ia64_dbx_register_number (grsave));
3711 else
3712 fputs ("\t.prologue\n", file);
3713
3714 /* Emit a .spill directive, if necessary, to relocate the base of
3715 the register spill area. */
3716 if (current_frame_info.spill_cfa_off != -16)
3717 fprintf (file, "\t.spill %ld\n",
3718 (long) (current_frame_info.spill_cfa_off
3719 + current_frame_info.spill_size));
c65ebc55
JW
3720}
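/* Editorial worked example (not part of GCC): the mask/grsave pair feeds the
   unwind directive ".prologue mask, grsave", where, per the code above, bit
   8 of MASK covers the return pointer, 4 ar.pfs, 2 the frame pointer and 1
   the predicates, all saved in consecutive general registers starting at
   GRSAVE.  If, for instance, b0 had been saved in r33 and ar.pfs in r34
   (hypothetical numbers), the loop above would yield mask == 12 and
   grsave == 33, and the function would emit "\t.prologue 12, N" with N the
   result of ia64_dbx_register_number (33).  */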
3721
0186257f
JW
3722/* Emit the .body directive at the scheduled end of the prologue. */
3723
b4c25db2 3724static void
9c808aad 3725ia64_output_function_end_prologue (FILE *file)
0186257f 3726{
531073e7 3727 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
3728 return;
3729
3730 fputs ("\t.body\n", file);
3731}
3732
c65ebc55
JW
3733/* Emit the function epilogue. */
3734
08c148a8 3735static void
9c808aad
AJ
3736ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3737 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3738{
8a959ea5
RH
3739 int i;
3740
6fb5fa3c 3741 if (current_frame_info.r[reg_fp])
97e242b0
RH
3742 {
3743 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3744 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3745 = reg_names[current_frame_info.r[reg_fp]];
3746 reg_names[current_frame_info.r[reg_fp]] = tmp;
3747 reg_emitted (reg_fp);
97e242b0
RH
3748 }
3749 if (! TARGET_REG_NAMES)
3750 {
97e242b0
RH
3751 for (i = 0; i < current_frame_info.n_input_regs; i++)
3752 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3753 for (i = 0; i < current_frame_info.n_local_regs; i++)
3754 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3755 for (i = 0; i < current_frame_info.n_output_regs; i++)
3756 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3757 }
8a959ea5 3758
97e242b0
RH
3759 current_frame_info.initialized = 0;
3760}
c65ebc55
JW
3761
3762int
9c808aad 3763ia64_dbx_register_number (int regno)
c65ebc55 3764{
97e242b0
RH
3765 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3766 from its home at loc79 to something inside the register frame. We
3767 must perform the same renumbering here for the debug info. */
6fb5fa3c 3768 if (current_frame_info.r[reg_fp])
97e242b0
RH
3769 {
3770 if (regno == HARD_FRAME_POINTER_REGNUM)
6fb5fa3c
DB
3771 regno = current_frame_info.r[reg_fp];
3772 else if (regno == current_frame_info.r[reg_fp])
97e242b0
RH
3773 regno = HARD_FRAME_POINTER_REGNUM;
3774 }
3775
3776 if (IN_REGNO_P (regno))
3777 return 32 + regno - IN_REG (0);
3778 else if (LOC_REGNO_P (regno))
3779 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3780 else if (OUT_REGNO_P (regno))
3781 return (32 + current_frame_info.n_input_regs
3782 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3783 else
3784 return regno;
c65ebc55
JW
3785}
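/* Editorial worked example (not part of GCC): with two input registers and
   three locals, in0/in1 are reported as debug registers 32 and 33, loc0-loc2
   as 34-36, and out0 onward from 37, independent of which physical stacked
   registers were actually assigned.  */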
3786
97e242b0 3787void
9c808aad 3788ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
97e242b0
RH
3789{
3790 rtx addr_reg, eight = GEN_INT (8);
3791
738e7b39
RK
3792 /* The Intel assembler requires that the global __ia64_trampoline symbol
 3793 be declared explicitly. */
3794 if (!TARGET_GNU_AS)
3795 {
3796 static bool declared_ia64_trampoline = false;
3797
3798 if (!declared_ia64_trampoline)
3799 {
3800 declared_ia64_trampoline = true;
b6a41a62
RK
3801 (*targetm.asm_out.globalize_label) (asm_out_file,
3802 "__ia64_trampoline");
738e7b39
RK
3803 }
3804 }
3805
5e89a381
SE
3806 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3807 addr = convert_memory_address (Pmode, addr);
3808 fnaddr = convert_memory_address (Pmode, fnaddr);
3809 static_chain = convert_memory_address (Pmode, static_chain);
3810
97e242b0
RH
3811 /* Load up our iterator. */
3812 addr_reg = gen_reg_rtx (Pmode);
3813 emit_move_insn (addr_reg, addr);
3814
3815 /* The first two words are the fake descriptor:
3816 __ia64_trampoline, ADDR+16. */
3817 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3818 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3819 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3820
3821 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3822 copy_to_reg (plus_constant (addr, 16)));
3823 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3824
3825 /* The third word is the target descriptor. */
3826 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3827 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3828
3829 /* The fourth word is the static chain. */
3830 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3831}
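/* Editorial sketch (not part of GCC): the 32-byte block initialized above,
   viewed as a struct.  The first two words form a fake function descriptor
   pointing at the __ia64_trampoline stub, with ADDR + 16 as its gp so the
   stub can find the remaining two words.  Field names are invented here.  */
struct ia64_trampoline_layout
{
  unsigned long stub_addr;     /* word 0: address of __ia64_trampoline      */
  unsigned long stub_gp;       /* word 1: ADDR + 16                         */
  unsigned long target_desc;   /* word 2: descriptor of the nested function */
  unsigned long static_chain;  /* word 3: static chain value                */
};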
c65ebc55
JW
3832\f
3833/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
3834 for the last named argument which has type TYPE and mode MODE.
3835
3836 We generate the actual spill instructions during prologue generation. */
3837
351a758b
KH
3838static void
3839ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3840 tree type, int * pretend_size,
9c808aad 3841 int second_time ATTRIBUTE_UNUSED)
c65ebc55 3842{
351a758b
KH
3843 CUMULATIVE_ARGS next_cum = *cum;
3844
6c535c69 3845 /* Skip the current argument. */
351a758b 3846 ia64_function_arg_advance (&next_cum, mode, type, 1);
c65ebc55 3847
351a758b 3848 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 3849 {
351a758b 3850 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
3851 *pretend_size = n * UNITS_PER_WORD;
3852 cfun->machine->n_varargs = n;
3853 }
c65ebc55
JW
3854}
3855
3856/* Check whether TYPE is a homogeneous floating point aggregate. If
3857 it is, return the mode of the floating point type that appears
 3858 in all leaves. If it is not, return VOIDmode.
3859
 3860 An aggregate is a homogeneous floating point aggregate if all
 3861 fields/elements in it have the same floating point type (e.g.,
3d6a9acd
RH
3862 SFmode). 128-bit quad-precision floats are excluded.
3863
3864 Variable sized aggregates should never arrive here, since we should
3865 have already decided to pass them by reference. Top-level zero-sized
3866 aggregates are excluded because our parallels crash the middle-end. */
c65ebc55
JW
3867
3868static enum machine_mode
3d6a9acd 3869hfa_element_mode (tree type, bool nested)
c65ebc55
JW
3870{
3871 enum machine_mode element_mode = VOIDmode;
3872 enum machine_mode mode;
3873 enum tree_code code = TREE_CODE (type);
3874 int know_element_mode = 0;
3875 tree t;
3876
3d6a9acd
RH
3877 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3878 return VOIDmode;
3879
c65ebc55
JW
3880 switch (code)
3881 {
3882 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
0cc8f5c5 3883 case BOOLEAN_TYPE: case POINTER_TYPE:
c65ebc55 3884 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
5662a50d 3885 case LANG_TYPE: case FUNCTION_TYPE:
c65ebc55
JW
3886 return VOIDmode;
3887
3888 /* Fortran complex types are supposed to be HFAs, so we need to handle
3889 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3890 types though. */
3891 case COMPLEX_TYPE:
16448fd4 3892 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
3893 && TYPE_MODE (type) != TCmode)
3894 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
3895 else
3896 return VOIDmode;
3897
3898 case REAL_TYPE:
3899 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3900 mode if this is contained within an aggregate. */
02befdf4 3901 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
3902 return TYPE_MODE (type);
3903 else
3904 return VOIDmode;
3905
3906 case ARRAY_TYPE:
46399021 3907 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
3908
3909 case RECORD_TYPE:
3910 case UNION_TYPE:
3911 case QUAL_UNION_TYPE:
3912 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3913 {
3914 if (TREE_CODE (t) != FIELD_DECL)
3915 continue;
3916
3917 mode = hfa_element_mode (TREE_TYPE (t), 1);
3918 if (know_element_mode)
3919 {
3920 if (mode != element_mode)
3921 return VOIDmode;
3922 }
3923 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3924 return VOIDmode;
3925 else
3926 {
3927 know_element_mode = 1;
3928 element_mode = mode;
3929 }
3930 }
3931 return element_mode;
3932
3933 default:
3934 /* If we reach here, we probably have some front-end specific type
3935 that the backend doesn't know about. This can happen via the
3936 aggregate_value_p call in init_function_start. All we can do is
3937 ignore unknown tree types. */
3938 return VOIDmode;
3939 }
3940
3941 return VOIDmode;
3942}
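/* Editorial examples (not part of GCC), assuming the usual type modes:
   hfa_element_mode would classify these as follows.  */
struct doc_hfa_sf   { float x, y, z; };      /* HFA, element mode SFmode   */
struct doc_hfa_df   { double re, im; };      /* HFA, element mode DFmode   */
struct doc_not_hfa1 { float x; double y; };  /* not an HFA: mixed modes    */
struct doc_not_hfa2 { float x; int tag; };   /* not an HFA: non-FP field   */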
3943
f57fc998
ZW
3944/* Return the number of words required to hold a quantity of TYPE and MODE
3945 when passed as an argument. */
3946static int
3947ia64_function_arg_words (tree type, enum machine_mode mode)
3948{
3949 int words;
3950
3951 if (mode == BLKmode)
3952 words = int_size_in_bytes (type);
3953 else
3954 words = GET_MODE_SIZE (mode);
3955
3956 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3957}
3958
3959/* Return the number of registers that should be skipped so the current
3960 argument (described by TYPE and WORDS) will be properly aligned.
3961
3962 Integer and float arguments larger than 8 bytes start at the next
3963 even boundary. Aggregates larger than 8 bytes start at the next
3964 even boundary if the aggregate has 16 byte alignment. Note that
3965 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3966 but are still to be aligned in registers.
3967
3968 ??? The ABI does not specify how to handle aggregates with
3969 alignment from 9 to 15 bytes, or greater than 16. We handle them
3970 all as if they had 16 byte alignment. Such aggregates can occur
3971 only if gcc extensions are used. */
3972static int
3973ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3974{
3975 if ((cum->words & 1) == 0)
3976 return 0;
3977
3978 if (type
3979 && TREE_CODE (type) != INTEGER_TYPE
3980 && TREE_CODE (type) != REAL_TYPE)
3981 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3982 else
3983 return words > 1;
3984}
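/* Editorial sketch (not part of GCC): the padding rule above restated as a
   standalone helper.  AGGREGATE_ALIGN_BITS is the aggregate's alignment, or
   0 for an integer/float scalar; WORDS is the slot count from
   ia64_function_arg_words.  All names here are hypothetical.  */
static int
doc_arg_padding_slots (int cum_words, int aggregate_align_bits, int words)
{
  if ((cum_words & 1) == 0)
    return 0;                          /* already at an even slot */
  if (aggregate_align_bits != 0)
    return aggregate_align_bits > 64;  /* aggregates: pad only if 16-byte aligned */
  return words > 1;                    /* scalars: pad if wider than 8 bytes */
}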
3985
c65ebc55
JW
3986/* Return rtx for register where argument is passed, or zero if it is passed
3987 on the stack. */
c65ebc55
JW
3988/* ??? 128-bit quad-precision floats are always passed in general
3989 registers. */
3990
3991rtx
9c808aad
AJ
3992ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3993 int named, int incoming)
c65ebc55
JW
3994{
3995 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
3996 int words = ia64_function_arg_words (type, mode);
3997 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
3998 enum machine_mode hfa_mode = VOIDmode;
3999
c65ebc55
JW
4000 /* If all argument slots are used, then it must go on the stack. */
4001 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4002 return 0;
4003
4004 /* Check for and handle homogeneous FP aggregates. */
4005 if (type)
4006 hfa_mode = hfa_element_mode (type, 0);
4007
4008 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4009 and unprototyped hfas are passed specially. */
4010 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4011 {
4012 rtx loc[16];
4013 int i = 0;
4014 int fp_regs = cum->fp_regs;
4015 int int_regs = cum->words + offset;
4016 int hfa_size = GET_MODE_SIZE (hfa_mode);
4017 int byte_size;
4018 int args_byte_size;
4019
4020 /* If prototyped, pass it in FR regs then GR regs.
4021 If not prototyped, pass it in both FR and GR regs.
4022
4023 If this is an SFmode aggregate, then it is possible to run out of
4024 FR regs while GR regs are still left. In that case, we pass the
4025 remaining part in the GR regs. */
4026
4027 /* Fill the FP regs. We do this always. We stop if we reach the end
4028 of the argument, the last FP register, or the last argument slot. */
4029
4030 byte_size = ((mode == BLKmode)
4031 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4032 args_byte_size = int_regs * UNITS_PER_WORD;
4033 offset = 0;
4034 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4035 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4036 {
4037 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4038 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4039 + fp_regs)),
4040 GEN_INT (offset));
c65ebc55
JW
4041 offset += hfa_size;
4042 args_byte_size += hfa_size;
4043 fp_regs++;
4044 }
4045
4046 /* If no prototype, then the whole thing must go in GR regs. */
4047 if (! cum->prototype)
4048 offset = 0;
4049 /* If this is an SFmode aggregate, then we might have some left over
4050 that needs to go in GR regs. */
4051 else if (byte_size != offset)
4052 int_regs += offset / UNITS_PER_WORD;
4053
4054 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4055
4056 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4057 {
4058 enum machine_mode gr_mode = DImode;
826b47cc 4059 unsigned int gr_size;
c65ebc55
JW
4060
4061 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4062 then this goes in a GR reg left adjusted/little endian, right
4063 adjusted/big endian. */
4064 /* ??? Currently this is handled wrong, because 4-byte hunks are
4065 always right adjusted/little endian. */
4066 if (offset & 0x4)
4067 gr_mode = SImode;
4068 /* If we have an even 4 byte hunk because the aggregate is a
4069 multiple of 4 bytes in size, then this goes in a GR reg right
4070 adjusted/little endian. */
4071 else if (byte_size - offset == 4)
4072 gr_mode = SImode;
4073
4074 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4075 gen_rtx_REG (gr_mode, (basereg
4076 + int_regs)),
4077 GEN_INT (offset));
826b47cc
ZW
4078
4079 gr_size = GET_MODE_SIZE (gr_mode);
4080 offset += gr_size;
4081 if (gr_size == UNITS_PER_WORD
4082 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4083 int_regs++;
4084 else if (gr_size > UNITS_PER_WORD)
4085 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55 4086 }
9dec91d4 4087 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55
JW
4088 }
4089
4090 /* Integral and aggregates go in general registers. If we have run out of
4091 FR registers, then FP values must also go in general registers. This can
4092 happen when we have a SFmode HFA. */
02befdf4
ZW
4093 else if (mode == TFmode || mode == TCmode
4094 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
4095 {
4096 int byte_size = ((mode == BLKmode)
4097 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4098 if (BYTES_BIG_ENDIAN
4099 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4100 && byte_size < UNITS_PER_WORD
4101 && byte_size > 0)
4102 {
4103 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4104 gen_rtx_REG (DImode,
4105 (basereg + cum->words
4106 + offset)),
4107 const0_rtx);
4108 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4109 }
4110 else
4111 return gen_rtx_REG (mode, basereg + cum->words + offset);
4112
4113 }
c65ebc55
JW
4114
4115 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4116 named, and in a GR register when unnamed. */
c65ebc55
JW
4117 else if (cum->prototype)
4118 {
f9c887ac 4119 if (named)
c65ebc55 4120 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
4121 /* In big-endian mode, an anonymous SFmode value must be represented
4122 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4123 the value into the high half of the general register. */
4124 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4125 return gen_rtx_PARALLEL (mode,
4126 gen_rtvec (1,
4127 gen_rtx_EXPR_LIST (VOIDmode,
4128 gen_rtx_REG (DImode, basereg + cum->words + offset),
4129 const0_rtx)));
4130 else
4131 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
4132 }
4133 /* If there is no prototype, then FP values go in both FR and GR
4134 registers. */
4135 else
4136 {
f9c887ac
ZW
4137 /* See comment above. */
4138 enum machine_mode inner_mode =
4139 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4140
c65ebc55
JW
4141 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4142 gen_rtx_REG (mode, (FR_ARG_FIRST
4143 + cum->fp_regs)),
4144 const0_rtx);
4145 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 4146 gen_rtx_REG (inner_mode,
c65ebc55
JW
4147 (basereg + cum->words
4148 + offset)),
4149 const0_rtx);
809d4ef1 4150
c65ebc55
JW
4151 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4152 }
4153}
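/* Editorial worked example (not part of GCC): a named, prototyped argument
   of type struct { float a, b, c; } starting in the first slot and with no
   earlier FP arguments takes the HFA path above and comes back as roughly

     (parallel [(expr_list (reg:SF f8) (const_int 0))
                (expr_list (reg:SF f9) (const_int 4))
                (expr_list (reg:SF f10) (const_int 8))])

   with nothing left over for the GR fill-in loop.  */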
4154
78a52f11 4155/* Return number of bytes, at the beginning of the argument, that must be
c65ebc55
JW
 4156 put in registers. 0 if the argument is entirely in registers or entirely
4157 in memory. */
4158
78a52f11
RH
4159static int
4160ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4161 tree type, bool named ATTRIBUTE_UNUSED)
c65ebc55 4162{
f57fc998
ZW
4163 int words = ia64_function_arg_words (type, mode);
4164 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4165
4166 /* If all argument slots are used, then it must go on the stack. */
4167 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4168 return 0;
4169
4170 /* It doesn't matter whether the argument goes in FR or GR regs. If
4171 it fits within the 8 argument slots, then it goes entirely in
4172 registers. If it extends past the last argument slot, then the rest
4173 goes on the stack. */
4174
4175 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4176 return 0;
4177
78a52f11 4178 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
c65ebc55
JW
4179}
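/* Editorial sketch (not part of GCC): the same decision with
   MAX_ARGUMENT_SLOTS == 8 and UNITS_PER_WORD == 8 spelled out.  FIRST_SLOT
   stands for cum->words + offset; the names are hypothetical.  */
static int
doc_partial_bytes (int first_slot, int words)
{
  if (first_slot >= 8)
    return 0;                   /* entirely on the stack */
  if (first_slot + words <= 8)
    return 0;                   /* entirely in registers */
  return (8 - first_slot) * 8;  /* e.g. 3 words at slot 6 -> 16 bytes in regs */
}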
4180
4181/* Update CUM to point after this argument. This is patterned after
4182 ia64_function_arg. */
4183
4184void
9c808aad
AJ
4185ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4186 tree type, int named)
c65ebc55 4187{
f57fc998
ZW
4188 int words = ia64_function_arg_words (type, mode);
4189 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4190 enum machine_mode hfa_mode = VOIDmode;
4191
4192 /* If all arg slots are already full, then there is nothing to do. */
4193 if (cum->words >= MAX_ARGUMENT_SLOTS)
4194 return;
4195
c65ebc55
JW
4196 cum->words += words + offset;
4197
4198 /* Check for and handle homogeneous FP aggregates. */
4199 if (type)
4200 hfa_mode = hfa_element_mode (type, 0);
4201
4202 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4203 and unprototyped hfas are passed specially. */
4204 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4205 {
4206 int fp_regs = cum->fp_regs;
4207 /* This is the original value of cum->words + offset. */
4208 int int_regs = cum->words - words;
4209 int hfa_size = GET_MODE_SIZE (hfa_mode);
4210 int byte_size;
4211 int args_byte_size;
4212
4213 /* If prototyped, pass it in FR regs then GR regs.
4214 If not prototyped, pass it in both FR and GR regs.
4215
4216 If this is an SFmode aggregate, then it is possible to run out of
4217 FR regs while GR regs are still left. In that case, we pass the
4218 remaining part in the GR regs. */
4219
4220 /* Fill the FP regs. We do this always. We stop if we reach the end
4221 of the argument, the last FP register, or the last argument slot. */
4222
4223 byte_size = ((mode == BLKmode)
4224 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4225 args_byte_size = int_regs * UNITS_PER_WORD;
4226 offset = 0;
4227 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4228 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4229 {
c65ebc55
JW
4230 offset += hfa_size;
4231 args_byte_size += hfa_size;
4232 fp_regs++;
4233 }
4234
4235 cum->fp_regs = fp_regs;
4236 }
4237
d13256a3
SE
4238 /* Integral and aggregates go in general registers. So do TFmode FP values.
4239 If we have run out of FR registers, then other FP values must also go in
4240 general registers. This can happen when we have a SFmode HFA. */
4241 else if (mode == TFmode || mode == TCmode
4242 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
648fe28b 4243 cum->int_regs = cum->words;
c65ebc55
JW
4244
4245 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4246 named, and in a GR register when unnamed. */
c65ebc55
JW
4247 else if (cum->prototype)
4248 {
4249 if (! named)
648fe28b 4250 cum->int_regs = cum->words;
c65ebc55
JW
4251 else
4252 /* ??? Complex types should not reach here. */
4253 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4254 }
4255 /* If there is no prototype, then FP values go in both FR and GR
4256 registers. */
4257 else
9c808aad 4258 {
648fe28b
RH
4259 /* ??? Complex types should not reach here. */
4260 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4261 cum->int_regs = cum->words;
4262 }
c65ebc55 4263}
51dcde6f 4264
d13256a3 4265/* Arguments with alignment larger than 8 bytes start at the next even
93348822 4266 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
d13256a3
SE
4267 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4268
4269int
4270ia64_function_arg_boundary (enum machine_mode mode, tree type)
4271{
4272
4273 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4274 return PARM_BOUNDARY * 2;
4275
4276 if (type)
4277 {
4278 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4279 return PARM_BOUNDARY * 2;
4280 else
4281 return PARM_BOUNDARY;
4282 }
4283
4284 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4285 return PARM_BOUNDARY * 2;
4286 else
4287 return PARM_BOUNDARY;
4288}
4289
599aedd9
RH
4290/* True if it is OK to do sibling call optimization for the specified
4291 call expression EXP. DECL will be the called function, or NULL if
4292 this is an indirect call. */
4293static bool
9c808aad 4294ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 4295{
097f3d48
JW
4296 /* We can't perform a sibcall if the current function has the syscall_linkage
4297 attribute. */
4298 if (lookup_attribute ("syscall_linkage",
4299 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4300 return false;
4301
b23ba0b8
RH
4302 /* We must always return with our current GP. This means we can
4303 only sibcall to functions defined in the current module. */
4304 return decl && (*targetm.binds_local_p) (decl);
599aedd9 4305}
c65ebc55 4306\f
c65ebc55
JW
4307
4308/* Implement va_arg. */
4309
23a60a04
JM
4310static tree
4311ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
cd3ce9b4 4312{
cd3ce9b4 4313 /* Variable sized types are passed by reference. */
08b0dc1b 4314 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
cd3ce9b4 4315 {
23a60a04
JM
4316 tree ptrtype = build_pointer_type (type);
4317 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
c2433d7d 4318 return build_va_arg_indirect_ref (addr);
cd3ce9b4
JM
4319 }
4320
4321 /* Aggregate arguments with alignment larger than 8 bytes start at
4322 the next even boundary. Integer and floating point arguments
4323 do so if they are larger than 8 bytes, whether or not they are
4324 also aligned larger than 8 bytes. */
4325 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4326 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4327 {
5be014d5
AP
4328 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4329 size_int (2 * UNITS_PER_WORD - 1));
4330 t = fold_convert (sizetype, t);
47a25a46 4331 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5be014d5
AP
4332 size_int (-2 * UNITS_PER_WORD));
4333 t = fold_convert (TREE_TYPE (valist), t);
07beea0d 4334 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (valist), valist, t);
cd3ce9b4
JM
4335 gimplify_and_add (t, pre_p);
4336 }
4337
23a60a04 4338 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 4339}
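/* Editorial sketch (not part of GCC): the tree built above is the usual
   round-up idiom.  For an argument that needs 16-byte (two-slot) alignment,
   the va pointer is bumped the way this plain-C helper would do it, before
   falling back to std_gimplify_va_arg_expr.  */
static char *
doc_round_va_to_16 (char *p)
{
  return (char *) (((unsigned long) p + 15) & ~15UL);
}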
c65ebc55
JW
4340\f
 4341/* Return 1 if the function return value is returned in memory. Return 0 if it is
4342 in a register. */
4343
351a758b
KH
4344static bool
4345ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
c65ebc55
JW
4346{
4347 enum machine_mode mode;
4348 enum machine_mode hfa_mode;
487b97e0 4349 HOST_WIDE_INT byte_size;
c65ebc55
JW
4350
4351 mode = TYPE_MODE (valtype);
487b97e0
RH
4352 byte_size = GET_MODE_SIZE (mode);
4353 if (mode == BLKmode)
4354 {
4355 byte_size = int_size_in_bytes (valtype);
4356 if (byte_size < 0)
351a758b 4357 return true;
487b97e0 4358 }
c65ebc55
JW
4359
4360 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4361
4362 hfa_mode = hfa_element_mode (valtype, 0);
4363 if (hfa_mode != VOIDmode)
4364 {
4365 int hfa_size = GET_MODE_SIZE (hfa_mode);
4366
c65ebc55 4367 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 4368 return true;
c65ebc55 4369 else
351a758b 4370 return false;
c65ebc55 4371 }
c65ebc55 4372 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 4373 return true;
c65ebc55 4374 else
351a758b 4375 return false;
c65ebc55
JW
4376}
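/* Editorial worked example (not part of GCC): an HFA of eight doubles
   (byte_size 64, hfa_size 8) gives byte_size / hfa_size == 8, which does not
   exceed MAX_ARGUMENT_SLOTS, so it is returned in FP registers; nine doubles
   would be forced into memory.  A non-HFA struct is returned in memory once
   it exceeds UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes.  */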
4377
4378/* Return rtx for register that holds the function return value. */
4379
4380rtx
9c808aad 4381ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
c65ebc55
JW
4382{
4383 enum machine_mode mode;
4384 enum machine_mode hfa_mode;
4385
4386 mode = TYPE_MODE (valtype);
4387 hfa_mode = hfa_element_mode (valtype, 0);
4388
4389 if (hfa_mode != VOIDmode)
4390 {
4391 rtx loc[8];
4392 int i;
4393 int hfa_size;
4394 int byte_size;
4395 int offset;
4396
4397 hfa_size = GET_MODE_SIZE (hfa_mode);
4398 byte_size = ((mode == BLKmode)
4399 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4400 offset = 0;
4401 for (i = 0; offset < byte_size; i++)
4402 {
4403 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4404 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4405 GEN_INT (offset));
c65ebc55
JW
4406 offset += hfa_size;
4407 }
9dec91d4 4408 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4409 }
f57fc998 4410 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
4411 return gen_rtx_REG (mode, FR_ARG_FIRST);
4412 else
3870df96 4413 {
8c5cacfd
RH
4414 bool need_parallel = false;
4415
4416 /* In big-endian mode, we need to manage the layout of aggregates
4417 in the registers so that we get the bits properly aligned in
4418 the highpart of the registers. */
3870df96
SE
4419 if (BYTES_BIG_ENDIAN
4420 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
8c5cacfd
RH
4421 need_parallel = true;
4422
4423 /* Something like struct S { long double x; char a[0] } is not an
4424 HFA structure, and therefore doesn't go in fp registers. But
4425 the middle-end will give it XFmode anyway, and XFmode values
4426 don't normally fit in integer registers. So we need to smuggle
4427 the value inside a parallel. */
4de67c26 4428 else if (mode == XFmode || mode == XCmode || mode == RFmode)
8c5cacfd
RH
4429 need_parallel = true;
4430
4431 if (need_parallel)
3870df96
SE
4432 {
4433 rtx loc[8];
4434 int offset;
4435 int bytesize;
4436 int i;
4437
4438 offset = 0;
4439 bytesize = int_size_in_bytes (valtype);
543144ed
JM
4440 /* An empty PARALLEL is invalid here, but the return value
4441 doesn't matter for empty structs. */
4442 if (bytesize == 0)
4443 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96
SE
4444 for (i = 0; offset < bytesize; i++)
4445 {
4446 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4447 gen_rtx_REG (DImode,
4448 GR_RET_FIRST + i),
4449 GEN_INT (offset));
4450 offset += UNITS_PER_WORD;
4451 }
4452 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4453 }
8c5cacfd
RH
4454
4455 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96 4456 }
c65ebc55
JW
4457}
4458
fdbe66f2 4459/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6b2300b3
JJ
4460 We need to emit DTP-relative relocations. */
4461
fdbe66f2 4462static void
9c808aad 4463ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3 4464{
6f3113ed
SE
4465 gcc_assert (size == 4 || size == 8);
4466 if (size == 4)
4467 fputs ("\tdata4.ua\t@dtprel(", file);
4468 else
4469 fputs ("\tdata8.ua\t@dtprel(", file);
6b2300b3
JJ
4470 output_addr_const (file, x);
4471 fputs (")", file);
4472}
4473
c65ebc55
JW
4474/* Print a memory address as an operand to reference that memory location. */
4475
4476/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4477 also call this from ia64_print_operand for memory addresses. */
4478
4479void
9c808aad
AJ
4480ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4481 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
4482{
4483}
4484
3569057d 4485/* Print an operand to an assembler instruction.
c65ebc55
JW
4486 C Swap and print a comparison operator.
4487 D Print an FP comparison operator.
4488 E Print 32 - constant, for SImode shifts as extract.
66db6b45 4489 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
4490 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4491 a floating point register emitted normally.
4492 I Invert a predicate register by adding 1.
e5bde68a 4493 J Select the proper predicate register for a condition.
6b6c1201 4494 j Select the inverse predicate register for a condition.
c65ebc55
JW
4495 O Append .acq for volatile load.
4496 P Postincrement of a MEM.
4497 Q Append .rel for volatile store.
4883241c 4498 R Print .s .d or nothing for a single, double or no truncation.
c65ebc55
JW
4499 S Shift amount for shladd instruction.
4500 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4501 for Intel assembler.
4502 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4503 for Intel assembler.
a71aef0b 4504 X A pair of floating point registers.
c65ebc55 4505 r Print register name, or constant 0 as r0. HP compatibility for
f61134e8
RH
4506 Linux kernel.
4507 v Print vector constant value as an 8-byte integer value. */
4508
c65ebc55 4509void
9c808aad 4510ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 4511{
e57b9d65
RH
4512 const char *str;
4513
c65ebc55
JW
4514 switch (code)
4515 {
c65ebc55
JW
4516 case 0:
4517 /* Handled below. */
4518 break;
809d4ef1 4519
c65ebc55
JW
4520 case 'C':
4521 {
4522 enum rtx_code c = swap_condition (GET_CODE (x));
4523 fputs (GET_RTX_NAME (c), file);
4524 return;
4525 }
4526
4527 case 'D':
e57b9d65
RH
4528 switch (GET_CODE (x))
4529 {
4530 case NE:
4531 str = "neq";
4532 break;
4533 case UNORDERED:
4534 str = "unord";
4535 break;
4536 case ORDERED:
4537 str = "ord";
4538 break;
86ad1da0
SE
4539 case UNLT:
4540 str = "nge";
4541 break;
4542 case UNLE:
4543 str = "ngt";
4544 break;
4545 case UNGT:
4546 str = "nle";
4547 break;
4548 case UNGE:
4549 str = "nlt";
4550 break;
e57b9d65
RH
4551 default:
4552 str = GET_RTX_NAME (GET_CODE (x));
4553 break;
4554 }
4555 fputs (str, file);
c65ebc55
JW
4556 return;
4557
4558 case 'E':
4559 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4560 return;
4561
66db6b45
RH
4562 case 'e':
4563 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4564 return;
4565
c65ebc55
JW
4566 case 'F':
4567 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 4568 str = reg_names [FR_REG (0)];
c65ebc55 4569 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 4570 str = reg_names [FR_REG (1)];
c65ebc55 4571 else
e820471b
NS
4572 {
4573 gcc_assert (GET_CODE (x) == REG);
4574 str = reg_names [REGNO (x)];
4575 }
e57b9d65 4576 fputs (str, file);
c65ebc55
JW
4577 return;
4578
4579 case 'I':
4580 fputs (reg_names [REGNO (x) + 1], file);
4581 return;
4582
e5bde68a 4583 case 'J':
6b6c1201
RH
4584 case 'j':
4585 {
4586 unsigned int regno = REGNO (XEXP (x, 0));
4587 if (GET_CODE (x) == EQ)
4588 regno += 1;
4589 if (code == 'j')
4590 regno ^= 1;
4591 fputs (reg_names [regno], file);
4592 }
e5bde68a
RH
4593 return;
4594
c65ebc55
JW
4595 case 'O':
4596 if (MEM_VOLATILE_P (x))
4597 fputs(".acq", file);
4598 return;
4599
4600 case 'P':
4601 {
4b983fdc 4602 HOST_WIDE_INT value;
c65ebc55 4603
4b983fdc
RH
4604 switch (GET_CODE (XEXP (x, 0)))
4605 {
4606 default:
4607 return;
4608
4609 case POST_MODIFY:
4610 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4611 if (GET_CODE (x) == CONST_INT)
08012cda 4612 value = INTVAL (x);
e820471b 4613 else
4b983fdc 4614 {
e820471b 4615 gcc_assert (GET_CODE (x) == REG);
08012cda 4616 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
4617 return;
4618 }
4b983fdc 4619 break;
c65ebc55 4620
4b983fdc
RH
4621 case POST_INC:
4622 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 4623 break;
c65ebc55 4624
4b983fdc 4625 case POST_DEC:
08012cda 4626 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
4627 break;
4628 }
809d4ef1 4629
4a0a75dd 4630 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
4631 return;
4632 }
4633
4634 case 'Q':
4635 if (MEM_VOLATILE_P (x))
4636 fputs(".rel", file);
4637 return;
4638
4883241c
SE
4639 case 'R':
4640 if (x == CONST0_RTX (GET_MODE (x)))
4641 fputs(".s", file);
4642 else if (x == CONST1_RTX (GET_MODE (x)))
4643 fputs(".d", file);
4644 else if (x == CONST2_RTX (GET_MODE (x)))
4645 ;
4646 else
4647 output_operand_lossage ("invalid %%R value");
4648 return;
4649
c65ebc55 4650 case 'S':
809d4ef1 4651 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
4652 return;
4653
4654 case 'T':
4655 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4656 {
809d4ef1 4657 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4658 return;
4659 }
4660 break;
4661
4662 case 'U':
4663 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4664 {
3b572406 4665 const char *prefix = "0x";
c65ebc55
JW
4666 if (INTVAL (x) & 0x80000000)
4667 {
4668 fprintf (file, "0xffffffff");
4669 prefix = "";
4670 }
809d4ef1 4671 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4672 return;
4673 }
4674 break;
809d4ef1 4675
a71aef0b
JB
4676 case 'X':
4677 {
4678 unsigned int regno = REGNO (x);
4679 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4680 }
4681 return;
4682
c65ebc55 4683 case 'r':
18a3c539
JW
4684 /* If this operand is the constant zero, write it as register zero.
4685 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
4686 if (GET_CODE (x) == REG)
4687 fputs (reg_names[REGNO (x)], file);
4688 else if (x == CONST0_RTX (GET_MODE (x)))
4689 fputs ("r0", file);
18a3c539
JW
4690 else if (GET_CODE (x) == CONST_INT)
4691 output_addr_const (file, x);
c65ebc55
JW
4692 else
4693 output_operand_lossage ("invalid %%r value");
4694 return;
4695
f61134e8
RH
4696 case 'v':
4697 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4698 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4699 break;
4700
85548039
RH
4701 case '+':
4702 {
4703 const char *which;
9c808aad 4704
85548039
RH
4705 /* For conditional branches, returns or calls, substitute
4706 sptk, dptk, dpnt, or spnt for %s. */
4707 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4708 if (x)
4709 {
4710 int pred_val = INTVAL (XEXP (x, 0));
4711
4712 /* Guess top and bottom 10% statically predicted. */
2c9e13f3
JH
4713 if (pred_val < REG_BR_PROB_BASE / 50
4714 && br_prob_note_reliable_p (x))
85548039
RH
4715 which = ".spnt";
4716 else if (pred_val < REG_BR_PROB_BASE / 2)
4717 which = ".dpnt";
2c9e13f3
JH
4718 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4719 || !br_prob_note_reliable_p (x))
85548039
RH
4720 which = ".dptk";
4721 else
4722 which = ".sptk";
4723 }
4724 else if (GET_CODE (current_output_insn) == CALL_INSN)
4725 which = ".sptk";
4726 else
4727 which = ".dptk";
4728
4729 fputs (which, file);
4730 return;
4731 }
4732
6f8aa100
RH
4733 case ',':
4734 x = current_insn_predicate;
4735 if (x)
4736 {
4737 unsigned int regno = REGNO (XEXP (x, 0));
4738 if (GET_CODE (x) == EQ)
4739 regno += 1;
6f8aa100
RH
4740 fprintf (file, "(%s) ", reg_names [regno]);
4741 }
4742 return;
4743
c65ebc55
JW
4744 default:
4745 output_operand_lossage ("ia64_print_operand: unknown code");
4746 return;
4747 }
4748
4749 switch (GET_CODE (x))
4750 {
4751 /* This happens for the spill/restore instructions. */
4752 case POST_INC:
4b983fdc
RH
4753 case POST_DEC:
4754 case POST_MODIFY:
c65ebc55 4755 x = XEXP (x, 0);
ed168e45 4756 /* ... fall through ... */
c65ebc55
JW
4757
4758 case REG:
4759 fputs (reg_names [REGNO (x)], file);
4760 break;
4761
4762 case MEM:
4763 {
4764 rtx addr = XEXP (x, 0);
ec8e098d 4765 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
4766 addr = XEXP (addr, 0);
4767 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4768 break;
4769 }
809d4ef1 4770
c65ebc55
JW
4771 default:
4772 output_addr_const (file, x);
4773 break;
4774 }
4775
4776 return;
4777}
c65ebc55 4778\f
3c50106f
RH
4779/* Compute a (partial) cost for rtx X. Return true if the complete
4780 cost has been computed, and false if subexpressions should be
4781 scanned. In either case, *TOTAL contains the cost result. */
4782/* ??? This is incomplete. */
4783
4784static bool
9c808aad 4785ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
4786{
4787 switch (code)
4788 {
4789 case CONST_INT:
4790 switch (outer_code)
4791 {
4792 case SET:
13f70342 4793 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
3c50106f
RH
4794 return true;
4795 case PLUS:
13f70342 4796 if (satisfies_constraint_I (x))
3c50106f 4797 *total = 0;
13f70342 4798 else if (satisfies_constraint_J (x))
3c50106f
RH
4799 *total = 1;
4800 else
4801 *total = COSTS_N_INSNS (1);
4802 return true;
4803 default:
13f70342 4804 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
3c50106f
RH
4805 *total = 0;
4806 else
4807 *total = COSTS_N_INSNS (1);
4808 return true;
4809 }
4810
4811 case CONST_DOUBLE:
4812 *total = COSTS_N_INSNS (1);
4813 return true;
4814
4815 case CONST:
4816 case SYMBOL_REF:
4817 case LABEL_REF:
4818 *total = COSTS_N_INSNS (3);
4819 return true;
4820
4821 case MULT:
4822 /* For multiplies wider than HImode, we have to go to the FPU,
4823 which normally involves copies. Plus there's the latency
4824 of the multiply itself, and the latency of the instructions to
4825 transfer integer regs to FP regs. */
4826 /* ??? Check for FP mode. */
4827 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4828 *total = COSTS_N_INSNS (10);
4829 else
4830 *total = COSTS_N_INSNS (2);
4831 return true;
4832
4833 case PLUS:
4834 case MINUS:
4835 case ASHIFT:
4836 case ASHIFTRT:
4837 case LSHIFTRT:
4838 *total = COSTS_N_INSNS (1);
4839 return true;
4840
4841 case DIV:
4842 case UDIV:
4843 case MOD:
4844 case UMOD:
4845 /* We make divide expensive, so that divide-by-constant will be
4846 optimized to a multiply. */
4847 *total = COSTS_N_INSNS (60);
4848 return true;
4849
4850 default:
4851 return false;
4852 }
4853}
4854
9e4f94de 4855/* Calculate the cost of moving data from a register in class FROM to
7109d286 4856 one in class TO, using MODE. */
5527bf14
RH
4857
4858int
9c808aad
AJ
4859ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4860 enum reg_class to)
5527bf14 4861{
7109d286
RH
4862 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4863 if (to == ADDL_REGS)
4864 to = GR_REGS;
4865 if (from == ADDL_REGS)
4866 from = GR_REGS;
4867
4868 /* All costs are symmetric, so reduce cases by putting the
4869 lower number class as the destination. */
4870 if (from < to)
4871 {
4872 enum reg_class tmp = to;
4873 to = from, from = tmp;
4874 }
4875
02befdf4 4876 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286
RH
4877 so that we get secondary memory reloads. Between FR_REGS,
4878 we have to make this at least as expensive as MEMORY_MOVE_COST
4879 to avoid spectacularly poor register class preferencing. */
4de67c26 4880 if (mode == XFmode || mode == RFmode)
7109d286
RH
4881 {
4882 if (to != GR_REGS || from != GR_REGS)
4883 return MEMORY_MOVE_COST (mode, to, 0);
4884 else
4885 return 3;
4886 }
4887
4888 switch (to)
4889 {
4890 case PR_REGS:
4891 /* Moving between PR registers takes two insns. */
4892 if (from == PR_REGS)
4893 return 3;
4894 /* Moving between PR and anything but GR is impossible. */
4895 if (from != GR_REGS)
4896 return MEMORY_MOVE_COST (mode, to, 0);
4897 break;
4898
4899 case BR_REGS:
4900 /* Moving between BR and anything but GR is impossible. */
4901 if (from != GR_REGS && from != GR_AND_BR_REGS)
4902 return MEMORY_MOVE_COST (mode, to, 0);
4903 break;
4904
4905 case AR_I_REGS:
4906 case AR_M_REGS:
4907 /* Moving between AR and anything but GR is impossible. */
4908 if (from != GR_REGS)
4909 return MEMORY_MOVE_COST (mode, to, 0);
4910 break;
4911
4912 case GR_REGS:
4913 case FR_REGS:
a71aef0b 4914 case FP_REGS:
7109d286
RH
4915 case GR_AND_FR_REGS:
4916 case GR_AND_BR_REGS:
4917 case ALL_REGS:
4918 break;
4919
4920 default:
e820471b 4921 gcc_unreachable ();
7109d286 4922 }
3f622353 4923
5527bf14
RH
4924 return 2;
4925}
c65ebc55 4926
f61134e8
RH
4927/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4928 to use when copying X into that class. */
4929
4930enum reg_class
4931ia64_preferred_reload_class (rtx x, enum reg_class class)
4932{
4933 switch (class)
4934 {
4935 case FR_REGS:
a71aef0b 4936 case FP_REGS:
f61134e8
RH
4937 /* Don't allow volatile mem reloads into floating point registers.
4938 This is defined to force reload to choose the r/m case instead
4939 of the f/f case when reloading (set (reg fX) (mem/v)). */
4940 if (MEM_P (x) && MEM_VOLATILE_P (x))
4941 return NO_REGS;
4942
4943 /* Force all unrecognized constants into the constant pool. */
4944 if (CONSTANT_P (x))
4945 return NO_REGS;
4946 break;
4947
4948 case AR_M_REGS:
4949 case AR_I_REGS:
4950 if (!OBJECT_P (x))
4951 return NO_REGS;
4952 break;
4953
4954 default:
4955 break;
4956 }
4957
4958 return class;
4959}
4960
c65ebc55
JW
4961/* This function returns the register class required for a secondary
4962 register when copying between one of the registers in CLASS, and X,
4963 using MODE. A return value of NO_REGS means that no secondary register
4964 is required. */
4965
4966enum reg_class
9c808aad
AJ
4967ia64_secondary_reload_class (enum reg_class class,
4968 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
4969{
4970 int regno = -1;
4971
4972 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4973 regno = true_regnum (x);
4974
97e242b0
RH
4975 switch (class)
4976 {
4977 case BR_REGS:
7109d286
RH
4978 case AR_M_REGS:
4979 case AR_I_REGS:
4980 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4981 interaction. We end up with two pseudos with overlapping lifetimes
 4982 both of which are equiv to the same constant, and both of which need
4983 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4984 changes depending on the path length, which means the qty_first_reg
4985 check in make_regs_eqv can give different answers at different times.
4986 At some point I'll probably need a reload_indi pattern to handle
4987 this.
4988
4989 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4990 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4991 non-general registers for good measure. */
4992 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
4993 return GR_REGS;
4994
4995 /* This is needed if a pseudo used as a call_operand gets spilled to a
4996 stack slot. */
4997 if (GET_CODE (x) == MEM)
4998 return GR_REGS;
4999 break;
5000
5001 case FR_REGS:
a71aef0b 5002 case FP_REGS:
c51e6d85 5003 /* Need to go through general registers to get to other class regs. */
7109d286
RH
5004 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5005 return GR_REGS;
9c808aad 5006
97e242b0
RH
5007 /* This can happen when a paradoxical subreg is an operand to the
5008 muldi3 pattern. */
5009 /* ??? This shouldn't be necessary after instruction scheduling is
5010 enabled, because paradoxical subregs are not accepted by
5011 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5012 stop the paradoxical subreg stupidity in the *_operand functions
5013 in recog.c. */
5014 if (GET_CODE (x) == MEM
5015 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5016 || GET_MODE (x) == QImode))
5017 return GR_REGS;
5018
5019 /* This can happen because of the ior/and/etc patterns that accept FP
5020 registers as operands. If the third operand is a constant, then it
5021 needs to be reloaded into a FP register. */
5022 if (GET_CODE (x) == CONST_INT)
5023 return GR_REGS;
5024
5025 /* This can happen because of register elimination in a muldi3 insn.
5026 E.g. `26107 * (unsigned long)&u'. */
5027 if (GET_CODE (x) == PLUS)
5028 return GR_REGS;
5029 break;
5030
5031 case PR_REGS:
f2f90c63 5032 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
5033 and the function has a nonlocal goto. This is because global
5034 does not allocate call crossing pseudos to hard registers when
5035 current_function_has_nonlocal_goto is true. This is relatively
5036 common for C++ programs that use exceptions. To reproduce,
5037 return NO_REGS and compile libstdc++. */
5038 if (GET_CODE (x) == MEM)
5039 return GR_REGS;
f2f90c63
RH
5040
5041 /* This can happen when we take a BImode subreg of a DImode value,
5042 and that DImode value winds up in some non-GR register. */
5043 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5044 return GR_REGS;
97e242b0
RH
5045 break;
5046
5047 default:
5048 break;
5049 }
c65ebc55
JW
5050
5051 return NO_REGS;
5052}
5053
c65ebc55
JW
5054\f
5055/* Parse the -mfixed-range= option string. */
5056
5057static void
9c808aad 5058fix_range (const char *const_str)
c65ebc55
JW
5059{
5060 int i, first, last;
3b572406 5061 char *str, *dash, *comma;
c65ebc55
JW
5062
 5063 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5064 REG2 are either register names or register numbers. The effect
5065 of this option is to mark the registers in the range from REG1 to
5066 REG2 as ``fixed'' so they won't be used by the compiler. This is
5067 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5068
3b572406
RH
5069 i = strlen (const_str);
5070 str = (char *) alloca (i + 1);
5071 memcpy (str, const_str, i + 1);
5072
c65ebc55
JW
5073 while (1)
5074 {
5075 dash = strchr (str, '-');
5076 if (!dash)
5077 {
d4ee4d25 5078 warning (0, "value of -mfixed-range must have form REG1-REG2");
c65ebc55
JW
5079 return;
5080 }
5081 *dash = '\0';
5082
5083 comma = strchr (dash + 1, ',');
5084 if (comma)
5085 *comma = '\0';
5086
5087 first = decode_reg_name (str);
5088 if (first < 0)
5089 {
d4ee4d25 5090 warning (0, "unknown register name: %s", str);
c65ebc55
JW
5091 return;
5092 }
5093
5094 last = decode_reg_name (dash + 1);
5095 if (last < 0)
5096 {
d4ee4d25 5097 warning (0, "unknown register name: %s", dash + 1);
c65ebc55
JW
5098 return;
5099 }
5100
5101 *dash = '-';
5102
5103 if (first > last)
5104 {
d4ee4d25 5105 warning (0, "%s-%s is an empty range", str, dash + 1);
c65ebc55
JW
5106 return;
5107 }
5108
5109 for (i = first; i <= last; ++i)
5110 fixed_regs[i] = call_used_regs[i] = 1;
5111
5112 if (!comma)
5113 break;
5114
5115 *comma = ',';
5116 str = comma + 1;
5117 }
5118}
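/* Usage sketch (an explanatory addition, not part of the original source):
   a command line such as

     gcc -mfixed-range=f32-f127 ...

   makes the loop above mark f32 through f127 as fixed and call-used, and
   several ranges can be chained with commas, e.g.
   -mfixed-range=f32-f127,r40-r47 (the register names here are only
   examples).  */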
5119
dbdd120f
RH
5120/* Implement TARGET_HANDLE_OPTION. */
5121
5122static bool
55bea00a 5123ia64_handle_option (size_t code, const char *arg, int value)
37b15744 5124{
dbdd120f
RH
5125 switch (code)
5126 {
5127 case OPT_mfixed_range_:
5128 fix_range (arg);
5129 return true;
5130
5131 case OPT_mtls_size_:
55bea00a
RS
5132 if (value != 14 && value != 22 && value != 64)
5133 error ("bad value %<%s%> for -mtls-size= switch", arg);
5134 return true;
dbdd120f
RH
5135
5136 case OPT_mtune_:
5137 {
5138 static struct pta
5139 {
5140 const char *name; /* processor name or nickname. */
5141 enum processor_type processor;
5142 }
5143 const processor_alias_table[] =
5144 {
5145 {"itanium", PROCESSOR_ITANIUM},
5146 {"itanium1", PROCESSOR_ITANIUM},
5147 {"merced", PROCESSOR_ITANIUM},
5148 {"itanium2", PROCESSOR_ITANIUM2},
5149 {"mckinley", PROCESSOR_ITANIUM2},
5150 };
5151 int const pta_size = ARRAY_SIZE (processor_alias_table);
5152 int i;
5153
5154 for (i = 0; i < pta_size; i++)
5155 if (!strcmp (arg, processor_alias_table[i].name))
5156 {
5157 ia64_tune = processor_alias_table[i].processor;
5158 break;
5159 }
5160 if (i == pta_size)
5161 error ("bad value %<%s%> for -mtune= switch", arg);
5162 return true;
5163 }
5164
5165 default:
5166 return true;
5167 }
37b15744 5168}
0c96007e 5169
bacf5b96 5170/* Implement OVERRIDE_OPTIONS. */
c65ebc55
JW
5171
5172void
9c808aad 5173ia64_override_options (void)
c65ebc55 5174{
59da9a7d
JW
5175 if (TARGET_AUTO_PIC)
5176 target_flags |= MASK_CONST_GP;
5177
dbdd120f 5178 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
b38ba463 5179 {
d4ee4d25 5180 warning (0, "not yet implemented: latency-optimized inline square root");
dbdd120f 5181 TARGET_INLINE_SQRT = INL_MAX_THR;
7b6e506e
RH
5182 }
5183
68340ae9
BS
5184 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5185 flag_schedule_insns_after_reload = 0;
5186
c65ebc55
JW
5187 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5188
0c96007e 5189 init_machine_status = ia64_init_machine_status;
c65ebc55 5190}
dbdd120f 5191
6fb5fa3c
DB
5192/* Initialize the record of emitted frame related registers. */
5193
5194void ia64_init_expanders (void)
5195{
5196 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5197}
5198
dbdd120f
RH
5199static struct machine_function *
5200ia64_init_machine_status (void)
5201{
5202 return ggc_alloc_cleared (sizeof (struct machine_function));
5203}
c65ebc55 5204\f
9c808aad
AJ
5205static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5206static enum attr_type ia64_safe_type (rtx);
2130b7fb 5207
2130b7fb 5208static enum attr_itanium_class
9c808aad 5209ia64_safe_itanium_class (rtx insn)
2130b7fb
BS
5210{
5211 if (recog_memoized (insn) >= 0)
5212 return get_attr_itanium_class (insn);
5213 else
5214 return ITANIUM_CLASS_UNKNOWN;
5215}
5216
5217static enum attr_type
9c808aad 5218ia64_safe_type (rtx insn)
2130b7fb
BS
5219{
5220 if (recog_memoized (insn) >= 0)
5221 return get_attr_type (insn);
5222 else
5223 return TYPE_UNKNOWN;
5224}
5225\f
c65ebc55
JW
5226/* The following collection of routines emit instruction group stop bits as
5227 necessary to avoid dependencies. */
5228
5229/* Need to track some additional registers as far as serialization is
5230 concerned so we can properly handle br.call and br.ret. We could
5231 make these registers visible to gcc, but since these registers are
5232 never explicitly used in gcc generated code, it seems wasteful to
5233 do so (plus it would make the call and return patterns needlessly
5234 complex). */
c65ebc55 5235#define REG_RP (BR_REG (0))
c65ebc55 5236#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
5237/* This is used for volatile asms which may require a stop bit immediately
5238 before and after them. */
5527bf14 5239#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
5240#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5241#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 5242
f2f90c63
RH
5243/* For each register, we keep track of how it has been written in the
5244 current instruction group.
5245
5246 If a register is written unconditionally (no qualifying predicate),
5247 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5248
5249 If a register is written if its qualifying predicate P is true, we
5250 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5251 may be written again by the complement of P (P^1) and when this happens,
5252 WRITE_COUNT gets set to 2.
5253
5254 The result of this is that whenever an insn attempts to write a register
e03f5d43 5255 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
5256
5257 If a predicate register is written by a floating-point insn, we set
5258 WRITTEN_BY_FP to true.
5259
5260 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5261 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5262
c65ebc55
JW
5263struct reg_write_state
5264{
f2f90c63
RH
5265 unsigned int write_count : 2;
5266 unsigned int first_pred : 16;
5267 unsigned int written_by_fp : 1;
5268 unsigned int written_by_and : 1;
5269 unsigned int written_by_or : 1;
c65ebc55
JW
5270};
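/* Worked example (an explanatory addition, not part of the original
   source): for a register first written under predicate p6, rws_update
   records write_count = 1 and first_pred = p6.  A second write under p7
   (p6 ^ 1, its complement) is still legal within the group and bumps
   write_count to 2; after that, any further write to the register in the
   same instruction group makes rws_access_regno request a stop bit.  */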
5271
5272/* Cumulative info for the current instruction group. */
5273struct reg_write_state rws_sum[NUM_REGS];
5274/* Info for the current instruction. This gets copied to rws_sum after a
5275 stop bit is emitted. */
5276struct reg_write_state rws_insn[NUM_REGS];
5277
25250265 5278/* Indicates whether this is the first instruction after a stop bit,
e820471b
NS
5279 in which case we don't need another stop bit. Without this,
5280 ia64_variable_issue will die when scheduling an alloc. */
25250265
JW
5281static int first_instruction;
5282
c65ebc55
JW
5283/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5284 RTL for one instruction. */
5285struct reg_flags
5286{
5287 unsigned int is_write : 1; /* Is register being written? */
5288 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5289 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
5290 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5291 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 5292 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
5293};
5294
9c808aad
AJ
5295static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
5296static int rws_access_regno (int, struct reg_flags, int);
5297static int rws_access_reg (rtx, struct reg_flags, int);
c1bc6ca8
JW
5298static void update_set_flags (rtx, struct reg_flags *);
5299static int set_src_needs_barrier (rtx, struct reg_flags, int);
9c808aad
AJ
5300static int rtx_needs_barrier (rtx, struct reg_flags, int);
5301static void init_insn_group_barriers (void);
c1bc6ca8
JW
5302static int group_barrier_needed (rtx);
5303static int safe_group_barrier_needed (rtx);
3b572406 5304
c65ebc55
JW
5305/* Update *RWS for REGNO, which is being written by the current instruction,
5306 with predicate PRED, and associated register flags in FLAGS. */
5307
5308static void
9c808aad 5309rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
c65ebc55 5310{
3e7c7805
BS
5311 if (pred)
5312 rws[regno].write_count++;
5313 else
5314 rws[regno].write_count = 2;
c65ebc55 5315 rws[regno].written_by_fp |= flags.is_fp;
f2f90c63
RH
5316 /* ??? Not tracking and/or across differing predicates. */
5317 rws[regno].written_by_and = flags.is_and;
5318 rws[regno].written_by_or = flags.is_or;
c65ebc55
JW
5319 rws[regno].first_pred = pred;
5320}
5321
5322/* Handle an access to register REGNO of type FLAGS using predicate register
5323 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
5324 a dependency with an earlier instruction in the same group. */
5325
5326static int
9c808aad 5327rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
5328{
5329 int need_barrier = 0;
c65ebc55 5330
e820471b 5331 gcc_assert (regno < NUM_REGS);
c65ebc55 5332
f2f90c63
RH
5333 if (! PR_REGNO_P (regno))
5334 flags.is_and = flags.is_or = 0;
5335
c65ebc55
JW
5336 if (flags.is_write)
5337 {
12c2c7aa
JW
5338 int write_count;
5339
c65ebc55 5340 /* One insn writes same reg multiple times? */
e820471b 5341 gcc_assert (!rws_insn[regno].write_count);
c65ebc55
JW
5342
5343 /* Update info for current instruction. */
5344 rws_update (rws_insn, regno, flags, pred);
12c2c7aa 5345 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
5346
5347 switch (write_count)
c65ebc55
JW
5348 {
5349 case 0:
5350 /* The register has not been written yet. */
5351 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
5352 break;
5353
5354 case 1:
5355 /* The register has been written via a predicate. If this is
5356 not a complementary predicate, then we need a barrier. */
5357 /* ??? This assumes that P and P+1 are always complementary
5358 predicates for P even. */
f2f90c63 5359 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 5360 ;
f2f90c63
RH
5361 else if (flags.is_or && rws_sum[regno].written_by_or)
5362 ;
5363 else if ((rws_sum[regno].first_pred ^ 1) != pred)
c65ebc55
JW
5364 need_barrier = 1;
5365 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
5366 break;
5367
5368 case 2:
5369 /* The register has been unconditionally written already. We
5370 need a barrier. */
f2f90c63
RH
5371 if (flags.is_and && rws_sum[regno].written_by_and)
5372 ;
5373 else if (flags.is_or && rws_sum[regno].written_by_or)
5374 ;
5375 else
5376 need_barrier = 1;
5377 rws_sum[regno].written_by_and = flags.is_and;
5378 rws_sum[regno].written_by_or = flags.is_or;
c65ebc55
JW
5379 break;
5380
5381 default:
e820471b 5382 gcc_unreachable ();
c65ebc55
JW
5383 }
5384 }
5385 else
5386 {
5387 if (flags.is_branch)
5388 {
 5389 /* Branches have several RAW exceptions that allow us to avoid
5390 barriers. */
5391
5527bf14 5392 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
5393 /* RAW dependencies on branch regs are permissible as long
5394 as the writer is a non-branch instruction. Since we
5395 never generate code that uses a branch register written
5396 by a branch instruction, handling this case is
5397 easy. */
5527bf14 5398 return 0;
c65ebc55
JW
5399
5400 if (REGNO_REG_CLASS (regno) == PR_REGS
5401 && ! rws_sum[regno].written_by_fp)
5402 /* The predicates of a branch are available within the
5403 same insn group as long as the predicate was written by
ed168e45 5404 something other than a floating-point instruction. */
c65ebc55
JW
5405 return 0;
5406 }
5407
f2f90c63
RH
5408 if (flags.is_and && rws_sum[regno].written_by_and)
5409 return 0;
5410 if (flags.is_or && rws_sum[regno].written_by_or)
5411 return 0;
5412
c65ebc55
JW
5413 switch (rws_sum[regno].write_count)
5414 {
5415 case 0:
5416 /* The register has not been written yet. */
5417 break;
5418
5419 case 1:
5420 /* The register has been written via a predicate. If this is
5421 not a complementary predicate, then we need a barrier. */
5422 /* ??? This assumes that P and P+1 are always complementary
5423 predicates for P even. */
5424 if ((rws_sum[regno].first_pred ^ 1) != pred)
5425 need_barrier = 1;
5426 break;
5427
5428 case 2:
5429 /* The register has been unconditionally written already. We
5430 need a barrier. */
5431 need_barrier = 1;
5432 break;
5433
5434 default:
e820471b 5435 gcc_unreachable ();
c65ebc55
JW
5436 }
5437 }
5438
5439 return need_barrier;
5440}
5441
97e242b0 5442static int
9c808aad 5443rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
5444{
5445 int regno = REGNO (reg);
5446 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5447
5448 if (n == 1)
5449 return rws_access_regno (regno, flags, pred);
5450 else
5451 {
5452 int need_barrier = 0;
5453 while (--n >= 0)
5454 need_barrier |= rws_access_regno (regno + n, flags, pred);
5455 return need_barrier;
5456 }
5457}
5458
112333d3
BS
5459/* Examine X, which is a SET rtx, and update the flags, the predicate, and
5460 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5461
5462static void
c1bc6ca8 5463update_set_flags (rtx x, struct reg_flags *pflags)
112333d3
BS
5464{
5465 rtx src = SET_SRC (x);
5466
112333d3
BS
5467 switch (GET_CODE (src))
5468 {
5469 case CALL:
5470 return;
5471
5472 case IF_THEN_ELSE:
048d0d36 5473 /* There are four cases here:
c8d3810f
RH
5474 (1) The destination is (pc), in which case this is a branch,
5475 nothing here applies.
5476 (2) The destination is ar.lc, in which case this is a
5477 doloop_end_internal,
5478 (3) The destination is an fp register, in which case this is
5479 an fselect instruction.
048d0d36
MK
5480 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5481 this is a check load.
c8d3810f
RH
5482 In all cases, nothing we do in this function applies. */
5483 return;
112333d3
BS
5484
5485 default:
ec8e098d 5486 if (COMPARISON_P (src)
c8d3810f 5487 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
112333d3
BS
5488 /* Set pflags->is_fp to 1 so that we know we're dealing
5489 with a floating point comparison when processing the
5490 destination of the SET. */
5491 pflags->is_fp = 1;
5492
5493 /* Discover if this is a parallel comparison. We only handle
5494 and.orcm and or.andcm at present, since we must retain a
5495 strict inverse on the predicate pair. */
5496 else if (GET_CODE (src) == AND)
5497 pflags->is_and = 1;
5498 else if (GET_CODE (src) == IOR)
5499 pflags->is_or = 1;
5500
5501 break;
5502 }
5503}
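/* Illustrative note (an explanatory addition, not part of the original
   source): a parallel compare such as

     (set (reg:BI p6) (and:BI (gt:BI (...) (...)) (reg:BI p6)))

   reaches the default case above with GET_CODE (src) == AND, so is_and is
   set and a later and.orcm-style write to the same predicate in this group
   is tolerated by rws_access_regno instead of forcing a stop bit.  The RTL
   shown is only a schematic example.  */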
5504
5505/* Subroutine of rtx_needs_barrier; this function determines whether the
5506 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5507 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5508 for this insn. */
9c808aad 5509
112333d3 5510static int
c1bc6ca8 5511set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
112333d3
BS
5512{
5513 int need_barrier = 0;
5514 rtx dst;
5515 rtx src = SET_SRC (x);
5516
5517 if (GET_CODE (src) == CALL)
5518 /* We don't need to worry about the result registers that
5519 get written by subroutine call. */
5520 return rtx_needs_barrier (src, flags, pred);
5521 else if (SET_DEST (x) == pc_rtx)
5522 {
5523 /* X is a conditional branch. */
5524 /* ??? This seems redundant, as the caller sets this bit for
5525 all JUMP_INSNs. */
048d0d36
MK
5526 if (!ia64_spec_check_src_p (src))
5527 flags.is_branch = 1;
112333d3
BS
5528 return rtx_needs_barrier (src, flags, pred);
5529 }
5530
048d0d36
MK
5531 if (ia64_spec_check_src_p (src))
5532 /* Avoid checking one register twice (in condition
5533 and in 'then' section) for ldc pattern. */
5534 {
5535 gcc_assert (REG_P (XEXP (src, 2)));
5536 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5537
5538 /* We process MEM below. */
5539 src = XEXP (src, 1);
5540 }
5541
5542 need_barrier |= rtx_needs_barrier (src, flags, pred);
112333d3 5543
112333d3
BS
5544 dst = SET_DEST (x);
5545 if (GET_CODE (dst) == ZERO_EXTRACT)
5546 {
5547 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5548 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
112333d3
BS
5549 }
5550 return need_barrier;
5551}
5552
b38ba463
ZW
5553/* Handle an access to rtx X of type FLAGS using predicate register
5554 PRED. Return 1 if this access creates a dependency with an earlier
5555 instruction in the same group. */
c65ebc55
JW
5556
5557static int
9c808aad 5558rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
5559{
5560 int i, j;
5561 int is_complemented = 0;
5562 int need_barrier = 0;
5563 const char *format_ptr;
5564 struct reg_flags new_flags;
c1bc6ca8 5565 rtx cond;
c65ebc55
JW
5566
5567 if (! x)
5568 return 0;
5569
5570 new_flags = flags;
5571
5572 switch (GET_CODE (x))
5573 {
9c808aad 5574 case SET:
c1bc6ca8
JW
5575 update_set_flags (x, &new_flags);
5576 need_barrier = set_src_needs_barrier (x, new_flags, pred);
112333d3 5577 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 5578 {
112333d3
BS
5579 new_flags.is_write = 1;
5580 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 5581 }
c65ebc55
JW
5582 break;
5583
5584 case CALL:
5585 new_flags.is_write = 0;
97e242b0 5586 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
5587
5588 /* Avoid multiple register writes, in case this is a pattern with
e820471b 5589 multiple CALL rtx. This avoids a failure in rws_access_reg. */
2ed4af6f 5590 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
c65ebc55
JW
5591 {
5592 new_flags.is_write = 1;
97e242b0
RH
5593 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5594 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5595 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
5596 }
5597 break;
5598
e5bde68a
RH
5599 case COND_EXEC:
5600 /* X is a predicated instruction. */
5601
5602 cond = COND_EXEC_TEST (x);
e820471b 5603 gcc_assert (!pred);
e5bde68a
RH
5604 need_barrier = rtx_needs_barrier (cond, flags, 0);
5605
5606 if (GET_CODE (cond) == EQ)
5607 is_complemented = 1;
5608 cond = XEXP (cond, 0);
e820471b 5609 gcc_assert (GET_CODE (cond) == REG
c1bc6ca8 5610 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
e5bde68a
RH
5611 pred = REGNO (cond);
5612 if (is_complemented)
5613 ++pred;
5614
5615 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5616 return need_barrier;
5617
c65ebc55 5618 case CLOBBER:
c65ebc55 5619 case USE:
c65ebc55
JW
5620 /* Clobber & use are for earlier compiler-phases only. */
5621 break;
5622
5623 case ASM_OPERANDS:
5624 case ASM_INPUT:
5625 /* We always emit stop bits for traditional asms. We emit stop bits
5626 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5627 if (GET_CODE (x) != ASM_OPERANDS
5628 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5629 {
5630 /* Avoid writing the register multiple times if we have multiple
e820471b 5631 asm outputs. This avoids a failure in rws_access_reg. */
c65ebc55
JW
5632 if (! rws_insn[REG_VOLATILE].write_count)
5633 {
5634 new_flags.is_write = 1;
97e242b0 5635 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
5636 }
5637 return 1;
5638 }
5639
5640 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
1e5f1716 5641 We cannot just fall through here since then we would be confused
c65ebc55
JW
5642 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5643 traditional asms unlike their normal usage. */
5644
5645 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5646 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5647 need_barrier = 1;
5648 break;
5649
5650 case PARALLEL:
5651 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
5652 {
5653 rtx pat = XVECEXP (x, 0, i);
051d8245 5654 switch (GET_CODE (pat))
112333d3 5655 {
051d8245 5656 case SET:
c1bc6ca8
JW
5657 update_set_flags (pat, &new_flags);
5658 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
051d8245
RH
5659 break;
5660
5661 case USE:
5662 case CALL:
5663 case ASM_OPERANDS:
5664 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5665 break;
5666
5667 case CLOBBER:
5668 case RETURN:
5669 break;
5670
5671 default:
5672 gcc_unreachable ();
112333d3 5673 }
112333d3
BS
5674 }
5675 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5676 {
5677 rtx pat = XVECEXP (x, 0, i);
5678 if (GET_CODE (pat) == SET)
5679 {
5680 if (GET_CODE (SET_SRC (pat)) != CALL)
5681 {
5682 new_flags.is_write = 1;
5683 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5684 pred);
5685 }
5686 }
339cb12e 5687 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
5688 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5689 }
c65ebc55
JW
5690 break;
5691
5692 case SUBREG:
077bc924
JM
5693 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5694 break;
c65ebc55 5695 case REG:
870f9ec0
RH
5696 if (REGNO (x) == AR_UNAT_REGNUM)
5697 {
5698 for (i = 0; i < 64; ++i)
5699 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5700 }
5701 else
5702 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
5703 break;
5704
5705 case MEM:
5706 /* Find the regs used in memory address computation. */
5707 new_flags.is_write = 0;
5708 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5709 break;
5710
051d8245 5711 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
c65ebc55
JW
5712 case SYMBOL_REF: case LABEL_REF: case CONST:
5713 break;
5714
5715 /* Operators with side-effects. */
5716 case POST_INC: case POST_DEC:
e820471b 5717 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
c65ebc55
JW
5718
5719 new_flags.is_write = 0;
97e242b0 5720 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 5721 new_flags.is_write = 1;
97e242b0 5722 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
5723 break;
5724
5725 case POST_MODIFY:
e820471b 5726 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
4b983fdc
RH
5727
5728 new_flags.is_write = 0;
97e242b0 5729 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
5730 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5731 new_flags.is_write = 1;
97e242b0 5732 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
5733 break;
5734
5735 /* Handle common unary and binary ops for efficiency. */
5736 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5737 case MOD: case UDIV: case UMOD: case AND: case IOR:
5738 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5739 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5740 case NE: case EQ: case GE: case GT: case LE:
5741 case LT: case GEU: case GTU: case LEU: case LTU:
5742 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5743 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5744 break;
5745
5746 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5747 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5748 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 5749 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
5750 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5751 break;
5752
051d8245
RH
5753 case VEC_SELECT:
5754 /* VEC_SELECT's second argument is a PARALLEL with integers that
5755 describe the elements selected. On ia64, those integers are
5756 always constants. Avoid walking the PARALLEL so that we don't
e820471b 5757 get confused with "normal" parallels and then die. */
051d8245
RH
5758 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5759 break;
5760
c65ebc55
JW
5761 case UNSPEC:
5762 switch (XINT (x, 1))
5763 {
7b6e506e
RH
5764 case UNSPEC_LTOFF_DTPMOD:
5765 case UNSPEC_LTOFF_DTPREL:
5766 case UNSPEC_DTPREL:
5767 case UNSPEC_LTOFF_TPREL:
5768 case UNSPEC_TPREL:
5769 case UNSPEC_PRED_REL_MUTEX:
5770 case UNSPEC_PIC_CALL:
5771 case UNSPEC_MF:
5772 case UNSPEC_FETCHADD_ACQ:
5773 case UNSPEC_BSP_VALUE:
5774 case UNSPEC_FLUSHRS:
5775 case UNSPEC_BUNDLE_SELECTOR:
5776 break;
5777
086c0f96
RH
5778 case UNSPEC_GR_SPILL:
5779 case UNSPEC_GR_RESTORE:
870f9ec0
RH
5780 {
5781 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5782 HOST_WIDE_INT bit = (offset >> 3) & 63;
5783
5784 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
83338d15 5785 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
870f9ec0
RH
5786 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5787 new_flags, pred);
5788 break;
5789 }
9c808aad 5790
086c0f96
RH
5791 case UNSPEC_FR_SPILL:
5792 case UNSPEC_FR_RESTORE:
c407570a 5793 case UNSPEC_GETF_EXP:
b38ba463 5794 case UNSPEC_SETF_EXP:
086c0f96 5795 case UNSPEC_ADDP4:
b38ba463 5796 case UNSPEC_FR_SQRT_RECIP_APPROX:
048d0d36
MK
5797 case UNSPEC_LDA:
5798 case UNSPEC_LDS:
5799 case UNSPEC_LDSA:
5800 case UNSPEC_CHKACLR:
5801 case UNSPEC_CHKS:
6dd12198
SE
5802 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5803 break;
5804
086c0f96 5805 case UNSPEC_FR_RECIP_APPROX:
f526a3c8 5806 case UNSPEC_SHRP:
046625fa 5807 case UNSPEC_COPYSIGN:
655f2eb9
RH
5808 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5809 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5810 break;
5811
086c0f96 5812 case UNSPEC_CMPXCHG_ACQ:
0551c32d
RH
5813 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5814 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5815 break;
5816
c65ebc55 5817 default:
e820471b 5818 gcc_unreachable ();
c65ebc55
JW
5819 }
5820 break;
5821
5822 case UNSPEC_VOLATILE:
5823 switch (XINT (x, 1))
5824 {
086c0f96 5825 case UNSPECV_ALLOC:
25250265
JW
5826 /* Alloc must always be the first instruction of a group.
5827 We force this by always returning true. */
5828 /* ??? We might get better scheduling if we explicitly check for
5829 input/local/output register dependencies, and modify the
5830 scheduler so that alloc is always reordered to the start of
5831 the current group. We could then eliminate all of the
5832 first_instruction code. */
5833 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
5834
5835 new_flags.is_write = 1;
25250265
JW
5836 rws_access_regno (REG_AR_CFM, new_flags, pred);
5837 return 1;
c65ebc55 5838
086c0f96 5839 case UNSPECV_SET_BSP:
3b572406
RH
5840 need_barrier = 1;
5841 break;
5842
086c0f96
RH
5843 case UNSPECV_BLOCKAGE:
5844 case UNSPECV_INSN_GROUP_BARRIER:
5845 case UNSPECV_BREAK:
5846 case UNSPECV_PSAC_ALL:
5847 case UNSPECV_PSAC_NORMAL:
3b572406 5848 return 0;
0c96007e 5849
c65ebc55 5850 default:
e820471b 5851 gcc_unreachable ();
c65ebc55
JW
5852 }
5853 break;
5854
5855 case RETURN:
5856 new_flags.is_write = 0;
97e242b0
RH
5857 need_barrier = rws_access_regno (REG_RP, flags, pred);
5858 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
5859
5860 new_flags.is_write = 1;
97e242b0
RH
5861 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5862 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
5863 break;
5864
5865 default:
5866 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5867 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5868 switch (format_ptr[i])
5869 {
5870 case '0': /* unused field */
5871 case 'i': /* integer */
5872 case 'n': /* note */
5873 case 'w': /* wide integer */
5874 case 's': /* pointer to string */
5875 case 'S': /* optional pointer to string */
5876 break;
5877
5878 case 'e':
5879 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5880 need_barrier = 1;
5881 break;
5882
5883 case 'E':
5884 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5885 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5886 need_barrier = 1;
5887 break;
5888
5889 default:
e820471b 5890 gcc_unreachable ();
c65ebc55 5891 }
2ed4af6f 5892 break;
c65ebc55
JW
5893 }
5894 return need_barrier;
5895}
5896
c1bc6ca8 5897/* Clear out the state for group_barrier_needed at the start of a
2130b7fb
BS
5898 sequence of insns. */
5899
5900static void
9c808aad 5901init_insn_group_barriers (void)
2130b7fb
BS
5902{
5903 memset (rws_sum, 0, sizeof (rws_sum));
25250265 5904 first_instruction = 1;
2130b7fb
BS
5905}
5906
c1bc6ca8
JW
5907/* Given the current state, determine whether a group barrier (a stop bit) is
5908 necessary before INSN. Return nonzero if so. This modifies the state to
5909 include the effects of INSN as a side-effect. */
2130b7fb
BS
5910
5911static int
c1bc6ca8 5912group_barrier_needed (rtx insn)
2130b7fb
BS
5913{
5914 rtx pat;
5915 int need_barrier = 0;
5916 struct reg_flags flags;
5917
5918 memset (&flags, 0, sizeof (flags));
5919 switch (GET_CODE (insn))
5920 {
5921 case NOTE:
5922 break;
5923
5924 case BARRIER:
5925 /* A barrier doesn't imply an instruction group boundary. */
5926 break;
5927
5928 case CODE_LABEL:
5929 memset (rws_insn, 0, sizeof (rws_insn));
5930 return 1;
5931
5932 case CALL_INSN:
5933 flags.is_branch = 1;
5934 flags.is_sibcall = SIBLING_CALL_P (insn);
5935 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
5936
5937 /* Don't bundle a call following another call. */
5938 if ((pat = prev_active_insn (insn))
5939 && GET_CODE (pat) == CALL_INSN)
5940 {
5941 need_barrier = 1;
5942 break;
5943 }
5944
2130b7fb
BS
5945 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5946 break;
5947
5948 case JUMP_INSN:
048d0d36
MK
5949 if (!ia64_spec_check_p (insn))
5950 flags.is_branch = 1;
f12f25a7
RH
5951
5952 /* Don't bundle a jump following a call. */
5953 if ((pat = prev_active_insn (insn))
5954 && GET_CODE (pat) == CALL_INSN)
5955 {
5956 need_barrier = 1;
5957 break;
5958 }
5efb1046 5959 /* FALLTHRU */
2130b7fb
BS
5960
5961 case INSN:
5962 if (GET_CODE (PATTERN (insn)) == USE
5963 || GET_CODE (PATTERN (insn)) == CLOBBER)
5964 /* Don't care about USE and CLOBBER "insns"---those are used to
5965 indicate to the optimizer that it shouldn't get rid of
5966 certain operations. */
5967 break;
5968
5969 pat = PATTERN (insn);
5970
5971 /* Ug. Hack hacks hacked elsewhere. */
5972 switch (recog_memoized (insn))
5973 {
5974 /* We play dependency tricks with the epilogue in order
5975 to get proper schedules. Undo this for dv analysis. */
5976 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 5977 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
5978 pat = XVECEXP (pat, 0, 0);
5979 break;
5980
5981 /* The pattern we use for br.cloop confuses the code above.
5982 The second element of the vector is representative. */
5983 case CODE_FOR_doloop_end_internal:
5984 pat = XVECEXP (pat, 0, 1);
5985 break;
5986
5987 /* Doesn't generate code. */
5988 case CODE_FOR_pred_rel_mutex:
d0e82870 5989 case CODE_FOR_prologue_use:
2130b7fb
BS
5990 return 0;
5991
5992 default:
5993 break;
5994 }
5995
5996 memset (rws_insn, 0, sizeof (rws_insn));
5997 need_barrier = rtx_needs_barrier (pat, flags, 0);
5998
5999 /* Check to see if the previous instruction was a volatile
6000 asm. */
6001 if (! need_barrier)
6002 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
2130b7fb
BS
6003 break;
6004
6005 default:
e820471b 6006 gcc_unreachable ();
2130b7fb 6007 }
25250265 6008
30028c85
VM
6009 if (first_instruction && INSN_P (insn)
6010 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6011 && GET_CODE (PATTERN (insn)) != USE
6012 && GET_CODE (PATTERN (insn)) != CLOBBER)
25250265
JW
6013 {
6014 need_barrier = 0;
6015 first_instruction = 0;
6016 }
6017
2130b7fb
BS
6018 return need_barrier;
6019}
6020
c1bc6ca8 6021/* Like group_barrier_needed, but do not clobber the current state. */
2130b7fb
BS
6022
6023static int
c1bc6ca8 6024safe_group_barrier_needed (rtx insn)
2130b7fb
BS
6025{
6026 struct reg_write_state rws_saved[NUM_REGS];
25250265 6027 int saved_first_instruction;
2130b7fb 6028 int t;
25250265 6029
2130b7fb 6030 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
25250265
JW
6031 saved_first_instruction = first_instruction;
6032
c1bc6ca8 6033 t = group_barrier_needed (insn);
25250265 6034
2130b7fb 6035 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
25250265
JW
6036 first_instruction = saved_first_instruction;
6037
2130b7fb
BS
6038 return t;
6039}
6040
18dbd950
RS
6041/* Scan the current function and insert stop bits as necessary to
6042 eliminate dependencies. This function assumes that a final
6043 instruction scheduling pass has been run which has already
6044 inserted most of the necessary stop bits. This function only
6045 inserts new ones at basic block boundaries, since these are
6046 invisible to the scheduler. */
2130b7fb
BS
6047
6048static void
9c808aad 6049emit_insn_group_barriers (FILE *dump)
2130b7fb
BS
6050{
6051 rtx insn;
6052 rtx last_label = 0;
6053 int insns_since_last_label = 0;
6054
6055 init_insn_group_barriers ();
6056
18dbd950 6057 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb
BS
6058 {
6059 if (GET_CODE (insn) == CODE_LABEL)
6060 {
6061 if (insns_since_last_label)
6062 last_label = insn;
6063 insns_since_last_label = 0;
6064 }
6065 else if (GET_CODE (insn) == NOTE
a38e7aa5 6066 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
2130b7fb
BS
6067 {
6068 if (insns_since_last_label)
6069 last_label = insn;
6070 insns_since_last_label = 0;
6071 }
6072 else if (GET_CODE (insn) == INSN
6073 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 6074 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
6075 {
6076 init_insn_group_barriers ();
6077 last_label = 0;
6078 }
6079 else if (INSN_P (insn))
6080 {
6081 insns_since_last_label = 1;
6082
c1bc6ca8 6083 if (group_barrier_needed (insn))
2130b7fb
BS
6084 {
6085 if (last_label)
6086 {
6087 if (dump)
6088 fprintf (dump, "Emitting stop before label %d\n",
6089 INSN_UID (last_label));
6090 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6091 insn = last_label;
112333d3
BS
6092
6093 init_insn_group_barriers ();
6094 last_label = 0;
2130b7fb 6095 }
2130b7fb
BS
6096 }
6097 }
6098 }
6099}
f4d578da
BS
6100
6101/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6102 This function has to emit all necessary group barriers. */
6103
6104static void
9c808aad 6105emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da
BS
6106{
6107 rtx insn;
6108
6109 init_insn_group_barriers ();
6110
18dbd950 6111 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 6112 {
bd7b9a0f
RH
6113 if (GET_CODE (insn) == BARRIER)
6114 {
6115 rtx last = prev_active_insn (insn);
6116
6117 if (! last)
6118 continue;
6119 if (GET_CODE (last) == JUMP_INSN
6120 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6121 last = prev_active_insn (last);
6122 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6123 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6124
6125 init_insn_group_barriers ();
6126 }
f4d578da
BS
6127 else if (INSN_P (insn))
6128 {
bd7b9a0f
RH
6129 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6130 init_insn_group_barriers ();
c1bc6ca8 6131 else if (group_barrier_needed (insn))
f4d578da
BS
6132 {
6133 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6134 init_insn_group_barriers ();
c1bc6ca8 6135 group_barrier_needed (insn);
f4d578da
BS
6136 }
6137 }
6138 }
6139}
30028c85 6140
2130b7fb 6141\f
2130b7fb 6142
30028c85 6143/* Instruction scheduling support. */
2130b7fb
BS
6144
6145#define NR_BUNDLES 10
6146
30028c85 6147/* A list of names of all available bundles. */
2130b7fb 6148
30028c85 6149static const char *bundle_name [NR_BUNDLES] =
2130b7fb 6150{
30028c85
VM
6151 ".mii",
6152 ".mmi",
6153 ".mfi",
6154 ".mmf",
2130b7fb 6155#if NR_BUNDLES == 10
30028c85
VM
6156 ".bbb",
6157 ".mbb",
2130b7fb 6158#endif
30028c85
VM
6159 ".mib",
6160 ".mmb",
6161 ".mfb",
6162 ".mlx"
2130b7fb
BS
6163};
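/* Explanatory note (an addition, not part of the original source): each
   entry above is the assembler pseudo-op for one IA-64 bundle template;
   the three letters name the slot types of the three instructions in the
   128-bit bundle, e.g. ".mmi" is memory/memory/integer and ".mlx" holds
   an M slot plus a long-immediate L+X unit pair.  */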
6164
30028c85 6165/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 6166
30028c85 6167int ia64_final_schedule = 0;
2130b7fb 6168
35fd3193 6169/* Codes of the corresponding queried units: */
2130b7fb 6170
30028c85
VM
6171static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6172static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 6173
30028c85
VM
6174static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6175static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 6176
30028c85
VM
6177static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6178
6179/* The following variable value is an insn group barrier. */
6180
6181static rtx dfa_stop_insn;
6182
6183/* The following variable value is the last issued insn. */
6184
6185static rtx last_scheduled_insn;
6186
6187/* The following variable value is size of the DFA state. */
6188
6189static size_t dfa_state_size;
6190
6191/* The following variable value is pointer to a DFA state used as
6192 temporary variable. */
6193
6194static state_t temp_dfa_state = NULL;
6195
6196/* The following variable value is DFA state after issuing the last
6197 insn. */
6198
6199static state_t prev_cycle_state = NULL;
6200
6201/* The following array element values are TRUE if the corresponding
9e4f94de 6202 insn requires stop bits to be added before it. */
30028c85 6203
048d0d36
MK
6204static char *stops_p = NULL;
6205
6206/* The following array element values are ZERO for non-speculative
6207 instructions and hold corresponding speculation check number for
6208 speculative instructions. */
6209static int *spec_check_no = NULL;
6210
6211/* Size of spec_check_no array. */
6212static int max_uid = 0;
30028c85
VM
6213
6214/* The following variable is used to set up the mentioned above array. */
6215
6216static int stop_before_p = 0;
6217
6218/* The following variable value is the length of the arrays `clocks' and
6219 `add_cycles'. */
6220
6221static int clocks_length;
6222
6223/* The following array element values are cycles on which the
6224 corresponding insn will be issued. The array is used only for
6225 Itanium1. */
6226
6227static int *clocks;
6228
6229/* The following array element values are numbers of cycles that should be
6230 added to improve insn scheduling for MM_insns for Itanium1. */
6231
6232static int *add_cycles;
2130b7fb 6233
048d0d36
MK
6234/* The following variable value is number of data speculations in progress. */
6235static int pending_data_specs = 0;
6236
9c808aad
AJ
6237static rtx ia64_single_set (rtx);
6238static void ia64_emit_insn_before (rtx, rtx);
2130b7fb
BS
6239
6240/* Map a bundle number to its pseudo-op. */
6241
6242const char *
9c808aad 6243get_bundle_name (int b)
2130b7fb 6244{
30028c85 6245 return bundle_name[b];
2130b7fb
BS
6246}
6247
2130b7fb
BS
6248
6249/* Return the maximum number of instructions a cpu can issue. */
6250
c237e94a 6251static int
9c808aad 6252ia64_issue_rate (void)
2130b7fb
BS
6253{
6254 return 6;
6255}
6256
6257/* Helper function - like single_set, but look inside COND_EXEC. */
6258
6259static rtx
9c808aad 6260ia64_single_set (rtx insn)
2130b7fb 6261{
30fa7e33 6262 rtx x = PATTERN (insn), ret;
2130b7fb
BS
6263 if (GET_CODE (x) == COND_EXEC)
6264 x = COND_EXEC_CODE (x);
6265 if (GET_CODE (x) == SET)
6266 return x;
bdbe5b8d
RH
6267
6268 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6269 Although they are not classical single set, the second set is there just
6270 to protect it from moving past FP-relative stack accesses. */
6271 switch (recog_memoized (insn))
30fa7e33 6272 {
bdbe5b8d
RH
6273 case CODE_FOR_prologue_allocate_stack:
6274 case CODE_FOR_epilogue_deallocate_stack:
6275 ret = XVECEXP (x, 0, 0);
6276 break;
6277
6278 default:
6279 ret = single_set_2 (insn, x);
6280 break;
30fa7e33 6281 }
bdbe5b8d 6282
30fa7e33 6283 return ret;
2130b7fb
BS
6284}
6285
b198261f
MK
6286/* Adjust the cost of a scheduling dependency. Return the new cost of
6287 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
2130b7fb 6288
c237e94a 6289static int
b198261f 6290ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
2130b7fb 6291{
2130b7fb
BS
6292 enum attr_itanium_class dep_class;
6293 enum attr_itanium_class insn_class;
2130b7fb 6294
b198261f 6295 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
30028c85 6296 return cost;
2130b7fb 6297
2130b7fb 6298 insn_class = ia64_safe_itanium_class (insn);
30028c85
VM
6299 dep_class = ia64_safe_itanium_class (dep_insn);
6300 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6301 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
6302 return 0;
6303
2130b7fb
BS
6304 return cost;
6305}
6306
14d118d6
DM
6307/* Like emit_insn_before, but skip cycle_display notes.
6308 ??? When cycle display notes are implemented, update this. */
6309
6310static void
9c808aad 6311ia64_emit_insn_before (rtx insn, rtx before)
14d118d6
DM
6312{
6313 emit_insn_before (insn, before);
6314}
6315
30028c85
VM
6316/* The following function marks insns that produce addresses for load
 6317 and store insns. Such insns will be placed into M slots because this
 6318 decreases latency for Itanium1 (see function
6319 `ia64_produce_address_p' and the DFA descriptions). */
2130b7fb
BS
6320
6321static void
9c808aad 6322ia64_dependencies_evaluation_hook (rtx head, rtx tail)
2130b7fb 6323{
b198261f 6324 rtx insn, next, next_tail;
9c808aad 6325
f12b785d
RH
6326 /* Before reload, which_alternative is not set, which means that
6327 ia64_safe_itanium_class will produce wrong results for (at least)
6328 move instructions. */
6329 if (!reload_completed)
6330 return;
6331
30028c85
VM
6332 next_tail = NEXT_INSN (tail);
6333 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6334 if (INSN_P (insn))
6335 insn->call = 0;
6336 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6337 if (INSN_P (insn)
6338 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6339 {
b198261f
MK
6340 dep_link_t link;
6341
6342 FOR_EACH_DEP_LINK (link, INSN_FORW_DEPS (insn))
30028c85 6343 {
a71aef0b
JB
6344 enum attr_itanium_class c;
6345
b198261f 6346 if (DEP_LINK_KIND (link) != REG_DEP_TRUE)
f12b785d 6347 continue;
b198261f
MK
6348
6349 next = DEP_LINK_CON (link);
a71aef0b
JB
6350 c = ia64_safe_itanium_class (next);
6351 if ((c == ITANIUM_CLASS_ST
6352 || c == ITANIUM_CLASS_STF)
30028c85
VM
6353 && ia64_st_address_bypass_p (insn, next))
6354 break;
a71aef0b
JB
6355 else if ((c == ITANIUM_CLASS_LD
6356 || c == ITANIUM_CLASS_FLD
6357 || c == ITANIUM_CLASS_FLDP)
30028c85
VM
6358 && ia64_ld_address_bypass_p (insn, next))
6359 break;
6360 }
6361 insn->call = link != 0;
6362 }
6363}
2130b7fb 6364
30028c85 6365/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 6366
30028c85 6367static void
9c808aad
AJ
6368ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6369 int sched_verbose ATTRIBUTE_UNUSED,
6370 int max_ready ATTRIBUTE_UNUSED)
30028c85
VM
6371{
6372#ifdef ENABLE_CHECKING
6373 rtx insn;
9c808aad 6374
30028c85
VM
6375 if (reload_completed)
6376 for (insn = NEXT_INSN (current_sched_info->prev_head);
6377 insn != current_sched_info->next_tail;
6378 insn = NEXT_INSN (insn))
e820471b 6379 gcc_assert (!SCHED_GROUP_P (insn));
30028c85
VM
6380#endif
6381 last_scheduled_insn = NULL_RTX;
6382 init_insn_group_barriers ();
2130b7fb
BS
6383}
6384
048d0d36
MK
6385/* We're beginning a scheduling pass. Check assertion. */
6386
6387static void
6388ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6389 int sched_verbose ATTRIBUTE_UNUSED,
6390 int max_ready ATTRIBUTE_UNUSED)
6391{
6392 gcc_assert (!pending_data_specs);
6393}
6394
6395/* Scheduling pass is now finished. Free/reset static variable. */
6396static void
6397ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6398 int sched_verbose ATTRIBUTE_UNUSED)
6399{
6400 free (spec_check_no);
6401 spec_check_no = 0;
6402 max_uid = 0;
6403}
6404
30028c85
VM
6405/* We are about to begin issuing insns for this clock cycle.
6406 Override the default sort algorithm to better slot instructions. */
2130b7fb 6407
30028c85 6408static int
9c808aad
AJ
6409ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6410 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6411 int reorder_type)
2130b7fb 6412{
30028c85
VM
6413 int n_asms;
6414 int n_ready = *pn_ready;
6415 rtx *e_ready = ready + n_ready;
6416 rtx *insnp;
2130b7fb 6417
30028c85
VM
6418 if (sched_verbose)
6419 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 6420
30028c85 6421 if (reorder_type == 0)
2130b7fb 6422 {
30028c85
VM
6423 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6424 n_asms = 0;
6425 for (insnp = ready; insnp < e_ready; insnp++)
6426 if (insnp < e_ready)
6427 {
6428 rtx insn = *insnp;
6429 enum attr_type t = ia64_safe_type (insn);
6430 if (t == TYPE_UNKNOWN)
6431 {
6432 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6433 || asm_noperands (PATTERN (insn)) >= 0)
6434 {
6435 rtx lowest = ready[n_asms];
6436 ready[n_asms] = insn;
6437 *insnp = lowest;
6438 n_asms++;
6439 }
6440 else
6441 {
6442 rtx highest = ready[n_ready - 1];
6443 ready[n_ready - 1] = insn;
6444 *insnp = highest;
6445 return 1;
6446 }
6447 }
6448 }
98d2b17e 6449
30028c85 6450 if (n_asms < n_ready)
98d2b17e 6451 {
30028c85
VM
6452 /* Some normal insns to process. Skip the asms. */
6453 ready += n_asms;
6454 n_ready -= n_asms;
98d2b17e 6455 }
30028c85
VM
6456 else if (n_ready > 0)
6457 return 1;
2130b7fb
BS
6458 }
6459
30028c85 6460 if (ia64_final_schedule)
2130b7fb 6461 {
30028c85
VM
6462 int deleted = 0;
6463 int nr_need_stop = 0;
6464
6465 for (insnp = ready; insnp < e_ready; insnp++)
c1bc6ca8 6466 if (safe_group_barrier_needed (*insnp))
30028c85 6467 nr_need_stop++;
9c808aad 6468
30028c85
VM
6469 if (reorder_type == 1 && n_ready == nr_need_stop)
6470 return 0;
6471 if (reorder_type == 0)
6472 return 1;
6473 insnp = e_ready;
6474 /* Move down everything that needs a stop bit, preserving
6475 relative order. */
6476 while (insnp-- > ready + deleted)
6477 while (insnp >= ready + deleted)
6478 {
6479 rtx insn = *insnp;
c1bc6ca8 6480 if (! safe_group_barrier_needed (insn))
30028c85
VM
6481 break;
6482 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6483 *ready = insn;
6484 deleted++;
6485 }
6486 n_ready -= deleted;
6487 ready += deleted;
2130b7fb 6488 }
2130b7fb 6489
30028c85 6490 return 1;
2130b7fb 6491}
6b6c1201 6492
30028c85
VM
6493/* We are about to begin issuing insns for this clock cycle. Override
6494 the default sort algorithm to better slot instructions. */
c65ebc55 6495
30028c85 6496static int
9c808aad
AJ
6497ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6498 int clock_var)
2130b7fb 6499{
30028c85
VM
6500 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6501 pn_ready, clock_var, 0);
2130b7fb
BS
6502}
6503
30028c85
VM
6504/* Like ia64_sched_reorder, but called after issuing each insn.
6505 Override the default sort algorithm to better slot instructions. */
2130b7fb 6506
30028c85 6507static int
9c808aad
AJ
6508ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6509 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6510 int *pn_ready, int clock_var)
30028c85
VM
6511{
6512 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6513 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6514 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6515 clock_var, 1);
2130b7fb
BS
6516}
6517
30028c85
VM
6518/* We are about to issue INSN. Return the number of insns left on the
6519 ready queue that can be issued this cycle. */
2130b7fb 6520
30028c85 6521static int
9c808aad
AJ
6522ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6523 int sched_verbose ATTRIBUTE_UNUSED,
6524 rtx insn ATTRIBUTE_UNUSED,
6525 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 6526{
048d0d36
MK
6527 if (current_sched_info->flags & DO_SPECULATION)
6528 /* Modulo scheduling does not extend h_i_d when emitting
6529 new instructions. Deal with it. */
6530 {
6531 if (DONE_SPEC (insn) & BEGIN_DATA)
6532 pending_data_specs++;
6533 if (CHECK_SPEC (insn) & BEGIN_DATA)
6534 pending_data_specs--;
6535 }
6536
30028c85
VM
6537 last_scheduled_insn = insn;
6538 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6539 if (reload_completed)
2130b7fb 6540 {
c1bc6ca8 6541 int needed = group_barrier_needed (insn);
e820471b
NS
6542
6543 gcc_assert (!needed);
30028c85
VM
6544 if (GET_CODE (insn) == CALL_INSN)
6545 init_insn_group_barriers ();
6546 stops_p [INSN_UID (insn)] = stop_before_p;
6547 stop_before_p = 0;
2130b7fb 6548 }
30028c85
VM
6549 return 1;
6550}
c65ebc55 6551
30028c85
VM
6552/* We are choosing insn from the ready queue. Return nonzero if INSN
6553 can be chosen. */
c65ebc55 6554
30028c85 6555static int
9c808aad 6556ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
30028c85 6557{
e820471b 6558 gcc_assert (insn && INSN_P (insn));
048d0d36
MK
6559 return ((!reload_completed
6560 || !safe_group_barrier_needed (insn))
6561 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
6562}
6563
6564/* We are choosing insn from the ready queue. Return nonzero if INSN
6565 can be chosen. */
6566
6567static bool
6568ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn)
6569{
6570 gcc_assert (insn && INSN_P (insn));
 6571 /* The size of the ALAT is 32. Since we perform conservative data speculation,
 6572 we keep the ALAT half-empty. */
6573 return (pending_data_specs < 16
6574 || !(TODO_SPEC (insn) & BEGIN_DATA));
2130b7fb
BS
6575}
6576
30028c85
VM
6577/* The following variable value is pseudo-insn used by the DFA insn
6578 scheduler to change the DFA state when the simulated clock is
6579 increased. */
2130b7fb 6580
30028c85 6581static rtx dfa_pre_cycle_insn;
2130b7fb 6582
1e5f1716 6583/* We are about to begin issuing INSN. Return nonzero if we cannot
30028c85
VM
6584 issue it on given cycle CLOCK and return zero if we should not sort
6585 the ready queue on the next clock start. */
2130b7fb
BS
6586
6587static int
9c808aad
AJ
6588ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6589 int clock, int *sort_p)
2130b7fb 6590{
30028c85 6591 int setup_clocks_p = FALSE;
2130b7fb 6592
e820471b 6593 gcc_assert (insn && INSN_P (insn));
c1bc6ca8 6594 if ((reload_completed && safe_group_barrier_needed (insn))
30028c85
VM
6595 || (last_scheduled_insn
6596 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6597 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6598 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
2130b7fb 6599 {
30028c85
VM
6600 init_insn_group_barriers ();
6601 if (verbose && dump)
6602 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6603 last_clock == clock ? " + cycle advance" : "");
6604 stop_before_p = 1;
6605 if (last_clock == clock)
2130b7fb 6606 {
30028c85
VM
6607 state_transition (curr_state, dfa_stop_insn);
6608 if (TARGET_EARLY_STOP_BITS)
6609 *sort_p = (last_scheduled_insn == NULL_RTX
6610 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6611 else
6612 *sort_p = 0;
6613 return 1;
6614 }
6615 else if (reload_completed)
6616 setup_clocks_p = TRUE;
25069b42
VM
6617 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6618 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6619 state_reset (curr_state);
6620 else
6621 {
6622 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6623 state_transition (curr_state, dfa_stop_insn);
6624 state_transition (curr_state, dfa_pre_cycle_insn);
6625 state_transition (curr_state, NULL);
6626 }
30028c85
VM
6627 }
6628 else if (reload_completed)
6629 setup_clocks_p = TRUE;
f75ce96a
VM
6630 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6631 && GET_CODE (PATTERN (insn)) != ASM_INPUT
2d8f9759 6632 && asm_noperands (PATTERN (insn)) < 0)
30028c85
VM
6633 {
6634 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
9c808aad 6635
30028c85
VM
6636 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6637 {
b198261f 6638 dep_link_t link;
30028c85 6639 int d = -1;
9c808aad 6640
b198261f
MK
6641 FOR_EACH_DEP_LINK (link, INSN_BACK_DEPS (insn))
6642 if (DEP_LINK_KIND (link) == REG_DEP_TRUE)
30028c85
VM
6643 {
6644 enum attr_itanium_class dep_class;
b198261f 6645 rtx dep_insn = DEP_LINK_PRO (link);
9c808aad 6646
30028c85
VM
6647 dep_class = ia64_safe_itanium_class (dep_insn);
6648 if ((dep_class == ITANIUM_CLASS_MMMUL
6649 || dep_class == ITANIUM_CLASS_MMSHF)
6650 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6651 && (d < 0
6652 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6653 d = last_clock - clocks [INSN_UID (dep_insn)];
6654 }
6655 if (d >= 0)
6656 add_cycles [INSN_UID (insn)] = 3 - d;
2130b7fb
BS
6657 }
6658 }
30028c85 6659 return 0;
2130b7fb
BS
6660}
6661
048d0d36
MK
6662/* Implement targetm.sched.h_i_d_extended hook.
6663 Extend internal data structures. */
6664static void
6665ia64_h_i_d_extended (void)
6666{
6667 if (current_sched_info->flags & DO_SPECULATION)
6668 {
6669 int new_max_uid = get_max_uid () + 1;
6670
6671 spec_check_no = xrecalloc (spec_check_no, new_max_uid,
6672 max_uid, sizeof (*spec_check_no));
6673 max_uid = new_max_uid;
6674 }
6675
6676 if (stops_p != NULL)
6677 {
6678 int new_clocks_length = get_max_uid () + 1;
6679
6680 stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
6681
6682 if (ia64_tune == PROCESSOR_ITANIUM)
6683 {
6684 clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
6685 sizeof (int));
6686 add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
6687 sizeof (int));
6688 }
6689
6690 clocks_length = new_clocks_length;
6691 }
6692}
6693
6694/* Constants that help map 'enum machine_mode' to int. */
6695enum SPEC_MODES
6696 {
6697 SPEC_MODE_INVALID = -1,
6698 SPEC_MODE_FIRST = 0,
6699 SPEC_MODE_FOR_EXTEND_FIRST = 1,
6700 SPEC_MODE_FOR_EXTEND_LAST = 3,
6701 SPEC_MODE_LAST = 8
6702 };
6703
6704/* Return index of the MODE. */
6705static int
6706ia64_mode_to_int (enum machine_mode mode)
6707{
6708 switch (mode)
6709 {
6710 case BImode: return 0; /* SPEC_MODE_FIRST */
6711 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
6712 case HImode: return 2;
6713 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
6714 case DImode: return 4;
6715 case SFmode: return 5;
6716 case DFmode: return 6;
6717 case XFmode: return 7;
6718 case TImode:
6719 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
6720 mentioned in itanium[12].md. Predicate fp_register_operand also
6721 needs to be defined. Bottom line: better disable for now. */
6722 return SPEC_MODE_INVALID;
6723 default: return SPEC_MODE_INVALID;
6724 }
6725}
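/* For example (restating the table above): a QImode, HImode or SImode load
   maps to an index in the [SPEC_MODE_FOR_EXTEND_FIRST, SPEC_MODE_FOR_EXTEND_LAST]
   range and so may be combined with ZERO_EXTEND, while a DImode load maps to
   index 4 and may not.  */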
6726
6727/* Provide information about speculation capabilities. */
6728static void
6729ia64_set_sched_flags (spec_info_t spec_info)
6730{
6731 unsigned int *flags = &(current_sched_info->flags);
6732
6733 if (*flags & SCHED_RGN
6734 || *flags & SCHED_EBB)
6735 {
6736 int mask = 0;
6737
a57aee2a 6738 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
048d0d36
MK
6739 || (mflag_sched_ar_data_spec && reload_completed))
6740 {
6741 mask |= BEGIN_DATA;
6742
6743 if ((mflag_sched_br_in_data_spec && !reload_completed)
6744 || (mflag_sched_ar_in_data_spec && reload_completed))
6745 mask |= BE_IN_DATA;
6746 }
6747
6748 if (mflag_sched_control_spec)
6749 {
6750 mask |= BEGIN_CONTROL;
6751
6752 if (mflag_sched_in_control_spec)
6753 mask |= BE_IN_CONTROL;
6754 }
6755
048d0d36
MK
6756 if (mask)
6757 {
6fb5fa3c
DB
6758 *flags |= USE_DEPS_LIST | DO_SPECULATION;
6759
6760 if (mask & BE_IN_SPEC)
6761 *flags |= NEW_BBS;
048d0d36
MK
6762
6763 spec_info->mask = mask;
6764 spec_info->flags = 0;
6765
6766 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
6767 spec_info->flags |= PREFER_NON_DATA_SPEC;
6768
6769 if ((mask & CONTROL_SPEC)
6770 && mflag_sched_prefer_non_control_spec_insns)
6771 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
6772
6773 if (mflag_sched_spec_verbose)
6774 {
6775 if (sched_verbose >= 1)
6776 spec_info->dump = sched_dump;
6777 else
6778 spec_info->dump = stderr;
6779 }
6780 else
6781 spec_info->dump = 0;
6782
6783 if (mflag_sched_count_spec_in_critical_path)
6784 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
6785 }
6786 }
6787}
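/* Illustrative note (an inference from the flag names, not stated in the
   sources): the mflag_sched_* variables tested above presumably mirror the
   corresponding -msched-* command-line options; e.g. enabling branch-time
   data speculation before reload adds BEGIN_DATA to the mask, and control
   speculation adds BEGIN_CONTROL (plus BE_IN_CONTROL for in-block
   speculation).  */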
6788
6789/* Implement targetm.sched.speculate_insn hook.
6790 Check whether INSN can be made TS speculative.
6791 If it cannot, return -1.
6792 If it can, generate the speculative pattern in NEW_PAT and return 1.
6793 If the current pattern of INSN already provides TS speculation, return 0. */
6794static int
6795ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
6796{
6797 rtx pat, reg, mem, mem_reg;
6798 int mode_no, gen_p = 1;
6799 bool extend_p;
6800
6801 gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
6802
6803 pat = PATTERN (insn);
6804
6805 if (GET_CODE (pat) == COND_EXEC)
6806 pat = COND_EXEC_CODE (pat);
6807
f6ec1d11 6808 /* This should be a SET ... */
048d0d36
MK
6809 if (GET_CODE (pat) != SET)
6810 return -1;
f6ec1d11 6811
048d0d36 6812 reg = SET_DEST (pat);
f6ec1d11
MK
6813 /* ... to the general/fp register ... */
6814 if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
048d0d36
MK
6815 return -1;
6816
f6ec1d11
MK
6817 /* ... from the mem ... */
6818 mem = SET_SRC (pat);
6819
6820 /* ... that can, possibly, be a zero_extend ... */
048d0d36
MK
6821 if (GET_CODE (mem) == ZERO_EXTEND)
6822 {
6823 mem = XEXP (mem, 0);
6824 extend_p = true;
6825 }
6826 else
6827 extend_p = false;
6828
f6ec1d11 6829 /* ... or a speculative load. */
048d0d36
MK
6830 if (GET_CODE (mem) == UNSPEC)
6831 {
6832 int code;
6833
6834 code = XINT (mem, 1);
6835 if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
6836 return -1;
6837
6838 if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
6839 || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
6840 || code == UNSPEC_LDSA)
6841 gen_p = 0;
6842
6843 mem = XVECEXP (mem, 0, 0);
6844 gcc_assert (MEM_P (mem));
6845 }
f6ec1d11
MK
6846
6847 /* Source should be a mem ... */
048d0d36
MK
6848 if (!MEM_P (mem))
6849 return -1;
f6ec1d11
MK
6850
6851 /* ... addressed by a register. */
048d0d36
MK
6852 mem_reg = XEXP (mem, 0);
6853 if (!REG_P (mem_reg))
6854 return -1;
6855
6856 /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
6857 will always be DImode. */
6858 mode_no = ia64_mode_to_int (GET_MODE (mem));
6859
6860 if (mode_no == SPEC_MODE_INVALID
6861 || (extend_p
6862 && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
6863 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
6864 return -1;
6865
6866 extract_insn_cached (insn);
6867 gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
f6ec1d11 6868
048d0d36
MK
6869 *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
6870
6871 return gen_p;
6872}
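/* A minimal example of an insn the hook above accepts (register numbers are
   assumed, for illustration only):
     (set (reg:DI r14) (mem:DI (reg:DI r15)))
   The destination is a general register, the source is a MEM addressed by a
   register, and ia64_mode_to_int (DImode) yields 4, so a speculative pattern
   can be generated.  */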
6873
6874enum
6875 {
6876 /* Offset to reach ZERO_EXTEND patterns. */
6877 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
6878 /* Number of patterns for each speculation mode. */
6879 SPEC_N = (SPEC_MODE_LAST
6880 + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
6881 };
6882
6883enum SPEC_GEN_LD_MAP
6884 {
6885 /* Offset to ld.a patterns. */
6886 SPEC_GEN_A = 0 * SPEC_N,
6887 /* Offset to ld.s patterns. */
6888 SPEC_GEN_S = 1 * SPEC_N,
6889 /* Offset to ld.sa patterns. */
6890 SPEC_GEN_SA = 2 * SPEC_N,
6891 /* Offset to ld.sa patterns. For these patterns the corresponding
6892 ld.c will mutate to chk.s. */
6893 SPEC_GEN_SA_FOR_S = 3 * SPEC_N
6894 };
6895
6896/* These offsets map a load index onto the check patterns that start at (4 * SPEC_N). */
6897enum SPEC_GEN_CHECK_OFFSET
6898 {
6899 SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
6900 SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
6901 };
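/* Working the arithmetic out from the definitions above (for the reader's
   convenience): SPEC_N = 8 + 3 - 1 + 2 = 12, i.e. 9 plain machine modes plus
   3 zero-extending variants, so each group in the gen_load table below holds
   12 generators and the ld.a, ld.s and ld.sa groups start at offsets 0, 12
   and 24.  */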
6902
6903/* If GEN_P is true, calculate the index of the needed speculation check and
6904 return the speculative pattern for INSN with speculation mode TS, machine
6905 mode MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
6906 If GEN_P is false, just calculate the index of the needed speculation check. */
6907static rtx
6908ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
6909{
6910 rtx pat, new_pat;
6911 int load_no;
6912 int shift = 0;
6913
6914 static rtx (* const gen_load[]) (rtx, rtx) = {
6915 gen_movbi_advanced,
6916 gen_movqi_advanced,
6917 gen_movhi_advanced,
6918 gen_movsi_advanced,
6919 gen_movdi_advanced,
6920 gen_movsf_advanced,
6921 gen_movdf_advanced,
6922 gen_movxf_advanced,
6923 gen_movti_advanced,
6924 gen_zero_extendqidi2_advanced,
6925 gen_zero_extendhidi2_advanced,
6926 gen_zero_extendsidi2_advanced,
6927
6928 gen_movbi_speculative,
6929 gen_movqi_speculative,
6930 gen_movhi_speculative,
6931 gen_movsi_speculative,
6932 gen_movdi_speculative,
6933 gen_movsf_speculative,
6934 gen_movdf_speculative,
6935 gen_movxf_speculative,
6936 gen_movti_speculative,
6937 gen_zero_extendqidi2_speculative,
6938 gen_zero_extendhidi2_speculative,
6939 gen_zero_extendsidi2_speculative,
6940
6941 gen_movbi_speculative_advanced,
6942 gen_movqi_speculative_advanced,
6943 gen_movhi_speculative_advanced,
6944 gen_movsi_speculative_advanced,
6945 gen_movdi_speculative_advanced,
6946 gen_movsf_speculative_advanced,
6947 gen_movdf_speculative_advanced,
6948 gen_movxf_speculative_advanced,
6949 gen_movti_speculative_advanced,
6950 gen_zero_extendqidi2_speculative_advanced,
6951 gen_zero_extendhidi2_speculative_advanced,
6952 gen_zero_extendsidi2_speculative_advanced,
6953
6954 gen_movbi_speculative_advanced,
6955 gen_movqi_speculative_advanced,
6956 gen_movhi_speculative_advanced,
6957 gen_movsi_speculative_advanced,
6958 gen_movdi_speculative_advanced,
6959 gen_movsf_speculative_advanced,
6960 gen_movdf_speculative_advanced,
6961 gen_movxf_speculative_advanced,
6962 gen_movti_speculative_advanced,
6963 gen_zero_extendqidi2_speculative_advanced,
6964 gen_zero_extendhidi2_speculative_advanced,
6965 gen_zero_extendsidi2_speculative_advanced
6966 };
6967
6968 load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;
6969
6970 if (ts & BEGIN_DATA)
6971 {
6972 /* We don't need recovery code because, even if this is ld.sa,
6973 an ALAT entry will be allocated only if the NAT bit is set to zero.
6974 So it is enough to use ld.c here. */
6975
6976 if (ts & BEGIN_CONTROL)
6977 {
6978 load_no += SPEC_GEN_SA;
6979
6980 if (!mflag_sched_ldc)
6981 shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
6982 }
6983 else
6984 {
6985 load_no += SPEC_GEN_A;
6986
6987 if (!mflag_sched_ldc)
6988 shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
6989 }
6990 }
6991 else if (ts & BEGIN_CONTROL)
6992 {
6993 /* ld.sa can be used instead of ld.s to avoid basic block splitting. */
6994 if (!mflag_control_ldc)
6995 load_no += SPEC_GEN_S;
6996 else
6997 {
6998 gcc_assert (mflag_sched_ldc);
6999 load_no += SPEC_GEN_SA_FOR_S;
7000 }
7001 }
7002 else
7003 gcc_unreachable ();
7004
7005 /* Set the desired check index. We add '1' because a zero element in this
7006 array means that the instruction with that uid is non-speculative. */
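  /* For example (an illustrative trace of the code above, not from the
     sources): a plain data-speculative DImode load with mflag_sched_ldc set
     gets load_no = 4 + SPEC_GEN_A = 4 and shift = 0, so spec_check_no
     records 5 and gen_load[4], i.e. gen_movdi_advanced, builds the ld.a
     pattern.  */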
7007 spec_check_no[INSN_UID (insn)] = load_no + shift + 1;
7008
7009 if (!gen_p)
7010 return 0;
7011
7012 new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
7013 copy_rtx (recog_data.operand[1]));
7014
7015 pat = PATTERN (insn);
7016 if (GET_CODE (pat) == COND_EXEC)
7017 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx
7018 (COND_EXEC_TEST (pat)), new_pat);
7019
7020 return new_pat;
7021}
7022
7023/* Offset to branchy checks. */
7024enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
7025
7026/* Return nonzero if INSN needs a branchy recovery check. */
7027static bool
7028ia64_needs_block_p (rtx insn)
7029{
7030 int check_no;
7031
7032 check_no = spec_check_no[INSN_UID(insn)] - 1;
7033 gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
7034
7035 return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
7036 || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
7037}
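/* A sketch of why these two ranges (an inference from the offsets above, not
   a statement in the sources): indices in [SPEC_GEN_S, SPEC_GEN_S + SPEC_N)
   come from ld.s loads, and indices in [4 * SPEC_N, 5 * SPEC_N) come from
   loads that are checked with chk.a; both kinds of check branch to recovery
   code, hence need a recovery block.  */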
7038
7039/* Generate (or regenerate, if MUTATE_P) a recovery check for INSN.
7040 If (LABEL != 0 || MUTATE_P), generate a branchy recovery check.
7041 Otherwise, generate a simple check. */
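/* Layout note (a restatement of how the table below is indexed, added for
   clarity): gen_check is organized in groups of SPEC_N entries mirroring the
   gen_load groups and is indexed by spec_check_no[...] - 1; the group
   reached through SPEC_GEN_CHECK_MUTATION_OFFSET is used only after an ld.c
   has been mutated as described in the MUTATE_P branch of the code below.  */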
7042static rtx
7043ia64_gen_check (rtx insn, rtx label, bool mutate_p)
7044{
7045 rtx op1, pat, check_pat;
7046
7047 static rtx (* const gen_check[]) (rtx, rtx) = {
7048 gen_movbi_clr,
7049 gen_movqi_clr,
7050 gen_movhi_clr,
7051 gen_movsi_clr,
7052 gen_movdi_clr,
7053 gen_movsf_clr,
7054 gen_movdf_clr,
7055 gen_movxf_clr,
7056 gen_movti_clr,
7057 gen_zero_extendqidi2_clr,
7058 gen_zero_extendhidi2_clr,
7059 gen_zero_extendsidi2_clr,
7060
7061 gen_speculation_check_bi,
7062 gen_speculation_check_qi,
7063 gen_speculation_check_hi,
7064 gen_speculation_check_si,
7065 gen_speculation_check_di,
7066 gen_speculation_check_sf,
7067 gen_speculation_check_df,
7068 gen_speculation_check_xf,
7069 gen_speculation_check_ti,
7070 gen_speculation_check_di,
7071 gen_speculation_check_di,
7072 gen_speculation_check_di,
7073
7074 gen_movbi_clr,
7075 gen_movqi_clr,
7076 gen_movhi_clr,
7077 gen_movsi_clr,
7078 gen_movdi_clr,
7079 gen_movsf_clr,
7080 gen_movdf_clr,
7081 gen_movxf_clr,
7082 gen_movti_clr,
7083 gen_zero_extendqidi2_clr,
7084 gen_zero_extendhidi2_clr,
7085 gen_zero_extendsidi2_clr,
7086
7087 gen_movbi_clr,
7088 gen_movqi_clr,
7089 gen_movhi_clr,
7090 gen_movsi_clr,
7091 gen_movdi_clr,
7092 gen_movsf_clr,
7093 gen_movdf_clr,
7094 gen_movxf_clr,
7095 gen_movti_clr,
7096 gen_zero_extendqidi2_clr,
7097 gen_zero_extendhidi2_clr,
7098 gen_zero_extendsidi2_clr,
7099
7100 gen_advanced_load_check_clr_bi,
7101 gen_advanced_load_check_clr_qi,
7102 gen_advanced_load_check_clr_hi,
7103 gen_advanced_load_check_clr_si,
7104 gen_advanced_load_check_clr_di,
7105 gen_advanced_load_check_clr_sf,
7106 gen_advanced_load_check_clr_df,
7107 gen_advanced_load_check_clr_xf,
7108 gen_advanced_load_check_clr_ti,
7109 gen_advanced_load_check_clr_di,
7110 gen_advanced_load_check_clr_di,
7111 gen_advanced_load_check_clr_di,
7112
7113 /* Following checks are generated during mutation. */
7114 gen_advanced_load_check_clr_bi,
7115 gen_advanced_load_check_clr_qi,
7116 gen_advanced_load_check_clr_hi,
7117 gen_advanced_load_check_clr_si,
7118 gen_advanced_load_check_clr_di,
7119 gen_advanced_load_check_clr_sf,
7120 gen_advanced_load_check_clr_df,
7121 gen_advanced_load_check_clr_xf,
7122 gen_advanced_load_check_clr_ti,
7123 gen_advanced_load_check_clr_di,
7124 gen_advanced_load_check_clr_di,
7125 gen_advanced_load_check_clr_di,
7126
7127 0,0,0,0,0,0,0,0,0,0,0,0,
7128
7129 gen_advanced_load_check_clr_bi,
7130 gen_advanced_load_check_clr_qi,
7131 gen_advanced_load_check_clr_hi,
7132 gen_advanced_load_check_clr_si,
7133 gen_advanced_load_check_clr_di,
7134 gen_advanced_load_check_clr_sf,
7135 gen_advanced_load_check_clr_df,
7136 gen_advanced_load_check_clr_xf,
7137 gen_advanced_load_check_clr_ti,
7138 gen_advanced_load_check_clr_di,
7139 gen_advanced_load_check_clr_di,
7140 gen_advanced_load_check_clr_di,
7141
7142 gen_speculation_check_bi,
7143 gen_speculation_check_qi,
7144 gen_speculation_check_hi,
7145 gen_speculation_check_si,
7146 gen_speculation_check_di,
7147 gen_speculation_check_sf,
7148 gen_speculation_check_df,
7149 gen_speculation_check_xf,
7150 gen_speculation_check_ti,
7151 gen_speculation_check_di,
7152 gen_speculation_check_di,
7153 gen_speculation_check_di
7154 };
7155
7156 extract_insn_cached (insn);
7157
7158 if (label)
7159 {
7160 gcc_assert (mutate_p || ia64_needs_block_p (insn));
7161 op1 = label;
7162 }
7163 else
7164 {
7165 gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
7166 op1 = copy_rtx (recog_data.operand[1]);
7167 }
7168
7169 if (mutate_p)
7170 /* INSN is ld.c.
7171 Find the speculation check number by searching for original
7172 speculative load in the RESOLVED_DEPS list of INSN.
7173 As long as patterns are unique for each instruction, this can be
7174 accomplished by matching ORIG_PAT fields. */
7175 {
b198261f 7176 dep_link_t link;
048d0d36
MK
7177 int check_no = 0;
7178 rtx orig_pat = ORIG_PAT (insn);
7179
b198261f 7180 FOR_EACH_DEP_LINK (link, INSN_RESOLVED_BACK_DEPS (insn))
048d0d36 7181 {
b198261f 7182 rtx x = DEP_LINK_PRO (link);
048d0d36
MK
7183
7184 if (ORIG_PAT (x) == orig_pat)
7185 check_no = spec_check_no[INSN_UID (x)];
7186 }
7187 gcc_assert (check_no);
7188
7189 spec_check_no[INSN_UID (insn)] = (check_no
7190 + SPEC_GEN_CHECK_MUTATION_OFFSET);
7191 }
7192
7193 check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
7194 (copy_rtx (recog_data.operand[0]), op1));
7195
7196 pat = PATTERN (insn);
7197 if (GET_CODE (pat) == COND_EXEC)
7198 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7199 check_pat);
7200
7201 return check_pat;
7202}
7203
7204/* Return nonzero if X is a branchy recovery check. */
7205static int
7206ia64_spec_check_p (rtx x)
7207{
7208 x = PATTERN (x);
7209 if (GET_CODE (x) == COND_EXEC)
7210 x = COND_EXEC_CODE (x);
7211 if (GET_CODE (x) == SET)
7212 return ia64_spec_check_src_p (SET_SRC (x));
7213 return 0;
7214}
7215
7216/* Return nonzero if SRC belongs to a recovery check. */
7217static int
7218ia64_spec_check_src_p (rtx src)
7219{
7220 if (GET_CODE (src) == IF_THEN_ELSE)
7221 {
7222 rtx t;
7223
7224 t = XEXP (src, 0);
7225 if (GET_CODE (t) == NE)
7226 {
7227 t = XEXP (t, 0);
7228
7229 if (GET_CODE (t) == UNSPEC)
7230 {
7231 int code;
7232
7233 code = XINT (t, 1);
7234
7235 if (code == UNSPEC_CHKACLR
7236 || code == UNSPEC_CHKS
7237 || code == UNSPEC_LDCCLR)
7238 {
7239 gcc_assert (code != 0);
7240 return code;
7241 }
7242 }
7243 }
7244 }
7245 return 0;
7246}
30028c85 7247\f
2130b7fb 7248
30028c85
VM
7249/* The following page contains the abstract data `bundle states' which are
7250 used for bundling insns (inserting nops and generating templates). */
7251
7252/* The following describes state of insn bundling. */
7253
7254struct bundle_state
7255{
7256 /* Unique bundle state number to identify them in the debugging
7257 output */
7258 int unique_num;
7259 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
7260 /* number of nops before and after the insn */
7261 short before_nops_num, after_nops_num;
7262 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
7263 insn) */
7264 int cost; /* cost of the state in cycles */
7265 int accumulated_insns_num; /* number of all previous insns including
7266 nops. L is considered as 2 insns */
7267 int branch_deviation; /* deviation of previous branches from 3rd slots */
7268 struct bundle_state *next; /* next state with the same insn_num */
7269 struct bundle_state *originator; /* originator (previous insn state) */
7270 /* All bundle states are in the following chain. */
7271 struct bundle_state *allocated_states_chain;
7272 /* The DFA State after issuing the insn and the nops. */
7273 state_t dfa_state;
7274};
2130b7fb 7275
30028c85 7276/* The following array maps an insn number to the corresponding bundle state. */
2130b7fb 7277
30028c85 7278static struct bundle_state **index_to_bundle_states;
2130b7fb 7279
30028c85 7280/* The unique number of next bundle state. */
2130b7fb 7281
30028c85 7282static int bundle_states_num;
2130b7fb 7283
30028c85 7284/* All allocated bundle states are in the following chain. */
2130b7fb 7285
30028c85 7286static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 7287
30028c85
VM
7288/* All allocated but not used bundle states are in the following
7289 chain. */
870f9ec0 7290
30028c85 7291static struct bundle_state *free_bundle_state_chain;
2130b7fb 7292
2130b7fb 7293
30028c85 7294/* The following function returns a free bundle state. */
2130b7fb 7295
30028c85 7296static struct bundle_state *
9c808aad 7297get_free_bundle_state (void)
30028c85
VM
7298{
7299 struct bundle_state *result;
2130b7fb 7300
30028c85 7301 if (free_bundle_state_chain != NULL)
2130b7fb 7302 {
30028c85
VM
7303 result = free_bundle_state_chain;
7304 free_bundle_state_chain = result->next;
2130b7fb 7305 }
30028c85 7306 else
2130b7fb 7307 {
30028c85
VM
7308 result = xmalloc (sizeof (struct bundle_state));
7309 result->dfa_state = xmalloc (dfa_state_size);
7310 result->allocated_states_chain = allocated_bundle_states_chain;
7311 allocated_bundle_states_chain = result;
2130b7fb 7312 }
30028c85
VM
7313 result->unique_num = bundle_states_num++;
7314 return result;
9c808aad 7315
30028c85 7316}
2130b7fb 7317
30028c85 7318/* The following function frees given bundle state. */
2130b7fb 7319
30028c85 7320static void
9c808aad 7321free_bundle_state (struct bundle_state *state)
30028c85
VM
7322{
7323 state->next = free_bundle_state_chain;
7324 free_bundle_state_chain = state;
7325}
2130b7fb 7326
30028c85 7327/* Start work with abstract data `bundle states'. */
2130b7fb 7328
30028c85 7329static void
9c808aad 7330initiate_bundle_states (void)
30028c85
VM
7331{
7332 bundle_states_num = 0;
7333 free_bundle_state_chain = NULL;
7334 allocated_bundle_states_chain = NULL;
2130b7fb
BS
7335}
7336
30028c85 7337/* Finish work with abstract data `bundle states'. */
2130b7fb
BS
7338
7339static void
9c808aad 7340finish_bundle_states (void)
2130b7fb 7341{
30028c85
VM
7342 struct bundle_state *curr_state, *next_state;
7343
7344 for (curr_state = allocated_bundle_states_chain;
7345 curr_state != NULL;
7346 curr_state = next_state)
2130b7fb 7347 {
30028c85
VM
7348 next_state = curr_state->allocated_states_chain;
7349 free (curr_state->dfa_state);
7350 free (curr_state);
2130b7fb 7351 }
2130b7fb
BS
7352}
7353
30028c85
VM
7354/* Hash table of the bundle states. The key is dfa_state and insn_num
7355 of the bundle states. */
2130b7fb 7356
30028c85 7357static htab_t bundle_state_table;
2130b7fb 7358
30028c85 7359/* The function returns hash of BUNDLE_STATE. */
2130b7fb 7360
30028c85 7361static unsigned
9c808aad 7362bundle_state_hash (const void *bundle_state)
30028c85
VM
7363{
7364 const struct bundle_state *state = (struct bundle_state *) bundle_state;
7365 unsigned result, i;
2130b7fb 7366
30028c85
VM
7367 for (result = i = 0; i < dfa_state_size; i++)
7368 result += (((unsigned char *) state->dfa_state) [i]
7369 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
7370 return result + state->insn_num;
7371}
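/* Note (restating what the hash/equality pair implements): the hash mixes
   every byte of the DFA state and adds insn_num, matching the key used by
   bundle_state_eq_p, so two bundle states collapse into one table entry only
   when both the automaton state and the insn position coincide.  */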
2130b7fb 7372
30028c85 7373/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 7374
30028c85 7375static int
9c808aad 7376bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
30028c85
VM
7377{
7378 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
7379 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
2130b7fb 7380
30028c85
VM
7381 return (state1->insn_num == state2->insn_num
7382 && memcmp (state1->dfa_state, state2->dfa_state,
7383 dfa_state_size) == 0);
7384}
2130b7fb 7385
30028c85
VM
7386/* The function inserts BUNDLE_STATE into the hash table. The
7387 function returns nonzero if the bundle state has been inserted into the
7388 table. The table contains the best bundle state with a given key. */
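/* Here "best" means, as the comparison below spells out: lower cost first,
   then fewer accumulated insns (i.e. fewer inserted nops), then smaller
   branch deviation -- the same ordering used later when best_state is
   chosen in bundling ().  */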
2130b7fb 7389
30028c85 7390static int
9c808aad 7391insert_bundle_state (struct bundle_state *bundle_state)
30028c85
VM
7392{
7393 void **entry_ptr;
2130b7fb 7394
30028c85
VM
7395 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
7396 if (*entry_ptr == NULL)
7397 {
7398 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
7399 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
7400 *entry_ptr = (void *) bundle_state;
7401 return TRUE;
2130b7fb 7402 }
30028c85
VM
7403 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
7404 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
7405 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
7406 > bundle_state->accumulated_insns_num
7407 || (((struct bundle_state *)
7408 *entry_ptr)->accumulated_insns_num
7409 == bundle_state->accumulated_insns_num
7410 && ((struct bundle_state *)
7411 *entry_ptr)->branch_deviation
7412 > bundle_state->branch_deviation))))
9c808aad 7413
2130b7fb 7414 {
30028c85
VM
7415 struct bundle_state temp;
7416
7417 temp = *(struct bundle_state *) *entry_ptr;
7418 *(struct bundle_state *) *entry_ptr = *bundle_state;
7419 ((struct bundle_state *) *entry_ptr)->next = temp.next;
7420 *bundle_state = temp;
2130b7fb 7421 }
30028c85
VM
7422 return FALSE;
7423}
2130b7fb 7424
30028c85
VM
7425/* Start work with the hash table. */
7426
7427static void
9c808aad 7428initiate_bundle_state_table (void)
30028c85
VM
7429{
7430 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
7431 (htab_del) 0);
2130b7fb
BS
7432}
7433
30028c85 7434/* Finish work with the hash table. */
e4027dab
BS
7435
7436static void
9c808aad 7437finish_bundle_state_table (void)
e4027dab 7438{
30028c85 7439 htab_delete (bundle_state_table);
e4027dab
BS
7440}
7441
30028c85 7442\f
a0a7b566 7443
30028c85
VM
7444/* The following variable is an insn `nop' used to check bundle states
7445 with different numbers of inserted nops. */
a0a7b566 7446
30028c85 7447static rtx ia64_nop;
a0a7b566 7448
30028c85
VM
7449/* The following function tries to issue NOPS_NUM nops for the current
7450 state without advancing the processor cycle. If it fails, the
7451 function returns FALSE and frees the current state. */
7452
7453static int
9c808aad 7454try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 7455{
30028c85 7456 int i;
a0a7b566 7457
30028c85
VM
7458 for (i = 0; i < nops_num; i++)
7459 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
7460 {
7461 free_bundle_state (curr_state);
7462 return FALSE;
7463 }
7464 return TRUE;
7465}
a0a7b566 7466
30028c85
VM
7467/* The following function tries to issue INSN for the current
7468 state without advancing the processor cycle. If it fails, the
7469 function returns FALSE and frees the current state. */
a0a7b566 7470
30028c85 7471static int
9c808aad 7472try_issue_insn (struct bundle_state *curr_state, rtx insn)
30028c85
VM
7473{
7474 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
7475 {
7476 free_bundle_state (curr_state);
7477 return FALSE;
7478 }
7479 return TRUE;
7480}
a0a7b566 7481
30028c85
VM
7482/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
7483 starting with ORIGINATOR without advancing the processor cycle. If
f32360c7
VM
7484 TRY_BUNDLE_END_P is TRUE, the function also/only (if
7485 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
7486 If it is successful, the function creates a new bundle state and
7487 inserts it into the hash table and into `index_to_bundle_states'. */
a0a7b566 7488
30028c85 7489static void
9c808aad
AJ
7490issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
7491 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
30028c85
VM
7492{
7493 struct bundle_state *curr_state;
7494
7495 curr_state = get_free_bundle_state ();
7496 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
7497 curr_state->insn = insn;
7498 curr_state->insn_num = originator->insn_num + 1;
7499 curr_state->cost = originator->cost;
7500 curr_state->originator = originator;
7501 curr_state->before_nops_num = before_nops_num;
7502 curr_state->after_nops_num = 0;
7503 curr_state->accumulated_insns_num
7504 = originator->accumulated_insns_num + before_nops_num;
7505 curr_state->branch_deviation = originator->branch_deviation;
e820471b
NS
7506 gcc_assert (insn);
7507 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
30028c85 7508 {
e820471b 7509 gcc_assert (GET_MODE (insn) != TImode);
30028c85
VM
7510 if (!try_issue_nops (curr_state, before_nops_num))
7511 return;
7512 if (!try_issue_insn (curr_state, insn))
7513 return;
7514 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
7515 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
7516 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 7517 {
30028c85
VM
7518 free_bundle_state (curr_state);
7519 return;
a0a7b566 7520 }
a0a7b566 7521 }
30028c85 7522 else if (GET_MODE (insn) != TImode)
a0a7b566 7523 {
30028c85
VM
7524 if (!try_issue_nops (curr_state, before_nops_num))
7525 return;
7526 if (!try_issue_insn (curr_state, insn))
7527 return;
f32360c7 7528 curr_state->accumulated_insns_num++;
e820471b
NS
7529 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
7530 && asm_noperands (PATTERN (insn)) < 0);
7531
30028c85
VM
7532 if (ia64_safe_type (insn) == TYPE_L)
7533 curr_state->accumulated_insns_num++;
7534 }
7535 else
7536 {
68e11b42
JW
7537 /* If this is an insn that must be first in a group, then don't allow
7538 nops to be emitted before it. Currently, alloc is the only such
7539 supported instruction. */
7540 /* ??? The bundling automatons should handle this for us, but they do
7541 not yet have support for the first_insn attribute. */
7542 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
7543 {
7544 free_bundle_state (curr_state);
7545 return;
7546 }
7547
30028c85
VM
7548 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
7549 state_transition (curr_state->dfa_state, NULL);
7550 curr_state->cost++;
7551 if (!try_issue_nops (curr_state, before_nops_num))
7552 return;
7553 if (!try_issue_insn (curr_state, insn))
7554 return;
f32360c7
VM
7555 curr_state->accumulated_insns_num++;
7556 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7557 || asm_noperands (PATTERN (insn)) >= 0)
7558 {
7559 /* Finish bundle containing asm insn. */
7560 curr_state->after_nops_num
7561 = 3 - curr_state->accumulated_insns_num % 3;
7562 curr_state->accumulated_insns_num
7563 += 3 - curr_state->accumulated_insns_num % 3;
7564 }
7565 else if (ia64_safe_type (insn) == TYPE_L)
30028c85
VM
7566 curr_state->accumulated_insns_num++;
7567 }
7568 if (ia64_safe_type (insn) == TYPE_B)
7569 curr_state->branch_deviation
7570 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
7571 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
7572 {
f32360c7 7573 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 7574 {
30028c85
VM
7575 state_t dfa_state;
7576 struct bundle_state *curr_state1;
7577 struct bundle_state *allocated_states_chain;
7578
7579 curr_state1 = get_free_bundle_state ();
7580 dfa_state = curr_state1->dfa_state;
7581 allocated_states_chain = curr_state1->allocated_states_chain;
7582 *curr_state1 = *curr_state;
7583 curr_state1->dfa_state = dfa_state;
7584 curr_state1->allocated_states_chain = allocated_states_chain;
7585 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
7586 dfa_state_size);
7587 curr_state = curr_state1;
a0a7b566 7588 }
30028c85
VM
7589 if (!try_issue_nops (curr_state,
7590 3 - curr_state->accumulated_insns_num % 3))
7591 return;
7592 curr_state->after_nops_num
7593 = 3 - curr_state->accumulated_insns_num % 3;
7594 curr_state->accumulated_insns_num
7595 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 7596 }
30028c85
VM
7597 if (!insert_bundle_state (curr_state))
7598 free_bundle_state (curr_state);
7599 return;
7600}
e013f3c7 7601
30028c85
VM
7602/* The following function returns the position in the two-bundle window
7603 for the given STATE. */
7604
7605static int
9c808aad 7606get_max_pos (state_t state)
30028c85
VM
7607{
7608 if (cpu_unit_reservation_p (state, pos_6))
7609 return 6;
7610 else if (cpu_unit_reservation_p (state, pos_5))
7611 return 5;
7612 else if (cpu_unit_reservation_p (state, pos_4))
7613 return 4;
7614 else if (cpu_unit_reservation_p (state, pos_3))
7615 return 3;
7616 else if (cpu_unit_reservation_p (state, pos_2))
7617 return 2;
7618 else if (cpu_unit_reservation_p (state, pos_1))
7619 return 1;
7620 else
7621 return 0;
a0a7b566
BS
7622}
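/* Reading note (an interpretation of the DFA units above, for clarity):
   pos_1 .. pos_6 correspond to the six slots of the two-bundle window, so
   the returned value is the number of slots already occupied -- 3 means the
   first bundle of the window is complete, 6 means both bundles are.  */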
7623
30028c85
VM
7624/* The function returns the code of a possible template for the given
7625 position and state. The function should be called only with the two
96ddf8ef
VM
7626 position values 3 or 6. We avoid generating F NOPs by putting
7627 templates containing F insns at the end of the template search,
7628 because of an undocumented anomaly in McKinley-derived cores which can
7629 cause stalls if an F-unit insn (including a NOP) is issued within a
7630 six-cycle window after reading certain application registers (such
7631 as ar.bsp). Furthermore, power considerations also argue against
7632 the use of F-unit instructions unless they're really needed. */
2130b7fb 7633
c237e94a 7634static int
9c808aad 7635get_template (state_t state, int pos)
2130b7fb 7636{
30028c85 7637 switch (pos)
2130b7fb 7638 {
30028c85 7639 case 3:
96ddf8ef 7640 if (cpu_unit_reservation_p (state, _0mmi_))
30028c85 7641 return 1;
96ddf8ef
VM
7642 else if (cpu_unit_reservation_p (state, _0mii_))
7643 return 0;
30028c85
VM
7644 else if (cpu_unit_reservation_p (state, _0mmb_))
7645 return 7;
96ddf8ef
VM
7646 else if (cpu_unit_reservation_p (state, _0mib_))
7647 return 6;
7648 else if (cpu_unit_reservation_p (state, _0mbb_))
7649 return 5;
7650 else if (cpu_unit_reservation_p (state, _0bbb_))
7651 return 4;
7652 else if (cpu_unit_reservation_p (state, _0mmf_))
7653 return 3;
7654 else if (cpu_unit_reservation_p (state, _0mfi_))
7655 return 2;
30028c85
VM
7656 else if (cpu_unit_reservation_p (state, _0mfb_))
7657 return 8;
7658 else if (cpu_unit_reservation_p (state, _0mlx_))
7659 return 9;
7660 else
e820471b 7661 gcc_unreachable ();
30028c85 7662 case 6:
96ddf8ef 7663 if (cpu_unit_reservation_p (state, _1mmi_))
30028c85 7664 return 1;
96ddf8ef
VM
7665 else if (cpu_unit_reservation_p (state, _1mii_))
7666 return 0;
30028c85
VM
7667 else if (cpu_unit_reservation_p (state, _1mmb_))
7668 return 7;
96ddf8ef
VM
7669 else if (cpu_unit_reservation_p (state, _1mib_))
7670 return 6;
7671 else if (cpu_unit_reservation_p (state, _1mbb_))
7672 return 5;
7673 else if (cpu_unit_reservation_p (state, _1bbb_))
7674 return 4;
7675 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
7676 return 3;
7677 else if (cpu_unit_reservation_p (state, _1mfi_))
7678 return 2;
30028c85
VM
7679 else if (cpu_unit_reservation_p (state, _1mfb_))
7680 return 8;
7681 else if (cpu_unit_reservation_p (state, _1mlx_))
7682 return 9;
7683 else
e820471b 7684 gcc_unreachable ();
30028c85 7685 default:
e820471b 7686 gcc_unreachable ();
2130b7fb 7687 }
30028c85 7688}
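/* For reference, the template codes returned above map onto bundle types as
   follows (read directly from the unit tests above): 0 = .mii, 1 = .mmi,
   2 = .mfi, 3 = .mmf, 4 = .bbb, 5 = .mbb, 6 = .mib, 7 = .mmb, 8 = .mfb,
   9 = .mlx -- which is why the Itanium1 pass below rewrites template 9 (MLX)
   to 2 (MFI) and emits template 0 (MII) bundles of nops.  */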
2130b7fb 7689
30028c85
VM
7690/* The following function returns the first insn important for insn
7691 bundling that is at or after INSN and before TAIL. */
a0a7b566 7692
30028c85 7693static rtx
9c808aad 7694get_next_important_insn (rtx insn, rtx tail)
30028c85
VM
7695{
7696 for (; insn && insn != tail; insn = NEXT_INSN (insn))
7697 if (INSN_P (insn)
7698 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7699 && GET_CODE (PATTERN (insn)) != USE
7700 && GET_CODE (PATTERN (insn)) != CLOBBER)
7701 return insn;
7702 return NULL_RTX;
7703}
7704
4a4cd49c
JJ
7705/* Add a bundle selector TEMPLATE0 before INSN. */
7706
7707static void
7708ia64_add_bundle_selector_before (int template0, rtx insn)
7709{
7710 rtx b = gen_bundle_selector (GEN_INT (template0));
7711
7712 ia64_emit_insn_before (b, insn);
7713#if NR_BUNDLES == 10
7714 if ((template0 == 4 || template0 == 5)
7715 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
7716 {
7717 int i;
7718 rtx note = NULL_RTX;
7719
7720 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
7721 first or second slot. If it is and has REG_EH_NOTE set, copy it
7722 to following nops, as br.call sets rp to the address of following
7723 bundle and therefore an EH region end must be on a bundle
7724 boundary. */
7725 insn = PREV_INSN (insn);
7726 for (i = 0; i < 3; i++)
7727 {
7728 do
7729 insn = next_active_insn (insn);
7730 while (GET_CODE (insn) == INSN
7731 && get_attr_empty (insn) == EMPTY_YES);
7732 if (GET_CODE (insn) == CALL_INSN)
7733 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
7734 else if (note)
7735 {
7736 int code;
7737
7738 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
7739 || code == CODE_FOR_nop_b);
7740 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
7741 note = NULL_RTX;
7742 else
7743 REG_NOTES (insn)
7744 = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
7745 REG_NOTES (insn));
7746 }
7747 }
7748 }
7749#endif
7750}
7751
c856f536
VM
7752/* The following function does insn bundling. Bundling means
7753 inserting templates and nop insns to fit insn groups into permitted
7754 templates. Instruction scheduling uses NDFA (non-deterministic
7755 finite automata) encoding information about the templates and the
7756 inserted nops. Nondeterminism of the automata permits following
7757 all possible insn sequences very quickly.
7758
7759 Unfortunately it is not possible to get information about inserting
7760 nop insns and used templates from the automaton states. The
7761 automaton only says that we can issue an insn possibly inserting
7762 some nops before it and using some template. Therefore insn
7763 bundling in this function is implemented by using a DFA
048d0d36 7764 (deterministic finite automata). We follow all possible insn
c856f536
VM
7765 sequences by inserting 0-2 nops (that is what the NDFA describes for
7766 insn scheduling) before/after each insn being bundled. We know the
7767 start of a simulated processor cycle from insn scheduling (an insn
7768 starting a new cycle has TImode).
7769
7770 A simple implementation of insn bundling would create an enormous
7771 number of possible insn sequences satisfying information about new
7772 cycle ticks taken from the insn scheduling. To make the algorithm
7773 practical we use dynamic programming. Each decision (about
7774 inserting nops and implicitly about previous decisions) is described
7775 by the structure bundle_state (see above). If we generate the same
7776 bundle state (the key is the automaton state after issuing the insns
7777 and nops for it), we reuse the already generated one. As a consequence we
1e5f1716 7778 reject some decisions which cannot improve the solution and
c856f536
VM
7779 reduce memory for the algorithm.
7780
7781 When we reach the end of EBB (extended basic block), we choose the
7782 best sequence and then, moving back in EBB, insert templates for
7783 the best alternative. The templates are taken from querying
7784 automaton state for each insn in chosen bundle states.
7785
7786 So the algorithm makes two (forward and backward) passes through
7787 EBB. There is an additional forward pass through EBB for Itanium1
7788 processor. This pass inserts more nops to make the dependency between
7789 a producer insn and an MMMUL/MMSHF insn at least 4 cycles long. */
a0a7b566 7790
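/* A compressed sketch of the data flow in bundling () (an illustrative
   restatement, not from the sources): the forward pass fills
   index_to_bundle_states[0 .. insn_num], each entry chaining the surviving
   bundle_state records for that insn position; best_state is then picked
   among the states whose accumulated_insns_num is a multiple of 3, and the
   backward pass walks best_state->originator links emitting bundle
   selectors and nops.  */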
30028c85 7791static void
9c808aad 7792bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
30028c85
VM
7793{
7794 struct bundle_state *curr_state, *next_state, *best_state;
7795 rtx insn, next_insn;
7796 int insn_num;
f32360c7 7797 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 7798 int pos = 0, max_pos, template0, template1;
30028c85
VM
7799 rtx b;
7800 rtx nop;
7801 enum attr_type type;
2d1b811d 7802
30028c85 7803 insn_num = 0;
c856f536 7804 /* Count insns in the EBB. */
30028c85
VM
7805 for (insn = NEXT_INSN (prev_head_insn);
7806 insn && insn != tail;
7807 insn = NEXT_INSN (insn))
7808 if (INSN_P (insn))
7809 insn_num++;
7810 if (insn_num == 0)
7811 return;
7812 bundling_p = 1;
7813 dfa_clean_insn_cache ();
7814 initiate_bundle_state_table ();
7815 index_to_bundle_states = xmalloc ((insn_num + 2)
7816 * sizeof (struct bundle_state *));
ff482c8d 7817 /* First (forward) pass -- generation of bundle states. */
30028c85
VM
7818 curr_state = get_free_bundle_state ();
7819 curr_state->insn = NULL;
7820 curr_state->before_nops_num = 0;
7821 curr_state->after_nops_num = 0;
7822 curr_state->insn_num = 0;
7823 curr_state->cost = 0;
7824 curr_state->accumulated_insns_num = 0;
7825 curr_state->branch_deviation = 0;
7826 curr_state->next = NULL;
7827 curr_state->originator = NULL;
7828 state_reset (curr_state->dfa_state);
7829 index_to_bundle_states [0] = curr_state;
7830 insn_num = 0;
c856f536 7831 /* Shift cycle mark if it is put on insn which could be ignored. */
30028c85
VM
7832 for (insn = NEXT_INSN (prev_head_insn);
7833 insn != tail;
7834 insn = NEXT_INSN (insn))
7835 if (INSN_P (insn)
7836 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7837 || GET_CODE (PATTERN (insn)) == USE
7838 || GET_CODE (PATTERN (insn)) == CLOBBER)
7839 && GET_MODE (insn) == TImode)
2130b7fb 7840 {
30028c85
VM
7841 PUT_MODE (insn, VOIDmode);
7842 for (next_insn = NEXT_INSN (insn);
7843 next_insn != tail;
7844 next_insn = NEXT_INSN (next_insn))
7845 if (INSN_P (next_insn)
7846 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
7847 && GET_CODE (PATTERN (next_insn)) != USE
7848 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
7849 {
7850 PUT_MODE (next_insn, TImode);
7851 break;
7852 }
2130b7fb 7853 }
048d0d36 7854 /* Forward pass: generation of bundle states. */
30028c85
VM
7855 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7856 insn != NULL_RTX;
7857 insn = next_insn)
1ad72cef 7858 {
e820471b
NS
7859 gcc_assert (INSN_P (insn)
7860 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7861 && GET_CODE (PATTERN (insn)) != USE
7862 && GET_CODE (PATTERN (insn)) != CLOBBER);
f32360c7 7863 type = ia64_safe_type (insn);
30028c85
VM
7864 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7865 insn_num++;
7866 index_to_bundle_states [insn_num] = NULL;
7867 for (curr_state = index_to_bundle_states [insn_num - 1];
7868 curr_state != NULL;
7869 curr_state = next_state)
f83594c4 7870 {
30028c85 7871 pos = curr_state->accumulated_insns_num % 3;
30028c85 7872 next_state = curr_state->next;
c856f536
VM
7873 /* We must fill up the current bundle in order to start a
7874 subsequent asm insn in a new bundle. An asm insn is always
7875 placed in a separate bundle. */
f32360c7
VM
7876 only_bundle_end_p
7877 = (next_insn != NULL_RTX
7878 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7879 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
c856f536
VM
7880 /* We may fill up the current bundle if it is the cycle end
7881 without a group barrier. */
30028c85 7882 bundle_end_p
f32360c7 7883 = (only_bundle_end_p || next_insn == NULL_RTX
30028c85
VM
7884 || (GET_MODE (next_insn) == TImode
7885 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
7886 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7887 || type == TYPE_S
c856f536
VM
7888 /* We need to insert 2 nops for cases like M_MII. To
7889 guarantee issuing all insns on the same cycle for
7890 Itanium 1, we need to issue 2 nops after the first M
7891 insn (MnnMII where n is a nop insn). */
de101ad2
VM
7892 || ((type == TYPE_M || type == TYPE_A)
7893 && ia64_tune == PROCESSOR_ITANIUM
30028c85 7894 && !bundle_end_p && pos == 1))
f32360c7
VM
7895 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
7896 only_bundle_end_p);
7897 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
7898 only_bundle_end_p);
7899 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
7900 only_bundle_end_p);
f83594c4 7901 }
e820471b 7902 gcc_assert (index_to_bundle_states [insn_num]);
30028c85
VM
7903 for (curr_state = index_to_bundle_states [insn_num];
7904 curr_state != NULL;
7905 curr_state = curr_state->next)
7906 if (verbose >= 2 && dump)
7907 {
c856f536
VM
7908 /* This structure is taken from generated code of the
7909 pipeline hazard recognizer (see file insn-attrtab.c).
7910 Please don't forget to change the structure if a new
7911 automaton is added to .md file. */
30028c85
VM
7912 struct DFA_chip
7913 {
7914 unsigned short one_automaton_state;
7915 unsigned short oneb_automaton_state;
7916 unsigned short two_automaton_state;
7917 unsigned short twob_automaton_state;
7918 };
9c808aad 7919
30028c85
VM
7920 fprintf
7921 (dump,
7922 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7923 curr_state->unique_num,
7924 (curr_state->originator == NULL
7925 ? -1 : curr_state->originator->unique_num),
7926 curr_state->cost,
7927 curr_state->before_nops_num, curr_state->after_nops_num,
7928 curr_state->accumulated_insns_num, curr_state->branch_deviation,
7929 (ia64_tune == PROCESSOR_ITANIUM
7930 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7931 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7932 INSN_UID (insn));
7933 }
1ad72cef 7934 }
e820471b
NS
7935
7936 /* We should find a solution because the 2nd insn scheduling pass
7937 has found one. */
7938 gcc_assert (index_to_bundle_states [insn_num]);
c856f536 7939 /* Find a state corresponding to the best insn sequence. */
30028c85
VM
7940 best_state = NULL;
7941 for (curr_state = index_to_bundle_states [insn_num];
7942 curr_state != NULL;
7943 curr_state = curr_state->next)
c856f536
VM
7944 /* We only look at the states whose last bundle is completely
7945 filled. First we prefer insn sequences with minimal cost,
7946 then with the fewest inserted nops, and finally with branch insns
7947 placed in 3rd slots. */
30028c85
VM
7948 if (curr_state->accumulated_insns_num % 3 == 0
7949 && (best_state == NULL || best_state->cost > curr_state->cost
7950 || (best_state->cost == curr_state->cost
7951 && (curr_state->accumulated_insns_num
7952 < best_state->accumulated_insns_num
7953 || (curr_state->accumulated_insns_num
7954 == best_state->accumulated_insns_num
7955 && curr_state->branch_deviation
7956 < best_state->branch_deviation)))))
7957 best_state = curr_state;
c856f536 7958 /* Second (backward) pass: adding nops and templates. */
30028c85
VM
7959 insn_num = best_state->before_nops_num;
7960 template0 = template1 = -1;
7961 for (curr_state = best_state;
7962 curr_state->originator != NULL;
7963 curr_state = curr_state->originator)
7964 {
7965 insn = curr_state->insn;
f32360c7
VM
7966 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7967 || asm_noperands (PATTERN (insn)) >= 0);
30028c85
VM
7968 insn_num++;
7969 if (verbose >= 2 && dump)
2130b7fb 7970 {
30028c85
VM
7971 struct DFA_chip
7972 {
7973 unsigned short one_automaton_state;
7974 unsigned short oneb_automaton_state;
7975 unsigned short two_automaton_state;
7976 unsigned short twob_automaton_state;
7977 };
9c808aad 7978
30028c85
VM
7979 fprintf
7980 (dump,
7981 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7982 curr_state->unique_num,
7983 (curr_state->originator == NULL
7984 ? -1 : curr_state->originator->unique_num),
7985 curr_state->cost,
7986 curr_state->before_nops_num, curr_state->after_nops_num,
7987 curr_state->accumulated_insns_num, curr_state->branch_deviation,
7988 (ia64_tune == PROCESSOR_ITANIUM
7989 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7990 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7991 INSN_UID (insn));
2130b7fb 7992 }
c856f536
VM
7993 /* Find the position in the current bundle window. The window can
7994 contain at most two bundles. A two-bundle window means that
7995 the processor will make two bundle rotations. */
30028c85 7996 max_pos = get_max_pos (curr_state->dfa_state);
c856f536
VM
7997 if (max_pos == 6
7998 /* The following (negative template number) means that the
7999 processor did one bundle rotation. */
8000 || (max_pos == 3 && template0 < 0))
2130b7fb 8001 {
c856f536
VM
8002 /* We are at the end of the window -- find template(s) for
8003 its bundle(s). */
30028c85
VM
8004 pos = max_pos;
8005 if (max_pos == 3)
8006 template0 = get_template (curr_state->dfa_state, 3);
8007 else
8008 {
8009 template1 = get_template (curr_state->dfa_state, 3);
8010 template0 = get_template (curr_state->dfa_state, 6);
8011 }
8012 }
8013 if (max_pos > 3 && template1 < 0)
c856f536 8014 /* This may happen when we have a stop inside a bundle. */
30028c85 8015 {
e820471b 8016 gcc_assert (pos <= 3);
30028c85
VM
8017 template1 = get_template (curr_state->dfa_state, 3);
8018 pos += 3;
8019 }
f32360c7 8020 if (!asm_p)
c856f536 8021 /* Emit nops after the current insn. */
f32360c7
VM
8022 for (i = 0; i < curr_state->after_nops_num; i++)
8023 {
8024 nop = gen_nop ();
8025 emit_insn_after (nop, insn);
8026 pos--;
e820471b 8027 gcc_assert (pos >= 0);
f32360c7
VM
8028 if (pos % 3 == 0)
8029 {
c856f536
VM
8030 /* We are at the start of a bundle: emit the template
8031 (it should be defined). */
e820471b 8032 gcc_assert (template0 >= 0);
4a4cd49c 8033 ia64_add_bundle_selector_before (template0, nop);
c856f536
VM
8034 /* If we have a two-bundle window, we make one bundle
8035 rotation. Otherwise template0 will be undefined
8036 (a negative value). */
f32360c7
VM
8037 template0 = template1;
8038 template1 = -1;
8039 }
8040 }
c856f536
VM
8041 /* Move the position backward in the window. A group barrier has
8042 no slot. An asm insn takes a whole bundle. */
30028c85
VM
8043 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8044 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8045 && asm_noperands (PATTERN (insn)) < 0)
8046 pos--;
c856f536 8047 /* Long insn takes 2 slots. */
30028c85
VM
8048 if (ia64_safe_type (insn) == TYPE_L)
8049 pos--;
e820471b 8050 gcc_assert (pos >= 0);
30028c85
VM
8051 if (pos % 3 == 0
8052 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8053 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8054 && asm_noperands (PATTERN (insn)) < 0)
8055 {
c856f536
VM
8056 /* The current insn is at the bundle start: emit the
8057 template. */
e820471b 8058 gcc_assert (template0 >= 0);
4a4cd49c 8059 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
8060 b = PREV_INSN (insn);
8061 insn = b;
68776c43 8062 /* See comment above in analogous place for emitting nops
c856f536 8063 after the insn. */
30028c85
VM
8064 template0 = template1;
8065 template1 = -1;
8066 }
c856f536 8067 /* Emit nops before the current insn. */
30028c85
VM
8068 for (i = 0; i < curr_state->before_nops_num; i++)
8069 {
8070 nop = gen_nop ();
8071 ia64_emit_insn_before (nop, insn);
8072 nop = PREV_INSN (insn);
8073 insn = nop;
8074 pos--;
e820471b 8075 gcc_assert (pos >= 0);
30028c85
VM
8076 if (pos % 3 == 0)
8077 {
68776c43 8078 /* See comment above in analogous place for emitting nops
c856f536 8079 after the insn. */
e820471b 8080 gcc_assert (template0 >= 0);
4a4cd49c 8081 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
8082 b = PREV_INSN (insn);
8083 insn = b;
8084 template0 = template1;
8085 template1 = -1;
8086 }
2130b7fb
BS
8087 }
8088 }
30028c85 8089 if (ia64_tune == PROCESSOR_ITANIUM)
c856f536
VM
8090 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8091 Itanium1 has a strange design: if the distance between an insn
8092 and a dependent MM-insn is less than 4, we get a stall of 6
8093 additional cycles. So we make the distance equal to 4 cycles
8094 if it is less. */
30028c85
VM
8095 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8096 insn != NULL_RTX;
8097 insn = next_insn)
8098 {
e820471b
NS
8099 gcc_assert (INSN_P (insn)
8100 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8101 && GET_CODE (PATTERN (insn)) != USE
8102 && GET_CODE (PATTERN (insn)) != CLOBBER);
30028c85
VM
8103 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8104 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
c856f536 8105 /* We found an MM-insn which needs additional cycles. */
30028c85
VM
8106 {
8107 rtx last;
8108 int i, j, n;
8109 int pred_stop_p;
9c808aad 8110
c856f536
VM
8111 /* Now we search for the template of the bundle in
8112 which the MM-insn is placed and for the position of the
8113 insn in the bundle (0, 1, 2). We also check whether
8114 there is a stop before the insn. */
30028c85
VM
8115 last = prev_active_insn (insn);
8116 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8117 if (pred_stop_p)
8118 last = prev_active_insn (last);
8119 n = 0;
8120 for (;; last = prev_active_insn (last))
8121 if (recog_memoized (last) == CODE_FOR_bundle_selector)
8122 {
8123 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8124 if (template0 == 9)
c856f536
VM
8125 /* The insn is in an MLX bundle. Change the template
8126 to MFI because we will add nops before the
8127 insn. It simplifies subsequent code a lot. */
30028c85 8128 PATTERN (last)
a556fd39 8129 = gen_bundle_selector (const2_rtx); /* -> MFI */
30028c85
VM
8130 break;
8131 }
52b754e8
VM
8132 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8133 && (ia64_safe_itanium_class (last)
8134 != ITANIUM_CLASS_IGNORE))
30028c85 8135 n++;
c856f536
VM
8136 /* A sanity check: the stop is not at the bundle
8137 start, there are no more than 3 insns in the bundle,
8138 and the MM-insn is not at the start of a bundle with
8139 template MLX. */
e820471b
NS
8140 gcc_assert ((!pred_stop_p || n)
8141 && n <= 2
8142 && (template0 != 9 || !n));
c856f536 8143 /* Put nops after the insn in the bundle. */
30028c85
VM
8144 for (j = 3 - n; j > 0; j --)
8145 ia64_emit_insn_before (gen_nop (), insn);
c856f536
VM
8146 /* This takes into account that we will add N more nops
8147 before the insn later on -- see the code below. */
30028c85
VM
8148 add_cycles [INSN_UID (insn)]--;
8149 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8150 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8151 insn);
8152 if (pred_stop_p)
8153 add_cycles [INSN_UID (insn)]--;
8154 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8155 {
c856f536 8156 /* Insert "MII;" template. */
a556fd39 8157 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
30028c85
VM
8158 insn);
8159 ia64_emit_insn_before (gen_nop (), insn);
8160 ia64_emit_insn_before (gen_nop (), insn);
8161 if (i > 1)
8162 {
c856f536
VM
8163 /* To decrease code size, we use "MI;I;"
8164 template. */
30028c85
VM
8165 ia64_emit_insn_before
8166 (gen_insn_group_barrier (GEN_INT (3)), insn);
8167 i--;
8168 }
8169 ia64_emit_insn_before (gen_nop (), insn);
8170 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8171 insn);
8172 }
c856f536
VM
8173 /* Put the MM-insn in the same slot of a bundle with the
8174 same template as the original one. */
4a4cd49c 8175 ia64_add_bundle_selector_before (template0, insn);
c856f536
VM
8176 /* To put the insn in the same slot, add necessary number
8177 of nops. */
30028c85
VM
8178 for (j = n; j > 0; j --)
8179 ia64_emit_insn_before (gen_nop (), insn);
c856f536 8180 /* Put the stop if the original bundle had it. */
30028c85
VM
8181 if (pred_stop_p)
8182 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8183 insn);
8184 }
8185 }
8186 free (index_to_bundle_states);
8187 finish_bundle_state_table ();
8188 bundling_p = 0;
8189 dfa_clean_insn_cache ();
2130b7fb 8190}
c65ebc55 8191
30028c85
VM
8192/* The following function is called at the end of scheduling BB or
8193 EBB. After reload, it inserts stop bits and does insn bundling. */
8194
8195static void
9c808aad 8196ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 8197{
30028c85
VM
8198 if (sched_verbose)
8199 fprintf (dump, "// Finishing schedule.\n");
8200 if (!reload_completed)
8201 return;
8202 if (reload_completed)
8203 {
8204 final_emit_insn_group_barriers (dump);
8205 bundling (dump, sched_verbose, current_sched_info->prev_head,
8206 current_sched_info->next_tail);
8207 if (sched_verbose && dump)
8208 fprintf (dump, "// finishing %d-%d\n",
8209 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8210 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 8211
30028c85
VM
8212 return;
8213 }
c237e94a
ZW
8214}
8215
30028c85 8216/* The following function inserts stop bits in scheduled BB or EBB. */
2130b7fb 8217
30028c85 8218static void
9c808aad 8219final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 8220{
30028c85
VM
8221 rtx insn;
8222 int need_barrier_p = 0;
8223 rtx prev_insn = NULL_RTX;
2130b7fb 8224
30028c85 8225 init_insn_group_barriers ();
2130b7fb 8226
30028c85
VM
8227 for (insn = NEXT_INSN (current_sched_info->prev_head);
8228 insn != current_sched_info->next_tail;
8229 insn = NEXT_INSN (insn))
8230 {
8231 if (GET_CODE (insn) == BARRIER)
b395ddbe 8232 {
30028c85 8233 rtx last = prev_active_insn (insn);
14d118d6 8234
30028c85 8235 if (! last)
b395ddbe 8236 continue;
30028c85
VM
8237 if (GET_CODE (last) == JUMP_INSN
8238 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8239 last = prev_active_insn (last);
8240 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8241 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 8242
30028c85
VM
8243 init_insn_group_barriers ();
8244 need_barrier_p = 0;
8245 prev_insn = NULL_RTX;
b395ddbe 8246 }
30028c85 8247 else if (INSN_P (insn))
2130b7fb 8248 {
30028c85 8249 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 8250 {
30028c85
VM
8251 init_insn_group_barriers ();
8252 need_barrier_p = 0;
8253 prev_insn = NULL_RTX;
c65ebc55 8254 }
c1bc6ca8 8255 else if (need_barrier_p || group_barrier_needed (insn))
2130b7fb 8256 {
30028c85
VM
8257 if (TARGET_EARLY_STOP_BITS)
8258 {
8259 rtx last;
9c808aad 8260
30028c85
VM
8261 for (last = insn;
8262 last != current_sched_info->prev_head;
8263 last = PREV_INSN (last))
8264 if (INSN_P (last) && GET_MODE (last) == TImode
8265 && stops_p [INSN_UID (last)])
8266 break;
8267 if (last == current_sched_info->prev_head)
8268 last = insn;
8269 last = prev_active_insn (last);
8270 if (last
8271 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8272 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8273 last);
8274 init_insn_group_barriers ();
8275 for (last = NEXT_INSN (last);
8276 last != insn;
8277 last = NEXT_INSN (last))
8278 if (INSN_P (last))
c1bc6ca8 8279 group_barrier_needed (last);
30028c85
VM
8280 }
8281 else
8282 {
8283 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8284 insn);
8285 init_insn_group_barriers ();
8286 }
c1bc6ca8 8287 group_barrier_needed (insn);
30028c85 8288 prev_insn = NULL_RTX;
2130b7fb 8289 }
30028c85
VM
8290 else if (recog_memoized (insn) >= 0)
8291 prev_insn = insn;
8292 need_barrier_p = (GET_CODE (insn) == CALL_INSN
8293 || GET_CODE (PATTERN (insn)) == ASM_INPUT
8294 || asm_noperands (PATTERN (insn)) >= 0);
c65ebc55 8295 }
2130b7fb 8296 }
30028c85 8297}
2130b7fb 8298
30028c85 8299\f
2130b7fb 8300
a4d05547 8301/* The following function returns the number of ready insns the DFA
30028c85 8302   insn scheduler may examine with its first cycle multipass lookahead. */
2130b7fb 8303
c237e94a 8304static int
9c808aad 8305ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 8306{
30028c85
VM
8307 return (reload_completed ? 6 : 4);
8308}
2130b7fb 8309
30028c85 8310/* The following function initializes the variable `dfa_pre_cycle_insn'. */
2130b7fb 8311
30028c85 8312static void
9c808aad 8313ia64_init_dfa_pre_cycle_insn (void)
30028c85
VM
8314{
8315 if (temp_dfa_state == NULL)
2130b7fb 8316 {
30028c85
VM
8317 dfa_state_size = state_size ();
8318 temp_dfa_state = xmalloc (dfa_state_size);
8319 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 8320 }
30028c85
VM
8321 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
8322 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
8323 recog_memoized (dfa_pre_cycle_insn);
8324 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8325 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
8326 recog_memoized (dfa_stop_insn);
8327}
2130b7fb 8328
30028c85
VM
8329/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8330 used by the DFA insn scheduler. */
2130b7fb 8331
30028c85 8332static rtx
9c808aad 8333ia64_dfa_pre_cycle_insn (void)
30028c85
VM
8334{
8335 return dfa_pre_cycle_insn;
8336}
2130b7fb 8337
30028c85
VM
8338/* The following function returns TRUE if PRODUCER (of type ilog or
8339   ld) produces the address for CONSUMER (of type st or stf). */
2130b7fb 8340
30028c85 8341int
9c808aad 8342ia64_st_address_bypass_p (rtx producer, rtx consumer)
30028c85
VM
8343{
8344 rtx dest, reg, mem;
2130b7fb 8345
e820471b 8346 gcc_assert (producer && consumer);
30028c85 8347 dest = ia64_single_set (producer);
e820471b
NS
8348 gcc_assert (dest);
8349 reg = SET_DEST (dest);
8350 gcc_assert (reg);
30028c85
VM
8351 if (GET_CODE (reg) == SUBREG)
8352 reg = SUBREG_REG (reg);
e820471b
NS
8353 gcc_assert (GET_CODE (reg) == REG);
8354
30028c85 8355 dest = ia64_single_set (consumer);
e820471b
NS
8356 gcc_assert (dest);
8357 mem = SET_DEST (dest);
8358 gcc_assert (mem && GET_CODE (mem) == MEM);
30028c85 8359 return reg_mentioned_p (reg, mem);
2130b7fb
BS
8360}
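
/* Editor's illustration (not part of the original source): a C-level
   sketch of the producer/consumer pair the bypass predicate above is
   meant to recognize -- an integer ALU (ilog) or load result that feeds
   the address of a following store.  The function name is invented.  */
void
store_through_computed_address (long *base, long idx, long val)
{
  /* The add that computes &base[idx] is the "producer"; the store
     through the resulting register is the "consumer".  */
  base[idx] = val;
}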
8361
30028c85
VM
8362/* The following function returns TRUE if PRODUCER (of type ilog or
8363   ld) produces the address for CONSUMER (of type ld or fld). */
2130b7fb 8364
30028c85 8365int
9c808aad 8366ia64_ld_address_bypass_p (rtx producer, rtx consumer)
2130b7fb 8367{
30028c85
VM
8368 rtx dest, src, reg, mem;
8369
e820471b 8370 gcc_assert (producer && consumer);
30028c85 8371 dest = ia64_single_set (producer);
e820471b
NS
8372 gcc_assert (dest);
8373 reg = SET_DEST (dest);
8374 gcc_assert (reg);
30028c85
VM
8375 if (GET_CODE (reg) == SUBREG)
8376 reg = SUBREG_REG (reg);
e820471b
NS
8377 gcc_assert (GET_CODE (reg) == REG);
8378
30028c85 8379 src = ia64_single_set (consumer);
e820471b
NS
8380 gcc_assert (src);
8381 mem = SET_SRC (src);
8382 gcc_assert (mem);
048d0d36 8383
30028c85
VM
8384 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
8385 mem = XVECEXP (mem, 0, 0);
048d0d36 8386 else if (GET_CODE (mem) == IF_THEN_ELSE)
917f1b7e 8387 /* ??? Is this bypass necessary for ld.c? */
048d0d36
MK
8388 {
8389 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
8390 mem = XEXP (mem, 1);
8391 }
8392
30028c85
VM
8393 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
8394 mem = XEXP (mem, 0);
ef1ecf87 8395
048d0d36
MK
8396 if (GET_CODE (mem) == UNSPEC)
8397 {
8398 int c = XINT (mem, 1);
8399
8400 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA);
8401 mem = XVECEXP (mem, 0, 0);
8402 }
8403
ef1ecf87 8404 /* Note that LO_SUM is used for GOT loads. */
e820471b 8405 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
ef1ecf87 8406
30028c85
VM
8407 return reg_mentioned_p (reg, mem);
8408}
8409
8410/* The following function returns TRUE if INSN produces an address for a
8411   load/store insn.  We place such insns into an M slot because that
ff482c8d 8412   decreases their latency. */
30028c85
VM
8413
8414int
9c808aad 8415ia64_produce_address_p (rtx insn)
30028c85
VM
8416{
8417 return insn->call;
2130b7fb 8418}
30028c85 8419
2130b7fb 8420\f
3b572406
RH
8421/* Emit pseudo-ops for the assembler to describe predicate relations.
8422 At present this assumes that we only consider predicate pairs to
8423 be mutex, and that the assembler can deduce proper values from
8424 straight-line code. */
8425
8426static void
9c808aad 8427emit_predicate_relation_info (void)
3b572406 8428{
e0082a72 8429 basic_block bb;
3b572406 8430
e0082a72 8431 FOR_EACH_BB_REVERSE (bb)
3b572406 8432 {
3b572406 8433 int r;
a813c111 8434 rtx head = BB_HEAD (bb);
3b572406
RH
8435
8436 /* We only need such notes at code labels. */
8437 if (GET_CODE (head) != CODE_LABEL)
8438 continue;
740aeb38 8439 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
3b572406
RH
8440 head = NEXT_INSN (head);
8441
9f3b8452
RH
8442 /* Skip p0, which may be thought to be live due to (reg:DI p0)
8443 grabbing the entire block of predicate registers. */
8444 for (r = PR_REG (2); r < PR_REG (64); r += 2)
6fb5fa3c 8445 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
3b572406 8446 {
f2f90c63 8447 rtx p = gen_rtx_REG (BImode, r);
054451ea 8448 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111
SB
8449 if (head == BB_END (bb))
8450 BB_END (bb) = n;
3b572406
RH
8451 head = n;
8452 }
8453 }
ca3920ad
JW
8454
8455 /* Look for conditional calls that do not return, and protect predicate
8456 relations around them. Otherwise the assembler will assume the call
8457 returns, and complain about uses of call-clobbered predicates after
8458 the call. */
e0082a72 8459 FOR_EACH_BB_REVERSE (bb)
ca3920ad 8460 {
a813c111 8461 rtx insn = BB_HEAD (bb);
9c808aad 8462
ca3920ad
JW
8463 while (1)
8464 {
8465 if (GET_CODE (insn) == CALL_INSN
8466 && GET_CODE (PATTERN (insn)) == COND_EXEC
8467 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
8468 {
8469 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
8470 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111
SB
8471 if (BB_HEAD (bb) == insn)
8472 BB_HEAD (bb) = b;
8473 if (BB_END (bb) == insn)
8474 BB_END (bb) = a;
ca3920ad 8475 }
9c808aad 8476
a813c111 8477 if (insn == BB_END (bb))
ca3920ad
JW
8478 break;
8479 insn = NEXT_INSN (insn);
8480 }
8481 }
3b572406
RH
8482}
8483
c65ebc55
JW
8484/* Perform machine dependent operations on the rtl chain INSNS. */
8485
18dbd950 8486static void
9c808aad 8487ia64_reorg (void)
c65ebc55 8488{
1e3881c2
JH
8489 /* We are freeing block_for_insn in the toplev to keep compatibility
8490 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 8491 compute_bb_for_insn ();
a00fe19f
RH
8492
8493 /* If optimizing, we'll have split before scheduling. */
8494 if (optimize == 0)
6fb5fa3c 8495 split_all_insns ();
2130b7fb 8496
6fb5fa3c 8497 if (optimize && ia64_flag_schedule_insns2 && dbg_cnt (ia64_sched2))
f4d578da 8498 {
eced69b5 8499 timevar_push (TV_SCHED2);
f4d578da 8500 ia64_final_schedule = 1;
30028c85
VM
8501
8502 initiate_bundle_states ();
8503 ia64_nop = make_insn_raw (gen_nop ());
8504 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
8505 recog_memoized (ia64_nop);
8506 clocks_length = get_max_uid () + 1;
29da5c92 8507 stops_p = xcalloc (1, clocks_length);
30028c85
VM
8508 if (ia64_tune == PROCESSOR_ITANIUM)
8509 {
29da5c92
KG
8510 clocks = xcalloc (clocks_length, sizeof (int));
8511 add_cycles = xcalloc (clocks_length, sizeof (int));
30028c85
VM
8512 }
8513 if (ia64_tune == PROCESSOR_ITANIUM2)
8514 {
8515 pos_1 = get_cpu_unit_code ("2_1");
8516 pos_2 = get_cpu_unit_code ("2_2");
8517 pos_3 = get_cpu_unit_code ("2_3");
8518 pos_4 = get_cpu_unit_code ("2_4");
8519 pos_5 = get_cpu_unit_code ("2_5");
8520 pos_6 = get_cpu_unit_code ("2_6");
8521 _0mii_ = get_cpu_unit_code ("2b_0mii.");
8522 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
8523 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
8524 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
8525 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
8526 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
8527 _0mib_ = get_cpu_unit_code ("2b_0mib.");
8528 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
8529 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
8530 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
8531 _1mii_ = get_cpu_unit_code ("2b_1mii.");
8532 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
8533 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
8534 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
8535 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
8536 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
8537 _1mib_ = get_cpu_unit_code ("2b_1mib.");
8538 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
8539 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
8540 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
8541 }
8542 else
8543 {
8544 pos_1 = get_cpu_unit_code ("1_1");
8545 pos_2 = get_cpu_unit_code ("1_2");
8546 pos_3 = get_cpu_unit_code ("1_3");
8547 pos_4 = get_cpu_unit_code ("1_4");
8548 pos_5 = get_cpu_unit_code ("1_5");
8549 pos_6 = get_cpu_unit_code ("1_6");
8550 _0mii_ = get_cpu_unit_code ("1b_0mii.");
8551 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
8552 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
8553 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
8554 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
8555 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
8556 _0mib_ = get_cpu_unit_code ("1b_0mib.");
8557 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
8558 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
8559 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
8560 _1mii_ = get_cpu_unit_code ("1b_1mii.");
8561 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
8562 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
8563 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
8564 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
8565 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
8566 _1mib_ = get_cpu_unit_code ("1b_1mib.");
8567 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
8568 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
8569 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
8570 }
10d22567 8571 schedule_ebbs ();
6fb5fa3c
DB
8572 /* We cannot reuse this one because it has been corrupted by the
8573 evil glat. */
30028c85
VM
8574 finish_bundle_states ();
8575 if (ia64_tune == PROCESSOR_ITANIUM)
8576 {
8577 free (add_cycles);
8578 free (clocks);
8579 }
8580 free (stops_p);
048d0d36 8581 stops_p = NULL;
c263766c 8582 emit_insn_group_barriers (dump_file);
30028c85 8583
f4d578da 8584 ia64_final_schedule = 0;
eced69b5 8585 timevar_pop (TV_SCHED2);
f4d578da
BS
8586 }
8587 else
c263766c 8588 emit_all_insn_group_barriers (dump_file);
f2f90c63 8589
6fb5fa3c
DB
8590 df_analyze ();
8591
f12f25a7
RH
8592 /* A call must not be the last instruction in a function, so that the
8593 return address is still within the function, so that unwinding works
8594 properly. Note that IA-64 differs from dwarf2 on this point. */
8595 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8596 {
8597 rtx insn;
8598 int saw_stop = 0;
8599
8600 insn = get_last_insn ();
8601 if (! INSN_P (insn))
8602 insn = prev_active_insn (insn);
fa978426
AS
8603 /* Skip over insns that expand to nothing. */
8604 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
8605 {
8606 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
8607 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
8608 saw_stop = 1;
f12f25a7
RH
8609 insn = prev_active_insn (insn);
8610 }
8611 if (GET_CODE (insn) == CALL_INSN)
8612 {
8613 if (! saw_stop)
8614 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8615 emit_insn (gen_break_f ());
8616 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8617 }
8618 }
8619
f2f90c63 8620 emit_predicate_relation_info ();
014a1138
JZ
8621
8622 if (ia64_flag_var_tracking)
8623 {
8624 timevar_push (TV_VAR_TRACKING);
8625 variable_tracking_main ();
8626 timevar_pop (TV_VAR_TRACKING);
8627 }
6fb5fa3c 8628 df_finish_pass ();
c65ebc55
JW
8629}
8630\f
8631/* Return true if REGNO is used by the epilogue. */
8632
8633int
9c808aad 8634ia64_epilogue_uses (int regno)
c65ebc55 8635{
6ca3c22f
RH
8636 switch (regno)
8637 {
8638 case R_GR (1):
b23ba0b8
RH
8639 /* With a call to a function in another module, we will write a new
8640 value to "gp". After returning from such a call, we need to make
8641 sure the function restores the original gp-value, even if the
8642 function itself does not use the gp anymore. */
8643 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
8644
8645 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
8646 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
8647 /* For functions defined with the syscall_linkage attribute, all
8648 input registers are marked as live at all function exits. This
8649 prevents the register allocator from using the input registers,
8650 which in turn makes it possible to restart a system call after
8651 an interrupt without having to save/restore the input registers.
8652 This also prevents kernel data from leaking to application code. */
8653 return lookup_attribute ("syscall_linkage",
8654 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
8655
8656 case R_BR (0):
8657 /* Conditional return patterns can't represent the use of `b0' as
8658 the return address, so we force the value live this way. */
8659 return 1;
6b6c1201 8660
6ca3c22f
RH
8661 case AR_PFS_REGNUM:
8662 /* Likewise for ar.pfs, which is used by br.ret. */
8663 return 1;
5527bf14 8664
6ca3c22f
RH
8665 default:
8666 return 0;
8667 }
c65ebc55 8668}
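
/* Editor's illustration (not part of the original source): one way the
   syscall_linkage attribute handled above might be applied.  With it,
   ia64_epilogue_uses keeps in0-in7 live at every exit, so the incoming
   argument registers survive to the return and a system call can be
   restarted after an interrupt.  The function name and body are
   invented for the example.  */
long __attribute__ ((syscall_linkage))
sys_example (long arg0, long arg1)
{
  return arg0 + arg1;
}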
15b5aef3
RH
8669
8670/* Return true if REGNO is used by the frame unwinder. */
8671
8672int
9c808aad 8673ia64_eh_uses (int regno)
15b5aef3 8674{
6fb5fa3c
DB
8675 enum ia64_frame_regs r;
8676
15b5aef3
RH
8677 if (! reload_completed)
8678 return 0;
8679
6fb5fa3c
DB
8680 if (regno == 0)
8681 return 0;
8682
8683 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
8684 if (regno == current_frame_info.r[r]
8685 || regno == emitted_frame_related_regs[r])
8686 return 1;
15b5aef3
RH
8687
8688 return 0;
8689}
c65ebc55 8690\f
1cdbd630 8691/* Return true if this goes in small data/bss. */
c65ebc55
JW
8692
8693/* ??? We could also support our own long data here.  Generating movl/add/ld8
8694 instead of addl,ld8/ld8. This makes the code bigger, but should make the
8695 code faster because there is one less load. This also includes incomplete
8696 types which can't go in sdata/sbss. */
8697
ae46c4e0 8698static bool
9c808aad 8699ia64_in_small_data_p (tree exp)
ae46c4e0
RH
8700{
8701 if (TARGET_NO_SDATA)
8702 return false;
8703
3907500b
RH
8704 /* We want to merge strings, so we never consider them small data. */
8705 if (TREE_CODE (exp) == STRING_CST)
8706 return false;
8707
4c494a15
ZW
8708 /* Functions are never small data. */
8709 if (TREE_CODE (exp) == FUNCTION_DECL)
8710 return false;
8711
ae46c4e0
RH
8712 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
8713 {
8714 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
826eb7ed 8715
ae46c4e0 8716 if (strcmp (section, ".sdata") == 0
826eb7ed
JB
8717 || strncmp (section, ".sdata.", 7) == 0
8718 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
8719 || strcmp (section, ".sbss") == 0
8720 || strncmp (section, ".sbss.", 6) == 0
8721 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
ae46c4e0
RH
8722 return true;
8723 }
8724 else
8725 {
8726 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
8727
8728 /* If this is an incomplete type with size 0, then we can't put it
8729 in sdata because it might be too big when completed. */
8730 if (size > 0 && size <= ia64_section_threshold)
8731 return true;
8732 }
8733
8734 return false;
8735}
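
/* Editor's illustration (not part of the original source): objects the
   predicate above would treat as small.  The first is below the default
   ia64_section_threshold and ends up in .sdata/.sbss automatically; the
   second is forced into .sdata by an explicit section attribute.  The
   variable names are invented.  */
int small_counter;
int pinned_small __attribute__ ((section (".sdata"))) = 1;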
0c96007e 8736\f
ad0fc698
JW
8737/* Output assembly directives for prologue regions. */
8738
8739/* True if the current basic block is the last block of the function. */
8740
e0082a72 8741static bool last_block;
ad0fc698
JW
8742
8743/* True if we need a copy_state command at the start of the next block. */
8744
e0082a72 8745static bool need_copy_state;
ad0fc698 8746
658f32fd
AO
8747#ifndef MAX_ARTIFICIAL_LABEL_BYTES
8748# define MAX_ARTIFICIAL_LABEL_BYTES 30
8749#endif
8750
8751/* Emit a debugging label after a call-frame-related insn. We'd
8752 rather output the label right away, but we'd have to output it
8753 after, not before, the instruction, and the instruction has not
8754 been output yet. So we emit the label after the insn, delete it to
8755 avoid introducing basic blocks, and mark it as preserved, such that
8756 it is still output, given that it is referenced in debug info. */
8757
8758static const char *
8759ia64_emit_deleted_label_after_insn (rtx insn)
8760{
8761 char label[MAX_ARTIFICIAL_LABEL_BYTES];
8762 rtx lb = gen_label_rtx ();
8763 rtx label_insn = emit_label_after (lb, insn);
8764
8765 LABEL_PRESERVE_P (lb) = 1;
8766
8767 delete_insn (label_insn);
8768
8769 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
8770
8771 return xstrdup (label);
8772}
8773
8774/* Define the CFA after INSN with the steady-state definition. */
8775
8776static void
8777ia64_dwarf2out_def_steady_cfa (rtx insn)
8778{
8779 rtx fp = frame_pointer_needed
8780 ? hard_frame_pointer_rtx
8781 : stack_pointer_rtx;
8782
8783 dwarf2out_def_cfa
8784 (ia64_emit_deleted_label_after_insn (insn),
8785 REGNO (fp),
8786 ia64_initial_elimination_offset
8787 (REGNO (arg_pointer_rtx), REGNO (fp))
8788 + ARG_POINTER_CFA_OFFSET (current_function_decl));
8789}
8790
8791/* The generic dwarf2 frame debug info generator does not define a
8792 separate region for the very end of the epilogue, so refrain from
8793 doing so in the IA64-specific code as well. */
8794
8795#define IA64_CHANGE_CFA_IN_EPILOGUE 0
8796
ad0fc698
JW
8797/* The function emits unwind directives for the start of an epilogue. */
8798
8799static void
658f32fd 8800process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
ad0fc698
JW
8801{
8802 /* If this isn't the last block of the function, then we need to label the
8803 current state, and copy it back in at the start of the next block. */
8804
e0082a72 8805 if (!last_block)
ad0fc698 8806 {
658f32fd
AO
8807 if (unwind)
8808 fprintf (asm_out_file, "\t.label_state %d\n",
8809 ++cfun->machine->state_num);
e0082a72 8810 need_copy_state = true;
ad0fc698
JW
8811 }
8812
658f32fd
AO
8813 if (unwind)
8814 fprintf (asm_out_file, "\t.restore sp\n");
8815 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8816 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
8817 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
ad0fc698 8818}
0c96007e 8819
0c96007e
AM
8820/* This function processes a SET pattern looking for specific patterns
8821 which result in emitting an assembly directive required for unwinding. */
97e242b0 8822
0c96007e 8823static int
658f32fd 8824process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
0c96007e
AM
8825{
8826 rtx src = SET_SRC (pat);
8827 rtx dest = SET_DEST (pat);
97e242b0 8828 int src_regno, dest_regno;
0c96007e 8829
97e242b0
RH
8830 /* Look for the ALLOC insn. */
8831 if (GET_CODE (src) == UNSPEC_VOLATILE
086c0f96 8832 && XINT (src, 1) == UNSPECV_ALLOC
97e242b0 8833 && GET_CODE (dest) == REG)
0c96007e 8834 {
97e242b0
RH
8835 dest_regno = REGNO (dest);
8836
a8f5224e
DM
8837 /* If this is the final destination for ar.pfs, then this must
8838 be the alloc in the prologue. */
6fb5fa3c 8839 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
658f32fd
AO
8840 {
8841 if (unwind)
8842 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
8843 ia64_dbx_register_number (dest_regno));
8844 }
a8f5224e
DM
8845 else
8846 {
8847 /* This must be an alloc before a sibcall. We must drop the
8848 old frame info. The easiest way to drop the old frame
8849 info is to ensure we had a ".restore sp" directive
8850 followed by a new prologue. If the procedure doesn't
8851 have a memory-stack frame, we'll issue a dummy ".restore
8852 sp" now. */
b1eae416 8853 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
a8f5224e 8854 /* if haven't done process_epilogue() yet, do it now */
658f32fd
AO
8855 process_epilogue (asm_out_file, insn, unwind, frame);
8856 if (unwind)
8857 fprintf (asm_out_file, "\t.prologue\n");
a8f5224e 8858 }
0c96007e
AM
8859 return 1;
8860 }
8861
ed168e45 8862 /* Look for SP = .... */
0c96007e
AM
8863 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
8864 {
8865 if (GET_CODE (src) == PLUS)
8866 {
8867 rtx op0 = XEXP (src, 0);
8868 rtx op1 = XEXP (src, 1);
e820471b
NS
8869
8870 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
8871
8872 if (INTVAL (op1) < 0)
658f32fd
AO
8873 {
8874 gcc_assert (!frame_pointer_needed);
8875 if (unwind)
8876 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
8877 -INTVAL (op1));
8878 if (frame)
8879 ia64_dwarf2out_def_steady_cfa (insn);
8880 }
0186257f 8881 else
658f32fd 8882 process_epilogue (asm_out_file, insn, unwind, frame);
0c96007e 8883 }
0186257f 8884 else
e820471b
NS
8885 {
8886 gcc_assert (GET_CODE (src) == REG
8887 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
658f32fd 8888 process_epilogue (asm_out_file, insn, unwind, frame);
e820471b 8889 }
0186257f
JW
8890
8891 return 1;
0c96007e 8892 }
0c96007e
AM
8893
8894 /* Register move we need to look at. */
8895 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
8896 {
97e242b0
RH
8897 src_regno = REGNO (src);
8898 dest_regno = REGNO (dest);
8899
8900 switch (src_regno)
8901 {
8902 case BR_REG (0):
0c96007e 8903 /* Saving return address pointer. */
6fb5fa3c 8904 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
658f32fd
AO
8905 if (unwind)
8906 fprintf (asm_out_file, "\t.save rp, r%d\n",
8907 ia64_dbx_register_number (dest_regno));
97e242b0
RH
8908 return 1;
8909
8910 case PR_REG (0):
6fb5fa3c 8911 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
658f32fd
AO
8912 if (unwind)
8913 fprintf (asm_out_file, "\t.save pr, r%d\n",
8914 ia64_dbx_register_number (dest_regno));
97e242b0
RH
8915 return 1;
8916
8917 case AR_UNAT_REGNUM:
6fb5fa3c 8918 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
8919 if (unwind)
8920 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
8921 ia64_dbx_register_number (dest_regno));
97e242b0
RH
8922 return 1;
8923
8924 case AR_LC_REGNUM:
6fb5fa3c 8925 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
8926 if (unwind)
8927 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
8928 ia64_dbx_register_number (dest_regno));
97e242b0
RH
8929 return 1;
8930
8931 case STACK_POINTER_REGNUM:
e820471b
NS
8932 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
8933 && frame_pointer_needed);
658f32fd
AO
8934 if (unwind)
8935 fprintf (asm_out_file, "\t.vframe r%d\n",
8936 ia64_dbx_register_number (dest_regno));
8937 if (frame)
8938 ia64_dwarf2out_def_steady_cfa (insn);
97e242b0
RH
8939 return 1;
8940
8941 default:
8942 /* Everything else should indicate being stored to memory. */
e820471b 8943 gcc_unreachable ();
0c96007e
AM
8944 }
8945 }
97e242b0
RH
8946
8947 /* Memory store we need to look at. */
8948 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 8949 {
97e242b0
RH
8950 long off;
8951 rtx base;
8952 const char *saveop;
8953
8954 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 8955 {
97e242b0
RH
8956 base = XEXP (dest, 0);
8957 off = 0;
0c96007e 8958 }
e820471b 8959 else
0c96007e 8960 {
e820471b
NS
8961 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
8962 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
97e242b0
RH
8963 base = XEXP (XEXP (dest, 0), 0);
8964 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 8965 }
0c96007e 8966
97e242b0
RH
8967 if (base == hard_frame_pointer_rtx)
8968 {
8969 saveop = ".savepsp";
8970 off = - off;
8971 }
97e242b0 8972 else
e820471b
NS
8973 {
8974 gcc_assert (base == stack_pointer_rtx);
8975 saveop = ".savesp";
8976 }
97e242b0
RH
8977
8978 src_regno = REGNO (src);
8979 switch (src_regno)
8980 {
8981 case BR_REG (0):
6fb5fa3c 8982 gcc_assert (!current_frame_info.r[reg_save_b0]);
658f32fd
AO
8983 if (unwind)
8984 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
97e242b0
RH
8985 return 1;
8986
8987 case PR_REG (0):
6fb5fa3c 8988 gcc_assert (!current_frame_info.r[reg_save_pr]);
658f32fd
AO
8989 if (unwind)
8990 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
97e242b0
RH
8991 return 1;
8992
8993 case AR_LC_REGNUM:
6fb5fa3c 8994 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
8995 if (unwind)
8996 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
97e242b0
RH
8997 return 1;
8998
8999 case AR_PFS_REGNUM:
6fb5fa3c 9000 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
658f32fd
AO
9001 if (unwind)
9002 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
97e242b0
RH
9003 return 1;
9004
9005 case AR_UNAT_REGNUM:
6fb5fa3c 9006 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
9007 if (unwind)
9008 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
97e242b0
RH
9009 return 1;
9010
9011 case GR_REG (4):
9012 case GR_REG (5):
9013 case GR_REG (6):
9014 case GR_REG (7):
658f32fd
AO
9015 if (unwind)
9016 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9017 1 << (src_regno - GR_REG (4)));
97e242b0
RH
9018 return 1;
9019
9020 case BR_REG (1):
9021 case BR_REG (2):
9022 case BR_REG (3):
9023 case BR_REG (4):
9024 case BR_REG (5):
658f32fd
AO
9025 if (unwind)
9026 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9027 1 << (src_regno - BR_REG (1)));
0c96007e 9028 return 1;
97e242b0
RH
9029
9030 case FR_REG (2):
9031 case FR_REG (3):
9032 case FR_REG (4):
9033 case FR_REG (5):
658f32fd
AO
9034 if (unwind)
9035 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9036 1 << (src_regno - FR_REG (2)));
97e242b0
RH
9037 return 1;
9038
9039 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9040 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9041 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9042 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
658f32fd
AO
9043 if (unwind)
9044 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9045 1 << (src_regno - FR_REG (12)));
97e242b0
RH
9046 return 1;
9047
9048 default:
9049 return 0;
0c96007e
AM
9050 }
9051 }
97e242b0 9052
0c96007e
AM
9053 return 0;
9054}
9055
9056
9057/* This function looks at a single insn and emits any directives
9058 required to unwind this insn. */
9059void
9c808aad 9060process_for_unwind_directive (FILE *asm_out_file, rtx insn)
0c96007e 9061{
658f32fd
AO
9062 bool unwind = (flag_unwind_tables
9063 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9064 bool frame = dwarf2out_do_frame ();
9065
9066 if (unwind || frame)
0c96007e 9067 {
97e242b0
RH
9068 rtx pat;
9069
740aeb38 9070 if (NOTE_INSN_BASIC_BLOCK_P (insn))
ad0fc698 9071 {
e0082a72 9072 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
ad0fc698
JW
9073
9074 /* Restore unwind state from immediately before the epilogue. */
9075 if (need_copy_state)
9076 {
658f32fd
AO
9077 if (unwind)
9078 {
9079 fprintf (asm_out_file, "\t.body\n");
9080 fprintf (asm_out_file, "\t.copy_state %d\n",
9081 cfun->machine->state_num);
9082 }
9083 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9084 ia64_dwarf2out_def_steady_cfa (insn);
e0082a72 9085 need_copy_state = false;
ad0fc698
JW
9086 }
9087 }
9088
5a63e069 9089 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
ad0fc698
JW
9090 return;
9091
97e242b0
RH
9092 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9093 if (pat)
9094 pat = XEXP (pat, 0);
9095 else
9096 pat = PATTERN (insn);
0c96007e
AM
9097
9098 switch (GET_CODE (pat))
9099 {
809d4ef1 9100 case SET:
658f32fd 9101 process_set (asm_out_file, pat, insn, unwind, frame);
809d4ef1
RH
9102 break;
9103
9104 case PARALLEL:
9105 {
9106 int par_index;
9107 int limit = XVECLEN (pat, 0);
9108 for (par_index = 0; par_index < limit; par_index++)
9109 {
9110 rtx x = XVECEXP (pat, 0, par_index);
9111 if (GET_CODE (x) == SET)
658f32fd 9112 process_set (asm_out_file, x, insn, unwind, frame);
809d4ef1
RH
9113 }
9114 break;
9115 }
9116
9117 default:
e820471b 9118 gcc_unreachable ();
0c96007e
AM
9119 }
9120 }
9121}
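
/* Editor's note (illustration, not part of the original source): for a
   typical function that allocates a memory stack frame and saves rp and
   ar.pfs in general registers, the directives emitted by the code above
   interleave with the generated prologue and epilogue roughly as

	.prologue
	.save ar.pfs, r35
	.fframe 32
	.save rp, r34
	.body
	...
	.label_state 1
	.restore sp

   where the register numbers, the frame size and the state number are
   invented; .label_state is only emitted when the epilogue is not in
   the last basic block.  */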
c65ebc55 9122
0551c32d 9123\f
af795c3c
RH
9124enum ia64_builtins
9125{
9126 IA64_BUILTIN_BSP,
9127 IA64_BUILTIN_FLUSHRS
9128};
9129
c65ebc55 9130void
9c808aad 9131ia64_init_builtins (void)
c65ebc55 9132{
9649812a 9133 tree fpreg_type;
bf9ab6b6 9134 tree float80_type;
9649812a
MM
9135
9136 /* The __fpreg type. */
9137 fpreg_type = make_node (REAL_TYPE);
4de67c26 9138 TYPE_PRECISION (fpreg_type) = 82;
9649812a
MM
9139 layout_type (fpreg_type);
9140 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9141
9142 /* The __float80 type. */
bf9ab6b6 9143 float80_type = make_node (REAL_TYPE);
968a7562 9144 TYPE_PRECISION (float80_type) = 80;
bf9ab6b6
MM
9145 layout_type (float80_type);
9146 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
9147
9148 /* The __float128 type. */
02befdf4 9149 if (!TARGET_HPUX)
9649812a
MM
9150 {
9151 tree float128_type = make_node (REAL_TYPE);
9152 TYPE_PRECISION (float128_type) = 128;
9153 layout_type (float128_type);
9154 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9155 }
9156 else
02befdf4 9157 /* Under HPUX, this is a synonym for "long double". */
9649812a
MM
9158 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9159 "__float128");
9160
6e34d3a3 9161#define def_builtin(name, type, code) \
c79efc4d
RÁE
9162 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
9163 NULL, NULL_TREE)
0551c32d 9164
3b572406 9165 def_builtin ("__builtin_ia64_bsp",
b4de2f7d 9166 build_function_type (ptr_type_node, void_list_node),
3b572406 9167 IA64_BUILTIN_BSP);
ce152ef8 9168
9c808aad
AJ
9169 def_builtin ("__builtin_ia64_flushrs",
9170 build_function_type (void_type_node, void_list_node),
ce152ef8
AM
9171 IA64_BUILTIN_FLUSHRS);
9172
0551c32d 9173#undef def_builtin
7d522000
SE
9174
9175 if (TARGET_HPUX)
9176 {
9177 if (built_in_decls [BUILT_IN_FINITE])
9178 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
9179 "_Isfinite");
9180 if (built_in_decls [BUILT_IN_FINITEF])
9181 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
9182 "_Isfinitef");
9183 if (built_in_decls [BUILT_IN_FINITEL])
9184 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
9185 "_Isfinitef128");
9186 }
c65ebc55
JW
9187}
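
/* Editor's illustration (not part of the original source): a use of the
   two machine builtins registered above.  __builtin_ia64_flushrs flushes
   the register stack to the backing store and __builtin_ia64_bsp returns
   the current ar.bsp value as a pointer; the wrapper name is invented.  */
void *
current_backing_store (void)
{
  __builtin_ia64_flushrs ();
  return __builtin_ia64_bsp ();
}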
9188
c65ebc55 9189rtx
9c808aad
AJ
9190ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9191 enum machine_mode mode ATTRIBUTE_UNUSED,
9192 int ignore ATTRIBUTE_UNUSED)
c65ebc55 9193{
767fad4c 9194 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
97e242b0 9195 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
c65ebc55
JW
9196
9197 switch (fcode)
9198 {
ce152ef8 9199 case IA64_BUILTIN_BSP:
0551c32d
RH
9200 if (! target || ! register_operand (target, DImode))
9201 target = gen_reg_rtx (DImode);
9202 emit_insn (gen_bsp_value (target));
8419b675
RK
9203#ifdef POINTERS_EXTEND_UNSIGNED
9204 target = convert_memory_address (ptr_mode, target);
9205#endif
0551c32d 9206 return target;
ce152ef8
AM
9207
9208 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
9209 emit_insn (gen_flushrs ());
9210 return const0_rtx;
ce152ef8 9211
c65ebc55
JW
9212 default:
9213 break;
9214 }
9215
0551c32d 9216 return NULL_RTX;
c65ebc55 9217}
0d7839da
SE
9218
9219/* On HP-UX IA64, aggregate parameters are passed in the
9220   most significant bits of the stack slot. */
9221
9222enum direction
9c808aad 9223ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
0d7839da 9224{
ed168e45 9225 /* Exception to normal case for structures/unions/etc. */
0d7839da
SE
9226
9227 if (type && AGGREGATE_TYPE_P (type)
9228 && int_size_in_bytes (type) < UNITS_PER_WORD)
9229 return upward;
9230
d3704c46
KH
9231 /* Fall back to the default. */
9232 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
0d7839da 9233}
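
/* Editor's illustration (not part of the original source): a 4-byte
   aggregate, smaller than the 8-byte word, which the hook above pads
   upward, i.e. passes in the most significant bits of its stack slot on
   HP-UX.  The type name is invented.  */
struct rgba
{
  unsigned char r, g, b, a;	/* int_size_in_bytes == 4 < UNITS_PER_WORD */
};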
686f3bf0 9234
c47c29c8
L
9235/* Emit text to declare externally defined variables and functions, because
9236 the Intel assembler does not support undefined externals. */
686f3bf0 9237
c47c29c8
L
9238void
9239ia64_asm_output_external (FILE *file, tree decl, const char *name)
686f3bf0 9240{
c47c29c8
L
9241 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
9242 set in order to avoid putting out names that are never really
9243 used. */
9244 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
686f3bf0 9245 {
c47c29c8 9246 /* maybe_assemble_visibility will return 1 if the assembler
2e226e66 9247 visibility directive is output. */
c47c29c8
L
9248 int need_visibility = ((*targetm.binds_local_p) (decl)
9249 && maybe_assemble_visibility (decl));
57d4f65c 9250
c47c29c8
L
9251 /* GNU as does not need anything here, but the HP linker does
9252 need something for external functions. */
9253 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
9254 && TREE_CODE (decl) == FUNCTION_DECL)
812b587e 9255 (*targetm.asm_out.globalize_decl_name) (file, decl);
c47c29c8
L
9256 else if (need_visibility && !TARGET_GNU_AS)
9257 (*targetm.asm_out.globalize_label) (file, name);
686f3bf0
SE
9258 }
9259}
9260
1f7aa7cd 9261/* Set SImode div/mod functions; init_integral_libfuncs only initializes
6bc709c1
L
9262 modes of word_mode and larger. Rename the TFmode libfuncs using the
9263 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
9264 backward compatibility. */
1f7aa7cd
SE
9265
9266static void
9267ia64_init_libfuncs (void)
9268{
9269 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
9270 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
9271 set_optab_libfunc (smod_optab, SImode, "__modsi3");
9272 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
6bc709c1
L
9273
9274 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
9275 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
9276 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
9277 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
9278 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
9279
9280 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
9281 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
9282 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
9283 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
9284 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
9285 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
9286
9287 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
9288 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
4a73d865 9289 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
6bc709c1
L
9290 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
9291 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
9292
9293 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
9294 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
4a73d865 9295 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
2a3ebe77
JM
9296 /* HP-UX 11.23 libc does not have a function for unsigned
9297 SImode-to-TFmode conversion. */
9298 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
1f7aa7cd
SE
9299}
9300
c15c90bb 9301/* Rename all the TFmode libfuncs using the HPUX conventions. */
738e7b39 9302
c15c90bb
ZW
9303static void
9304ia64_hpux_init_libfuncs (void)
9305{
1f7aa7cd
SE
9306 ia64_init_libfuncs ();
9307
bdbba3c2
SE
9308 /* The HP SI millicode division and mod functions expect DI arguments.
9309 By turning them off completely we avoid using both libgcc and the
9310 non-standard millicode routines and use the HP DI millicode routines
9311 instead. */
9312
9313 set_optab_libfunc (sdiv_optab, SImode, 0);
9314 set_optab_libfunc (udiv_optab, SImode, 0);
9315 set_optab_libfunc (smod_optab, SImode, 0);
9316 set_optab_libfunc (umod_optab, SImode, 0);
9317
9318 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
9319 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
9320 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
9321 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
9322
9323 /* HP-UX libc has TF min/max/abs routines in it. */
c15c90bb
ZW
9324 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
9325 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
9326 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
c15c90bb 9327
24ea7948
ZW
9328 /* ia64_expand_compare uses this. */
9329 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
9330
9331 /* These should never be used. */
9332 set_optab_libfunc (eq_optab, TFmode, 0);
9333 set_optab_libfunc (ne_optab, TFmode, 0);
9334 set_optab_libfunc (gt_optab, TFmode, 0);
9335 set_optab_libfunc (ge_optab, TFmode, 0);
9336 set_optab_libfunc (lt_optab, TFmode, 0);
9337 set_optab_libfunc (le_optab, TFmode, 0);
c15c90bb 9338}
738e7b39
RK
9339
9340/* Rename the division and modulus functions in VMS. */
9341
9342static void
9343ia64_vms_init_libfuncs (void)
9344{
9345 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9346 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9347 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9348 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9349 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9350 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9351 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9352 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9353}
6bc709c1
L
9354
9355/* Rename the TFmode libfuncs available from soft-fp in glibc using
9356 the HPUX conventions. */
9357
9358static void
9359ia64_sysv4_init_libfuncs (void)
9360{
9361 ia64_init_libfuncs ();
9362
9363 /* These functions are not part of the HPUX TFmode interface. We
9364 use them instead of _U_Qfcmp, which doesn't work the way we
9365 expect. */
9366 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
9367 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
9368 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
9369 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
9370 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
9371 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
9372
9373 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
9374 glibc doesn't have them. */
9375}
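
/* Editor's illustration (not part of the original source): with the
   TFmode entries installed by ia64_init_libfuncs, a quad-precision
   multiply such as the one below is expected to expand to a call to
   _U_Qfmpy rather than to libgcc's default __multf3.  The function name
   is invented; on HP-UX the same applies to "long double", which is
   TFmode there.  */
__float128
quad_scale (__float128 x, __float128 y)
{
  return x * y;
}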
ae46c4e0 9376\f
9b580a0b
RH
9377/* For HPUX, it is illegal to have relocations in shared segments. */
9378
9379static int
9380ia64_hpux_reloc_rw_mask (void)
9381{
9382 return 3;
9383}
9384
9385/* For others, relax this so that relocations to local data go in
9386 read-only segments, but we still cannot allow global relocations
9387 in read-only segments. */
9388
9389static int
9390ia64_reloc_rw_mask (void)
9391{
9392 return flag_pic ? 3 : 2;
9393}
9394
d6b5193b
RS
9395/* Return the section to use for X. The only special thing we do here
9396 is to honor small data. */
b64a1b53 9397
d6b5193b 9398static section *
9c808aad
AJ
9399ia64_select_rtx_section (enum machine_mode mode, rtx x,
9400 unsigned HOST_WIDE_INT align)
b64a1b53
RH
9401{
9402 if (GET_MODE_SIZE (mode) > 0
1f4a2e84
SE
9403 && GET_MODE_SIZE (mode) <= ia64_section_threshold
9404 && !TARGET_NO_SDATA)
d6b5193b 9405 return sdata_section;
b64a1b53 9406 else
d6b5193b 9407 return default_elf_select_rtx_section (mode, x, align);
b64a1b53
RH
9408}
9409
1e1bd14e 9410static unsigned int
abb8b19a
AM
9411ia64_section_type_flags (tree decl, const char *name, int reloc)
9412{
9413 unsigned int flags = 0;
9414
9415 if (strcmp (name, ".sdata") == 0
9416 || strncmp (name, ".sdata.", 7) == 0
9417 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9418 || strncmp (name, ".sdata2.", 8) == 0
9419 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
9420 || strcmp (name, ".sbss") == 0
9421 || strncmp (name, ".sbss.", 6) == 0
9422 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9423 flags = SECTION_SMALL;
9424
9b580a0b 9425 flags |= default_section_type_flags (decl, name, reloc);
abb8b19a 9426 return flags;
1e1bd14e
RH
9427}
9428
57782ad8
MM
9429/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
9430 structure type and that the address of that type should be passed
9431 in out0, rather than in r8. */
9432
9433static bool
9434ia64_struct_retval_addr_is_first_parm_p (tree fntype)
9435{
9436 tree ret_type = TREE_TYPE (fntype);
9437
9438 /* The Itanium C++ ABI requires that out0, rather than r8, be used
9439 as the structure return address parameter, if the return value
9440 type has a non-trivial copy constructor or destructor. It is not
9441 clear if this same convention should be used for other
9442 programming languages. Until G++ 3.4, we incorrectly used r8 for
9443 these return values. */
9444 return (abi_version_at_least (2)
9445 && ret_type
9446 && TYPE_MODE (ret_type) == BLKmode
9447 && TREE_ADDRESSABLE (ret_type)
9448 && strcmp (lang_hooks.name, "GNU C++") == 0);
9449}
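
/* Editor's note (illustration, not part of the original source): the kind
   of C++ return type the predicate above is about.  A class with a
   non-trivial destructor, e.g.

       struct handle { ~handle (); int fd; };
       handle open_handle ();

   is returned in memory, and under the Itanium C++ ABI the address of
   that memory is passed in out0 rather than in r8.  The names are
   invented for the example.  */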
1e1bd14e 9450
5f13cfc6
RH
9451/* Output the assembler code for a thunk function. THUNK_DECL is the
9452 declaration for the thunk function itself, FUNCTION is the decl for
9453 the target function. DELTA is an immediate constant offset to be
272d0bee 9454 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
9455 *(*this + vcall_offset) should be added to THIS. */
9456
c590b625 9457static void
9c808aad
AJ
9458ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9459 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9460 tree function)
483ab821 9461{
5f13cfc6 9462 rtx this, insn, funexp;
57782ad8
MM
9463 unsigned int this_parmno;
9464 unsigned int this_regno;
13f70342 9465 rtx delta_rtx;
5f13cfc6 9466
599aedd9 9467 reload_completed = 1;
fe3ad572 9468 epilogue_completed = 1;
599aedd9 9469
5f13cfc6
RH
9470 /* Set things up as ia64_expand_prologue might. */
9471 last_scratch_gr_reg = 15;
9472
9473 memset (&current_frame_info, 0, sizeof (current_frame_info));
9474 current_frame_info.spill_cfa_off = -16;
9475 current_frame_info.n_input_regs = 1;
9476 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
9477
5f13cfc6 9478 /* Mark the end of the (empty) prologue. */
2e040219 9479 emit_note (NOTE_INSN_PROLOGUE_END);
5f13cfc6 9480
57782ad8
MM
9481 /* Figure out whether "this" will be the first parameter (the
9482 typical case) or the second parameter (as happens when the
9483 virtual function returns certain class objects). */
9484 this_parmno
9485 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
9486 ? 1 : 0);
9487 this_regno = IN_REG (this_parmno);
9488 if (!TARGET_REG_NAMES)
9489 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
9490
9491 this = gen_rtx_REG (Pmode, this_regno);
13f70342
RH
9492
9493 /* Apply the constant offset, if required. */
9494 delta_rtx = GEN_INT (delta);
36c216e5
MM
9495 if (TARGET_ILP32)
9496 {
57782ad8 9497 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
36c216e5 9498 REG_POINTER (tmp) = 1;
13f70342 9499 if (delta && satisfies_constraint_I (delta_rtx))
36c216e5 9500 {
13f70342 9501 emit_insn (gen_ptr_extend_plus_imm (this, tmp, delta_rtx));
36c216e5
MM
9502 delta = 0;
9503 }
9504 else
9505 emit_insn (gen_ptr_extend (this, tmp));
9506 }
5f13cfc6
RH
9507 if (delta)
9508 {
13f70342 9509 if (!satisfies_constraint_I (delta_rtx))
5f13cfc6
RH
9510 {
9511 rtx tmp = gen_rtx_REG (Pmode, 2);
9512 emit_move_insn (tmp, delta_rtx);
9513 delta_rtx = tmp;
9514 }
9515 emit_insn (gen_adddi3 (this, this, delta_rtx));
9516 }
9517
9518 /* Apply the offset from the vtable, if required. */
9519 if (vcall_offset)
9520 {
9521 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
9522 rtx tmp = gen_rtx_REG (Pmode, 2);
9523
36c216e5
MM
9524 if (TARGET_ILP32)
9525 {
9526 rtx t = gen_rtx_REG (ptr_mode, 2);
9527 REG_POINTER (t) = 1;
9528 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
13f70342 9529 if (satisfies_constraint_I (vcall_offset_rtx))
36c216e5 9530 {
13f70342 9531 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
36c216e5
MM
9532 vcall_offset = 0;
9533 }
9534 else
9535 emit_insn (gen_ptr_extend (tmp, t));
9536 }
9537 else
9538 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
5f13cfc6 9539
36c216e5 9540 if (vcall_offset)
5f13cfc6 9541 {
13f70342 9542 if (!satisfies_constraint_J (vcall_offset_rtx))
36c216e5
MM
9543 {
9544 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
9545 emit_move_insn (tmp2, vcall_offset_rtx);
9546 vcall_offset_rtx = tmp2;
9547 }
9548 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
5f13cfc6 9549 }
5f13cfc6 9550
36c216e5 9551 if (TARGET_ILP32)
13f70342 9552 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
36c216e5
MM
9553 else
9554 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
5f13cfc6
RH
9555
9556 emit_insn (gen_adddi3 (this, this, tmp));
9557 }
9558
9559 /* Generate a tail call to the target function. */
9560 if (! TREE_USED (function))
9561 {
9562 assemble_external (function);
9563 TREE_USED (function) = 1;
9564 }
9565 funexp = XEXP (DECL_RTL (function), 0);
9566 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
9567 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
9568 insn = get_last_insn ();
9569 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
9570
9571 /* Code generation for calls relies on splitting. */
9572 reload_completed = 1;
fe3ad572 9573 epilogue_completed = 1;
599aedd9
RH
9574 try_split (PATTERN (insn), insn, 0);
9575
5f13cfc6
RH
9576 emit_barrier ();
9577
9578 /* Run just enough of rest_of_compilation to get the insns emitted.
9579 There's not really enough bulk here to make other passes such as
9580 instruction scheduling worth while. Note that use_thunk calls
9581 assemble_start_function and assemble_end_function. */
599aedd9 9582
55e092c4 9583 insn_locators_alloc ();
18dbd950 9584 emit_all_insn_group_barriers (NULL);
5f13cfc6 9585 insn = get_insns ();
5f13cfc6
RH
9586 shorten_branches (insn);
9587 final_start_function (insn, file, 1);
c9d691e9 9588 final (insn, file, 1);
5f13cfc6 9589 final_end_function ();
599aedd9
RH
9590
9591 reload_completed = 0;
fe3ad572 9592 epilogue_completed = 0;
483ab821
MM
9593}
9594
351a758b
KH
9595/* Worker function for TARGET_STRUCT_VALUE_RTX. */
9596
9597static rtx
57782ad8 9598ia64_struct_value_rtx (tree fntype,
351a758b
KH
9599 int incoming ATTRIBUTE_UNUSED)
9600{
8d04e6db 9601 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
57782ad8 9602 return NULL_RTX;
351a758b
KH
9603 return gen_rtx_REG (Pmode, GR_REG (8));
9604}
9605
88ed5ef5
SE
9606static bool
9607ia64_scalar_mode_supported_p (enum machine_mode mode)
9608{
9609 switch (mode)
9610 {
9611 case QImode:
9612 case HImode:
9613 case SImode:
9614 case DImode:
9615 case TImode:
9616 return true;
9617
9618 case SFmode:
9619 case DFmode:
9620 case XFmode:
4de67c26 9621 case RFmode:
88ed5ef5
SE
9622 return true;
9623
9624 case TFmode:
9625 return TARGET_HPUX;
9626
9627 default:
9628 return false;
9629 }
9630}
9631
f61134e8
RH
9632static bool
9633ia64_vector_mode_supported_p (enum machine_mode mode)
9634{
9635 switch (mode)
9636 {
9637 case V8QImode:
9638 case V4HImode:
9639 case V2SImode:
9640 return true;
9641
9642 case V2SFmode:
9643 return true;
9644
9645 default:
9646 return false;
9647 }
9648}
9649
694a2f6e
EB
9650/* Implement the FUNCTION_PROFILER macro. */
9651
2b4f149b
RH
9652void
9653ia64_output_function_profiler (FILE *file, int labelno)
9654{
694a2f6e
EB
9655 bool indirect_call;
9656
9657 /* If the function needs a static chain and the static chain
9658 register is r15, we use an indirect call so as to bypass
9659 the PLT stub in case the executable is dynamically linked,
9660 because the stub clobbers r15 as per 5.3.6 of the psABI.
9661 We don't need to do that in non canonical PIC mode. */
9662
9663 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
9664 {
9665 gcc_assert (STATIC_CHAIN_REGNUM == 15);
9666 indirect_call = true;
9667 }
9668 else
9669 indirect_call = false;
9670
2b4f149b
RH
9671 if (TARGET_GNU_AS)
9672 fputs ("\t.prologue 4, r40\n", file);
9673 else
9674 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
9675 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
bd8633a3
RH
9676
9677 if (NO_PROFILE_COUNTERS)
694a2f6e 9678 fputs ("\tmov out3 = r0\n", file);
bd8633a3
RH
9679 else
9680 {
9681 char buf[20];
9682 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9683
9684 if (TARGET_AUTO_PIC)
9685 fputs ("\tmovl out3 = @gprel(", file);
9686 else
9687 fputs ("\taddl out3 = @ltoff(", file);
9688 assemble_name (file, buf);
9689 if (TARGET_AUTO_PIC)
694a2f6e 9690 fputs (")\n", file);
bd8633a3 9691 else
694a2f6e 9692 fputs ("), r1\n", file);
bd8633a3
RH
9693 }
9694
694a2f6e
EB
9695 if (indirect_call)
9696 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
9697 fputs ("\t;;\n", file);
9698
2b4f149b 9699 fputs ("\t.save rp, r42\n", file);
bd8633a3 9700 fputs ("\tmov out2 = b0\n", file);
694a2f6e
EB
9701 if (indirect_call)
9702 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2b4f149b 9703 fputs ("\t.body\n", file);
2b4f149b 9704 fputs ("\tmov out1 = r1\n", file);
694a2f6e
EB
9705 if (indirect_call)
9706 {
9707 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
9708 fputs ("\tmov b6 = r16\n", file);
9709 fputs ("\tld8 r1 = [r14]\n", file);
9710 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
9711 }
9712 else
9713 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2b4f149b
RH
9714}
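
/* Editor's note (illustration, not part of the original source): with GNU
   as, profile counters enabled and no static chain, the fputs calls above
   emit roughly

	.prologue 4, r40
	alloc out0 = ar.pfs, 8, 0, 4, 0
	addl out3 = @ltoff(.LP0), r1
	;;
	.save rp, r42
	mov out2 = b0
	.body
	mov out1 = r1
	br.call.sptk.many b0 = _mcount
	;;

   where the exact spelling of the .LP0 label comes from
   ASM_GENERATE_INTERNAL_LABEL and may differ.  */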
9715
d26afa4f
SE
9716static GTY(()) rtx mcount_func_rtx;
9717static rtx
9718gen_mcount_func_rtx (void)
9719{
9720 if (!mcount_func_rtx)
9721 mcount_func_rtx = init_one_libfunc ("_mcount");
9722 return mcount_func_rtx;
9723}
9724
9725void
9726ia64_profile_hook (int labelno)
9727{
9728 rtx label, ip;
9729
9730 if (NO_PROFILE_COUNTERS)
9731 label = const0_rtx;
9732 else
9733 {
9734 char buf[30];
9735 const char *label_name;
9736 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9737 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
9738 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
9739 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
9740 }
9741 ip = gen_reg_rtx (Pmode);
9742 emit_insn (gen_ip_value (ip));
9743 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
9744 VOIDmode, 3,
9745 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
9746 ip, Pmode,
9747 label, Pmode);
9748}
9749
cac24f06
JM
9750/* Return the mangling of TYPE if it is an extended fundamental type. */
9751
9752static const char *
608063c3 9753ia64_mangle_type (tree type)
cac24f06 9754{
608063c3
JB
9755 type = TYPE_MAIN_VARIANT (type);
9756
9757 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
9758 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
9759 return NULL;
9760
cac24f06
JM
9761 /* On HP-UX, "long double" is mangled as "e" so __float128 is
9762 mangled as "e". */
9763 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
9764 return "g";
9765 /* On HP-UX, "e" is not available as a mangling of __float80 so use
9766 an extended mangling. Elsewhere, "e" is available since long
9767 double is 80 bits. */
9768 if (TYPE_MODE (type) == XFmode)
9769 return TARGET_HPUX ? "u9__float80" : "e";
4de67c26
JM
9770 if (TYPE_MODE (type) == RFmode)
9771 return "u7__fpreg";
9772 return NULL;
9773}
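
/* Editor's note (illustration, not part of the original source): with the
   rules above, a function such as  void f (__float80, __fpreg)  is
   expected to mangle as _Z1feu7__fpreg on Linux, where "e" is free
   because long double is the 80-bit type, and as
   _Z1fu9__float80u7__fpreg on HP-UX.  The function name is invented.  */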
9774
9775/* Return the diagnostic message string if conversion from FROMTYPE to
9776 TOTYPE is not allowed, NULL otherwise. */
9777static const char *
9778ia64_invalid_conversion (tree fromtype, tree totype)
9779{
9780 /* Reject nontrivial conversion to or from __fpreg. */
9781 if (TYPE_MODE (fromtype) == RFmode
9782 && TYPE_MODE (totype) != RFmode
9783 && TYPE_MODE (totype) != VOIDmode)
9784 return N_("invalid conversion from %<__fpreg%>");
9785 if (TYPE_MODE (totype) == RFmode
9786 && TYPE_MODE (fromtype) != RFmode)
9787 return N_("invalid conversion to %<__fpreg%>");
9788 return NULL;
9789}
9790
9791/* Return the diagnostic message string if the unary operation OP is
9792 not permitted on TYPE, NULL otherwise. */
9793static const char *
9794ia64_invalid_unary_op (int op, tree type)
9795{
9796 /* Reject operations on __fpreg other than unary + or &. */
9797 if (TYPE_MODE (type) == RFmode
9798 && op != CONVERT_EXPR
9799 && op != ADDR_EXPR)
9800 return N_("invalid operation on %<__fpreg%>");
9801 return NULL;
9802}
9803
9804/* Return the diagnostic message string if the binary operation OP is
9805 not permitted on TYPE1 and TYPE2, NULL otherwise. */
9806static const char *
9807ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2)
9808{
9809 /* Reject operations on __fpreg. */
9810 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
9811 return N_("invalid operation on %<__fpreg%>");
cac24f06
JM
9812 return NULL;
9813}
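
/* Editor's note (illustration, not part of the original source): what the
   three checks above accept or reject, for an invented variable f of
   type __fpreg:

       __fpreg f;		declaring f is fine
       __fpreg *p = &f;		unary & is allowed
       f + 1.0			rejected: "invalid operation on __fpreg"
       (double) f		rejected: "invalid conversion from __fpreg"  */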
9814
bb83aa4b
MK
9815/* Implement overriding of the optimization options. */
9816void
9817ia64_optimization_options (int level ATTRIBUTE_UNUSED,
9818 int size ATTRIBUTE_UNUSED)
9819{
9820 /* Let the scheduler form additional regions. */
9821 set_param_value ("max-sched-extend-regions-iters", 2);
47eb5b32
ZD
9822
9823 /* Set the default values for cache-related parameters. */
9824 set_param_value ("simultaneous-prefetches", 6);
9825 set_param_value ("l1-cache-line-size", 32);
9826
bb83aa4b
MK
9827}
9828
812b587e
SE
9829/* HP-UX version_id attribute.
9830   For object foo, if the version_id is set to 1234, put out an alias
9831   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
9832 other than an alias statement because it is an illegal symbol name. */
9833
9834static tree
9835ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
9836 tree name ATTRIBUTE_UNUSED,
9837 tree args,
9838 int flags ATTRIBUTE_UNUSED,
9839 bool *no_add_attrs)
9840{
9841 tree arg = TREE_VALUE (args);
9842
9843 if (TREE_CODE (arg) != STRING_CST)
9844 {
9845 error("version attribute is not a string");
9846 *no_add_attrs = true;
9847 return NULL_TREE;
9848 }
9849 return NULL_TREE;
9850}
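
/* Editor's illustration (not part of the original source): applying the
   attribute validated above.  Per the comment before the handler, an
   object declared as

       extern int foo __attribute__ ((version_id ("1234")));

   is expected to get an '.alias foo "foo{1234}"' directive on HP-UX; the
   name foo and the version string are invented for the example.  */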
9851
e2500fed 9852#include "gt-ia64.h"