gcc/config/ia64/ia64.c
ac445222 1/* Definitions of target machine for GNU compiler.
cfaf579d 2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
fba5dd52 3 2009, 2010, 2011
cd90919d 4 Free Software Foundation, Inc.
ac445222 5 Contributed by James E. Wilson <wilson@cygnus.com> and
b40da9a7 6 David Mosberger <davidm@hpl.hp.com>.
ac445222 7
73f45a5c 8This file is part of GCC.
ac445222 9
73f45a5c 10GCC is free software; you can redistribute it and/or modify
ac445222 11it under the terms of the GNU General Public License as published by
038d1e19 12the Free Software Foundation; either version 3, or (at your option)
ac445222 13any later version.
14
73f45a5c 15GCC is distributed in the hope that it will be useful,
ac445222 16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
038d1e19 21along with GCC; see the file COPYING3. If not see
22<http://www.gnu.org/licenses/>. */
ac445222 23
ac445222 24#include "config.h"
7ba6df4b 25#include "system.h"
805e22b2 26#include "coretypes.h"
27#include "tm.h"
ac445222 28#include "rtl.h"
29#include "tree.h"
ac445222 30#include "regs.h"
31#include "hard-reg-set.h"
ac445222 32#include "insn-config.h"
33#include "conditions.h"
ac445222 34#include "output.h"
35#include "insn-attr.h"
36#include "flags.h"
37#include "recog.h"
38#include "expr.h"
d8fc4d0b 39#include "optabs.h"
ac445222 40#include "except.h"
41#include "function.h"
42#include "ggc.h"
43#include "basic-block.h"
8b8d3752 44#include "libfuncs.h"
0b205f4c 45#include "diagnostic-core.h"
a0bb0d20 46#include "sched-int.h"
703f29bc 47#include "timevar.h"
a767736d 48#include "target.h"
49#include "target-def.h"
28d5c3d9 50#include "common/common-target.h"
60e8331c 51#include "tm_p.h"
58ada791 52#include "hashtab.h"
19316b5e 53#include "langhooks.h"
75a70cf9 54#include "gimple.h"
7a979707 55#include "intl.h"
3072d30e 56#include "df.h"
34986748 57#include "debug.h"
475a74e0 58#include "params.h"
3072d30e 59#include "dbgcnt.h"
269f7060 60#include "tm-constrs.h"
cf7898a6 61#include "sel-sched.h"
f501ee23 62#include "reload.h"
fba5dd52 63#include "opts.h"
b9ed1410 64#include "dumpfile.h"
ac445222 65
66/* This is used for communication between ASM_OUTPUT_LABEL and
67 ASM_OUTPUT_LABELREF. */
68int ia64_asm_output_label = 0;
69
ac445222 70/* Register names for ia64_expand_prologue. */
9b06caff 71static const char * const ia64_reg_numbers[96] =
ac445222 72{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
73 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
74 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
75 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
76 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
77 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
78 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
79 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
80 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
81 "r104","r105","r106","r107","r108","r109","r110","r111",
82 "r112","r113","r114","r115","r116","r117","r118","r119",
83 "r120","r121","r122","r123","r124","r125","r126","r127"};
84
85/* ??? These strings could be shared with REGISTER_NAMES. */
9b06caff 86static const char * const ia64_input_reg_names[8] =
ac445222 87{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
88
89/* ??? These strings could be shared with REGISTER_NAMES. */
9b06caff 90static const char * const ia64_local_reg_names[80] =
ac445222 91{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
92 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
93 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
94 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
95 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
96 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
97 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
98 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
99 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
100 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
101
102/* ??? These strings could be shared with REGISTER_NAMES. */
9b06caff 103static const char * const ia64_output_reg_names[8] =
ac445222 104{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
105
ac445222 106/* Variables which are this size or smaller are put in the sdata/sbss
107 sections. */
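/* (This threshold is normally derived from the -G command-line option.)  */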
108
9b06caff 109unsigned int ia64_section_threshold;
58ada791 110
111/* The following variable is used by the DFA insn scheduler. The value is
112 TRUE if we do insn bundling instead of insn scheduling. */
113int bundling_p = 0;
114
3072d30e 115enum ia64_frame_regs
116{
117 reg_fp,
118 reg_save_b0,
119 reg_save_pr,
120 reg_save_ar_pfs,
121 reg_save_ar_unat,
122 reg_save_ar_lc,
123 reg_save_gp,
124 number_of_ia64_frame_regs
125};
126
e13693ec 127/* Structure to be filled in by ia64_compute_frame_size with register
128 save masks and offsets for the current function. */
129
130struct ia64_frame_info
131{
132 HOST_WIDE_INT total_size; /* size of the stack frame, not including
133 the caller's scratch area. */
134 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
135 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
136 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
137 HARD_REG_SET mask; /* mask of saved registers. */
b40da9a7 138 unsigned int gr_used_mask; /* mask of registers in use as gr spill
e13693ec 139 registers or long-term scratches. */
140 int n_spilled; /* number of spilled registers. */
3072d30e 141 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
e13693ec 142 int n_input_regs; /* number of input registers used. */
143 int n_local_regs; /* number of local registers used. */
144 int n_output_regs; /* number of output registers used. */
145 int n_rotate_regs; /* number of rotating registers used. */
146
147 char need_regstk; /* true if a .regstk directive needed. */
148 char initialized; /* true if the data is finalized. */
149};
150
151/* Current frame information calculated by ia64_compute_frame_size. */
152static struct ia64_frame_info current_frame_info;
3072d30e 153/* The actual registers that are emitted. */
154static int emitted_frame_related_regs[number_of_ia64_frame_regs];
9b06caff 155\f
b40da9a7 156static int ia64_first_cycle_multipass_dfa_lookahead (void);
157static void ia64_dependencies_evaluation_hook (rtx, rtx);
158static void ia64_init_dfa_pre_cycle_insn (void);
159static rtx ia64_dfa_pre_cycle_insn (void);
160static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
a9f1838b 161static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
b40da9a7 162static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
ea13ae4c 163static void ia64_h_i_d_extended (void);
cf7898a6 164static void * ia64_alloc_sched_context (void);
165static void ia64_init_sched_context (void *, bool);
166static void ia64_set_sched_context (void *);
167static void ia64_clear_sched_context (void *);
168static void ia64_free_sched_context (void *);
ea13ae4c 169static int ia64_mode_to_int (enum machine_mode);
170static void ia64_set_sched_flags (spec_info_t);
cf7898a6 171static ds_t ia64_get_insn_spec_ds (rtx);
172static ds_t ia64_get_insn_checked_ds (rtx);
173static bool ia64_skip_rtx_p (const_rtx);
ea13ae4c 174static int ia64_speculate_insn (rtx, ds_t, rtx *);
cf7898a6 175static bool ia64_needs_block_p (int);
176static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
ea13ae4c 177static int ia64_spec_check_p (rtx);
178static int ia64_spec_check_src_p (rtx);
b40da9a7 179static rtx gen_tls_get_addr (void);
180static rtx gen_thread_pointer (void);
3072d30e 181static int find_gr_spill (enum ia64_frame_regs, int);
b40da9a7 182static int next_scratch_gr_reg (void);
183static void mark_reg_gr_used_mask (rtx, void *);
184static void ia64_compute_frame_size (HOST_WIDE_INT);
185static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
186static void finish_spill_pointers (void);
187static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
188static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
189static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
190static rtx gen_movdi_x (rtx, rtx, rtx);
191static rtx gen_fr_spill_x (rtx, rtx, rtx);
192static rtx gen_fr_restore_x (rtx, rtx, rtx);
193
e81d24dd 194static void ia64_option_override (void);
cd90919d 195static bool ia64_can_eliminate (const int, const int);
fb80456a 196static enum machine_mode hfa_element_mode (const_tree, bool);
39cba157 197static void ia64_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
ac9a2599 198 tree, int *, int);
39cba157 199static int ia64_arg_partial_bytes (cumulative_args_t, enum machine_mode,
f054eb3c 200 tree, bool);
39cba157 201static rtx ia64_function_arg_1 (cumulative_args_t, enum machine_mode,
4bac51c9 202 const_tree, bool, bool);
39cba157 203static rtx ia64_function_arg (cumulative_args_t, enum machine_mode,
4bac51c9 204 const_tree, bool);
39cba157 205static rtx ia64_function_incoming_arg (cumulative_args_t,
4bac51c9 206 enum machine_mode, const_tree, bool);
39cba157 207static void ia64_function_arg_advance (cumulative_args_t, enum machine_mode,
4bac51c9 208 const_tree, bool);
bd99ba64 209static unsigned int ia64_function_arg_boundary (enum machine_mode,
210 const_tree);
b40da9a7 211static bool ia64_function_ok_for_sibcall (tree, tree);
fb80456a 212static bool ia64_return_in_memory (const_tree, const_tree);
15f3e541 213static rtx ia64_function_value (const_tree, const_tree, bool);
214static rtx ia64_libcall_value (enum machine_mode, const_rtx);
215static bool ia64_function_value_regno_p (const unsigned int);
3a184ccc 216static int ia64_register_move_cost (enum machine_mode, reg_class_t,
217 reg_class_t);
f501ee23 218static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
219 bool);
20d892d1 220static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
77ad8e5a 221static int ia64_unspec_may_trap_p (const_rtx, unsigned);
b40da9a7 222static void fix_range (const char *);
223static struct machine_function * ia64_init_machine_status (void);
224static void emit_insn_group_barriers (FILE *);
225static void emit_all_insn_group_barriers (FILE *);
226static void final_emit_insn_group_barriers (FILE *);
227static void emit_predicate_relation_info (void);
228static void ia64_reorg (void);
a9f1838b 229static bool ia64_in_small_data_p (const_tree);
34986748 230static void process_epilogue (FILE *, rtx, bool, bool);
b40da9a7 231
b40da9a7 232static bool ia64_assemble_integer (rtx, unsigned int, int);
233static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
234static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
235static void ia64_output_function_end_prologue (FILE *);
236
b486e21a 237static void ia64_print_operand (FILE *, rtx, int);
238static void ia64_print_operand_address (FILE *, rtx);
239static bool ia64_print_operand_punct_valid_p (unsigned char code);
240
b40da9a7 241static int ia64_issue_rate (void);
cf7898a6 242static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
b40da9a7 243static void ia64_sched_init (FILE *, int, int);
ea13ae4c 244static void ia64_sched_init_global (FILE *, int, int);
245static void ia64_sched_finish_global (FILE *, int);
b40da9a7 246static void ia64_sched_finish (FILE *, int);
247static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
248static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
249static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
250static int ia64_variable_issue (FILE *, int, rtx, int);
251
37966699 252static void ia64_asm_unwind_emit (FILE *, rtx);
253static void ia64_asm_emit_except_personality (rtx);
254static void ia64_asm_init_sections (void);
255
cc7d6aed 256static enum unwind_info_type ia64_debug_unwind_info (void);
cc7d6aed 257
b40da9a7 258static struct bundle_state *get_free_bundle_state (void);
259static void free_bundle_state (struct bundle_state *);
260static void initiate_bundle_states (void);
261static void finish_bundle_states (void);
262static unsigned bundle_state_hash (const void *);
263static int bundle_state_eq_p (const void *, const void *);
264static int insert_bundle_state (struct bundle_state *);
265static void initiate_bundle_state_table (void);
266static void finish_bundle_state_table (void);
267static int try_issue_nops (struct bundle_state *, int);
268static int try_issue_insn (struct bundle_state *, rtx);
269static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
270static int get_max_pos (state_t);
271static int get_template (state_t, int);
272
273static rtx get_next_important_insn (rtx, rtx);
cf7898a6 274static bool important_for_bundling_p (rtx);
28d5c3d9 275static bool unknown_for_bundling_p (rtx);
b40da9a7 276static void bundling (FILE *, int, rtx, rtx);
277
278static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
279 HOST_WIDE_INT, tree);
280static void ia64_file_start (void);
9afff52d 281static void ia64_globalize_decl_name (FILE *, tree);
b40da9a7 282
4e151b05 283static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
284static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
2f14b1f9 285static section *ia64_select_rtx_section (enum machine_mode, rtx,
286 unsigned HOST_WIDE_INT);
40af64cc 287static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
288 ATTRIBUTE_UNUSED;
64717bc5 289static unsigned int ia64_section_type_flags (tree, const char *, int);
ea5219c2 290static void ia64_init_libfuncs (void)
291 ATTRIBUTE_UNUSED;
f2f543a3 292static void ia64_hpux_init_libfuncs (void)
293 ATTRIBUTE_UNUSED;
0a5425ff 294static void ia64_sysv4_init_libfuncs (void)
295 ATTRIBUTE_UNUSED;
66c17c96 296static void ia64_vms_init_libfuncs (void)
297 ATTRIBUTE_UNUSED;
8b21beb2 298static void ia64_soft_fp_init_libfuncs (void)
299 ATTRIBUTE_UNUSED;
8b8d3752 300static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
301 ATTRIBUTE_UNUSED;
fd42340b 302static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
303 ATTRIBUTE_UNUSED;
f6940372 304
b8629bcb 305static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
9afff52d 306static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
b8629bcb 307static void ia64_encode_section_info (tree, rtx, int);
ac9a2599 308static rtx ia64_struct_value_rtx (tree, int);
75a70cf9 309static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
e64e1ea1 310static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
a5c5f9d3 311static bool ia64_vector_mode_supported_p (enum machine_mode mode);
ca316360 312static bool ia64_legitimate_constant_p (enum machine_mode, rtx);
06e8431b 313static bool ia64_legitimate_address_p (enum machine_mode, rtx, bool);
7d7d7bd2 314static bool ia64_cannot_force_const_mem (enum machine_mode, rtx);
a9f1838b 315static const char *ia64_mangle_type (const_tree);
316static const char *ia64_invalid_conversion (const_tree, const_tree);
317static const char *ia64_invalid_unary_op (int, const_tree);
318static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
0ddb2244 319static enum machine_mode ia64_c_mode_for_suffix (char);
7193d01d 320static void ia64_trampoline_init (rtx, tree, rtx);
4bec06b3 321static void ia64_override_options_after_change (void);
f91ed644 322static bool ia64_member_type_forces_blk (const_tree, enum machine_mode);
585d208e 323
d889cd75 324static tree ia64_builtin_decl (unsigned, bool);
3f60eeb4 325
326static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
4bac51c9 327static enum machine_mode ia64_get_reg_raw_mode (int regno);
3b73548b 328static section * ia64_hpux_function_section (tree, enum node_frequency,
329 bool, bool);
b155a608 330
331static bool ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
332 const unsigned char *sel);
333
334#define MAX_VECT_LEN 8
335
336struct expand_vec_perm_d
337{
338 rtx target, op0, op1;
339 unsigned char perm[MAX_VECT_LEN];
340 enum machine_mode vmode;
341 unsigned char nelt;
342 bool one_operand_p;
343 bool testing_p;
344};
345
346static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
347
a767736d 348\f
59ec9229 349/* Table of valid machine attributes. */
350static const struct attribute_spec ia64_attribute_table[] =
351{
ac86af5d 352 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
353 affects_type_identity } */
354 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
355 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
356 false },
fd42340b 357#if TARGET_ABI_OPEN_VMS
ac86af5d 358 { "common_object", 1, 1, true, false, false,
359 ia64_vms_common_object_attribute, false },
fd42340b 360#endif
9afff52d 361 { "version_id", 1, 1, true, false, false,
ac86af5d 362 ia64_handle_version_id_attribute, false },
363 { NULL, 0, 0, false, false, false, NULL, false }
59ec9229 364};
365
a767736d 366/* Initialize the GCC target structure. */
e3c541f0 367#undef TARGET_ATTRIBUTE_TABLE
368#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
a767736d 369
fc2a2dcb 370#undef TARGET_INIT_BUILTINS
371#define TARGET_INIT_BUILTINS ia64_init_builtins
372
373#undef TARGET_EXPAND_BUILTIN
374#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
375
d889cd75 376#undef TARGET_BUILTIN_DECL
377#define TARGET_BUILTIN_DECL ia64_builtin_decl
378
58356836 379#undef TARGET_ASM_BYTE_OP
380#define TARGET_ASM_BYTE_OP "\tdata1\t"
381#undef TARGET_ASM_ALIGNED_HI_OP
382#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
383#undef TARGET_ASM_ALIGNED_SI_OP
384#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
385#undef TARGET_ASM_ALIGNED_DI_OP
386#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
387#undef TARGET_ASM_UNALIGNED_HI_OP
388#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
389#undef TARGET_ASM_UNALIGNED_SI_OP
390#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
391#undef TARGET_ASM_UNALIGNED_DI_OP
392#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
393#undef TARGET_ASM_INTEGER
394#define TARGET_ASM_INTEGER ia64_assemble_integer
395
e81d24dd 396#undef TARGET_OPTION_OVERRIDE
397#define TARGET_OPTION_OVERRIDE ia64_option_override
398
17d9b0c3 399#undef TARGET_ASM_FUNCTION_PROLOGUE
400#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
85ae73e8 401#undef TARGET_ASM_FUNCTION_END_PROLOGUE
402#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
17d9b0c3 403#undef TARGET_ASM_FUNCTION_EPILOGUE
404#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
405
b486e21a 406#undef TARGET_PRINT_OPERAND
407#define TARGET_PRINT_OPERAND ia64_print_operand
408#undef TARGET_PRINT_OPERAND_ADDRESS
409#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
410#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
412
52470889 413#undef TARGET_IN_SMALL_DATA_P
414#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
415
cf7898a6 416#undef TARGET_SCHED_ADJUST_COST_2
417#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
747af5e7 418#undef TARGET_SCHED_ISSUE_RATE
419#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
420#undef TARGET_SCHED_VARIABLE_ISSUE
421#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
422#undef TARGET_SCHED_INIT
423#define TARGET_SCHED_INIT ia64_sched_init
424#undef TARGET_SCHED_FINISH
425#define TARGET_SCHED_FINISH ia64_sched_finish
ea13ae4c 426#undef TARGET_SCHED_INIT_GLOBAL
427#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
428#undef TARGET_SCHED_FINISH_GLOBAL
429#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
747af5e7 430#undef TARGET_SCHED_REORDER
431#define TARGET_SCHED_REORDER ia64_sched_reorder
432#undef TARGET_SCHED_REORDER2
433#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
747af5e7 434
58ada791 435#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
436#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
437
58ada791 438#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
439#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
440
441#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
442#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
443#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
444#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
445
446#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
447#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
448 ia64_first_cycle_multipass_dfa_lookahead_guard
449
450#undef TARGET_SCHED_DFA_NEW_CYCLE
451#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
452
ea13ae4c 453#undef TARGET_SCHED_H_I_D_EXTENDED
454#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
455
cf7898a6 456#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
457#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
458
459#undef TARGET_SCHED_INIT_SCHED_CONTEXT
460#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
461
462#undef TARGET_SCHED_SET_SCHED_CONTEXT
463#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
464
465#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
466#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
467
468#undef TARGET_SCHED_FREE_SCHED_CONTEXT
469#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
470
ea13ae4c 471#undef TARGET_SCHED_SET_SCHED_FLAGS
472#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
473
cf7898a6 474#undef TARGET_SCHED_GET_INSN_SPEC_DS
475#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
476
477#undef TARGET_SCHED_GET_INSN_CHECKED_DS
478#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
479
ea13ae4c 480#undef TARGET_SCHED_SPECULATE_INSN
481#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
482
483#undef TARGET_SCHED_NEEDS_BLOCK_P
484#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
485
e1ab7874 486#undef TARGET_SCHED_GEN_SPEC_CHECK
cf7898a6 487#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
ea13ae4c 488
489#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
490#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
491 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
492
cf7898a6 493#undef TARGET_SCHED_SKIP_RTX_P
494#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
495
e13693ec 496#undef TARGET_FUNCTION_OK_FOR_SIBCALL
497#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
f054eb3c 498#undef TARGET_ARG_PARTIAL_BYTES
499#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
4bac51c9 500#undef TARGET_FUNCTION_ARG
501#define TARGET_FUNCTION_ARG ia64_function_arg
502#undef TARGET_FUNCTION_INCOMING_ARG
503#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
504#undef TARGET_FUNCTION_ARG_ADVANCE
505#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
bd99ba64 506#undef TARGET_FUNCTION_ARG_BOUNDARY
507#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
e13693ec 508
6988553d 509#undef TARGET_ASM_OUTPUT_MI_THUNK
510#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
eb344f43 511#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
a9f1838b 512#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
6988553d 513
92c473b8 514#undef TARGET_ASM_FILE_START
515#define TARGET_ASM_FILE_START ia64_file_start
516
9afff52d 517#undef TARGET_ASM_GLOBALIZE_DECL_NAME
518#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
519
e6078fbb 520#undef TARGET_REGISTER_MOVE_COST
521#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
f501ee23 522#undef TARGET_MEMORY_MOVE_COST
523#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
fab7adbf 524#undef TARGET_RTX_COSTS
525#define TARGET_RTX_COSTS ia64_rtx_costs
ec0457a8 526#undef TARGET_ADDRESS_COST
d9c5e5f4 527#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
fab7adbf 528
77ad8e5a 529#undef TARGET_UNSPEC_MAY_TRAP_P
530#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
531
2efea8c0 532#undef TARGET_MACHINE_DEPENDENT_REORG
533#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
534
b8629bcb 535#undef TARGET_ENCODE_SECTION_INFO
536#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
537
64717bc5 538#undef TARGET_SECTION_TYPE_FLAGS
539#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
540
40af64cc 541#ifdef HAVE_AS_TLS
542#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
543#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
544#endif
545
ac9a2599 546/* ??? Investigate. */
547#if 0
548#undef TARGET_PROMOTE_PROTOTYPES
549#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
550#endif
551
15f3e541 552#undef TARGET_FUNCTION_VALUE
553#define TARGET_FUNCTION_VALUE ia64_function_value
554#undef TARGET_LIBCALL_VALUE
555#define TARGET_LIBCALL_VALUE ia64_libcall_value
556#undef TARGET_FUNCTION_VALUE_REGNO_P
557#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
558
ac9a2599 559#undef TARGET_STRUCT_VALUE_RTX
560#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
561#undef TARGET_RETURN_IN_MEMORY
562#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
ac9a2599 563#undef TARGET_SETUP_INCOMING_VARARGS
564#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
565#undef TARGET_STRICT_ARGUMENT_NAMING
566#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
0336f0f0 567#undef TARGET_MUST_PASS_IN_STACK
568#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
4bac51c9 569#undef TARGET_GET_RAW_RESULT_MODE
570#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
571#undef TARGET_GET_RAW_ARG_MODE
572#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
ac9a2599 573
f91ed644 574#undef TARGET_MEMBER_TYPE_FORCES_BLK
575#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
576
fcdd3ab3 577#undef TARGET_GIMPLIFY_VA_ARG_EXPR
578#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
579
202d6e5f 580#undef TARGET_ASM_UNWIND_EMIT
37966699 581#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
582#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
583#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
584#undef TARGET_ASM_INIT_SECTIONS
585#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
8ec87476 586
cc7d6aed 587#undef TARGET_DEBUG_UNWIND_INFO
588#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
cc7d6aed 589
e64e1ea1 590#undef TARGET_SCALAR_MODE_SUPPORTED_P
591#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
a5c5f9d3 592#undef TARGET_VECTOR_MODE_SUPPORTED_P
593#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
e64e1ea1 594
2ca3d426 595/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
596 in an order different from the specified program order. */
597#undef TARGET_RELAXED_ORDERING
598#define TARGET_RELAXED_ORDERING true
599
ca316360 600#undef TARGET_LEGITIMATE_CONSTANT_P
601#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
06e8431b 602#undef TARGET_LEGITIMATE_ADDRESS_P
603#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
ca316360 604
3ec3c84f 605#undef TARGET_CANNOT_FORCE_CONST_MEM
606#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
607
eddcdde1 608#undef TARGET_MANGLE_TYPE
609#define TARGET_MANGLE_TYPE ia64_mangle_type
4e11e0fc 610
7a979707 611#undef TARGET_INVALID_CONVERSION
612#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
613#undef TARGET_INVALID_UNARY_OP
614#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
615#undef TARGET_INVALID_BINARY_OP
616#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
617
0ddb2244 618#undef TARGET_C_MODE_FOR_SUFFIX
619#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
620
cd90919d 621#undef TARGET_CAN_ELIMINATE
622#define TARGET_CAN_ELIMINATE ia64_can_eliminate
623
7193d01d 624#undef TARGET_TRAMPOLINE_INIT
625#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
626
1dd98d5a 627#undef TARGET_INVALID_WITHIN_DOLOOP
628#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
629
4bec06b3 630#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
631#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
632
3f60eeb4 633#undef TARGET_PREFERRED_RELOAD_CLASS
634#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
635
8a42230a 636#undef TARGET_DELAY_SCHED2
637#define TARGET_DELAY_SCHED2 true
638
639/* Variable tracking should be run after all optimizations which
640 change order of insns. It also needs a valid CFG. */
641#undef TARGET_DELAY_VARTRACK
642#define TARGET_DELAY_VARTRACK true
643
b155a608 644#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
645#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
646
57e4bbfb 647struct gcc_target targetm = TARGET_INITIALIZER;
9b06caff 648\f
b8629bcb 649typedef enum
650 {
651 ADDR_AREA_NORMAL, /* normal address area */
652 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
653 }
654ia64_addr_area;
655
656static GTY(()) tree small_ident1;
657static GTY(()) tree small_ident2;
658
659static void
660init_idents (void)
661{
662 if (small_ident1 == 0)
663 {
664 small_ident1 = get_identifier ("small");
665 small_ident2 = get_identifier ("__small__");
666 }
667}
668
669/* Retrieve the address area that has been chosen for the given decl. */
670
671static ia64_addr_area
672ia64_get_addr_area (tree decl)
673{
674 tree model_attr;
675
676 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
677 if (model_attr)
678 {
679 tree id;
680
681 init_idents ();
682 id = TREE_VALUE (TREE_VALUE (model_attr));
683 if (id == small_ident1 || id == small_ident2)
684 return ADDR_AREA_SMALL;
685 }
686 return ADDR_AREA_NORMAL;
687}
688
689static tree
a5c5f9d3 690ia64_handle_model_attribute (tree *node, tree name, tree args,
691 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
b8629bcb 692{
693 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
694 ia64_addr_area area;
695 tree arg, decl = *node;
696
697 init_idents ();
698 arg = TREE_VALUE (args);
699 if (arg == small_ident1 || arg == small_ident2)
700 {
701 addr_area = ADDR_AREA_SMALL;
702 }
703 else
704 {
67a779df 705 warning (OPT_Wattributes, "invalid argument of %qE attribute",
706 name);
b8629bcb 707 *no_add_attrs = true;
708 }
709
710 switch (TREE_CODE (decl))
711 {
712 case VAR_DECL:
713 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
714 == FUNCTION_DECL)
715 && !TREE_STATIC (decl))
716 {
712d2297 717 error_at (DECL_SOURCE_LOCATION (decl),
718 "an address area attribute cannot be specified for "
719 "local variables");
b8629bcb 720 *no_add_attrs = true;
721 }
722 area = ia64_get_addr_area (decl);
723 if (area != ADDR_AREA_NORMAL && addr_area != area)
724 {
3cf8b391 725 error ("address area of %q+D conflicts with previous "
726 "declaration", decl);
b8629bcb 727 *no_add_attrs = true;
728 }
729 break;
730
731 case FUNCTION_DECL:
712d2297 732 error_at (DECL_SOURCE_LOCATION (decl),
8b52eeb5 733 "address area attribute cannot be specified for "
734 "functions");
b8629bcb 735 *no_add_attrs = true;
736 break;
737
738 default:
67a779df 739 warning (OPT_Wattributes, "%qE attribute ignored",
740 name);
b8629bcb 741 *no_add_attrs = true;
742 break;
743 }
744
745 return NULL_TREE;
746}
747
fd42340b 748/* Part of the low level implementation of DEC Ada pragma Common_Object which
749 enables the shared use of variables stored in overlaid linker areas
750 corresponding to the use of Fortran COMMON. */
751
752static tree
753ia64_vms_common_object_attribute (tree *node, tree name, tree args,
754 int flags ATTRIBUTE_UNUSED,
755 bool *no_add_attrs)
756{
757 tree decl = *node;
3cab3251 758 tree id;
759
760 gcc_assert (DECL_P (decl));
fd42340b 761
762 DECL_COMMON (decl) = 1;
763 id = TREE_VALUE (args);
3cab3251 764 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
fd42340b 765 {
3cab3251 766 error ("%qE attribute requires a string constant argument", name);
fd42340b 767 *no_add_attrs = true;
768 return NULL_TREE;
769 }
fd42340b 770 return NULL_TREE;
771}
772
773/* Part of the low level implementation of DEC Ada pragma Common_Object. */
774
775void
776ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
777 unsigned HOST_WIDE_INT size,
778 unsigned int align)
779{
780 tree attr = DECL_ATTRIBUTES (decl);
781
3cab3251 782 if (attr)
fd42340b 783 attr = lookup_attribute ("common_object", attr);
3cab3251 784 if (attr)
fd42340b 785 {
3cab3251 786 tree id = TREE_VALUE (TREE_VALUE (attr));
787 const char *name;
fd42340b 788
3cab3251 789 if (TREE_CODE (id) == IDENTIFIER_NODE)
790 name = IDENTIFIER_POINTER (id);
791 else if (TREE_CODE (id) == STRING_CST)
792 name = TREE_STRING_POINTER (id);
793 else
794 abort ();
fd42340b 795
3cab3251 796 fprintf (file, "\t.vms_common\t\"%s\",", name);
fd42340b 797 }
3cab3251 798 else
799 fprintf (file, "%s", COMMON_ASM_OP);
fd42340b 800
3cab3251 801 /* Code from elfos.h. */
802 assemble_name (file, name);
803 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u",
804 size, align / BITS_PER_UNIT);
fd42340b 805
3cab3251 806 fputc ('\n', file);
fd42340b 807}
808
b8629bcb 809static void
810ia64_encode_addr_area (tree decl, rtx symbol)
811{
812 int flags;
813
814 flags = SYMBOL_REF_FLAGS (symbol);
815 switch (ia64_get_addr_area (decl))
816 {
817 case ADDR_AREA_NORMAL: break;
818 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
c5c17bca 819 default: gcc_unreachable ();
b8629bcb 820 }
821 SYMBOL_REF_FLAGS (symbol) = flags;
822}
823
824static void
825ia64_encode_section_info (tree decl, rtx rtl, int first)
826{
827 default_encode_section_info (decl, rtl, first);
828
479a430e 829 /* Careful not to prod global register variables. */
b8629bcb 830 if (TREE_CODE (decl) == VAR_DECL
479a430e 831 && GET_CODE (DECL_RTL (decl)) == MEM
832 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
b8629bcb 833 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
834 ia64_encode_addr_area (decl, XEXP (rtl, 0));
835}
836\f
12d750dc 837/* Return 1 if the operands of a move are ok. */
838
839int
b40da9a7 840ia64_move_ok (rtx dst, rtx src)
12d750dc 841{
842 /* If we're under init_recog_no_volatile, we'll not be able to use
843 memory_operand. So check the code directly and don't worry about
844 the validity of the underlying address, which should have been
845 checked elsewhere anyway. */
846 if (GET_CODE (dst) != MEM)
847 return 1;
848 if (GET_CODE (src) == MEM)
849 return 0;
850 if (register_operand (src, VOIDmode))
851 return 1;
852
 853   /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */
854 if (INTEGRAL_MODE_P (GET_MODE (dst)))
855 return src == const0_rtx;
856 else
269f7060 857 return satisfies_constraint_G (src);
12d750dc 858}
7a525da6 859
4d053aca 860/* Return 1 if the operands are ok for a floating point load pair. */
861
862int
863ia64_load_pair_ok (rtx dst, rtx src)
864{
2688e10b 865 /* ??? There is a thinko in the implementation of the "x" constraint and the
866 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
867 also return false for it. */
868 if (GET_CODE (dst) != REG
869 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
4d053aca 870 return 0;
871 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
872 return 0;
873 switch (GET_CODE (XEXP (src, 0)))
874 {
875 case REG:
876 case POST_INC:
877 break;
878 case POST_DEC:
879 return 0;
880 case POST_MODIFY:
881 {
882 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
883
884 if (GET_CODE (adjust) != CONST_INT
885 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
886 return 0;
887 }
888 break;
889 default:
890 abort ();
891 }
892 return 1;
893}
894
19316b5e 895int
b40da9a7 896addp4_optimize_ok (rtx op1, rtx op2)
19316b5e 897{
19316b5e 898 return (basereg_operand (op1, GET_MODE(op1)) !=
899 basereg_operand (op2, GET_MODE(op2)));
900}
901
1d5a21db 902/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
6709362a 903 Return the length of the field, or <= 0 on failure. */
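/* For example, ROP = 0xff00 with RSHIFT = 8 shifts down to 0xff, a solid
   block of eight 1 bits starting at bit 0, so the value returned is 8.  */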
904
905int
b40da9a7 906ia64_depz_field_mask (rtx rop, rtx rshift)
6709362a 907{
908 unsigned HOST_WIDE_INT op = INTVAL (rop);
909 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
910
911 /* Get rid of the zero bits we're shifting in. */
912 op >>= shift;
913
914 /* We must now have a solid block of 1's at bit 0. */
915 return exact_log2 (op + 1);
916}
917
3ec3c84f 918/* Return the TLS model to use for ADDR. */
919
920static enum tls_model
921tls_symbolic_operand_type (rtx addr)
922{
bc620c5c 923 enum tls_model tls_kind = TLS_MODEL_NONE;
3ec3c84f 924
925 if (GET_CODE (addr) == CONST)
926 {
927 if (GET_CODE (XEXP (addr, 0)) == PLUS
928 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
929 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
930 }
931 else if (GET_CODE (addr) == SYMBOL_REF)
932 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
933
934 return tls_kind;
935}
936
06e8431b 937/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
938 as a base register. */
939
940static inline bool
941ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
942{
943 if (strict
944 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
945 return true;
946 else if (!strict
947 && (GENERAL_REGNO_P (REGNO (reg))
948 || !HARD_REGISTER_P (reg)))
949 return true;
950 else
951 return false;
952}
953
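/* Return true if REG (a register, possibly wrapped in a SUBREG) may serve
   as the base register of a memory address.  */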
954static bool
955ia64_legitimate_address_reg (const_rtx reg, bool strict)
956{
957 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
958 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
959 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
960 return true;
961
962 return false;
963}
964
965static bool
966ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
967{
968 if (GET_CODE (disp) == PLUS
969 && rtx_equal_p (reg, XEXP (disp, 0))
970 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
971 || (CONST_INT_P (XEXP (disp, 1))
972 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
973 return true;
974
975 return false;
976}
977
978/* Implement TARGET_LEGITIMATE_ADDRESS_P. */
979
980static bool
981ia64_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
982 rtx x, bool strict)
983{
984 if (ia64_legitimate_address_reg (x, strict))
985 return true;
986 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
987 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
988 && XEXP (x, 0) != arg_pointer_rtx)
989 return true;
990 else if (GET_CODE (x) == POST_MODIFY
991 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
992 && XEXP (x, 0) != arg_pointer_rtx
993 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
994 return true;
995 else
996 return false;
997}
998
3ec3c84f 999/* Return true if X is a constant that is valid for some immediate
1000 field in an instruction. */
1001
ca316360 1002static bool
1003ia64_legitimate_constant_p (enum machine_mode mode, rtx x)
3ec3c84f 1004{
1005 switch (GET_CODE (x))
1006 {
1007 case CONST_INT:
1008 case LABEL_REF:
1009 return true;
1010
1011 case CONST_DOUBLE:
ca316360 1012 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
3ec3c84f 1013 return true;
269f7060 1014 return satisfies_constraint_G (x);
3ec3c84f 1015
1016 case CONST:
1017 case SYMBOL_REF:
1a29d5df 1018 /* ??? Short term workaround for PR 28490. We must make the code here
1019 match the code in ia64_expand_move and move_operand, even though they
1020 are both technically wrong. */
1021 if (tls_symbolic_operand_type (x) == 0)
1022 {
1023 HOST_WIDE_INT addend = 0;
1024 rtx op = x;
1025
1026 if (GET_CODE (op) == CONST
1027 && GET_CODE (XEXP (op, 0)) == PLUS
1028 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1029 {
1030 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1031 op = XEXP (XEXP (op, 0), 0);
1032 }
1033
ca316360 1034 if (any_offset_symbol_operand (op, mode)
1035 || function_operand (op, mode))
27ab60b9 1036 return true;
ca316360 1037 if (aligned_offset_symbol_operand (op, mode))
1a29d5df 1038 return (addend & 0x3fff) == 0;
1039 return false;
1040 }
1041 return false;
3ec3c84f 1042
18c8ddaa 1043 case CONST_VECTOR:
ca316360 1044 if (mode == V2SFmode)
1045 return satisfies_constraint_Y (x);
18c8ddaa 1046
ca316360 1047 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1048 && GET_MODE_SIZE (mode) <= 8);
18c8ddaa 1049
3ec3c84f 1050 default:
1051 return false;
1052 }
1053}
1054
1055/* Don't allow TLS addresses to get spilled to memory. */
1056
1057static bool
7d7d7bd2 1058ia64_cannot_force_const_mem (enum machine_mode mode, rtx x)
3ec3c84f 1059{
7d7d7bd2 1060 if (mode == RFmode)
e2239f8a 1061 return true;
3ec3c84f 1062 return tls_symbolic_operand_type (x) != 0;
1063}
1064
7a525da6 1065/* Expand a symbolic constant load. */
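/* Return true if the load was expanded here; false when the source can be
   handled by an ordinary move (TARGET_NO_PIC, or a small-address symbol).  */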
7a525da6 1066
3ec3c84f 1067bool
b40da9a7 1068ia64_expand_load_address (rtx dest, rtx src)
7a525da6 1069{
c5c17bca 1070 gcc_assert (GET_CODE (dest) == REG);
c87e09ad 1071
22637cc0 1072 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1073 having to pointer-extend the value afterward. Other forms of address
1074 computation below are also more natural to compute as 64-bit quantities.
1075 If we've been given an SImode destination register, change it. */
1076 if (GET_MODE (dest) != Pmode)
80c70e76 1077 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1078 byte_lowpart_offset (Pmode, GET_MODE (dest)));
22637cc0 1079
3ec3c84f 1080 if (TARGET_NO_PIC)
1081 return false;
1082 if (small_addr_symbolic_operand (src, VOIDmode))
1083 return false;
1084
1085 if (TARGET_AUTO_PIC)
1086 emit_insn (gen_load_gprel64 (dest, src));
c50e596a 1087 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
3ec3c84f 1088 emit_insn (gen_load_fptr (dest, src));
58b90768 1089 else if (sdata_symbolic_operand (src, VOIDmode))
3ec3c84f 1090 emit_insn (gen_load_gprel (dest, src));
1091 else
58b90768 1092 {
3ec3c84f 1093 HOST_WIDE_INT addend = 0;
1094 rtx tmp;
58b90768 1095
3ec3c84f 1096 /* We did split constant offsets in ia64_expand_move, and we did try
1097 to keep them split in move_operand, but we also allowed reload to
1098 rematerialize arbitrary constants rather than spill the value to
1099 the stack and reload it. So we have to be prepared here to split
1100 them apart again. */
1101 if (GET_CODE (src) == CONST)
1102 {
1103 HOST_WIDE_INT hi, lo;
7a525da6 1104
3ec3c84f 1105 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1106 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1107 hi = hi - lo;
7a525da6 1108
3ec3c84f 1109 if (lo != 0)
1110 {
1111 addend = lo;
29c05e22 1112 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
3ec3c84f 1113 }
1114 }
22637cc0 1115
1116 tmp = gen_rtx_HIGH (Pmode, src);
1117 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1118 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1119
7ea22d30 1120 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
22637cc0 1121 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
3ec3c84f 1122
1123 if (addend)
1124 {
1125 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1126 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1127 }
22637cc0 1128 }
3ec3c84f 1129
1130 return true;
7a525da6 1131}
cac50a9f 1132
1f3233d1 1133static GTY(()) rtx gen_tls_tga;
c87e09ad 1134static rtx
b40da9a7 1135gen_tls_get_addr (void)
c87e09ad 1136{
1f3233d1 1137 if (!gen_tls_tga)
58b90768 1138 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1f3233d1 1139 return gen_tls_tga;
c87e09ad 1140}
1141
1f3233d1 1142static GTY(()) rtx thread_pointer_rtx;
c87e09ad 1143static rtx
b40da9a7 1144gen_thread_pointer (void)
c87e09ad 1145{
1f3233d1 1146 if (!thread_pointer_rtx)
b04fab2a 1147 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
f6a5cf64 1148 return thread_pointer_rtx;
c87e09ad 1149}
1150
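/* Expand a TLS reference to the symbol OP1 (split from the original
   expression ORIG_OP1 together with the constant ADDEND) according to model
   TLS_KIND, storing the result in OP0 when possible.  Return the register
   holding the result, or NULL_RTX if OP0 itself was used.  */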
58b90768 1151static rtx
3ec3c84f 1152ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
801e18de 1153 rtx orig_op1, HOST_WIDE_INT addend)
58b90768 1154{
1155 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
801e18de 1156 rtx orig_op0 = op0;
3ec3c84f 1157 HOST_WIDE_INT addend_lo, addend_hi;
1158
58b90768 1159 switch (tls_kind)
1160 {
1161 case TLS_MODEL_GLOBAL_DYNAMIC:
1162 start_sequence ();
1163
1164 tga_op1 = gen_reg_rtx (Pmode);
3ec3c84f 1165 emit_insn (gen_load_dtpmod (tga_op1, op1));
58b90768 1166
1167 tga_op2 = gen_reg_rtx (Pmode);
3ec3c84f 1168 emit_insn (gen_load_dtprel (tga_op2, op1));
b40da9a7 1169
58b90768 1170 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1171 LCT_CONST, Pmode, 2, tga_op1,
1172 Pmode, tga_op2, Pmode);
1173
1174 insns = get_insns ();
1175 end_sequence ();
1176
7e3a7e7f 1177 if (GET_MODE (op0) != Pmode)
1178 op0 = tga_ret;
58b90768 1179 emit_libcall_block (insns, op0, tga_ret, op1);
7e3a7e7f 1180 break;
58b90768 1181
1182 case TLS_MODEL_LOCAL_DYNAMIC:
 1183    /* ??? This isn't the completely proper way to do local-dynamic.
1184 If the call to __tls_get_addr is used only by a single symbol,
1185 then we should (somehow) move the dtprel to the second arg
1186 to avoid the extra add. */
1187 start_sequence ();
1188
1189 tga_op1 = gen_reg_rtx (Pmode);
3ec3c84f 1190 emit_insn (gen_load_dtpmod (tga_op1, op1));
58b90768 1191
1192 tga_op2 = const0_rtx;
1193
1194 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1195 LCT_CONST, Pmode, 2, tga_op1,
1196 Pmode, tga_op2, Pmode);
1197
1198 insns = get_insns ();
1199 end_sequence ();
1200
1201 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1202 UNSPEC_LD_BASE);
1203 tmp = gen_reg_rtx (Pmode);
1204 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1205
7e3a7e7f 1206 if (!register_operand (op0, Pmode))
1207 op0 = gen_reg_rtx (Pmode);
58b90768 1208 if (TARGET_TLS64)
1209 {
7e3a7e7f 1210 emit_insn (gen_load_dtprel (op0, op1));
1211 emit_insn (gen_adddi3 (op0, tmp, op0));
58b90768 1212 }
1213 else
3ec3c84f 1214 emit_insn (gen_add_dtprel (op0, op1, tmp));
7e3a7e7f 1215 break;
58b90768 1216
1217 case TLS_MODEL_INITIAL_EXEC:
801e18de 1218 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1219 addend_hi = addend - addend_lo;
1220
29c05e22 1221 op1 = plus_constant (Pmode, op1, addend_hi);
3ec3c84f 1222 addend = addend_lo;
1223
58b90768 1224 tmp = gen_reg_rtx (Pmode);
3ec3c84f 1225 emit_insn (gen_load_tprel (tmp, op1));
58b90768 1226
7e3a7e7f 1227 if (!register_operand (op0, Pmode))
1228 op0 = gen_reg_rtx (Pmode);
1229 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1230 break;
58b90768 1231
1232 case TLS_MODEL_LOCAL_EXEC:
7e3a7e7f 1233 if (!register_operand (op0, Pmode))
1234 op0 = gen_reg_rtx (Pmode);
3ec3c84f 1235
1236 op1 = orig_op1;
1237 addend = 0;
58b90768 1238 if (TARGET_TLS64)
1239 {
7e3a7e7f 1240 emit_insn (gen_load_tprel (op0, op1));
3ec3c84f 1241 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
58b90768 1242 }
1243 else
3ec3c84f 1244 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
7e3a7e7f 1245 break;
58b90768 1246
1247 default:
c5c17bca 1248 gcc_unreachable ();
58b90768 1249 }
7e3a7e7f 1250
3ec3c84f 1251 if (addend)
1252 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1253 orig_op0, 1, OPTAB_DIRECT);
7e3a7e7f 1254 if (orig_op0 == op0)
1255 return NULL_RTX;
1256 if (GET_MODE (orig_op0) == Pmode)
1257 return op0;
1258 return gen_lowpart (GET_MODE (orig_op0), op0);
58b90768 1259}
1260
c87e09ad 1261rtx
b40da9a7 1262ia64_expand_move (rtx op0, rtx op1)
c87e09ad 1263{
1264 enum machine_mode mode = GET_MODE (op0);
1265
1266 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1267 op1 = force_reg (mode, op1);
1268
58b90768 1269 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
c87e09ad 1270 {
3ec3c84f 1271 HOST_WIDE_INT addend = 0;
c87e09ad 1272 enum tls_model tls_kind;
3ec3c84f 1273 rtx sym = op1;
1274
1275 if (GET_CODE (op1) == CONST
1276 && GET_CODE (XEXP (op1, 0)) == PLUS
1277 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1278 {
1279 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1280 sym = XEXP (XEXP (op1, 0), 0);
1281 }
1282
1283 tls_kind = tls_symbolic_operand_type (sym);
1284 if (tls_kind)
801e18de 1285 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
3ec3c84f 1286
1287 if (any_offset_symbol_operand (sym, mode))
1288 addend = 0;
1289 else if (aligned_offset_symbol_operand (sym, mode))
1290 {
1291 HOST_WIDE_INT addend_lo, addend_hi;
1292
1293 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1294 addend_hi = addend - addend_lo;
1295
1296 if (addend_lo != 0)
1297 {
29c05e22 1298 op1 = plus_constant (mode, sym, addend_hi);
3ec3c84f 1299 addend = addend_lo;
1300 }
0f0129f7 1301 else
1302 addend = 0;
3ec3c84f 1303 }
1304 else
1305 op1 = sym;
1306
1307 if (reload_completed)
1308 {
1309 /* We really should have taken care of this offset earlier. */
1310 gcc_assert (addend == 0);
1311 if (ia64_expand_load_address (op0, op1))
1312 return NULL_RTX;
1313 }
58b90768 1314
3ec3c84f 1315 if (addend)
c87e09ad 1316 {
e1ba4a27 1317 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
3ec3c84f 1318
1319 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1320
1321 op1 = expand_simple_binop (mode, PLUS, subtarget,
1322 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1323 if (op0 == op1)
1324 return NULL_RTX;
c87e09ad 1325 }
1326 }
1327
1328 return op1;
1329}
1330
58b90768 1331/* Split a move from OP1 to OP0 conditional on COND. */
1332
1333void
b40da9a7 1334ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
58b90768 1335{
1336 rtx insn, first = get_last_insn ();
1337
1338 emit_move_insn (op0, op1);
1339
1340 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1341 if (INSN_P (insn))
1342 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1343 PATTERN (insn));
1344}
1345
223b25f9 1346/* Split a post-reload TImode or TFmode reference into two DImode
2cb948c5 1347 components. This is made extra difficult by the fact that we do
1348 not get any scratch registers to work with, because reload cannot
1349 be prevented from giving us a scratch that overlaps the register
1350 pair involved. So instead, when addressing memory, we tweak the
1351 pointer register up and back down with POST_INCs. Or up and not
1352 back down when we can get away with it.
1353
1354 REVERSED is true when the loads must be done in reversed order
1355 (high word first) for correctness. DEAD is true when the pointer
1356 dies with the second insn we generate and therefore the second
1357 address must not carry a postmodify.
1358
1359 May return an insn which is to be emitted after the moves. */
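/* For example, when IN is (mem:TI (reg)) and !REVERSED, the first word is
   read as (mem:DI (post_inc reg)) and the second as (mem:DI (post_dec reg)),
   leaving the pointer where it started; when DEAD, the post-decrement on the
   second access is omitted.  */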
d5f10acf 1360
223b25f9 1361static rtx
2cb948c5 1362ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
d5f10acf 1363{
2cb948c5 1364 rtx fixup = 0;
1365
d5f10acf 1366 switch (GET_CODE (in))
1367 {
1368 case REG:
2cb948c5 1369 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1370 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1371 break;
d5f10acf 1372
1373 case CONST_INT:
1374 case CONST_DOUBLE:
2cb948c5 1375 /* Cannot occur reversed. */
c5c17bca 1376 gcc_assert (!reversed);
2cb948c5 1377
223b25f9 1378 if (GET_MODE (in) != TFmode)
1379 split_double (in, &out[0], &out[1]);
1380 else
1381 /* split_double does not understand how to split a TFmode
1382 quantity into a pair of DImode constants. */
1383 {
1384 REAL_VALUE_TYPE r;
1385 unsigned HOST_WIDE_INT p[2];
1386 long l[4]; /* TFmode is 128 bits */
1387
1388 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1389 real_to_target (l, &r, TFmode);
1390
1391 if (FLOAT_WORDS_BIG_ENDIAN)
1392 {
1393 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1394 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1395 }
1396 else
1397 {
9c562a7e 1398 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1399 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
223b25f9 1400 }
1401 out[0] = GEN_INT (p[0]);
1402 out[1] = GEN_INT (p[1]);
1403 }
2cb948c5 1404 break;
1405
1406 case MEM:
1407 {
1408 rtx base = XEXP (in, 0);
1409 rtx offset;
1410
1411 switch (GET_CODE (base))
1412 {
1413 case REG:
1414 if (!reversed)
1415 {
1416 out[0] = adjust_automodify_address
1417 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1418 out[1] = adjust_automodify_address
1419 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1420 }
1421 else
1422 {
1423 /* Reversal requires a pre-increment, which can only
1424 be done as a separate insn. */
1425 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1426 out[0] = adjust_automodify_address
1427 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1428 out[1] = adjust_address (in, DImode, 0);
1429 }
1430 break;
1431
1432 case POST_INC:
c5c17bca 1433 gcc_assert (!reversed && !dead);
1434
2cb948c5 1435 /* Just do the increment in two steps. */
1436 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1437 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1438 break;
1439
1440 case POST_DEC:
c5c17bca 1441 gcc_assert (!reversed && !dead);
1442
2cb948c5 1443 /* Add 8, subtract 24. */
1444 base = XEXP (base, 0);
1445 out[0] = adjust_automodify_address
1446 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1447 out[1] = adjust_automodify_address
1448 (in, DImode,
29c05e22 1449 gen_rtx_POST_MODIFY (Pmode, base,
1450 plus_constant (Pmode, base, -24)),
2cb948c5 1451 8);
1452 break;
1453
1454 case POST_MODIFY:
c5c17bca 1455 gcc_assert (!reversed && !dead);
1456
2cb948c5 1457 /* Extract and adjust the modification. This case is
1458 trickier than the others, because we might have an
1459 index register, or we might have a combined offset that
1460 doesn't fit a signed 9-bit displacement field. We can
1461 assume the incoming expression is already legitimate. */
1462 offset = XEXP (base, 1);
1463 base = XEXP (base, 0);
1464
1465 out[0] = adjust_automodify_address
1466 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1467
1468 if (GET_CODE (XEXP (offset, 1)) == REG)
1469 {
1470 /* Can't adjust the postmodify to match. Emit the
1471 original, then a separate addition insn. */
1472 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1473 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1474 }
2cb948c5 1475 else
1476 {
c5c17bca 1477 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1478 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1479 {
1480 /* Again the postmodify cannot be made to match,
1481 but in this case it's more efficient to get rid
1482 of the postmodify entirely and fix up with an
1483 add insn. */
1484 out[1] = adjust_automodify_address (in, DImode, base, 8);
1485 fixup = gen_adddi3
1486 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1487 }
1488 else
1489 {
1490 /* Combined offset still fits in the displacement field.
1491 (We cannot overflow it at the high end.) */
1492 out[1] = adjust_automodify_address
1493 (in, DImode, gen_rtx_POST_MODIFY
1494 (Pmode, base, gen_rtx_PLUS
1495 (Pmode, base,
1496 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1497 8);
1498 }
2cb948c5 1499 }
1500 break;
1501
1502 default:
c5c17bca 1503 gcc_unreachable ();
2cb948c5 1504 }
1505 break;
1506 }
d5f10acf 1507
1508 default:
c5c17bca 1509 gcc_unreachable ();
d5f10acf 1510 }
2cb948c5 1511
1512 return fixup;
d5f10acf 1513}
1514
223b25f9 1515/* Split a TImode or TFmode move instruction after reload.
1516 This is used by *movtf_internal and *movti_internal. */
1517void
1518ia64_split_tmode_move (rtx operands[])
1519{
2cb948c5 1520 rtx in[2], out[2], insn;
1521 rtx fixup[2];
1522 bool dead = false;
1523 bool reversed = false;
1524
1525 /* It is possible for reload to decide to overwrite a pointer with
1526 the value it points to. In that case we have to do the loads in
1527 the appropriate order so that the pointer is not destroyed too
1528 early. Also we must not generate a postmodify for that second
c5c17bca 1529 load, or rws_access_regno will die. */
2cb948c5 1530 if (GET_CODE (operands[1]) == MEM
1531 && reg_overlap_mentioned_p (operands[0], operands[1]))
223b25f9 1532 {
2cb948c5 1533 rtx base = XEXP (operands[1], 0);
1534 while (GET_CODE (base) != REG)
1535 base = XEXP (base, 0);
223b25f9 1536
2cb948c5 1537 if (REGNO (base) == REGNO (operands[0]))
1538 reversed = true;
1539 dead = true;
1540 }
1541 /* Another reason to do the moves in reversed order is if the first
1542 element of the target register pair is also the second element of
1543 the source register pair. */
1544 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1545 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1546 reversed = true;
1547
1548 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1549 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1550
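/* Attach a REG_INC note to INSN when EXP is an auto-increment memory
   reference, so later passes see the side effect on the base register.  */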
1551#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1552 if (GET_CODE (EXP) == MEM \
1553 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1554 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1555 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
b9c74b4d 1556 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
2cb948c5 1557
1558 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1559 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1560 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1561
1562 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1563 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1564 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1565
1566 if (fixup[0])
1567 emit_insn (fixup[0]);
1568 if (fixup[1])
1569 emit_insn (fixup[1]);
1570
1571#undef MAYBE_ADD_REG_INC_NOTE
223b25f9 1572}
1573
b8bc42e9 1574/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
d5f10acf 1575 through memory plus an extra GR scratch register. Except that you can
1576 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1577 SECONDARY_RELOAD_CLASS, but not both.
1578
1579 We got into problems in the first place by allowing a construct like
b8bc42e9 1580 (subreg:XF (reg:TI)), which we got from a union containing a long double.
35a3065a 1581 This solution attempts to prevent this situation from occurring. When
d5f10acf 1582 we see something like the above, we spill the inner register to memory. */
1583
7a979707 1584static rtx
1585spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
d5f10acf 1586{
1587 if (GET_CODE (in) == SUBREG
1588 && GET_MODE (SUBREG_REG (in)) == TImode
1589 && GET_CODE (SUBREG_REG (in)) == REG)
1590 {
0ab48139 1591 rtx memt = assign_stack_temp (TImode, 16);
3f56a1ab 1592 emit_move_insn (memt, SUBREG_REG (in));
7a979707 1593 return adjust_address (memt, mode, 0);
d5f10acf 1594 }
1595 else if (force && GET_CODE (in) == REG)
1596 {
0ab48139 1597 rtx memx = assign_stack_temp (mode, 16);
3f56a1ab 1598 emit_move_insn (memx, in);
1599 return memx;
d5f10acf 1600 }
d5f10acf 1601 else
1602 return in;
1603}
33c8f6d1 1604
7a979707 1605/* Expand the movxf or movrf pattern (MODE says which) with the given
1606 OPERANDS, returning true if the pattern should then invoke
1607 DONE. */
1608
1609bool
1610ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1611{
1612 rtx op0 = operands[0];
1613
1614 if (GET_CODE (op0) == SUBREG)
1615 op0 = SUBREG_REG (op0);
1616
1617 /* We must support XFmode loads into general registers for stdarg/vararg,
1618 unprototyped calls, and a rare case where a long double is passed as
1619 an argument after a float HFA fills the FP registers. We split them into
1620 DImode loads for convenience. We also need to support XFmode stores
1621 for the last case. This case does not happen for stdarg/vararg routines,
1622 because we do a block store to memory of unnamed arguments. */
1623
1624 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1625 {
1626 rtx out[2];
1627
1628 /* We're hoping to transform everything that deals with XFmode
1629 quantities and GR registers early in the compiler. */
e1ba4a27 1630 gcc_assert (can_create_pseudo_p ());
7a979707 1631
1632 /* Struct to register can just use TImode instead. */
1633 if ((GET_CODE (operands[1]) == SUBREG
1634 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1635 || (GET_CODE (operands[1]) == REG
1636 && GR_REGNO_P (REGNO (operands[1]))))
1637 {
1638 rtx op1 = operands[1];
1639
1640 if (GET_CODE (op1) == SUBREG)
1641 op1 = SUBREG_REG (op1);
1642 else
1643 op1 = gen_rtx_REG (TImode, REGNO (op1));
1644
1645 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1646 return true;
1647 }
1648
1649 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1650 {
2f9177f0 1651 /* Don't word-swap when reading in the constant. */
7a979707 1652 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
2f9177f0 1653 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1654 0, mode));
7a979707 1655 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
2f9177f0 1656 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1657 0, mode));
7a979707 1658 return true;
1659 }
1660
1661 /* If the quantity is in a register not known to be GR, spill it. */
1662 if (register_operand (operands[1], mode))
1663 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1664
1665 gcc_assert (GET_CODE (operands[1]) == MEM);
1666
2f9177f0 1667 /* Don't word-swap when reading in the value. */
1668 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1669 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
7a979707 1670
1671 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1672 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1673 return true;
1674 }
1675
1676 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1677 {
1678 /* We're hoping to transform everything that deals with XFmode
1679 quantities and GR registers early in the compiler. */
e1ba4a27 1680 gcc_assert (can_create_pseudo_p ());
7a979707 1681
1682 /* Op0 can't be a GR_REG here, as that case is handled above.
1683 If op0 is a register, then we spill op1, so that we now have a
1684 MEM operand. This requires creating an XFmode subreg of a TImode reg
1685 to force the spill. */
1686 if (register_operand (operands[0], mode))
1687 {
1688 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1689 op1 = gen_rtx_SUBREG (mode, op1, 0);
1690 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1691 }
1692
1693 else
1694 {
1695 rtx in[2];
1696
2f9177f0 1697 gcc_assert (GET_CODE (operands[0]) == MEM);
1698
1699 /* Don't word-swap when writing out the value. */
1700 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1701 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
7a979707 1702
1703 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1704 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1705 return true;
1706 }
1707 }
1708
1709 if (!reload_in_progress && !reload_completed)
1710 {
1711 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1712
1713 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1714 {
1715 rtx memt, memx, in = operands[1];
1716 if (CONSTANT_P (in))
1717 in = validize_mem (force_const_mem (mode, in));
1718 if (GET_CODE (in) == MEM)
1719 memt = adjust_address (in, TImode, 0);
1720 else
1721 {
0ab48139 1722 memt = assign_stack_temp (TImode, 16);
7a979707 1723 memx = adjust_address (memt, mode, 0);
1724 emit_move_insn (memx, in);
1725 }
1726 emit_move_insn (op0, memt);
1727 return true;
1728 }
1729
1730 if (!ia64_move_ok (operands[0], operands[1]))
1731 operands[1] = force_reg (mode, operands[1]);
1732 }
1733
1734 return false;
1735}
1736
74f4459c 1737/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1738 with the expression that holds the compare result (in VOIDmode). */
33c8f6d1 1739
9abe3ad4 1740static GTY(()) rtx cmptf_libfunc;
1741
74f4459c 1742void
1743ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
33c8f6d1 1744{
74f4459c 1745 enum rtx_code code = GET_CODE (*expr);
33c8f6d1 1746 rtx cmp;
1747
1748 /* If we have a BImode input, then we already have a compare result, and
1749 do not need to emit another comparison. */
74f4459c 1750 if (GET_MODE (*op0) == BImode)
33c8f6d1 1751 {
74f4459c 1752 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1753 cmp = *op0;
33c8f6d1 1754 }
9abe3ad4 1755 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1756	   magic number as its third argument that indicates what to do.
1757 The return value is an integer to be compared against zero. */
74f4459c 1758 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
9abe3ad4 1759 {
1760 enum qfcmp_magic {
1761 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1762 QCMP_UNORD = 2,
1763 QCMP_EQ = 4,
1764 QCMP_LT = 8,
1765 QCMP_GT = 16
8458f4ca 1766 };
1767 int magic;
9abe3ad4 1768 enum rtx_code ncode;
1769 rtx ret, insns;
c5c17bca 1770
74f4459c 1771 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
9abe3ad4 1772 switch (code)
1773 {
1774 /* 1 = equal, 0 = not equal. Equality operators do
1775 not raise FP_INVALID when given an SNaN operand. */
1776 case EQ: magic = QCMP_EQ; ncode = NE; break;
1777 case NE: magic = QCMP_EQ; ncode = EQ; break;
1778 /* isunordered() from C99. */
1779 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
e21db506 1780 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
9abe3ad4 1781 /* Relational operators raise FP_INVALID when given
1782 an SNaN operand. */
1783 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1784 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1785 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1786 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1787 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1788	   Expanders for buneq etc. would have to be added to ia64.md
1789 for this to be useful. */
c5c17bca 1790 default: gcc_unreachable ();
9abe3ad4 1791 }
1792
1793 start_sequence ();
1794
1795 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
74f4459c 1796 *op0, TFmode, *op1, TFmode,
9abe3ad4 1797 GEN_INT (magic), DImode);
1798 cmp = gen_reg_rtx (BImode);
1799 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1800 gen_rtx_fmt_ee (ncode, BImode,
1801 ret, const0_rtx)));
1802
1803 insns = get_insns ();
1804 end_sequence ();
1805
1806 emit_libcall_block (insns, cmp, cmp,
74f4459c 1807 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
9abe3ad4 1808 code = NE;
1809 }
33c8f6d1 1810 else
1811 {
1812 cmp = gen_reg_rtx (BImode);
1813 emit_insn (gen_rtx_SET (VOIDmode, cmp,
74f4459c 1814 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
33c8f6d1 1815 code = NE;
1816 }
1817
74f4459c 1818 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1819 *op0 = cmp;
1820 *op1 = const0_rtx;
33c8f6d1 1821}
46ebdd6b 1822
f10bf761 1823/* Generate an integral vector comparison. Return true if the condition has
1824 been reversed, and so the sense of the comparison should be inverted. */
a5c5f9d3 1825
1826static bool
1827ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1828 rtx dest, rtx op0, rtx op1)
1829{
1830 bool negate = false;
1831 rtx x;
1832
f10bf761 1833 /* Canonicalize the comparison to EQ, GT, GTU. */
a5c5f9d3 1834 switch (code)
1835 {
1836 case EQ:
1837 case GT:
f10bf761 1838 case GTU:
a5c5f9d3 1839 break;
1840
1841 case NE:
a5c5f9d3 1842 case LE:
f10bf761 1843 case LEU:
1844 code = reverse_condition (code);
a5c5f9d3 1845 negate = true;
1846 break;
1847
1848 case GE:
f10bf761 1849 case GEU:
1850 code = reverse_condition (code);
a5c5f9d3 1851 negate = true;
1852 /* FALLTHRU */
1853
1854 case LT:
a5c5f9d3 1855 case LTU:
f10bf761 1856 code = swap_condition (code);
1857 x = op0, op0 = op1, op1 = x;
1858 break;
a5c5f9d3 1859
f10bf761 1860 default:
1861 gcc_unreachable ();
1862 }
a5c5f9d3 1863
f10bf761 1864 /* Unsigned parallel compare is not supported by the hardware. Play some
008c3704 1865 tricks to turn this into a signed comparison against 0. */
f10bf761 1866 if (code == GTU)
1867 {
1868 switch (mode)
1869 {
1870 case V2SImode:
a5c5f9d3 1871 {
f10bf761 1872 rtx t1, t2, mask;
1873
549f381b 1874 /* Subtract (-(INT MAX) - 1) from both operands to make
1875 them signed. */
1876 mask = GEN_INT (0x80000000);
f10bf761 1877 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
549f381b 1878 mask = force_reg (mode, mask);
1879 t1 = gen_reg_rtx (mode);
1880 emit_insn (gen_subv2si3 (t1, op0, mask));
1881 t2 = gen_reg_rtx (mode);
1882 emit_insn (gen_subv2si3 (t2, op1, mask));
1883 op0 = t1;
1884 op1 = t2;
008c3704 1885 code = GT;
a5c5f9d3 1886 }
f10bf761 1887 break;
1888
1889 case V8QImode:
1890 case V4HImode:
1891 /* Perform a parallel unsigned saturating subtraction. */
1892 x = gen_reg_rtx (mode);
1893 emit_insn (gen_rtx_SET (VOIDmode, x,
1894 gen_rtx_US_MINUS (mode, op0, op1)));
008c3704 1895
1896 code = EQ;
1897 op0 = x;
1898 op1 = CONST0_RTX (mode);
1899 negate = !negate;
f10bf761 1900 break;
1901
1902 default:
1903 gcc_unreachable ();
1904 }
a5c5f9d3 1905 }
1906
1907 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1908 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1909
1910 return negate;
1911}
1912
a5c5f9d3 1913/* Emit an integral vector conditional move. */
1914
1915void
1916ia64_expand_vecint_cmov (rtx operands[])
1917{
1918 enum machine_mode mode = GET_MODE (operands[0]);
1919 enum rtx_code code = GET_CODE (operands[3]);
1920 bool negate;
1921 rtx cmp, x, ot, of;
1922
a5c5f9d3 1923 cmp = gen_reg_rtx (mode);
1924 negate = ia64_expand_vecint_compare (code, mode, cmp,
1925 operands[4], operands[5]);
1926
1927 ot = operands[1+negate];
1928 of = operands[2-negate];
1929
1930 if (ot == CONST0_RTX (mode))
1931 {
1932 if (of == CONST0_RTX (mode))
1933 {
1934 emit_move_insn (operands[0], ot);
1935 return;
1936 }
1937
1938 x = gen_rtx_NOT (mode, cmp);
1939 x = gen_rtx_AND (mode, x, of);
1940 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1941 }
1942 else if (of == CONST0_RTX (mode))
1943 {
1944 x = gen_rtx_AND (mode, cmp, ot);
1945 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1946 }
1947 else
1948 {
1949 rtx t, f;
1950
1951 t = gen_reg_rtx (mode);
1952 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1953 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1954
1955 f = gen_reg_rtx (mode);
1956 x = gen_rtx_NOT (mode, cmp);
1957 x = gen_rtx_AND (mode, x, operands[2-negate]);
1958 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1959
1960 x = gen_rtx_IOR (mode, t, f);
1961 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1962 }
1963}
1964
1965/* Emit an integral vector min or max operation. Return true if all done. */
1966
1967bool
1968ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1969 rtx operands[])
1970{
28032f0a 1971 rtx xops[6];
a5c5f9d3 1972
1973 /* These four combinations are supported directly. */
1974 if (mode == V8QImode && (code == UMIN || code == UMAX))
1975 return false;
1976 if (mode == V4HImode && (code == SMIN || code == SMAX))
1977 return false;
1978
6de03015 1979 /* This combination can be implemented with only saturating subtraction. */
1980 if (mode == V4HImode && code == UMAX)
1981 {
1982 rtx x, tmp = gen_reg_rtx (mode);
1983
1984 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1985 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1986
1987 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1988 return true;
1989 }
1990
a5c5f9d3 1991 /* Everything else implemented via vector comparisons. */
1992 xops[0] = operands[0];
1993 xops[4] = xops[1] = operands[1];
1994 xops[5] = xops[2] = operands[2];
1995
1996 switch (code)
1997 {
1998 case UMIN:
1999 code = LTU;
2000 break;
2001 case UMAX:
2002 code = GTU;
2003 break;
2004 case SMIN:
2005 code = LT;
2006 break;
2007 case SMAX:
2008 code = GT;
2009 break;
2010 default:
c5c17bca 2011 gcc_unreachable ();
a5c5f9d3 2012 }
2013 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2014
2015 ia64_expand_vecint_cmov (xops);
2016 return true;
2017}
2018
0cbc5fc8 2019/* The vectors LO and HI each contain N halves of a double-wide vector.
2020 Reassemble either the first N/2 or the second N/2 elements. */
b133dbfc 2021
2022void
0cbc5fc8 2023ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
b133dbfc 2024{
b155a608 2025 enum machine_mode vmode = GET_MODE (lo);
2026 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2027 struct expand_vec_perm_d d;
2028 bool ok;
b133dbfc 2029
b155a608 2030 d.target = gen_lowpart (vmode, out);
2031 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2032 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2033 d.vmode = vmode;
2034 d.nelt = nelt;
2035 d.one_operand_p = false;
2036 d.testing_p = false;
2037
2038 high = (highp ? nelt / 2 : 0);
2039 for (i = 0; i < nelt / 2; ++i)
b133dbfc 2040 {
b155a608 2041 d.perm[i * 2] = i + high;
2042 d.perm[i * 2 + 1] = i + high + nelt;
b133dbfc 2043 }
2044
b155a608 2045 ok = ia64_expand_vec_perm_const_1 (&d);
2046 gcc_assert (ok);
b133dbfc 2047}
2048
0cbc5fc8 2049/* Return a vector of the sign-extension of VEC. */
d3b735c8 2050
0cbc5fc8 2051static rtx
2052ia64_unpack_sign (rtx vec, bool unsignedp)
d3b735c8 2053{
0cbc5fc8 2054 enum machine_mode mode = GET_MODE (vec);
2055 rtx zero = CONST0_RTX (mode);
d3b735c8 2056
d3b735c8 2057 if (unsignedp)
0cbc5fc8 2058 return zero;
d3b735c8 2059 else
2060 {
0cbc5fc8 2061 rtx sign = gen_reg_rtx (mode);
d3b735c8 2062 bool neg;
2063
0cbc5fc8 2064 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
d3b735c8 2065 gcc_assert (!neg);
0cbc5fc8 2066
2067 return sign;
d3b735c8 2068 }
0cbc5fc8 2069}
d3b735c8 2070
0cbc5fc8 2071/* Emit an integral vector unpack operation. */
d3b735c8 2072
0cbc5fc8 2073void
2074ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2075{
2076 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2077 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
d3b735c8 2078}
2079
0cbc5fc8 2080/* Emit an integral vector widening sum operation. */
2081
b133dbfc 2082void
0cbc5fc8 2083ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
b133dbfc 2084{
0cbc5fc8 2085 enum machine_mode wmode;
2086 rtx l, h, t, sign;
b133dbfc 2087
0cbc5fc8 2088 sign = ia64_unpack_sign (operands[1], unsignedp);
2089
2090 wmode = GET_MODE (operands[0]);
2091 l = gen_reg_rtx (wmode);
2092 h = gen_reg_rtx (wmode);
b133dbfc 2093
0cbc5fc8 2094 ia64_unpack_assemble (l, operands[1], sign, false);
2095 ia64_unpack_assemble (h, operands[1], sign, true);
b133dbfc 2096
0cbc5fc8 2097 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2098 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2099 if (t != operands[0])
2100 emit_move_insn (operands[0], t);
b133dbfc 2101}
2102
46ebdd6b 2103/* Emit the appropriate sequence for a call. */
2104
2105void
b40da9a7 2106ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2107 int sibcall_p)
46ebdd6b 2108{
e13693ec 2109 rtx insn, b0;
46ebdd6b 2110
2111 addr = XEXP (addr, 0);
e569ee68 2112 addr = convert_memory_address (DImode, addr);
46ebdd6b 2113 b0 = gen_rtx_REG (DImode, R_BR (0));
46ebdd6b 2114
e13693ec 2115 /* ??? Should do this for functions known to bind local too. */
46ebdd6b 2116 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2117 {
2118 if (sibcall_p)
e13693ec 2119 insn = gen_sibcall_nogp (addr);
46ebdd6b 2120 else if (! retval)
e13693ec 2121 insn = gen_call_nogp (addr, b0);
46ebdd6b 2122 else
e13693ec 2123 insn = gen_call_value_nogp (retval, addr, b0);
2124 insn = emit_call_insn (insn);
46ebdd6b 2125 }
46ebdd6b 2126 else
e13693ec 2127 {
2128 if (sibcall_p)
2129 insn = gen_sibcall_gp (addr);
2130 else if (! retval)
2131 insn = gen_call_gp (addr, b0);
2132 else
2133 insn = gen_call_value_gp (retval, addr, b0);
2134 insn = emit_call_insn (insn);
46ebdd6b 2135
e13693ec 2136 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2137 }
1c8a3b46 2138
e13693ec 2139 if (sibcall_p)
7d8b6742 2140 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
8b8d3752 2141
2142 if (TARGET_ABI_OPEN_VMS)
2143 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2144 gen_rtx_REG (DImode, GR_REG (25)));
e13693ec 2145}
2146
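/* Record that frame-related register R has been used with its current
   assignment; on later calls, verify that the assignment has not changed.  */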
3072d30e 2147static void
2148reg_emitted (enum ia64_frame_regs r)
2149{
2150 if (emitted_frame_related_regs[r] == 0)
2151 emitted_frame_related_regs[r] = current_frame_info.r[r];
2152 else
2153 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2154}
2155
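/* Return the hard register assigned to frame register R, noting that code
   referencing it has now been emitted.  */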
2156static int
2157get_reg (enum ia64_frame_regs r)
2158{
2159 reg_emitted (r);
2160 return current_frame_info.r[r];
2161}
2162
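/* Return true if hard register REGNO is recorded in
   emitted_frame_related_regs.  */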
2163static bool
2164is_emitted (int regno)
2165{
9f1b7d17 2166 unsigned int r;
3072d30e 2167
2168 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2169 if (emitted_frame_related_regs[r] == regno)
2170 return true;
2171 return false;
2172}
2173
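/* Restore the GP (pic_offset_table_rtx) after a call, either by copying it
   from the general register chosen to save it, or by reloading it from its
   spill slot in the memory frame.  */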
e13693ec 2174void
b40da9a7 2175ia64_reload_gp (void)
e13693ec 2176{
2177 rtx tmp;
2178
3072d30e 2179 if (current_frame_info.r[reg_save_gp])
2180 {
2181 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2182 }
46ebdd6b 2183 else
e13693ec 2184 {
2185 HOST_WIDE_INT offset;
269f7060 2186 rtx offset_r;
e13693ec 2187
2188 offset = (current_frame_info.spill_cfa_off
2189 + current_frame_info.spill_size);
2190 if (frame_pointer_needed)
2191 {
2192 tmp = hard_frame_pointer_rtx;
2193 offset = -offset;
2194 }
2195 else
2196 {
2197 tmp = stack_pointer_rtx;
2198 offset = current_frame_info.total_size - offset;
2199 }
2200
269f7060 2201 offset_r = GEN_INT (offset);
2202 if (satisfies_constraint_I (offset_r))
2203 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
e13693ec 2204 else
2205 {
269f7060 2206 emit_move_insn (pic_offset_table_rtx, offset_r);
e13693ec 2207 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2208 pic_offset_table_rtx, tmp));
2209 }
2210
2211 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2212 }
2213
2214 emit_move_insn (pic_offset_table_rtx, tmp);
2215}
2216
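/* Split a call insn after reload.  If ADDR is a general register, it points
   to a function descriptor: load the entry point through SCRATCH_R into
   SCRATCH_B and pick up the callee's GP from the descriptor, then call
   through SCRATCH_B.  The GP is reloaded afterwards when that may be
   needed.  */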
2217void
b40da9a7 2218ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2219 rtx scratch_b, int noreturn_p, int sibcall_p)
e13693ec 2220{
2221 rtx insn;
2222 bool is_desc = false;
2223
2224 /* If we find we're calling through a register, then we're actually
2225 calling through a descriptor, so load up the values. */
7d8b6742 2226 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
e13693ec 2227 {
2228 rtx tmp;
2229 bool addr_dead_p;
2230
2231 /* ??? We are currently constrained to *not* use peep2, because
3c364971 2232 we can legitimately change the global lifetime of the GP
b40da9a7 2233 (in the form of killing where previously live). This is
e13693ec 2234 because a call through a descriptor doesn't use the previous
2235 value of the GP, while a direct call does, and we do not
2236 commit to either form until the split here.
2237
2238 That said, this means that we lack precise life info for
2239 whether ADDR is dead after this call. This is not terribly
2240 important, since we can fix things up essentially for free
2241 with the POST_DEC below, but it's nice to not use it when we
2242 can immediately tell it's not necessary. */
2243 addr_dead_p = ((noreturn_p || sibcall_p
2244 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2245 REGNO (addr)))
2246 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2247
2248 /* Load the code address into scratch_b. */
2249 tmp = gen_rtx_POST_INC (Pmode, addr);
2250 tmp = gen_rtx_MEM (Pmode, tmp);
2251 emit_move_insn (scratch_r, tmp);
2252 emit_move_insn (scratch_b, scratch_r);
2253
2254 /* Load the GP address. If ADDR is not dead here, then we must
2255 revert the change made above via the POST_INCREMENT. */
2256 if (!addr_dead_p)
2257 tmp = gen_rtx_POST_DEC (Pmode, addr);
2258 else
2259 tmp = addr;
2260 tmp = gen_rtx_MEM (Pmode, tmp);
2261 emit_move_insn (pic_offset_table_rtx, tmp);
2262
2263 is_desc = true;
2264 addr = scratch_b;
2265 }
46ebdd6b 2266
1c8a3b46 2267 if (sibcall_p)
e13693ec 2268 insn = gen_sibcall_nogp (addr);
2269 else if (retval)
2270 insn = gen_call_value_nogp (retval, addr, retaddr);
1c8a3b46 2271 else
e13693ec 2272 insn = gen_call_nogp (addr, retaddr);
1c8a3b46 2273 emit_call_insn (insn);
46ebdd6b 2274
e13693ec 2275 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2276 ia64_reload_gp ();
46ebdd6b 2277}
25991aec 2278
2279/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2280
2281 This differs from the generic code in that we know about the zero-extending
2282 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2283 also know that ld.acq+cmpxchg.rel equals a full barrier.
2284
2285 The loop we want to generate looks like
2286
2287 cmp_reg = mem;
2288 label:
2289 old_reg = cmp_reg;
2290 new_reg = cmp_reg op val;
2291 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2292 if (cmp_reg != old_reg)
2293 goto label;
2294
2295 Note that we only do the plain load from memory once. Subsequent
2296 iterations use the value loaded by the compare-and-swap pattern. */
2297
2298void
2299ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
e35728c4 2300 rtx old_dst, rtx new_dst, enum memmodel model)
25991aec 2301{
2302 enum machine_mode mode = GET_MODE (mem);
2303 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2304 enum insn_code icode;
2305
2306 /* Special case for using fetchadd. */
1591276c 2307 if ((mode == SImode || mode == DImode)
2308 && (code == PLUS || code == MINUS)
2309 && fetchadd_operand (val, mode))
25991aec 2310 {
1591276c 2311 if (code == MINUS)
2312 val = GEN_INT (-INTVAL (val));
2313
25991aec 2314 if (!old_dst)
2315 old_dst = gen_reg_rtx (mode);
2316
e35728c4 2317 switch (model)
2318 {
2319 case MEMMODEL_ACQ_REL:
2320 case MEMMODEL_SEQ_CST:
2321 emit_insn (gen_memory_barrier ());
2322 /* FALLTHRU */
2323 case MEMMODEL_RELAXED:
2324 case MEMMODEL_ACQUIRE:
2325 case MEMMODEL_CONSUME:
2326 if (mode == SImode)
2327 icode = CODE_FOR_fetchadd_acq_si;
2328 else
2329 icode = CODE_FOR_fetchadd_acq_di;
2330 break;
2331 case MEMMODEL_RELEASE:
2332 if (mode == SImode)
2333 icode = CODE_FOR_fetchadd_rel_si;
2334 else
2335 icode = CODE_FOR_fetchadd_rel_di;
2336 break;
2337
2338 default:
2339 gcc_unreachable ();
2340 }
25991aec 2341
25991aec 2342 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2343
2344 if (new_dst)
2345 {
2346 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2347 true, OPTAB_WIDEN);
2348 if (new_reg != new_dst)
2349 emit_move_insn (new_dst, new_reg);
2350 }
2351 return;
2352 }
2353
2354 /* Because of the volatile mem read, we get an ld.acq, which is the
e35728c4 2355 front half of the full barrier. The end half is the cmpxchg.rel.
2356 For relaxed and release memory models, we don't need this. But we
 2357	     don't bother trying to prevent it either.  */
2358 gcc_assert (model == MEMMODEL_RELAXED
2359 || model == MEMMODEL_RELEASE
2360 || MEM_VOLATILE_P (mem));
25991aec 2361
2362 old_reg = gen_reg_rtx (DImode);
2363 cmp_reg = gen_reg_rtx (DImode);
2364 label = gen_label_rtx ();
2365
2366 if (mode != DImode)
2367 {
2368 val = simplify_gen_subreg (DImode, val, mode, 0);
2369 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2370 }
2371 else
2372 emit_move_insn (cmp_reg, mem);
2373
2374 emit_label (label);
2375
2376 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2377 emit_move_insn (old_reg, cmp_reg);
2378 emit_move_insn (ar_ccv, cmp_reg);
2379
2380 if (old_dst)
2381 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2382
2383 new_reg = cmp_reg;
2384 if (code == NOT)
2385 {
9c57be72 2386 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2387 true, OPTAB_DIRECT);
2388 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
25991aec 2389 }
9c57be72 2390 else
2391 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2392 true, OPTAB_DIRECT);
25991aec 2393
2394 if (mode != DImode)
2395 new_reg = gen_lowpart (mode, new_reg);
2396 if (new_dst)
2397 emit_move_insn (new_dst, new_reg);
2398
e35728c4 2399 switch (model)
25991aec 2400 {
e35728c4 2401 case MEMMODEL_RELAXED:
2402 case MEMMODEL_ACQUIRE:
2403 case MEMMODEL_CONSUME:
2404 switch (mode)
2405 {
2406 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2407 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2408 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2409 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2410 default:
2411 gcc_unreachable ();
2412 }
2413 break;
2414
2415 case MEMMODEL_RELEASE:
2416 case MEMMODEL_ACQ_REL:
2417 case MEMMODEL_SEQ_CST:
2418 switch (mode)
2419 {
2420 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2421 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2422 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2423 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2424 default:
2425 gcc_unreachable ();
2426 }
2427 break;
2428
25991aec 2429 default:
2430 gcc_unreachable ();
2431 }
2432
2433 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2434
533b22f0 2435 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
25991aec 2436}
61788791 2437\f
9b06caff 2438/* Begin the assembly file. */
2439
92c473b8 2440static void
b40da9a7 2441ia64_file_start (void)
92c473b8 2442{
2443 default_file_start ();
2444 emit_safe_across_calls ();
2445}
2446
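/* Emit a .pred.safe_across_calls directive listing the ranges of predicate
   registers p1-p63 that are not call-used.  */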
9b06caff 2447void
b40da9a7 2448emit_safe_across_calls (void)
9b06caff 2449{
2450 unsigned int rs, re;
2451 int out_state;
2452
2453 rs = 1;
2454 out_state = 0;
2455 while (1)
2456 {
2457 while (rs < 64 && call_used_regs[PR_REG (rs)])
2458 rs++;
2459 if (rs >= 64)
2460 break;
2461 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2462 continue;
2463 if (out_state == 0)
2464 {
92c473b8 2465 fputs ("\t.pred.safe_across_calls ", asm_out_file);
9b06caff 2466 out_state = 1;
2467 }
2468 else
92c473b8 2469 fputc (',', asm_out_file);
9b06caff 2470 if (re == rs + 1)
92c473b8 2471 fprintf (asm_out_file, "p%u", rs);
9b06caff 2472 else
92c473b8 2473 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
9b06caff 2474 rs = re + 1;
2475 }
2476 if (out_state)
92c473b8 2477 fputc ('\n', asm_out_file);
9b06caff 2478}
2479
9afff52d 2480/* Globalize a declaration. */
2481
2482static void
2483ia64_globalize_decl_name (FILE * stream, tree decl)
2484{
2485 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2486 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2487 if (version_attr)
2488 {
2489 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2490 const char *p = TREE_STRING_POINTER (v);
2491 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2492 }
2493 targetm.asm_out.globalize_label (stream, name);
2494 if (TREE_CODE (decl) == FUNCTION_DECL)
2495 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2496}
2497
cac50a9f 2498/* Helper function for ia64_compute_frame_size: find an appropriate general
 2499	   register to spill special register R to.  The GR0-GR31 registers already
 2500	   allocated by this routine are tracked in current_frame_info.gr_used_mask.
2501 TRY_LOCALS is true if we should attempt to locate a local regnum. */
ac445222 2502
cac50a9f 2503static int
3072d30e 2504find_gr_spill (enum ia64_frame_regs r, int try_locals)
cac50a9f 2505{
2506 int regno;
2507
3072d30e 2508 if (emitted_frame_related_regs[r] != 0)
2509 {
2510 regno = emitted_frame_related_regs[r];
fa83ecdd 2511 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2512 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
3072d30e 2513 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
d5bf7b64 2514 else if (crtl->is_leaf
3072d30e 2515 && regno >= GR_REG (1) && regno <= GR_REG (31))
2516 current_frame_info.gr_used_mask |= 1 << regno;
2517
2518 return regno;
2519 }
2520
cac50a9f 2521 /* If this is a leaf function, first try an otherwise unused
2522 call-clobbered register. */
d5bf7b64 2523 if (crtl->is_leaf)
cac50a9f 2524 {
2525 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
3072d30e 2526 if (! df_regs_ever_live_p (regno)
cac50a9f 2527 && call_used_regs[regno]
2528 && ! fixed_regs[regno]
2529 && ! global_regs[regno]
3072d30e 2530 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2531 && ! is_emitted (regno))
cac50a9f 2532 {
2533 current_frame_info.gr_used_mask |= 1 << regno;
2534 return regno;
2535 }
2536 }
2537
2538 if (try_locals)
2539 {
2540 regno = current_frame_info.n_local_regs;
d9ba5e6d 2541 /* If there is a frame pointer, then we can't use loc79, because
2542 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2543 reg_name switching code in ia64_expand_prologue. */
fa83ecdd 2544 while (regno < (80 - frame_pointer_needed))
2545 if (! is_emitted (LOC_REG (regno++)))
2546 {
2547 current_frame_info.n_local_regs = regno;
2548 return LOC_REG (regno - 1);
2549 }
cac50a9f 2550 }
2551
2552 /* Failed to find a general register to spill to. Must use stack. */
2553 return 0;
2554}
2555
2556/* In order to make for nice schedules, we try to allocate every temporary
2557 to a different register. We must of course stay away from call-saved,
2558 fixed, and global registers. We must also stay away from registers
2559 allocated in current_frame_info.gr_used_mask, since those include regs
2560 used all through the prologue.
2561
2562 Any register allocated here must be used immediately. The idea is to
2563 aid scheduling, not to solve data flow problems. */
2564
2565static int last_scratch_gr_reg;
2566
2567static int
b40da9a7 2568next_scratch_gr_reg (void)
cac50a9f 2569{
2570 int i, regno;
2571
2572 for (i = 0; i < 32; ++i)
2573 {
2574 regno = (last_scratch_gr_reg + i + 1) & 31;
2575 if (call_used_regs[regno]
2576 && ! fixed_regs[regno]
2577 && ! global_regs[regno]
2578 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2579 {
2580 last_scratch_gr_reg = regno;
2581 return regno;
2582 }
2583 }
2584
2585 /* There must be _something_ available. */
c5c17bca 2586 gcc_unreachable ();
cac50a9f 2587}
2588
2589/* Helper function for ia64_compute_frame_size, called through
2590 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2591
2592static void
b40da9a7 2593mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
ac445222 2594{
cac50a9f 2595 unsigned int regno = REGNO (reg);
2596 if (regno < 32)
18281053 2597 {
ea34d710 2598 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
18281053 2599 for (i = 0; i < n; ++i)
2600 current_frame_info.gr_used_mask |= 1 << (regno + i);
2601 }
ac445222 2602}
2603
3072d30e 2604
ac445222 2605/* Returns the number of bytes offset between the frame pointer and the stack
2606 pointer for the current function. SIZE is the number of bytes of space
2607 needed for local variables. */
cac50a9f 2608
2609static void
b40da9a7 2610ia64_compute_frame_size (HOST_WIDE_INT size)
ac445222 2611{
cac50a9f 2612 HOST_WIDE_INT total_size;
2613 HOST_WIDE_INT spill_size = 0;
2614 HOST_WIDE_INT extra_spill_size = 0;
2615 HOST_WIDE_INT pretend_args_size;
ac445222 2616 HARD_REG_SET mask;
cac50a9f 2617 int n_spilled = 0;
2618 int spilled_gr_p = 0;
2619 int spilled_fr_p = 0;
2620 unsigned int regno;
fa83ecdd 2621 int min_regno;
2622 int max_regno;
cac50a9f 2623 int i;
ac445222 2624
cac50a9f 2625 if (current_frame_info.initialized)
2626 return;
1d20fd7a 2627
cac50a9f 2628 memset (&current_frame_info, 0, sizeof current_frame_info);
ac445222 2629 CLEAR_HARD_REG_SET (mask);
2630
cac50a9f 2631 /* Don't allocate scratches to the return register. */
2632 diddle_return_value (mark_reg_gr_used_mask, NULL);
2633
2634 /* Don't allocate scratches to the EH scratch registers. */
2635 if (cfun->machine->ia64_eh_epilogue_sp)
2636 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2637 if (cfun->machine->ia64_eh_epilogue_bsp)
2638 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
ac445222 2639
28d5c3d9 2640 /* Static stack checking uses r2 and r3. */
2641 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2642 current_frame_info.gr_used_mask |= 0xc;
2643
cac50a9f 2644 /* Find the size of the register stack frame. We have only 80 local
2645 registers, because we reserve 8 for the inputs and 8 for the
2646 outputs. */
2647
2648 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2649 since we'll be adjusting that down later. */
2650 regno = LOC_REG (78) + ! frame_pointer_needed;
2651 for (; regno >= LOC_REG (0); regno--)
3072d30e 2652 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
cac50a9f 2653 break;
2654 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
ac445222 2655
ea0d85ae 2656 /* For functions marked with the syscall_linkage attribute, we must mark
2657 all eight input registers as in use, so that locals aren't visible to
2658 the caller. */
2659
2660 if (cfun->machine->n_varargs > 0
2661 || lookup_attribute ("syscall_linkage",
2662 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
cac50a9f 2663 current_frame_info.n_input_regs = 8;
2664 else
2665 {
2666 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
3072d30e 2667 if (df_regs_ever_live_p (regno))
cac50a9f 2668 break;
2669 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2670 }
2671
2672 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
3072d30e 2673 if (df_regs_ever_live_p (regno))
cac50a9f 2674 break;
2675 i = regno - OUT_REG (0) + 1;
2676
f0ae8b21 2677#ifndef PROFILE_HOOK
cac50a9f 2678 /* When -p profiling, we need one output register for the mcount argument.
1d5a21db 2679 Likewise for -a profiling for the bb_init_func argument. For -ax
cac50a9f 2680 profiling, we need two output registers for the two bb_init_trace_func
2681 arguments. */
18d50ae6 2682 if (crtl->profile)
cac50a9f 2683 i = MAX (i, 1);
f0ae8b21 2684#endif
cac50a9f 2685 current_frame_info.n_output_regs = i;
2686
2687 /* ??? No rotating register support yet. */
2688 current_frame_info.n_rotate_regs = 0;
2689
2690 /* Discover which registers need spilling, and how much room that
b40da9a7 2691 will take. Begin with floating point and general registers,
cac50a9f 2692 which will always wind up on the stack. */
2693
2694 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
3072d30e 2695 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
ac445222 2696 {
2697 SET_HARD_REG_BIT (mask, regno);
cac50a9f 2698 spill_size += 16;
2699 n_spilled += 1;
2700 spilled_fr_p = 1;
ac445222 2701 }
2702
cac50a9f 2703 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
3072d30e 2704 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
ac445222 2705 {
2706 SET_HARD_REG_BIT (mask, regno);
cac50a9f 2707 spill_size += 8;
2708 n_spilled += 1;
2709 spilled_gr_p = 1;
ac445222 2710 }
2711
cac50a9f 2712 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
3072d30e 2713 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
ac445222 2714 {
2715 SET_HARD_REG_BIT (mask, regno);
cac50a9f 2716 spill_size += 8;
2717 n_spilled += 1;
ac445222 2718 }
2719
cac50a9f 2720 /* Now come all special registers that might get saved in other
2721 general registers. */
b40da9a7 2722
cac50a9f 2723 if (frame_pointer_needed)
2724 {
3072d30e 2725 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
b8b145b2 2726 /* If we did not get a register, then we take LOC79. This is guaranteed
2727 to be free, even if regs_ever_live is already set, because this is
2728 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2729 as we don't count loc79 above. */
3072d30e 2730 if (current_frame_info.r[reg_fp] == 0)
b8b145b2 2731 {
3072d30e 2732 current_frame_info.r[reg_fp] = LOC_REG (79);
2733 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
b8b145b2 2734 }
cac50a9f 2735 }
2736
d5bf7b64 2737 if (! crtl->is_leaf)
ac445222 2738 {
cac50a9f 2739 /* Emit a save of BR0 if we call other functions. Do this even
2740 if this function doesn't return, as EH depends on this to be
2741 able to unwind the stack. */
2742 SET_HARD_REG_BIT (mask, BR_REG (0));
2743
3072d30e 2744 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2745 if (current_frame_info.r[reg_save_b0] == 0)
cac50a9f 2746 {
f6eff11b 2747 extra_spill_size += 8;
cac50a9f 2748 n_spilled += 1;
2749 }
2750
2751 /* Similarly for ar.pfs. */
2752 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
3072d30e 2753 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2754 if (current_frame_info.r[reg_save_ar_pfs] == 0)
cac50a9f 2755 {
2756 extra_spill_size += 8;
2757 n_spilled += 1;
2758 }
e13693ec 2759
2760 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2761 registers are clobbered, so we fall back to the stack. */
3072d30e 2762 current_frame_info.r[reg_save_gp]
18d50ae6 2763 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
3072d30e 2764 if (current_frame_info.r[reg_save_gp] == 0)
e13693ec 2765 {
2766 SET_HARD_REG_BIT (mask, GR_REG (1));
2767 spill_size += 8;
2768 n_spilled += 1;
2769 }
ac445222 2770 }
2771 else
cac50a9f 2772 {
3072d30e 2773 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
cac50a9f 2774 {
2775 SET_HARD_REG_BIT (mask, BR_REG (0));
f6eff11b 2776 extra_spill_size += 8;
cac50a9f 2777 n_spilled += 1;
2778 }
3924b08e 2779
3072d30e 2780 if (df_regs_ever_live_p (AR_PFS_REGNUM))
3924b08e 2781 {
2782 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
3072d30e 2783 current_frame_info.r[reg_save_ar_pfs]
2784 = find_gr_spill (reg_save_ar_pfs, 1);
2785 if (current_frame_info.r[reg_save_ar_pfs] == 0)
3924b08e 2786 {
2787 extra_spill_size += 8;
2788 n_spilled += 1;
2789 }
2790 }
cac50a9f 2791 }
ac445222 2792
cac50a9f 2793 /* Unwind descriptor hackery: things are most efficient if we allocate
2794 consecutive GR save registers for RP, PFS, FP in that order. However,
2795 it is absolutely critical that FP get the only hard register that's
2796 guaranteed to be free, so we allocated it first. If all three did
2797 happen to be allocated hard regs, and are consecutive, rearrange them
3072d30e 2798 into the preferred order now.
2799
2800 If we have already emitted code for any of those registers,
2801 then it's already too late to change. */
fa83ecdd 2802 min_regno = MIN (current_frame_info.r[reg_fp],
2803 MIN (current_frame_info.r[reg_save_b0],
2804 current_frame_info.r[reg_save_ar_pfs]));
2805 max_regno = MAX (current_frame_info.r[reg_fp],
2806 MAX (current_frame_info.r[reg_save_b0],
2807 current_frame_info.r[reg_save_ar_pfs]));
2808 if (min_regno > 0
2809 && min_regno + 2 == max_regno
2810 && (current_frame_info.r[reg_fp] == min_regno + 1
2811 || current_frame_info.r[reg_save_b0] == min_regno + 1
2812 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2813 && (emitted_frame_related_regs[reg_save_b0] == 0
2814 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2815 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2816 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2817 && (emitted_frame_related_regs[reg_fp] == 0
2818 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
9641f63c 2819 {
fa83ecdd 2820 current_frame_info.r[reg_save_b0] = min_regno;
2821 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2822 current_frame_info.r[reg_fp] = min_regno + 2;
9641f63c 2823 }
2824
cac50a9f 2825 /* See if we need to store the predicate register block. */
2826 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
3072d30e 2827 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
cac50a9f 2828 break;
2829 if (regno <= PR_REG (63))
ac445222 2830 {
cac50a9f 2831 SET_HARD_REG_BIT (mask, PR_REG (0));
3072d30e 2832 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2833 if (current_frame_info.r[reg_save_pr] == 0)
cac50a9f 2834 {
2835 extra_spill_size += 8;
2836 n_spilled += 1;
2837 }
2838
2839 /* ??? Mark them all as used so that register renaming and such
2840 are free to use them. */
2841 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
3072d30e 2842 df_set_regs_ever_live (regno, true);
ac445222 2843 }
2844
cac50a9f 2845 /* If we're forced to use st8.spill, we're forced to save and restore
3924b08e 2846 ar.unat as well. The check for existing liveness allows inline asm
2847 to touch ar.unat. */
2848 if (spilled_gr_p || cfun->machine->n_varargs
3072d30e 2849 || df_regs_ever_live_p (AR_UNAT_REGNUM))
cac50a9f 2850 {
3072d30e 2851 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
cac50a9f 2852 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
3072d30e 2853 current_frame_info.r[reg_save_ar_unat]
2854 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2855 if (current_frame_info.r[reg_save_ar_unat] == 0)
cac50a9f 2856 {
2857 extra_spill_size += 8;
2858 n_spilled += 1;
2859 }
2860 }
2861
3072d30e 2862 if (df_regs_ever_live_p (AR_LC_REGNUM))
cac50a9f 2863 {
2864 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
3072d30e 2865 current_frame_info.r[reg_save_ar_lc]
2866 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2867 if (current_frame_info.r[reg_save_ar_lc] == 0)
cac50a9f 2868 {
2869 extra_spill_size += 8;
2870 n_spilled += 1;
2871 }
2872 }
2873
2874 /* If we have an odd number of words of pretend arguments written to
2875 the stack, then the FR save area will be unaligned. We round the
2876 size of this area up to keep things 16 byte aligned. */
2877 if (spilled_fr_p)
abe32cce 2878 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
cac50a9f 2879 else
abe32cce 2880 pretend_args_size = crtl->args.pretend_args_size;
cac50a9f 2881
2882 total_size = (spill_size + extra_spill_size + size + pretend_args_size
abe32cce 2883 + crtl->outgoing_args_size);
cac50a9f 2884 total_size = IA64_STACK_ALIGN (total_size);
2885
2886 /* We always use the 16-byte scratch area provided by the caller, but
2887 if we are a leaf function, there's no one to which we need to provide
2888 a scratch area. */
d5bf7b64 2889 if (crtl->is_leaf)
cac50a9f 2890 total_size = MAX (0, total_size - 16);
2891
ac445222 2892 current_frame_info.total_size = total_size;
cac50a9f 2893 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2894 current_frame_info.spill_size = spill_size;
2895 current_frame_info.extra_spill_size = extra_spill_size;
ac445222 2896 COPY_HARD_REG_SET (current_frame_info.mask, mask);
cac50a9f 2897 current_frame_info.n_spilled = n_spilled;
ac445222 2898 current_frame_info.initialized = reload_completed;
cac50a9f 2899}
2900
cd90919d 2901/* Worker function for TARGET_CAN_ELIMINATE. */
2902
2903bool
2904ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2905{
d5bf7b64 2906 return (to == BR_REG (0) ? crtl->is_leaf : true);
cd90919d 2907}
2908
cac50a9f 2909/* Compute the initial difference between the specified pair of registers. */
2910
2911HOST_WIDE_INT
b40da9a7 2912ia64_initial_elimination_offset (int from, int to)
cac50a9f 2913{
2914 HOST_WIDE_INT offset;
2915
2916 ia64_compute_frame_size (get_frame_size ());
2917 switch (from)
2918 {
2919 case FRAME_POINTER_REGNUM:
c5c17bca 2920 switch (to)
cac50a9f 2921 {
c5c17bca 2922 case HARD_FRAME_POINTER_REGNUM:
d5bf7b64 2923 if (crtl->is_leaf)
cac50a9f 2924 offset = -current_frame_info.total_size;
2925 else
2926 offset = -(current_frame_info.total_size
abe32cce 2927 - crtl->outgoing_args_size - 16);
c5c17bca 2928 break;
2929
2930 case STACK_POINTER_REGNUM:
d5bf7b64 2931 if (crtl->is_leaf)
cac50a9f 2932 offset = 0;
2933 else
abe32cce 2934 offset = 16 + crtl->outgoing_args_size;
c5c17bca 2935 break;
2936
2937 default:
2938 gcc_unreachable ();
cac50a9f 2939 }
cac50a9f 2940 break;
ac445222 2941
cac50a9f 2942 case ARG_POINTER_REGNUM:
2943 /* Arguments start above the 16 byte save area, unless stdarg
2944 in which case we store through the 16 byte save area. */
c5c17bca 2945 switch (to)
2946 {
2947 case HARD_FRAME_POINTER_REGNUM:
abe32cce 2948 offset = 16 - crtl->args.pretend_args_size;
c5c17bca 2949 break;
2950
2951 case STACK_POINTER_REGNUM:
2952 offset = (current_frame_info.total_size
abe32cce 2953 + 16 - crtl->args.pretend_args_size);
c5c17bca 2954 break;
2955
2956 default:
2957 gcc_unreachable ();
2958 }
cac50a9f 2959 break;
2960
cac50a9f 2961 default:
c5c17bca 2962 gcc_unreachable ();
cac50a9f 2963 }
2964
2965 return offset;
ac445222 2966}
2967
cac50a9f 2968/* If there are more than a trivial number of register spills, we use
2969 two interleaved iterators so that we can get two memory references
2970 per insn group.
2971
2972 In order to simplify things in the prologue and epilogue expanders,
2973 we use helper functions to fix up the memory references after the
2974 fact with the appropriate offsets to a POST_MODIFY memory mode.
2975 The following data structure tracks the state of the two iterators
2976 while insns are being emitted. */
2977
2978struct spill_fill_data
ac445222 2979{
cb0ccc1e 2980 rtx init_after; /* point at which to emit initializations */
cac50a9f 2981 rtx init_reg[2]; /* initial base register */
2982 rtx iter_reg[2]; /* the iterator registers */
2983 rtx *prev_addr[2]; /* address of last memory use */
50473320 2984 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
cac50a9f 2985 HOST_WIDE_INT prev_off[2]; /* last offset */
2986 int n_iter; /* number of iterators in use */
2987 int next_iter; /* next iterator to use */
2988 unsigned int save_gr_used_mask;
2989};
2990
2991static struct spill_fill_data spill_fill_data;
ac445222 2992
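/* Initialize the spill/fill state for N_SPILLS saves, with INIT_REG as the
   initial base register and CFA_OFF as the starting CFA offset.  One
   iterator register is allocated, or two when there are more than two
   spills.  */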
cac50a9f 2993static void
b40da9a7 2994setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
cac50a9f 2995{
2996 int i;
2997
2998 spill_fill_data.init_after = get_last_insn ();
2999 spill_fill_data.init_reg[0] = init_reg;
3000 spill_fill_data.init_reg[1] = init_reg;
3001 spill_fill_data.prev_addr[0] = NULL;
3002 spill_fill_data.prev_addr[1] = NULL;
50473320 3003 spill_fill_data.prev_insn[0] = NULL;
3004 spill_fill_data.prev_insn[1] = NULL;
cac50a9f 3005 spill_fill_data.prev_off[0] = cfa_off;
3006 spill_fill_data.prev_off[1] = cfa_off;
3007 spill_fill_data.next_iter = 0;
3008 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3009
3010 spill_fill_data.n_iter = 1 + (n_spills > 2);
3011 for (i = 0; i < spill_fill_data.n_iter; ++i)
ac445222 3012 {
cac50a9f 3013 int regno = next_scratch_gr_reg ();
3014 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3015 current_frame_info.gr_used_mask |= 1 << regno;
3016 }
3017}
3018
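/* Release the scratch iterator registers allocated by
   setup_spill_pointers.  */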
3019static void
b40da9a7 3020finish_spill_pointers (void)
cac50a9f 3021{
3022 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3023}
ac445222 3024
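/* Return a MEM for accessing REG's save slot at CFA offset CFA_OFF through
   the next spill iterator, rewriting the iterator's previous memory
   reference into a POST_MODIFY when the displacement fits.  */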
cac50a9f 3025static rtx
b40da9a7 3026spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
cac50a9f 3027{
3028 int iter = spill_fill_data.next_iter;
3029 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3030 rtx disp_rtx = GEN_INT (disp);
3031 rtx mem;
3032
3033 if (spill_fill_data.prev_addr[iter])
3034 {
269f7060 3035 if (satisfies_constraint_N (disp_rtx))
50473320 3036 {
3037 *spill_fill_data.prev_addr[iter]
3038 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3039 gen_rtx_PLUS (DImode,
3040 spill_fill_data.iter_reg[iter],
3041 disp_rtx));
b9c74b4d 3042 add_reg_note (spill_fill_data.prev_insn[iter],
3043 REG_INC, spill_fill_data.iter_reg[iter]);
50473320 3044 }
ac445222 3045 else
3046 {
cac50a9f 3047 /* ??? Could use register post_modify for loads. */
269f7060 3048 if (!satisfies_constraint_I (disp_rtx))
cac50a9f 3049 {
3050 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3051 emit_move_insn (tmp, disp_rtx);
3052 disp_rtx = tmp;
3053 }
3054 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3055 spill_fill_data.iter_reg[iter], disp_rtx));
ac445222 3056 }
cac50a9f 3057 }
3058 /* Micro-optimization: if we've created a frame pointer, it's at
3059 CFA 0, which may allow the real iterator to be initialized lower,
3060 slightly increasing parallelism. Also, if there are few saves
3061 it may eliminate the iterator entirely. */
3062 else if (disp == 0
3063 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3064 && frame_pointer_needed)
3065 {
3066 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ab6ab77e 3067 set_mem_alias_set (mem, get_varargs_alias_set ());
cac50a9f 3068 return mem;
3069 }
3070 else
3071 {
25ffe388 3072 rtx seq, insn;
61788791 3073
cac50a9f 3074 if (disp == 0)
3075 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3076 spill_fill_data.init_reg[iter]);
3077 else
ac445222 3078 {
cac50a9f 3079 start_sequence ();
3080
269f7060 3081 if (!satisfies_constraint_I (disp_rtx))
ac445222 3082 {
cac50a9f 3083 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3084 emit_move_insn (tmp, disp_rtx);
3085 disp_rtx = tmp;
ac445222 3086 }
cac50a9f 3087
3088 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3089 spill_fill_data.init_reg[iter],
3090 disp_rtx));
3091
31d3e01c 3092 seq = get_insns ();
cac50a9f 3093 end_sequence ();
ac445222 3094 }
61788791 3095
cac50a9f 3096 /* Careful for being the first insn in a sequence. */
3097 if (spill_fill_data.init_after)
25ffe388 3098 insn = emit_insn_after (seq, spill_fill_data.init_after);
cac50a9f 3099 else
8cb3ce82 3100 {
3101 rtx first = get_insns ();
3102 if (first)
25ffe388 3103 insn = emit_insn_before (seq, first);
8cb3ce82 3104 else
25ffe388 3105 insn = emit_insn (seq);
8cb3ce82 3106 }
25ffe388 3107 spill_fill_data.init_after = insn;
cac50a9f 3108 }
ac445222 3109
cac50a9f 3110 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
ac445222 3111
cac50a9f 3112 /* ??? Not all of the spills are for varargs, but some of them are.
3113 The rest of the spills belong in an alias set of their own. But
3114 it doesn't actually hurt to include them here. */
ab6ab77e 3115 set_mem_alias_set (mem, get_varargs_alias_set ());
61788791 3116
cac50a9f 3117 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3118 spill_fill_data.prev_off[iter] = cfa_off;
ac445222 3119
cac50a9f 3120 if (++iter >= spill_fill_data.n_iter)
3121 iter = 0;
3122 spill_fill_data.next_iter = iter;
ac445222 3123
cac50a9f 3124 return mem;
3125}
9641f63c 3126
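/* Emit a save of REG at CFA offset CFA_OFF using MOVE_FN.  If FRAME_REG is
   nonnull, mark the insn frame related and attach a REG_CFA_OFFSET note
   giving the save address in terms of the frame or stack pointer.  */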
cac50a9f 3127static void
b40da9a7 3128do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3129 rtx frame_reg)
cac50a9f 3130{
50473320 3131 int iter = spill_fill_data.next_iter;
cac50a9f 3132 rtx mem, insn;
9641f63c 3133
cac50a9f 3134 mem = spill_restore_mem (reg, cfa_off);
de4f4740 3135 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
50473320 3136 spill_fill_data.prev_insn[iter] = insn;
9641f63c 3137
cac50a9f 3138 if (frame_reg)
3139 {
3140 rtx base;
3141 HOST_WIDE_INT off;
3142
3143 RTX_FRAME_RELATED_P (insn) = 1;
3144
b40da9a7 3145 /* Don't even pretend that the unwind code can intuit its way
cac50a9f 3146 through a pair of interleaved post_modify iterators. Just
3147 provide the correct answer. */
3148
3149 if (frame_pointer_needed)
3150 {
3151 base = hard_frame_pointer_rtx;
3152 off = - cfa_off;
9641f63c 3153 }
cac50a9f 3154 else
3155 {
3156 base = stack_pointer_rtx;
3157 off = current_frame_info.total_size - cfa_off;
3158 }
3159
585d208e 3160 add_reg_note (insn, REG_CFA_OFFSET,
b9c74b4d 3161 gen_rtx_SET (VOIDmode,
3162 gen_rtx_MEM (GET_MODE (reg),
29c05e22 3163 plus_constant (Pmode,
3164 base, off)),
b9c74b4d 3165 frame_reg));
ac445222 3166 }
3167}
3168
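/* Emit a restore of REG from its save slot at CFA offset CFA_OFF using
   MOVE_FN.  */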
cac50a9f 3169static void
b40da9a7 3170do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
cac50a9f 3171{
50473320 3172 int iter = spill_fill_data.next_iter;
3173 rtx insn;
3174
3175 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3176 GEN_INT (cfa_off)));
3177 spill_fill_data.prev_insn[iter] = insn;
cac50a9f 3178}
3179
de4f4740 3180/* Wrapper functions that discard the CONST_INT spill offset.  These
3181 exist so that we can give gr_spill/gr_fill the offset they need and
1d5a21db 3182 use a consistent function interface. */
de4f4740 3183
3184static rtx
b40da9a7 3185gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
de4f4740 3186{
3187 return gen_movdi (dest, src);
3188}
3189
3190static rtx
b40da9a7 3191gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
de4f4740 3192{
3193 return gen_fr_spill (dest, src);
3194}
3195
3196static rtx
b40da9a7 3197gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
de4f4740 3198{
3199 return gen_fr_restore (dest, src);
3200}
ac445222 3201
28d5c3d9 3202#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3203
3204/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3205#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
3206
3207/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3208 inclusive. These are offsets from the current stack pointer. SOL is the
3209 size of local registers. ??? This clobbers r2 and r3. */
3210
3211static void
3212ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, int sol)
3213{
3214 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3215 of the Register Stack Engine. We also need to probe it after checking
3216 that the 2 stacks don't overlap. */
3217 const int bs_size = BACKING_STORE_SIZE (sol);
3218 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3219 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3220
3221 /* Detect collision of the 2 stacks if necessary. */
3222 if (bs_size > 0 || size > 0)
3223 {
3224 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3225
3226 emit_insn (gen_bsp_value (r3));
3227 emit_move_insn (r2, GEN_INT (-(first + size)));
3228
3229 /* Compare current value of BSP and SP registers. */
3230 emit_insn (gen_rtx_SET (VOIDmode, p6,
3231 gen_rtx_fmt_ee (LTU, BImode,
3232 r3, stack_pointer_rtx)));
3233
3234 /* Compute the address of the probe for the Backing Store (which grows
3235 towards higher addresses). We probe only at the first offset of
3236 the next page because some OS (eg Linux/ia64) only extend the
3237 backing store when this specific address is hit (but generate a SEGV
3238 on other address). Page size is the worst case (4KB). The reserve
3239 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3240 Also compute the address of the last probe for the memory stack
3241 (which grows towards lower addresses). */
675fed0b 3242 emit_insn (gen_rtx_SET (VOIDmode, r3, plus_constant (Pmode, r3, 4095)));
28d5c3d9 3243 emit_insn (gen_rtx_SET (VOIDmode, r2,
3244 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3245
3246 /* Compare them and raise SEGV if the former has topped the latter. */
3247 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3248 gen_rtx_fmt_ee (NE, VOIDmode, p6,
3249 const0_rtx),
3250 gen_rtx_SET (VOIDmode, p6,
3251 gen_rtx_fmt_ee (GEU, BImode,
3252 r3, r2))));
3253 emit_insn (gen_rtx_SET (VOIDmode,
3254 gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3255 const0_rtx),
3256 const0_rtx));
3257 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3258 gen_rtx_fmt_ee (NE, VOIDmode, p6,
3259 const0_rtx),
3260 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3261 GEN_INT (11))));
3262 }
3263
3264 /* Probe the Backing Store if necessary. */
3265 if (bs_size > 0)
3266 emit_stack_probe (r3);
3267
3268 /* Probe the memory stack if necessary. */
3269 if (size == 0)
3270 ;
3271
3272 /* See if we have a constant small number of probes to generate. If so,
3273 that's the easy case. */
3274 else if (size <= PROBE_INTERVAL)
3275 emit_stack_probe (r2);
3276
3277 /* The run-time loop is made up of 8 insns in the generic case while this
3278 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
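/* For instance, SIZE == 4 * PROBE_INTERVAL gives n == 4, i.e.
   5 + 2*(4-2) = 9 straight-line insns emitting 4 probes.  */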
3279 else if (size <= 4 * PROBE_INTERVAL)
3280 {
3281 HOST_WIDE_INT i;
3282
3283 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3284 emit_insn (gen_rtx_SET (VOIDmode, r2,
3285 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3286 emit_stack_probe (r2);
3287
3288 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3289 it exceeds SIZE. If only two probes are needed, this will not
3290 generate any code. Then probe at FIRST + SIZE. */
3291 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3292 {
3293 emit_insn (gen_rtx_SET (VOIDmode, r2,
675fed0b 3294 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
28d5c3d9 3295 emit_stack_probe (r2);
3296 }
3297
3298 emit_insn (gen_rtx_SET (VOIDmode, r2,
675fed0b 3299 plus_constant (Pmode, r2,
28d5c3d9 3300 (i - PROBE_INTERVAL) - size)));
3301 emit_stack_probe (r2);
3302 }
3303
3304 /* Otherwise, do the same as above, but in a loop. Note that we must be
3305 extra careful with variables wrapping around because we might be at
3306 the very top (or the very bottom) of the address space and we have
3307 to be able to handle this case properly; in particular, we use an
3308 equality test for the loop condition. */
3309 else
3310 {
3311 HOST_WIDE_INT rounded_size;
3312
3313 emit_move_insn (r2, GEN_INT (-first));
3314
3315
3316 /* Step 1: round SIZE to the previous multiple of the interval. */
3317
3318 rounded_size = size & -PROBE_INTERVAL;
3319
3320
3321 /* Step 2: compute initial and final value of the loop counter. */
3322
3323 /* TEST_ADDR = SP + FIRST. */
3324 emit_insn (gen_rtx_SET (VOIDmode, r2,
3325 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3326
3327 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3328 if (rounded_size > (1 << 21))
3329 {
3330 emit_move_insn (r3, GEN_INT (-rounded_size));
3331 emit_insn (gen_rtx_SET (VOIDmode, r3, gen_rtx_PLUS (Pmode, r2, r3)));
3332 }
3333 else
3334 emit_insn (gen_rtx_SET (VOIDmode, r3,
3335 gen_rtx_PLUS (Pmode, r2,
3336 GEN_INT (-rounded_size))));
3337
3338
3339 /* Step 3: the loop
3340
3341 while (TEST_ADDR != LAST_ADDR)
3342 {
3343 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3344 probe at TEST_ADDR
3345 }
3346
3347 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3348 until it is equal to ROUNDED_SIZE. */
3349
3350 emit_insn (gen_probe_stack_range (r2, r2, r3));
3351
3352
3353 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3354 that SIZE is equal to ROUNDED_SIZE. */
3355
3356 /* TEMP = SIZE - ROUNDED_SIZE. */
3357 if (size != rounded_size)
3358 {
3359 emit_insn (gen_rtx_SET (VOIDmode, r2,
675fed0b 3360 plus_constant (Pmode, r2,
3361 rounded_size - size)));
28d5c3d9 3362 emit_stack_probe (r2);
3363 }
3364 }
3365
3366 /* Make sure nothing is scheduled before we are done. */
3367 emit_insn (gen_blockage ());
3368}
3369
3370/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3371 absolute addresses. */
3372
3373const char *
3374output_probe_stack_range (rtx reg1, rtx reg2)
3375{
3376 static int labelno = 0;
3377 char loop_lab[32], end_lab[32];
3378 rtx xops[3];
3379
3380 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
3381 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
3382
3383 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3384
3385 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
3386 xops[0] = reg1;
3387 xops[1] = reg2;
3388 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3389 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3390 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
3391 assemble_name_raw (asm_out_file, end_lab);
3392 fputc ('\n', asm_out_file);
3393
3394 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3395 xops[1] = GEN_INT (-PROBE_INTERVAL);
3396 output_asm_insn ("addl %0 = %1, %0", xops);
3397 fputs ("\t;;\n", asm_out_file);
3398
3399 /* Probe at TEST_ADDR and branch. */
3400 output_asm_insn ("probe.w.fault %0, 0", xops);
3401 fprintf (asm_out_file, "\tbr ");
3402 assemble_name_raw (asm_out_file, loop_lab);
3403 fputc ('\n', asm_out_file);
3404
3405 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
3406
3407 return "";
3408}
3409
ac445222 3410/* Called after register allocation to add any instructions needed for the
3411 prologue. Using a prologue insn is favored compared to putting all of the
17d9b0c3 3412 instructions in output_function_prologue(), since it allows the scheduler
ac445222 3413 to intermix instructions with the saves of the caller saved registers. In
3414 some cases, it might be necessary to emit a barrier instruction as the last
3415 insn to prevent such scheduling.
3416
3417 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
cac50a9f 3418 so that the debug info generation code can handle them properly.
3419
9d75589a 3420 The register save area is laid out like so:
cac50a9f 3421 cfa+16
3422 [ varargs spill area ]
3423 [ fr register spill area ]
3424 [ br register spill area ]
3425 [ ar register spill area ]
3426 [ pr register spill area ]
3427 [ gr register spill area ] */
ac445222 3428
3429/* ??? Get inefficient code when the frame size is larger than can fit in an
3430 adds instruction. */
3431
ac445222 3432void
b40da9a7 3433ia64_expand_prologue (void)
ac445222 3434{
cac50a9f 3435 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3436 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3437 rtx reg, alt_reg;
3438
3439 ia64_compute_frame_size (get_frame_size ());
3440 last_scratch_gr_reg = 15;
3441
8c0dd614 3442 if (flag_stack_usage_info)
990495a7 3443 current_function_static_stack_size = current_frame_info.total_size;
3444
28d5c3d9 3445 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3446 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3447 current_frame_info.total_size,
3448 current_frame_info.n_input_regs
3449 + current_frame_info.n_local_regs);
3450
3072d30e 3451 if (dump_file)
3452 {
3453 fprintf (dump_file, "ia64 frame related registers "
3454 "recorded in current_frame_info.r[]:\n");
3455#define PRINTREG(a) if (current_frame_info.r[a]) \
3456 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3457 PRINTREG(reg_fp);
3458 PRINTREG(reg_save_b0);
3459 PRINTREG(reg_save_pr);
3460 PRINTREG(reg_save_ar_pfs);
3461 PRINTREG(reg_save_ar_unat);
3462 PRINTREG(reg_save_ar_lc);
3463 PRINTREG(reg_save_gp);
3464#undef PRINTREG
3465 }
3466
cac50a9f 3467 /* If there is no epilogue, then we don't need some prologue insns.
3468 We need to avoid emitting the dead prologue insns, because flow
3469 will complain about them. */
ac445222 3470 if (optimize)
3471 {
cac50a9f 3472 edge e;
3eddc647 3473 edge_iterator ei;
cac50a9f 3474
cd665a06 3475 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
ac445222 3476 if ((e->flags & EDGE_FAKE) == 0
3477 && (e->flags & EDGE_FALLTHRU) != 0)
3478 break;
3479 epilogue_p = (e != NULL);
3480 }
3481 else
3482 epilogue_p = 1;
3483
cac50a9f 3484 /* Set the local, input, and output register names. We need to do this
3485 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3486 half. If we use in/loc/out register names, then we get assembler errors
3487 in crtn.S because there is no alloc insn or regstk directive in there. */
3488 if (! TARGET_REG_NAMES)
3489 {
3490 int inputs = current_frame_info.n_input_regs;
3491 int locals = current_frame_info.n_local_regs;
3492 int outputs = current_frame_info.n_output_regs;
3493
3494 for (i = 0; i < inputs; i++)
3495 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3496 for (i = 0; i < locals; i++)
3497 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3498 for (i = 0; i < outputs; i++)
3499 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3500 }
ac445222 3501
cac50a9f 3502 /* Set the frame pointer register name. The regnum is logically loc79,
3503 but of course we'll not have allocated that many locals. Rather than
3504 worrying about renumbering the existing rtxs, we adjust the name. */
d9ba5e6d 3505 /* ??? This code means that we can never use one local register when
3506 there is a frame pointer. loc79 gets wasted in this case, as it is
3507 renamed to a register that will never be used. See also the try_locals
3508 code in find_gr_spill. */
3072d30e 3509 if (current_frame_info.r[reg_fp])
cac50a9f 3510 {
3511 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3512 reg_names[HARD_FRAME_POINTER_REGNUM]
3072d30e 3513 = reg_names[current_frame_info.r[reg_fp]];
3514 reg_names[current_frame_info.r[reg_fp]] = tmp;
cac50a9f 3515 }
ac445222 3516
cac50a9f 3517 /* We don't need an alloc instruction if we've used no outputs or locals. */
3518 if (current_frame_info.n_local_regs == 0
46ebdd6b 3519 && current_frame_info.n_output_regs == 0
abe32cce 3520 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3924b08e 3521 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
cac50a9f 3522 {
3523 /* If there is no alloc, but there are input registers used, then we
3524 need a .regstk directive. */
3525 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3526 ar_pfs_save_reg = NULL_RTX;
3527 }
3528 else
3529 {
3530 current_frame_info.need_regstk = 0;
ac445222 3531
3072d30e 3532 if (current_frame_info.r[reg_save_ar_pfs])
3533 {
3534 regno = current_frame_info.r[reg_save_ar_pfs];
3535 reg_emitted (reg_save_ar_pfs);
3536 }
cac50a9f 3537 else
3538 regno = next_scratch_gr_reg ();
3539 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3540
b40da9a7 3541 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
cac50a9f 3542 GEN_INT (current_frame_info.n_input_regs),
3543 GEN_INT (current_frame_info.n_local_regs),
3544 GEN_INT (current_frame_info.n_output_regs),
3545 GEN_INT (current_frame_info.n_rotate_regs)));
19641ccb 3546 if (current_frame_info.r[reg_save_ar_pfs])
3547 {
3548 RTX_FRAME_RELATED_P (insn) = 1;
3549 add_reg_note (insn, REG_CFA_REGISTER,
3550 gen_rtx_SET (VOIDmode,
3551 ar_pfs_save_reg,
3552 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3553 }
cac50a9f 3554 }
ac445222 3555
cac50a9f 3556 /* Set up frame pointer, stack pointer, and spill iterators. */
ac445222 3557
721513d5 3558 n_varargs = cfun->machine->n_varargs;
cac50a9f 3559 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3560 stack_pointer_rtx, 0);
ac445222 3561
cac50a9f 3562 if (frame_pointer_needed)
3563 {
3564 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3565 RTX_FRAME_RELATED_P (insn) = 1;
585d208e 3566
3567 /* Force the unwind info to recognize this as defining a new CFA,
3568 rather than some temp register setup. */
3569 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
cac50a9f 3570 }
ac445222 3571
cac50a9f 3572 if (current_frame_info.total_size != 0)
3573 {
3574 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3575 rtx offset;
ac445222 3576
269f7060 3577 if (satisfies_constraint_I (frame_size_rtx))
cac50a9f 3578 offset = frame_size_rtx;
3579 else
3580 {
3581 regno = next_scratch_gr_reg ();
b40da9a7 3582 offset = gen_rtx_REG (DImode, regno);
cac50a9f 3583 emit_move_insn (offset, frame_size_rtx);
3584 }
ac445222 3585
cac50a9f 3586 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3587 stack_pointer_rtx, offset));
ac445222 3588
cac50a9f 3589 if (! frame_pointer_needed)
3590 {
3591 RTX_FRAME_RELATED_P (insn) = 1;
585d208e 3592 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3593 gen_rtx_SET (VOIDmode,
3594 stack_pointer_rtx,
3595 gen_rtx_PLUS (DImode,
3596 stack_pointer_rtx,
3597 frame_size_rtx)));
cac50a9f 3598 }
ac445222 3599
cac50a9f 3600 /* ??? At this point we must generate a magic insn that appears to
3601 modify the stack pointer, the frame pointer, and all spill
3602 iterators. This would allow the most scheduling freedom. For
3603 now, just hard stop. */
3604 emit_insn (gen_blockage ());
3605 }
ac445222 3606
cac50a9f 3607 /* Must copy out ar.unat before doing any integer spills. */
3608 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
ac445222 3609 {
3072d30e 3610 if (current_frame_info.r[reg_save_ar_unat])
3611 {
3612 ar_unat_save_reg
3613 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3614 reg_emitted (reg_save_ar_unat);
3615 }
cac50a9f 3616 else
ac445222 3617 {
cac50a9f 3618 alt_regno = next_scratch_gr_reg ();
3619 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3620 current_frame_info.gr_used_mask |= 1 << alt_regno;
ac445222 3621 }
ac445222 3622
cac50a9f 3623 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3624 insn = emit_move_insn (ar_unat_save_reg, reg);
585d208e 3625 if (current_frame_info.r[reg_save_ar_unat])
3626 {
3627 RTX_FRAME_RELATED_P (insn) = 1;
3628 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3629 }
cac50a9f 3630
3631 /* Even if we're not going to generate an epilogue, we still
3632 need to save the register so that EH works. */
3072d30e 3633 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
8ced207e 3634 emit_insn (gen_prologue_use (ar_unat_save_reg));
ac445222 3635 }
3636 else
cac50a9f 3637 ar_unat_save_reg = NULL_RTX;
3638
3639 /* Spill all varargs registers. Do this before spilling any GR registers,
3640 since we want the UNAT bits for the GR registers to override the UNAT
3641 bits from varargs, which we don't care about. */
ac445222 3642
cac50a9f 3643 cfa_off = -16;
3644 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
ac445222 3645 {
cac50a9f 3646 reg = gen_rtx_REG (DImode, regno);
de4f4740 3647 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
ac445222 3648 }
ac445222 3649
cac50a9f 3650 /* Locate the bottom of the register save area. */
3651 cfa_off = (current_frame_info.spill_cfa_off
3652 + current_frame_info.spill_size
3653 + current_frame_info.extra_spill_size);
ac445222 3654
cac50a9f 3655 /* Save the predicate register block either in a register or in memory. */
3656 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3657 {
3658 reg = gen_rtx_REG (DImode, PR_REG (0));
3072d30e 3659 if (current_frame_info.r[reg_save_pr] != 0)
2bd308f0 3660 {
3072d30e 3661 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3662 reg_emitted (reg_save_pr);
cac50a9f 3663 insn = emit_move_insn (alt_reg, reg);
2bd308f0 3664
cac50a9f 3665 /* ??? Denote pr spill/fill by a DImode move that modifies all
3666 64 hard registers. */
2bd308f0 3667 RTX_FRAME_RELATED_P (insn) = 1;
585d208e 3668 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
1a2eb8ab 3669
cac50a9f 3670 /* Even if we're not going to generate an epilogue, we still
3671 need to save the register so that EH works. */
3672 if (! epilogue_p)
8ced207e 3673 emit_insn (gen_prologue_use (alt_reg));
2bd308f0 3674 }
3675 else
cac50a9f 3676 {
3677 alt_regno = next_scratch_gr_reg ();
3678 alt_reg = gen_rtx_REG (DImode, alt_regno);
3679 insn = emit_move_insn (alt_reg, reg);
de4f4740 3680 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
cac50a9f 3681 cfa_off -= 8;
3682 }
ac445222 3683 }
3684
cac50a9f 3685 /* Handle AR regs in numerical order. All of them get special handling. */
3686 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3072d30e 3687 && current_frame_info.r[reg_save_ar_unat] == 0)
ac445222 3688 {
cac50a9f 3689 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
de4f4740 3690 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
cac50a9f 3691 cfa_off -= 8;
ac445222 3692 }
cac50a9f 3693
3694 /* The alloc insn already copied ar.pfs into a general register. The
3695 only thing we have to do now is copy that register to a stack slot
3696 if we'd not allocated a local register for the job. */
3924b08e 3697 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3072d30e 3698 && current_frame_info.r[reg_save_ar_pfs] == 0)
ac445222 3699 {
cac50a9f 3700 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
de4f4740 3701 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
cac50a9f 3702 cfa_off -= 8;
3703 }
3704
3705 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3706 {
3707 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3072d30e 3708 if (current_frame_info.r[reg_save_ar_lc] != 0)
cac50a9f 3709 {
3072d30e 3710 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3711 reg_emitted (reg_save_ar_lc);
cac50a9f 3712 insn = emit_move_insn (alt_reg, reg);
3713 RTX_FRAME_RELATED_P (insn) = 1;
585d208e 3714 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
cac50a9f 3715
3716 /* Even if we're not going to generate an epilogue, we still
3717 need to save the register so that EH works. */
3718 if (! epilogue_p)
8ced207e 3719 emit_insn (gen_prologue_use (alt_reg));
cac50a9f 3720 }
ac445222 3721 else
3722 {
cac50a9f 3723 alt_regno = next_scratch_gr_reg ();
3724 alt_reg = gen_rtx_REG (DImode, alt_regno);
3725 emit_move_insn (alt_reg, reg);
de4f4740 3726 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
cac50a9f 3727 cfa_off -= 8;
3728 }
3729 }
3730
f6eff11b 3731 /* Save the return pointer. */
3732 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3733 {
3734 reg = gen_rtx_REG (DImode, BR_REG (0));
3072d30e 3735 if (current_frame_info.r[reg_save_b0] != 0)
f6eff11b 3736 {
3072d30e 3737 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3738 reg_emitted (reg_save_b0);
f6eff11b 3739 insn = emit_move_insn (alt_reg, reg);
3740 RTX_FRAME_RELATED_P (insn) = 1;
4a5b1b88 3741 add_reg_note (insn, REG_CFA_REGISTER,
3742 gen_rtx_SET (VOIDmode, alt_reg, pc_rtx));
f6eff11b 3743
3744 /* Even if we're not going to generate an epilogue, we still
3745 need to save the register so that EH works. */
3746 if (! epilogue_p)
3747 emit_insn (gen_prologue_use (alt_reg));
3748 }
3749 else
3750 {
3751 alt_regno = next_scratch_gr_reg ();
3752 alt_reg = gen_rtx_REG (DImode, alt_regno);
3753 emit_move_insn (alt_reg, reg);
3754 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3755 cfa_off -= 8;
3756 }
3757 }
3758
3072d30e 3759 if (current_frame_info.r[reg_save_gp])
e13693ec 3760 {
3072d30e 3761 reg_emitted (reg_save_gp);
e13693ec 3762 insn = emit_move_insn (gen_rtx_REG (DImode,
3072d30e 3763 current_frame_info.r[reg_save_gp]),
e13693ec 3764 pic_offset_table_rtx);
e13693ec 3765 }
3766
cac50a9f 3767 /* We should now be at the base of the gr/br/fr spill area. */
c5c17bca 3768 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3769 + current_frame_info.spill_size));
cac50a9f 3770
3771 /* Spill all general registers. */
3772 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3773 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3774 {
3775 reg = gen_rtx_REG (DImode, regno);
3776 do_spill (gen_gr_spill, reg, cfa_off, reg);
3777 cfa_off -= 8;
3778 }
3779
cac50a9f 3780 /* Spill the rest of the BR registers. */
3781 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3782 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3783 {
3784 alt_regno = next_scratch_gr_reg ();
3785 alt_reg = gen_rtx_REG (DImode, alt_regno);
3786 reg = gen_rtx_REG (DImode, regno);
3787 emit_move_insn (alt_reg, reg);
de4f4740 3788 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
cac50a9f 3789 cfa_off -= 8;
3790 }
3791
3792 /* Align the frame and spill all FR registers. */
3793 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3794 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3795 {
c5c17bca 3796 gcc_assert (!(cfa_off & 15));
b8bc42e9 3797 reg = gen_rtx_REG (XFmode, regno);
de4f4740 3798 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
cac50a9f 3799 cfa_off -= 16;
3800 }
3801
c5c17bca 3802 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
cac50a9f 3803
3804 finish_spill_pointers ();
ac445222 3805}
3806
f2507985 3807/* Output the textual info surrounding the prologue. */
3808
3809void
3810ia64_start_function (FILE *file, const char *fnname,
3811 tree decl ATTRIBUTE_UNUSED)
3812{
784576c7 3813#if TARGET_ABI_OPEN_VMS
3814 vms_start_function (fnname);
f2507985 3815#endif
3816
3817 fputs ("\t.proc ", file);
3818 assemble_name (file, fnname);
3819 fputc ('\n', file);
3820 ASM_OUTPUT_LABEL (file, fnname);
3821}
3822
ac445222 3823/* Called after register allocation to add any instructions needed for the
45981c0a 3824 epilogue. Using an epilogue insn is favored compared to putting all of the
17d9b0c3 3825 instructions in output_function_epilogue(), since it allows the scheduler
ac445222 3826 to intermix instructions with the saves of the caller saved registers. In
3827 some cases, it might be necessary to emit a barrier instruction as the last
3828 insn to prevent such scheduling. */
3829
3830void
b40da9a7 3831ia64_expand_epilogue (int sibcall_p)
ac445222 3832{
cac50a9f 3833 rtx insn, reg, alt_reg, ar_unat_save_reg;
3834 int regno, alt_regno, cfa_off;
3835
3836 ia64_compute_frame_size (get_frame_size ());
3837
3838 /* If there is a frame pointer, then we use it instead of the stack
3839 pointer, so that the stack pointer does not need to be valid when
3840 the epilogue starts. See EXIT_IGNORE_STACK. */
3841 if (frame_pointer_needed)
3842 setup_spill_pointers (current_frame_info.n_spilled,
3843 hard_frame_pointer_rtx, 0);
3844 else
b40da9a7 3845 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
cac50a9f 3846 current_frame_info.total_size);
3847
3848 if (current_frame_info.total_size != 0)
3849 {
3850 /* ??? At this point we must generate a magic insn that appears to
3851 modify the spill iterators and the frame pointer. This would
3852 allow the most scheduling freedom. For now, just hard stop. */
3853 emit_insn (gen_blockage ());
3854 }
3855
3856 /* Locate the bottom of the register save area. */
3857 cfa_off = (current_frame_info.spill_cfa_off
3858 + current_frame_info.spill_size
3859 + current_frame_info.extra_spill_size);
3860
3861 /* Restore the predicate registers. */
3862 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3863 {
3072d30e 3864 if (current_frame_info.r[reg_save_pr] != 0)
3865 {
3866 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3867 reg_emitted (reg_save_pr);
3868 }
cac50a9f 3869 else
3870 {
3871 alt_regno = next_scratch_gr_reg ();
3872 alt_reg = gen_rtx_REG (DImode, alt_regno);
de4f4740 3873 do_restore (gen_movdi_x, alt_reg, cfa_off);
cac50a9f 3874 cfa_off -= 8;
3875 }
3876 reg = gen_rtx_REG (DImode, PR_REG (0));
3877 emit_move_insn (reg, alt_reg);
3878 }
3879
3880 /* Restore the application registers. */
3881
3882 /* Load the saved unat from the stack, but do not restore it until
3883 after the GRs have been restored. */
3884 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3885 {
3072d30e 3886 if (current_frame_info.r[reg_save_ar_unat] != 0)
3887 {
3888 ar_unat_save_reg
3889 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3890 reg_emitted (reg_save_ar_unat);
3891 }
cac50a9f 3892 else
3893 {
3894 alt_regno = next_scratch_gr_reg ();
3895 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3896 current_frame_info.gr_used_mask |= 1 << alt_regno;
de4f4740 3897 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
cac50a9f 3898 cfa_off -= 8;
3899 }
3900 }
3901 else
3902 ar_unat_save_reg = NULL_RTX;
b40da9a7 3903
3072d30e 3904 if (current_frame_info.r[reg_save_ar_pfs] != 0)
cac50a9f 3905 {
3072d30e 3906 reg_emitted (reg_save_ar_pfs);
3907 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
cac50a9f 3908 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3909 emit_move_insn (reg, alt_reg);
3910 }
7d8b6742 3911 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
ac445222 3912 {
cac50a9f 3913 alt_regno = next_scratch_gr_reg ();
3914 alt_reg = gen_rtx_REG (DImode, alt_regno);
de4f4740 3915 do_restore (gen_movdi_x, alt_reg, cfa_off);
cac50a9f 3916 cfa_off -= 8;
3917 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3918 emit_move_insn (reg, alt_reg);
3919 }
3920
3921 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3922 {
3072d30e 3923 if (current_frame_info.r[reg_save_ar_lc] != 0)
3924 {
3925 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3926 reg_emitted (reg_save_ar_lc);
3927 }
cac50a9f 3928 else
3929 {
3930 alt_regno = next_scratch_gr_reg ();
3931 alt_reg = gen_rtx_REG (DImode, alt_regno);
de4f4740 3932 do_restore (gen_movdi_x, alt_reg, cfa_off);
cac50a9f 3933 cfa_off -= 8;
3934 }
3935 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3936 emit_move_insn (reg, alt_reg);
3937 }
3938
f6eff11b 3939 /* Restore the return pointer. */
3940 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3941 {
3072d30e 3942 if (current_frame_info.r[reg_save_b0] != 0)
3943 {
3944 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3945 reg_emitted (reg_save_b0);
3946 }
f6eff11b 3947 else
3948 {
3949 alt_regno = next_scratch_gr_reg ();
3950 alt_reg = gen_rtx_REG (DImode, alt_regno);
3951 do_restore (gen_movdi_x, alt_reg, cfa_off);
3952 cfa_off -= 8;
3953 }
3954 reg = gen_rtx_REG (DImode, BR_REG (0));
3955 emit_move_insn (reg, alt_reg);
3956 }
3957
cac50a9f 3958 /* We should now be at the base of the gr/br/fr spill area. */
c5c17bca 3959 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3960 + current_frame_info.spill_size));
cac50a9f 3961
e13693ec 3962 /* The GP may be stored on the stack in the prologue, but it's
3963 never restored in the epilogue. Skip the stack slot. */
3964 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3965 cfa_off -= 8;
3966
cac50a9f 3967 /* Restore all general registers. */
e13693ec 3968 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
cac50a9f 3969 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
7c2f467a 3970 {
cac50a9f 3971 reg = gen_rtx_REG (DImode, regno);
3972 do_restore (gen_gr_restore, reg, cfa_off);
3973 cfa_off -= 8;
7c2f467a 3974 }
b40da9a7 3975
f6eff11b 3976 /* Restore the branch registers. */
cac50a9f 3977 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3978 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
7c2f467a 3979 {
cac50a9f 3980 alt_regno = next_scratch_gr_reg ();
3981 alt_reg = gen_rtx_REG (DImode, alt_regno);
de4f4740 3982 do_restore (gen_movdi_x, alt_reg, cfa_off);
cac50a9f 3983 cfa_off -= 8;
3984 reg = gen_rtx_REG (DImode, regno);
3985 emit_move_insn (reg, alt_reg);
3986 }
ac445222 3987
cac50a9f 3988 /* Restore floating point registers. */
3989 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3990 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3991 {
c5c17bca 3992 gcc_assert (!(cfa_off & 15));
b8bc42e9 3993 reg = gen_rtx_REG (XFmode, regno);
de4f4740 3994 do_restore (gen_fr_restore_x, reg, cfa_off);
cac50a9f 3995 cfa_off -= 16;
7c2f467a 3996 }
cac50a9f 3997
3998 /* Restore ar.unat for real. */
3999 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4000 {
4001 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4002 emit_move_insn (reg, ar_unat_save_reg);
ac445222 4003 }
4004
c5c17bca 4005 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
cac50a9f 4006
4007 finish_spill_pointers ();
ac445222 4008
26f19261 4009 if (current_frame_info.total_size
4010 || cfun->machine->ia64_eh_epilogue_sp
4011 || frame_pointer_needed)
cac50a9f 4012 {
4013 /* ??? At this point we must generate a magic insn that appears to
4014 modify the spill iterators, the stack pointer, and the frame
4015 pointer. This would allow the most scheduling freedom. For now,
4016 just hard stop. */
4017 emit_insn (gen_blockage ());
4018 }
ac445222 4019
cac50a9f 4020 if (cfun->machine->ia64_eh_epilogue_sp)
4021 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4022 else if (frame_pointer_needed)
4023 {
4024 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4025 RTX_FRAME_RELATED_P (insn) = 1;
585d208e 4026 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
cac50a9f 4027 }
4028 else if (current_frame_info.total_size)
7c2f467a 4029 {
cac50a9f 4030 rtx offset, frame_size_rtx;
4031
4032 frame_size_rtx = GEN_INT (current_frame_info.total_size);
269f7060 4033 if (satisfies_constraint_I (frame_size_rtx))
cac50a9f 4034 offset = frame_size_rtx;
4035 else
4036 {
4037 regno = next_scratch_gr_reg ();
4038 offset = gen_rtx_REG (DImode, regno);
4039 emit_move_insn (offset, frame_size_rtx);
4040 }
4041
4042 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4043 offset));
4044
4045 RTX_FRAME_RELATED_P (insn) = 1;
585d208e 4046 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4047 gen_rtx_SET (VOIDmode,
4048 stack_pointer_rtx,
4049 gen_rtx_PLUS (DImode,
4050 stack_pointer_rtx,
4051 frame_size_rtx)));
7c2f467a 4052 }
cac50a9f 4053
4054 if (cfun->machine->ia64_eh_epilogue_bsp)
4055 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
b40da9a7 4056
46ebdd6b 4057 if (! sibcall_p)
4058 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
83b1a843 4059 else
449c0509 4060 {
4061 int fp = GR_REG (2);
585d208e 4062 /* We need a throwaway register here; r0 and r1 are reserved,
4063 so r2 is the first available call clobbered register. If
4064 there was a frame_pointer register, we may have swapped the
4065 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4066 sure we're using the string "r2" when emitting the register
4067 name for the assembler. */
3072d30e 4068 if (current_frame_info.r[reg_fp]
4069 && current_frame_info.r[reg_fp] == GR_REG (2))
449c0509 4070 fp = HARD_FRAME_POINTER_REGNUM;
4071
4072 /* We must emit an alloc to force the input registers to become output
4073 registers. Otherwise, if the callee tries to pass its parameters
4074 through to another call without an intervening alloc, then these
4075 values get lost. */
4076 /* ??? We don't need to preserve all input registers. We only need to
4077 preserve those input registers used as arguments to the sibling call.
4078 It is unclear how to compute that number here. */
4079 if (current_frame_info.n_input_regs != 0)
ef4c30a3 4080 {
4081 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
42198dc2 4082
ef4c30a3 4083 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4084 const0_rtx, const0_rtx,
4085 n_inputs, const0_rtx));
4086 RTX_FRAME_RELATED_P (insn) = 1;
42198dc2 4087
4088 /* ??? We need to mark the alloc as frame-related so that it gets
4089 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4090 But there's nothing dwarf2 related to be done wrt the register
4091 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4092 the empty parallel means dwarf2out will not see anything. */
4093 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4094 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
ef4c30a3 4095 }
449c0509 4096 }
ac445222 4097}
4098
cac50a9f 4099/* Return 1 if br.ret can do all the work required to return from a
4100 function. */
4101
4102int
b40da9a7 4103ia64_direct_return (void)
cac50a9f 4104{
4105 if (reload_completed && ! frame_pointer_needed)
4106 {
4107 ia64_compute_frame_size (get_frame_size ());
4108
4109 return (current_frame_info.total_size == 0
4110 && current_frame_info.n_spilled == 0
3072d30e 4111 && current_frame_info.r[reg_save_b0] == 0
4112 && current_frame_info.r[reg_save_pr] == 0
4113 && current_frame_info.r[reg_save_ar_pfs] == 0
4114 && current_frame_info.r[reg_save_ar_unat] == 0
4115 && current_frame_info.r[reg_save_ar_lc] == 0);
cac50a9f 4116 }
4117 return 0;
4118}
4119
f6b73c1b 4120/* Return the magic cookie that we use to hold the return address
4121 during early compilation. */
4122
4123rtx
b40da9a7 4124ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
f6b73c1b 4125{
4126 if (count != 0)
4127 return NULL;
4128 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4129}
4130
4131/* Split this value after reload, now that we know where the return
4132 address is saved. */
4133
4134void
b40da9a7 4135ia64_split_return_addr_rtx (rtx dest)
f6b73c1b 4136{
4137 rtx src;
4138
4139 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4140 {
3072d30e 4141 if (current_frame_info.r[reg_save_b0] != 0)
4142 {
4143 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4144 reg_emitted (reg_save_b0);
4145 }
f6b73c1b 4146 else
4147 {
4148 HOST_WIDE_INT off;
4149 unsigned int regno;
269f7060 4150 rtx off_r;
f6b73c1b 4151
4152 /* Compute offset from CFA for BR0. */
4153 /* ??? Must be kept in sync with ia64_expand_prologue. */
4154 off = (current_frame_info.spill_cfa_off
4155 + current_frame_info.spill_size);
4156 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4157 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4158 off -= 8;
4159
4160 /* Convert CFA offset to a register based offset. */
4161 if (frame_pointer_needed)
4162 src = hard_frame_pointer_rtx;
4163 else
4164 {
4165 src = stack_pointer_rtx;
4166 off += current_frame_info.total_size;
4167 }
4168
4169 /* Load address into scratch register. */
269f7060 4170 off_r = GEN_INT (off);
4171 if (satisfies_constraint_I (off_r))
4172 emit_insn (gen_adddi3 (dest, src, off_r));
f6b73c1b 4173 else
4174 {
269f7060 4175 emit_move_insn (dest, off_r);
f6b73c1b 4176 emit_insn (gen_adddi3 (dest, src, dest));
4177 }
4178
4179 src = gen_rtx_MEM (Pmode, dest);
4180 }
4181 }
4182 else
4183 src = gen_rtx_REG (DImode, BR_REG (0));
4184
4185 emit_move_insn (dest, src);
4186}
4187
93689e16 4188int
b40da9a7 4189ia64_hard_regno_rename_ok (int from, int to)
93689e16 4190{
4191 /* Don't clobber any of the registers we reserved for the prologue. */
9f1b7d17 4192 unsigned int r;
93689e16 4193
3072d30e 4194 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4195 if (to == current_frame_info.r[r]
4196 || from == current_frame_info.r[r]
4197 || to == emitted_frame_related_regs[r]
4198 || from == emitted_frame_related_regs[r])
4199 return 0;
a0bb0d20 4200
93689e16 4201 /* Don't use output registers outside the register frame. */
4202 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4203 return 0;
4204
4205 /* Retain even/oddness on predicate register pairs. */
4206 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4207 return (from & 1) == (to & 1);
4208
4209 return 1;
4210}
4211
58356836 4212/* Target hook for assembling integer objects. Handle word-sized
4213 aligned objects and detect the cases when @fptr is needed. */
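/* For instance, an aligned 64-bit pointer to a function symbol is emitted
   as "data8 @fptr(sym)", so that the linker substitutes the address of the
   official function descriptor rather than the raw code address.  */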
4214
4215static bool
b40da9a7 4216ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
58356836 4217{
f7d1f9d4 4218 if (size == POINTER_SIZE / BITS_PER_UNIT
58356836 4219 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4220 && GET_CODE (x) == SYMBOL_REF
c50e596a 4221 && SYMBOL_REF_FUNCTION_P (x))
58356836 4222 {
dc5a301a 4223 static const char * const directive[2][2] = {
4224 /* 64-bit pointer */ /* 32-bit pointer */
4225 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4226 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4227 };
4228 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
58356836 4229 output_addr_const (asm_out_file, x);
4230 fputs (")\n", asm_out_file);
4231 return true;
4232 }
4233 return default_assemble_integer (x, size, aligned_p);
4234}
4235
ac445222 4236/* Emit the function prologue. */
4237
17d9b0c3 4238static void
b40da9a7 4239ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
ac445222 4240{
cac50a9f 4241 int mask, grsave, grsave_prev;
4242
4243 if (current_frame_info.need_regstk)
4244 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4245 current_frame_info.n_input_regs,
4246 current_frame_info.n_local_regs,
4247 current_frame_info.n_output_regs,
4248 current_frame_info.n_rotate_regs);
ac445222 4249
b213bf24 4250 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
7c2f467a 4251 return;
4252
cac50a9f 4253 /* Emit the .prologue directive. */
61788791 4254
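/* The mask bits passed to .prologue are, from most to least significant:
   8 = rp (b0), 4 = ar.pfs, 2 = psp (the frame pointer copy), 1 = pr;
   GRSAVE names the first of the consecutive GRs holding those saves.  */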
cac50a9f 4255 mask = 0;
4256 grsave = grsave_prev = 0;
3072d30e 4257 if (current_frame_info.r[reg_save_b0] != 0)
7c2f467a 4258 {
cac50a9f 4259 mask |= 8;
3072d30e 4260 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
cac50a9f 4261 }
3072d30e 4262 if (current_frame_info.r[reg_save_ar_pfs] != 0
cac50a9f 4263 && (grsave_prev == 0
3072d30e 4264 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
cac50a9f 4265 {
4266 mask |= 4;
4267 if (grsave_prev == 0)
3072d30e 4268 grsave = current_frame_info.r[reg_save_ar_pfs];
4269 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
7c2f467a 4270 }
3072d30e 4271 if (current_frame_info.r[reg_fp] != 0
cac50a9f 4272 && (grsave_prev == 0
3072d30e 4273 || current_frame_info.r[reg_fp] == grsave_prev + 1))
cac50a9f 4274 {
4275 mask |= 2;
4276 if (grsave_prev == 0)
4277 grsave = HARD_FRAME_POINTER_REGNUM;
3072d30e 4278 grsave_prev = current_frame_info.r[reg_fp];
cac50a9f 4279 }
3072d30e 4280 if (current_frame_info.r[reg_save_pr] != 0
cac50a9f 4281 && (grsave_prev == 0
3072d30e 4282 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
cac50a9f 4283 {
4284 mask |= 1;
4285 if (grsave_prev == 0)
3072d30e 4286 grsave = current_frame_info.r[reg_save_pr];
cac50a9f 4287 }
4288
66c17c96 4289 if (mask && TARGET_GNU_AS)
cac50a9f 4290 fprintf (file, "\t.prologue %d, %d\n", mask,
4291 ia64_dbx_register_number (grsave));
4292 else
4293 fputs ("\t.prologue\n", file);
4294
4295 /* Emit a .spill directive, if necessary, to relocate the base of
4296 the register spill area. */
4297 if (current_frame_info.spill_cfa_off != -16)
4298 fprintf (file, "\t.spill %ld\n",
4299 (long) (current_frame_info.spill_cfa_off
4300 + current_frame_info.spill_size));
ac445222 4301}
4302
2517dcd5 4303/* Emit the .body directive at the scheduled end of the prologue. */
4304
85ae73e8 4305static void
b40da9a7 4306ia64_output_function_end_prologue (FILE *file)
2517dcd5 4307{
b213bf24 4308 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
2517dcd5 4309 return;
4310
4311 fputs ("\t.body\n", file);
4312}
4313
ac445222 4314/* Emit the function epilogue. */
4315
17d9b0c3 4316static void
b40da9a7 4317ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4318 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
ac445222 4319{
57420024 4320 int i;
4321
3072d30e 4322 if (current_frame_info.r[reg_fp])
cac50a9f 4323 {
4324 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4325 reg_names[HARD_FRAME_POINTER_REGNUM]
3072d30e 4326 = reg_names[current_frame_info.r[reg_fp]];
4327 reg_names[current_frame_info.r[reg_fp]] = tmp;
4328 reg_emitted (reg_fp);
cac50a9f 4329 }
4330 if (! TARGET_REG_NAMES)
4331 {
cac50a9f 4332 for (i = 0; i < current_frame_info.n_input_regs; i++)
4333 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4334 for (i = 0; i < current_frame_info.n_local_regs; i++)
4335 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4336 for (i = 0; i < current_frame_info.n_output_regs; i++)
4337 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4338 }
57420024 4339
cac50a9f 4340 current_frame_info.initialized = 0;
4341}
ac445222 4342
4343int
b40da9a7 4344ia64_dbx_register_number (int regno)
ac445222 4345{
cac50a9f 4346 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4347 from its home at loc79 to something inside the register frame. We
4348 must perform the same renumbering here for the debug info. */
3072d30e 4349 if (current_frame_info.r[reg_fp])
cac50a9f 4350 {
4351 if (regno == HARD_FRAME_POINTER_REGNUM)
3072d30e 4352 regno = current_frame_info.r[reg_fp];
4353 else if (regno == current_frame_info.r[reg_fp])
cac50a9f 4354 regno = HARD_FRAME_POINTER_REGNUM;
4355 }
4356
4357 if (IN_REGNO_P (regno))
4358 return 32 + regno - IN_REG (0);
4359 else if (LOC_REGNO_P (regno))
4360 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4361 else if (OUT_REGNO_P (regno))
4362 return (32 + current_frame_info.n_input_regs
4363 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4364 else
4365 return regno;
ac445222 4366}
4367
7193d01d 4368/* Implement TARGET_TRAMPOLINE_INIT.
4369
4370 The trampoline should set the static chain pointer to the value placed
4371 into the trampoline and should branch to the specified routine.
4372 To make the normal indirect-subroutine calling convention work,
4373 the trampoline must look like a function descriptor; the first
4374 word being the target address and the second being the target's
4375 global pointer.
4376
4377 We abuse the concept of a global pointer by arranging for it
4378 to point to the data we need to load. The complete trampoline
4379 has the following form:
4380
4381 +-------------------+ \
4382 TRAMP: | __ia64_trampoline | |
4383 +-------------------+ > fake function descriptor
4384 | TRAMP+16 | |
4385 +-------------------+ /
4386 | target descriptor |
4387 +-------------------+
4388 | static link |
4389 +-------------------+
4390*/
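/* At run time, an indirect call through TRAMP loads the code address
   (__ia64_trampoline, provided by libgcc) and gp = TRAMP+16 from the fake
   descriptor; the stub then uses gp to load the static chain and branch
   through the real target descriptor.  */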
4391
4392static void
4393ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
cac50a9f 4394{
7193d01d 4395 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4396 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
cac50a9f 4397
66c17c96 4398 /* The Intel assembler requires that the global __ia64_trampoline symbol
4399 be declared explicitly. */
4400 if (!TARGET_GNU_AS)
4401 {
4402 static bool declared_ia64_trampoline = false;
4403
4404 if (!declared_ia64_trampoline)
4405 {
4406 declared_ia64_trampoline = true;
f7d1f9d4 4407 (*targetm.asm_out.globalize_label) (asm_out_file,
4408 "__ia64_trampoline");
66c17c96 4409 }
4410 }
4411
3dce56cc 4412 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
7193d01d 4413 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
3dce56cc 4414 fnaddr = convert_memory_address (Pmode, fnaddr);
4415 static_chain = convert_memory_address (Pmode, static_chain);
4416
cac50a9f 4417 /* Load up our iterator. */
7193d01d 4418 addr_reg = copy_to_reg (addr);
4419 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
cac50a9f 4420
4421 /* The first two words are the fake descriptor:
4422 __ia64_trampoline, ADDR+16. */
8b8d3752 4423 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4424 if (TARGET_ABI_OPEN_VMS)
4425 {
4426 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4427 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4428 relocation against function symbols to make it identical to the
4429 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4430 strict ELF and dereference to get the bare code address. */
4431 rtx reg = gen_reg_rtx (Pmode);
4432 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4433 emit_move_insn (reg, tramp);
4434 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4435 tramp = reg;
4436 }
7193d01d 4437 emit_move_insn (m_tramp, tramp);
cac50a9f 4438 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
7193d01d 4439 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
cac50a9f 4440
29c05e22 4441 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
cac50a9f 4442 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
7193d01d 4443 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
cac50a9f 4444
4445 /* The third word is the target descriptor. */
7193d01d 4446 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
cac50a9f 4447 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
7193d01d 4448 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
cac50a9f 4449
4450 /* The fourth word is the static chain. */
7193d01d 4451 emit_move_insn (m_tramp, static_chain);
cac50a9f 4452}
ac445222 4453\f
4454/* Do any needed setup for a variadic function. CUM has not been updated
cac50a9f 4455 for the last named argument which has type TYPE and mode MODE.
4456
4457 We generate the actual spill instructions during prologue generation. */
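/* For example, with two named one-slot arguments next_cum.words ends up as 2,
   so n = MAX_ARGUMENT_SLOTS - 2 = 6 (MAX_ARGUMENT_SLOTS is 8 on IA-64) and
   *pretend_size = 48 bytes are reserved for the unnamed argument registers.  */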
4458
ac9a2599 4459static void
39cba157 4460ia64_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
ac9a2599 4461 tree type, int * pretend_size,
b40da9a7 4462 int second_time ATTRIBUTE_UNUSED)
ac445222 4463{
39cba157 4464 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
ac9a2599 4465
7ccc713a 4466 /* Skip the current argument. */
39cba157 4467 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
ac445222 4468
ac9a2599 4469 if (next_cum.words < MAX_ARGUMENT_SLOTS)
721513d5 4470 {
ac9a2599 4471 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
721513d5 4472 *pretend_size = n * UNITS_PER_WORD;
4473 cfun->machine->n_varargs = n;
4474 }
ac445222 4475}
4476
4477/* Check whether TYPE is a homogeneous floating point aggregate. If
4478 it is, return the mode of the floating point type that appears
4479 in all leaves. If it is not, return VOIDmode.
4480
4481 An aggregate is a homogeneous floating point aggregate if all
4482 fields/elements in it have the same floating point type (e.g.,
372bd947 4483 SFmode). 128-bit quad-precision floats are excluded.
4484
4485 Variable sized aggregates should never arrive here, since we should
4486 have already decided to pass them by reference. Top-level zero-sized
4487 aggregates are excluded because our parallels crash the middle-end. */
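/* For example, struct { float x; float y; float z; } is an HFA whose element
   mode is SFmode, while struct { float f; int i; } is not an HFA and yields
   VOIDmode.  */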
ac445222 4488
4489static enum machine_mode
fb80456a 4490hfa_element_mode (const_tree type, bool nested)
ac445222 4491{
4492 enum machine_mode element_mode = VOIDmode;
4493 enum machine_mode mode;
4494 enum tree_code code = TREE_CODE (type);
4495 int know_element_mode = 0;
4496 tree t;
4497
372bd947 4498 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4499 return VOIDmode;
4500
ac445222 4501 switch (code)
4502 {
4503 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
dfa42b3f 4504 case BOOLEAN_TYPE: case POINTER_TYPE:
ac445222 4505 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
a6d108f6 4506 case LANG_TYPE: case FUNCTION_TYPE:
ac445222 4507 return VOIDmode;
4508
4509 /* Fortran complex types are supposed to be HFAs, so we need to handle
4510 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4511 types though. */
4512 case COMPLEX_TYPE:
83186412 4513 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
b8bc42e9 4514 && TYPE_MODE (type) != TCmode)
4515 return GET_MODE_INNER (TYPE_MODE (type));
ac445222 4516 else
4517 return VOIDmode;
4518
4519 case REAL_TYPE:
4520 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4521 mode if this is contained within an aggregate. */
b8bc42e9 4522 if (nested && TYPE_MODE (type) != TFmode)
ac445222 4523 return TYPE_MODE (type);
4524 else
4525 return VOIDmode;
4526
4527 case ARRAY_TYPE:
0beeb343 4528 return hfa_element_mode (TREE_TYPE (type), 1);
ac445222 4529
4530 case RECORD_TYPE:
4531 case UNION_TYPE:
4532 case QUAL_UNION_TYPE:
1767a056 4533 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
ac445222 4534 {
4535 if (TREE_CODE (t) != FIELD_DECL)
4536 continue;
4537
4538 mode = hfa_element_mode (TREE_TYPE (t), 1);
4539 if (know_element_mode)
4540 {
4541 if (mode != element_mode)
4542 return VOIDmode;
4543 }
4544 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4545 return VOIDmode;
4546 else
4547 {
4548 know_element_mode = 1;
4549 element_mode = mode;
4550 }
4551 }
4552 return element_mode;
4553
4554 default:
4555 /* If we reach here, we probably have some front-end specific type
4556 that the backend doesn't know about. This can happen via the
4557 aggregate_value_p call in init_function_start. All we can do is
4558 ignore unknown tree types. */
4559 return VOIDmode;
4560 }
4561
4562 return VOIDmode;
4563}
4564
223b25f9 4565/* Return the number of words required to hold a quantity of TYPE and MODE
4566 when passed as an argument. */
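/* E.g. a 12-byte BLKmode aggregate needs (12 + 7) / 8 = 2 argument slots.  */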
4567static int
4bac51c9 4568ia64_function_arg_words (const_tree type, enum machine_mode mode)
223b25f9 4569{
4570 int words;
4571
4572 if (mode == BLKmode)
4573 words = int_size_in_bytes (type);
4574 else
4575 words = GET_MODE_SIZE (mode);
4576
4577 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4578}
4579
4580/* Return the number of registers that should be skipped so the current
4581 argument (described by TYPE and WORDS) will be properly aligned.
4582
4583 Integer and float arguments larger than 8 bytes start at the next
4584 even boundary. Aggregates larger than 8 bytes start at the next
4585 even boundary if the aggregate has 16 byte alignment. Note that
4586 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4587 but are still to be aligned in registers.
4588
4589 ??? The ABI does not specify how to handle aggregates with
4590 alignment from 9 to 15 bytes, or greater than 16. We handle them
4591 all as if they had 16 byte alignment. Such aggregates can occur
4592 only if gcc extensions are used. */
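/* For example, a 16-byte-aligned aggregate arriving when cum->words is odd
   returns 1, skipping one slot so the argument starts on an even slot.  */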
4593static int
4bac51c9 4594ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4595 const_tree type, int words)
223b25f9 4596{
8b8d3752 4597 /* No registers are skipped on VMS. */
4598 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
223b25f9 4599 return 0;
4600
4601 if (type
4602 && TREE_CODE (type) != INTEGER_TYPE
4603 && TREE_CODE (type) != REAL_TYPE)
4604 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4605 else
4606 return words > 1;
4607}
4608
ac445222 4609/* Return rtx for register where argument is passed, or zero if it is passed
4610 on the stack. */
ac445222 4611/* ??? 128-bit quad-precision floats are always passed in general
4612 registers. */
4613
4bac51c9 4614static rtx
39cba157 4615ia64_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
4bac51c9 4616 const_tree type, bool named, bool incoming)
ac445222 4617{
39cba157 4618 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4619
ac445222 4620 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
223b25f9 4621 int words = ia64_function_arg_words (type, mode);
4622 int offset = ia64_function_arg_offset (cum, type, words);
ac445222 4623 enum machine_mode hfa_mode = VOIDmode;
4624
8b8d3752 4625 /* For OPEN VMS, emit the instruction setting up the argument register here,
4626 when we know this will be together with the other argument-setup
4627 insns. This is not the conceptually best place to do this, but this is
4628 the easiest as we have convenient access to cumulative args info. */
4629
4630 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4631 && named == 1)
4632 {
4633 unsigned HOST_WIDE_INT regval = cum->words;
4634 int i;
4635
4636 for (i = 0; i < 8; i++)
4637 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4638
4639 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4640 GEN_INT (regval));
4641 }
4642
ac445222 4643 /* If all argument slots are used, then it must go on the stack. */
4644 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4645 return 0;
4646
6b6253f2 4647 /* On OpenVMS, an argument is either in Rn or Fn. */
4648 if (TARGET_ABI_OPEN_VMS)
4649 {
4650 if (FLOAT_MODE_P (mode))
4651 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4652 else
4653 return gen_rtx_REG (mode, basereg + cum->words);
4654 }
4655
ac445222 4656 /* Check for and handle homogeneous FP aggregates. */
4657 if (type)
4658 hfa_mode = hfa_element_mode (type, 0);
4659
4660 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4661 and unprototyped hfas are passed specially. */
4662 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4663 {
4664 rtx loc[16];
4665 int i = 0;
4666 int fp_regs = cum->fp_regs;
4667 int int_regs = cum->words + offset;
4668 int hfa_size = GET_MODE_SIZE (hfa_mode);
4669 int byte_size;
4670 int args_byte_size;
4671
4672 /* If prototyped, pass it in FR regs then GR regs.
4673 If not prototyped, pass it in both FR and GR regs.
4674
4675 If this is an SFmode aggregate, then it is possible to run out of
4676 FR regs while GR regs are still left. In that case, we pass the
4677 remaining part in the GR regs. */
4678
4679 /* Fill the FP regs. We do this always. We stop if we reach the end
4680 of the argument, the last FP register, or the last argument slot. */
4681
4682 byte_size = ((mode == BLKmode)
4683 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4684 args_byte_size = int_regs * UNITS_PER_WORD;
4685 offset = 0;
4686 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4687 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4688 {
4689 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4690 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4691 + fp_regs)),
4692 GEN_INT (offset));
ac445222 4693 offset += hfa_size;
4694 args_byte_size += hfa_size;
4695 fp_regs++;
4696 }
4697
4698 /* If no prototype, then the whole thing must go in GR regs. */
4699 if (! cum->prototype)
4700 offset = 0;
4701 /* If this is an SFmode aggregate, then we might have some left over
4702 that needs to go in GR regs. */
4703 else if (byte_size != offset)
4704 int_regs += offset / UNITS_PER_WORD;
4705
4706 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4707
4708 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4709 {
4710 enum machine_mode gr_mode = DImode;
4d4b27e7 4711 unsigned int gr_size;
ac445222 4712
4713 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4714 then this goes in a GR reg left adjusted/little endian, right
4715 adjusted/big endian. */
4716 /* ??? Currently this is handled wrong, because 4-byte hunks are
4717 always right adjusted/little endian. */
4718 if (offset & 0x4)
4719 gr_mode = SImode;
4720 /* If we have an even 4 byte hunk because the aggregate is a
4721 multiple of 4 bytes in size, then this goes in a GR reg right
4722 adjusted/little endian. */
4723 else if (byte_size - offset == 4)
4724 gr_mode = SImode;
4725
4726 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4727 gen_rtx_REG (gr_mode, (basereg
4728 + int_regs)),
4729 GEN_INT (offset));
4d4b27e7 4730
4731 gr_size = GET_MODE_SIZE (gr_mode);
4732 offset += gr_size;
4733 if (gr_size == UNITS_PER_WORD
4734 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4735 int_regs++;
4736 else if (gr_size > UNITS_PER_WORD)
4737 int_regs += gr_size / UNITS_PER_WORD;
ac445222 4738 }
908e9a95 4739 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
ac445222 4740 }
8b8d3752 4741
ac445222 4742 /* Integral values and aggregates go in general registers. If we have run out
4743 of FR registers, then FP values must also go in general registers. This can
4744 happen when we have an SFmode HFA. */
b8bc42e9 4745 else if (mode == TFmode || mode == TCmode
4746 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
1ffddaaa 4747 {
4748 int byte_size = ((mode == BLKmode)
4749 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4750 if (BYTES_BIG_ENDIAN
4751 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4752 && byte_size < UNITS_PER_WORD
4753 && byte_size > 0)
4754 {
4755 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4756 gen_rtx_REG (DImode,
4757 (basereg + cum->words
4758 + offset)),
4759 const0_rtx);
4760 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4761 }
4762 else
4763 return gen_rtx_REG (mode, basereg + cum->words + offset);
4764
4765 }
ac445222 4766
4767 /* If there is a prototype, then FP values go in a FR register when
1d5a21db 4768 named, and in a GR register when unnamed. */
ac445222 4769 else if (cum->prototype)
4770 {
d39bdc7b 4771 if (named)
ac445222 4772 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
d39bdc7b 4773 /* In big-endian mode, an anonymous SFmode value must be represented
4774 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4775 the value into the high half of the general register. */
4776 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4777 return gen_rtx_PARALLEL (mode,
4778 gen_rtvec (1,
4779 gen_rtx_EXPR_LIST (VOIDmode,
4780 gen_rtx_REG (DImode, basereg + cum->words + offset),
4781 const0_rtx)));
4782 else
4783 return gen_rtx_REG (mode, basereg + cum->words + offset);
ac445222 4784 }
4785 /* If there is no prototype, then FP values go in both FR and GR
4786 registers. */
4787 else
4788 {
d39bdc7b 4789 /* See comment above. */
4790 enum machine_mode inner_mode =
4791 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4792
ac445222 4793 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4794 gen_rtx_REG (mode, (FR_ARG_FIRST
4795 + cum->fp_regs)),
4796 const0_rtx);
4797 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
d39bdc7b 4798 gen_rtx_REG (inner_mode,
ac445222 4799 (basereg + cum->words
4800 + offset)),
4801 const0_rtx);
61788791 4802
ac445222 4803 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4804 }
4805}
4806
4bac51c9 4807/* Implement TARGET_FUNCTION_ARG target hook. */
4808
4809static rtx
39cba157 4810ia64_function_arg (cumulative_args_t cum, enum machine_mode mode,
4bac51c9 4811 const_tree type, bool named)
4812{
4813 return ia64_function_arg_1 (cum, mode, type, named, false);
4814}
4815
4816/* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4817
4818static rtx
39cba157 4819ia64_function_incoming_arg (cumulative_args_t cum,
4bac51c9 4820 enum machine_mode mode,
4821 const_tree type, bool named)
4822{
4823 return ia64_function_arg_1 (cum, mode, type, named, true);
4824}
4825
f054eb3c 4826/* Return number of bytes, at the beginning of the argument, that must be
ac445222 4827 put in registers. 0 if the argument is entirely in registers or entirely
4828 in memory. */
4829
f054eb3c 4830static int
39cba157 4831ia64_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
f054eb3c 4832 tree type, bool named ATTRIBUTE_UNUSED)
ac445222 4833{
39cba157 4834 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4835
223b25f9 4836 int words = ia64_function_arg_words (type, mode);
4837 int offset = ia64_function_arg_offset (cum, type, words);
ac445222 4838
4839 /* If all argument slots are used, then it must go on the stack. */
4840 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4841 return 0;
4842
4843 /* It doesn't matter whether the argument goes in FR or GR regs. If
4844 it fits within the 8 argument slots, then it goes entirely in
4845 registers. If it extends past the last argument slot, then the rest
4846 goes on the stack. */
4847
4848 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4849 return 0;
4850
f054eb3c 4851 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
ac445222 4852}
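
/* Illustrative sketch (not part of the original source): how the split
   computed above falls out in practice.  With MAX_ARGUMENT_SLOTS == 8 and
   UNITS_PER_WORD == 8, an aggregate needing six slots whose first slot
   (cum->words + offset) is 5 gets (8 - 5) * 8 = 24 bytes in registers and
   the remaining 24 bytes on the stack.  */
#if 0
/* Hypothetical standalone helper mirroring the arithmetic above.  */
static int
example_partial_bytes (int arg_slots, int first_slot)
{
  const int max_slots = 8, units_per_word = 8;

  if (first_slot >= max_slots)
    return 0;			/* entirely in memory */
  if (arg_slots + first_slot <= max_slots)
    return 0;			/* entirely in registers */
  return (max_slots - first_slot) * units_per_word;
}
#endif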
4853
8b8d3752 4854/* Return ivms_arg_type based on machine_mode. */
4855
4856static enum ivms_arg_type
4857ia64_arg_type (enum machine_mode mode)
4858{
4859 switch (mode)
4860 {
4861 case SFmode:
4862 return FS;
4863 case DFmode:
4864 return FT;
4865 default:
4866 return I64;
4867 }
4868}
4869
ac445222 4870/* Update CUM to point after this argument. This is patterned after
4871 ia64_function_arg. */
4872
4bac51c9 4873static void
39cba157 4874ia64_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
4bac51c9 4875 const_tree type, bool named)
ac445222 4876{
39cba157 4877 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
223b25f9 4878 int words = ia64_function_arg_words (type, mode);
4879 int offset = ia64_function_arg_offset (cum, type, words);
ac445222 4880 enum machine_mode hfa_mode = VOIDmode;
4881
4882 /* If all arg slots are already full, then there is nothing to do. */
4883 if (cum->words >= MAX_ARGUMENT_SLOTS)
8b8d3752 4884 {
4885 cum->words += words + offset;
4886 return;
4887 }
ac445222 4888
8b8d3752 4889 cum->atypes[cum->words] = ia64_arg_type (mode);
ac445222 4890 cum->words += words + offset;
4891
6b6253f2 4892 /* On OpenVMS, an argument goes either in Rn or in Fn. */
4893 if (TARGET_ABI_OPEN_VMS)
4894 {
4895 cum->int_regs = cum->words;
4896 cum->fp_regs = cum->words;
4897 return;
4898 }
4899
ac445222 4900 /* Check for and handle homogeneous FP aggregates. */
4901 if (type)
4902 hfa_mode = hfa_element_mode (type, 0);
4903
4904 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4905 and unprototyped hfas are passed specially. */
4906 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4907 {
4908 int fp_regs = cum->fp_regs;
4909 /* This is the original value of cum->words + offset. */
4910 int int_regs = cum->words - words;
4911 int hfa_size = GET_MODE_SIZE (hfa_mode);
4912 int byte_size;
4913 int args_byte_size;
4914
4915 /* If prototyped, pass it in FR regs then GR regs.
4916 If not prototyped, pass it in both FR and GR regs.
4917
4918 If this is an SFmode aggregate, then it is possible to run out of
4919 FR regs while GR regs are still left. In that case, we pass the
4920 remaining part in the GR regs. */
4921
4922 /* Fill the FP regs. We do this always. We stop if we reach the end
4923 of the argument, the last FP register, or the last argument slot. */
4924
4925 byte_size = ((mode == BLKmode)
4926 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4927 args_byte_size = int_regs * UNITS_PER_WORD;
4928 offset = 0;
4929 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4930 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4931 {
ac445222 4932 offset += hfa_size;
4933 args_byte_size += hfa_size;
4934 fp_regs++;
4935 }
4936
4937 cum->fp_regs = fp_regs;
4938 }
4939
d0294e5d 4940 /* Integral and aggregates go in general registers. So do TFmode FP values.
4941 If we have run out of FR registers, then other FP values must also go in
4942 general registers. This can happen when we have an SFmode HFA. */
4943 else if (mode == TFmode || mode == TCmode
4944 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3a7c3680 4945 cum->int_regs = cum->words;
ac445222 4946
4947 /* If there is a prototype, then FP values go in a FR register when
1d5a21db 4948 named, and in a GR register when unnamed. */
ac445222 4949 else if (cum->prototype)
4950 {
4951 if (! named)
3a7c3680 4952 cum->int_regs = cum->words;
ac445222 4953 else
4954 /* ??? Complex types should not reach here. */
4955 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4956 }
4957 /* If there is no prototype, then FP values go in both FR and GR
4958 registers. */
4959 else
b40da9a7 4960 {
3a7c3680 4961 /* ??? Complex types should not reach here. */
4962 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4963 cum->int_regs = cum->words;
4964 }
ac445222 4965}
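
/* Illustrative sketch (not part of the original source): after advancing
   past a named, prototyped  struct { double a, b, c; }  (an HFA of three
   DFmode elements) that starts at slot 0, the loop above has bumped
   cum->fp_regs by 3 and cum->words by 3, so the next argument sees both
   the FR cursor and the GR cursor past the aggregate.  */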
468c465f 4966
d0294e5d 4967/* Arguments with alignment larger than 8 bytes start at the next even
e1b8c0b4 4968 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
d0294e5d 4969 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4970
bd99ba64 4971static unsigned int
4972ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
d0294e5d 4973{
d0294e5d 4974 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4975 return PARM_BOUNDARY * 2;
4976
4977 if (type)
4978 {
4979 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4980 return PARM_BOUNDARY * 2;
4981 else
4982 return PARM_BOUNDARY;
4983 }
4984
4985 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4986 return PARM_BOUNDARY * 2;
4987 else
4988 return PARM_BOUNDARY;
4989}
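
/* Illustrative sketch (not part of the original source), assuming
   PARM_BOUNDARY == 64: an __int128 argument or a struct declared with
   __attribute__ ((aligned (16))) reports a 128-bit boundary, while int,
   long and double stay at 64 bits; on ILP32 HP-UX the TFmode special case
   above bumps quad-precision FP arguments to 128 bits as well.  */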
4990
e13693ec 4991/* True if it is OK to do sibling call optimization for the specified
4992 call expression EXP. DECL will be the called function, or NULL if
4993 this is an indirect call. */
4994static bool
b40da9a7 4995ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
e13693ec 4996{
d9137ad4 4997 /* We can't perform a sibcall if the current function has the syscall_linkage
4998 attribute. */
4999 if (lookup_attribute ("syscall_linkage",
5000 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5001 return false;
5002
8c01cc0e 5003 /* We must always return with our current GP. This means we can
c49d20be 5004 only sibcall to functions defined in the current module unless
5005 TARGET_CONST_GP is set to true. */
5006 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
e13693ec 5007}
ac445222 5008\f
ac445222 5009
5010/* Implement va_arg. */
5011
e0eca1fa 5012static tree
75a70cf9 5013ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5014 gimple_seq *post_p)
fcdd3ab3 5015{
fcdd3ab3 5016 /* Variable sized types are passed by reference. */
2cd7bb84 5017 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
fcdd3ab3 5018 {
e0eca1fa 5019 tree ptrtype = build_pointer_type (type);
5020 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
f2462d6c 5021 return build_va_arg_indirect_ref (addr);
fcdd3ab3 5022 }
5023
5024 /* Aggregate arguments with alignment larger than 8 bytes start at
5025 the next even boundary. Integer and floating point arguments
5026 do so if they are larger than 8 bytes, whether or not they are
5027 also aligned larger than 8 bytes. */
5028 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5029 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5030 {
2cc66f2a 5031 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
ed03eadb 5032 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
2cc66f2a 5033 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
75a70cf9 5034 gimplify_assign (unshare_expr (valist), t, pre_p);
fcdd3ab3 5035 }
5036
e0eca1fa 5037 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
fcdd3ab3 5038}
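
/* Illustrative sketch (not part of the original source): the expression
   built above rounds the va_list pointer up to the next 16-byte boundary,
   i.e. (valist + 2 * UNITS_PER_WORD - 1) & -(2 * UNITS_PER_WORD).  */
#if 0
/* Hypothetical helper showing the same arithmetic on a plain address,
   assuming UNITS_PER_WORD == 8; 0x...28 rounds up to 0x...30, while
   0x...30 is left unchanged.  */
static unsigned HOST_WIDE_INT
example_va_round_up (unsigned HOST_WIDE_INT p)
{
  return (p + 15) & ~(unsigned HOST_WIDE_INT) 15;
}
#endif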
ac445222 5039\f
5040/* Return true if the function's return value is returned in memory, and
5041 false if it is in a register. */
5042
ac9a2599 5043static bool
fb80456a 5044ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
ac445222 5045{
5046 enum machine_mode mode;
5047 enum machine_mode hfa_mode;
55819289 5048 HOST_WIDE_INT byte_size;
ac445222 5049
5050 mode = TYPE_MODE (valtype);
55819289 5051 byte_size = GET_MODE_SIZE (mode);
5052 if (mode == BLKmode)
5053 {
5054 byte_size = int_size_in_bytes (valtype);
5055 if (byte_size < 0)
ac9a2599 5056 return true;
55819289 5057 }
ac445222 5058
5059 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5060
5061 hfa_mode = hfa_element_mode (valtype, 0);
5062 if (hfa_mode != VOIDmode)
5063 {
5064 int hfa_size = GET_MODE_SIZE (hfa_mode);
5065
ac445222 5066 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
ac9a2599 5067 return true;
ac445222 5068 else
ac9a2599 5069 return false;
ac445222 5070 }
ac445222 5071 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
ac9a2599 5072 return true;
ac445222 5073 else
ac9a2599 5074 return false;
ac445222 5075}
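
/* Illustrative sketch (not part of the original source), assuming
   MAX_ARGUMENT_SLOTS == 8, MAX_INT_RETURN_SLOTS == 4 and
   UNITS_PER_WORD == 8.  */
#if 0
struct hfa_ok  { double d[8]; };  /* HFA, 8 elements: returned in FR regs  */
struct hfa_big { double d[9]; };  /* HFA, 9 elements: returned in memory   */
struct plain40 { long l[5]; };    /* not an HFA, 40 bytes > 32: in memory  */
#endif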
5076
5077/* Return rtx for register that holds the function return value. */
5078
15f3e541 5079static rtx
5080ia64_function_value (const_tree valtype,
5081 const_tree fn_decl_or_type,
5082 bool outgoing ATTRIBUTE_UNUSED)
ac445222 5083{
5084 enum machine_mode mode;
5085 enum machine_mode hfa_mode;
8b8d3752 5086 int unsignedp;
15f3e541 5087 const_tree func = fn_decl_or_type;
ac445222 5088
15f3e541 5089 if (fn_decl_or_type
5090 && !DECL_P (fn_decl_or_type))
5091 func = NULL;
5092
ac445222 5093 mode = TYPE_MODE (valtype);
5094 hfa_mode = hfa_element_mode (valtype, 0);
5095
5096 if (hfa_mode != VOIDmode)
5097 {
5098 rtx loc[8];
5099 int i;
5100 int hfa_size;
5101 int byte_size;
5102 int offset;
5103
5104 hfa_size = GET_MODE_SIZE (hfa_mode);
5105 byte_size = ((mode == BLKmode)
5106 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5107 offset = 0;
5108 for (i = 0; offset < byte_size; i++)
5109 {
5110 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5111 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5112 GEN_INT (offset));
ac445222 5113 offset += hfa_size;
5114 }
908e9a95 5115 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
ac445222 5116 }
223b25f9 5117 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
ac445222 5118 return gen_rtx_REG (mode, FR_ARG_FIRST);
5119 else
1ffddaaa 5120 {
a80a7415 5121 bool need_parallel = false;
5122
5123 /* In big-endian mode, we need to manage the layout of aggregates
5124 in the registers so that we get the bits properly aligned in
5125 the highpart of the registers. */
1ffddaaa 5126 if (BYTES_BIG_ENDIAN
5127 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
a80a7415 5128 need_parallel = true;
5129
5130 /* Something like struct S { long double x; char a[0] } is not an
5131 HFA structure, and therefore doesn't go in fp registers. But
5132 the middle-end will give it XFmode anyway, and XFmode values
5133 don't normally fit in integer registers. So we need to smuggle
5134 the value inside a parallel. */
7a979707 5135 else if (mode == XFmode || mode == XCmode || mode == RFmode)
a80a7415 5136 need_parallel = true;
5137
5138 if (need_parallel)
1ffddaaa 5139 {
5140 rtx loc[8];
5141 int offset;
5142 int bytesize;
5143 int i;
5144
5145 offset = 0;
5146 bytesize = int_size_in_bytes (valtype);
489fc1bc 5147 /* An empty PARALLEL is invalid here, but the return value
5148 doesn't matter for empty structs. */
5149 if (bytesize == 0)
5150 return gen_rtx_REG (mode, GR_RET_FIRST);
1ffddaaa 5151 for (i = 0; offset < bytesize; i++)
5152 {
5153 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5154 gen_rtx_REG (DImode,
5155 GR_RET_FIRST + i),
5156 GEN_INT (offset));
5157 offset += UNITS_PER_WORD;
5158 }
5159 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5160 }
a80a7415 5161
be94d031 5162 mode = promote_function_mode (valtype, mode, &unsignedp,
5163 func ? TREE_TYPE (func) : NULL_TREE,
5164 true);
8b8d3752 5165
a80a7415 5166 return gen_rtx_REG (mode, GR_RET_FIRST);
1ffddaaa 5167 }
ac445222 5168}
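
/* Illustrative sketch (not part of the original source): for an HFA return
   value such as  struct { float x, y, z; }  the loop above builds roughly

     (parallel:BLK [(expr_list (reg:SF f8)  (const_int 0))
                    (expr_list (reg:SF f9)  (const_int 4))
                    (expr_list (reg:SF f10) (const_int 8))])

   one FR return register per element, each annotated with its byte offset
   into the aggregate.  */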
5169
15f3e541 5170/* Worker function for TARGET_LIBCALL_VALUE. */
5171
5172static rtx
5173ia64_libcall_value (enum machine_mode mode,
5174 const_rtx fun ATTRIBUTE_UNUSED)
5175{
5176 return gen_rtx_REG (mode,
5177 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5178 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5179 && (mode) != TFmode)
5180 ? FR_RET_FIRST : GR_RET_FIRST));
5181}
5182
5183/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P. */
5184
5185static bool
5186ia64_function_value_regno_p (const unsigned int regno)
5187{
5188 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5189 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5190}
5191
40af64cc 5192/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
03c118d5 5193 We need to emit DTP-relative relocations. */
5194
40af64cc 5195static void
b40da9a7 5196ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
03c118d5 5197{
492c9aa3 5198 gcc_assert (size == 4 || size == 8);
5199 if (size == 4)
5200 fputs ("\tdata4.ua\t@dtprel(", file);
5201 else
5202 fputs ("\tdata8.ua\t@dtprel(", file);
03c118d5 5203 output_addr_const (file, x);
5204 fputs (")", file);
5205}
5206
ac445222 5207/* Print a memory address as an operand to reference that memory location. */
5208
5209/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5210 also call this from ia64_print_operand for memory addresses. */
5211
b486e21a 5212static void
b40da9a7 5213ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5214 rtx address ATTRIBUTE_UNUSED)
ac445222 5215{
5216}
5217
d6074782 5218/* Print an operand to an assembler instruction.
ac445222 5219 C Swap and print a comparison operator.
5220 D Print an FP comparison operator.
5221 E Print 32 - constant, for SImode shifts as extract.
1ef83f39 5222 e Print 64 - constant, for DImode rotates.
ac445222 5223 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5224 a floating point register emitted normally.
e41d35a8 5225 G A floating point constant.
ac445222 5226 I Invert a predicate register by adding 1.
945c34bb 5227 J Select the proper predicate register for a condition.
341cffb9 5228 j Select the inverse predicate register for a condition.
ac445222 5229 O Append .acq for volatile load.
5230 P Postincrement of a MEM.
5231 Q Append .rel for volatile store.
78d690bb 5232 R Print .s .d or nothing for a single, double or no truncation.
ac445222 5233 S Shift amount for shladd instruction.
5234 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5235 for Intel assembler.
5236 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5237 for Intel assembler.
4d053aca 5238 X A pair of floating point registers.
ac445222 5239 r Print register name, or constant 0 as r0. HP compatibility for
a5c5f9d3 5240 Linux kernel.
5241 v Print vector constant value as an 8-byte integer value. */
5242
b486e21a 5243static void
b40da9a7 5244ia64_print_operand (FILE * file, rtx x, int code)
ac445222 5245{
0aa63c5e 5246 const char *str;
5247
ac445222 5248 switch (code)
5249 {
ac445222 5250 case 0:
5251 /* Handled below. */
5252 break;
61788791 5253
ac445222 5254 case 'C':
5255 {
5256 enum rtx_code c = swap_condition (GET_CODE (x));
5257 fputs (GET_RTX_NAME (c), file);
5258 return;
5259 }
5260
5261 case 'D':
0aa63c5e 5262 switch (GET_CODE (x))
5263 {
5264 case NE:
5265 str = "neq";
5266 break;
5267 case UNORDERED:
5268 str = "unord";
5269 break;
5270 case ORDERED:
5271 str = "ord";
5272 break;
01445752 5273 case UNLT:
5274 str = "nge";
5275 break;
5276 case UNLE:
5277 str = "ngt";
5278 break;
5279 case UNGT:
5280 str = "nle";
5281 break;
5282 case UNGE:
5283 str = "nlt";
5284 break;
0aa63c5e 5285 default:
5286 str = GET_RTX_NAME (GET_CODE (x));
5287 break;
5288 }
5289 fputs (str, file);
ac445222 5290 return;
5291
5292 case 'E':
5293 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5294 return;
5295
1ef83f39 5296 case 'e':
5297 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5298 return;
5299
ac445222 5300 case 'F':
5301 if (x == CONST0_RTX (GET_MODE (x)))
0aa63c5e 5302 str = reg_names [FR_REG (0)];
ac445222 5303 else if (x == CONST1_RTX (GET_MODE (x)))
0aa63c5e 5304 str = reg_names [FR_REG (1)];
ac445222 5305 else
c5c17bca 5306 {
5307 gcc_assert (GET_CODE (x) == REG);
5308 str = reg_names [REGNO (x)];
5309 }
0aa63c5e 5310 fputs (str, file);
ac445222 5311 return;
5312
e41d35a8 5313 case 'G':
5314 {
5315 long val[4];
5316 REAL_VALUE_TYPE rv;
5317 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5318 real_to_target (val, &rv, GET_MODE (x));
5319 if (GET_MODE (x) == SFmode)
5320 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5321 else if (GET_MODE (x) == DFmode)
5322 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5323 & 0xffffffff,
5324 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5325 & 0xffffffff);
5326 else
5327 output_operand_lossage ("invalid %%G mode");
5328 }
5329 return;
5330
ac445222 5331 case 'I':
5332 fputs (reg_names [REGNO (x) + 1], file);
5333 return;
5334
945c34bb 5335 case 'J':
341cffb9 5336 case 'j':
5337 {
5338 unsigned int regno = REGNO (XEXP (x, 0));
5339 if (GET_CODE (x) == EQ)
5340 regno += 1;
5341 if (code == 'j')
5342 regno ^= 1;
5343 fputs (reg_names [regno], file);
5344 }
945c34bb 5345 return;
5346
ac445222 5347 case 'O':
5348 if (MEM_VOLATILE_P (x))
5349 fputs(".acq", file);
5350 return;
5351
5352 case 'P':
5353 {
40988080 5354 HOST_WIDE_INT value;
ac445222 5355
40988080 5356 switch (GET_CODE (XEXP (x, 0)))
5357 {
5358 default:
5359 return;
5360
5361 case POST_MODIFY:
5362 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5363 if (GET_CODE (x) == CONST_INT)
0ea79bfe 5364 value = INTVAL (x);
c5c17bca 5365 else
40988080 5366 {
c5c17bca 5367 gcc_assert (GET_CODE (x) == REG);
0ea79bfe 5368 fprintf (file, ", %s", reg_names[REGNO (x)]);
40988080 5369 return;
5370 }
40988080 5371 break;
ac445222 5372
40988080 5373 case POST_INC:
5374 value = GET_MODE_SIZE (GET_MODE (x));
40988080 5375 break;
ac445222 5376
40988080 5377 case POST_DEC:
0ea79bfe 5378 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
40988080 5379 break;
5380 }
61788791 5381
4840a03a 5382 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
ac445222 5383 return;
5384 }
5385
5386 case 'Q':
5387 if (MEM_VOLATILE_P (x))
5388 fputs(".rel", file);
5389 return;
5390
78d690bb 5391 case 'R':
5392 if (x == CONST0_RTX (GET_MODE (x)))
5393 fputs(".s", file);
5394 else if (x == CONST1_RTX (GET_MODE (x)))
5395 fputs(".d", file);
5396 else if (x == CONST2_RTX (GET_MODE (x)))
5397 ;
5398 else
5399 output_operand_lossage ("invalid %%R value");
5400 return;
5401
ac445222 5402 case 'S':
61788791 5403 fprintf (file, "%d", exact_log2 (INTVAL (x)));
ac445222 5404 return;
5405
5406 case 'T':
5407 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5408 {
61788791 5409 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
ac445222 5410 return;
5411 }
5412 break;
5413
5414 case 'U':
5415 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5416 {
9b06caff 5417 const char *prefix = "0x";
ac445222 5418 if (INTVAL (x) & 0x80000000)
5419 {
5420 fprintf (file, "0xffffffff");
5421 prefix = "";
5422 }
61788791 5423 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
ac445222 5424 return;
5425 }
5426 break;
61788791 5427
4d053aca 5428 case 'X':
5429 {
5430 unsigned int regno = REGNO (x);
5431 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5432 }
5433 return;
5434
ac445222 5435 case 'r':
836ac237 5436 /* If this operand is the constant zero, write it as register zero.
5437 Any register, zero, or CONST_INT value is OK here. */
ac445222 5438 if (GET_CODE (x) == REG)
5439 fputs (reg_names[REGNO (x)], file);
5440 else if (x == CONST0_RTX (GET_MODE (x)))
5441 fputs ("r0", file);
836ac237 5442 else if (GET_CODE (x) == CONST_INT)
5443 output_addr_const (file, x);
ac445222 5444 else
5445 output_operand_lossage ("invalid %%r value");
5446 return;
5447
a5c5f9d3 5448 case 'v':
5449 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5450 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5451 break;
5452
464c9e08 5453 case '+':
5454 {
5455 const char *which;
b40da9a7 5456
464c9e08 5457 /* For conditional branches, returns or calls, substitute
5458 sptk, dptk, dpnt, or spnt for %s. */
5459 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5460 if (x)
5461 {
5462 int pred_val = INTVAL (XEXP (x, 0));
5463
5464 /* Guess top and bottom 10% statically predicted. */
b41438e5 5465 if (pred_val < REG_BR_PROB_BASE / 50
5466 && br_prob_note_reliable_p (x))
464c9e08 5467 which = ".spnt";
5468 else if (pred_val < REG_BR_PROB_BASE / 2)
5469 which = ".dpnt";
b41438e5 5470 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5471 || !br_prob_note_reliable_p (x))
464c9e08 5472 which = ".dptk";
5473 else
5474 which = ".sptk";
5475 }
5476 else if (GET_CODE (current_output_insn) == CALL_INSN)
5477 which = ".sptk";
5478 else
5479 which = ".dptk";
5480
5481 fputs (which, file);
5482 return;
5483 }
5484
9d23a2c8 5485 case ',':
5486 x = current_insn_predicate;
5487 if (x)
5488 {
5489 unsigned int regno = REGNO (XEXP (x, 0));
5490 if (GET_CODE (x) == EQ)
5491 regno += 1;
9d23a2c8 5492 fprintf (file, "(%s) ", reg_names [regno]);
5493 }
5494 return;
5495
ac445222 5496 default:
5497 output_operand_lossage ("ia64_print_operand: unknown code");
5498 return;
5499 }
5500
5501 switch (GET_CODE (x))
5502 {
5503 /* This happens for the spill/restore instructions. */
5504 case POST_INC:
40988080 5505 case POST_DEC:
5506 case POST_MODIFY:
ac445222 5507 x = XEXP (x, 0);
87d3145e 5508 /* ... fall through ... */
ac445222 5509
5510 case REG:
5511 fputs (reg_names [REGNO (x)], file);
5512 break;
5513
5514 case MEM:
5515 {
5516 rtx addr = XEXP (x, 0);
6720e96c 5517 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
ac445222 5518 addr = XEXP (addr, 0);
5519 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5520 break;
5521 }
61788791 5522
ac445222 5523 default:
5524 output_addr_const (file, x);
5525 break;
5526 }
5527
5528 return;
5529}
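
/* Illustrative sketch (not part of the original source): in a pattern
   template along the lines of "ld8%O1 %0 = %1%P1" (similar to the load
   alternatives in ia64.md), %O1 appends ".acq" when operand 1 is a
   volatile MEM and %P1 prints the post-increment amount, so a volatile
   post-increment load could come out as

     ld8.acq r14 = [r15], 8

   while the ',' code above prefixes the insn with its qualifying
   predicate, e.g. "(p6) ".  Register numbers here are made up for the
   example.  */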
b486e21a 5530
5531/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5532
5533static bool
5534ia64_print_operand_punct_valid_p (unsigned char code)
5535{
5536 return (code == '+' || code == ',');
5537}
ac445222 5538\f
fab7adbf 5539/* Compute a (partial) cost for rtx X. Return true if the complete
5540 cost has been computed, and false if subexpressions should be
5541 scanned. In either case, *TOTAL contains the cost result. */
5542/* ??? This is incomplete. */
5543
5544static bool
20d892d1 5545ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5546 int *total, bool speed ATTRIBUTE_UNUSED)
fab7adbf 5547{
5548 switch (code)
5549 {
5550 case CONST_INT:
5551 switch (outer_code)
5552 {
5553 case SET:
269f7060 5554 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
fab7adbf 5555 return true;
5556 case PLUS:
269f7060 5557 if (satisfies_constraint_I (x))
fab7adbf 5558 *total = 0;
269f7060 5559 else if (satisfies_constraint_J (x))
fab7adbf 5560 *total = 1;
5561 else
5562 *total = COSTS_N_INSNS (1);
5563 return true;
5564 default:
269f7060 5565 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
fab7adbf 5566 *total = 0;
5567 else
5568 *total = COSTS_N_INSNS (1);
5569 return true;
5570 }
5571
5572 case CONST_DOUBLE:
5573 *total = COSTS_N_INSNS (1);
5574 return true;
5575
5576 case CONST:
5577 case SYMBOL_REF:
5578 case LABEL_REF:
5579 *total = COSTS_N_INSNS (3);
5580 return true;
5581
dd7870fa 5582 case FMA:
5583 *total = COSTS_N_INSNS (4);
5584 return true;
5585
fab7adbf 5586 case MULT:
5587 /* For multiplies wider than HImode, we have to go to the FPU,
5588 which normally involves copies. Plus there's the latency
5589 of the multiply itself, and the latency of the instructions to
5590 transfer integer regs to FP regs. */
dd7870fa 5591 if (FLOAT_MODE_P (GET_MODE (x)))
5592 *total = COSTS_N_INSNS (4);
5593 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
fab7adbf 5594 *total = COSTS_N_INSNS (10);
5595 else
5596 *total = COSTS_N_INSNS (2);
5597 return true;
5598
5599 case PLUS:
5600 case MINUS:
dd7870fa 5601 if (FLOAT_MODE_P (GET_MODE (x)))
5602 {
5603 *total = COSTS_N_INSNS (4);
5604 return true;
5605 }
5606 /* FALLTHRU */
5607
fab7adbf 5608 case ASHIFT:
5609 case ASHIFTRT:
5610 case LSHIFTRT:
5611 *total = COSTS_N_INSNS (1);
5612 return true;
5613
5614 case DIV:
5615 case UDIV:
5616 case MOD:
5617 case UMOD:
5618 /* We make divide expensive, so that divide-by-constant will be
5619 optimized to a multiply. */
5620 *total = COSTS_N_INSNS (60);
5621 return true;
5622
5623 default:
5624 return false;
5625 }
5626}
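
/* Illustrative note (not part of the original source): because DIV, MOD and
   their unsigned variants are priced at COSTS_N_INSNS (60) above, the
   middle end expands e.g.  x / 10  as a multiplication by a magic constant
   followed by shifts rather than as a runtime division, which suits ia64
   since it has no integer divide instruction.  */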
5627
1d5a21db 5628/* Calculate the cost of moving data from a register in class FROM to
59c97804 5629 one in class TO, using MODE. */
9641f63c 5630
e6078fbb 5631static int
ade444a4 5632ia64_register_move_cost (enum machine_mode mode, reg_class_t from,
5633 reg_class_t to)
964229b7 5634{
59c97804 5635 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5636 if (to == ADDL_REGS)
5637 to = GR_REGS;
5638 if (from == ADDL_REGS)
5639 from = GR_REGS;
5640
5641 /* All costs are symmetric, so reduce cases by putting the
5642 lower number class as the destination. */
5643 if (from < to)
5644 {
ade444a4 5645 reg_class_t tmp = to;
59c97804 5646 to = from, from = tmp;
5647 }
5648
b8bc42e9 5649 /* Moving from FR<->GR in XFmode must be more expensive than 2,
59c97804 5650 so that we get secondary memory reloads. Between FR_REGS,
f501ee23 5651 we have to make this at least as expensive as memory_move_cost
59c97804 5652 to avoid spectacularly poor register class preferencing. */
7a979707 5653 if (mode == XFmode || mode == RFmode)
59c97804 5654 {
5655 if (to != GR_REGS || from != GR_REGS)
f501ee23 5656 return memory_move_cost (mode, to, false);
59c97804 5657 else
5658 return 3;
5659 }
5660
5661 switch (to)
5662 {
5663 case PR_REGS:
5664 /* Moving between PR registers takes two insns. */
5665 if (from == PR_REGS)
5666 return 3;
5667 /* Moving between PR and anything but GR is impossible. */
5668 if (from != GR_REGS)
f501ee23 5669 return memory_move_cost (mode, to, false);
59c97804 5670 break;
5671
5672 case BR_REGS:
5673 /* Moving between BR and anything but GR is impossible. */
5674 if (from != GR_REGS && from != GR_AND_BR_REGS)
f501ee23 5675 return memory_move_cost (mode, to, false);
59c97804 5676 break;
5677
5678 case AR_I_REGS:
5679 case AR_M_REGS:
5680 /* Moving between AR and anything but GR is impossible. */
5681 if (from != GR_REGS)
f501ee23 5682 return memory_move_cost (mode, to, false);
59c97804 5683 break;
5684
5685 case GR_REGS:
5686 case FR_REGS:
4d053aca 5687 case FP_REGS:
59c97804 5688 case GR_AND_FR_REGS:
5689 case GR_AND_BR_REGS:
5690 case ALL_REGS:
5691 break;
5692
5693 default:
c5c17bca 5694 gcc_unreachable ();
59c97804 5695 }
d5f10acf 5696
9641f63c 5697 return 2;
5698}
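
/* Illustrative sketch (not part of the original source), assuming the
   defaults established above: ordinary GR<->GR moves cost 2, PR<->PR moves
   cost 3 (two insns), and XFmode FR<->GR moves are priced at
   memory_move_cost so that reload prefers a spill over a direct
   cross-bank copy.  */
#if 0
/* Hypothetical self-checks.  */
static void
example_move_costs (void)
{
  gcc_assert (ia64_register_move_cost (DImode, GR_REGS, GR_REGS) == 2);
  gcc_assert (ia64_register_move_cost (DImode, PR_REGS, PR_REGS) == 3);
}
#endif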
ac445222 5699
f501ee23 5700/* Calculate the cost of moving data of MODE from a register to or from
5701 memory. */
5702
5703static int
5704ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5705 reg_class_t rclass,
5706 bool in ATTRIBUTE_UNUSED)
5707{
5708 if (rclass == GENERAL_REGS
5709 || rclass == FR_REGS
5710 || rclass == FP_REGS
5711 || rclass == GR_AND_FR_REGS)
5712 return 4;
5713 else
5714 return 10;
5715}
5716
3f60eeb4 5717/* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5718 on RCLASS to use when copying X into that class. */
a5c5f9d3 5719
3f60eeb4 5720static reg_class_t
5721ia64_preferred_reload_class (rtx x, reg_class_t rclass)
a5c5f9d3 5722{
8deb3959 5723 switch (rclass)
a5c5f9d3 5724 {
5725 case FR_REGS:
4d053aca 5726 case FP_REGS:
a5c5f9d3 5727 /* Don't allow volatile mem reloads into floating point registers.
5728 This is defined to force reload to choose the r/m case instead
5729 of the f/f case when reloading (set (reg fX) (mem/v)). */
5730 if (MEM_P (x) && MEM_VOLATILE_P (x))
5731 return NO_REGS;
5732
5733 /* Force all unrecognized constants into the constant pool. */
5734 if (CONSTANT_P (x))
5735 return NO_REGS;
5736 break;
5737
5738 case AR_M_REGS:
5739 case AR_I_REGS:
5740 if (!OBJECT_P (x))
5741 return NO_REGS;
5742 break;
5743
5744 default:
5745 break;
5746 }
5747
8deb3959 5748 return rclass;
a5c5f9d3 5749}
5750
ac445222 5751/* This function returns the register class required for a secondary
8deb3959 5752 register when copying between one of the registers in RCLASS, and X,
ac445222 5753 using MODE. A return value of NO_REGS means that no secondary register
5754 is required. */
5755
5756enum reg_class
8deb3959 5757ia64_secondary_reload_class (enum reg_class rclass,
b40da9a7 5758 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
ac445222 5759{
5760 int regno = -1;
5761
5762 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5763 regno = true_regnum (x);
5764
8deb3959 5765 switch (rclass)
cac50a9f 5766 {
5767 case BR_REGS:
59c97804 5768 case AR_M_REGS:
5769 case AR_I_REGS:
5770 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5771 interaction. We end up with two pseudos with overlapping lifetimes
5772 both of which are equiv to the same constant, and both which need
5773 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5774 changes depending on the path length, which means the qty_first_reg
5775 check in make_regs_eqv can give different answers at different times.
5776 At some point I'll probably need a reload_indi pattern to handle
5777 this.
5778
5779 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5780 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5781 non-general registers for good measure. */
5782 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
cac50a9f 5783 return GR_REGS;
5784
5785 /* This is needed if a pseudo used as a call_operand gets spilled to a
5786 stack slot. */
5787 if (GET_CODE (x) == MEM)
5788 return GR_REGS;
5789 break;
5790
5791 case FR_REGS:
4d053aca 5792 case FP_REGS:
f52eff0c 5793 /* Need to go through general registers to get to other class regs. */
59c97804 5794 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5795 return GR_REGS;
b40da9a7 5796
cac50a9f 5797 /* This can happen when a paradoxical subreg is an operand to the
5798 muldi3 pattern. */
5799 /* ??? This shouldn't be necessary after instruction scheduling is
5800 enabled, because paradoxical subregs are not accepted by
5801 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5802 stop the paradoxical subreg stupidity in the *_operand functions
5803 in recog.c. */
5804 if (GET_CODE (x) == MEM
5805 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5806 || GET_MODE (x) == QImode))
5807 return GR_REGS;
5808
5809 /* This can happen because of the ior/and/etc patterns that accept FP
5810 registers as operands. If the third operand is a constant, then it
5811 needs to be reloaded into a FP register. */
5812 if (GET_CODE (x) == CONST_INT)
5813 return GR_REGS;
5814
5815 /* This can happen because of register elimination in a muldi3 insn.
5816 E.g. `26107 * (unsigned long)&u'. */
5817 if (GET_CODE (x) == PLUS)
5818 return GR_REGS;
5819 break;
5820
5821 case PR_REGS:
33c8f6d1 5822 /* ??? This happens if we cse/gcse a BImode value across a call,
cac50a9f 5823 and the function has a nonlocal goto. This is because global
5824 does not allocate call crossing pseudos to hard registers when
18d50ae6 5825 crtl->has_nonlocal_goto is true. This is relatively
cac50a9f 5826 common for C++ programs that use exceptions. To reproduce,
5827 return NO_REGS and compile libstdc++. */
5828 if (GET_CODE (x) == MEM)
5829 return GR_REGS;
33c8f6d1 5830
5831 /* This can happen when we take a BImode subreg of a DImode value,
5832 and that DImode value winds up in some non-GR register. */
5833 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5834 return GR_REGS;
cac50a9f 5835 break;
5836
5837 default:
5838 break;
5839 }
ac445222 5840
5841 return NO_REGS;
5842}
5843
77ad8e5a 5844\f
5845/* Implement targetm.unspec_may_trap_p hook. */
5846static int
5847ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5848{
5849 if (GET_CODE (x) == UNSPEC)
5850 {
5851 switch (XINT (x, 1))
5852 {
5853 case UNSPEC_LDA:
5854 case UNSPEC_LDS:
5855 case UNSPEC_LDSA:
5856 case UNSPEC_LDCCLR:
5857 case UNSPEC_CHKACLR:
5858 case UNSPEC_CHKS:
5859 /* These unspecs are just wrappers. */
5860 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5861 }
5862 }
5863
5864 return default_unspec_may_trap_p (x, flags);
5865}
5866
ac445222 5867\f
5868/* Parse the -mfixed-range= option string. */
5869
5870static void
b40da9a7 5871fix_range (const char *const_str)
ac445222 5872{
5873 int i, first, last;
9b06caff 5874 char *str, *dash, *comma;
ac445222 5875
5876 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5877 REG2 are either register names or register numbers. The effect
5878 of this option is to mark the registers in the range from REG1 to
5879 REG2 as ``fixed'' so they won't be used by the compiler. This is
5880 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5881
9b06caff 5882 i = strlen (const_str);
5883 str = (char *) alloca (i + 1);
5884 memcpy (str, const_str, i + 1);
5885
ac445222 5886 while (1)
5887 {
5888 dash = strchr (str, '-');
5889 if (!dash)
5890 {
c3ceba8e 5891 warning (0, "value of -mfixed-range must have form REG1-REG2");
ac445222 5892 return;
5893 }
5894 *dash = '\0';
5895
5896 comma = strchr (dash + 1, ',');
5897 if (comma)
5898 *comma = '\0';
5899
5900 first = decode_reg_name (str);
5901 if (first < 0)
5902 {
c3ceba8e 5903 warning (0, "unknown register name: %s", str);
ac445222 5904 return;
5905 }
5906
5907 last = decode_reg_name (dash + 1);
5908 if (last < 0)
5909 {
c3ceba8e 5910 warning (0, "unknown register name: %s", dash + 1);
ac445222 5911 return;
5912 }
5913
5914 *dash = '-';
5915
5916 if (first > last)
5917 {
c3ceba8e 5918 warning (0, "%s-%s is an empty range", str, dash + 1);
ac445222 5919 return;
5920 }
5921
5922 for (i = first; i <= last; ++i)
5923 fixed_regs[i] = call_used_regs[i] = 1;
5924
5925 if (!comma)
5926 break;
5927
5928 *comma = ',';
5929 str = comma + 1;
5930 }
5931}
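
/* Illustrative sketch (not part of the original source): a typical use is

     -mfixed-range=f32-f127

   which marks the upper floating-point registers as fixed (e.g. for kernel
   code, as noted above); several ranges can be given separated by commas,
   e.g. -mfixed-range=f12-f15,f32-f127.  Every register in each range ends
   up with fixed_regs[i] = call_used_regs[i] = 1.  */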
5932
e81d24dd 5933/* Implement TARGET_OPTION_OVERRIDE. */
ac445222 5934
e81d24dd 5935static void
5936ia64_option_override (void)
ac445222 5937{
c0bfee3d 5938 unsigned int i;
5939 cl_deferred_option *opt;
f1f41a6c 5940 vec<cl_deferred_option> *v
5941 = (vec<cl_deferred_option> *) ia64_deferred_options;
c0bfee3d 5942
f1f41a6c 5943 if (v)
5944 FOR_EACH_VEC_ELT (*v, i, opt)
5945 {
5946 switch (opt->opt_index)
5947 {
5948 case OPT_mfixed_range_:
5949 fix_range (opt->arg);
5950 break;
c0bfee3d 5951
f1f41a6c 5952 default:
5953 gcc_unreachable ();
5954 }
5955 }
c0bfee3d 5956
cc40da95 5957 if (TARGET_AUTO_PIC)
5958 target_flags |= MASK_CONST_GP;
5959
f58620b7 5960 /* Numerous experiments show that IRA-based loop pressure
5961 calculation works better for RTL loop invariant motion on targets
5962 with enough (>= 32) registers. It is an expensive optimization.
5963 So it is on only for peak performance. */
5964 if (optimize >= 3)
5965 flag_ira_loop_pressure = 1;
5966
5967
13a54dd9 5968 ia64_section_threshold = (global_options_set.x_g_switch_value
5969 ? g_switch_value
5970 : IA64_DEFAULT_GVALUE);
4bec06b3 5971
5972 init_machine_status = ia64_init_machine_status;
5973
5974 if (align_functions <= 0)
5975 align_functions = 64;
5976 if (align_loops <= 0)
5977 align_loops = 32;
5978 if (TARGET_ABI_OPEN_VMS)
5979 flag_no_common = 1;
5980
5981 ia64_override_options_after_change ();
5982}
5983
5984/* Implement targetm.override_options_after_change. */
5985
5986static void
5987ia64_override_options_after_change (void)
5988{
cf7898a6 5989 if (optimize >= 3
f83b64ca 5990 && !global_options_set.x_flag_selective_scheduling
5991 && !global_options_set.x_flag_selective_scheduling2)
cf7898a6 5992 {
5993 flag_selective_scheduling2 = 1;
5994 flag_sel_sched_pipelining = 1;
5995 }
5996 if (mflag_sched_control_spec == 2)
5997 {
5998 /* Control speculation is on by default for the selective scheduler,
5999 but not for the Haifa scheduler. */
6000 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6001 }
6002 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6003 {
6004 /* FIXME: remove this when we'd implement breaking autoinsns as
6005 a transformation. */
6006 flag_auto_inc_dec = 0;
6007 }
ac445222 6008}
b1c7573c 6009
3072d30e 6010/* Initialize the record of emitted frame related registers. */
6011
6012void ia64_init_expanders (void)
6013{
6014 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6015}
6016
b1c7573c 6017static struct machine_function *
6018ia64_init_machine_status (void)
6019{
ba72912a 6020 return ggc_alloc_cleared_machine_function ();
b1c7573c 6021}
ac445222 6022\f
b40da9a7 6023static enum attr_itanium_class ia64_safe_itanium_class (rtx);
6024static enum attr_type ia64_safe_type (rtx);
a0bb0d20 6025
a0bb0d20 6026static enum attr_itanium_class
b40da9a7 6027ia64_safe_itanium_class (rtx insn)
a0bb0d20 6028{
6029 if (recog_memoized (insn) >= 0)
6030 return get_attr_itanium_class (insn);
9845d120 6031 else if (DEBUG_INSN_P (insn))
6032 return ITANIUM_CLASS_IGNORE;
a0bb0d20 6033 else
6034 return ITANIUM_CLASS_UNKNOWN;
6035}
6036
6037static enum attr_type
b40da9a7 6038ia64_safe_type (rtx insn)
a0bb0d20 6039{
6040 if (recog_memoized (insn) >= 0)
6041 return get_attr_type (insn);
6042 else
6043 return TYPE_UNKNOWN;
6044}
6045\f
ac445222 6046/* The following collection of routines emit instruction group stop bits as
6047 necessary to avoid dependencies. */
6048
6049/* Need to track some additional registers as far as serialization is
6050 concerned so we can properly handle br.call and br.ret. We could
6051 make these registers visible to gcc, but since these registers are
6052 never explicitly used in gcc generated code, it seems wasteful to
6053 do so (plus it would make the call and return patterns needlessly
6054 complex). */
ac445222 6055#define REG_RP (BR_REG (0))
ac445222 6056#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
ac445222 6057/* This is used for volatile asms which may require a stop bit immediately
6058 before and after them. */
9641f63c 6059#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
de4f4740 6060#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6061#define NUM_REGS (AR_UNAT_BIT_0 + 64)
ac445222 6062
33c8f6d1 6063/* For each register, we keep track of how it has been written in the
6064 current instruction group.
6065
6066 If a register is written unconditionally (no qualifying predicate),
6067 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6068
6069 If a register is written if its qualifying predicate P is true, we
6070 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6071 may be written again by the complement of P (P^1) and when this happens,
6072 WRITE_COUNT gets set to 2.
6073
6074 The result of this is that whenever an insn attempts to write a register
ebb11c7b 6075 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
33c8f6d1 6076
6077 If a predicate register is written by a floating-point insn, we set
6078 WRITTEN_BY_FP to true.
6079
6080 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6081 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6082
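/* Illustrative sketch (not part of the original source) of the bookkeeping
   described above, for one register within a single instruction group:

     mov r4 = r5        // unconditional write: write_count becomes 2
     mov r4 = r6        // second write in the same group: stop bit needed
     ;;                 // the stop bit starts a new group; counts reset

   A predicated write instead sets write_count to 1 and records the
   qualifying predicate in first_pred.  */
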
c2f54bdd 6083#if GCC_VERSION >= 4000
6084#define RWS_FIELD_TYPE __extension__ unsigned short
6085#else
6086#define RWS_FIELD_TYPE unsigned int
6087#endif
ac445222 6088struct reg_write_state
6089{
c2f54bdd 6090 RWS_FIELD_TYPE write_count : 2;
6091 RWS_FIELD_TYPE first_pred : 10;
6092 RWS_FIELD_TYPE written_by_fp : 1;
6093 RWS_FIELD_TYPE written_by_and : 1;
6094 RWS_FIELD_TYPE written_by_or : 1;
ac445222 6095};
6096
6097/* Cumulative info for the current instruction group. */
6098struct reg_write_state rws_sum[NUM_REGS];
c2f54bdd 6099#ifdef ENABLE_CHECKING
6100/* Bitmap whether a register has been written in the current insn. */
6101HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6102 / HOST_BITS_PER_WIDEST_FAST_INT];
6103
6104static inline void
6105rws_insn_set (int regno)
6106{
6107 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6108 SET_HARD_REG_BIT (rws_insn, regno);
6109}
6110
6111static inline int
6112rws_insn_test (int regno)
6113{
6114 return TEST_HARD_REG_BIT (rws_insn, regno);
6115}
6116#else
6117/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6118unsigned char rws_insn[2];
6119
6120static inline void
6121rws_insn_set (int regno)
6122{
6123 if (regno == REG_AR_CFM)
6124 rws_insn[0] = 1;
6125 else if (regno == REG_VOLATILE)
6126 rws_insn[1] = 1;
6127}
6128
6129static inline int
6130rws_insn_test (int regno)
6131{
6132 if (regno == REG_AR_CFM)
6133 return rws_insn[0];
6134 if (regno == REG_VOLATILE)
6135 return rws_insn[1];
6136 return 0;
6137}
6138#endif
ac445222 6139
83b1a843 6140/* Indicates whether this is the first instruction after a stop bit,
c5c17bca 6141 in which case we don't need another stop bit. Without this,
6142 ia64_variable_issue will die when scheduling an alloc. */
83b1a843 6143static int first_instruction;
6144
ac445222 6145/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6146 RTL for one instruction. */
6147struct reg_flags
6148{
6149 unsigned int is_write : 1; /* Is register being written? */
6150 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6151 unsigned int is_branch : 1; /* Is register used as part of a branch? */
33c8f6d1 6152 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6153 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
46ebdd6b 6154 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
ac445222 6155};
6156
c2f54bdd 6157static void rws_update (int, struct reg_flags, int);
b40da9a7 6158static int rws_access_regno (int, struct reg_flags, int);
6159static int rws_access_reg (rtx, struct reg_flags, int);
fb16e37a 6160static void update_set_flags (rtx, struct reg_flags *);
6161static int set_src_needs_barrier (rtx, struct reg_flags, int);
b40da9a7 6162static int rtx_needs_barrier (rtx, struct reg_flags, int);
6163static void init_insn_group_barriers (void);
fb16e37a 6164static int group_barrier_needed (rtx);
6165static int safe_group_barrier_needed (rtx);
c2f54bdd 6166static int in_safe_group_barrier;
9b06caff 6167
ac445222 6168/* Update *RWS for REGNO, which is being written by the current instruction,
6169 with predicate PRED, and associated register flags in FLAGS. */
6170
6171static void
c2f54bdd 6172rws_update (int regno, struct reg_flags flags, int pred)
ac445222 6173{
f756da21 6174 if (pred)
c2f54bdd 6175 rws_sum[regno].write_count++;
f756da21 6176 else
c2f54bdd 6177 rws_sum[regno].write_count = 2;
6178 rws_sum[regno].written_by_fp |= flags.is_fp;
33c8f6d1 6179 /* ??? Not tracking and/or across differing predicates. */
c2f54bdd 6180 rws_sum[regno].written_by_and = flags.is_and;
6181 rws_sum[regno].written_by_or = flags.is_or;
6182 rws_sum[regno].first_pred = pred;
ac445222 6183}
6184
6185/* Handle an access to register REGNO of type FLAGS using predicate register
c2f54bdd 6186 PRED. Update rws_sum array. Return 1 if this access creates
ac445222 6187 a dependency with an earlier instruction in the same group. */
6188
6189static int
b40da9a7 6190rws_access_regno (int regno, struct reg_flags flags, int pred)
ac445222 6191{
6192 int need_barrier = 0;
ac445222 6193
c5c17bca 6194 gcc_assert (regno < NUM_REGS);
ac445222 6195
33c8f6d1 6196 if (! PR_REGNO_P (regno))
6197 flags.is_and = flags.is_or = 0;
6198
ac445222 6199 if (flags.is_write)
6200 {
261c1810 6201 int write_count;
6202
c2f54bdd 6203 rws_insn_set (regno);
261c1810 6204 write_count = rws_sum[regno].write_count;
261c1810 6205
6206 switch (write_count)
ac445222 6207 {
6208 case 0:
6209 /* The register has not been written yet. */
c2f54bdd 6210 if (!in_safe_group_barrier)
6211 rws_update (regno, flags, pred);
ac445222 6212 break;
6213
6214 case 1:
486941bd 6215 /* The register has been written via a predicate. Treat
6216 it like a unconditional write and do not try to check
6217 for complementary pred reg in earlier write. */
33c8f6d1 6218 if (flags.is_and && rws_sum[regno].written_by_and)
b40da9a7 6219 ;
33c8f6d1 6220 else if (flags.is_or && rws_sum[regno].written_by_or)
6221 ;
486941bd 6222 else
ac445222 6223 need_barrier = 1;
c2f54bdd 6224 if (!in_safe_group_barrier)
6225 rws_update (regno, flags, pred);
ac445222 6226 break;
6227
6228 case 2:
6229 /* The register has been unconditionally written already. We
6230 need a barrier. */
33c8f6d1 6231 if (flags.is_and && rws_sum[regno].written_by_and)
6232 ;
6233 else if (flags.is_or && rws_sum[regno].written_by_or)
6234 ;
6235 else
6236 need_barrier = 1;
c2f54bdd 6237 if (!in_safe_group_barrier)
6238 {
6239 rws_sum[regno].written_by_and = flags.is_and;
6240 rws_sum[regno].written_by_or = flags.is_or;
6241 }
ac445222 6242 break;
6243
6244 default:
c5c17bca 6245 gcc_unreachable ();
ac445222 6246 }
6247 }
6248 else
6249 {
6250 if (flags.is_branch)
6251 {
6252 /* Branches have several RAW exceptions that allow to avoid
6253 barriers. */
6254
9641f63c 6255 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
ac445222 6256 /* RAW dependencies on branch regs are permissible as long
6257 as the writer is a non-branch instruction. Since we
6258 never generate code that uses a branch register written
6259 by a branch instruction, handling this case is
6260 easy. */
9641f63c 6261 return 0;
ac445222 6262
6263 if (REGNO_REG_CLASS (regno) == PR_REGS
6264 && ! rws_sum[regno].written_by_fp)
6265 /* The predicates of a branch are available within the
6266 same insn group as long as the predicate was written by
87d3145e 6267 something other than a floating-point instruction. */
ac445222 6268 return 0;
6269 }
6270
33c8f6d1 6271 if (flags.is_and && rws_sum[regno].written_by_and)
6272 return 0;
6273 if (flags.is_or && rws_sum[regno].written_by_or)
6274 return 0;
6275
ac445222 6276 switch (rws_sum[regno].write_count)
6277 {
6278 case 0:
6279 /* The register has not been written yet. */
6280 break;
6281
6282 case 1:
486941bd 6283 /* The register has been written via a predicate, assume we
6284 need a barrier (don't check for complementary regs). */
6285 need_barrier = 1;
ac445222 6286 break;
6287
6288 case 2:
6289 /* The register has been unconditionally written already. We
6290 need a barrier. */
6291 need_barrier = 1;
6292 break;
6293
6294 default:
c5c17bca 6295 gcc_unreachable ();
ac445222 6296 }
6297 }
6298
6299 return need_barrier;
6300}
6301
cac50a9f 6302static int
b40da9a7 6303rws_access_reg (rtx reg, struct reg_flags flags, int pred)
cac50a9f 6304{
6305 int regno = REGNO (reg);
6306 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6307
6308 if (n == 1)
6309 return rws_access_regno (regno, flags, pred);
6310 else
6311 {
6312 int need_barrier = 0;
6313 while (--n >= 0)
6314 need_barrier |= rws_access_regno (regno + n, flags, pred);
6315 return need_barrier;
6316 }
6317}
6318
3149b29e 6319/* Examine X, which is a SET rtx, and update the register-write flags
6320 stored in *PFLAGS. */
6321
6322static void
fb16e37a 6323update_set_flags (rtx x, struct reg_flags *pflags)
3149b29e 6324{
6325 rtx src = SET_SRC (x);
6326
3149b29e 6327 switch (GET_CODE (src))
6328 {
6329 case CALL:
6330 return;
6331
6332 case IF_THEN_ELSE:
ea13ae4c 6333 /* There are four cases here:
cd8e835e 6334 (1) The destination is (pc), in which case this is a branch,
6335 nothing here applies.
6336 (2) The destination is ar.lc, in which case this is a
6337 doloop_end_internal,
6338 (3) The destination is an fp register, in which case this is
6339 an fselect instruction.
ea13ae4c 6340 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6341 this is a check load.
cd8e835e 6342 In all cases, nothing we do in this function applies. */
6343 return;
3149b29e 6344
6345 default:
6720e96c 6346 if (COMPARISON_P (src)
cd8e835e 6347 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
3149b29e 6348 /* Set pflags->is_fp to 1 so that we know we're dealing
6349 with a floating point comparison when processing the
6350 destination of the SET. */
6351 pflags->is_fp = 1;
6352
6353 /* Discover if this is a parallel comparison. We only handle
6354 and.orcm and or.andcm at present, since we must retain a
6355 strict inverse on the predicate pair. */
6356 else if (GET_CODE (src) == AND)
6357 pflags->is_and = 1;
6358 else if (GET_CODE (src) == IOR)
6359 pflags->is_or = 1;
6360
6361 break;
6362 }
6363}
6364
6365/* Subroutine of rtx_needs_barrier; this function determines whether the
6366 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6367 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6368 for this insn. */
b40da9a7 6369
3149b29e 6370static int
fb16e37a 6371set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
3149b29e 6372{
6373 int need_barrier = 0;
6374 rtx dst;
6375 rtx src = SET_SRC (x);
6376
6377 if (GET_CODE (src) == CALL)
6378 /* We don't need to worry about the result registers that
6379 get written by subroutine call. */
6380 return rtx_needs_barrier (src, flags, pred);
6381 else if (SET_DEST (x) == pc_rtx)
6382 {
6383 /* X is a conditional branch. */
6384 /* ??? This seems redundant, as the caller sets this bit for
6385 all JUMP_INSNs. */
ea13ae4c 6386 if (!ia64_spec_check_src_p (src))
6387 flags.is_branch = 1;
3149b29e 6388 return rtx_needs_barrier (src, flags, pred);
6389 }
6390
ea13ae4c 6391 if (ia64_spec_check_src_p (src))
6392 /* Avoid checking one register twice (in condition
6393 and in 'then' section) for ldc pattern. */
6394 {
6395 gcc_assert (REG_P (XEXP (src, 2)));
6396 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6397
6398 /* We process MEM below. */
6399 src = XEXP (src, 1);
6400 }
6401
6402 need_barrier |= rtx_needs_barrier (src, flags, pred);
3149b29e 6403
3149b29e 6404 dst = SET_DEST (x);
6405 if (GET_CODE (dst) == ZERO_EXTRACT)
6406 {
6407 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6408 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
3149b29e 6409 }
6410 return need_barrier;
6411}
6412
da4ab0bd 6413/* Handle an access to rtx X of type FLAGS using predicate register
6414 PRED. Return 1 if this access creates a dependency with an earlier
6415 instruction in the same group. */
ac445222 6416
6417static int
b40da9a7 6418rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
ac445222 6419{
6420 int i, j;
6421 int is_complemented = 0;
6422 int need_barrier = 0;
6423 const char *format_ptr;
6424 struct reg_flags new_flags;
fb16e37a 6425 rtx cond;
ac445222 6426
6427 if (! x)
6428 return 0;
6429
6430 new_flags = flags;
6431
6432 switch (GET_CODE (x))
6433 {
b40da9a7 6434 case SET:
fb16e37a 6435 update_set_flags (x, &new_flags);
6436 need_barrier = set_src_needs_barrier (x, new_flags, pred);
3149b29e 6437 if (GET_CODE (SET_SRC (x)) != CALL)
ac445222 6438 {
3149b29e 6439 new_flags.is_write = 1;
6440 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
ac445222 6441 }
ac445222 6442 break;
6443
6444 case CALL:
6445 new_flags.is_write = 0;
cac50a9f 6446 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
ac445222 6447
6448 /* Avoid multiple register writes, in case this is a pattern with
c5c17bca 6449 multiple CALL rtx. This avoids a failure in rws_access_reg. */
c2f54bdd 6450 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
ac445222 6451 {
6452 new_flags.is_write = 1;
cac50a9f 6453 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6454 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6455 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
ac445222 6456 }
6457 break;
6458
945c34bb 6459 case COND_EXEC:
6460 /* X is a predicated instruction. */
6461
6462 cond = COND_EXEC_TEST (x);
c5c17bca 6463 gcc_assert (!pred);
945c34bb 6464 need_barrier = rtx_needs_barrier (cond, flags, 0);
6465
6466 if (GET_CODE (cond) == EQ)
6467 is_complemented = 1;
6468 cond = XEXP (cond, 0);
c5c17bca 6469 gcc_assert (GET_CODE (cond) == REG
fb16e37a 6470 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
945c34bb 6471 pred = REGNO (cond);
6472 if (is_complemented)
6473 ++pred;
6474
6475 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6476 return need_barrier;
6477
ac445222 6478 case CLOBBER:
ac445222 6479 case USE:
ac445222 6480 /* Clobber & use are for earlier compiler-phases only. */
6481 break;
6482
6483 case ASM_OPERANDS:
6484 case ASM_INPUT:
6485 /* We always emit stop bits for traditional asms. We emit stop bits
6486 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6487 if (GET_CODE (x) != ASM_OPERANDS
6488 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6489 {
6490 /* Avoid writing the register multiple times if we have multiple
c5c17bca 6491 asm outputs. This avoids a failure in rws_access_reg. */
c2f54bdd 6492 if (! rws_insn_test (REG_VOLATILE))
ac445222 6493 {
6494 new_flags.is_write = 1;
cac50a9f 6495 rws_access_regno (REG_VOLATILE, new_flags, pred);
ac445222 6496 }
6497 return 1;
6498 }
6499
6500 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
33f88b1c 6501 We cannot just fall through here since then we would be confused
ac445222 6502 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6503 traditional asms unlike their normal usage. */
6504
6505 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6506 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6507 need_barrier = 1;
6508 break;
6509
6510 case PARALLEL:
6511 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
3149b29e 6512 {
6513 rtx pat = XVECEXP (x, 0, i);
3f494649 6514 switch (GET_CODE (pat))
3149b29e 6515 {
3f494649 6516 case SET:
fb16e37a 6517 update_set_flags (pat, &new_flags);
6518 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
3f494649 6519 break;
6520
6521 case USE:
6522 case CALL:
6523 case ASM_OPERANDS:
6524 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6525 break;
6526
6527 case CLOBBER:
7f2c335b 6528 if (REG_P (XEXP (pat, 0))
6529 && extract_asm_operands (x) != NULL_RTX
6530 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6531 {
6532 new_flags.is_write = 1;
6533 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6534 new_flags, pred);
6535 new_flags = flags;
6536 }
6537 break;
6538
3f494649 6539 case RETURN:
6540 break;
6541
6542 default:
6543 gcc_unreachable ();
3149b29e 6544 }
3149b29e 6545 }
6546 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6547 {
6548 rtx pat = XVECEXP (x, 0, i);
6549 if (GET_CODE (pat) == SET)
6550 {
6551 if (GET_CODE (SET_SRC (pat)) != CALL)
6552 {
6553 new_flags.is_write = 1;
6554 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6555 pred);
6556 }
6557 }
1c97971e 6558 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
3149b29e 6559 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6560 }
ac445222 6561 break;
6562
6563 case SUBREG:
9fccf84d 6564 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6565 break;
ac445222 6566 case REG:
de4f4740 6567 if (REGNO (x) == AR_UNAT_REGNUM)
6568 {
6569 for (i = 0; i < 64; ++i)
6570 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6571 }
6572 else
6573 need_barrier = rws_access_reg (x, flags, pred);
ac445222 6574 break;
6575
6576 case MEM:
6577 /* Find the regs used in memory address computation. */
6578 new_flags.is_write = 0;
6579 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6580 break;
6581
3f494649 6582 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
ac445222 6583 case SYMBOL_REF: case LABEL_REF: case CONST:
6584 break;
6585
6586 /* Operators with side-effects. */
6587 case POST_INC: case POST_DEC:
c5c17bca 6588 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
ac445222 6589
6590 new_flags.is_write = 0;
cac50a9f 6591 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
ac445222 6592 new_flags.is_write = 1;
cac50a9f 6593 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
40988080 6594 break;
6595
6596 case POST_MODIFY:
c5c17bca 6597 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
40988080 6598
6599 new_flags.is_write = 0;
cac50a9f 6600 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
40988080 6601 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6602 new_flags.is_write = 1;
cac50a9f 6603 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
ac445222 6604 break;
6605
6606 /* Handle common unary and binary ops for efficiency. */
6607 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6608 case MOD: case UDIV: case UMOD: case AND: case IOR:
6609 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6610 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6611 case NE: case EQ: case GE: case GT: case LE:
6612 case LT: case GEU: case GTU: case LEU: case LTU:
6613 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6614 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6615 break;
6616
6617 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6618 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6619 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
2d0a3e2b 6620 case SQRT: case FFS: case POPCOUNT:
ac445222 6621 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6622 break;
6623
3f494649 6624 case VEC_SELECT:
6625 /* VEC_SELECT's second argument is a PARALLEL with integers that
6626 describe the elements selected. On ia64, those integers are
6627 always constants. Avoid walking the PARALLEL so that we don't
c5c17bca 6628 get confused with "normal" parallels and then die. */
3f494649 6629 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6630 break;
6631
ac445222 6632 case UNSPEC:
6633 switch (XINT (x, 1))
6634 {
c87e09ad 6635 case UNSPEC_LTOFF_DTPMOD:
6636 case UNSPEC_LTOFF_DTPREL:
6637 case UNSPEC_DTPREL:
6638 case UNSPEC_LTOFF_TPREL:
6639 case UNSPEC_TPREL:
6640 case UNSPEC_PRED_REL_MUTEX:
6641 case UNSPEC_PIC_CALL:
6642 case UNSPEC_MF:
6643 case UNSPEC_FETCHADD_ACQ:
e35728c4 6644 case UNSPEC_FETCHADD_REL:
c87e09ad 6645 case UNSPEC_BSP_VALUE:
6646 case UNSPEC_FLUSHRS:
6647 case UNSPEC_BUNDLE_SELECTOR:
6648 break;
6649
e2b723ca 6650 case UNSPEC_GR_SPILL:
6651 case UNSPEC_GR_RESTORE:
de4f4740 6652 {
6653 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6654 HOST_WIDE_INT bit = (offset >> 3) & 63;
6655
6656 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
20391a1f 6657 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
de4f4740 6658 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6659 new_flags, pred);
6660 break;
6661 }
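     /* For example: general registers are spilled in 8-byte slots, so a
        spill at offset 24 guards its value with UNAT bit
        (24 >> 3) & 63 == 3, and that bit is recorded as written for a
        spill and as read for a restore.  */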
b40da9a7 6662
e2b723ca 6663 case UNSPEC_FR_SPILL:
6664 case UNSPEC_FR_RESTORE:
2d0a3e2b 6665 case UNSPEC_GETF_EXP:
da4ab0bd 6666 case UNSPEC_SETF_EXP:
e2b723ca 6667 case UNSPEC_ADDP4:
da4ab0bd 6668 case UNSPEC_FR_SQRT_RECIP_APPROX:
aaff414b 6669 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
ea13ae4c 6670 case UNSPEC_LDA:
6671 case UNSPEC_LDS:
cf7898a6 6672 case UNSPEC_LDS_A:
ea13ae4c 6673 case UNSPEC_LDSA:
6674 case UNSPEC_CHKACLR:
6675 case UNSPEC_CHKS:
3cc092f7 6676 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6677 break;
6678
e2b723ca 6679 case UNSPEC_FR_RECIP_APPROX:
6d6f3860 6680 case UNSPEC_SHRP:
270436f3 6681 case UNSPEC_COPYSIGN:
4e508025 6682 case UNSPEC_FR_RECIP_APPROX_RES:
1beeaf61 6683 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6684 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6685 break;
6686
e2b723ca 6687 case UNSPEC_CMPXCHG_ACQ:
e35728c4 6688 case UNSPEC_CMPXCHG_REL:
f087d65d 6689 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6690 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6691 break;
6692
ac445222 6693 default:
c5c17bca 6694 gcc_unreachable ();
ac445222 6695 }
6696 break;
6697
6698 case UNSPEC_VOLATILE:
6699 switch (XINT (x, 1))
6700 {
e2b723ca 6701 case UNSPECV_ALLOC:
83b1a843 6702 /* Alloc must always be the first instruction of a group.
6703 We force this by always returning true. */
6704 /* ??? We might get better scheduling if we explicitly check for
6705 input/local/output register dependencies, and modify the
6706 scheduler so that alloc is always reordered to the start of
6707 the current group. We could then eliminate all of the
6708 first_instruction code. */
6709 rws_access_regno (AR_PFS_REGNUM, flags, pred);
ac445222 6710
6711 new_flags.is_write = 1;
83b1a843 6712 rws_access_regno (REG_AR_CFM, new_flags, pred);
6713 return 1;
ac445222 6714
e2b723ca 6715 case UNSPECV_SET_BSP:
28d5c3d9 6716 case UNSPECV_PROBE_STACK_RANGE:
9b06caff 6717 need_barrier = 1;
6718 break;
6719
e2b723ca 6720 case UNSPECV_BLOCKAGE:
6721 case UNSPECV_INSN_GROUP_BARRIER:
6722 case UNSPECV_BREAK:
6723 case UNSPECV_PSAC_ALL:
6724 case UNSPECV_PSAC_NORMAL:
9b06caff 6725 return 0;
7c2f467a 6726
28d5c3d9 6727 case UNSPECV_PROBE_STACK_ADDRESS:
6728 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6729 break;
6730
ac445222 6731 default:
c5c17bca 6732 gcc_unreachable ();
ac445222 6733 }
6734 break;
6735
6736 case RETURN:
6737 new_flags.is_write = 0;
cac50a9f 6738 need_barrier = rws_access_regno (REG_RP, flags, pred);
6739 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
ac445222 6740
6741 new_flags.is_write = 1;
cac50a9f 6742 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6743 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
ac445222 6744 break;
6745
6746 default:
6747 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6748 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6749 switch (format_ptr[i])
6750 {
6751 case '0': /* unused field */
6752 case 'i': /* integer */
6753 case 'n': /* note */
6754 case 'w': /* wide integer */
6755 case 's': /* pointer to string */
6756 case 'S': /* optional pointer to string */
6757 break;
6758
6759 case 'e':
6760 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6761 need_barrier = 1;
6762 break;
6763
6764 case 'E':
6765 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6766 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6767 need_barrier = 1;
6768 break;
6769
6770 default:
c5c17bca 6771 gcc_unreachable ();
ac445222 6772 }
46ebdd6b 6773 break;
ac445222 6774 }
6775 return need_barrier;
6776}
6777
fb16e37a 6778/* Clear out the state for group_barrier_needed at the start of a
a0bb0d20 6779 sequence of insns. */
6780
6781static void
b40da9a7 6782init_insn_group_barriers (void)
a0bb0d20 6783{
6784 memset (rws_sum, 0, sizeof (rws_sum));
83b1a843 6785 first_instruction = 1;
a0bb0d20 6786}
6787
fb16e37a 6788/* Given the current state, determine whether a group barrier (a stop bit) is
6789 necessary before INSN. Return nonzero if so. This modifies the state to
6790 include the effects of INSN as a side-effect. */
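/* For example, an instruction group containing "add r14 = r15, r16" followed
   by "ld8 r17 = [r14]" needs a stop bit (";;") between the two insns, since
   r14 is read in the same group in which it is written; the rws_* register
   write state tracked here detects exactly such conflicts.  */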
a0bb0d20 6791
6792static int
fb16e37a 6793group_barrier_needed (rtx insn)
a0bb0d20 6794{
6795 rtx pat;
6796 int need_barrier = 0;
6797 struct reg_flags flags;
6798
6799 memset (&flags, 0, sizeof (flags));
6800 switch (GET_CODE (insn))
6801 {
6802 case NOTE:
9845d120 6803 case DEBUG_INSN:
a0bb0d20 6804 break;
6805
6806 case BARRIER:
6807 /* A barrier doesn't imply an instruction group boundary. */
6808 break;
6809
6810 case CODE_LABEL:
6811 memset (rws_insn, 0, sizeof (rws_insn));
6812 return 1;
6813
6814 case CALL_INSN:
6815 flags.is_branch = 1;
6816 flags.is_sibcall = SIBLING_CALL_P (insn);
6817 memset (rws_insn, 0, sizeof (rws_insn));
b69208e6 6818
6819 /* Don't bundle a call following another call. */
6820 if ((pat = prev_active_insn (insn))
6821 && GET_CODE (pat) == CALL_INSN)
6822 {
6823 need_barrier = 1;
6824 break;
6825 }
6826
a0bb0d20 6827 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6828 break;
6829
6830 case JUMP_INSN:
ea13ae4c 6831 if (!ia64_spec_check_p (insn))
6832 flags.is_branch = 1;
b69208e6 6833
6834 /* Don't bundle a jump following a call. */
6835 if ((pat = prev_active_insn (insn))
6836 && GET_CODE (pat) == CALL_INSN)
6837 {
6838 need_barrier = 1;
6839 break;
6840 }
8e262b5e 6841 /* FALLTHRU */
a0bb0d20 6842
6843 case INSN:
6844 if (GET_CODE (PATTERN (insn)) == USE
6845 || GET_CODE (PATTERN (insn)) == CLOBBER)
6846 /* Don't care about USE and CLOBBER "insns"---those are used to
6847 indicate to the optimizer that it shouldn't get rid of
6848 certain operations. */
6849 break;
6850
6851 pat = PATTERN (insn);
6852
6853 /* Ug. Hack hacks hacked elsewhere. */
6854 switch (recog_memoized (insn))
6855 {
6856 /* We play dependency tricks with the epilogue in order
6857 to get proper schedules. Undo this for dv analysis. */
6858 case CODE_FOR_epilogue_deallocate_stack:
b8536bfb 6859 case CODE_FOR_prologue_allocate_stack:
a0bb0d20 6860 pat = XVECEXP (pat, 0, 0);
6861 break;
6862
6863 /* The pattern we use for br.cloop confuses the code above.
6864 The second element of the vector is representative. */
6865 case CODE_FOR_doloop_end_internal:
6866 pat = XVECEXP (pat, 0, 1);
6867 break;
6868
6869 /* Doesn't generate code. */
6870 case CODE_FOR_pred_rel_mutex:
8ced207e 6871 case CODE_FOR_prologue_use:
a0bb0d20 6872 return 0;
6873
6874 default:
6875 break;
6876 }
6877
6878 memset (rws_insn, 0, sizeof (rws_insn));
6879 need_barrier = rtx_needs_barrier (pat, flags, 0);
6880
6881 /* Check to see if the previous instruction was a volatile
6882 asm. */
6883 if (! need_barrier)
6884 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
cf7898a6 6885
a0bb0d20 6886 break;
6887
6888 default:
c5c17bca 6889 gcc_unreachable ();
a0bb0d20 6890 }
83b1a843 6891
28d5c3d9 6892 if (first_instruction && important_for_bundling_p (insn))
83b1a843 6893 {
6894 need_barrier = 0;
6895 first_instruction = 0;
6896 }
6897
a0bb0d20 6898 return need_barrier;
6899}
6900
fb16e37a 6901/* Like group_barrier_needed, but do not clobber the current state. */
a0bb0d20 6902
6903static int
fb16e37a 6904safe_group_barrier_needed (rtx insn)
a0bb0d20 6905{
83b1a843 6906 int saved_first_instruction;
a0bb0d20 6907 int t;
83b1a843 6908
83b1a843 6909 saved_first_instruction = first_instruction;
c2f54bdd 6910 in_safe_group_barrier = 1;
83b1a843 6911
fb16e37a 6912 t = group_barrier_needed (insn);
83b1a843 6913
83b1a843 6914 first_instruction = saved_first_instruction;
c2f54bdd 6915 in_safe_group_barrier = 0;
83b1a843 6916
a0bb0d20 6917 return t;
6918}
6919
2efea8c0 6920/* Scan the current function and insert stop bits as necessary to
6921 eliminate dependencies. This function assumes that a final
6922 instruction scheduling pass has been run which has already
6923 inserted most of the necessary stop bits. This function only
6924 inserts new ones at basic block boundaries, since these are
6925 invisible to the scheduler. */
a0bb0d20 6926
6927static void
b40da9a7 6928emit_insn_group_barriers (FILE *dump)
a0bb0d20 6929{
6930 rtx insn;
6931 rtx last_label = 0;
6932 int insns_since_last_label = 0;
6933
6934 init_insn_group_barriers ();
6935
2efea8c0 6936 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
a0bb0d20 6937 {
6938 if (GET_CODE (insn) == CODE_LABEL)
6939 {
6940 if (insns_since_last_label)
6941 last_label = insn;
6942 insns_since_last_label = 0;
6943 }
6944 else if (GET_CODE (insn) == NOTE
ad4583d9 6945 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
a0bb0d20 6946 {
6947 if (insns_since_last_label)
6948 last_label = insn;
6949 insns_since_last_label = 0;
6950 }
6951 else if (GET_CODE (insn) == INSN
6952 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
e2b723ca 6953 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
a0bb0d20 6954 {
6955 init_insn_group_barriers ();
6956 last_label = 0;
6957 }
9845d120 6958 else if (NONDEBUG_INSN_P (insn))
a0bb0d20 6959 {
6960 insns_since_last_label = 1;
6961
fb16e37a 6962 if (group_barrier_needed (insn))
a0bb0d20 6963 {
6964 if (last_label)
6965 {
6966 if (dump)
6967 fprintf (dump, "Emitting stop before label %d\n",
6968 INSN_UID (last_label));
6969 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6970 insn = last_label;
3149b29e 6971
6972 init_insn_group_barriers ();
6973 last_label = 0;
a0bb0d20 6974 }
a0bb0d20 6975 }
6976 }
6977 }
6978}
940fa57f 6979
6980/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6981 This function has to emit all necessary group barriers. */
6982
6983static void
b40da9a7 6984emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
940fa57f 6985{
6986 rtx insn;
6987
6988 init_insn_group_barriers ();
6989
2efea8c0 6990 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
940fa57f 6991 {
811aa257 6992 if (GET_CODE (insn) == BARRIER)
6993 {
6994 rtx last = prev_active_insn (insn);
6995
6996 if (! last)
6997 continue;
6998 if (GET_CODE (last) == JUMP_INSN
6999 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7000 last = prev_active_insn (last);
7001 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7002 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7003
7004 init_insn_group_barriers ();
7005 }
9845d120 7006 else if (NONDEBUG_INSN_P (insn))
940fa57f 7007 {
811aa257 7008 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7009 init_insn_group_barriers ();
fb16e37a 7010 else if (group_barrier_needed (insn))
940fa57f 7011 {
7012 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7013 init_insn_group_barriers ();
fb16e37a 7014 group_barrier_needed (insn);
940fa57f 7015 }
7016 }
7017 }
7018}
58ada791 7019
a0bb0d20 7020\f
a0bb0d20 7021
58ada791 7022/* Instruction scheduling support. */
a0bb0d20 7023
7024#define NR_BUNDLES 10
7025
58ada791 7026/* A list of names of all available bundles. */
a0bb0d20 7027
58ada791 7028static const char *bundle_name [NR_BUNDLES] =
a0bb0d20 7029{
58ada791 7030 ".mii",
7031 ".mmi",
7032 ".mfi",
7033 ".mmf",
a0bb0d20 7034#if NR_BUNDLES == 10
58ada791 7035 ".bbb",
7036 ".mbb",
a0bb0d20 7037#endif
58ada791 7038 ".mib",
7039 ".mmb",
7040 ".mfb",
7041 ".mlx"
a0bb0d20 7042};
7043
58ada791 7044/* Nonzero if we should insert stop bits into the schedule. */
a0bb0d20 7045
58ada791 7046int ia64_final_schedule = 0;
a0bb0d20 7047
0975351b 7048/* Codes of the corresponding queried units: */
a0bb0d20 7049
58ada791 7050static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7051static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
a0bb0d20 7052
58ada791 7053static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7054static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
a0bb0d20 7055
58ada791 7056static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7057
7058/* The following variable value is an insn group barrier. */
7059
7060static rtx dfa_stop_insn;
7061
7062/* The following variable value is the last issued insn. */
7063
7064static rtx last_scheduled_insn;
7065
58ada791 7066/* The following variable value is a pointer to a DFA state used as
 7067 a temporary variable. */
7068
7069static state_t temp_dfa_state = NULL;
7070
 7071/* The following variable value is the DFA state after issuing the last
7072 insn. */
7073
7074static state_t prev_cycle_state = NULL;
7075
7076/* The following array element values are TRUE if the corresponding
1d5a21db 7077 insn requires stop bits to be added before it. */
58ada791 7078
ea13ae4c 7079static char *stops_p = NULL;
7080
58ada791 7081/* The following variable is used to set up the above-mentioned array. */
7082
7083static int stop_before_p = 0;
7084
 7085/* The following variable value is the length of the arrays `clocks' and
7086 `add_cycles'. */
7087
7088static int clocks_length;
7089
ea13ae4c 7090/* The following variable value is the number of data speculations in progress. */
7091static int pending_data_specs = 0;
7092
cf7898a6 7093/* Number of memory references on the current and three future processor cycles. */
7094static char mem_ops_in_group[4];
7095
 7096/* Number of the current processor cycle (from the scheduler's point of view). */
7097static int current_cycle;
7098
b40da9a7 7099static rtx ia64_single_set (rtx);
7100static void ia64_emit_insn_before (rtx, rtx);
a0bb0d20 7101
7102/* Map a bundle number to its pseudo-op. */
7103
7104const char *
b40da9a7 7105get_bundle_name (int b)
a0bb0d20 7106{
58ada791 7107 return bundle_name[b];
a0bb0d20 7108}
7109
a0bb0d20 7110
7111/* Return the maximum number of instructions a cpu can issue. */
7112
747af5e7 7113static int
b40da9a7 7114ia64_issue_rate (void)
a0bb0d20 7115{
7116 return 6;
7117}
7118
7119/* Helper function - like single_set, but look inside COND_EXEC. */
7120
7121static rtx
b40da9a7 7122ia64_single_set (rtx insn)
a0bb0d20 7123{
af6e428f 7124 rtx x = PATTERN (insn), ret;
a0bb0d20 7125 if (GET_CODE (x) == COND_EXEC)
7126 x = COND_EXEC_CODE (x);
7127 if (GET_CODE (x) == SET)
7128 return x;
b8536bfb 7129
 7130 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
 7131 Although they are not a classical single set, the second set is there just
7132 to protect it from moving past FP-relative stack accesses. */
7133 switch (recog_memoized (insn))
af6e428f 7134 {
b8536bfb 7135 case CODE_FOR_prologue_allocate_stack:
7136 case CODE_FOR_epilogue_deallocate_stack:
7137 ret = XVECEXP (x, 0, 0);
7138 break;
7139
7140 default:
7141 ret = single_set_2 (insn, x);
7142 break;
af6e428f 7143 }
b8536bfb 7144
af6e428f 7145 return ret;
a0bb0d20 7146}
7147
cf7898a6 7148/* Adjust the cost of a scheduling dependency.
 7149 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
 7150 COST is the current cost, DW is the dependency weakness. */
747af5e7 7151static int
cf7898a6 7152ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
a0bb0d20 7153{
cf7898a6 7154 enum reg_note dep_type = (enum reg_note) dep_type1;
a0bb0d20 7155 enum attr_itanium_class dep_class;
7156 enum attr_itanium_class insn_class;
a0bb0d20 7157
a0bb0d20 7158 insn_class = ia64_safe_itanium_class (insn);
58ada791 7159 dep_class = ia64_safe_itanium_class (dep_insn);
cf7898a6 7160
7161 /* Treat true memory dependencies separately. Ignore apparent true
 7162 dependence between store and call (a call has a SYMBOL_REF inside a MEM). */
7163 if (dep_type == REG_DEP_TRUE
7164 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7165 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7166 return 0;
7167
7168 if (dw == MIN_DEP_WEAK)
7169 /* Store and load are likely to alias, use higher cost to avoid stall. */
7170 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7171 else if (dw > MIN_DEP_WEAK)
7172 {
7173 /* Store and load are less likely to alias. */
7174 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7175 /* Assume there will be no cache conflict for floating-point data.
7176 For integer data, L1 conflict penalty is huge (17 cycles), so we
7177 never assume it will not cause a conflict. */
7178 return 0;
7179 else
7180 return cost;
7181 }
7182
7183 if (dep_type != REG_DEP_OUTPUT)
7184 return cost;
7185
58ada791 7186 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7187 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
a0bb0d20 7188 return 0;
7189
a0bb0d20 7190 return cost;
7191}
7192
5da2bd31 7193/* Like emit_insn_before, but skip cycle_display notes.
7194 ??? When cycle display notes are implemented, update this. */
7195
7196static void
b40da9a7 7197ia64_emit_insn_before (rtx insn, rtx before)
5da2bd31 7198{
7199 emit_insn_before (insn, before);
7200}
7201
58ada791 7202/* The following function marks insns that produce addresses for load
 7203 and store insns. Such insns will be placed into M slots because this
 7204 decreases latency time for Itanium1 (see function
7205 `ia64_produce_address_p' and the DFA descriptions). */
a0bb0d20 7206
7207static void
b40da9a7 7208ia64_dependencies_evaluation_hook (rtx head, rtx tail)
a0bb0d20 7209{
9997bd27 7210 rtx insn, next, next_tail;
b40da9a7 7211
87121034 7212 /* Before reload, which_alternative is not set, which means that
7213 ia64_safe_itanium_class will produce wrong results for (at least)
7214 move instructions. */
7215 if (!reload_completed)
7216 return;
7217
58ada791 7218 next_tail = NEXT_INSN (tail);
7219 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7220 if (INSN_P (insn))
7221 insn->call = 0;
7222 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7223 if (INSN_P (insn)
7224 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7225 {
93f6b030 7226 sd_iterator_def sd_it;
7227 dep_t dep;
7228 bool has_mem_op_consumer_p = false;
9997bd27 7229
93f6b030 7230 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
58ada791 7231 {
4d053aca 7232 enum attr_itanium_class c;
7233
93f6b030 7234 if (DEP_TYPE (dep) != REG_DEP_TRUE)
87121034 7235 continue;
9997bd27 7236
93f6b030 7237 next = DEP_CON (dep);
4d053aca 7238 c = ia64_safe_itanium_class (next);
7239 if ((c == ITANIUM_CLASS_ST
7240 || c == ITANIUM_CLASS_STF)
58ada791 7241 && ia64_st_address_bypass_p (insn, next))
93f6b030 7242 {
7243 has_mem_op_consumer_p = true;
7244 break;
7245 }
4d053aca 7246 else if ((c == ITANIUM_CLASS_LD
7247 || c == ITANIUM_CLASS_FLD
7248 || c == ITANIUM_CLASS_FLDP)
58ada791 7249 && ia64_ld_address_bypass_p (insn, next))
93f6b030 7250 {
7251 has_mem_op_consumer_p = true;
7252 break;
7253 }
58ada791 7254 }
93f6b030 7255
7256 insn->call = has_mem_op_consumer_p;
58ada791 7257 }
7258}
a0bb0d20 7259
58ada791 7260/* We're beginning a new block. Initialize data structures as necessary. */
a0bb0d20 7261
58ada791 7262static void
b40da9a7 7263ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7264 int sched_verbose ATTRIBUTE_UNUSED,
7265 int max_ready ATTRIBUTE_UNUSED)
58ada791 7266{
7267#ifdef ENABLE_CHECKING
7268 rtx insn;
b40da9a7 7269
cf7898a6 7270 if (!sel_sched_p () && reload_completed)
58ada791 7271 for (insn = NEXT_INSN (current_sched_info->prev_head);
7272 insn != current_sched_info->next_tail;
7273 insn = NEXT_INSN (insn))
c5c17bca 7274 gcc_assert (!SCHED_GROUP_P (insn));
58ada791 7275#endif
7276 last_scheduled_insn = NULL_RTX;
7277 init_insn_group_barriers ();
cf7898a6 7278
7279 current_cycle = 0;
7280 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
a0bb0d20 7281}
7282
ea13ae4c 7283/* We're beginning a scheduling pass. Check that no data speculations are pending. */
7284
7285static void
7286ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7287 int sched_verbose ATTRIBUTE_UNUSED,
7288 int max_ready ATTRIBUTE_UNUSED)
7289{
cf7898a6 7290 gcc_assert (pending_data_specs == 0);
ea13ae4c 7291}
7292
7293/* Scheduling pass is now finished. Free/reset static variable. */
7294static void
7295ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7296 int sched_verbose ATTRIBUTE_UNUSED)
7297{
cf7898a6 7298 gcc_assert (pending_data_specs == 0);
7299}
7300
7301/* Return TRUE if INSN is a load (either normal or speculative, but not a
7302 speculation check), FALSE otherwise. */
7303static bool
7304is_load_p (rtx insn)
7305{
7306 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7307
7308 return
7309 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7310 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7311}
7312
 7313/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
 7314 array (taking into account the 3-cycle cache reference postponing for stores:
 7315 Intel Itanium 2 Reference Manual for Software Development and Optimization,
7316 6.7.3.1). */
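/* For example, a load issued on cycle N is counted in mem_ops_in_group[N % 4]
   for that same cycle, while a store issued on cycle N is charged to
   mem_ops_in_group[(N + 3) % 4], modelling the delayed cache access of stores
   described above.  */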
7317static void
7318record_memory_reference (rtx insn)
7319{
7320 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7321
7322 switch (insn_class) {
7323 case ITANIUM_CLASS_FLD:
7324 case ITANIUM_CLASS_LD:
7325 mem_ops_in_group[current_cycle % 4]++;
7326 break;
7327 case ITANIUM_CLASS_STF:
7328 case ITANIUM_CLASS_ST:
7329 mem_ops_in_group[(current_cycle + 3) % 4]++;
7330 break;
7331 default:;
7332 }
ea13ae4c 7333}
7334
58ada791 7335/* We are about to begin issuing insns for this clock cycle.
7336 Override the default sort algorithm to better slot instructions. */
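/* REORDER_TYPE is 0 when this is called at the start of a cycle (from
   ia64_sched_reorder) and 1 when it is called again after an insn has been
   issued (from ia64_sched_reorder2); only the former pass sweeps asms and
   other TYPE_UNKNOWN insns out of the way.  */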
a0bb0d20 7337
58ada791 7338static int
b40da9a7 7339ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
cf7898a6 7340 int *pn_ready, int clock_var,
b40da9a7 7341 int reorder_type)
a0bb0d20 7342{
58ada791 7343 int n_asms;
7344 int n_ready = *pn_ready;
7345 rtx *e_ready = ready + n_ready;
7346 rtx *insnp;
a0bb0d20 7347
58ada791 7348 if (sched_verbose)
7349 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
a0bb0d20 7350
58ada791 7351 if (reorder_type == 0)
a0bb0d20 7352 {
58ada791 7353 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7354 n_asms = 0;
7355 for (insnp = ready; insnp < e_ready; insnp++)
7356 if (insnp < e_ready)
7357 {
7358 rtx insn = *insnp;
7359 enum attr_type t = ia64_safe_type (insn);
7360 if (t == TYPE_UNKNOWN)
7361 {
7362 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7363 || asm_noperands (PATTERN (insn)) >= 0)
7364 {
7365 rtx lowest = ready[n_asms];
7366 ready[n_asms] = insn;
7367 *insnp = lowest;
7368 n_asms++;
7369 }
7370 else
7371 {
7372 rtx highest = ready[n_ready - 1];
7373 ready[n_ready - 1] = insn;
7374 *insnp = highest;
7375 return 1;
7376 }
7377 }
7378 }
60e8331c 7379
58ada791 7380 if (n_asms < n_ready)
60e8331c 7381 {
58ada791 7382 /* Some normal insns to process. Skip the asms. */
7383 ready += n_asms;
7384 n_ready -= n_asms;
60e8331c 7385 }
58ada791 7386 else if (n_ready > 0)
7387 return 1;
a0bb0d20 7388 }
7389
58ada791 7390 if (ia64_final_schedule)
a0bb0d20 7391 {
58ada791 7392 int deleted = 0;
7393 int nr_need_stop = 0;
7394
7395 for (insnp = ready; insnp < e_ready; insnp++)
fb16e37a 7396 if (safe_group_barrier_needed (*insnp))
58ada791 7397 nr_need_stop++;
b40da9a7 7398
58ada791 7399 if (reorder_type == 1 && n_ready == nr_need_stop)
7400 return 0;
7401 if (reorder_type == 0)
7402 return 1;
7403 insnp = e_ready;
7404 /* Move down everything that needs a stop bit, preserving
7405 relative order. */
7406 while (insnp-- > ready + deleted)
7407 while (insnp >= ready + deleted)
7408 {
7409 rtx insn = *insnp;
fb16e37a 7410 if (! safe_group_barrier_needed (insn))
58ada791 7411 break;
7412 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7413 *ready = insn;
7414 deleted++;
7415 }
7416 n_ready -= deleted;
7417 ready += deleted;
a0bb0d20 7418 }
a0bb0d20 7419
cf7898a6 7420 current_cycle = clock_var;
7421 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7422 {
7423 int moved = 0;
7424
7425 insnp = e_ready;
7426 /* Move down loads/stores, preserving relative order. */
7427 while (insnp-- > ready + moved)
7428 while (insnp >= ready + moved)
7429 {
7430 rtx insn = *insnp;
7431 if (! is_load_p (insn))
7432 break;
7433 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7434 *ready = insn;
7435 moved++;
7436 }
7437 n_ready -= moved;
7438 ready += moved;
7439 }
7440
58ada791 7441 return 1;
a0bb0d20 7442}
341cffb9 7443
58ada791 7444/* We are about to begin issuing insns for this clock cycle. Override
7445 the default sort algorithm to better slot instructions. */
ac445222 7446
58ada791 7447static int
b40da9a7 7448ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7449 int clock_var)
a0bb0d20 7450{
58ada791 7451 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7452 pn_ready, clock_var, 0);
a0bb0d20 7453}
7454
58ada791 7455/* Like ia64_sched_reorder, but called after issuing each insn.
7456 Override the default sort algorithm to better slot instructions. */
a0bb0d20 7457
58ada791 7458static int
b40da9a7 7459ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7460 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7461 int *pn_ready, int clock_var)
58ada791 7462{
58ada791 7463 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7464 clock_var, 1);
a0bb0d20 7465}
7466
58ada791 7467/* We are about to issue INSN. Return the number of insns left on the
7468 ready queue that can be issued this cycle. */
a0bb0d20 7469
58ada791 7470static int
b40da9a7 7471ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7472 int sched_verbose ATTRIBUTE_UNUSED,
7473 rtx insn ATTRIBUTE_UNUSED,
7474 int can_issue_more ATTRIBUTE_UNUSED)
a0bb0d20 7475{
cf7898a6 7476 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
ea13ae4c 7477 /* Modulo scheduling does not extend h_i_d when emitting
cf7898a6 7478 new instructions. Don't use h_i_d if we don't have to. */
ea13ae4c 7479 {
7480 if (DONE_SPEC (insn) & BEGIN_DATA)
7481 pending_data_specs++;
7482 if (CHECK_SPEC (insn) & BEGIN_DATA)
7483 pending_data_specs--;
7484 }
7485
9845d120 7486 if (DEBUG_INSN_P (insn))
7487 return 1;
7488
58ada791 7489 last_scheduled_insn = insn;
7490 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7491 if (reload_completed)
a0bb0d20 7492 {
fb16e37a 7493 int needed = group_barrier_needed (insn);
c5c17bca 7494
7495 gcc_assert (!needed);
58ada791 7496 if (GET_CODE (insn) == CALL_INSN)
7497 init_insn_group_barriers ();
7498 stops_p [INSN_UID (insn)] = stop_before_p;
7499 stop_before_p = 0;
cf7898a6 7500
7501 record_memory_reference (insn);
a0bb0d20 7502 }
58ada791 7503 return 1;
7504}
ac445222 7505
58ada791 7506/* We are choosing an insn from the ready queue. Return nonzero if INSN
7507 can be chosen. */
ac445222 7508
58ada791 7509static int
b40da9a7 7510ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
58ada791 7511{
cf7898a6 7512 gcc_assert (insn && INSN_P (insn));
ea13ae4c 7513 return ((!reload_completed
7514 || !safe_group_barrier_needed (insn))
cf7898a6 7515 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7516 && (!mflag_sched_mem_insns_hard_limit
7517 || !is_load_p (insn)
7518 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
ea13ae4c 7519}
7520
 7521/* We are choosing an insn from the ready queue. Return nonzero if INSN
7522 can be chosen. */
7523
7524static bool
a9f1838b 7525ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
ea13ae4c 7526{
7527 gcc_assert (insn && INSN_P (insn));
 7528 /* The ALAT has 32 entries. Since we perform conservative data speculation,
 7529 we keep the ALAT half empty. */
7530 return (pending_data_specs < 16
7531 || !(TODO_SPEC (insn) & BEGIN_DATA));
a0bb0d20 7532}
7533
58ada791 7534/* The following variable value is pseudo-insn used by the DFA insn
7535 scheduler to change the DFA state when the simulated clock is
7536 increased. */
a0bb0d20 7537
58ada791 7538static rtx dfa_pre_cycle_insn;
a0bb0d20 7539
cf7898a6 7540/* Returns 1 when a meaningful insn was scheduled between the last group
7541 barrier and LAST. */
7542static int
7543scheduled_good_insn (rtx last)
7544{
7545 if (last && recog_memoized (last) >= 0)
7546 return 1;
7547
7548 for ( ;
7549 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7550 && !stops_p[INSN_UID (last)];
7551 last = PREV_INSN (last))
7552 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7553 the ebb we're scheduling. */
7554 if (INSN_P (last) && recog_memoized (last) >= 0)
7555 return 1;
7556
7557 return 0;
7558}
7559
33f88b1c 7560/* We are about to begin issuing INSN. Return nonzero if we cannot
58ada791 7561 issue it on the given cycle CLOCK and return zero if we should not sort
7562 the ready queue on the next clock start. */
a0bb0d20 7563
7564static int
b40da9a7 7565ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7566 int clock, int *sort_p)
a0bb0d20 7567{
c5c17bca 7568 gcc_assert (insn && INSN_P (insn));
9845d120 7569
7570 if (DEBUG_INSN_P (insn))
7571 return 0;
7572
cf7898a6 7573 /* When a group barrier is needed for insn, last_scheduled_insn
7574 should be set. */
7575 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7576 || last_scheduled_insn);
7577
7578 if ((reload_completed
7579 && (safe_group_barrier_needed (insn)
7580 || (mflag_sched_stop_bits_after_every_cycle
7581 && last_clock != clock
7582 && last_scheduled_insn
7583 && scheduled_good_insn (last_scheduled_insn))))
58ada791 7584 || (last_scheduled_insn
7585 && (GET_CODE (last_scheduled_insn) == CALL_INSN
28d5c3d9 7586 || unknown_for_bundling_p (last_scheduled_insn))))
a0bb0d20 7587 {
58ada791 7588 init_insn_group_barriers ();
cf7898a6 7589
58ada791 7590 if (verbose && dump)
7591 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7592 last_clock == clock ? " + cycle advance" : "");
cf7898a6 7593
58ada791 7594 stop_before_p = 1;
cf7898a6 7595 current_cycle = clock;
7596 mem_ops_in_group[current_cycle % 4] = 0;
7597
58ada791 7598 if (last_clock == clock)
a0bb0d20 7599 {
58ada791 7600 state_transition (curr_state, dfa_stop_insn);
7601 if (TARGET_EARLY_STOP_BITS)
7602 *sort_p = (last_scheduled_insn == NULL_RTX
7603 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7604 else
7605 *sort_p = 0;
7606 return 1;
7607 }
cf7898a6 7608
7609 if (last_scheduled_insn)
2fbc1ecc 7610 {
28d5c3d9 7611 if (unknown_for_bundling_p (last_scheduled_insn))
cf7898a6 7612 state_reset (curr_state);
7613 else
7614 {
7615 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7616 state_transition (curr_state, dfa_stop_insn);
7617 state_transition (curr_state, dfa_pre_cycle_insn);
7618 state_transition (curr_state, NULL);
7619 }
2fbc1ecc 7620 }
58ada791 7621 }
58ada791 7622 return 0;
a0bb0d20 7623}
7624
ea13ae4c 7625/* Implement targetm.sched.h_i_d_extended hook.
7626 Extend internal data structures. */
7627static void
7628ia64_h_i_d_extended (void)
7629{
ea13ae4c 7630 if (stops_p != NULL)
7631 {
cf7898a6 7632 int new_clocks_length = get_max_uid () * 3 / 2;
225ab426 7633 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
ea13ae4c 7634 clocks_length = new_clocks_length;
7635 }
7636}
cf7898a6 7637\f
7638
7639/* This structure describes the data used by the backend to guide scheduling.
7640 When the current scheduling point is switched, this data should be saved
7641 and restored later, if the scheduler returns to this point. */
7642struct _ia64_sched_context
7643{
7644 state_t prev_cycle_state;
7645 rtx last_scheduled_insn;
7646 struct reg_write_state rws_sum[NUM_REGS];
7647 struct reg_write_state rws_insn[NUM_REGS];
7648 int first_instruction;
7649 int pending_data_specs;
7650 int current_cycle;
7651 char mem_ops_in_group[4];
7652};
7653typedef struct _ia64_sched_context *ia64_sched_context_t;
7654
7655/* Allocates a scheduling context. */
7656static void *
7657ia64_alloc_sched_context (void)
7658{
7659 return xmalloc (sizeof (struct _ia64_sched_context));
7660}
7661
7662/* Initializes the _SC context with clean data, if CLEAN_P, and from
7663 the global context otherwise. */
7664static void
7665ia64_init_sched_context (void *_sc, bool clean_p)
7666{
7667 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7668
7669 sc->prev_cycle_state = xmalloc (dfa_state_size);
7670 if (clean_p)
7671 {
7672 state_reset (sc->prev_cycle_state);
7673 sc->last_scheduled_insn = NULL_RTX;
7674 memset (sc->rws_sum, 0, sizeof (rws_sum));
7675 memset (sc->rws_insn, 0, sizeof (rws_insn));
7676 sc->first_instruction = 1;
7677 sc->pending_data_specs = 0;
7678 sc->current_cycle = 0;
7679 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7680 }
7681 else
7682 {
7683 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7684 sc->last_scheduled_insn = last_scheduled_insn;
7685 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7686 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7687 sc->first_instruction = first_instruction;
7688 sc->pending_data_specs = pending_data_specs;
7689 sc->current_cycle = current_cycle;
7690 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7691 }
7692}
7693
7694/* Sets the global scheduling context to the one pointed to by _SC. */
7695static void
7696ia64_set_sched_context (void *_sc)
7697{
7698 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7699
7700 gcc_assert (sc != NULL);
7701
7702 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7703 last_scheduled_insn = sc->last_scheduled_insn;
7704 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7705 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7706 first_instruction = sc->first_instruction;
7707 pending_data_specs = sc->pending_data_specs;
7708 current_cycle = sc->current_cycle;
7709 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7710}
7711
7712/* Clears the data in the _SC scheduling context. */
7713static void
7714ia64_clear_sched_context (void *_sc)
7715{
7716 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7717
7718 free (sc->prev_cycle_state);
7719 sc->prev_cycle_state = NULL;
7720}
7721
7722/* Frees the _SC scheduling context. */
7723static void
7724ia64_free_sched_context (void *_sc)
7725{
7726 gcc_assert (_sc != NULL);
7727
7728 free (_sc);
7729}
7730
7731typedef rtx (* gen_func_t) (rtx, rtx);
7732
7733/* Return a function that will generate a load of mode MODE_NO
7734 with speculation types TS. */
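/* Roughly: BEGIN_DATA alone selects the gen_ld_a (ld.a, advanced load)
   patterns, BEGIN_DATA | BEGIN_CONTROL selects gen_ld_sa (ld.sa),
   BEGIN_CONTROL alone selects gen_ld_s (ld.s) or gen_ld_s_a depending on
   whether a branchy recovery check will be needed, and TS == 0 selects the
   plain move patterns.  */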
7735static gen_func_t
7736get_spec_load_gen_function (ds_t ts, int mode_no)
7737{
7738 static gen_func_t gen_ld_[] = {
7739 gen_movbi,
7740 gen_movqi_internal,
7741 gen_movhi_internal,
7742 gen_movsi_internal,
7743 gen_movdi_internal,
7744 gen_movsf_internal,
7745 gen_movdf_internal,
7746 gen_movxf_internal,
7747 gen_movti_internal,
7748 gen_zero_extendqidi2,
7749 gen_zero_extendhidi2,
7750 gen_zero_extendsidi2,
7751 };
7752
7753 static gen_func_t gen_ld_a[] = {
7754 gen_movbi_advanced,
7755 gen_movqi_advanced,
7756 gen_movhi_advanced,
7757 gen_movsi_advanced,
7758 gen_movdi_advanced,
7759 gen_movsf_advanced,
7760 gen_movdf_advanced,
7761 gen_movxf_advanced,
7762 gen_movti_advanced,
7763 gen_zero_extendqidi2_advanced,
7764 gen_zero_extendhidi2_advanced,
7765 gen_zero_extendsidi2_advanced,
7766 };
7767 static gen_func_t gen_ld_s[] = {
7768 gen_movbi_speculative,
7769 gen_movqi_speculative,
7770 gen_movhi_speculative,
7771 gen_movsi_speculative,
7772 gen_movdi_speculative,
7773 gen_movsf_speculative,
7774 gen_movdf_speculative,
7775 gen_movxf_speculative,
7776 gen_movti_speculative,
7777 gen_zero_extendqidi2_speculative,
7778 gen_zero_extendhidi2_speculative,
7779 gen_zero_extendsidi2_speculative,
7780 };
7781 static gen_func_t gen_ld_sa[] = {
7782 gen_movbi_speculative_advanced,
7783 gen_movqi_speculative_advanced,
7784 gen_movhi_speculative_advanced,
7785 gen_movsi_speculative_advanced,
7786 gen_movdi_speculative_advanced,
7787 gen_movsf_speculative_advanced,
7788 gen_movdf_speculative_advanced,
7789 gen_movxf_speculative_advanced,
7790 gen_movti_speculative_advanced,
7791 gen_zero_extendqidi2_speculative_advanced,
7792 gen_zero_extendhidi2_speculative_advanced,
7793 gen_zero_extendsidi2_speculative_advanced,
7794 };
7795 static gen_func_t gen_ld_s_a[] = {
7796 gen_movbi_speculative_a,
7797 gen_movqi_speculative_a,
7798 gen_movhi_speculative_a,
7799 gen_movsi_speculative_a,
7800 gen_movdi_speculative_a,
7801 gen_movsf_speculative_a,
7802 gen_movdf_speculative_a,
7803 gen_movxf_speculative_a,
7804 gen_movti_speculative_a,
7805 gen_zero_extendqidi2_speculative_a,
7806 gen_zero_extendhidi2_speculative_a,
7807 gen_zero_extendsidi2_speculative_a,
7808 };
7809
7810 gen_func_t *gen_ld;
7811
7812 if (ts & BEGIN_DATA)
7813 {
7814 if (ts & BEGIN_CONTROL)
7815 gen_ld = gen_ld_sa;
7816 else
7817 gen_ld = gen_ld_a;
7818 }
7819 else if (ts & BEGIN_CONTROL)
7820 {
7821 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7822 || ia64_needs_block_p (ts))
7823 gen_ld = gen_ld_s;
7824 else
7825 gen_ld = gen_ld_s_a;
7826 }
7827 else if (ts == 0)
7828 gen_ld = gen_ld_;
7829 else
7830 gcc_unreachable ();
7831
7832 return gen_ld[mode_no];
7833}
ea13ae4c 7834
 7835/* Constants that help map 'enum machine_mode' to int. */
7836enum SPEC_MODES
7837 {
7838 SPEC_MODE_INVALID = -1,
7839 SPEC_MODE_FIRST = 0,
7840 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7841 SPEC_MODE_FOR_EXTEND_LAST = 3,
7842 SPEC_MODE_LAST = 8
7843 };
7844
cf7898a6 7845enum
7846 {
7847 /* Offset to reach ZERO_EXTEND patterns. */
7848 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7849 };
7850
ea13ae4c 7851/* Return the index of MODE. */
7852static int
7853ia64_mode_to_int (enum machine_mode mode)
7854{
7855 switch (mode)
7856 {
7857 case BImode: return 0; /* SPEC_MODE_FIRST */
7858 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7859 case HImode: return 2;
7860 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7861 case DImode: return 4;
7862 case SFmode: return 5;
7863 case DFmode: return 6;
7864 case XFmode: return 7;
7865 case TImode:
7866 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7867 mentioned in itanium[12].md. Predicate fp_register_operand also
7868 needs to be defined. Bottom line: better disable for now. */
7869 return SPEC_MODE_INVALID;
7870 default: return SPEC_MODE_INVALID;
7871 }
7872}
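/* For example, a QImode load maps to index 1; if the insn zero-extends the
   loaded value to DImode, get_mode_no_for_insn adds SPEC_GEN_EXTEND_OFFSET
   (8), giving index 9, which selects the gen_zero_extendqidi2* entries in the
   generator arrays above.  */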
7873
7874/* Provide information about speculation capabilities. */
7875static void
7876ia64_set_sched_flags (spec_info_t spec_info)
7877{
7878 unsigned int *flags = &(current_sched_info->flags);
7879
7880 if (*flags & SCHED_RGN
cf7898a6 7881 || *flags & SCHED_EBB
7882 || *flags & SEL_SCHED)
ea13ae4c 7883 {
7884 int mask = 0;
7885
e5f7d6da 7886 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
cf7898a6 7887 || (mflag_sched_ar_data_spec && reload_completed))
ea13ae4c 7888 {
7889 mask |= BEGIN_DATA;
cf7898a6 7890
7891 if (!sel_sched_p ()
7892 && ((mflag_sched_br_in_data_spec && !reload_completed)
7893 || (mflag_sched_ar_in_data_spec && reload_completed)))
ea13ae4c 7894 mask |= BE_IN_DATA;
7895 }
7896
cf7898a6 7897 if (mflag_sched_control_spec
7898 && (!sel_sched_p ()
7899 || reload_completed))
ea13ae4c 7900 {
7901 mask |= BEGIN_CONTROL;
7902
cf7898a6 7903 if (!sel_sched_p () && mflag_sched_in_control_spec)
ea13ae4c 7904 mask |= BE_IN_CONTROL;
7905 }
7906
7bfeabaf 7907 spec_info->mask = mask;
7908
ea13ae4c 7909 if (mask)
7910 {
3072d30e 7911 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7912
7913 if (mask & BE_IN_SPEC)
7914 *flags |= NEW_BBS;
ea13ae4c 7915
ea13ae4c 7916 spec_info->flags = 0;
7917
7918 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7919 spec_info->flags |= PREFER_NON_DATA_SPEC;
7920
cf7898a6 7921 if (mask & CONTROL_SPEC)
ea13ae4c 7922 {
cf7898a6 7923 if (mflag_sched_prefer_non_control_spec_insns)
7924 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7925
7926 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7927 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
ea13ae4c 7928 }
cf7898a6 7929
7930 if (sched_verbose >= 1)
7931 spec_info->dump = sched_dump;
ea13ae4c 7932 else
7933 spec_info->dump = 0;
7934
7935 if (mflag_sched_count_spec_in_critical_path)
7936 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7937 }
7938 }
686d79e0 7939 else
7940 spec_info->mask = 0;
ea13ae4c 7941}
7942
cf7898a6 7943/* If INSN is an appropriate load, return its mode.
7944 Return -1 otherwise. */
ea13ae4c 7945static int
cf7898a6 7946get_mode_no_for_insn (rtx insn)
7947{
7948 rtx reg, mem, mode_rtx;
7949 int mode_no;
ea13ae4c 7950 bool extend_p;
ea13ae4c 7951
cf7898a6 7952 extract_insn_cached (insn);
ea13ae4c 7953
cf7898a6 7954 /* We use WHICH_ALTERNATIVE only after reload. This will
7955 guarantee that reload won't touch a speculative insn. */
1c42a59d 7956
cf7898a6 7957 if (recog_data.n_operands != 2)
ea13ae4c 7958 return -1;
7959
cf7898a6 7960 reg = recog_data.operand[0];
7961 mem = recog_data.operand[1];
1c42a59d 7962
cf7898a6 7963 /* We should use MEM's mode since REG's mode in the presence of
7964 ZERO_EXTEND will always be DImode. */
7965 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7966 /* Process non-speculative ld. */
7967 {
7968 if (!reload_completed)
7969 {
7970 /* Do not speculate into regs like ar.lc. */
7971 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7972 return -1;
7973
7974 if (!MEM_P (mem))
7975 return -1;
7976
7977 {
7978 rtx mem_reg = XEXP (mem, 0);
7979
7980 if (!REG_P (mem_reg))
7981 return -1;
7982 }
7983
7984 mode_rtx = mem;
7985 }
7986 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7987 {
7988 gcc_assert (REG_P (reg) && MEM_P (mem));
7989 mode_rtx = mem;
7990 }
7991 else
7992 return -1;
7993 }
7994 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7995 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7996 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7997 /* Process speculative ld or ld.c. */
ea13ae4c 7998 {
cf7898a6 7999 gcc_assert (REG_P (reg) && MEM_P (mem));
8000 mode_rtx = mem;
ea13ae4c 8001 }
8002 else
ea13ae4c 8003 {
cf7898a6 8004 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
ea13ae4c 8005
cf7898a6 8006 if (attr_class == ITANIUM_CLASS_CHK_A
8007 || attr_class == ITANIUM_CLASS_CHK_S_I
8008 || attr_class == ITANIUM_CLASS_CHK_S_F)
8009 /* Process chk. */
8010 mode_rtx = reg;
8011 else
8012 return -1;
ea13ae4c 8013 }
1c42a59d 8014
cf7898a6 8015 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
1c42a59d 8016
cf7898a6 8017 if (mode_no == SPEC_MODE_INVALID)
ea13ae4c 8018 return -1;
8019
cf7898a6 8020 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8021
8022 if (extend_p)
8023 {
8024 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8025 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8026 return -1;
1c42a59d 8027
cf7898a6 8028 mode_no += SPEC_GEN_EXTEND_OFFSET;
8029 }
ea13ae4c 8030
cf7898a6 8031 return mode_no;
ea13ae4c 8032}
8033
cf7898a6 8034/* If X is an unspec part of a speculative load, return its code.
8035 Return -1 otherwise. */
8036static int
8037get_spec_unspec_code (const_rtx x)
8038{
8039 if (GET_CODE (x) != UNSPEC)
8040 return -1;
ea13ae4c 8041
ea13ae4c 8042 {
cf7898a6 8043 int code;
ea13ae4c 8044
cf7898a6 8045 code = XINT (x, 1);
ea13ae4c 8046
cf7898a6 8047 switch (code)
8048 {
8049 case UNSPEC_LDA:
8050 case UNSPEC_LDS:
8051 case UNSPEC_LDS_A:
8052 case UNSPEC_LDSA:
8053 return code;
ea13ae4c 8054
cf7898a6 8055 default:
8056 return -1;
8057 }
8058 }
8059}
ea13ae4c 8060
cf7898a6 8061/* Implement skip_rtx_p hook. */
8062static bool
8063ia64_skip_rtx_p (const_rtx x)
8064{
8065 return get_spec_unspec_code (x) != -1;
8066}
ea13ae4c 8067
cf7898a6 8068/* If INSN is a speculative load, return its UNSPEC code.
8069 Return -1 otherwise. */
8070static int
8071get_insn_spec_code (const_rtx insn)
8072{
8073 rtx pat, reg, mem;
ea13ae4c 8074
cf7898a6 8075 pat = PATTERN (insn);
ea13ae4c 8076
cf7898a6 8077 if (GET_CODE (pat) == COND_EXEC)
8078 pat = COND_EXEC_CODE (pat);
ea13ae4c 8079
cf7898a6 8080 if (GET_CODE (pat) != SET)
8081 return -1;
8082
8083 reg = SET_DEST (pat);
8084 if (!REG_P (reg))
8085 return -1;
8086
8087 mem = SET_SRC (pat);
8088 if (GET_CODE (mem) == ZERO_EXTEND)
8089 mem = XEXP (mem, 0);
8090
8091 return get_spec_unspec_code (mem);
8092}
8093
8094/* If INSN is a speculative load, return a ds with the speculation types.
8095 Otherwise [if INSN is a normal instruction] return 0. */
8096static ds_t
8097ia64_get_insn_spec_ds (rtx insn)
8098{
8099 int code = get_insn_spec_code (insn);
8100
8101 switch (code)
ea13ae4c 8102 {
cf7898a6 8103 case UNSPEC_LDA:
8104 return BEGIN_DATA;
ea13ae4c 8105
cf7898a6 8106 case UNSPEC_LDS:
8107 case UNSPEC_LDS_A:
8108 return BEGIN_CONTROL;
ea13ae4c 8109
cf7898a6 8110 case UNSPEC_LDSA:
8111 return BEGIN_DATA | BEGIN_CONTROL;
ea13ae4c 8112
cf7898a6 8113 default:
8114 return 0;
ea13ae4c 8115 }
cf7898a6 8116}
8117
 8118/* If INSN is a speculative load, return a ds with the speculation types that
8119 will be checked.
8120 Otherwise [if INSN is a normal instruction] return 0. */
8121static ds_t
8122ia64_get_insn_checked_ds (rtx insn)
8123{
8124 int code = get_insn_spec_code (insn);
8125
8126 switch (code)
ea13ae4c 8127 {
cf7898a6 8128 case UNSPEC_LDA:
8129 return BEGIN_DATA | BEGIN_CONTROL;
8130
8131 case UNSPEC_LDS:
8132 return BEGIN_CONTROL;
8133
8134 case UNSPEC_LDS_A:
8135 case UNSPEC_LDSA:
8136 return BEGIN_DATA | BEGIN_CONTROL;
8137
8138 default:
8139 return 0;
ea13ae4c 8140 }
cf7898a6 8141}
ea13ae4c 8142
cf7898a6 8143/* Generate and return the speculative load pattern for INSN with
 8144 speculation type TS and machine mode index MODE_NO, preserving any
 8145 COND_EXEC wrapper of the original pattern. */
8147static rtx
8148ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8149{
8150 rtx pat, new_pat;
8151 gen_func_t gen_load;
ea13ae4c 8152
cf7898a6 8153 gen_load = get_spec_load_gen_function (ts, mode_no);
ea13ae4c 8154
cf7898a6 8155 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8156 copy_rtx (recog_data.operand[1]));
ea13ae4c 8157
8158 pat = PATTERN (insn);
8159 if (GET_CODE (pat) == COND_EXEC)
cf7898a6 8160 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8161 new_pat);
ea13ae4c 8162
8163 return new_pat;
8164}
8165
ea13ae4c 8166static bool
cf7898a6 8167insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8168 ds_t ds ATTRIBUTE_UNUSED)
ea13ae4c 8169{
cf7898a6 8170 return false;
8171}
ea13ae4c 8172
cf7898a6 8173/* Implement targetm.sched.speculate_insn hook.
 8174 Check whether INSN can be speculated with speculation type TS.
 8175 If not, return -1.
 8176 If so, generate the speculative pattern in NEW_PAT and return 1.
 8177 If the current pattern of INSN already provides TS speculation,
8178 return 0. */
8179static int
8180ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
8181{
8182 int mode_no;
8183 int res;
8184
8185 gcc_assert (!(ts & ~SPECULATIVE));
ea13ae4c 8186
cf7898a6 8187 if (ia64_spec_check_p (insn))
8188 return -1;
ea13ae4c 8189
cf7898a6 8190 if ((ts & BE_IN_SPEC)
8191 && !insn_can_be_in_speculative_p (insn, ts))
8192 return -1;
ea13ae4c 8193
cf7898a6 8194 mode_no = get_mode_no_for_insn (insn);
ea13ae4c 8195
cf7898a6 8196 if (mode_no != SPEC_MODE_INVALID)
8197 {
8198 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8199 res = 0;
8200 else
8201 {
8202 res = 1;
8203 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8204 }
8205 }
8206 else
8207 res = -1;
ea13ae4c 8208
cf7898a6 8209 return res;
8210}
ea13ae4c 8211
cf7898a6 8212/* Return a function that will generate a check for speculation TS with mode
8213 MODE_NO.
 8214 If a simple check is needed, pass true for SIMPLE_CHECK_P.
 8215 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
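/* Roughly: the "simple" checks are ld.c forms that re-execute the load in
   place when the speculation failed, while the chk.a / chk.s forms branch to
   recovery code; the clearing (.clr) variants additionally invalidate the
   matching ALAT entry.  */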
8216static gen_func_t
8217get_spec_check_gen_function (ds_t ts, int mode_no,
8218 bool simple_check_p, bool clearing_check_p)
8219{
8220 static gen_func_t gen_ld_c_clr[] = {
ea13ae4c 8221 gen_movbi_clr,
8222 gen_movqi_clr,
8223 gen_movhi_clr,
8224 gen_movsi_clr,
8225 gen_movdi_clr,
8226 gen_movsf_clr,
8227 gen_movdf_clr,
8228 gen_movxf_clr,
8229 gen_movti_clr,
8230 gen_zero_extendqidi2_clr,
8231 gen_zero_extendhidi2_clr,
8232 gen_zero_extendsidi2_clr,
cf7898a6 8233 };
8234 static gen_func_t gen_ld_c_nc[] = {
8235 gen_movbi_nc,
8236 gen_movqi_nc,
8237 gen_movhi_nc,
8238 gen_movsi_nc,
8239 gen_movdi_nc,
8240 gen_movsf_nc,
8241 gen_movdf_nc,
8242 gen_movxf_nc,
8243 gen_movti_nc,
8244 gen_zero_extendqidi2_nc,
8245 gen_zero_extendhidi2_nc,
8246 gen_zero_extendsidi2_nc,
8247 };
8248 static gen_func_t gen_chk_a_clr[] = {
ea13ae4c 8249 gen_advanced_load_check_clr_bi,
8250 gen_advanced_load_check_clr_qi,
8251 gen_advanced_load_check_clr_hi,
8252 gen_advanced_load_check_clr_si,
8253 gen_advanced_load_check_clr_di,
8254 gen_advanced_load_check_clr_sf,
8255 gen_advanced_load_check_clr_df,
8256 gen_advanced_load_check_clr_xf,
8257 gen_advanced_load_check_clr_ti,
8258 gen_advanced_load_check_clr_di,
8259 gen_advanced_load_check_clr_di,
8260 gen_advanced_load_check_clr_di,
cf7898a6 8261 };
8262 static gen_func_t gen_chk_a_nc[] = {
8263 gen_advanced_load_check_nc_bi,
8264 gen_advanced_load_check_nc_qi,
8265 gen_advanced_load_check_nc_hi,
8266 gen_advanced_load_check_nc_si,
8267 gen_advanced_load_check_nc_di,
8268 gen_advanced_load_check_nc_sf,
8269 gen_advanced_load_check_nc_df,
8270 gen_advanced_load_check_nc_xf,
8271 gen_advanced_load_check_nc_ti,
8272 gen_advanced_load_check_nc_di,
8273 gen_advanced_load_check_nc_di,
8274 gen_advanced_load_check_nc_di,
8275 };
8276 static gen_func_t gen_chk_s[] = {
ea13ae4c 8277 gen_speculation_check_bi,
8278 gen_speculation_check_qi,
8279 gen_speculation_check_hi,
8280 gen_speculation_check_si,
8281 gen_speculation_check_di,
8282 gen_speculation_check_sf,
8283 gen_speculation_check_df,
8284 gen_speculation_check_xf,
8285 gen_speculation_check_ti,
8286 gen_speculation_check_di,
8287 gen_speculation_check_di,
cf7898a6 8288 gen_speculation_check_di,
ea13ae4c 8289 };
8290
cf7898a6 8291 gen_func_t *gen_check;
ea13ae4c 8292
cf7898a6 8293 if (ts & BEGIN_DATA)
ea13ae4c 8294 {
cf7898a6 8295 /* We don't need recovery because, even if this is ld.sa, the
 8296 ALAT entry will be allocated only if the NAT bit is set to zero.
8297 So it is enough to use ld.c here. */
8298
8299 if (simple_check_p)
8300 {
8301 gcc_assert (mflag_sched_spec_ldc);
8302
8303 if (clearing_check_p)
8304 gen_check = gen_ld_c_clr;
8305 else
8306 gen_check = gen_ld_c_nc;
8307 }
8308 else
8309 {
8310 if (clearing_check_p)
8311 gen_check = gen_chk_a_clr;
8312 else
8313 gen_check = gen_chk_a_nc;
8314 }
ea13ae4c 8315 }
cf7898a6 8316 else if (ts & BEGIN_CONTROL)
ea13ae4c 8317 {
cf7898a6 8318 if (simple_check_p)
8319 /* We might want to use ld.sa -> ld.c instead of
8320 ld.s -> chk.s. */
ea13ae4c 8321 {
cf7898a6 8322 gcc_assert (!ia64_needs_block_p (ts));
ea13ae4c 8323
cf7898a6 8324 if (clearing_check_p)
8325 gen_check = gen_ld_c_clr;
8326 else
8327 gen_check = gen_ld_c_nc;
8328 }
8329 else
8330 {
8331 gen_check = gen_chk_s;
ea13ae4c 8332 }
cf7898a6 8333 }
8334 else
8335 gcc_unreachable ();
8336
8337 gcc_assert (mode_no >= 0);
8338 return gen_check[mode_no];
8339}
8340
 8341/* Return nonzero if speculation TS needs a branchy recovery check. */
8342static bool
8343ia64_needs_block_p (ds_t ts)
8344{
8345 if (ts & BEGIN_DATA)
8346 return !mflag_sched_spec_ldc;
8347
8348 gcc_assert ((ts & BEGIN_CONTROL) != 0);
ea13ae4c 8349
cf7898a6 8350 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8351}
8352
 8353/* Generate a recovery check for INSN.
 8354 If LABEL is nonzero, generate a branchy recovery check.
8355 Otherwise, generate a simple check. */
8356static rtx
8357ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
8358{
8359 rtx op1, pat, check_pat;
8360 gen_func_t gen_check;
8361 int mode_no;
8362
8363 mode_no = get_mode_no_for_insn (insn);
8364 gcc_assert (mode_no >= 0);
8365
8366 if (label)
8367 op1 = label;
8368 else
8369 {
8370 gcc_assert (!ia64_needs_block_p (ds));
8371 op1 = copy_rtx (recog_data.operand[1]);
ea13ae4c 8372 }
cf7898a6 8373
8374 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8375 true);
ea13ae4c 8376
cf7898a6 8377 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
ea13ae4c 8378
8379 pat = PATTERN (insn);
8380 if (GET_CODE (pat) == COND_EXEC)
8381 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8382 check_pat);
8383
8384 return check_pat;
8385}
8386
 8387/* Return nonzero if X is a branchy recovery check. */
8388static int
8389ia64_spec_check_p (rtx x)
8390{
8391 x = PATTERN (x);
8392 if (GET_CODE (x) == COND_EXEC)
8393 x = COND_EXEC_CODE (x);
8394 if (GET_CODE (x) == SET)
8395 return ia64_spec_check_src_p (SET_SRC (x));
8396 return 0;
8397}
8398
 8399/* Return nonzero if SRC belongs to a recovery check. */
8400static int
8401ia64_spec_check_src_p (rtx src)
8402{
8403 if (GET_CODE (src) == IF_THEN_ELSE)
8404 {
8405 rtx t;
8406
8407 t = XEXP (src, 0);
8408 if (GET_CODE (t) == NE)
8409 {
8410 t = XEXP (t, 0);
8411
8412 if (GET_CODE (t) == UNSPEC)
8413 {
8414 int code;
8415
8416 code = XINT (t, 1);
8417
cf7898a6 8418 if (code == UNSPEC_LDCCLR
8419 || code == UNSPEC_LDCNC
8420 || code == UNSPEC_CHKACLR
8421 || code == UNSPEC_CHKANC
8422 || code == UNSPEC_CHKS)
ea13ae4c 8423 {
8424 gcc_assert (code != 0);
8425 return code;
8426 }
8427 }
8428 }
8429 }
8430 return 0;
8431}
58ada791 8432\f
a0bb0d20 8433
58ada791 8434/* The following page contains abstract data `bundle states' which are
 8435 used for bundling insns (inserting nops and generating templates). */
8436
8437/* The following describes state of insn bundling. */
8438
8439struct bundle_state
8440{
8441 /* Unique bundle state number to identify them in the debugging
8442 output */
8443 int unique_num;
8444 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
8445 /* number nops before and after the insn */
8446 short before_nops_num, after_nops_num;
8447 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
 8448 insn) */
8449 int cost; /* cost of the state in cycles */
8450 int accumulated_insns_num; /* number of all previous insns including
8451 nops. L is considered as 2 insns */
8452 int branch_deviation; /* deviation of previous branches from 3rd slots */
cf7898a6 8453 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
58ada791 8454 struct bundle_state *next; /* next state with the same insn_num */
8455 struct bundle_state *originator; /* originator (previous insn state) */
8456 /* All bundle states are in the following chain. */
8457 struct bundle_state *allocated_states_chain;
8458 /* The DFA State after issuing the insn and the nops. */
8459 state_t dfa_state;
8460};
a0bb0d20 8461
58ada791 8462/* The following maps an insn number to the corresponding bundle state. */
a0bb0d20 8463
58ada791 8464static struct bundle_state **index_to_bundle_states;
a0bb0d20 8465
58ada791 8466/* The unique number of next bundle state. */
a0bb0d20 8467
58ada791 8468static int bundle_states_num;
a0bb0d20 8469
58ada791 8470/* All allocated bundle states are in the following chain. */
a0bb0d20 8471
58ada791 8472static struct bundle_state *allocated_bundle_states_chain;
0aa63c5e 8473
58ada791 8474/* All allocated but not used bundle states are in the following
8475 chain. */
de4f4740 8476
58ada791 8477static struct bundle_state *free_bundle_state_chain;
a0bb0d20 8478
a0bb0d20 8479
58ada791 8480/* The following function returns a free bundle state. */
a0bb0d20 8481
58ada791 8482static struct bundle_state *
b40da9a7 8483get_free_bundle_state (void)
58ada791 8484{
8485 struct bundle_state *result;
a0bb0d20 8486
58ada791 8487 if (free_bundle_state_chain != NULL)
a0bb0d20 8488 {
58ada791 8489 result = free_bundle_state_chain;
8490 free_bundle_state_chain = result->next;
a0bb0d20 8491 }
58ada791 8492 else
a0bb0d20 8493 {
225ab426 8494 result = XNEW (struct bundle_state);
58ada791 8495 result->dfa_state = xmalloc (dfa_state_size);
8496 result->allocated_states_chain = allocated_bundle_states_chain;
8497 allocated_bundle_states_chain = result;
a0bb0d20 8498 }
58ada791 8499 result->unique_num = bundle_states_num++;
8500 return result;
b40da9a7 8501
58ada791 8502}
a0bb0d20 8503
58ada791 8504/* The following function frees the given bundle state.  */
a0bb0d20 8505
58ada791 8506static void
b40da9a7 8507free_bundle_state (struct bundle_state *state)
58ada791 8508{
8509 state->next = free_bundle_state_chain;
8510 free_bundle_state_chain = state;
8511}
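/* A minimal usage sketch for the two allocator functions above
   (illustrative only, compiled out; `some_dfa_state' is a hypothetical
   argument).  A caller obtains a state from the free list, fills it in,
   and either keeps it by inserting it into the hash table or hands it
   back to the free list.  */
#if 0
static void
bundle_state_usage_sketch (state_t some_dfa_state)
{
  struct bundle_state *s = get_free_bundle_state ();

  memcpy (s->dfa_state, some_dfa_state, dfa_state_size);
  s->insn_num = 3;
  s->cost = 0;
  /* The remaining fields are filled in the same way issue_nops_and_insn
     does later in this file.  */
  if (!insert_bundle_state (s))
    /* A state with the same key and a better cost already exists.  */
    free_bundle_state (s);
}
#endif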
a0bb0d20 8512
58ada791 8513/* Start work with abstract data `bundle states'. */
a0bb0d20 8514
58ada791 8515static void
b40da9a7 8516initiate_bundle_states (void)
58ada791 8517{
8518 bundle_states_num = 0;
8519 free_bundle_state_chain = NULL;
8520 allocated_bundle_states_chain = NULL;
a0bb0d20 8521}
8522
58ada791 8523/* Finish work with abstract data `bundle states'. */
a0bb0d20 8524
8525static void
b40da9a7 8526finish_bundle_states (void)
a0bb0d20 8527{
58ada791 8528 struct bundle_state *curr_state, *next_state;
8529
8530 for (curr_state = allocated_bundle_states_chain;
8531 curr_state != NULL;
8532 curr_state = next_state)
a0bb0d20 8533 {
58ada791 8534 next_state = curr_state->allocated_states_chain;
8535 free (curr_state->dfa_state);
8536 free (curr_state);
a0bb0d20 8537 }
a0bb0d20 8538}
8539
58ada791 8540/* Hash table of the bundle states. The key is dfa_state and insn_num
8541 of the bundle states. */
a0bb0d20 8542
58ada791 8543static htab_t bundle_state_table;
a0bb0d20 8544
58ada791 8545/* The function returns hash of BUNDLE_STATE. */
a0bb0d20 8546
58ada791 8547static unsigned
b40da9a7 8548bundle_state_hash (const void *bundle_state)
58ada791 8549{
fb80456a 8550 const struct bundle_state *const state
8551 = (const struct bundle_state *) bundle_state;
58ada791 8552 unsigned result, i;
a0bb0d20 8553
58ada791 8554 for (result = i = 0; i < dfa_state_size; i++)
8555 result += (((unsigned char *) state->dfa_state) [i]
8556 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8557 return result + state->insn_num;
8558}
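/* Stand-alone restatement of the mixing scheme used above (illustrative
   only, compiled out): every byte of the opaque DFA state contributes to
   the hash at a byte-position-dependent shift, and the insn number is
   added at the end so that equal DFA states reached at different insn
   positions hash differently.  */
#if 0
static unsigned
hash_bytes_sketch (const unsigned char *buf, size_t len, int insn_num)
{
  unsigned result = 0;
  size_t i;

  for (i = 0; i < len; i++)
    result += (unsigned) buf[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + insn_num;
}
#endif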
a0bb0d20 8559
58ada791 8560/* The function returns nonzero if the bundle state keys are equal. */
a0bb0d20 8561
58ada791 8562static int
b40da9a7 8563bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
58ada791 8564{
fb80456a 8565 const struct bundle_state *const state1
8566 = (const struct bundle_state *) bundle_state_1;
8567 const struct bundle_state *const state2
8568 = (const struct bundle_state *) bundle_state_2;
a0bb0d20 8569
58ada791 8570 return (state1->insn_num == state2->insn_num
8571 && memcmp (state1->dfa_state, state2->dfa_state,
8572 dfa_state_size) == 0);
8573}
a0bb0d20 8574
58ada791 8575/* The function inserts the BUNDLE_STATE into the hash table. The
8576 function returns nonzero if the bundle has been inserted into the
8577 table. The table contains the best bundle state with given key. */
a0bb0d20 8578
58ada791 8579static int
b40da9a7 8580insert_bundle_state (struct bundle_state *bundle_state)
58ada791 8581{
8582 void **entry_ptr;
a0bb0d20 8583
b9c74b4d 8584 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
58ada791 8585 if (*entry_ptr == NULL)
8586 {
8587 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8588 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8589 *entry_ptr = (void *) bundle_state;
8590 return TRUE;
a0bb0d20 8591 }
58ada791 8592 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8593 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8594 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8595 > bundle_state->accumulated_insns_num
8596 || (((struct bundle_state *)
8597 *entry_ptr)->accumulated_insns_num
8598 == bundle_state->accumulated_insns_num
cf7898a6 8599 && (((struct bundle_state *)
8600 *entry_ptr)->branch_deviation
8601 > bundle_state->branch_deviation
8602 || (((struct bundle_state *)
8603 *entry_ptr)->branch_deviation
8604 == bundle_state->branch_deviation
8605 && ((struct bundle_state *)
8606 *entry_ptr)->middle_bundle_stops
8607 > bundle_state->middle_bundle_stops))))))
b40da9a7 8608
a0bb0d20 8609 {
58ada791 8610 struct bundle_state temp;
8611
8612 temp = *(struct bundle_state *) *entry_ptr;
8613 *(struct bundle_state *) *entry_ptr = *bundle_state;
8614 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8615 *bundle_state = temp;
a0bb0d20 8616 }
58ada791 8617 return FALSE;
8618}
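/* The nested condition above implements a lexicographic "is the new
   state better" test.  The illustrative helper below (compiled out)
   spells out the same ordering: lower cost wins, then fewer accumulated
   insns (i.e. fewer nops), then smaller branch deviation, then fewer
   mid-bundle stop bits.  */
#if 0
static int
bundle_state_better_p (const struct bundle_state *new_state,
		       const struct bundle_state *old_state)
{
  if (new_state->cost != old_state->cost)
    return new_state->cost < old_state->cost;
  if (new_state->accumulated_insns_num != old_state->accumulated_insns_num)
    return new_state->accumulated_insns_num < old_state->accumulated_insns_num;
  if (new_state->branch_deviation != old_state->branch_deviation)
    return new_state->branch_deviation < old_state->branch_deviation;
  return new_state->middle_bundle_stops < old_state->middle_bundle_stops;
}
#endif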
a0bb0d20 8619
58ada791 8620/* Start work with the hash table. */
8621
8622static void
b40da9a7 8623initiate_bundle_state_table (void)
58ada791 8624{
8625 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8626 (htab_del) 0);
a0bb0d20 8627}
8628
58ada791 8629/* Finish work with the hash table. */
5c74c9e4 8630
8631static void
b40da9a7 8632finish_bundle_state_table (void)
5c74c9e4 8633{
58ada791 8634 htab_delete (bundle_state_table);
5c74c9e4 8635}
8636
58ada791 8637\f
1e0ce7b2 8638
58ada791 8639/* The following variable is an insn `nop' used to check bundle states
         8640   with different numbers of inserted nops.  */
1e0ce7b2 8641
58ada791 8642static rtx ia64_nop;
1e0ce7b2 8643
58ada791 8644/* The following function tries to issue NOPS_NUM nops for the current
         8645   state without advancing the processor cycle.  If it fails, the
         8646   function returns FALSE and frees the current state.  */
8647
8648static int
b40da9a7 8649try_issue_nops (struct bundle_state *curr_state, int nops_num)
1e0ce7b2 8650{
58ada791 8651 int i;
1e0ce7b2 8652
58ada791 8653 for (i = 0; i < nops_num; i++)
8654 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8655 {
8656 free_bundle_state (curr_state);
8657 return FALSE;
8658 }
8659 return TRUE;
8660}
1e0ce7b2 8661
58ada791 8662/* The following function tries to issue INSN for the current
         8663   state without advancing the processor cycle.  If it fails, the
         8664   function returns FALSE and frees the current state.  */
1e0ce7b2 8665
58ada791 8666static int
b40da9a7 8667try_issue_insn (struct bundle_state *curr_state, rtx insn)
58ada791 8668{
8669 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8670 {
8671 free_bundle_state (curr_state);
8672 return FALSE;
8673 }
8674 return TRUE;
8675}
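/* Caller contract for the two helpers above, shown as an illustrative
   (compiled out) fragment: when either helper returns FALSE the state
   has already been freed, so the caller must return immediately and
   must not touch CURR_STATE again.  */
#if 0
static void
issue_sketch (struct bundle_state *curr_state, rtx insn)
{
  if (!try_issue_nops (curr_state, 1))
    return;			/* curr_state already freed.  */
  if (!try_issue_insn (curr_state, insn))
    return;			/* curr_state already freed.  */
  curr_state->accumulated_insns_num++;
}
#endif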
1e0ce7b2 8676
58ada791 8677/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
         8678   starting with ORIGINATOR without advancing the processor cycle.  If
c4177b9b 8679   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
         8680   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
         8681   If successful, the function creates a new bundle state and inserts it
         8682   into the hash table and into `index_to_bundle_states'.  */
1e0ce7b2 8683
58ada791 8684static void
b40da9a7 8685issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8686 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
58ada791 8687{
8688 struct bundle_state *curr_state;
8689
8690 curr_state = get_free_bundle_state ();
8691 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8692 curr_state->insn = insn;
8693 curr_state->insn_num = originator->insn_num + 1;
8694 curr_state->cost = originator->cost;
8695 curr_state->originator = originator;
8696 curr_state->before_nops_num = before_nops_num;
8697 curr_state->after_nops_num = 0;
8698 curr_state->accumulated_insns_num
8699 = originator->accumulated_insns_num + before_nops_num;
8700 curr_state->branch_deviation = originator->branch_deviation;
cf7898a6 8701 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
c5c17bca 8702 gcc_assert (insn);
8703 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
58ada791 8704 {
c5c17bca 8705 gcc_assert (GET_MODE (insn) != TImode);
58ada791 8706 if (!try_issue_nops (curr_state, before_nops_num))
8707 return;
8708 if (!try_issue_insn (curr_state, insn))
8709 return;
8710 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
cf7898a6 8711 if (curr_state->accumulated_insns_num % 3 != 0)
8712 curr_state->middle_bundle_stops++;
58ada791 8713 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8714 && curr_state->accumulated_insns_num % 3 != 0)
1e0ce7b2 8715 {
58ada791 8716 free_bundle_state (curr_state);
8717 return;
1e0ce7b2 8718 }
1e0ce7b2 8719 }
58ada791 8720 else if (GET_MODE (insn) != TImode)
1e0ce7b2 8721 {
58ada791 8722 if (!try_issue_nops (curr_state, before_nops_num))
8723 return;
8724 if (!try_issue_insn (curr_state, insn))
8725 return;
c4177b9b 8726 curr_state->accumulated_insns_num++;
28d5c3d9 8727 gcc_assert (!unknown_for_bundling_p (insn));
c5c17bca 8728
58ada791 8729 if (ia64_safe_type (insn) == TYPE_L)
8730 curr_state->accumulated_insns_num++;
8731 }
8732 else
8733 {
5f121199 8734 /* If this is an insn that must be first in a group, then don't allow
8735 nops to be emitted before it. Currently, alloc is the only such
8736 supported instruction. */
8737 /* ??? The bundling automatons should handle this for us, but they do
8738 not yet have support for the first_insn attribute. */
8739 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8740 {
8741 free_bundle_state (curr_state);
8742 return;
8743 }
8744
58ada791 8745 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8746 state_transition (curr_state->dfa_state, NULL);
8747 curr_state->cost++;
8748 if (!try_issue_nops (curr_state, before_nops_num))
8749 return;
8750 if (!try_issue_insn (curr_state, insn))
8751 return;
c4177b9b 8752 curr_state->accumulated_insns_num++;
28d5c3d9 8753 if (unknown_for_bundling_p (insn))
c4177b9b 8754 {
8755 /* Finish bundle containing asm insn. */
8756 curr_state->after_nops_num
8757 = 3 - curr_state->accumulated_insns_num % 3;
8758 curr_state->accumulated_insns_num
8759 += 3 - curr_state->accumulated_insns_num % 3;
8760 }
8761 else if (ia64_safe_type (insn) == TYPE_L)
58ada791 8762 curr_state->accumulated_insns_num++;
8763 }
8764 if (ia64_safe_type (insn) == TYPE_B)
8765 curr_state->branch_deviation
8766 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8767 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8768 {
c4177b9b 8769 if (!only_bundle_end_p && insert_bundle_state (curr_state))
1e0ce7b2 8770 {
58ada791 8771 state_t dfa_state;
8772 struct bundle_state *curr_state1;
8773 struct bundle_state *allocated_states_chain;
8774
8775 curr_state1 = get_free_bundle_state ();
8776 dfa_state = curr_state1->dfa_state;
8777 allocated_states_chain = curr_state1->allocated_states_chain;
8778 *curr_state1 = *curr_state;
8779 curr_state1->dfa_state = dfa_state;
8780 curr_state1->allocated_states_chain = allocated_states_chain;
8781 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8782 dfa_state_size);
8783 curr_state = curr_state1;
1e0ce7b2 8784 }
58ada791 8785 if (!try_issue_nops (curr_state,
8786 3 - curr_state->accumulated_insns_num % 3))
8787 return;
8788 curr_state->after_nops_num
8789 = 3 - curr_state->accumulated_insns_num % 3;
8790 curr_state->accumulated_insns_num
8791 += 3 - curr_state->accumulated_insns_num % 3;
1e0ce7b2 8792 }
58ada791 8793 if (!insert_bundle_state (curr_state))
8794 free_bundle_state (curr_state);
8795 return;
8796}
2e2347b5 8797
58ada791 8798/* The following function returns the position in the two-bundle window
         8799   for the given STATE.  */
8800
8801static int
b40da9a7 8802get_max_pos (state_t state)
58ada791 8803{
8804 if (cpu_unit_reservation_p (state, pos_6))
8805 return 6;
8806 else if (cpu_unit_reservation_p (state, pos_5))
8807 return 5;
8808 else if (cpu_unit_reservation_p (state, pos_4))
8809 return 4;
8810 else if (cpu_unit_reservation_p (state, pos_3))
8811 return 3;
8812 else if (cpu_unit_reservation_p (state, pos_2))
8813 return 2;
8814 else if (cpu_unit_reservation_p (state, pos_1))
8815 return 1;
8816 else
8817 return 0;
1e0ce7b2 8818}
8819
58ada791 8820/* The function returns the code of a possible template for the given
         8821   position and state.  The function should be called only with the two
17d386e4 8822   position values 3 or 6.  We avoid generating F NOPs by putting
         8823   templates containing F insns at the end of the template search,
         8824   because an undocumented anomaly in McKinley-derived cores can
         8825   cause stalls if an F-unit insn (including a NOP) is issued within a
         8826   six-cycle window after reading certain application registers (such
         8827   as ar.bsp).  Furthermore, power considerations also argue against
         8828   the use of F-unit instructions unless they're really needed.  */
a0bb0d20 8829
747af5e7 8830static int
b40da9a7 8831get_template (state_t state, int pos)
a0bb0d20 8832{
58ada791 8833 switch (pos)
a0bb0d20 8834 {
58ada791 8835 case 3:
17d386e4 8836 if (cpu_unit_reservation_p (state, _0mmi_))
58ada791 8837 return 1;
17d386e4 8838 else if (cpu_unit_reservation_p (state, _0mii_))
8839 return 0;
58ada791 8840 else if (cpu_unit_reservation_p (state, _0mmb_))
8841 return 7;
17d386e4 8842 else if (cpu_unit_reservation_p (state, _0mib_))
8843 return 6;
8844 else if (cpu_unit_reservation_p (state, _0mbb_))
8845 return 5;
8846 else if (cpu_unit_reservation_p (state, _0bbb_))
8847 return 4;
8848 else if (cpu_unit_reservation_p (state, _0mmf_))
8849 return 3;
8850 else if (cpu_unit_reservation_p (state, _0mfi_))
8851 return 2;
58ada791 8852 else if (cpu_unit_reservation_p (state, _0mfb_))
8853 return 8;
8854 else if (cpu_unit_reservation_p (state, _0mlx_))
8855 return 9;
8856 else
c5c17bca 8857 gcc_unreachable ();
58ada791 8858 case 6:
17d386e4 8859 if (cpu_unit_reservation_p (state, _1mmi_))
58ada791 8860 return 1;
17d386e4 8861 else if (cpu_unit_reservation_p (state, _1mii_))
8862 return 0;
58ada791 8863 else if (cpu_unit_reservation_p (state, _1mmb_))
8864 return 7;
17d386e4 8865 else if (cpu_unit_reservation_p (state, _1mib_))
8866 return 6;
8867 else if (cpu_unit_reservation_p (state, _1mbb_))
8868 return 5;
8869 else if (cpu_unit_reservation_p (state, _1bbb_))
8870 return 4;
8871 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8872 return 3;
8873 else if (cpu_unit_reservation_p (state, _1mfi_))
8874 return 2;
58ada791 8875 else if (cpu_unit_reservation_p (state, _1mfb_))
8876 return 8;
8877 else if (cpu_unit_reservation_p (state, _1mlx_))
8878 return 9;
8879 else
c5c17bca 8880 gcc_unreachable ();
58ada791 8881 default:
c5c17bca 8882 gcc_unreachable ();
a0bb0d20 8883 }
58ada791 8884}
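/* For reference, the codes returned above can be read off the names of
   the cpu units they test: code N selects bundle template N of the table
   used by the bundle_selector insns emitted elsewhere in this file.  The
   assumed order is shown below (illustrative only, compiled out).  */
#if 0
static const char *const template_names_sketch[] =
  { ".mii", ".mmi", ".mfi", ".mmf", ".bbb",
    ".mbb", ".mib", ".mmb", ".mfb", ".mlx" };
#endif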
a0bb0d20 8885
cf7898a6 8886/* True when INSN is important for bundling. */
28d5c3d9 8887
cf7898a6 8888static bool
8889important_for_bundling_p (rtx insn)
8890{
8891 return (INSN_P (insn)
8892 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8893 && GET_CODE (PATTERN (insn)) != USE
8894 && GET_CODE (PATTERN (insn)) != CLOBBER);
8895}
8896
58ada791 8897/* The following function returns an insn important for insn bundling
8898 followed by INSN and before TAIL. */
1e0ce7b2 8899
58ada791 8900static rtx
b40da9a7 8901get_next_important_insn (rtx insn, rtx tail)
58ada791 8902{
8903 for (; insn && insn != tail; insn = NEXT_INSN (insn))
cf7898a6 8904 if (important_for_bundling_p (insn))
58ada791 8905 return insn;
8906 return NULL_RTX;
8907}
8908
28d5c3d9 8909/* True when INSN is unknown, but important, for bundling. */
8910
8911static bool
8912unknown_for_bundling_p (rtx insn)
8913{
8914 return (INSN_P (insn)
8915 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8916 && GET_CODE (PATTERN (insn)) != USE
8917 && GET_CODE (PATTERN (insn)) != CLOBBER);
8918}
8919
98154846 8920/* Add a bundle selector TEMPLATE0 before INSN. */
8921
8922static void
8923ia64_add_bundle_selector_before (int template0, rtx insn)
8924{
8925 rtx b = gen_bundle_selector (GEN_INT (template0));
8926
8927 ia64_emit_insn_before (b, insn);
8928#if NR_BUNDLES == 10
8929 if ((template0 == 4 || template0 == 5)
b213bf24 8930 && ia64_except_unwind_info (&global_options) == UI_TARGET)
98154846 8931 {
8932 int i;
8933 rtx note = NULL_RTX;
8934
8935 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8936 first or second slot. If it is and has REG_EH_NOTE set, copy it
8937 to following nops, as br.call sets rp to the address of following
8938 bundle and therefore an EH region end must be on a bundle
8939 boundary. */
8940 insn = PREV_INSN (insn);
8941 for (i = 0; i < 3; i++)
8942 {
8943 do
8944 insn = next_active_insn (insn);
8945 while (GET_CODE (insn) == INSN
8946 && get_attr_empty (insn) == EMPTY_YES);
8947 if (GET_CODE (insn) == CALL_INSN)
8948 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8949 else if (note)
8950 {
8951 int code;
8952
8953 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8954 || code == CODE_FOR_nop_b);
8955 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8956 note = NULL_RTX;
8957 else
b9c74b4d 8958 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
98154846 8959 }
8960 }
8961 }
8962#endif
8963}
8964
df4668bf 8965/* The following function does insn bundling. Bundling means
8966 inserting templates and nop insns to fit insn groups into permitted
         8967   templates.  Instruction scheduling uses an NDFA (non-deterministic
         8968   finite automaton) encoding information about the templates and the
         8969   inserted nops.  The nondeterminism of the automaton makes it possible
         8970   to follow all possible insn sequences very quickly.
8971
8972 Unfortunately it is not possible to get information about inserting
         8973   nop insns and used templates from the automaton states.  The
         8974   automaton only says that we can issue an insn, possibly inserting
8975 some nops before it and using some template. Therefore insn
8976 bundling in this function is implemented by using DFA
ea13ae4c 8977 (deterministic finite automata). We follow all possible insn
df4668bf 8978 sequences by inserting 0-2 nops (that is what the NDFA describe for
8979 insn scheduling) before/after each insn being bundled. We know the
8980 start of simulated processor cycle from insn scheduling (insn
8981 starting a new cycle has TImode).
8982
8983 Simple implementation of insn bundling would create enormous
8984 number of possible insn sequences satisfying information about new
8985 cycle ticks taken from the insn scheduling. To make the algorithm
8986 practical we use dynamic programming. Each decision (about
8987 inserting nops and implicitly about previous decisions) is described
8988 by structure bundle_state (see above). If we generate the same
8989 bundle state (key is automaton state after issuing the insns and
         8990   nops for it), we reuse the already generated one.  As a consequence we
33f88b1c 8991 reject some decisions which cannot improve the solution and
df4668bf 8992 reduce memory for the algorithm.
8993
8994 When we reach the end of EBB (extended basic block), we choose the
8995 best sequence and then, moving back in EBB, insert templates for
8996 the best alternative. The templates are taken from querying
8997 automaton state for each insn in chosen bundle states.
8998
8999 So the algorithm makes two (forward and backward) passes through
e65caf8d 9000 EBB. */
1e0ce7b2 9001
58ada791 9002static void
b40da9a7 9003bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
58ada791 9004{
9005 struct bundle_state *curr_state, *next_state, *best_state;
9006 rtx insn, next_insn;
9007 int insn_num;
c4177b9b 9008 int i, bundle_end_p, only_bundle_end_p, asm_p;
36b7e0e7 9009 int pos = 0, max_pos, template0, template1;
58ada791 9010 rtx b;
9011 rtx nop;
9012 enum attr_type type;
8de12924 9013
58ada791 9014 insn_num = 0;
df4668bf 9015 /* Count insns in the EBB. */
58ada791 9016 for (insn = NEXT_INSN (prev_head_insn);
9017 insn && insn != tail;
9018 insn = NEXT_INSN (insn))
9019 if (INSN_P (insn))
9020 insn_num++;
9021 if (insn_num == 0)
9022 return;
9023 bundling_p = 1;
9024 dfa_clean_insn_cache ();
9025 initiate_bundle_state_table ();
225ab426 9026 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
1d60d981 9027 /* First (forward) pass -- generation of bundle states. */
58ada791 9028 curr_state = get_free_bundle_state ();
9029 curr_state->insn = NULL;
9030 curr_state->before_nops_num = 0;
9031 curr_state->after_nops_num = 0;
9032 curr_state->insn_num = 0;
9033 curr_state->cost = 0;
9034 curr_state->accumulated_insns_num = 0;
9035 curr_state->branch_deviation = 0;
cf7898a6 9036 curr_state->middle_bundle_stops = 0;
58ada791 9037 curr_state->next = NULL;
9038 curr_state->originator = NULL;
9039 state_reset (curr_state->dfa_state);
9040 index_to_bundle_states [0] = curr_state;
9041 insn_num = 0;
df4668bf 9042  /* Shift the cycle mark if it is set on an insn which could be ignored.  */
58ada791 9043 for (insn = NEXT_INSN (prev_head_insn);
9044 insn != tail;
9045 insn = NEXT_INSN (insn))
9046 if (INSN_P (insn)
28d5c3d9 9047 && !important_for_bundling_p (insn)
58ada791 9048 && GET_MODE (insn) == TImode)
a0bb0d20 9049 {
58ada791 9050 PUT_MODE (insn, VOIDmode);
9051 for (next_insn = NEXT_INSN (insn);
9052 next_insn != tail;
9053 next_insn = NEXT_INSN (next_insn))
28d5c3d9 9054 if (important_for_bundling_p (next_insn)
cf7898a6 9055 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
58ada791 9056 {
9057 PUT_MODE (next_insn, TImode);
9058 break;
9059 }
a0bb0d20 9060 }
ea13ae4c 9061 /* Forward pass: generation of bundle states. */
58ada791 9062 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9063 insn != NULL_RTX;
9064 insn = next_insn)
0f717932 9065 {
28d5c3d9 9066 gcc_assert (important_for_bundling_p (insn));
c4177b9b 9067 type = ia64_safe_type (insn);
58ada791 9068 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9069 insn_num++;
9070 index_to_bundle_states [insn_num] = NULL;
9071 for (curr_state = index_to_bundle_states [insn_num - 1];
9072 curr_state != NULL;
9073 curr_state = next_state)
43a2533b 9074 {
58ada791 9075 pos = curr_state->accumulated_insns_num % 3;
58ada791 9076 next_state = curr_state->next;
df4668bf 9077 /* We must fill up the current bundle in order to start a
9078 subsequent asm insn in a new bundle. Asm insn is always
9079 placed in a separate bundle. */
c4177b9b 9080 only_bundle_end_p
9081 = (next_insn != NULL_RTX
9082 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
28d5c3d9 9083 && unknown_for_bundling_p (next_insn));
df4668bf 9084 /* We may fill up the current bundle if it is the cycle end
9085 without a group barrier. */
58ada791 9086 bundle_end_p
c4177b9b 9087 = (only_bundle_end_p || next_insn == NULL_RTX
58ada791 9088 || (GET_MODE (next_insn) == TImode
9089 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9090 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
e65caf8d 9091 || type == TYPE_S)
c4177b9b 9092 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9093 only_bundle_end_p);
9094 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9095 only_bundle_end_p);
9096 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9097 only_bundle_end_p);
43a2533b 9098 }
c5c17bca 9099 gcc_assert (index_to_bundle_states [insn_num]);
58ada791 9100 for (curr_state = index_to_bundle_states [insn_num];
9101 curr_state != NULL;
9102 curr_state = curr_state->next)
9103 if (verbose >= 2 && dump)
9104 {
df4668bf 9105 /* This structure is taken from generated code of the
9106 pipeline hazard recognizer (see file insn-attrtab.c).
9107 Please don't forget to change the structure if a new
9108 automaton is added to .md file. */
58ada791 9109 struct DFA_chip
9110 {
9111 unsigned short one_automaton_state;
9112 unsigned short oneb_automaton_state;
9113 unsigned short two_automaton_state;
9114 unsigned short twob_automaton_state;
9115 };
b40da9a7 9116
58ada791 9117 fprintf
9118 (dump,
cf7898a6 9119 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
58ada791 9120 curr_state->unique_num,
9121 (curr_state->originator == NULL
9122 ? -1 : curr_state->originator->unique_num),
9123 curr_state->cost,
9124 curr_state->before_nops_num, curr_state->after_nops_num,
9125 curr_state->accumulated_insns_num, curr_state->branch_deviation,
cf7898a6 9126 curr_state->middle_bundle_stops,
e65caf8d 9127 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
58ada791 9128 INSN_UID (insn));
9129 }
0f717932 9130 }
c5c17bca 9131
9132 /* We should find a solution because the 2nd insn scheduling has
9133 found one. */
9134 gcc_assert (index_to_bundle_states [insn_num]);
df4668bf 9135 /* Find a state corresponding to the best insn sequence. */
58ada791 9136 best_state = NULL;
9137 for (curr_state = index_to_bundle_states [insn_num];
9138 curr_state != NULL;
9139 curr_state = curr_state->next)
df4668bf 9140    /* We only look at states whose last bundle is completely filled.
         9141       First we prefer insn sequences with minimal cost, then those with
         9142       the fewest inserted nops, then those with branch insns placed in
         9143       the 3rd slots, and finally those with the fewest mid-bundle stop bits.  */
58ada791 9144 if (curr_state->accumulated_insns_num % 3 == 0
9145 && (best_state == NULL || best_state->cost > curr_state->cost
9146 || (best_state->cost == curr_state->cost
9147 && (curr_state->accumulated_insns_num
9148 < best_state->accumulated_insns_num
9149 || (curr_state->accumulated_insns_num
9150 == best_state->accumulated_insns_num
cf7898a6 9151 && (curr_state->branch_deviation
9152 < best_state->branch_deviation
9153 || (curr_state->branch_deviation
9154 == best_state->branch_deviation
9155 && curr_state->middle_bundle_stops
9156 < best_state->middle_bundle_stops)))))))
58ada791 9157 best_state = curr_state;
df4668bf 9158 /* Second (backward) pass: adding nops and templates. */
cf7898a6 9159 gcc_assert (best_state);
58ada791 9160 insn_num = best_state->before_nops_num;
9161 template0 = template1 = -1;
9162 for (curr_state = best_state;
9163 curr_state->originator != NULL;
9164 curr_state = curr_state->originator)
9165 {
9166 insn = curr_state->insn;
28d5c3d9 9167 asm_p = unknown_for_bundling_p (insn);
58ada791 9168 insn_num++;
9169 if (verbose >= 2 && dump)
a0bb0d20 9170 {
58ada791 9171 struct DFA_chip
9172 {
9173 unsigned short one_automaton_state;
9174 unsigned short oneb_automaton_state;
9175 unsigned short two_automaton_state;
9176 unsigned short twob_automaton_state;
9177 };
b40da9a7 9178
58ada791 9179 fprintf
9180 (dump,
cf7898a6 9181 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
58ada791 9182 curr_state->unique_num,
9183 (curr_state->originator == NULL
9184 ? -1 : curr_state->originator->unique_num),
9185 curr_state->cost,
9186 curr_state->before_nops_num, curr_state->after_nops_num,
9187 curr_state->accumulated_insns_num, curr_state->branch_deviation,
cf7898a6 9188 curr_state->middle_bundle_stops,
e65caf8d 9189 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
58ada791 9190 INSN_UID (insn));
a0bb0d20 9191 }
df4668bf 9192 /* Find the position in the current bundle window. The window can
         9193	 contain at most two bundles.  A two-bundle window means that
         9194	 the processor will make two bundle rotations.  */
58ada791 9195 max_pos = get_max_pos (curr_state->dfa_state);
df4668bf 9196 if (max_pos == 6
9197 /* The following (negative template number) means that the
9198 processor did one bundle rotation. */
9199 || (max_pos == 3 && template0 < 0))
a0bb0d20 9200 {
df4668bf 9201 /* We are at the end of the window -- find template(s) for
9202 its bundle(s). */
58ada791 9203 pos = max_pos;
9204 if (max_pos == 3)
9205 template0 = get_template (curr_state->dfa_state, 3);
9206 else
9207 {
9208 template1 = get_template (curr_state->dfa_state, 3);
9209 template0 = get_template (curr_state->dfa_state, 6);
9210 }
9211 }
9212 if (max_pos > 3 && template1 < 0)
df4668bf 9213 /* It may happen when we have the stop inside a bundle. */
58ada791 9214 {
c5c17bca 9215 gcc_assert (pos <= 3);
58ada791 9216 template1 = get_template (curr_state->dfa_state, 3);
9217 pos += 3;
9218 }
c4177b9b 9219 if (!asm_p)
df4668bf 9220 /* Emit nops after the current insn. */
c4177b9b 9221 for (i = 0; i < curr_state->after_nops_num; i++)
9222 {
9223 nop = gen_nop ();
9224 emit_insn_after (nop, insn);
9225 pos--;
c5c17bca 9226 gcc_assert (pos >= 0);
c4177b9b 9227 if (pos % 3 == 0)
9228 {
df4668bf 9229 /* We are at the start of a bundle: emit the template
9230 (it should be defined). */
c5c17bca 9231 gcc_assert (template0 >= 0);
98154846 9232 ia64_add_bundle_selector_before (template0, nop);
df4668bf 9233		  /* If we have a two-bundle window, we make one bundle
9234 rotation. Otherwise template0 will be undefined
9235 (negative value). */
c4177b9b 9236 template0 = template1;
9237 template1 = -1;
9238 }
9239 }
df4668bf 9240 /* Move the position backward in the window. Group barrier has
         9241	 no slot.  An asm insn takes a whole bundle.  */
58ada791 9242 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
28d5c3d9 9243 && !unknown_for_bundling_p (insn))
58ada791 9244 pos--;
df4668bf 9245 /* Long insn takes 2 slots. */
58ada791 9246 if (ia64_safe_type (insn) == TYPE_L)
9247 pos--;
c5c17bca 9248 gcc_assert (pos >= 0);
58ada791 9249 if (pos % 3 == 0
9250 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
28d5c3d9 9251 && !unknown_for_bundling_p (insn))
58ada791 9252 {
df4668bf 9253 /* The current insn is at the bundle start: emit the
9254 template. */
c5c17bca 9255 gcc_assert (template0 >= 0);
98154846 9256 ia64_add_bundle_selector_before (template0, insn);
58ada791 9257 b = PREV_INSN (insn);
9258 insn = b;
bf67dc63 9259 /* See comment above in analogous place for emitting nops
df4668bf 9260 after the insn. */
58ada791 9261 template0 = template1;
9262 template1 = -1;
9263 }
df4668bf 9264      /* Emit nops before the current insn.  */
58ada791 9265 for (i = 0; i < curr_state->before_nops_num; i++)
9266 {
9267 nop = gen_nop ();
9268 ia64_emit_insn_before (nop, insn);
9269 nop = PREV_INSN (insn);
9270 insn = nop;
9271 pos--;
c5c17bca 9272 gcc_assert (pos >= 0);
58ada791 9273 if (pos % 3 == 0)
9274 {
bf67dc63 9275 /* See comment above in analogous place for emitting nops
df4668bf 9276 after the insn. */
c5c17bca 9277 gcc_assert (template0 >= 0);
98154846 9278 ia64_add_bundle_selector_before (template0, insn);
58ada791 9279 b = PREV_INSN (insn);
9280 insn = b;
9281 template0 = template1;
9282 template1 = -1;
9283 }
a0bb0d20 9284 }
9285 }
cf7898a6 9286
9287#ifdef ENABLE_CHECKING
9288 {
9289 /* Assert right calculation of middle_bundle_stops. */
9290 int num = best_state->middle_bundle_stops;
9291 bool start_bundle = true, end_bundle = false;
9292
9293 for (insn = NEXT_INSN (prev_head_insn);
9294 insn && insn != tail;
9295 insn = NEXT_INSN (insn))
9296 {
9297 if (!INSN_P (insn))
9298 continue;
9299 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9300 start_bundle = true;
9301 else
9302 {
9303 rtx next_insn;
9304
9305 for (next_insn = NEXT_INSN (insn);
9306 next_insn && next_insn != tail;
9307 next_insn = NEXT_INSN (next_insn))
9308 if (INSN_P (next_insn)
9309 && (ia64_safe_itanium_class (next_insn)
9310 != ITANIUM_CLASS_IGNORE
9311 || recog_memoized (next_insn)
9312 == CODE_FOR_bundle_selector)
9313 && GET_CODE (PATTERN (next_insn)) != USE
9314 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9315 break;
9316
9317 end_bundle = next_insn == NULL_RTX
9318 || next_insn == tail
9319 || (INSN_P (next_insn)
9320 && recog_memoized (next_insn)
9321 == CODE_FOR_bundle_selector);
9322 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9323 && !start_bundle && !end_bundle
9324 && next_insn
28d5c3d9 9325 && !unknown_for_bundling_p (next_insn))
cf7898a6 9326 num--;
9327
9328 start_bundle = false;
9329 }
9330 }
9331
9332 gcc_assert (num == 0);
9333 }
9334#endif
9335
58ada791 9336 free (index_to_bundle_states);
9337 finish_bundle_state_table ();
9338 bundling_p = 0;
9339 dfa_clean_insn_cache ();
a0bb0d20 9340}
ac445222 9341
58ada791 9342/* The following function is called at the end of scheduling BB or
9343 EBB. After reload, it inserts stop bits and does insn bundling. */
9344
9345static void
b40da9a7 9346ia64_sched_finish (FILE *dump, int sched_verbose)
747af5e7 9347{
58ada791 9348 if (sched_verbose)
9349 fprintf (dump, "// Finishing schedule.\n");
9350 if (!reload_completed)
9351 return;
9352 if (reload_completed)
9353 {
9354 final_emit_insn_group_barriers (dump);
9355 bundling (dump, sched_verbose, current_sched_info->prev_head,
9356 current_sched_info->next_tail);
9357 if (sched_verbose && dump)
9358 fprintf (dump, "// finishing %d-%d\n",
9359 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9360 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
b40da9a7 9361
58ada791 9362 return;
9363 }
747af5e7 9364}
9365
58ada791 9366/* The following function inserts stop bits in scheduled BB or EBB. */
a0bb0d20 9367
58ada791 9368static void
b40da9a7 9369final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
a0bb0d20 9370{
58ada791 9371 rtx insn;
9372 int need_barrier_p = 0;
cf7898a6 9373 int seen_good_insn = 0;
a0bb0d20 9374
58ada791 9375 init_insn_group_barriers ();
a0bb0d20 9376
58ada791 9377 for (insn = NEXT_INSN (current_sched_info->prev_head);
9378 insn != current_sched_info->next_tail;
9379 insn = NEXT_INSN (insn))
9380 {
9381 if (GET_CODE (insn) == BARRIER)
5eb8a656 9382 {
58ada791 9383 rtx last = prev_active_insn (insn);
5da2bd31 9384
58ada791 9385 if (! last)
5eb8a656 9386 continue;
58ada791 9387 if (GET_CODE (last) == JUMP_INSN
9388 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9389 last = prev_active_insn (last);
9390 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9391 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
a0bb0d20 9392
58ada791 9393 init_insn_group_barriers ();
cf7898a6 9394 seen_good_insn = 0;
58ada791 9395 need_barrier_p = 0;
5eb8a656 9396 }
9845d120 9397 else if (NONDEBUG_INSN_P (insn))
a0bb0d20 9398 {
58ada791 9399 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
a0bb0d20 9400 {
58ada791 9401 init_insn_group_barriers ();
cf7898a6 9402 seen_good_insn = 0;
58ada791 9403 need_barrier_p = 0;
ac445222 9404 }
cf7898a6 9405 else if (need_barrier_p || group_barrier_needed (insn)
9406 || (mflag_sched_stop_bits_after_every_cycle
9407 && GET_MODE (insn) == TImode
9408 && seen_good_insn))
a0bb0d20 9409 {
58ada791 9410 if (TARGET_EARLY_STOP_BITS)
9411 {
9412 rtx last;
b40da9a7 9413
58ada791 9414 for (last = insn;
9415 last != current_sched_info->prev_head;
9416 last = PREV_INSN (last))
9417 if (INSN_P (last) && GET_MODE (last) == TImode
9418 && stops_p [INSN_UID (last)])
9419 break;
9420 if (last == current_sched_info->prev_head)
9421 last = insn;
9422 last = prev_active_insn (last);
9423 if (last
9424 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9425 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9426 last);
9427 init_insn_group_barriers ();
9428 for (last = NEXT_INSN (last);
9429 last != insn;
9430 last = NEXT_INSN (last))
9431 if (INSN_P (last))
cf7898a6 9432 {
9433 group_barrier_needed (last);
9434 if (recog_memoized (last) >= 0
9435 && important_for_bundling_p (last))
9436 seen_good_insn = 1;
9437 }
58ada791 9438 }
9439 else
9440 {
9441 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9442 insn);
9443 init_insn_group_barriers ();
cf7898a6 9444 seen_good_insn = 0;
58ada791 9445 }
fb16e37a 9446 group_barrier_needed (insn);
cf7898a6 9447 if (recog_memoized (insn) >= 0
9448 && important_for_bundling_p (insn))
9449 seen_good_insn = 1;
a0bb0d20 9450 }
cf7898a6 9451 else if (recog_memoized (insn) >= 0
9452 && important_for_bundling_p (insn))
fdf7fc97 9453 seen_good_insn = 1;
58ada791 9454 need_barrier_p = (GET_CODE (insn) == CALL_INSN
28d5c3d9 9455 || unknown_for_bundling_p (insn));
ac445222 9456 }
a0bb0d20 9457 }
58ada791 9458}
a0bb0d20 9459
58ada791 9460\f
a0bb0d20 9461
3ce7ff97 9462/* If the following function returns TRUE, we will use the DFA
58ada791 9463 insn scheduler. */
a0bb0d20 9464
747af5e7 9465static int
b40da9a7 9466ia64_first_cycle_multipass_dfa_lookahead (void)
a0bb0d20 9467{
58ada791 9468 return (reload_completed ? 6 : 4);
9469}
a0bb0d20 9470
58ada791 9471/* The following function initializes the variable `dfa_pre_cycle_insn'.  */
a0bb0d20 9472
58ada791 9473static void
b40da9a7 9474ia64_init_dfa_pre_cycle_insn (void)
58ada791 9475{
9476 if (temp_dfa_state == NULL)
a0bb0d20 9477 {
58ada791 9478 dfa_state_size = state_size ();
9479 temp_dfa_state = xmalloc (dfa_state_size);
9480 prev_cycle_state = xmalloc (dfa_state_size);
a0bb0d20 9481 }
58ada791 9482 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9483 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9484 recog_memoized (dfa_pre_cycle_insn);
9485 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9486 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9487 recog_memoized (dfa_stop_insn);
9488}
a0bb0d20 9489
58ada791 9490/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9491 used by the DFA insn scheduler. */
a0bb0d20 9492
58ada791 9493static rtx
b40da9a7 9494ia64_dfa_pre_cycle_insn (void)
58ada791 9495{
9496 return dfa_pre_cycle_insn;
9497}
a0bb0d20 9498
58ada791 9499/* The following function returns TRUE if PRODUCER (of type ilog or
         9500   ld) produces an address for CONSUMER (of type st or stf). */
a0bb0d20 9501
58ada791 9502int
b40da9a7 9503ia64_st_address_bypass_p (rtx producer, rtx consumer)
58ada791 9504{
9505 rtx dest, reg, mem;
a0bb0d20 9506
c5c17bca 9507 gcc_assert (producer && consumer);
58ada791 9508 dest = ia64_single_set (producer);
c5c17bca 9509 gcc_assert (dest);
9510 reg = SET_DEST (dest);
9511 gcc_assert (reg);
58ada791 9512 if (GET_CODE (reg) == SUBREG)
9513 reg = SUBREG_REG (reg);
c5c17bca 9514 gcc_assert (GET_CODE (reg) == REG);
9515
58ada791 9516 dest = ia64_single_set (consumer);
c5c17bca 9517 gcc_assert (dest);
9518 mem = SET_DEST (dest);
9519 gcc_assert (mem && GET_CODE (mem) == MEM);
58ada791 9520 return reg_mentioned_p (reg, mem);
a0bb0d20 9521}
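/* Schematic example (not a verified RTL dump) of the pattern the
   function above recognizes: the producer computes an address and the
   consumer stores through it,

     producer:  (set (reg:DI r14) (plus:DI (reg:DI r32) (const_int 8)))
     consumer:  (set (mem:DI (reg:DI r14)) (reg:DI r33))

   Since r14 is mentioned in the consumer's MEM address, the function
   returns nonzero and the scheduler applies the bypass latency declared
   for this producer/consumer pair in the .md file.  */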
9522
58ada791 9523/* The following function returns TRUE if PRODUCER (of type ilog or
         9524   ld) produces an address for CONSUMER (of type ld or fld). */
a0bb0d20 9525
58ada791 9526int
b40da9a7 9527ia64_ld_address_bypass_p (rtx producer, rtx consumer)
a0bb0d20 9528{
58ada791 9529 rtx dest, src, reg, mem;
9530
c5c17bca 9531 gcc_assert (producer && consumer);
58ada791 9532 dest = ia64_single_set (producer);
c5c17bca 9533 gcc_assert (dest);
9534 reg = SET_DEST (dest);
9535 gcc_assert (reg);
58ada791 9536 if (GET_CODE (reg) == SUBREG)
9537 reg = SUBREG_REG (reg);
c5c17bca 9538 gcc_assert (GET_CODE (reg) == REG);
9539
58ada791 9540 src = ia64_single_set (consumer);
c5c17bca 9541 gcc_assert (src);
9542 mem = SET_SRC (src);
9543 gcc_assert (mem);
ea13ae4c 9544
58ada791 9545 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9546 mem = XVECEXP (mem, 0, 0);
ea13ae4c 9547 else if (GET_CODE (mem) == IF_THEN_ELSE)
9ca2c29a 9548 /* ??? Is this bypass necessary for ld.c? */
ea13ae4c 9549 {
9550 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9551 mem = XEXP (mem, 1);
9552 }
9553
58ada791 9554 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9555 mem = XEXP (mem, 0);
0cc4b4a2 9556
ea13ae4c 9557 if (GET_CODE (mem) == UNSPEC)
9558 {
9559 int c = XINT (mem, 1);
9560
cf7898a6 9561 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9562 || c == UNSPEC_LDSA);
ea13ae4c 9563 mem = XVECEXP (mem, 0, 0);
9564 }
9565
0cc4b4a2 9566 /* Note that LO_SUM is used for GOT loads. */
c5c17bca 9567 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
0cc4b4a2 9568
58ada791 9569 return reg_mentioned_p (reg, mem);
9570}
9571
         9572/* The following function returns TRUE if INSN produces an address for a
         9573   load/store insn.  We will place such insns into an M slot because that
1d60d981 9574   decreases their latency.  */
58ada791 9575
9576int
b40da9a7 9577ia64_produce_address_p (rtx insn)
58ada791 9578{
9579 return insn->call;
a0bb0d20 9580}
58ada791 9581
a0bb0d20 9582\f
9b06caff 9583/* Emit pseudo-ops for the assembler to describe predicate relations.
9584 At present this assumes that we only consider predicate pairs to
9585 be mutex, and that the assembler can deduce proper values from
9586 straight-line code. */
9587
9588static void
b40da9a7 9589emit_predicate_relation_info (void)
9b06caff 9590{
4c26117a 9591 basic_block bb;
9b06caff 9592
4c26117a 9593 FOR_EACH_BB_REVERSE (bb)
9b06caff 9594 {
9b06caff 9595 int r;
5496dbfc 9596 rtx head = BB_HEAD (bb);
9b06caff 9597
9598 /* We only need such notes at code labels. */
9599 if (GET_CODE (head) != CODE_LABEL)
9600 continue;
98b6e5c5 9601 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9b06caff 9602 head = NEXT_INSN (head);
9603
92a8446a 9604 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9605 grabbing the entire block of predicate registers. */
9606 for (r = PR_REG (2); r < PR_REG (64); r += 2)
3072d30e 9607 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9b06caff 9608 {
33c8f6d1 9609 rtx p = gen_rtx_REG (BImode, r);
aad3d095 9610 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
5496dbfc 9611 if (head == BB_END (bb))
9612 BB_END (bb) = n;
9b06caff 9613 head = n;
9614 }
9615 }
356b51a0 9616
9617 /* Look for conditional calls that do not return, and protect predicate
9618 relations around them. Otherwise the assembler will assume the call
9619 returns, and complain about uses of call-clobbered predicates after
9620 the call. */
4c26117a 9621 FOR_EACH_BB_REVERSE (bb)
356b51a0 9622 {
5496dbfc 9623 rtx insn = BB_HEAD (bb);
b40da9a7 9624
356b51a0 9625 while (1)
9626 {
9627 if (GET_CODE (insn) == CALL_INSN
9628 && GET_CODE (PATTERN (insn)) == COND_EXEC
9629 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9630 {
9631 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9632 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
5496dbfc 9633 if (BB_HEAD (bb) == insn)
9634 BB_HEAD (bb) = b;
9635 if (BB_END (bb) == insn)
9636 BB_END (bb) = a;
356b51a0 9637 }
b40da9a7 9638
5496dbfc 9639 if (insn == BB_END (bb))
356b51a0 9640 break;
9641 insn = NEXT_INSN (insn);
9642 }
9643 }
9b06caff 9644}
9645
ac445222 9646/* Perform machine dependent operations on the rtl chain INSNS. */
9647
2efea8c0 9648static void
b40da9a7 9649ia64_reorg (void)
ac445222 9650{
b65ae28b 9651 /* We are freeing block_for_insn in the toplev to keep compatibility
9652 with old MDEP_REORGS that are not CFG based. Recompute it now. */
f23d9a22 9653 compute_bb_for_insn ();
0264cc04 9654
9655 /* If optimizing, we'll have split before scheduling. */
9656 if (optimize == 0)
3072d30e 9657 split_all_insns ();
a0bb0d20 9658
8a42230a 9659 if (optimize && flag_schedule_insns_after_reload
cf7898a6 9660 && dbg_cnt (ia64_sched2))
940fa57f 9661 {
b3923b22 9662 basic_block bb;
703f29bc 9663 timevar_push (TV_SCHED2);
940fa57f 9664 ia64_final_schedule = 1;
58ada791 9665
b3923b22 9666 /* We can't let modulo-sched prevent us from scheduling any bbs,
9667 since we need the final schedule to produce bundle information. */
9668 FOR_EACH_BB (bb)
9669 bb->flags &= ~BB_DISABLE_SCHEDULE;
9670
58ada791 9671 initiate_bundle_states ();
9672 ia64_nop = make_insn_raw (gen_nop ());
9673 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9674 recog_memoized (ia64_nop);
9675 clocks_length = get_max_uid () + 1;
225ab426 9676 stops_p = XCNEWVEC (char, clocks_length);
e65caf8d 9677
58ada791 9678 if (ia64_tune == PROCESSOR_ITANIUM2)
9679 {
9680 pos_1 = get_cpu_unit_code ("2_1");
9681 pos_2 = get_cpu_unit_code ("2_2");
9682 pos_3 = get_cpu_unit_code ("2_3");
9683 pos_4 = get_cpu_unit_code ("2_4");
9684 pos_5 = get_cpu_unit_code ("2_5");
9685 pos_6 = get_cpu_unit_code ("2_6");
9686 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9687 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9688 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9689 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9690 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9691 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9692 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9693 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9694 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9695 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9696 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9697 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9698 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9699 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9700 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9701 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9702 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9703 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9704 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9705 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9706 }
9707 else
9708 {
9709 pos_1 = get_cpu_unit_code ("1_1");
9710 pos_2 = get_cpu_unit_code ("1_2");
9711 pos_3 = get_cpu_unit_code ("1_3");
9712 pos_4 = get_cpu_unit_code ("1_4");
9713 pos_5 = get_cpu_unit_code ("1_5");
9714 pos_6 = get_cpu_unit_code ("1_6");
9715 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9716 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9717 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9718 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9719 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9720 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9721 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9722 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9723 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9724 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9725 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9726 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9727 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9728 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9729 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9730 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9731 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9732 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9733 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9734 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9735 }
cf7898a6 9736
9737 if (flag_selective_scheduling2
9738 && !maybe_skip_selective_scheduling ())
9739 run_selective_scheduling ();
9740 else
9741 schedule_ebbs ();
9742
9743 /* Redo alignment computation, as it might gone wrong. */
9744 compute_alignments ();
9745
3072d30e 9746 /* We cannot reuse this one because it has been corrupted by the
9747 evil glat. */
58ada791 9748 finish_bundle_states ();
58ada791 9749 free (stops_p);
ea13ae4c 9750 stops_p = NULL;
450d042a 9751 emit_insn_group_barriers (dump_file);
58ada791 9752
940fa57f 9753 ia64_final_schedule = 0;
703f29bc 9754 timevar_pop (TV_SCHED2);
940fa57f 9755 }
9756 else
450d042a 9757 emit_all_insn_group_barriers (dump_file);
33c8f6d1 9758
3072d30e 9759 df_analyze ();
9760
b69208e6 9761 /* A call must not be the last instruction in a function, so that the
9762 return address is still within the function, so that unwinding works
9763 properly. Note that IA-64 differs from dwarf2 on this point. */
b213bf24 9764 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
b69208e6 9765 {
9766 rtx insn;
9767 int saw_stop = 0;
9768
9769 insn = get_last_insn ();
9770 if (! INSN_P (insn))
9771 insn = prev_active_insn (insn);
025781a4 9772 if (insn)
b69208e6 9773 {
025781a4 9774 /* Skip over insns that expand to nothing. */
9775 while (GET_CODE (insn) == INSN
9776 && get_attr_empty (insn) == EMPTY_YES)
9777 {
9778 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9779 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9780 saw_stop = 1;
9781 insn = prev_active_insn (insn);
9782 }
9783 if (GET_CODE (insn) == CALL_INSN)
9784 {
9785 if (! saw_stop)
9786 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9787 emit_insn (gen_break_f ());
9788 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9789 }
b69208e6 9790 }
9791 }
9792
33c8f6d1 9793 emit_predicate_relation_info ();
5923a5e7 9794
8a42230a 9795 if (flag_var_tracking)
5923a5e7 9796 {
9797 timevar_push (TV_VAR_TRACKING);
9798 variable_tracking_main ();
9799 timevar_pop (TV_VAR_TRACKING);
9800 }
314966f4 9801 df_finish_pass (false);
ac445222 9802}
9803\f
9804/* Return true if REGNO is used by the epilogue. */
9805
9806int
b40da9a7 9807ia64_epilogue_uses (int regno)
ac445222 9808{
34a64f80 9809 switch (regno)
9810 {
9811 case R_GR (1):
8c01cc0e 9812 /* With a call to a function in another module, we will write a new
9813 value to "gp". After returning from such a call, we need to make
9814 sure the function restores the original gp-value, even if the
9815 function itself does not use the gp anymore. */
9816 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
34a64f80 9817
9818 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9819 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9820 /* For functions defined with the syscall_linkage attribute, all
9821 input registers are marked as live at all function exits. This
9822 prevents the register allocator from using the input registers,
9823 which in turn makes it possible to restart a system call after
9824 an interrupt without having to save/restore the input registers.
9825 This also prevents kernel data from leaking to application code. */
9826 return lookup_attribute ("syscall_linkage",
9827 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9828
9829 case R_BR (0):
9830 /* Conditional return patterns can't represent the use of `b0' as
9831 the return address, so we force the value live this way. */
9832 return 1;
341cffb9 9833
34a64f80 9834 case AR_PFS_REGNUM:
9835 /* Likewise for ar.pfs, which is used by br.ret. */
9836 return 1;
9641f63c 9837
34a64f80 9838 default:
9839 return 0;
9840 }
ac445222 9841}
1c6bdf07 9842
9843/* Return true if REGNO is used by the frame unwinder. */
9844
9845int
b40da9a7 9846ia64_eh_uses (int regno)
1c6bdf07 9847{
9f1b7d17 9848 unsigned int r;
3072d30e 9849
1c6bdf07 9850 if (! reload_completed)
9851 return 0;
9852
3072d30e 9853 if (regno == 0)
9854 return 0;
9855
9856 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9857 if (regno == current_frame_info.r[r]
9858 || regno == emitted_frame_related_regs[r])
9859 return 1;
1c6bdf07 9860
9861 return 0;
9862}
ac445222 9863\f
c50e596a 9864/* Return true if this goes in small data/bss. */
ac445222 9865
9866/* ??? We could also support own long data here. Generating movl/add/ld8
9867 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9868 code faster because there is one less load. This also includes incomplete
9869 types which can't go in sdata/sbss. */
9870
52470889 9871static bool
a9f1838b 9872ia64_in_small_data_p (const_tree exp)
52470889 9873{
9874 if (TARGET_NO_SDATA)
9875 return false;
9876
03864a6f 9877 /* We want to merge strings, so we never consider them small data. */
9878 if (TREE_CODE (exp) == STRING_CST)
9879 return false;
9880
e3a37c0a 9881 /* Functions are never small data. */
9882 if (TREE_CODE (exp) == FUNCTION_DECL)
9883 return false;
9884
52470889 9885 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9886 {
9887 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
09727e8a 9888
52470889 9889 if (strcmp (section, ".sdata") == 0
09727e8a 9890 || strncmp (section, ".sdata.", 7) == 0
9891 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9892 || strcmp (section, ".sbss") == 0
9893 || strncmp (section, ".sbss.", 6) == 0
9894 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
52470889 9895 return true;
9896 }
9897 else
9898 {
9899 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9900
9901 /* If this is an incomplete type with size 0, then we can't put it
9902 in sdata because it might be too big when completed. */
9903 if (size > 0 && size <= ia64_section_threshold)
9904 return true;
9905 }
9906
9907 return false;
9908}
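/* Illustrative examples (compiled out, hypothetical names): assuming the
   object's size is no larger than ia64_section_threshold, the first two
   definitions below satisfy the predicate above, while a string constant
   never does.  */
#if 0
int small_counter;				/* small enough: .sbss */
int named_small __attribute__ ((section (".sdata.example")));
char big_buffer[65536];				/* too big for small data */
#endif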
7c2f467a 9909\f
cbf41957 9910/* Output assembly directives for prologue regions. */
9911
9912/* The current basic block number. */
9913
4c26117a 9914static bool last_block;
cbf41957 9915
9916/* True if we need a copy_state command at the start of the next block. */
9917
4c26117a 9918static bool need_copy_state;
cbf41957 9919
34986748 9920#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9921# define MAX_ARTIFICIAL_LABEL_BYTES 30
9922#endif
9923
cbf41957 9924/* The function emits unwind directives for the start of an epilogue. */
9925
9926static void
8b2d95d2 9927process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9928 bool unwind, bool frame ATTRIBUTE_UNUSED)
cbf41957 9929{
9930 /* If this isn't the last block of the function, then we need to label the
9931 current state, and copy it back in at the start of the next block. */
9932
4c26117a 9933 if (!last_block)
cbf41957 9934 {
34986748 9935 if (unwind)
9936 fprintf (asm_out_file, "\t.label_state %d\n",
9937 ++cfun->machine->state_num);
4c26117a 9938 need_copy_state = true;
cbf41957 9939 }
9940
34986748 9941 if (unwind)
9942 fprintf (asm_out_file, "\t.restore sp\n");
cbf41957 9943}
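/* Illustrative directive sequence around a mid-function epilogue (the
   matching .copy_state for the next block is emitted elsewhere in this
   file; the state number and label below are made up):

       .label_state 1
       .restore sp
       ...
     .Lnext_block:
       .copy_state 1
*/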
7c2f467a 9944
585d208e 9945/* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
cac50a9f 9946
585d208e 9947static void
9948process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9949 bool unwind, bool frame)
7c2f467a 9950{
7c2f467a 9951 rtx dest = SET_DEST (pat);
585d208e 9952 rtx src = SET_SRC (pat);
7c2f467a 9953
585d208e 9954 if (dest == stack_pointer_rtx)
7c2f467a 9955 {
9956 if (GET_CODE (src) == PLUS)
585d208e 9957 {
7c2f467a 9958 rtx op0 = XEXP (src, 0);
9959 rtx op1 = XEXP (src, 1);
c5c17bca 9960
9961 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9962
9963 if (INTVAL (op1) < 0)
34986748 9964 {
9965 gcc_assert (!frame_pointer_needed);
9966 if (unwind)
585d208e 9967 fprintf (asm_out_file,
9968 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
34986748 9969 -INTVAL (op1));
34986748 9970 }
2517dcd5 9971 else
34986748 9972 process_epilogue (asm_out_file, insn, unwind, frame);
7c2f467a 9973 }
2517dcd5 9974 else
c5c17bca 9975 {
585d208e 9976 gcc_assert (src == hard_frame_pointer_rtx);
34986748 9977 process_epilogue (asm_out_file, insn, unwind, frame);
c5c17bca 9978 }
585d208e 9979 }
9980 else if (dest == hard_frame_pointer_rtx)
9981 {
9982 gcc_assert (src == stack_pointer_rtx);
9983 gcc_assert (frame_pointer_needed);
2517dcd5 9984
585d208e 9985 if (unwind)
9986 fprintf (asm_out_file, "\t.vframe r%d\n",
9987 ia64_dbx_register_number (REGNO (dest)));
7c2f467a 9988 }
585d208e 9989 else
9990 gcc_unreachable ();
9991}
7c2f467a 9992
585d208e 9993/* This function processes a SET pattern for REG_CFA_REGISTER. */
cac50a9f 9994
585d208e 9995static void
9996process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
9997{
9998 rtx dest = SET_DEST (pat);
9999 rtx src = SET_SRC (pat);
585d208e 10000 int dest_regno = REGNO (dest);
4a5b1b88 10001 int src_regno;
cac50a9f 10002
4a5b1b88 10003 if (src == pc_rtx)
585d208e 10004 {
585d208e 10005 /* Saving return address pointer. */
585d208e 10006 if (unwind)
10007 fprintf (asm_out_file, "\t.save rp, r%d\n",
10008 ia64_dbx_register_number (dest_regno));
4a5b1b88 10009 return;
10010 }
10011
10012 src_regno = REGNO (src);
cac50a9f 10013
4a5b1b88 10014 switch (src_regno)
10015 {
585d208e 10016 case PR_REG (0):
10017 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10018 if (unwind)
10019 fprintf (asm_out_file, "\t.save pr, r%d\n",
10020 ia64_dbx_register_number (dest_regno));
10021 break;
cac50a9f 10022
585d208e 10023 case AR_UNAT_REGNUM:
10024 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10025 if (unwind)
10026 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10027 ia64_dbx_register_number (dest_regno));
10028 break;
cac50a9f 10029
585d208e 10030 case AR_LC_REGNUM:
10031 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10032 if (unwind)
10033 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10034 ia64_dbx_register_number (dest_regno));
10035 break;
10036
10037 default:
10038 /* Everything else should indicate being stored to memory. */
10039 gcc_unreachable ();
7c2f467a 10040 }
585d208e 10041}
cac50a9f 10042
585d208e 10043/* This function processes a SET pattern for REG_CFA_OFFSET. */
cac50a9f 10044
585d208e 10045static void
10046process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10047{
10048 rtx dest = SET_DEST (pat);
10049 rtx src = SET_SRC (pat);
10050 int src_regno = REGNO (src);
10051 const char *saveop;
10052 HOST_WIDE_INT off;
10053 rtx base;
7c2f467a 10054
585d208e 10055 gcc_assert (MEM_P (dest));
10056 if (GET_CODE (XEXP (dest, 0)) == REG)
10057 {
10058 base = XEXP (dest, 0);
10059 off = 0;
10060 }
10061 else
10062 {
10063 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10064 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10065 base = XEXP (XEXP (dest, 0), 0);
10066 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10067 }
cac50a9f 10068
585d208e 10069 if (base == hard_frame_pointer_rtx)
10070 {
10071 saveop = ".savepsp";
10072 off = - off;
10073 }
10074 else
10075 {
10076 gcc_assert (base == stack_pointer_rtx);
10077 saveop = ".savesp";
10078 }
cac50a9f 10079
585d208e 10080 src_regno = REGNO (src);
10081 switch (src_regno)
10082 {
10083 case BR_REG (0):
10084 gcc_assert (!current_frame_info.r[reg_save_b0]);
10085 if (unwind)
10086 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10087 saveop, off);
10088 break;
cac50a9f 10089
585d208e 10090 case PR_REG (0):
10091 gcc_assert (!current_frame_info.r[reg_save_pr]);
10092 if (unwind)
10093 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10094 saveop, off);
10095 break;
cac50a9f 10096
585d208e 10097 case AR_LC_REGNUM:
10098 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10099 if (unwind)
10100 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10101 saveop, off);
10102 break;
cac50a9f 10103
585d208e 10104 case AR_PFS_REGNUM:
10105 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10106 if (unwind)
10107 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10108 saveop, off);
10109 break;
cac50a9f 10110
585d208e 10111 case AR_UNAT_REGNUM:
10112 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10113 if (unwind)
10114 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10115 saveop, off);
10116 break;
cac50a9f 10117
585d208e 10118 case GR_REG (4):
10119 case GR_REG (5):
10120 case GR_REG (6):
10121 case GR_REG (7):
10122 if (unwind)
10123 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10124 1 << (src_regno - GR_REG (4)));
10125 break;
cac50a9f 10126
585d208e 10127 case BR_REG (1):
10128 case BR_REG (2):
10129 case BR_REG (3):
10130 case BR_REG (4):
10131 case BR_REG (5):
10132 if (unwind)
10133 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10134 1 << (src_regno - BR_REG (1)));
10135 break;
cac50a9f 10136
585d208e 10137 case FR_REG (2):
10138 case FR_REG (3):
10139 case FR_REG (4):
10140 case FR_REG (5):
10141 if (unwind)
10142 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10143 1 << (src_regno - FR_REG (2)));
10144 break;
cac50a9f 10145
585d208e 10146 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10147 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10148 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10149 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10150 if (unwind)
10151 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10152 1 << (src_regno - FR_REG (12)));
10153 break;
cac50a9f 10154
585d208e 10155 default:
10156 /* ??? For some reason we mark other general registers, even those
10157 we can't represent in the unwind info. Ignore them. */
10158 break;
10159 }
7c2f467a 10160}
10161
7c2f467a 10162/* This function looks at a single insn and emits any directives
10163 required to unwind this insn. */
585d208e 10164
37966699 10165static void
10166ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
7c2f467a 10167{
b213bf24 10168 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
34986748 10169 bool frame = dwarf2out_do_frame ();
585d208e 10170 rtx note, pat;
10171 bool handled_one;
10172
10173 if (!unwind && !frame)
10174 return;
34986748 10175
585d208e 10176 if (NOTE_INSN_BASIC_BLOCK_P (insn))
7c2f467a 10177 {
585d208e 10178 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
cac50a9f 10179
585d208e 10180 /* Restore unwind state from immediately before the epilogue. */
10181 if (need_copy_state)
cbf41957 10182 {
585d208e 10183 if (unwind)
cbf41957 10184 {
585d208e 10185 fprintf (asm_out_file, "\t.body\n");
10186 fprintf (asm_out_file, "\t.copy_state %d\n",
10187 cfun->machine->state_num);
cbf41957 10188 }
585d208e 10189 need_copy_state = false;
cbf41957 10190 }
585d208e 10191 }
cbf41957 10192
585d208e 10193 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
10194 return;
10195
10196 /* Look for the ALLOC insn. */
10197 if (INSN_CODE (insn) == CODE_FOR_alloc)
10198 {
10199 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10200 int dest_regno = REGNO (dest);
cbf41957 10201
585d208e 10202 /* If this is the final destination for ar.pfs, then this must
10203 be the alloc in the prologue. */
10204 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10205 {
10206 if (unwind)
10207 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10208 ia64_dbx_register_number (dest_regno));
10209 }
cac50a9f 10210 else
585d208e 10211 {
10212 /* This must be an alloc before a sibcall. We must drop the
10213 old frame info; the easiest way to do that is to ensure
10214 we had a ".restore sp" directive
10215 followed by a new prologue. If the procedure doesn't
10216 have a memory-stack frame, we'll issue a dummy ".restore
10217 sp" now. */
10218 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10219 /* If we haven't done process_epilogue () yet, do it now. */
10220 process_epilogue (asm_out_file, insn, unwind, frame);
10221 if (unwind)
10222 fprintf (asm_out_file, "\t.prologue\n");
10223 }
10224 return;
10225 }
7c2f467a 10226
585d208e 10227 handled_one = false;
10228 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10229 switch (REG_NOTE_KIND (note))
10230 {
10231 case REG_CFA_ADJUST_CFA:
10232 pat = XEXP (note, 0);
10233 if (pat == NULL)
10234 pat = PATTERN (insn);
10235 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10236 handled_one = true;
10237 break;
61788791 10238
585d208e 10239 case REG_CFA_OFFSET:
10240 pat = XEXP (note, 0);
10241 if (pat == NULL)
10242 pat = PATTERN (insn);
10243 process_cfa_offset (asm_out_file, pat, unwind);
10244 handled_one = true;
10245 break;
61788791 10246
585d208e 10247 case REG_CFA_REGISTER:
10248 pat = XEXP (note, 0);
10249 if (pat == NULL)
10250 pat = PATTERN (insn);
10251 process_cfa_register (asm_out_file, pat, unwind);
10252 handled_one = true;
10253 break;
10254
10255 case REG_FRAME_RELATED_EXPR:
10256 case REG_CFA_DEF_CFA:
10257 case REG_CFA_EXPRESSION:
10258 case REG_CFA_RESTORE:
10259 case REG_CFA_SET_VDRAP:
10260 /* Not used in the ia64 port. */
10261 gcc_unreachable ();
10262
10263 default:
10264 /* Not a frame-related note. */
10265 break;
10266 }
10267
10268 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10269 explicit action to take. No guessing required. */
10270 gcc_assert (handled_one);
7c2f467a 10271}
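
/* As an illustration (a sketch only; the register numbers are hypothetical),
   the machinery above annotates a typical memory-stack prologue roughly as

	.prologue
	.save ar.pfs, r34	(from the ALLOC handling above)
	.fframe 32		(from process_cfa_adjust_cfa)
	.save rp, r33		(from process_cfa_register)
	.body

   with a matching ".restore sp" emitted via process_epilogue when the
   stack pointer is restored.  */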
ac445222 10272
37966699 10273/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10274
10275static void
10276ia64_asm_emit_except_personality (rtx personality)
10277{
10278 fputs ("\t.personality\t", asm_out_file);
10279 output_addr_const (asm_out_file, personality);
10280 fputc ('\n', asm_out_file);
10281}
10282
10283/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10284
10285static void
10286ia64_asm_init_sections (void)
10287{
10288 exception_section = get_unnamed_section (0, output_section_asm_op,
10289 "\t.handlerdata");
10290}
cc7d6aed 10291
10292/* Implement TARGET_DEBUG_UNWIND_INFO. */
10293
10294static enum unwind_info_type
10295ia64_debug_unwind_info (void)
10296{
10297 return UI_TARGET;
10298}
f087d65d 10299\f
7c1e874f 10300enum ia64_builtins
10301{
10302 IA64_BUILTIN_BSP,
8b21beb2 10303 IA64_BUILTIN_COPYSIGNQ,
10304 IA64_BUILTIN_FABSQ,
10305 IA64_BUILTIN_FLUSHRS,
9b57ed9f 10306 IA64_BUILTIN_INFQ,
d889cd75 10307 IA64_BUILTIN_HUGE_VALQ,
10308 IA64_BUILTIN_max
7c1e874f 10309};
10310
d889cd75 10311static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10312
ac445222 10313void
b40da9a7 10314ia64_init_builtins (void)
ac445222 10315{
b268e47e 10316 tree fpreg_type;
bd0cf2bc 10317 tree float80_type;
d889cd75 10318 tree decl;
b268e47e 10319
10320 /* The __fpreg type. */
10321 fpreg_type = make_node (REAL_TYPE);
7a979707 10322 TYPE_PRECISION (fpreg_type) = 82;
b268e47e 10323 layout_type (fpreg_type);
10324 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10325
10326 /* The __float80 type. */
bd0cf2bc 10327 float80_type = make_node (REAL_TYPE);
77d98cfe 10328 TYPE_PRECISION (float80_type) = 80;
bd0cf2bc 10329 layout_type (float80_type);
10330 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
b268e47e 10331
10332 /* The __float128 type. */
b8bc42e9 10333 if (!TARGET_HPUX)
b268e47e 10334 {
d889cd75 10335 tree ftype;
b268e47e 10336 tree float128_type = make_node (REAL_TYPE);
8b21beb2 10337
b268e47e 10338 TYPE_PRECISION (float128_type) = 128;
10339 layout_type (float128_type);
10340 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8b21beb2 10341
10342 /* TFmode support builtins. */
58711bd3 10343 ftype = build_function_type_list (float128_type, NULL_TREE);
d889cd75 10344 decl = add_builtin_function ("__builtin_infq", ftype,
10345 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10346 NULL, NULL_TREE);
10347 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
8b21beb2 10348
d889cd75 10349 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10350 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10351 NULL, NULL_TREE);
10352 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
9b57ed9f 10353
8b21beb2 10354 ftype = build_function_type_list (float128_type,
10355 float128_type,
10356 NULL_TREE);
10357 decl = add_builtin_function ("__builtin_fabsq", ftype,
10358 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10359 "__fabstf2", NULL_TREE);
10360 TREE_READONLY (decl) = 1;
d889cd75 10361 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
8b21beb2 10362
10363 ftype = build_function_type_list (float128_type,
10364 float128_type,
10365 float128_type,
10366 NULL_TREE);
10367 decl = add_builtin_function ("__builtin_copysignq", ftype,
10368 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10369 "__copysigntf3", NULL_TREE);
10370 TREE_READONLY (decl) = 1;
d889cd75 10371 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
b268e47e 10372 }
10373 else
b8bc42e9 10374 /* Under HPUX, this is a synonym for "long double". */
b268e47e 10375 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10376 "__float128");
10377
8b8d3752 10378 /* Fwrite on VMS is non-standard. */
044f30d8 10379#if TARGET_ABI_OPEN_VMS
10380 vms_patch_builtins ();
10381#endif
8b8d3752 10382
e60d3615 10383#define def_builtin(name, type, code) \
54be5d7e 10384 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10385 NULL, NULL_TREE)
f087d65d 10386
d889cd75 10387 decl = def_builtin ("__builtin_ia64_bsp",
58711bd3 10388 build_function_type_list (ptr_type_node, NULL_TREE),
10389 IA64_BUILTIN_BSP);
d889cd75 10390 ia64_builtins[IA64_BUILTIN_BSP] = decl;
fbba5463 10391
d889cd75 10392 decl = def_builtin ("__builtin_ia64_flushrs",
58711bd3 10393 build_function_type_list (void_type_node, NULL_TREE),
10394 IA64_BUILTIN_FLUSHRS);
d889cd75 10395 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
fbba5463 10396
f087d65d 10397#undef def_builtin
aafb162c 10398
10399 if (TARGET_HPUX)
10400 {
ac03db3f 10401 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
b9a16870 10402 set_user_assembler_name (decl, "_Isfinite");
ac03db3f 10403 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
b9a16870 10404 set_user_assembler_name (decl, "_Isfinitef");
ac03db3f 10405 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
b9a16870 10406 set_user_assembler_name (decl, "_Isfinitef128");
aafb162c 10407 }
ac445222 10408}
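
/* Illustrative usage of the builtins registered above (a sketch, assuming
   a non-HP-UX target so that the TFmode __float128 builtins exist):

     __float128 inf = __builtin_infq ();
     __float128 big = __builtin_huge_valq ();
     __float128 mag = __builtin_fabsq (inf);
     __float128 neg = __builtin_copysignq (mag, -big);
     void *bsp = __builtin_ia64_bsp ();
     __builtin_ia64_flushrs ();
*/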
10409
ac445222 10410rtx
b40da9a7 10411ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10412 enum machine_mode mode ATTRIBUTE_UNUSED,
10413 int ignore ATTRIBUTE_UNUSED)
ac445222 10414{
f3a91845 10415 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
cac50a9f 10416 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
ac445222 10417
10418 switch (fcode)
10419 {
fbba5463 10420 case IA64_BUILTIN_BSP:
f087d65d 10421 if (! target || ! register_operand (target, DImode))
10422 target = gen_reg_rtx (DImode);
10423 emit_insn (gen_bsp_value (target));
529e3d53 10424#ifdef POINTERS_EXTEND_UNSIGNED
10425 target = convert_memory_address (ptr_mode, target);
10426#endif
f087d65d 10427 return target;
fbba5463 10428
10429 case IA64_BUILTIN_FLUSHRS:
9b06caff 10430 emit_insn (gen_flushrs ());
10431 return const0_rtx;
fbba5463 10432
8b21beb2 10433 case IA64_BUILTIN_INFQ:
9b57ed9f 10434 case IA64_BUILTIN_HUGE_VALQ:
8b21beb2 10435 {
dd009b87 10436 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
8b21beb2 10437 REAL_VALUE_TYPE inf;
10438 rtx tmp;
10439
10440 real_inf (&inf);
dd009b87 10441 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
8b21beb2 10442
dd009b87 10443 tmp = validize_mem (force_const_mem (target_mode, tmp));
8b21beb2 10444
10445 if (target == 0)
dd009b87 10446 target = gen_reg_rtx (target_mode);
8b21beb2 10447
10448 emit_move_insn (target, tmp);
10449 return target;
10450 }
10451
10452 case IA64_BUILTIN_FABSQ:
10453 case IA64_BUILTIN_COPYSIGNQ:
10454 return expand_call (exp, target, ignore);
10455
ac445222 10456 default:
8b21beb2 10457 gcc_unreachable ();
ac445222 10458 }
10459
f087d65d 10460 return NULL_RTX;
ac445222 10461}
23551094 10462
d889cd75 10463/* Return the ia64 builtin for CODE. */
10464
10465static tree
10466ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10467{
10468 if (code >= IA64_BUILTIN_max)
10469 return error_mark_node;
10470
10471 return ia64_builtins[code];
10472}
10473
23551094 10474/* For HP-UX IA64, aggregate parameters are passed in the most
10475 significant bits of the stack slot. */
10476
10477enum direction
fb80456a 10478ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
23551094 10479{
87d3145e 10480 /* Exception to normal case for structures/unions/etc. */
23551094 10481
10482 if (type && AGGREGATE_TYPE_P (type)
10483 && int_size_in_bytes (type) < UNITS_PER_WORD)
10484 return upward;
10485
f41d3a7e 10486 /* Fall back to the default. */
10487 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
23551094 10488}
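
/* Illustrative example: on HP-UX an aggregate such as "struct s { short x; }"
   has int_size_in_bytes == 2, which is less than UNITS_PER_WORD, so it is
   padded upward and ends up in the most significant bytes of its argument
   slot; anything else falls back to DEFAULT_FUNCTION_ARG_PADDING.  */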
d7b7c754 10489
44b23e05 10490/* Emit text to declare externally defined variables and functions, because
10491 the Intel assembler does not support undefined externals. */
d7b7c754 10492
44b23e05 10493void
10494ia64_asm_output_external (FILE *file, tree decl, const char *name)
d7b7c754 10495{
44b23e05 10496 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10497 set in order to avoid putting out names that are never really
10498 used. */
10499 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
d7b7c754 10500 {
44b23e05 10501 /* maybe_assemble_visibility will return 1 if the assembler
7920eed5 10502 visibility directive is output. */
44b23e05 10503 int need_visibility = ((*targetm.binds_local_p) (decl)
10504 && maybe_assemble_visibility (decl));
d3cdd238 10505
44b23e05 10506 /* GNU as does not need anything here, but the HP linker does
10507 need something for external functions. */
10508 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10509 && TREE_CODE (decl) == FUNCTION_DECL)
9afff52d 10510 (*targetm.asm_out.globalize_decl_name) (file, decl);
44b23e05 10511 else if (need_visibility && !TARGET_GNU_AS)
10512 (*targetm.asm_out.globalize_label) (file, name);
d7b7c754 10513 }
10514}
10515
ea5219c2 10516/* Set SImode div/mod functions; init_integral_libfuncs only initializes
0a5425ff 10517 modes of word_mode and larger. Rename the TFmode libfuncs using the
10518 HPUX conventions. __divtf3 is used for XFmode; we need to keep it for
10519 backward compatibility. */
ea5219c2 10520
10521static void
10522ia64_init_libfuncs (void)
10523{
10524 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10525 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10526 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10527 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
0a5425ff 10528
10529 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10530 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10531 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10532 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10533 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10534
10535 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10536 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10537 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10538 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10539 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10540 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10541
10542 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10543 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
3212edfa 10544 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
0a5425ff 10545 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10546 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10547
10548 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10549 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
3212edfa 10550 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
d1fec927 10551 /* HP-UX 11.23 libc does not have a function for unsigned
10552 SImode-to-TFmode conversion. */
10553 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
ea5219c2 10554}
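
/* Illustrative effect (a sketch): with the mappings above, a TFmode
   multiplication (HP-UX "long double", or __float128 elsewhere) such as
   "a * b" expands to a call to _U_Qfmpy, and a signed TFmode-to-SImode
   conversion to a call to _U_Qfcnvfxt_quad_to_sgl, following the HP-UX
   quad-float naming conventions.  */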
10555
f2f543a3 10556/* Rename all the TFmode libfuncs using the HPUX conventions. */
66c17c96 10557
f2f543a3 10558static void
10559ia64_hpux_init_libfuncs (void)
10560{
ea5219c2 10561 ia64_init_libfuncs ();
10562
f1ada0df 10563 /* The HP SI millicode division and mod functions expect DI arguments.
10564 By turning them off completely we avoid using both libgcc and the
10565 non-standard millicode routines and use the HP DI millicode routines
10566 instead. */
10567
10568 set_optab_libfunc (sdiv_optab, SImode, 0);
10569 set_optab_libfunc (udiv_optab, SImode, 0);
10570 set_optab_libfunc (smod_optab, SImode, 0);
10571 set_optab_libfunc (umod_optab, SImode, 0);
10572
10573 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10574 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10575 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10576 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10577
10578 /* HP-UX libc has TF min/max/abs routines in it. */
f2f543a3 10579 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10580 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10581 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
f2f543a3 10582
9abe3ad4 10583 /* ia64_expand_compare uses this. */
10584 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10585
10586 /* These should never be used. */
10587 set_optab_libfunc (eq_optab, TFmode, 0);
10588 set_optab_libfunc (ne_optab, TFmode, 0);
10589 set_optab_libfunc (gt_optab, TFmode, 0);
10590 set_optab_libfunc (ge_optab, TFmode, 0);
10591 set_optab_libfunc (lt_optab, TFmode, 0);
10592 set_optab_libfunc (le_optab, TFmode, 0);
f2f543a3 10593}
66c17c96 10594
10595/* Rename the division and modulus functions in VMS. */
10596
10597static void
10598ia64_vms_init_libfuncs (void)
10599{
10600 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10601 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10602 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10603 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10604 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10605 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10606 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10607 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8b8d3752 10608 abort_libfunc = init_one_libfunc ("decc$abort");
10609 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10610#ifdef MEM_LIBFUNCS_INIT
10611 MEM_LIBFUNCS_INIT;
10612#endif
66c17c96 10613}
0a5425ff 10614
10615/* Rename the TFmode libfuncs available from soft-fp in glibc using
10616 the HPUX conventions. */
10617
10618static void
10619ia64_sysv4_init_libfuncs (void)
10620{
10621 ia64_init_libfuncs ();
10622
10623 /* These functions are not part of the HPUX TFmode interface. We
10624 use them instead of _U_Qfcmp, which doesn't work the way we
10625 expect. */
10626 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10627 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10628 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10629 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10630 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10631 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10632
10633 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10634 glibc doesn't have them. */
10635}
8b21beb2 10636
10637/* Use soft-fp. */
10638
10639static void
10640ia64_soft_fp_init_libfuncs (void)
10641{
10642}
8b8d3752 10643
10644static bool
10645ia64_vms_valid_pointer_mode (enum machine_mode mode)
10646{
10647 return (mode == SImode || mode == DImode);
10648}
52470889 10649\f
4e151b05 10650/* For HPUX, it is illegal to have relocations in shared segments. */
10651
10652static int
10653ia64_hpux_reloc_rw_mask (void)
10654{
10655 return 3;
10656}
10657
10658/* For others, relax this so that relocations to local data go in
10659 read-only segments, but we still cannot allow global relocations
10660 in read-only segments. */
10661
10662static int
10663ia64_reloc_rw_mask (void)
10664{
10665 return flag_pic ? 3 : 2;
10666}
10667
2f14b1f9 10668/* Return the section to use for X. The only special thing we do here
10669 is to honor small data. */
bbfbe351 10670
2f14b1f9 10671static section *
b40da9a7 10672ia64_select_rtx_section (enum machine_mode mode, rtx x,
10673 unsigned HOST_WIDE_INT align)
bbfbe351 10674{
10675 if (GET_MODE_SIZE (mode) > 0
71685250 10676 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10677 && !TARGET_NO_SDATA)
2f14b1f9 10678 return sdata_section;
bbfbe351 10679 else
2f14b1f9 10680 return default_elf_select_rtx_section (mode, x, align);
bbfbe351 10681}
10682
2ee31014 10683static unsigned int
64717bc5 10684ia64_section_type_flags (tree decl, const char *name, int reloc)
10685{
10686 unsigned int flags = 0;
10687
10688 if (strcmp (name, ".sdata") == 0
10689 || strncmp (name, ".sdata.", 7) == 0
10690 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10691 || strncmp (name, ".sdata2.", 8) == 0
10692 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10693 || strcmp (name, ".sbss") == 0
10694 || strncmp (name, ".sbss.", 6) == 0
10695 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10696 flags = SECTION_SMALL;
10697
4e151b05 10698 flags |= default_section_type_flags (decl, name, reloc);
64717bc5 10699 return flags;
2ee31014 10700}
10701
e100aadc 10702/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10703 structure type and the address of that return value should be passed
10704 in out0, rather than in r8. */
10705
10706static bool
10707ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10708{
10709 tree ret_type = TREE_TYPE (fntype);
10710
10711 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10712 as the structure return address parameter, if the return value
10713 type has a non-trivial copy constructor or destructor. It is not
10714 clear if this same convention should be used for other
10715 programming languages. Until G++ 3.4, we incorrectly used r8 for
10716 these return values. */
10717 return (abi_version_at_least (2)
10718 && ret_type
10719 && TYPE_MODE (ret_type) == BLKmode
10720 && TREE_ADDRESSABLE (ret_type)
10721 && strcmp (lang_hooks.name, "GNU C++") == 0);
10722}
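
/* Illustrative C++ example (hypothetical declarations): for

     struct S { ~S (); int i; };
     S f ();

   the return type has a non-trivial destructor, so it is returned in
   memory (BLKmode, TREE_ADDRESSABLE) and, with abi_version_at_least (2),
   the address of the return slot for a call to f is passed in out0
   rather than in r8.  */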
2ee31014 10723
100c9270 10724/* Output the assembler code for a thunk function. THUNK_DECL is the
10725 declaration for the thunk function itself, FUNCTION is the decl for
10726 the target function. DELTA is an immediate constant offset to be
a8b24921 10727 added to THIS. If VCALL_OFFSET is nonzero, the word at
100c9270 10728 *(*this + vcall_offset) should be added to THIS. */
10729
6988553d 10730static void
b40da9a7 10731ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10732 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10733 tree function)
c6933ba6 10734{
8deb3959 10735 rtx this_rtx, insn, funexp;
e100aadc 10736 unsigned int this_parmno;
10737 unsigned int this_regno;
269f7060 10738 rtx delta_rtx;
100c9270 10739
e13693ec 10740 reload_completed = 1;
8af3db02 10741 epilogue_completed = 1;
e13693ec 10742
100c9270 10743 /* Set things up as ia64_expand_prologue might. */
10744 last_scratch_gr_reg = 15;
10745
10746 memset (&current_frame_info, 0, sizeof (current_frame_info));
10747 current_frame_info.spill_cfa_off = -16;
10748 current_frame_info.n_input_regs = 1;
10749 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10750
100c9270 10751 /* Mark the end of the (empty) prologue. */
31b97e8f 10752 emit_note (NOTE_INSN_PROLOGUE_END);
100c9270 10753
e100aadc 10754 /* Figure out whether "this" will be the first parameter (the
10755 typical case) or the second parameter (as happens when the
10756 virtual function returns certain class objects). */
10757 this_parmno
10758 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10759 ? 1 : 0);
10760 this_regno = IN_REG (this_parmno);
10761 if (!TARGET_REG_NAMES)
10762 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10763
8deb3959 10764 this_rtx = gen_rtx_REG (Pmode, this_regno);
269f7060 10765
10766 /* Apply the constant offset, if required. */
10767 delta_rtx = GEN_INT (delta);
f10f921c 10768 if (TARGET_ILP32)
10769 {
e100aadc 10770 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
f10f921c 10771 REG_POINTER (tmp) = 1;
269f7060 10772 if (delta && satisfies_constraint_I (delta_rtx))
f10f921c 10773 {
8deb3959 10774 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
f10f921c 10775 delta = 0;
10776 }
10777 else
8deb3959 10778 emit_insn (gen_ptr_extend (this_rtx, tmp));
f10f921c 10779 }
100c9270 10780 if (delta)
10781 {
269f7060 10782 if (!satisfies_constraint_I (delta_rtx))
100c9270 10783 {
10784 rtx tmp = gen_rtx_REG (Pmode, 2);
10785 emit_move_insn (tmp, delta_rtx);
10786 delta_rtx = tmp;
10787 }
8deb3959 10788 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
100c9270 10789 }
10790
10791 /* Apply the offset from the vtable, if required. */
10792 if (vcall_offset)
10793 {
10794 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10795 rtx tmp = gen_rtx_REG (Pmode, 2);
10796
f10f921c 10797 if (TARGET_ILP32)
10798 {
10799 rtx t = gen_rtx_REG (ptr_mode, 2);
10800 REG_POINTER (t) = 1;
8deb3959 10801 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
269f7060 10802 if (satisfies_constraint_I (vcall_offset_rtx))
f10f921c 10803 {
269f7060 10804 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
f10f921c 10805 vcall_offset = 0;
10806 }
10807 else
10808 emit_insn (gen_ptr_extend (tmp, t));
10809 }
10810 else
8deb3959 10811 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
100c9270 10812
f10f921c 10813 if (vcall_offset)
100c9270 10814 {
269f7060 10815 if (!satisfies_constraint_J (vcall_offset_rtx))
f10f921c 10816 {
10817 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10818 emit_move_insn (tmp2, vcall_offset_rtx);
10819 vcall_offset_rtx = tmp2;
10820 }
10821 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
100c9270 10822 }
100c9270 10823
f10f921c 10824 if (TARGET_ILP32)
269f7060 10825 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
f10f921c 10826 else
10827 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
100c9270 10828
8deb3959 10829 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
100c9270 10830 }
10831
10832 /* Generate a tail call to the target function. */
10833 if (! TREE_USED (function))
10834 {
10835 assemble_external (function);
10836 TREE_USED (function) = 1;
10837 }
10838 funexp = XEXP (DECL_RTL (function), 0);
10839 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10840 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10841 insn = get_last_insn ();
10842 SIBLING_CALL_P (insn) = 1;
e13693ec 10843
10844 /* Code generation for calls relies on splitting. */
10845 reload_completed = 1;
8af3db02 10846 epilogue_completed = 1;
e13693ec 10847 try_split (PATTERN (insn), insn, 0);
10848
100c9270 10849 emit_barrier ();
10850
10851 /* Run just enough of rest_of_compilation to get the insns emitted.
10852 There's not really enough bulk here to make other passes such as
10853 instruction scheduling worthwhile. Note that use_thunk calls
10854 assemble_start_function and assemble_end_function. */
e13693ec 10855
2efea8c0 10856 emit_all_insn_group_barriers (NULL);
100c9270 10857 insn = get_insns ();
100c9270 10858 shorten_branches (insn);
10859 final_start_function (insn, file, 1);
4bf029b0 10860 final (insn, file, 1);
100c9270 10861 final_end_function ();
e13693ec 10862
10863 reload_completed = 0;
8af3db02 10864 epilogue_completed = 0;
c6933ba6 10865}
10866
ac9a2599 10867/* Worker function for TARGET_STRUCT_VALUE_RTX. */
10868
10869static rtx
e100aadc 10870ia64_struct_value_rtx (tree fntype,
ac9a2599 10871 int incoming ATTRIBUTE_UNUSED)
10872{
8b8d3752 10873 if (TARGET_ABI_OPEN_VMS ||
10874 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
e100aadc 10875 return NULL_RTX;
ac9a2599 10876 return gen_rtx_REG (Pmode, GR_REG (8));
10877}
10878
e64e1ea1 10879static bool
10880ia64_scalar_mode_supported_p (enum machine_mode mode)
10881{
10882 switch (mode)
10883 {
10884 case QImode:
10885 case HImode:
10886 case SImode:
10887 case DImode:
10888 case TImode:
10889 return true;
10890
10891 case SFmode:
10892 case DFmode:
10893 case XFmode:
7a979707 10894 case RFmode:
e64e1ea1 10895 return true;
10896
10897 case TFmode:
8b21beb2 10898 return true;
e64e1ea1 10899
10900 default:
10901 return false;
10902 }
10903}
10904
a5c5f9d3 10905static bool
10906ia64_vector_mode_supported_p (enum machine_mode mode)
10907{
10908 switch (mode)
10909 {
10910 case V8QImode:
10911 case V4HImode:
10912 case V2SImode:
10913 return true;
10914
10915 case V2SFmode:
10916 return true;
10917
10918 default:
10919 return false;
10920 }
10921}
10922
c652f2ab 10923/* Implement the FUNCTION_PROFILER macro. */
10924
2a76106f 10925void
10926ia64_output_function_profiler (FILE *file, int labelno)
10927{
c652f2ab 10928 bool indirect_call;
10929
10930 /* If the function needs a static chain and the static chain
10931 register is r15, we use an indirect call so as to bypass
10932 the PLT stub in case the executable is dynamically linked,
10933 because the stub clobbers r15 as per 5.3.6 of the psABI.
10934 We don't need to do that in non-canonical PIC mode. */
10935
10936 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10937 {
10938 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10939 indirect_call = true;
10940 }
10941 else
10942 indirect_call = false;
10943
2a76106f 10944 if (TARGET_GNU_AS)
10945 fputs ("\t.prologue 4, r40\n", file);
10946 else
10947 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10948 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
1dca2721 10949
10950 if (NO_PROFILE_COUNTERS)
c652f2ab 10951 fputs ("\tmov out3 = r0\n", file);
1dca2721 10952 else
10953 {
10954 char buf[20];
10955 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10956
10957 if (TARGET_AUTO_PIC)
10958 fputs ("\tmovl out3 = @gprel(", file);
10959 else
10960 fputs ("\taddl out3 = @ltoff(", file);
10961 assemble_name (file, buf);
10962 if (TARGET_AUTO_PIC)
c652f2ab 10963 fputs (")\n", file);
1dca2721 10964 else
c652f2ab 10965 fputs ("), r1\n", file);
1dca2721 10966 }
10967
c652f2ab 10968 if (indirect_call)
10969 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10970 fputs ("\t;;\n", file);
10971
2a76106f 10972 fputs ("\t.save rp, r42\n", file);
1dca2721 10973 fputs ("\tmov out2 = b0\n", file);
c652f2ab 10974 if (indirect_call)
10975 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2a76106f 10976 fputs ("\t.body\n", file);
2a76106f 10977 fputs ("\tmov out1 = r1\n", file);
c652f2ab 10978 if (indirect_call)
10979 {
10980 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10981 fputs ("\tmov b6 = r16\n", file);
10982 fputs ("\tld8 r1 = [r14]\n", file);
10983 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10984 }
10985 else
10986 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2a76106f 10987}
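
/* For reference (a sketch assembled from the fputs calls above), the
   instrumentation emitted in the common case -- direct call, GNU as,
   NO_PROFILE_COUNTERS -- looks roughly like:

	.prologue 4, r40
	alloc out0 = ar.pfs, 8, 0, 4, 0
	mov out3 = r0
	;;
	.save rp, r42
	mov out2 = b0
	.body
	mov out1 = r1
	br.call.sptk.many b0 = _mcount
	;;
*/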
10988
f0ae8b21 10989static GTY(()) rtx mcount_func_rtx;
10990static rtx
10991gen_mcount_func_rtx (void)
10992{
10993 if (!mcount_func_rtx)
10994 mcount_func_rtx = init_one_libfunc ("_mcount");
10995 return mcount_func_rtx;
10996}
10997
10998void
10999ia64_profile_hook (int labelno)
11000{
11001 rtx label, ip;
11002
11003 if (NO_PROFILE_COUNTERS)
11004 label = const0_rtx;
11005 else
11006 {
11007 char buf[30];
11008 const char *label_name;
11009 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
85a98f46 11010 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
f0ae8b21 11011 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11012 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11013 }
11014 ip = gen_reg_rtx (Pmode);
11015 emit_insn (gen_ip_value (ip));
11016 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11017 VOIDmode, 3,
11018 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11019 ip, Pmode,
11020 label, Pmode);
11021}
11022
4e11e0fc 11023/* Return the mangling of TYPE if it is an extended fundamental type. */
11024
11025static const char *
a9f1838b 11026ia64_mangle_type (const_tree type)
4e11e0fc 11027{
eddcdde1 11028 type = TYPE_MAIN_VARIANT (type);
11029
11030 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11031 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11032 return NULL;
11033
4e11e0fc 11034 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11035 mangled as "e". */
11036 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11037 return "g";
11038 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11039 an extended mangling. Elsewhere, "e" is available since long
11040 double is 80 bits. */
11041 if (TYPE_MODE (type) == XFmode)
11042 return TARGET_HPUX ? "u9__float80" : "e";
7a979707 11043 if (TYPE_MODE (type) == RFmode)
11044 return "u7__fpreg";
11045 return NULL;
11046}
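
/* Illustrative manglings produced via the hook above (Itanium C++ ABI,
   hypothetical declarations):

     void f (__float80);	-> _Z1fe on Linux, _Z1fu9__float80 on HP-UX
     void g (__fpreg);		-> _Z1gu7__fpreg
     void h (__float128);	-> _Z1hg when !TARGET_HPUX
*/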
11047
11048/* Return the diagnostic message string if conversion from FROMTYPE to
11049 TOTYPE is not allowed, NULL otherwise. */
11050static const char *
a9f1838b 11051ia64_invalid_conversion (const_tree fromtype, const_tree totype)
7a979707 11052{
11053 /* Reject nontrivial conversion to or from __fpreg. */
11054 if (TYPE_MODE (fromtype) == RFmode
11055 && TYPE_MODE (totype) != RFmode
11056 && TYPE_MODE (totype) != VOIDmode)
11057 return N_("invalid conversion from %<__fpreg%>");
11058 if (TYPE_MODE (totype) == RFmode
11059 && TYPE_MODE (fromtype) != RFmode)
11060 return N_("invalid conversion to %<__fpreg%>");
11061 return NULL;
11062}
11063
11064/* Return the diagnostic message string if the unary operation OP is
11065 not permitted on TYPE, NULL otherwise. */
11066static const char *
a9f1838b 11067ia64_invalid_unary_op (int op, const_tree type)
7a979707 11068{
11069 /* Reject operations on __fpreg other than unary + or &. */
11070 if (TYPE_MODE (type) == RFmode
11071 && op != CONVERT_EXPR
11072 && op != ADDR_EXPR)
11073 return N_("invalid operation on %<__fpreg%>");
11074 return NULL;
11075}
11076
11077/* Return the diagnostic message string if the binary operation OP is
11078 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11079static const char *
a9f1838b 11080ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
7a979707 11081{
11082 /* Reject operations on __fpreg. */
11083 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11084 return N_("invalid operation on %<__fpreg%>");
4e11e0fc 11085 return NULL;
11086}
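
/* Illustrative effect of the three hooks above: given "__fpreg r;",
   taking its address ("&r") and unary plus ("+r") are accepted, while
   "-r", "r + 1.0", or an implicit conversion such as "double d = r;"
   are rejected with the diagnostics above.  */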
11087
9afff52d 11088/* HP-UX version_id attribute.
11089 For object foo, if the version_id is set to 1234, put out an alias
11090 of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
11091 other than an alias statement because it is an illegal symbol name. */
11092
11093static tree
11094ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11095 tree name ATTRIBUTE_UNUSED,
11096 tree args,
11097 int flags ATTRIBUTE_UNUSED,
11098 bool *no_add_attrs)
11099{
11100 tree arg = TREE_VALUE (args);
11101
11102 if (TREE_CODE (arg) != STRING_CST)
11103 {
11104 error ("version attribute is not a string");
11105 *no_add_attrs = true;
11106 return NULL_TREE;
11107 }
11108 return NULL_TREE;
11109}
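
/* Illustrative usage (HP-UX only, hypothetical version string):

     extern int foo () __attribute__ ((version_id ("20040821")));

   which requests the '.alias foo "foo{20040821}"' treatment described
   in the comment above, so that foo binds to the versioned symbol.  */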
11110
0ddb2244 11111/* Target hook for c_mode_for_suffix. */
11112
11113static enum machine_mode
11114ia64_c_mode_for_suffix (char suffix)
11115{
11116 if (suffix == 'q')
11117 return TFmode;
11118 if (suffix == 'w')
11119 return XFmode;
11120
11121 return VOIDmode;
11122}
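
/* Illustrative effect: with this hook the C front end accepts, on ia64,

     __float128 q = 1.0q;	('q' suffix -> TFmode)
     __float80 w = 1.0w;	('w' suffix -> XFmode)

   as constants of the corresponding extended types.  */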
11123
50738926 11124static GTY(()) rtx ia64_dconst_0_5_rtx;
11125
11126rtx
11127ia64_dconst_0_5 (void)
11128{
11129 if (! ia64_dconst_0_5_rtx)
11130 {
11131 REAL_VALUE_TYPE rv;
11132 real_from_string (&rv, "0.5");
11133 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11134 }
11135 return ia64_dconst_0_5_rtx;
11136}
11137
11138static GTY(()) rtx ia64_dconst_0_375_rtx;
11139
11140rtx
11141ia64_dconst_0_375 (void)
11142{
11143 if (! ia64_dconst_0_375_rtx)
11144 {
11145 REAL_VALUE_TYPE rv;
11146 real_from_string (&rv, "0.375");
11147 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11148 }
11149 return ia64_dconst_0_375_rtx;
11150}
11151
4bac51c9 11152static enum machine_mode
11153ia64_get_reg_raw_mode (int regno)
11154{
11155 if (FR_REGNO_P (regno))
11156 return XFmode;
11157 return default_get_reg_raw_mode (regno);
11158}
50738926 11159
f91ed644 11160/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11161 anymore. */
11162
11163bool
11164ia64_member_type_forces_blk (const_tree, enum machine_mode mode)
11165{
11166 return TARGET_HPUX && mode == TFmode;
11167}
11168
3b73548b 11169/* Always default to .text section until HP-UX linker is fixed. */
11170
11171ATTRIBUTE_UNUSED static section *
11172ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11173 enum node_frequency freq ATTRIBUTE_UNUSED,
11174 bool startup ATTRIBUTE_UNUSED,
11175 bool exit ATTRIBUTE_UNUSED)
11176{
11177 return NULL;
11178}
b155a608 11179\f
11180/* Construct (set target (vec_select op0 (parallel perm))) and
11181 return true if that's a valid instruction in the active ISA. */
11182
11183static bool
11184expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11185{
11186 rtx rperm[MAX_VECT_LEN], x;
11187 unsigned i;
11188
11189 for (i = 0; i < nelt; ++i)
11190 rperm[i] = GEN_INT (perm[i]);
11191
11192 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11193 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11194 x = gen_rtx_SET (VOIDmode, target, x);
11195
11196 x = emit_insn (x);
11197 if (recog_memoized (x) < 0)
11198 {
11199 remove_insn (x);
11200 return false;
11201 }
11202 return true;
11203}
11204
11205/* Similar, but generate a vec_concat from op0 and op1 as well. */
11206
11207static bool
11208expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11209 const unsigned char *perm, unsigned nelt)
11210{
11211 enum machine_mode v2mode;
11212 rtx x;
11213
11214 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11215 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11216 return expand_vselect (target, x, perm, nelt);
11217}
11218
11219/* Try to expand a no-op permutation. */
11220
11221static bool
11222expand_vec_perm_identity (struct expand_vec_perm_d *d)
11223{
11224 unsigned i, nelt = d->nelt;
11225
11226 for (i = 0; i < nelt; ++i)
11227 if (d->perm[i] != i)
11228 return false;
11229
11230 if (!d->testing_p)
11231 emit_move_insn (d->target, d->op0);
11232
11233 return true;
11234}
11235
11236/* Try to expand D via a shrp instruction. */
11237
11238static bool
11239expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11240{
11241 unsigned i, nelt = d->nelt, shift, mask;
7208667b 11242 rtx tmp, hi, lo;
b155a608 11243
11244 /* ??? Don't force V2SFmode into the integer registers. */
11245 if (d->vmode == V2SFmode)
11246 return false;
11247
11248 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11249
11250 shift = d->perm[0];
7208667b 11251 if (BYTES_BIG_ENDIAN && shift > nelt)
11252 return false;
11253
b155a608 11254 for (i = 1; i < nelt; ++i)
11255 if (d->perm[i] != ((shift + i) & mask))
11256 return false;
11257
11258 if (d->testing_p)
11259 return true;
11260
7208667b 11261 hi = shift < nelt ? d->op1 : d->op0;
11262 lo = shift < nelt ? d->op0 : d->op1;
11263
11264 shift %= nelt;
11265
b155a608 11266 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11267
11268 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11269 gcc_assert (IN_RANGE (shift, 1, 63));
11270
11271 /* Recall that big-endian elements are numbered starting at the top of
11272 the register. Ideally we'd have a shift-left-pair. But since we
11273 don't, convert to a shift the other direction. */
11274 if (BYTES_BIG_ENDIAN)
11275 shift = 64 - shift;
11276
11277 tmp = gen_reg_rtx (DImode);
7208667b 11278 hi = gen_lowpart (DImode, hi);
11279 lo = gen_lowpart (DImode, lo);
11280 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
b155a608 11281
11282 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11283 return true;
11284}
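
/* Worked example (little-endian, illustrative): for V4HImode and
   perm = { 1, 2, 3, 4 } we get shift == 1, hi = op1, lo = op0, and the
   result is the 128-bit pair op1:op0 shifted right by one 16-bit
   element, i.e. { op0[1], op0[2], op0[3], op1[0] }, which the single
   shrp emitted above produces with a shift count of 16 bits.  */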
11285
11286/* Try to instantiate D in a single instruction. */
11287
11288static bool
11289expand_vec_perm_1 (struct expand_vec_perm_d *d)
11290{
11291 unsigned i, nelt = d->nelt;
11292 unsigned char perm2[MAX_VECT_LEN];
11293
11294 /* Try single-operand selections. */
11295 if (d->one_operand_p)
11296 {
11297 if (expand_vec_perm_identity (d))
11298 return true;
11299 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11300 return true;
11301 }
11302
11303 /* Try two operand selections. */
11304 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11305 return true;
11306
11307 /* Recognize interleave style patterns with reversed operands. */
11308 if (!d->one_operand_p)
11309 {
11310 for (i = 0; i < nelt; ++i)
11311 {
11312 unsigned e = d->perm[i];
11313 if (e >= nelt)
11314 e -= nelt;
11315 else
11316 e += nelt;
11317 perm2[i] = e;
11318 }
11319
11320 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11321 return true;
11322 }
11323
11324 if (expand_vec_perm_shrp (d))
11325 return true;
11326
11327 /* ??? Look for deposit-like permutations where most of the result
11328 comes from one vector unchanged and the rest comes from a
11329 sequential hunk of the other vector. */
11330
11331 return false;
11332}
11333
11334/* Pattern match broadcast permutations. */
11335
11336static bool
11337expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11338{
11339 unsigned i, elt, nelt = d->nelt;
11340 unsigned char perm2[2];
11341 rtx temp;
11342 bool ok;
11343
11344 if (!d->one_operand_p)
11345 return false;
11346
11347 elt = d->perm[0];
11348 for (i = 1; i < nelt; ++i)
11349 if (d->perm[i] != elt)
11350 return false;
11351
11352 switch (d->vmode)
11353 {
11354 case V2SImode:
11355 case V2SFmode:
11356 /* Implementable by interleave. */
11357 perm2[0] = elt;
11358 perm2[1] = elt + 2;
11359 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11360 gcc_assert (ok);
11361 break;
11362
11363 case V8QImode:
11364 /* Implementable by extract + broadcast. */
11365 if (BYTES_BIG_ENDIAN)
11366 elt = 7 - elt;
11367 elt *= BITS_PER_UNIT;
11368 temp = gen_reg_rtx (DImode);
11369 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
87f78f87 11370 GEN_INT (8), GEN_INT (elt)));
b155a608 11371 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11372 break;
11373
11374 case V4HImode:
11375 /* Should have been matched directly by vec_select. */
11376 default:
11377 gcc_unreachable ();
11378 }
11379
11380 return true;
11381}
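
/* Illustrative cases: a V8QImode permutation { 5,5,5,5,5,5,5,5 } is
   handled by extracting byte 5 of op0 into a scalar and rebroadcasting
   it with mux1@brcst, while a V2SImode { 1, 1 } broadcast becomes the
   self-interleave { 1, 3 } of op0 with itself.  */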
11382
11383/* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11384 two vector permutation into a single vector permutation by using
11385 an interleave operation to merge the vectors. */
11386
11387static bool
11388expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11389{
11390 struct expand_vec_perm_d dremap, dfinal;
11391 unsigned char remap[2 * MAX_VECT_LEN];
11392 unsigned contents, i, nelt, nelt2;
11393 unsigned h0, h1, h2, h3;
11394 rtx seq;
11395 bool ok;
11396
11397 if (d->one_operand_p)
11398 return false;
11399
11400 nelt = d->nelt;
11401 nelt2 = nelt / 2;
11402
11403 /* Examine from whence the elements come. */
11404 contents = 0;
11405 for (i = 0; i < nelt; ++i)
11406 contents |= 1u << d->perm[i];
11407
11408 memset (remap, 0xff, sizeof (remap));
11409 dremap = *d;
11410
11411 h0 = (1u << nelt2) - 1;
11412 h1 = h0 << nelt2;
11413 h2 = h0 << nelt;
11414 h3 = h0 << (nelt + nelt2);
11415
11416 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11417 {
11418 for (i = 0; i < nelt; ++i)
11419 {
11420 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11421 remap[which] = i;
11422 dremap.perm[i] = which;
11423 }
11424 }
11425 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11426 {
11427 for (i = 0; i < nelt; ++i)
11428 {
11429 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11430 remap[which] = i;
11431 dremap.perm[i] = which;
11432 }
11433 }
11434 else if ((contents & 0x5555) == contents) /* mix even elements */
11435 {
11436 for (i = 0; i < nelt; ++i)
11437 {
11438 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11439 remap[which] = i;
11440 dremap.perm[i] = which;
11441 }
11442 }
11443 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11444 {
11445 for (i = 0; i < nelt; ++i)
11446 {
11447 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11448 remap[which] = i;
11449 dremap.perm[i] = which;
11450 }
11451 }
11452 else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11453 {
11454 unsigned shift = ctz_hwi (contents);
11455 for (i = 0; i < nelt; ++i)
11456 {
11457 unsigned which = (i + shift) & (2 * nelt - 1);
11458 remap[which] = i;
11459 dremap.perm[i] = which;
11460 }
11461 }
11462 else
11463 return false;
11464
11465 /* Use the remapping array set up above to move the elements from their
11466 swizzled locations into their final destinations. */
11467 dfinal = *d;
11468 for (i = 0; i < nelt; ++i)
11469 {
11470 unsigned e = remap[d->perm[i]];
11471 gcc_assert (e < nelt);
11472 dfinal.perm[i] = e;
11473 }
11474 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11475 dfinal.op1 = dfinal.op0;
11476 dfinal.one_operand_p = true;
11477 dremap.target = dfinal.op0;
11478
11479 /* Test if the final remap can be done with a single insn. For V4HImode
11480 this *will* succeed. For V8QImode or V2SImode it may not. */
11481 start_sequence ();
11482 ok = expand_vec_perm_1 (&dfinal);
11483 seq = get_insns ();
11484 end_sequence ();
11485 if (!ok)
11486 return false;
11487 if (d->testing_p)
11488 return true;
11489
11490 ok = expand_vec_perm_1 (&dremap);
11491 gcc_assert (ok);
11492
11493 emit_insn (seq);
11494 return true;
11495}
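
/* Worked example (illustrative): the V4HImode two-operand permutation
   { 4, 0, 6, 2 } uses only even-numbered elements, so contents matches
   the 0x5555 case above.  The first step interleaves the even elements,
   t = { op0[0], op1[0], op0[2], op1[2] }, and the final single-operand
   permutation { 1, 0, 3, 2 } of t then yields { op1[0], op0[0], op1[2],
   op0[2] }, i.e. the requested result.  */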
11496
11497/* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11498 constant permutation via two mux2 and a merge. */
11499
11500static bool
11501expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11502{
11503 unsigned char perm2[4];
11504 rtx rmask[4];
11505 unsigned i;
11506 rtx t0, t1, mask, x;
11507 bool ok;
11508
11509 if (d->vmode != V4HImode || d->one_operand_p)
11510 return false;
11511 if (d->testing_p)
11512 return true;
11513
11514 for (i = 0; i < 4; ++i)
11515 {
11516 perm2[i] = d->perm[i] & 3;
11517 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11518 }
11519 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11520 mask = force_reg (V4HImode, mask);
11521
11522 t0 = gen_reg_rtx (V4HImode);
11523 t1 = gen_reg_rtx (V4HImode);
11524
11525 ok = expand_vselect (t0, d->op0, perm2, 4);
11526 gcc_assert (ok);
11527 ok = expand_vselect (t1, d->op1, perm2, 4);
11528 gcc_assert (ok);
11529
11530 x = gen_rtx_AND (V4HImode, mask, t0);
11531 emit_insn (gen_rtx_SET (VOIDmode, t0, x));
11532
11533 x = gen_rtx_NOT (V4HImode, mask);
11534 x = gen_rtx_AND (V4HImode, x, t1);
11535 emit_insn (gen_rtx_SET (VOIDmode, t1, x));
11536
11537 x = gen_rtx_IOR (V4HImode, t0, t1);
11538 emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
11539
11540 return true;
11541}
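
/* Worked example (illustrative): for the V4HImode permutation
   { 0, 5, 2, 7 }, perm2 = { 0, 1, 2, 3 } and the mask selects t0 (from
   op0) in the even lanes and t1 (from op1) in the odd ones, so the
   AND/ANDNOT/IOR merge above produces { op0[0], op1[1], op0[2], op1[3] }.  */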
11542
11543/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11544 With all of the interface bits taken care of, perform the expansion
11545 in D and return true on success. */
11546
11547static bool
11548ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11549{
11550 if (expand_vec_perm_1 (d))
11551 return true;
11552 if (expand_vec_perm_broadcast (d))
11553 return true;
11554 if (expand_vec_perm_interleave_2 (d))
11555 return true;
11556 if (expand_vec_perm_v4hi_5 (d))
11557 return true;
11558 return false;
11559}
11560
11561bool
11562ia64_expand_vec_perm_const (rtx operands[4])
11563{
11564 struct expand_vec_perm_d d;
11565 unsigned char perm[MAX_VECT_LEN];
11566 int i, nelt, which;
11567 rtx sel;
11568
11569 d.target = operands[0];
11570 d.op0 = operands[1];
11571 d.op1 = operands[2];
11572 sel = operands[3];
11573
11574 d.vmode = GET_MODE (d.target);
11575 gcc_assert (VECTOR_MODE_P (d.vmode));
11576 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11577 d.testing_p = false;
11578
11579 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11580 gcc_assert (XVECLEN (sel, 0) == nelt);
11581 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11582
11583 for (i = which = 0; i < nelt; ++i)
11584 {
11585 rtx e = XVECEXP (sel, 0, i);
11586 int ei = INTVAL (e) & (2 * nelt - 1);
11587
11588 which |= (ei < nelt ? 1 : 2);
11589 d.perm[i] = ei;
11590 perm[i] = ei;
11591 }
11592
11593 switch (which)
11594 {
11595 default:
11596 gcc_unreachable ();
11597
11598 case 3:
11599 if (!rtx_equal_p (d.op0, d.op1))
11600 {
11601 d.one_operand_p = false;
11602 break;
11603 }
11604
11605 /* The elements of PERM do not suggest that only the first operand
11606 is used, but both operands are identical. Allow easier matching
11607 of the permutation by folding the permutation into the single
11608 input vector. */
11609 for (i = 0; i < nelt; ++i)
11610 if (d.perm[i] >= nelt)
11611 d.perm[i] -= nelt;
11612 /* FALLTHRU */
11613
11614 case 1:
11615 d.op1 = d.op0;
11616 d.one_operand_p = true;
11617 break;
11618
11619 case 2:
11620 for (i = 0; i < nelt; ++i)
11621 d.perm[i] -= nelt;
11622 d.op0 = d.op1;
11623 d.one_operand_p = true;
11624 break;
11625 }
11626
11627 if (ia64_expand_vec_perm_const_1 (&d))
11628 return true;
11629
11630 /* If the mask says both arguments are needed, but they are the same,
11631 the above tried to expand with one_operand_p true. If that didn't
11632 work, retry with one_operand_p false, as that's what we used in _ok. */
11633 if (which == 3 && d.one_operand_p)
11634 {
11635 memcpy (d.perm, perm, sizeof (perm));
11636 d.one_operand_p = false;
11637 return ia64_expand_vec_perm_const_1 (&d);
11638 }
11639
11640 return false;
11641}
11642
11643/* Implement targetm.vectorize.vec_perm_const_ok. */
11644
11645static bool
11646ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
11647 const unsigned char *sel)
11648{
11649 struct expand_vec_perm_d d;
11650 unsigned int i, nelt, which;
11651 bool ret;
11652
11653 d.vmode = vmode;
11654 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11655 d.testing_p = true;
11656
11657 /* Extract the values from the vector CST into the permutation
11658 array in D. */
11659 memcpy (d.perm, sel, nelt);
11660 for (i = which = 0; i < nelt; ++i)
11661 {
11662 unsigned char e = d.perm[i];
11663 gcc_assert (e < 2 * nelt);
11664 which |= (e < nelt ? 1 : 2);
11665 }
11666
11667 /* If all elements come from the second vector, fold them onto the first. */
11668 if (which == 2)
11669 for (i = 0; i < nelt; ++i)
11670 d.perm[i] -= nelt;
11671
11672 /* Check whether the mask can be applied to the vector type. */
11673 d.one_operand_p = (which != 3);
11674
11675 /* Otherwise we have to go through the motions and see if we can
11676 figure out how to generate the requested permutation. */
11677 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11678 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11679 if (!d.one_operand_p)
11680 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11681
11682 start_sequence ();
11683 ret = ia64_expand_vec_perm_const_1 (&d);
11684 end_sequence ();
11685
11686 return ret;
11687}
11688
11689void
11690ia64_expand_vec_setv2sf (rtx operands[3])
11691{
11692 struct expand_vec_perm_d d;
11693 unsigned int which;
11694 bool ok;
11695
11696 d.target = operands[0];
11697 d.op0 = operands[0];
11698 d.op1 = gen_reg_rtx (V2SFmode);
11699 d.vmode = V2SFmode;
11700 d.nelt = 2;
11701 d.one_operand_p = false;
11702 d.testing_p = false;
11703
11704 which = INTVAL (operands[2]);
11705 gcc_assert (which <= 1);
11706 d.perm[0] = 1 - which;
11707 d.perm[1] = which + 2;
11708
11709 emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11710
11711 ok = ia64_expand_vec_perm_const_1 (&d);
11712 gcc_assert (ok);
11713}
11714
11715void
11716ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11717{
11718 struct expand_vec_perm_d d;
11719 enum machine_mode vmode = GET_MODE (target);
11720 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11721 bool ok;
11722
11723 d.target = target;
11724 d.op0 = op0;
11725 d.op1 = op1;
11726 d.vmode = vmode;
11727 d.nelt = nelt;
11728 d.one_operand_p = false;
11729 d.testing_p = false;
11730
11731 for (i = 0; i < nelt; ++i)
11732 d.perm[i] = i * 2 + odd;
11733
11734 ok = ia64_expand_vec_perm_const_1 (&d);
11735 gcc_assert (ok);
11736}
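
/* Illustrative use (a sketch): for V4HImode, odd == 0 requests
   { 0, 2, 4, 6 }, i.e. { op0[0], op0[2], op1[0], op1[2] }, and odd == 1
   requests the corresponding odd-numbered elements; both are expanded
   through ia64_expand_vec_perm_const_1 above.  */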
3b73548b 11737
1f3233d1 11738#include "gt-ia64.h"