1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2017 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "flags.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "cfgrtl.h"
51 #include "libfuncs.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "gimplify.h"
56 #include "intl.h"
57 #include "debug.h"
58 #include "params.h"
59 #include "dbgcnt.h"
60 #include "tm-constrs.h"
61 #include "sel-sched.h"
62 #include "reload.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "builtins.h"
66
67 /* This file should be included last. */
68 #include "target-def.h"
69
70 /* This is used for communication between ASM_OUTPUT_LABEL and
71 ASM_OUTPUT_LABELREF. */
72 int ia64_asm_output_label = 0;
73
74 /* Register names for ia64_expand_prologue. */
75 static const char * const ia64_reg_numbers[96] =
76 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
77 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
78 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
79 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
80 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
81 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
82 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
83 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
84 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
85 "r104","r105","r106","r107","r108","r109","r110","r111",
86 "r112","r113","r114","r115","r116","r117","r118","r119",
87 "r120","r121","r122","r123","r124","r125","r126","r127"};
88
89 /* ??? These strings could be shared with REGISTER_NAMES. */
90 static const char * const ia64_input_reg_names[8] =
91 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
92
93 /* ??? These strings could be shared with REGISTER_NAMES. */
94 static const char * const ia64_local_reg_names[80] =
95 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
96 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
97 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
98 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
99 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
100 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
101 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
102 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
103 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
104 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
105
106 /* ??? These strings could be shared with REGISTER_NAMES. */
107 static const char * const ia64_output_reg_names[8] =
108 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
109
110 /* Variables which are this size or smaller are put in the sdata/sbss
111 sections. */
112
113 unsigned int ia64_section_threshold;
114
115 /* The following variable is used by the DFA insn scheduler. The value is
116 TRUE if we do insn bundling instead of insn scheduling. */
117 int bundling_p = 0;
118
119 enum ia64_frame_regs
120 {
121 reg_fp,
122 reg_save_b0,
123 reg_save_pr,
124 reg_save_ar_pfs,
125 reg_save_ar_unat,
126 reg_save_ar_lc,
127 reg_save_gp,
128 number_of_ia64_frame_regs
129 };
130
131 /* Structure to be filled in by ia64_compute_frame_size with register
132 save masks and offsets for the current function. */
133
134 struct ia64_frame_info
135 {
136 HOST_WIDE_INT total_size; /* size of the stack frame, not including
137 the caller's scratch area. */
138 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
139 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
140 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
141 HARD_REG_SET mask; /* mask of saved registers. */
142 unsigned int gr_used_mask; /* mask of registers in use as gr spill
143 registers or long-term scratches. */
144 int n_spilled; /* number of spilled registers. */
145 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
146 int n_input_regs; /* number of input registers used. */
147 int n_local_regs; /* number of local registers used. */
148 int n_output_regs; /* number of output registers used. */
149 int n_rotate_regs; /* number of rotating registers used. */
150
151 char need_regstk; /* true if a .regstk directive needed. */
152 char initialized; /* true if the data is finalized. */
153 };
154
155 /* Current frame information calculated by ia64_compute_frame_size. */
156 static struct ia64_frame_info current_frame_info;
157 /* The actual registers that are emitted. */
158 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
159 \f
160 static int ia64_first_cycle_multipass_dfa_lookahead (void);
161 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
162 static void ia64_init_dfa_pre_cycle_insn (void);
163 static rtx ia64_dfa_pre_cycle_insn (void);
164 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
165 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
166 static void ia64_h_i_d_extended (void);
167 static void * ia64_alloc_sched_context (void);
168 static void ia64_init_sched_context (void *, bool);
169 static void ia64_set_sched_context (void *);
170 static void ia64_clear_sched_context (void *);
171 static void ia64_free_sched_context (void *);
172 static int ia64_mode_to_int (machine_mode);
173 static void ia64_set_sched_flags (spec_info_t);
174 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
175 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
176 static bool ia64_skip_rtx_p (const_rtx);
177 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
178 static bool ia64_needs_block_p (ds_t);
179 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
180 static int ia64_spec_check_p (rtx);
181 static int ia64_spec_check_src_p (rtx);
182 static rtx gen_tls_get_addr (void);
183 static rtx gen_thread_pointer (void);
184 static int find_gr_spill (enum ia64_frame_regs, int);
185 static int next_scratch_gr_reg (void);
186 static void mark_reg_gr_used_mask (rtx, void *);
187 static void ia64_compute_frame_size (HOST_WIDE_INT);
188 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
189 static void finish_spill_pointers (void);
190 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
191 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
192 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
193 static rtx gen_movdi_x (rtx, rtx, rtx);
194 static rtx gen_fr_spill_x (rtx, rtx, rtx);
195 static rtx gen_fr_restore_x (rtx, rtx, rtx);
196
197 static void ia64_option_override (void);
198 static bool ia64_can_eliminate (const int, const int);
199 static machine_mode hfa_element_mode (const_tree, bool);
200 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
201 tree, int *, int);
202 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
203 tree, bool);
204 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
205 const_tree, bool, bool);
206 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
207 const_tree, bool);
208 static rtx ia64_function_incoming_arg (cumulative_args_t,
209 machine_mode, const_tree, bool);
210 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
213 static unsigned int ia64_function_arg_boundary (machine_mode,
214 const_tree);
215 static bool ia64_function_ok_for_sibcall (tree, tree);
216 static bool ia64_return_in_memory (const_tree, const_tree);
217 static rtx ia64_function_value (const_tree, const_tree, bool);
218 static rtx ia64_libcall_value (machine_mode, const_rtx);
219 static bool ia64_function_value_regno_p (const unsigned int);
220 static int ia64_register_move_cost (machine_mode, reg_class_t,
221 reg_class_t);
222 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
223 bool);
224 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
225 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
226 static void fix_range (const char *);
227 static struct machine_function * ia64_init_machine_status (void);
228 static void emit_insn_group_barriers (FILE *);
229 static void emit_all_insn_group_barriers (FILE *);
230 static void final_emit_insn_group_barriers (FILE *);
231 static void emit_predicate_relation_info (void);
232 static void ia64_reorg (void);
233 static bool ia64_in_small_data_p (const_tree);
234 static void process_epilogue (FILE *, rtx, bool, bool);
235
236 static bool ia64_assemble_integer (rtx, unsigned int, int);
237 static void ia64_output_function_prologue (FILE *);
238 static void ia64_output_function_epilogue (FILE *);
239 static void ia64_output_function_end_prologue (FILE *);
240
241 static void ia64_print_operand (FILE *, rtx, int);
242 static void ia64_print_operand_address (FILE *, machine_mode, rtx);
243 static bool ia64_print_operand_punct_valid_p (unsigned char code);
244
245 static int ia64_issue_rate (void);
246 static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
247 static void ia64_sched_init (FILE *, int, int);
248 static void ia64_sched_init_global (FILE *, int, int);
249 static void ia64_sched_finish_global (FILE *, int);
250 static void ia64_sched_finish (FILE *, int);
251 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
252 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
253 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
254 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
255
256 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
257 static void ia64_asm_emit_except_personality (rtx);
258 static void ia64_asm_init_sections (void);
259
260 static enum unwind_info_type ia64_debug_unwind_info (void);
261
262 static struct bundle_state *get_free_bundle_state (void);
263 static void free_bundle_state (struct bundle_state *);
264 static void initiate_bundle_states (void);
265 static void finish_bundle_states (void);
266 static int insert_bundle_state (struct bundle_state *);
267 static void initiate_bundle_state_table (void);
268 static void finish_bundle_state_table (void);
269 static int try_issue_nops (struct bundle_state *, int);
270 static int try_issue_insn (struct bundle_state *, rtx);
271 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
272 int, int);
273 static int get_max_pos (state_t);
274 static int get_template (state_t, int);
275
276 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
277 static bool important_for_bundling_p (rtx_insn *);
278 static bool unknown_for_bundling_p (rtx_insn *);
279 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
280
281 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
282 HOST_WIDE_INT, tree);
283 static void ia64_file_start (void);
284 static void ia64_globalize_decl_name (FILE *, tree);
285
286 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
287 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
288 static section *ia64_select_rtx_section (machine_mode, rtx,
289 unsigned HOST_WIDE_INT);
290 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
291 ATTRIBUTE_UNUSED;
292 static unsigned int ia64_section_type_flags (tree, const char *, int);
293 static void ia64_init_libfuncs (void)
294 ATTRIBUTE_UNUSED;
295 static void ia64_hpux_init_libfuncs (void)
296 ATTRIBUTE_UNUSED;
297 static void ia64_sysv4_init_libfuncs (void)
298 ATTRIBUTE_UNUSED;
299 static void ia64_vms_init_libfuncs (void)
300 ATTRIBUTE_UNUSED;
301 static void ia64_soft_fp_init_libfuncs (void)
302 ATTRIBUTE_UNUSED;
303 static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
304 ATTRIBUTE_UNUSED;
305 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
306 ATTRIBUTE_UNUSED;
307
308 static bool ia64_attribute_takes_identifier_p (const_tree);
309 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
310 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
311 static void ia64_encode_section_info (tree, rtx, int);
312 static rtx ia64_struct_value_rtx (tree, int);
313 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
314 static bool ia64_scalar_mode_supported_p (scalar_mode mode);
315 static bool ia64_vector_mode_supported_p (machine_mode mode);
316 static bool ia64_legitimate_constant_p (machine_mode, rtx);
317 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
318 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
319 static const char *ia64_mangle_type (const_tree);
320 static const char *ia64_invalid_conversion (const_tree, const_tree);
321 static const char *ia64_invalid_unary_op (int, const_tree);
322 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
323 static machine_mode ia64_c_mode_for_suffix (char);
324 static void ia64_trampoline_init (rtx, tree, rtx);
325 static void ia64_override_options_after_change (void);
326 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
327
328 static tree ia64_fold_builtin (tree, int, tree *, bool);
329 static tree ia64_builtin_decl (unsigned, bool);
330
331 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
332 static machine_mode ia64_get_reg_raw_mode (int regno);
333 static section * ia64_hpux_function_section (tree, enum node_frequency,
334 bool, bool);
335
336 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
337 const unsigned char *sel);
338
339 static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
340 static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
341 static bool ia64_modes_tieable_p (machine_mode, machine_mode);
342
343 #define MAX_VECT_LEN 8
344
345 struct expand_vec_perm_d
346 {
347 rtx target, op0, op1;
348 unsigned char perm[MAX_VECT_LEN];
349 machine_mode vmode;
350 unsigned char nelt;
351 bool one_operand_p;
352 bool testing_p;
353 };
354
355 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
356
357 \f
358 /* Table of valid machine attributes. */
359 static const struct attribute_spec ia64_attribute_table[] =
360 {
361 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
362 affects_type_identity } */
363 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
364 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
365 false },
366 #if TARGET_ABI_OPEN_VMS
367 { "common_object", 1, 1, true, false, false,
368 ia64_vms_common_object_attribute, false },
369 #endif
370 { "version_id", 1, 1, true, false, false,
371 ia64_handle_version_id_attribute, false },
372 { NULL, 0, 0, false, false, false, NULL, false }
373 };
374
375 /* Initialize the GCC target structure. */
376 #undef TARGET_ATTRIBUTE_TABLE
377 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
378
379 #undef TARGET_INIT_BUILTINS
380 #define TARGET_INIT_BUILTINS ia64_init_builtins
381
382 #undef TARGET_FOLD_BUILTIN
383 #define TARGET_FOLD_BUILTIN ia64_fold_builtin
384
385 #undef TARGET_EXPAND_BUILTIN
386 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
387
388 #undef TARGET_BUILTIN_DECL
389 #define TARGET_BUILTIN_DECL ia64_builtin_decl
390
391 #undef TARGET_ASM_BYTE_OP
392 #define TARGET_ASM_BYTE_OP "\tdata1\t"
393 #undef TARGET_ASM_ALIGNED_HI_OP
394 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
395 #undef TARGET_ASM_ALIGNED_SI_OP
396 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
397 #undef TARGET_ASM_ALIGNED_DI_OP
398 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
399 #undef TARGET_ASM_UNALIGNED_HI_OP
400 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
401 #undef TARGET_ASM_UNALIGNED_SI_OP
402 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
403 #undef TARGET_ASM_UNALIGNED_DI_OP
404 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
405 #undef TARGET_ASM_INTEGER
406 #define TARGET_ASM_INTEGER ia64_assemble_integer
407
408 #undef TARGET_OPTION_OVERRIDE
409 #define TARGET_OPTION_OVERRIDE ia64_option_override
410
411 #undef TARGET_ASM_FUNCTION_PROLOGUE
412 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
413 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
414 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
415 #undef TARGET_ASM_FUNCTION_EPILOGUE
416 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
417
418 #undef TARGET_PRINT_OPERAND
419 #define TARGET_PRINT_OPERAND ia64_print_operand
420 #undef TARGET_PRINT_OPERAND_ADDRESS
421 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
422 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
423 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
424
425 #undef TARGET_IN_SMALL_DATA_P
426 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
427
428 #undef TARGET_SCHED_ADJUST_COST
429 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
430 #undef TARGET_SCHED_ISSUE_RATE
431 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
432 #undef TARGET_SCHED_VARIABLE_ISSUE
433 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
434 #undef TARGET_SCHED_INIT
435 #define TARGET_SCHED_INIT ia64_sched_init
436 #undef TARGET_SCHED_FINISH
437 #define TARGET_SCHED_FINISH ia64_sched_finish
438 #undef TARGET_SCHED_INIT_GLOBAL
439 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
440 #undef TARGET_SCHED_FINISH_GLOBAL
441 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
442 #undef TARGET_SCHED_REORDER
443 #define TARGET_SCHED_REORDER ia64_sched_reorder
444 #undef TARGET_SCHED_REORDER2
445 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
446
447 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
448 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
449
450 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
451 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
452
453 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
454 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
455 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
456 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
457
458 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
459 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
460 ia64_first_cycle_multipass_dfa_lookahead_guard
461
462 #undef TARGET_SCHED_DFA_NEW_CYCLE
463 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
464
465 #undef TARGET_SCHED_H_I_D_EXTENDED
466 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
467
468 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
469 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
470
471 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
472 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
473
474 #undef TARGET_SCHED_SET_SCHED_CONTEXT
475 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
476
477 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
478 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
479
480 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
481 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
482
483 #undef TARGET_SCHED_SET_SCHED_FLAGS
484 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
485
486 #undef TARGET_SCHED_GET_INSN_SPEC_DS
487 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
488
489 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
490 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
491
492 #undef TARGET_SCHED_SPECULATE_INSN
493 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
494
495 #undef TARGET_SCHED_NEEDS_BLOCK_P
496 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
497
498 #undef TARGET_SCHED_GEN_SPEC_CHECK
499 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
500
501 #undef TARGET_SCHED_SKIP_RTX_P
502 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
503
504 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
505 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
506 #undef TARGET_ARG_PARTIAL_BYTES
507 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
508 #undef TARGET_FUNCTION_ARG
509 #define TARGET_FUNCTION_ARG ia64_function_arg
510 #undef TARGET_FUNCTION_INCOMING_ARG
511 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
512 #undef TARGET_FUNCTION_ARG_ADVANCE
513 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
514 #undef TARGET_FUNCTION_ARG_PADDING
515 #define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
516 #undef TARGET_FUNCTION_ARG_BOUNDARY
517 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
518
519 #undef TARGET_ASM_OUTPUT_MI_THUNK
520 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
521 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
522 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
523
524 #undef TARGET_ASM_FILE_START
525 #define TARGET_ASM_FILE_START ia64_file_start
526
527 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
528 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
529
530 #undef TARGET_REGISTER_MOVE_COST
531 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
532 #undef TARGET_MEMORY_MOVE_COST
533 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
534 #undef TARGET_RTX_COSTS
535 #define TARGET_RTX_COSTS ia64_rtx_costs
536 #undef TARGET_ADDRESS_COST
537 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
538
539 #undef TARGET_UNSPEC_MAY_TRAP_P
540 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
541
542 #undef TARGET_MACHINE_DEPENDENT_REORG
543 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
544
545 #undef TARGET_ENCODE_SECTION_INFO
546 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
547
548 #undef TARGET_SECTION_TYPE_FLAGS
549 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
550
551 #ifdef HAVE_AS_TLS
552 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
553 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
554 #endif
555
556 /* ??? Investigate. */
557 #if 0
558 #undef TARGET_PROMOTE_PROTOTYPES
559 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
560 #endif
561
562 #undef TARGET_FUNCTION_VALUE
563 #define TARGET_FUNCTION_VALUE ia64_function_value
564 #undef TARGET_LIBCALL_VALUE
565 #define TARGET_LIBCALL_VALUE ia64_libcall_value
566 #undef TARGET_FUNCTION_VALUE_REGNO_P
567 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
568
569 #undef TARGET_STRUCT_VALUE_RTX
570 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
571 #undef TARGET_RETURN_IN_MEMORY
572 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
573 #undef TARGET_SETUP_INCOMING_VARARGS
574 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
575 #undef TARGET_STRICT_ARGUMENT_NAMING
576 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
577 #undef TARGET_MUST_PASS_IN_STACK
578 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
579 #undef TARGET_GET_RAW_RESULT_MODE
580 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
581 #undef TARGET_GET_RAW_ARG_MODE
582 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
583
584 #undef TARGET_MEMBER_TYPE_FORCES_BLK
585 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
586
587 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
588 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
589
590 #undef TARGET_ASM_UNWIND_EMIT
591 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
592 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
593 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
594 #undef TARGET_ASM_INIT_SECTIONS
595 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
596
597 #undef TARGET_DEBUG_UNWIND_INFO
598 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
599
600 #undef TARGET_SCALAR_MODE_SUPPORTED_P
601 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
602 #undef TARGET_VECTOR_MODE_SUPPORTED_P
603 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
604
605 #undef TARGET_LEGITIMATE_CONSTANT_P
606 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
607 #undef TARGET_LEGITIMATE_ADDRESS_P
608 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
609
610 #undef TARGET_LRA_P
611 #define TARGET_LRA_P hook_bool_void_false
612
613 #undef TARGET_CANNOT_FORCE_CONST_MEM
614 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
615
616 #undef TARGET_MANGLE_TYPE
617 #define TARGET_MANGLE_TYPE ia64_mangle_type
618
619 #undef TARGET_INVALID_CONVERSION
620 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
621 #undef TARGET_INVALID_UNARY_OP
622 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
623 #undef TARGET_INVALID_BINARY_OP
624 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
625
626 #undef TARGET_C_MODE_FOR_SUFFIX
627 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
628
629 #undef TARGET_CAN_ELIMINATE
630 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
631
632 #undef TARGET_TRAMPOLINE_INIT
633 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
634
635 #undef TARGET_CAN_USE_DOLOOP_P
636 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
637 #undef TARGET_INVALID_WITHIN_DOLOOP
638 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
639
640 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
641 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
642
643 #undef TARGET_PREFERRED_RELOAD_CLASS
644 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
645
646 #undef TARGET_DELAY_SCHED2
647 #define TARGET_DELAY_SCHED2 true
648
649 /* Variable tracking should be run after all optimizations which
650 change order of insns. It also needs a valid CFG. */
651 #undef TARGET_DELAY_VARTRACK
652 #define TARGET_DELAY_VARTRACK true
653
654 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
655 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
656
657 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
658 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
659
660 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
661 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
662
663 #undef TARGET_HARD_REGNO_NREGS
664 #define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
665 #undef TARGET_HARD_REGNO_MODE_OK
666 #define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok
667
668 #undef TARGET_MODES_TIEABLE_P
669 #define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p
670
671 struct gcc_target targetm = TARGET_INITIALIZER;
672 \f
673 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
674 identifier as an argument, so the front end shouldn't look it up. */
675
676 static bool
677 ia64_attribute_takes_identifier_p (const_tree attr_id)
678 {
679 if (is_attribute_p ("model", attr_id))
680 return true;
681 #if TARGET_ABI_OPEN_VMS
682 if (is_attribute_p ("common_object", attr_id))
683 return true;
684 #endif
685 return false;
686 }
687
688 typedef enum
689 {
690 ADDR_AREA_NORMAL, /* normal address area */
691 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
692 }
693 ia64_addr_area;
694
695 static GTY(()) tree small_ident1;
696 static GTY(()) tree small_ident2;
697
698 static void
699 init_idents (void)
700 {
701 if (small_ident1 == 0)
702 {
703 small_ident1 = get_identifier ("small");
704 small_ident2 = get_identifier ("__small__");
705 }
706 }
707
708 /* Retrieve the address area that has been chosen for the given decl. */
709
710 static ia64_addr_area
711 ia64_get_addr_area (tree decl)
712 {
713 tree model_attr;
714
715 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
716 if (model_attr)
717 {
718 tree id;
719
720 init_idents ();
721 id = TREE_VALUE (TREE_VALUE (model_attr));
722 if (id == small_ident1 || id == small_ident2)
723 return ADDR_AREA_SMALL;
724 }
725 return ADDR_AREA_NORMAL;
726 }
727
728 static tree
729 ia64_handle_model_attribute (tree *node, tree name, tree args,
730 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
731 {
732 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
733 ia64_addr_area area;
734 tree arg, decl = *node;
735
736 init_idents ();
737 arg = TREE_VALUE (args);
738 if (arg == small_ident1 || arg == small_ident2)
739 {
740 addr_area = ADDR_AREA_SMALL;
741 }
742 else
743 {
744 warning (OPT_Wattributes, "invalid argument of %qE attribute",
745 name);
746 *no_add_attrs = true;
747 }
748
749 switch (TREE_CODE (decl))
750 {
751 case VAR_DECL:
752 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
753 == FUNCTION_DECL)
754 && !TREE_STATIC (decl))
755 {
756 error_at (DECL_SOURCE_LOCATION (decl),
757 "an address area attribute cannot be specified for "
758 "local variables");
759 *no_add_attrs = true;
760 }
761 area = ia64_get_addr_area (decl);
762 if (area != ADDR_AREA_NORMAL && addr_area != area)
763 {
764 error ("address area of %q+D conflicts with previous "
765 "declaration", decl);
766 *no_add_attrs = true;
767 }
768 break;
769
770 case FUNCTION_DECL:
771 error_at (DECL_SOURCE_LOCATION (decl),
772 "address area attribute cannot be specified for "
773 "functions");
774 *no_add_attrs = true;
775 break;
776
777 default:
778 warning (OPT_Wattributes, "%qE attribute ignored",
779 name);
780 *no_add_attrs = true;
781 break;
782 }
783
784 return NULL_TREE;
785 }
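/* For example, a file-scope declaration such as

     static int counter __attribute__ ((model ("small")));

   is accepted by the handler above and marks COUNTER for the small
   (addl-reachable) address area, while the same attribute on a
   non-static local variable or on a function is diagnosed and
   dropped.  */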
786
787 /* Part of the low level implementation of DEC Ada pragma Common_Object which
788 enables the shared use of variables stored in overlaid linker areas
789 corresponding to the use of Fortran COMMON. */
790
791 static tree
792 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
793 int flags ATTRIBUTE_UNUSED,
794 bool *no_add_attrs)
795 {
796 tree decl = *node;
797 tree id;
798
799 gcc_assert (DECL_P (decl));
800
801 DECL_COMMON (decl) = 1;
802 id = TREE_VALUE (args);
803 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
804 {
805 error ("%qE attribute requires a string constant argument", name);
806 *no_add_attrs = true;
807 return NULL_TREE;
808 }
809 return NULL_TREE;
810 }
811
812 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
813
814 void
815 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
816 unsigned HOST_WIDE_INT size,
817 unsigned int align)
818 {
819 tree attr = DECL_ATTRIBUTES (decl);
820
821 if (attr)
822 attr = lookup_attribute ("common_object", attr);
823 if (attr)
824 {
825 tree id = TREE_VALUE (TREE_VALUE (attr));
826 const char *name;
827
828 if (TREE_CODE (id) == IDENTIFIER_NODE)
829 name = IDENTIFIER_POINTER (id);
830 else if (TREE_CODE (id) == STRING_CST)
831 name = TREE_STRING_POINTER (id);
832 else
833 abort ();
834
835 fprintf (file, "\t.vms_common\t\"%s\",", name);
836 }
837 else
838 fprintf (file, "%s", COMMON_ASM_OP);
839
840 /* Code from elfos.h. */
841 assemble_name (file, name);
842 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
843 size, align / BITS_PER_UNIT);
844
845 fputc ('\n', file);
846 }
847
848 static void
849 ia64_encode_addr_area (tree decl, rtx symbol)
850 {
851 int flags;
852
853 flags = SYMBOL_REF_FLAGS (symbol);
854 switch (ia64_get_addr_area (decl))
855 {
856 case ADDR_AREA_NORMAL: break;
857 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
858 default: gcc_unreachable ();
859 }
860 SYMBOL_REF_FLAGS (symbol) = flags;
861 }
862
863 static void
864 ia64_encode_section_info (tree decl, rtx rtl, int first)
865 {
866 default_encode_section_info (decl, rtl, first);
867
868 /* Careful not to prod global register variables. */
869 if (TREE_CODE (decl) == VAR_DECL
870 && GET_CODE (DECL_RTL (decl)) == MEM
871 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
872 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
873 ia64_encode_addr_area (decl, XEXP (rtl, 0));
874 }
875 \f
876 /* Return 1 if the operands of a move are ok. */
877
878 int
879 ia64_move_ok (rtx dst, rtx src)
880 {
881 /* If we're under init_recog_no_volatile, we'll not be able to use
882 memory_operand. So check the code directly and don't worry about
883 the validity of the underlying address, which should have been
884 checked elsewhere anyway. */
885 if (GET_CODE (dst) != MEM)
886 return 1;
887 if (GET_CODE (src) == MEM)
888 return 0;
889 if (register_operand (src, VOIDmode))
890 return 1;
891
892 /* Otherwise, this must be a constant, one that is either 0, 0.0, or 1.0. */
893 if (INTEGRAL_MODE_P (GET_MODE (dst)))
894 return src == const0_rtx;
895 else
896 return satisfies_constraint_G (src);
897 }
898
899 /* Return 1 if the operands are ok for a floating point load pair. */
900
901 int
902 ia64_load_pair_ok (rtx dst, rtx src)
903 {
904 /* ??? There is a thinko in the implementation of the "x" constraint and the
905 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
906 also return false for it. */
907 if (GET_CODE (dst) != REG
908 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
909 return 0;
910 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
911 return 0;
912 switch (GET_CODE (XEXP (src, 0)))
913 {
914 case REG:
915 case POST_INC:
916 break;
917 case POST_DEC:
918 return 0;
919 case POST_MODIFY:
920 {
921 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
922
923 if (GET_CODE (adjust) != CONST_INT
924 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
925 return 0;
926 }
927 break;
928 default:
929 abort ();
930 }
931 return 1;
932 }
933
934 int
935 addp4_optimize_ok (rtx op1, rtx op2)
936 {
937 return (basereg_operand (op1, GET_MODE(op1)) !=
938 basereg_operand (op2, GET_MODE(op2)));
939 }
940
941 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
942 Return the length of the field, or <= 0 on failure. */
943
944 int
945 ia64_depz_field_mask (rtx rop, rtx rshift)
946 {
947 unsigned HOST_WIDE_INT op = INTVAL (rop);
948 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
949
950 /* Get rid of the zero bits we're shifting in. */
951 op >>= shift;
952
953 /* We must now have a solid block of 1's at bit 0. */
954 return exact_log2 (op + 1);
955 }
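/* For example, with ROP == 0xff0 and RSHIFT == 4, the shift leaves
   op == 0xff and exact_log2 (0x100) gives a field length of 8.  If the
   shifted value is not a solid block of 1's at bit 0, e.g. ROP == 0x5
   with RSHIFT == 0, exact_log2 (0x6) returns -1 and the caller treats
   the mask as unsuitable.  */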
956
957 /* Return the TLS model to use for ADDR. */
958
959 static enum tls_model
960 tls_symbolic_operand_type (rtx addr)
961 {
962 enum tls_model tls_kind = TLS_MODEL_NONE;
963
964 if (GET_CODE (addr) == CONST)
965 {
966 if (GET_CODE (XEXP (addr, 0)) == PLUS
967 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
968 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
969 }
970 else if (GET_CODE (addr) == SYMBOL_REF)
971 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
972
973 return tls_kind;
974 }
975
976 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
977 as a base register. */
978
979 static inline bool
980 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
981 {
982 if (strict
983 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
984 return true;
985 else if (!strict
986 && (GENERAL_REGNO_P (REGNO (reg))
987 || !HARD_REGISTER_P (reg)))
988 return true;
989 else
990 return false;
991 }
992
993 static bool
994 ia64_legitimate_address_reg (const_rtx reg, bool strict)
995 {
996 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
997 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
998 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
999 return true;
1000
1001 return false;
1002 }
1003
1004 static bool
1005 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1006 {
1007 if (GET_CODE (disp) == PLUS
1008 && rtx_equal_p (reg, XEXP (disp, 0))
1009 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1010 || (CONST_INT_P (XEXP (disp, 1))
1011 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1012 return true;
1013
1014 return false;
1015 }
1016
1017 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1018
1019 static bool
1020 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1021 rtx x, bool strict)
1022 {
1023 if (ia64_legitimate_address_reg (x, strict))
1024 return true;
1025 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1026 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1027 && XEXP (x, 0) != arg_pointer_rtx)
1028 return true;
1029 else if (GET_CODE (x) == POST_MODIFY
1030 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1031 && XEXP (x, 0) != arg_pointer_rtx
1032 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1033 return true;
1034 else
1035 return false;
1036 }
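/* The checks above accept, roughly, the address forms
     (reg)
     (post_inc (reg))  and  (post_dec (reg))
     (post_modify (reg) (plus (reg) (reg)))
     (post_modify (reg) (plus (reg) (const_int -256..255)))
   where each register must pass ia64_legitimate_address_reg (a plain
   REG or a SUBREG of one) and the autoincrement forms additionally
   exclude the argument pointer.  */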
1037
1038 /* Return true if X is a constant that is valid for some immediate
1039 field in an instruction. */
1040
1041 static bool
1042 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1043 {
1044 switch (GET_CODE (x))
1045 {
1046 case CONST_INT:
1047 case LABEL_REF:
1048 return true;
1049
1050 case CONST_DOUBLE:
1051 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1052 return true;
1053 return satisfies_constraint_G (x);
1054
1055 case CONST:
1056 case SYMBOL_REF:
1057 /* ??? Short term workaround for PR 28490. We must make the code here
1058 match the code in ia64_expand_move and move_operand, even though they
1059 are both technically wrong. */
1060 if (tls_symbolic_operand_type (x) == 0)
1061 {
1062 HOST_WIDE_INT addend = 0;
1063 rtx op = x;
1064
1065 if (GET_CODE (op) == CONST
1066 && GET_CODE (XEXP (op, 0)) == PLUS
1067 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1068 {
1069 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1070 op = XEXP (XEXP (op, 0), 0);
1071 }
1072
1073 if (any_offset_symbol_operand (op, mode)
1074 || function_operand (op, mode))
1075 return true;
1076 if (aligned_offset_symbol_operand (op, mode))
1077 return (addend & 0x3fff) == 0;
1078 return false;
1079 }
1080 return false;
1081
1082 case CONST_VECTOR:
1083 if (mode == V2SFmode)
1084 return satisfies_constraint_Y (x);
1085
1086 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1087 && GET_MODE_SIZE (mode) <= 8);
1088
1089 default:
1090 return false;
1091 }
1092 }
1093
1094 /* Don't allow TLS addresses to get spilled to memory. */
1095
1096 static bool
1097 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1098 {
1099 if (mode == RFmode)
1100 return true;
1101 return tls_symbolic_operand_type (x) != 0;
1102 }
1103
1104 /* Expand a symbolic constant load. */
1105
1106 bool
1107 ia64_expand_load_address (rtx dest, rtx src)
1108 {
1109 gcc_assert (GET_CODE (dest) == REG);
1110
1111 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1112 having to pointer-extend the value afterward. Other forms of address
1113 computation below are also more natural to compute as 64-bit quantities.
1114 If we've been given an SImode destination register, change it. */
1115 if (GET_MODE (dest) != Pmode)
1116 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1117 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1118
1119 if (TARGET_NO_PIC)
1120 return false;
1121 if (small_addr_symbolic_operand (src, VOIDmode))
1122 return false;
1123
1124 if (TARGET_AUTO_PIC)
1125 emit_insn (gen_load_gprel64 (dest, src));
1126 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1127 emit_insn (gen_load_fptr (dest, src));
1128 else if (sdata_symbolic_operand (src, VOIDmode))
1129 emit_insn (gen_load_gprel (dest, src));
1130 else if (local_symbolic_operand64 (src, VOIDmode))
1131 {
1132 /* We want to use @gprel rather than @ltoff relocations for local
1133 symbols:
1134 - @gprel does not require dynamic linker
1135 - and does not use .sdata section
1136 https://gcc.gnu.org/bugzilla/60465 */
1137 emit_insn (gen_load_gprel64 (dest, src));
1138 }
1139 else
1140 {
1141 HOST_WIDE_INT addend = 0;
1142 rtx tmp;
1143
1144 /* We did split constant offsets in ia64_expand_move, and we did try
1145 to keep them split in move_operand, but we also allowed reload to
1146 rematerialize arbitrary constants rather than spill the value to
1147 the stack and reload it. So we have to be prepared here to split
1148 them apart again. */
1149 if (GET_CODE (src) == CONST)
1150 {
1151 HOST_WIDE_INT hi, lo;
1152
1153 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1154 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1155 hi = hi - lo;
1156
1157 if (lo != 0)
1158 {
1159 addend = lo;
1160 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1161 }
1162 }
1163
1164 tmp = gen_rtx_HIGH (Pmode, src);
1165 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1166 emit_insn (gen_rtx_SET (dest, tmp));
1167
1168 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1169 emit_insn (gen_rtx_SET (dest, tmp));
1170
1171 if (addend)
1172 {
1173 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1174 emit_insn (gen_rtx_SET (dest, tmp));
1175 }
1176 }
1177
1178 return true;
1179 }
1180
1181 static GTY(()) rtx gen_tls_tga;
1182 static rtx
1183 gen_tls_get_addr (void)
1184 {
1185 if (!gen_tls_tga)
1186 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1187 return gen_tls_tga;
1188 }
1189
1190 static GTY(()) rtx thread_pointer_rtx;
1191 static rtx
1192 gen_thread_pointer (void)
1193 {
1194 if (!thread_pointer_rtx)
1195 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1196 return thread_pointer_rtx;
1197 }
1198
1199 static rtx
1200 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1201 rtx orig_op1, HOST_WIDE_INT addend)
1202 {
1203 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1204 rtx_insn *insns;
1205 rtx orig_op0 = op0;
1206 HOST_WIDE_INT addend_lo, addend_hi;
1207
1208 switch (tls_kind)
1209 {
1210 case TLS_MODEL_GLOBAL_DYNAMIC:
1211 start_sequence ();
1212
1213 tga_op1 = gen_reg_rtx (Pmode);
1214 emit_insn (gen_load_dtpmod (tga_op1, op1));
1215
1216 tga_op2 = gen_reg_rtx (Pmode);
1217 emit_insn (gen_load_dtprel (tga_op2, op1));
1218
1219 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1220 LCT_CONST, Pmode,
1221 tga_op1, Pmode, tga_op2, Pmode);
1222
1223 insns = get_insns ();
1224 end_sequence ();
1225
1226 if (GET_MODE (op0) != Pmode)
1227 op0 = tga_ret;
1228 emit_libcall_block (insns, op0, tga_ret, op1);
1229 break;
1230
1231 case TLS_MODEL_LOCAL_DYNAMIC:
1232 /* ??? This isn't the completely proper way to do local-dynamic.
1233 If the call to __tls_get_addr is used only by a single symbol,
1234 then we should (somehow) move the dtprel to the second arg
1235 to avoid the extra add. */
1236 start_sequence ();
1237
1238 tga_op1 = gen_reg_rtx (Pmode);
1239 emit_insn (gen_load_dtpmod (tga_op1, op1));
1240
1241 tga_op2 = const0_rtx;
1242
1243 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1244 LCT_CONST, Pmode,
1245 tga_op1, Pmode, tga_op2, Pmode);
1246
1247 insns = get_insns ();
1248 end_sequence ();
1249
1250 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1251 UNSPEC_LD_BASE);
1252 tmp = gen_reg_rtx (Pmode);
1253 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1254
1255 if (!register_operand (op0, Pmode))
1256 op0 = gen_reg_rtx (Pmode);
1257 if (TARGET_TLS64)
1258 {
1259 emit_insn (gen_load_dtprel (op0, op1));
1260 emit_insn (gen_adddi3 (op0, tmp, op0));
1261 }
1262 else
1263 emit_insn (gen_add_dtprel (op0, op1, tmp));
1264 break;
1265
1266 case TLS_MODEL_INITIAL_EXEC:
1267 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1268 addend_hi = addend - addend_lo;
1269
1270 op1 = plus_constant (Pmode, op1, addend_hi);
1271 addend = addend_lo;
1272
1273 tmp = gen_reg_rtx (Pmode);
1274 emit_insn (gen_load_tprel (tmp, op1));
1275
1276 if (!register_operand (op0, Pmode))
1277 op0 = gen_reg_rtx (Pmode);
1278 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1279 break;
1280
1281 case TLS_MODEL_LOCAL_EXEC:
1282 if (!register_operand (op0, Pmode))
1283 op0 = gen_reg_rtx (Pmode);
1284
1285 op1 = orig_op1;
1286 addend = 0;
1287 if (TARGET_TLS64)
1288 {
1289 emit_insn (gen_load_tprel (op0, op1));
1290 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1291 }
1292 else
1293 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1294 break;
1295
1296 default:
1297 gcc_unreachable ();
1298 }
1299
1300 if (addend)
1301 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1302 orig_op0, 1, OPTAB_DIRECT);
1303 if (orig_op0 == op0)
1304 return NULL_RTX;
1305 if (GET_MODE (orig_op0) == Pmode)
1306 return op0;
1307 return gen_lowpart (GET_MODE (orig_op0), op0);
1308 }
1309
1310 rtx
1311 ia64_expand_move (rtx op0, rtx op1)
1312 {
1313 machine_mode mode = GET_MODE (op0);
1314
1315 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1316 op1 = force_reg (mode, op1);
1317
1318 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1319 {
1320 HOST_WIDE_INT addend = 0;
1321 enum tls_model tls_kind;
1322 rtx sym = op1;
1323
1324 if (GET_CODE (op1) == CONST
1325 && GET_CODE (XEXP (op1, 0)) == PLUS
1326 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1327 {
1328 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1329 sym = XEXP (XEXP (op1, 0), 0);
1330 }
1331
1332 tls_kind = tls_symbolic_operand_type (sym);
1333 if (tls_kind)
1334 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1335
1336 if (any_offset_symbol_operand (sym, mode))
1337 addend = 0;
1338 else if (aligned_offset_symbol_operand (sym, mode))
1339 {
1340 HOST_WIDE_INT addend_lo, addend_hi;
1341
1342 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1343 addend_hi = addend - addend_lo;
1344
1345 if (addend_lo != 0)
1346 {
1347 op1 = plus_constant (mode, sym, addend_hi);
1348 addend = addend_lo;
1349 }
1350 else
1351 addend = 0;
1352 }
1353 else
1354 op1 = sym;
1355
1356 if (reload_completed)
1357 {
1358 /* We really should have taken care of this offset earlier. */
1359 gcc_assert (addend == 0);
1360 if (ia64_expand_load_address (op0, op1))
1361 return NULL_RTX;
1362 }
1363
1364 if (addend)
1365 {
1366 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1367
1368 emit_insn (gen_rtx_SET (subtarget, op1));
1369
1370 op1 = expand_simple_binop (mode, PLUS, subtarget,
1371 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1372 if (op0 == op1)
1373 return NULL_RTX;
1374 }
1375 }
1376
1377 return op1;
1378 }
1379
1380 /* Split a move from OP1 to OP0 conditional on COND. */
1381
1382 void
1383 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1384 {
1385 rtx_insn *insn, *first = get_last_insn ();
1386
1387 emit_move_insn (op0, op1);
1388
1389 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1390 if (INSN_P (insn))
1391 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1392 PATTERN (insn));
1393 }
1394
1395 /* Split a post-reload TImode or TFmode reference into two DImode
1396 components. This is made extra difficult by the fact that we do
1397 not get any scratch registers to work with, because reload cannot
1398 be prevented from giving us a scratch that overlaps the register
1399 pair involved. So instead, when addressing memory, we tweak the
1400 pointer register up and back down with POST_INCs. Or up and not
1401 back down when we can get away with it.
1402
1403 REVERSED is true when the loads must be done in reversed order
1404 (high word first) for correctness. DEAD is true when the pointer
1405 dies with the second insn we generate and therefore the second
1406 address must not carry a postmodify.
1407
1408 May return an insn which is to be emitted after the moves. */
1409
1410 static rtx
1411 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1412 {
1413 rtx fixup = 0;
1414
1415 switch (GET_CODE (in))
1416 {
1417 case REG:
1418 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1419 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1420 break;
1421
1422 case CONST_INT:
1423 case CONST_DOUBLE:
1424 /* Cannot occur reversed. */
1425 gcc_assert (!reversed);
1426
1427 if (GET_MODE (in) != TFmode)
1428 split_double (in, &out[0], &out[1]);
1429 else
1430 /* split_double does not understand how to split a TFmode
1431 quantity into a pair of DImode constants. */
1432 {
1433 unsigned HOST_WIDE_INT p[2];
1434 long l[4]; /* TFmode is 128 bits */
1435
1436 real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1437
1438 if (FLOAT_WORDS_BIG_ENDIAN)
1439 {
1440 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1441 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1442 }
1443 else
1444 {
1445 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1446 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1447 }
1448 out[0] = GEN_INT (p[0]);
1449 out[1] = GEN_INT (p[1]);
1450 }
1451 break;
1452
1453 case MEM:
1454 {
1455 rtx base = XEXP (in, 0);
1456 rtx offset;
1457
1458 switch (GET_CODE (base))
1459 {
1460 case REG:
1461 if (!reversed)
1462 {
1463 out[0] = adjust_automodify_address
1464 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1465 out[1] = adjust_automodify_address
1466 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1467 }
1468 else
1469 {
1470 /* Reversal requires a pre-increment, which can only
1471 be done as a separate insn. */
1472 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1473 out[0] = adjust_automodify_address
1474 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1475 out[1] = adjust_address (in, DImode, 0);
1476 }
1477 break;
1478
1479 case POST_INC:
1480 gcc_assert (!reversed && !dead);
1481
1482 /* Just do the increment in two steps. */
1483 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1484 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1485 break;
1486
1487 case POST_DEC:
1488 gcc_assert (!reversed && !dead);
1489
1490 /* Add 8, subtract 24. */
1491 base = XEXP (base, 0);
1492 out[0] = adjust_automodify_address
1493 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1494 out[1] = adjust_automodify_address
1495 (in, DImode,
1496 gen_rtx_POST_MODIFY (Pmode, base,
1497 plus_constant (Pmode, base, -24)),
1498 8);
1499 break;
1500
1501 case POST_MODIFY:
1502 gcc_assert (!reversed && !dead);
1503
1504 /* Extract and adjust the modification. This case is
1505 trickier than the others, because we might have an
1506 index register, or we might have a combined offset that
1507 doesn't fit a signed 9-bit displacement field. We can
1508 assume the incoming expression is already legitimate. */
1509 offset = XEXP (base, 1);
1510 base = XEXP (base, 0);
1511
1512 out[0] = adjust_automodify_address
1513 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1514
1515 if (GET_CODE (XEXP (offset, 1)) == REG)
1516 {
1517 /* Can't adjust the postmodify to match. Emit the
1518 original, then a separate addition insn. */
1519 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1520 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1521 }
1522 else
1523 {
1524 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1525 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1526 {
1527 /* Again the postmodify cannot be made to match,
1528 but in this case it's more efficient to get rid
1529 of the postmodify entirely and fix up with an
1530 add insn. */
1531 out[1] = adjust_automodify_address (in, DImode, base, 8);
1532 fixup = gen_adddi3
1533 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1534 }
1535 else
1536 {
1537 /* Combined offset still fits in the displacement field.
1538 (We cannot overflow it at the high end.) */
1539 out[1] = adjust_automodify_address
1540 (in, DImode, gen_rtx_POST_MODIFY
1541 (Pmode, base, gen_rtx_PLUS
1542 (Pmode, base,
1543 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1544 8);
1545 }
1546 }
1547 break;
1548
1549 default:
1550 gcc_unreachable ();
1551 }
1552 break;
1553 }
1554
1555 default:
1556 gcc_unreachable ();
1557 }
1558
1559 return fixup;
1560 }
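/* For example, a (mem:TI (reg BASE)) source with !REVERSED and !DEAD
   splits into
     out[0] = (mem:DI (post_inc BASE))
     out[1] = (mem:DI (post_dec BASE))
   so BASE ends up unchanged; when DEAD the second half is simply
   (mem:DI BASE), relying on the post_inc of the first access having
   already advanced the pointer by 8.  */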
1561
1562 /* Split a TImode or TFmode move instruction after reload.
1563 This is used by *movtf_internal and *movti_internal. */
1564 void
1565 ia64_split_tmode_move (rtx operands[])
1566 {
1567 rtx in[2], out[2], insn;
1568 rtx fixup[2];
1569 bool dead = false;
1570 bool reversed = false;
1571
1572 /* It is possible for reload to decide to overwrite a pointer with
1573 the value it points to. In that case we have to do the loads in
1574 the appropriate order so that the pointer is not destroyed too
1575 early. Also we must not generate a postmodify for that second
1576 load, or rws_access_regno will die. And we must not generate a
1577 postmodify for the second load if the destination register
1578 overlaps with the base register. */
1579 if (GET_CODE (operands[1]) == MEM
1580 && reg_overlap_mentioned_p (operands[0], operands[1]))
1581 {
1582 rtx base = XEXP (operands[1], 0);
1583 while (GET_CODE (base) != REG)
1584 base = XEXP (base, 0);
1585
1586 if (REGNO (base) == REGNO (operands[0]))
1587 reversed = true;
1588
1589 if (refers_to_regno_p (REGNO (operands[0]),
1590 REGNO (operands[0])+2,
1591 base, 0))
1592 dead = true;
1593 }
1594 /* Another reason to do the moves in reversed order is if the first
1595 element of the target register pair is also the second element of
1596 the source register pair. */
1597 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1598 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1599 reversed = true;
1600
1601 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1602 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1603
1604 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1605 if (GET_CODE (EXP) == MEM \
1606 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1607 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1608 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1609 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1610
1611 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1612 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1613 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1614
1615 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1616 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1617 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1618
1619 if (fixup[0])
1620 emit_insn (fixup[0]);
1621 if (fixup[1])
1622 emit_insn (fixup[1]);
1623
1624 #undef MAYBE_ADD_REG_INC_NOTE
1625 }
1626
1627 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1628 through memory plus an extra GR scratch register. Except that you can
1629 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1630 SECONDARY_RELOAD_CLASS, but not both.
1631
1632 We got into problems in the first place by allowing a construct like
1633 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1634 This solution attempts to prevent this situation from occurring. When
1635 we see something like the above, we spill the inner register to memory. */
1636
1637 static rtx
1638 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1639 {
1640 if (GET_CODE (in) == SUBREG
1641 && GET_MODE (SUBREG_REG (in)) == TImode
1642 && GET_CODE (SUBREG_REG (in)) == REG)
1643 {
1644 rtx memt = assign_stack_temp (TImode, 16);
1645 emit_move_insn (memt, SUBREG_REG (in));
1646 return adjust_address (memt, mode, 0);
1647 }
1648 else if (force && GET_CODE (in) == REG)
1649 {
1650 rtx memx = assign_stack_temp (mode, 16);
1651 emit_move_insn (memx, in);
1652 return memx;
1653 }
1654 else
1655 return in;
1656 }
1657
1658 /* Expand the movxf or movrf pattern (MODE says which) with the given
1659 OPERANDS, returning true if the pattern should then invoke
1660 DONE. */
1661
1662 bool
1663 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1664 {
1665 rtx op0 = operands[0];
1666
1667 if (GET_CODE (op0) == SUBREG)
1668 op0 = SUBREG_REG (op0);
1669
1670 /* We must support XFmode loads into general registers for stdarg/vararg,
1671 unprototyped calls, and a rare case where a long double is passed as
1672 an argument after a float HFA fills the FP registers. We split them into
1673 DImode loads for convenience. We also need to support XFmode stores
1674 for the last case. This case does not happen for stdarg/vararg routines,
1675 because we do a block store to memory of unnamed arguments. */
1676
1677 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1678 {
1679 rtx out[2];
1680
1681 /* We're hoping to transform everything that deals with XFmode
1682 quantities and GR registers early in the compiler. */
1683 gcc_assert (can_create_pseudo_p ());
1684
1685 /* Struct to register can just use TImode instead. */
1686 if ((GET_CODE (operands[1]) == SUBREG
1687 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1688 || (GET_CODE (operands[1]) == REG
1689 && GR_REGNO_P (REGNO (operands[1]))))
1690 {
1691 rtx op1 = operands[1];
1692
1693 if (GET_CODE (op1) == SUBREG)
1694 op1 = SUBREG_REG (op1);
1695 else
1696 op1 = gen_rtx_REG (TImode, REGNO (op1));
1697
1698 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1699 return true;
1700 }
1701
1702 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1703 {
1704 /* Don't word-swap when reading in the constant. */
1705 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1706 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1707 0, mode));
1708 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1709 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1710 0, mode));
1711 return true;
1712 }
1713
1714 /* If the quantity is in a register not known to be GR, spill it. */
1715 if (register_operand (operands[1], mode))
1716 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1717
1718 gcc_assert (GET_CODE (operands[1]) == MEM);
1719
1720 /* Don't word-swap when reading in the value. */
1721 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1722 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1723
1724 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1725 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1726 return true;
1727 }
1728
1729 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1730 {
1731 /* We're hoping to transform everything that deals with XFmode
1732 quantities and GR registers early in the compiler. */
1733 gcc_assert (can_create_pseudo_p ());
1734
1735 /* Op0 can't be a GR_REG here, as that case is handled above.
1736 If op0 is a register, then we spill op1, so that we now have a
1737 MEM operand. This requires creating an XFmode subreg of a TImode reg
1738 to force the spill. */
1739 if (register_operand (operands[0], mode))
1740 {
1741 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1742 op1 = gen_rtx_SUBREG (mode, op1, 0);
1743 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1744 }
1745
1746 else
1747 {
1748 rtx in[2];
1749
1750 gcc_assert (GET_CODE (operands[0]) == MEM);
1751
1752 /* Don't word-swap when writing out the value. */
1753 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1754 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1755
1756 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1757 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1758 return true;
1759 }
1760 }
1761
1762 if (!reload_in_progress && !reload_completed)
1763 {
1764 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1765
1766 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1767 {
1768 rtx memt, memx, in = operands[1];
1769 if (CONSTANT_P (in))
1770 in = validize_mem (force_const_mem (mode, in));
1771 if (GET_CODE (in) == MEM)
1772 memt = adjust_address (in, TImode, 0);
1773 else
1774 {
1775 memt = assign_stack_temp (TImode, 16);
1776 memx = adjust_address (memt, mode, 0);
1777 emit_move_insn (memx, in);
1778 }
1779 emit_move_insn (op0, memt);
1780 return true;
1781 }
1782
1783 if (!ia64_move_ok (operands[0], operands[1]))
1784 operands[1] = force_reg (mode, operands[1]);
1785 }
1786
1787 return false;
1788 }
1789
1790 /* Emit a comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1791 with the expression that holds the compare result (in VOIDmode). */
1792
1793 static GTY(()) rtx cmptf_libfunc;
1794
1795 void
1796 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1797 {
1798 enum rtx_code code = GET_CODE (*expr);
1799 rtx cmp;
1800
1801 /* If we have a BImode input, then we already have a compare result, and
1802 do not need to emit another comparison. */
1803 if (GET_MODE (*op0) == BImode)
1804 {
1805 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1806 cmp = *op0;
1807 }
1808 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1809 magic number as its third argument, that indicates what to do.
1810 The return value is an integer to be compared against zero. */
1811 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1812 {
1813 enum qfcmp_magic {
1814 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1815 QCMP_UNORD = 2,
1816 QCMP_EQ = 4,
1817 QCMP_LT = 8,
1818 QCMP_GT = 16
1819 };
1820 int magic;
1821 enum rtx_code ncode;
1822 rtx ret;
1823
1824 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1825 switch (code)
1826 {
1827 /* 1 = equal, 0 = not equal. Equality operators do
1828 not raise FP_INVALID when given a NaN operand. */
1829 case EQ: magic = QCMP_EQ; ncode = NE; break;
1830 case NE: magic = QCMP_EQ; ncode = EQ; break;
1831 /* isunordered() from C99. */
1832 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1833 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1834 /* Relational operators raise FP_INVALID when given
1835 a NaN operand. */
1836 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1837 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1838 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1839 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1840 /* Unordered relational operators do not raise FP_INVALID
1841 when given a NaN operand. */
1842 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1843 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1844 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1845 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1846 /* Not supported. */
1847 case UNEQ:
1848 case LTGT:
1849 default: gcc_unreachable ();
1850 }
1851
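/* For example, a LT comparison becomes the libcall
_U_Qfcmp (*op0, *op1, QCMP_LT | QCMP_INV), and the BImode result below
tests that return value with NE against zero. */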
1852 start_sequence ();
1853
1854 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
1855 *op0, TFmode, *op1, TFmode,
1856 GEN_INT (magic), DImode);
1857 cmp = gen_reg_rtx (BImode);
1858 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1859 ret, const0_rtx)));
1860
1861 rtx_insn *insns = get_insns ();
1862 end_sequence ();
1863
1864 emit_libcall_block (insns, cmp, cmp,
1865 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1866 code = NE;
1867 }
1868 else
1869 {
1870 cmp = gen_reg_rtx (BImode);
1871 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1872 code = NE;
1873 }
1874
1875 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1876 *op0 = cmp;
1877 *op1 = const0_rtx;
1878 }
1879
1880 /* Generate an integral vector comparison. Return true if the condition has
1881 been reversed, and so the sense of the comparison should be inverted. */
1882
1883 static bool
1884 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1885 rtx dest, rtx op0, rtx op1)
1886 {
1887 bool negate = false;
1888 rtx x;
1889
1890 /* Canonicalize the comparison to EQ, GT, GTU. */
1891 switch (code)
1892 {
1893 case EQ:
1894 case GT:
1895 case GTU:
1896 break;
1897
1898 case NE:
1899 case LE:
1900 case LEU:
1901 code = reverse_condition (code);
1902 negate = true;
1903 break;
1904
1905 case GE:
1906 case GEU:
1907 code = reverse_condition (code);
1908 negate = true;
1909 /* FALLTHRU */
1910
1911 case LT:
1912 case LTU:
1913 code = swap_condition (code);
1914 x = op0, op0 = op1, op1 = x;
1915 break;
1916
1917 default:
1918 gcc_unreachable ();
1919 }
1920
1921 /* Unsigned parallel compare is not supported by the hardware. Play some
1922 tricks to turn this into a signed comparison against 0. */
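/* Two tricks are used below. For V2SImode, a >u b iff
(a - 0x80000000) >s (b - 0x80000000): subtracting 0x80000000 flips the
sign bit of each operand and so maps unsigned order onto signed order.
For V8QImode and V4HImode, a >u b iff the unsigned saturating difference
a -us b is nonzero; this is computed as an EQ test against zero whose
sense is then negated. */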
1923 if (code == GTU)
1924 {
1925 switch (mode)
1926 {
1927 case E_V2SImode:
1928 {
1929 rtx t1, t2, mask;
1930
1931 /* Subtract (-(INT_MAX) - 1) from both operands to make
1932 them signed. */
1933 mask = gen_int_mode (0x80000000, SImode);
1934 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1935 mask = force_reg (mode, mask);
1936 t1 = gen_reg_rtx (mode);
1937 emit_insn (gen_subv2si3 (t1, op0, mask));
1938 t2 = gen_reg_rtx (mode);
1939 emit_insn (gen_subv2si3 (t2, op1, mask));
1940 op0 = t1;
1941 op1 = t2;
1942 code = GT;
1943 }
1944 break;
1945
1946 case E_V8QImode:
1947 case E_V4HImode:
1948 /* Perform a parallel unsigned saturating subtraction. */
1949 x = gen_reg_rtx (mode);
1950 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1951
1952 code = EQ;
1953 op0 = x;
1954 op1 = CONST0_RTX (mode);
1955 negate = !negate;
1956 break;
1957
1958 default:
1959 gcc_unreachable ();
1960 }
1961 }
1962
1963 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1964 emit_insn (gen_rtx_SET (dest, x));
1965
1966 return negate;
1967 }
1968
1969 /* Emit an integral vector conditional move. */
1970
1971 void
1972 ia64_expand_vecint_cmov (rtx operands[])
1973 {
1974 machine_mode mode = GET_MODE (operands[0]);
1975 enum rtx_code code = GET_CODE (operands[3]);
1976 bool negate;
1977 rtx cmp, x, ot, of;
1978
1979 cmp = gen_reg_rtx (mode);
1980 negate = ia64_expand_vecint_compare (code, mode, cmp,
1981 operands[4], operands[5]);
1982
1983 ot = operands[1+negate];
1984 of = operands[2-negate];
1985
1986 if (ot == CONST0_RTX (mode))
1987 {
1988 if (of == CONST0_RTX (mode))
1989 {
1990 emit_move_insn (operands[0], ot);
1991 return;
1992 }
1993
1994 x = gen_rtx_NOT (mode, cmp);
1995 x = gen_rtx_AND (mode, x, of);
1996 emit_insn (gen_rtx_SET (operands[0], x));
1997 }
1998 else if (of == CONST0_RTX (mode))
1999 {
2000 x = gen_rtx_AND (mode, cmp, ot);
2001 emit_insn (gen_rtx_SET (operands[0], x));
2002 }
2003 else
2004 {
2005 rtx t, f;
2006
2007 t = gen_reg_rtx (mode);
2008 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2009 emit_insn (gen_rtx_SET (t, x));
2010
2011 f = gen_reg_rtx (mode);
2012 x = gen_rtx_NOT (mode, cmp);
2013 x = gen_rtx_AND (mode, x, operands[2-negate]);
2014 emit_insn (gen_rtx_SET (f, x));
2015
2016 x = gen_rtx_IOR (mode, t, f);
2017 emit_insn (gen_rtx_SET (operands[0], x));
2018 }
2019 }
2020
2021 /* Emit an integral vector min or max operation. Return true if all done. */
2022
2023 bool
2024 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2025 rtx operands[])
2026 {
2027 rtx xops[6];
2028
2029 /* These four combinations are supported directly. */
2030 if (mode == V8QImode && (code == UMIN || code == UMAX))
2031 return false;
2032 if (mode == V4HImode && (code == SMIN || code == SMAX))
2033 return false;
2034
2035 /* This combination can be implemented with only saturating subtraction. */
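/* That is, umax (a, b) == (a -us b) + b, since the saturating difference
is a - b when a > b and zero otherwise. */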
2036 if (mode == V4HImode && code == UMAX)
2037 {
2038 rtx x, tmp = gen_reg_rtx (mode);
2039
2040 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2041 emit_insn (gen_rtx_SET (tmp, x));
2042
2043 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2044 return true;
2045 }
2046
2047 /* Everything else implemented via vector comparisons. */
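/* That is, the min or max becomes a conditional move that selects
operands[1] when the (remapped) comparison holds and operands[2]
otherwise. */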
2048 xops[0] = operands[0];
2049 xops[4] = xops[1] = operands[1];
2050 xops[5] = xops[2] = operands[2];
2051
2052 switch (code)
2053 {
2054 case UMIN:
2055 code = LTU;
2056 break;
2057 case UMAX:
2058 code = GTU;
2059 break;
2060 case SMIN:
2061 code = LT;
2062 break;
2063 case SMAX:
2064 code = GT;
2065 break;
2066 default:
2067 gcc_unreachable ();
2068 }
2069 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2070
2071 ia64_expand_vecint_cmov (xops);
2072 return true;
2073 }
2074
2075 /* The vectors LO and HI each contain N halves of a double-wide vector.
2076 Reassemble either the first N/2 or the second N/2 elements. */
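/* For V8QImode with HIGHP false, for instance, the permutation built
below is { 0, 8, 1, 9, 2, 10, 3, 11 }, interleaving the first halves of
D.OP0 and D.OP1. */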
2077
2078 void
2079 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2080 {
2081 machine_mode vmode = GET_MODE (lo);
2082 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2083 struct expand_vec_perm_d d;
2084 bool ok;
2085
2086 d.target = gen_lowpart (vmode, out);
2087 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2088 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2089 d.vmode = vmode;
2090 d.nelt = nelt;
2091 d.one_operand_p = false;
2092 d.testing_p = false;
2093
2094 high = (highp ? nelt / 2 : 0);
2095 for (i = 0; i < nelt / 2; ++i)
2096 {
2097 d.perm[i * 2] = i + high;
2098 d.perm[i * 2 + 1] = i + high + nelt;
2099 }
2100
2101 ok = ia64_expand_vec_perm_const_1 (&d);
2102 gcc_assert (ok);
2103 }
2104
2105 /* Return a vector of the sign-extension of VEC. */
2106
2107 static rtx
2108 ia64_unpack_sign (rtx vec, bool unsignedp)
2109 {
2110 machine_mode mode = GET_MODE (vec);
2111 rtx zero = CONST0_RTX (mode);
2112
2113 if (unsignedp)
2114 return zero;
2115 else
2116 {
2117 rtx sign = gen_reg_rtx (mode);
2118 bool neg;
2119
2120 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2121 gcc_assert (!neg);
2122
2123 return sign;
2124 }
2125 }
2126
2127 /* Emit an integral vector unpack operation. */
2128
2129 void
2130 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2131 {
2132 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2133 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2134 }
2135
2136 /* Emit an integral vector widening sum operation. */
2137
2138 void
2139 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2140 {
2141 machine_mode wmode;
2142 rtx l, h, t, sign;
2143
2144 sign = ia64_unpack_sign (operands[1], unsignedp);
2145
2146 wmode = GET_MODE (operands[0]);
2147 l = gen_reg_rtx (wmode);
2148 h = gen_reg_rtx (wmode);
2149
2150 ia64_unpack_assemble (l, operands[1], sign, false);
2151 ia64_unpack_assemble (h, operands[1], sign, true);
2152
2153 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2154 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2155 if (t != operands[0])
2156 emit_move_insn (operands[0], t);
2157 }
2158
2159 /* Emit the appropriate sequence for a call. */
2160
2161 void
2162 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2163 int sibcall_p)
2164 {
2165 rtx insn, b0;
2166
2167 addr = XEXP (addr, 0);
2168 addr = convert_memory_address (DImode, addr);
2169 b0 = gen_rtx_REG (DImode, R_BR (0));
2170
2171 /* ??? Should do this for functions known to bind local too. */
2172 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2173 {
2174 if (sibcall_p)
2175 insn = gen_sibcall_nogp (addr);
2176 else if (! retval)
2177 insn = gen_call_nogp (addr, b0);
2178 else
2179 insn = gen_call_value_nogp (retval, addr, b0);
2180 insn = emit_call_insn (insn);
2181 }
2182 else
2183 {
2184 if (sibcall_p)
2185 insn = gen_sibcall_gp (addr);
2186 else if (! retval)
2187 insn = gen_call_gp (addr, b0);
2188 else
2189 insn = gen_call_value_gp (retval, addr, b0);
2190 insn = emit_call_insn (insn);
2191
2192 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2193 }
2194
2195 if (sibcall_p)
2196 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2197
2198 if (TARGET_ABI_OPEN_VMS)
2199 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2200 gen_rtx_REG (DImode, GR_REG (25)));
2201 }
2202
2203 static void
2204 reg_emitted (enum ia64_frame_regs r)
2205 {
2206 if (emitted_frame_related_regs[r] == 0)
2207 emitted_frame_related_regs[r] = current_frame_info.r[r];
2208 else
2209 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2210 }
2211
2212 static int
2213 get_reg (enum ia64_frame_regs r)
2214 {
2215 reg_emitted (r);
2216 return current_frame_info.r[r];
2217 }
2218
2219 static bool
2220 is_emitted (int regno)
2221 {
2222 unsigned int r;
2223
2224 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2225 if (emitted_frame_related_regs[r] == regno)
2226 return true;
2227 return false;
2228 }
2229
2230 void
2231 ia64_reload_gp (void)
2232 {
2233 rtx tmp;
2234
2235 if (current_frame_info.r[reg_save_gp])
2236 {
2237 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2238 }
2239 else
2240 {
2241 HOST_WIDE_INT offset;
2242 rtx offset_r;
2243
2244 offset = (current_frame_info.spill_cfa_off
2245 + current_frame_info.spill_size);
2246 if (frame_pointer_needed)
2247 {
2248 tmp = hard_frame_pointer_rtx;
2249 offset = -offset;
2250 }
2251 else
2252 {
2253 tmp = stack_pointer_rtx;
2254 offset = current_frame_info.total_size - offset;
2255 }
2256
2257 offset_r = GEN_INT (offset);
2258 if (satisfies_constraint_I (offset_r))
2259 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2260 else
2261 {
2262 emit_move_insn (pic_offset_table_rtx, offset_r);
2263 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2264 pic_offset_table_rtx, tmp));
2265 }
2266
2267 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2268 }
2269
2270 emit_move_insn (pic_offset_table_rtx, tmp);
2271 }
2272
2273 void
2274 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2275 rtx scratch_b, int noreturn_p, int sibcall_p)
2276 {
2277 rtx insn;
2278 bool is_desc = false;
2279
2280 /* If we find we're calling through a register, then we're actually
2281 calling through a descriptor, so load up the values. */
2282 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2283 {
2284 rtx tmp;
2285 bool addr_dead_p;
2286
2287 /* ??? We are currently constrained to *not* use peep2, because
2288 we can legitimately change the global lifetime of the GP
2289 (in the form of killing where previously live). This is
2290 because a call through a descriptor doesn't use the previous
2291 value of the GP, while a direct call does, and we do not
2292 commit to either form until the split here.
2293
2294 That said, this means that we lack precise life info for
2295 whether ADDR is dead after this call. This is not terribly
2296 important, since we can fix things up essentially for free
2297 with the POST_DEC below, but it's nice to not use it when we
2298 can immediately tell it's not necessary. */
2299 addr_dead_p = ((noreturn_p || sibcall_p
2300 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2301 REGNO (addr)))
2302 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2303
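/* An IA-64 function descriptor is a pair of doublewords: the entry point
followed by the gp value for the callee. The two loads below fetch them
in that order through the post-increment of ADDR. */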
2304 /* Load the code address into scratch_b. */
2305 tmp = gen_rtx_POST_INC (Pmode, addr);
2306 tmp = gen_rtx_MEM (Pmode, tmp);
2307 emit_move_insn (scratch_r, tmp);
2308 emit_move_insn (scratch_b, scratch_r);
2309
2310 /* Load the GP address. If ADDR is not dead here, then we must
2311 revert the change made above via the POST_INCREMENT. */
2312 if (!addr_dead_p)
2313 tmp = gen_rtx_POST_DEC (Pmode, addr);
2314 else
2315 tmp = addr;
2316 tmp = gen_rtx_MEM (Pmode, tmp);
2317 emit_move_insn (pic_offset_table_rtx, tmp);
2318
2319 is_desc = true;
2320 addr = scratch_b;
2321 }
2322
2323 if (sibcall_p)
2324 insn = gen_sibcall_nogp (addr);
2325 else if (retval)
2326 insn = gen_call_value_nogp (retval, addr, retaddr);
2327 else
2328 insn = gen_call_nogp (addr, retaddr);
2329 emit_call_insn (insn);
2330
2331 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2332 ia64_reload_gp ();
2333 }
2334
2335 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2336
2337 This differs from the generic code in that we know about the zero-extending
2338 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2339 also know that ld.acq+cmpxchg.rel equals a full barrier.
2340
2341 The loop we want to generate looks like
2342
2343 cmp_reg = mem;
2344 label:
2345 old_reg = cmp_reg;
2346 new_reg = cmp_reg op val;
2347 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2348 if (cmp_reg != old_reg)
2349 goto label;
2350
2351 Note that we only do the plain load from memory once. Subsequent
2352 iterations use the value loaded by the compare-and-swap pattern. */
2353
2354 void
2355 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2356 rtx old_dst, rtx new_dst, enum memmodel model)
2357 {
2358 machine_mode mode = GET_MODE (mem);
2359 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2360 enum insn_code icode;
2361
2362 /* Special case for using fetchadd. */
2363 if ((mode == SImode || mode == DImode)
2364 && (code == PLUS || code == MINUS)
2365 && fetchadd_operand (val, mode))
2366 {
2367 if (code == MINUS)
2368 val = GEN_INT (-INTVAL (val));
2369
2370 if (!old_dst)
2371 old_dst = gen_reg_rtx (mode);
2372
2373 switch (model)
2374 {
2375 case MEMMODEL_ACQ_REL:
2376 case MEMMODEL_SEQ_CST:
2377 case MEMMODEL_SYNC_SEQ_CST:
2378 emit_insn (gen_memory_barrier ());
2379 /* FALLTHRU */
2380 case MEMMODEL_RELAXED:
2381 case MEMMODEL_ACQUIRE:
2382 case MEMMODEL_SYNC_ACQUIRE:
2383 case MEMMODEL_CONSUME:
2384 if (mode == SImode)
2385 icode = CODE_FOR_fetchadd_acq_si;
2386 else
2387 icode = CODE_FOR_fetchadd_acq_di;
2388 break;
2389 case MEMMODEL_RELEASE:
2390 case MEMMODEL_SYNC_RELEASE:
2391 if (mode == SImode)
2392 icode = CODE_FOR_fetchadd_rel_si;
2393 else
2394 icode = CODE_FOR_fetchadd_rel_di;
2395 break;
2396
2397 default:
2398 gcc_unreachable ();
2399 }
2400
2401 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2402
2403 if (new_dst)
2404 {
2405 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2406 true, OPTAB_WIDEN);
2407 if (new_reg != new_dst)
2408 emit_move_insn (new_dst, new_reg);
2409 }
2410 return;
2411 }
2412
2413 /* Because of the volatile mem read, we get an ld.acq, which is the
2414 front half of the full barrier. The end half is the cmpxchg.rel.
2415 For relaxed and release memory models, we don't need this, but we
2416 don't bother trying to prevent it either. */
2417 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2418 || MEM_VOLATILE_P (mem));
2419
2420 old_reg = gen_reg_rtx (DImode);
2421 cmp_reg = gen_reg_rtx (DImode);
2422 label = gen_label_rtx ();
2423
2424 if (mode != DImode)
2425 {
2426 val = simplify_gen_subreg (DImode, val, mode, 0);
2427 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2428 }
2429 else
2430 emit_move_insn (cmp_reg, mem);
2431
2432 emit_label (label);
2433
2434 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2435 emit_move_insn (old_reg, cmp_reg);
2436 emit_move_insn (ar_ccv, cmp_reg);
2437
2438 if (old_dst)
2439 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2440
2441 new_reg = cmp_reg;
2442 if (code == NOT)
2443 {
2444 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2445 true, OPTAB_DIRECT);
2446 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2447 }
2448 else
2449 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2450 true, OPTAB_DIRECT);
2451
2452 if (mode != DImode)
2453 new_reg = gen_lowpart (mode, new_reg);
2454 if (new_dst)
2455 emit_move_insn (new_dst, new_reg);
2456
2457 switch (model)
2458 {
2459 case MEMMODEL_RELAXED:
2460 case MEMMODEL_ACQUIRE:
2461 case MEMMODEL_SYNC_ACQUIRE:
2462 case MEMMODEL_CONSUME:
2463 switch (mode)
2464 {
2465 case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2466 case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2467 case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2468 case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2469 default:
2470 gcc_unreachable ();
2471 }
2472 break;
2473
2474 case MEMMODEL_RELEASE:
2475 case MEMMODEL_SYNC_RELEASE:
2476 case MEMMODEL_ACQ_REL:
2477 case MEMMODEL_SEQ_CST:
2478 case MEMMODEL_SYNC_SEQ_CST:
2479 switch (mode)
2480 {
2481 case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2482 case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2483 case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2484 case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2485 default:
2486 gcc_unreachable ();
2487 }
2488 break;
2489
2490 default:
2491 gcc_unreachable ();
2492 }
2493
2494 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2495
2496 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2497 }
2498 \f
2499 /* Begin the assembly file. */
2500
2501 static void
2502 ia64_file_start (void)
2503 {
2504 default_file_start ();
2505 emit_safe_across_calls ();
2506 }
2507
2508 void
2509 emit_safe_across_calls (void)
2510 {
2511 unsigned int rs, re;
2512 int out_state;
2513
2514 rs = 1;
2515 out_state = 0;
2516 while (1)
2517 {
2518 while (rs < 64 && call_used_regs[PR_REG (rs)])
2519 rs++;
2520 if (rs >= 64)
2521 break;
2522 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2523 continue;
2524 if (out_state == 0)
2525 {
2526 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2527 out_state = 1;
2528 }
2529 else
2530 fputc (',', asm_out_file);
2531 if (re == rs + 1)
2532 fprintf (asm_out_file, "p%u", rs);
2533 else
2534 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2535 rs = re + 1;
2536 }
2537 if (out_state)
2538 fputc ('\n', asm_out_file);
2539 }
2540
2541 /* Globalize a declaration. */
2542
2543 static void
2544 ia64_globalize_decl_name (FILE * stream, tree decl)
2545 {
2546 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2547 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2548 if (version_attr)
2549 {
2550 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2551 const char *p = TREE_STRING_POINTER (v);
2552 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2553 }
2554 targetm.asm_out.globalize_label (stream, name);
2555 if (TREE_CODE (decl) == FUNCTION_DECL)
2556 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2557 }
2558
2559 /* Helper function for ia64_compute_frame_size: find an appropriate general
2560 register to spill some special register to. SPECIAL_SPILL_MASK contains
2561 bits in GR0 to GR31 that have already been allocated by this routine.
2562 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2563
2564 static int
2565 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2566 {
2567 int regno;
2568
2569 if (emitted_frame_related_regs[r] != 0)
2570 {
2571 regno = emitted_frame_related_regs[r];
2572 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2573 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2574 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2575 else if (crtl->is_leaf
2576 && regno >= GR_REG (1) && regno <= GR_REG (31))
2577 current_frame_info.gr_used_mask |= 1 << regno;
2578
2579 return regno;
2580 }
2581
2582 /* If this is a leaf function, first try an otherwise unused
2583 call-clobbered register. */
2584 if (crtl->is_leaf)
2585 {
2586 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2587 if (! df_regs_ever_live_p (regno)
2588 && call_used_regs[regno]
2589 && ! fixed_regs[regno]
2590 && ! global_regs[regno]
2591 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2592 && ! is_emitted (regno))
2593 {
2594 current_frame_info.gr_used_mask |= 1 << regno;
2595 return regno;
2596 }
2597 }
2598
2599 if (try_locals)
2600 {
2601 regno = current_frame_info.n_local_regs;
2602 /* If there is a frame pointer, then we can't use loc79, because
2603 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2604 reg_name switching code in ia64_expand_prologue. */
2605 while (regno < (80 - frame_pointer_needed))
2606 if (! is_emitted (LOC_REG (regno++)))
2607 {
2608 current_frame_info.n_local_regs = regno;
2609 return LOC_REG (regno - 1);
2610 }
2611 }
2612
2613 /* Failed to find a general register to spill to. Must use stack. */
2614 return 0;
2615 }
2616
2617 /* In order to make for nice schedules, we try to allocate every temporary
2618 to a different register. We must of course stay away from call-saved,
2619 fixed, and global registers. We must also stay away from registers
2620 allocated in current_frame_info.gr_used_mask, since those include regs
2621 used all through the prologue.
2622
2623 Any register allocated here must be used immediately. The idea is to
2624 aid scheduling, not to solve data flow problems. */
2625
2626 static int last_scratch_gr_reg;
2627
2628 static int
2629 next_scratch_gr_reg (void)
2630 {
2631 int i, regno;
2632
2633 for (i = 0; i < 32; ++i)
2634 {
2635 regno = (last_scratch_gr_reg + i + 1) & 31;
2636 if (call_used_regs[regno]
2637 && ! fixed_regs[regno]
2638 && ! global_regs[regno]
2639 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2640 {
2641 last_scratch_gr_reg = regno;
2642 return regno;
2643 }
2644 }
2645
2646 /* There must be _something_ available. */
2647 gcc_unreachable ();
2648 }
2649
2650 /* Helper function for ia64_compute_frame_size, called through
2651 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2652
2653 static void
2654 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2655 {
2656 unsigned int regno = REGNO (reg);
2657 if (regno < 32)
2658 {
2659 unsigned int i, n = REG_NREGS (reg);
2660 for (i = 0; i < n; ++i)
2661 current_frame_info.gr_used_mask |= 1 << (regno + i);
2662 }
2663 }
2664
2665
2666 /* Compute the layout of the current function's frame and record it in
2667 current_frame_info. SIZE is the number of bytes of space needed for
2668 local variables. */
2669
2670 static void
2671 ia64_compute_frame_size (HOST_WIDE_INT size)
2672 {
2673 HOST_WIDE_INT total_size;
2674 HOST_WIDE_INT spill_size = 0;
2675 HOST_WIDE_INT extra_spill_size = 0;
2676 HOST_WIDE_INT pretend_args_size;
2677 HARD_REG_SET mask;
2678 int n_spilled = 0;
2679 int spilled_gr_p = 0;
2680 int spilled_fr_p = 0;
2681 unsigned int regno;
2682 int min_regno;
2683 int max_regno;
2684 int i;
2685
2686 if (current_frame_info.initialized)
2687 return;
2688
2689 memset (&current_frame_info, 0, sizeof current_frame_info);
2690 CLEAR_HARD_REG_SET (mask);
2691
2692 /* Don't allocate scratches to the return register. */
2693 diddle_return_value (mark_reg_gr_used_mask, NULL);
2694
2695 /* Don't allocate scratches to the EH scratch registers. */
2696 if (cfun->machine->ia64_eh_epilogue_sp)
2697 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2698 if (cfun->machine->ia64_eh_epilogue_bsp)
2699 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2700
2701 /* Static stack checking uses r2 and r3. */
2702 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2703 current_frame_info.gr_used_mask |= 0xc;
2704
2705 /* Find the size of the register stack frame. We have only 80 local
2706 registers, because we reserve 8 for the inputs and 8 for the
2707 outputs. */
2708
2709 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2710 since we'll be adjusting that down later. */
2711 regno = LOC_REG (78) + ! frame_pointer_needed;
2712 for (; regno >= LOC_REG (0); regno--)
2713 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2714 break;
2715 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2716
2717 /* For functions marked with the syscall_linkage attribute, we must mark
2718 all eight input registers as in use, so that locals aren't visible to
2719 the caller. */
2720
2721 if (cfun->machine->n_varargs > 0
2722 || lookup_attribute ("syscall_linkage",
2723 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2724 current_frame_info.n_input_regs = 8;
2725 else
2726 {
2727 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2728 if (df_regs_ever_live_p (regno))
2729 break;
2730 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2731 }
2732
2733 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2734 if (df_regs_ever_live_p (regno))
2735 break;
2736 i = regno - OUT_REG (0) + 1;
2737
2738 #ifndef PROFILE_HOOK
2739 /* When -p profiling, we need one output register for the mcount argument.
2740 Likewise for -a profiling for the bb_init_func argument. For -ax
2741 profiling, we need two output registers for the two bb_init_trace_func
2742 arguments. */
2743 if (crtl->profile)
2744 i = MAX (i, 1);
2745 #endif
2746 current_frame_info.n_output_regs = i;
2747
2748 /* ??? No rotating register support yet. */
2749 current_frame_info.n_rotate_regs = 0;
2750
2751 /* Discover which registers need spilling, and how much room that
2752 will take. Begin with floating point and general registers,
2753 which will always wind up on the stack. */
2754
2755 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2756 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2757 {
2758 SET_HARD_REG_BIT (mask, regno);
2759 spill_size += 16;
2760 n_spilled += 1;
2761 spilled_fr_p = 1;
2762 }
2763
2764 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2765 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2766 {
2767 SET_HARD_REG_BIT (mask, regno);
2768 spill_size += 8;
2769 n_spilled += 1;
2770 spilled_gr_p = 1;
2771 }
2772
2773 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2774 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2775 {
2776 SET_HARD_REG_BIT (mask, regno);
2777 spill_size += 8;
2778 n_spilled += 1;
2779 }
2780
2781 /* Now come all special registers that might get saved in other
2782 general registers. */
2783
2784 if (frame_pointer_needed)
2785 {
2786 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2787 /* If we did not get a register, then we take LOC79. This is guaranteed
2788 to be free, even if regs_ever_live is already set, because this is
2789 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2790 as we don't count loc79 above. */
2791 if (current_frame_info.r[reg_fp] == 0)
2792 {
2793 current_frame_info.r[reg_fp] = LOC_REG (79);
2794 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2795 }
2796 }
2797
2798 if (! crtl->is_leaf)
2799 {
2800 /* Emit a save of BR0 if we call other functions. Do this even
2801 if this function doesn't return, as EH depends on this to be
2802 able to unwind the stack. */
2803 SET_HARD_REG_BIT (mask, BR_REG (0));
2804
2805 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2806 if (current_frame_info.r[reg_save_b0] == 0)
2807 {
2808 extra_spill_size += 8;
2809 n_spilled += 1;
2810 }
2811
2812 /* Similarly for ar.pfs. */
2813 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2814 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2815 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2816 {
2817 extra_spill_size += 8;
2818 n_spilled += 1;
2819 }
2820
2821 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2822 registers are clobbered, so we fall back to the stack. */
2823 current_frame_info.r[reg_save_gp]
2824 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2825 if (current_frame_info.r[reg_save_gp] == 0)
2826 {
2827 SET_HARD_REG_BIT (mask, GR_REG (1));
2828 spill_size += 8;
2829 n_spilled += 1;
2830 }
2831 }
2832 else
2833 {
2834 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2835 {
2836 SET_HARD_REG_BIT (mask, BR_REG (0));
2837 extra_spill_size += 8;
2838 n_spilled += 1;
2839 }
2840
2841 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2842 {
2843 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2844 current_frame_info.r[reg_save_ar_pfs]
2845 = find_gr_spill (reg_save_ar_pfs, 1);
2846 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2847 {
2848 extra_spill_size += 8;
2849 n_spilled += 1;
2850 }
2851 }
2852 }
2853
2854 /* Unwind descriptor hackery: things are most efficient if we allocate
2855 consecutive GR save registers for RP, PFS, FP in that order. However,
2856 it is absolutely critical that FP get the only hard register that's
2857 guaranteed to be free, so we allocated it first. If all three did
2858 happen to be allocated hard regs, and are consecutive, rearrange them
2859 into the preferred order now.
2860
2861 If we have already emitted code for any of those registers,
2862 then it's already too late to change. */
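/* For example, if the three saves landed in r33, r34 and r35 in some
other order, they are reshuffled below so that b0 is saved in r33,
ar.pfs in r34 and the frame pointer in r35. */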
2863 min_regno = MIN (current_frame_info.r[reg_fp],
2864 MIN (current_frame_info.r[reg_save_b0],
2865 current_frame_info.r[reg_save_ar_pfs]));
2866 max_regno = MAX (current_frame_info.r[reg_fp],
2867 MAX (current_frame_info.r[reg_save_b0],
2868 current_frame_info.r[reg_save_ar_pfs]));
2869 if (min_regno > 0
2870 && min_regno + 2 == max_regno
2871 && (current_frame_info.r[reg_fp] == min_regno + 1
2872 || current_frame_info.r[reg_save_b0] == min_regno + 1
2873 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2874 && (emitted_frame_related_regs[reg_save_b0] == 0
2875 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2876 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2877 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2878 && (emitted_frame_related_regs[reg_fp] == 0
2879 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2880 {
2881 current_frame_info.r[reg_save_b0] = min_regno;
2882 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2883 current_frame_info.r[reg_fp] = min_regno + 2;
2884 }
2885
2886 /* See if we need to store the predicate register block. */
2887 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2888 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2889 break;
2890 if (regno <= PR_REG (63))
2891 {
2892 SET_HARD_REG_BIT (mask, PR_REG (0));
2893 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2894 if (current_frame_info.r[reg_save_pr] == 0)
2895 {
2896 extra_spill_size += 8;
2897 n_spilled += 1;
2898 }
2899
2900 /* ??? Mark them all as used so that register renaming and such
2901 are free to use them. */
2902 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2903 df_set_regs_ever_live (regno, true);
2904 }
2905
2906 /* If we're forced to use st8.spill, we're forced to save and restore
2907 ar.unat as well. The check for existing liveness allows inline asm
2908 to touch ar.unat. */
2909 if (spilled_gr_p || cfun->machine->n_varargs
2910 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2911 {
2912 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2913 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2914 current_frame_info.r[reg_save_ar_unat]
2915 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2916 if (current_frame_info.r[reg_save_ar_unat] == 0)
2917 {
2918 extra_spill_size += 8;
2919 n_spilled += 1;
2920 }
2921 }
2922
2923 if (df_regs_ever_live_p (AR_LC_REGNUM))
2924 {
2925 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2926 current_frame_info.r[reg_save_ar_lc]
2927 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2928 if (current_frame_info.r[reg_save_ar_lc] == 0)
2929 {
2930 extra_spill_size += 8;
2931 n_spilled += 1;
2932 }
2933 }
2934
2935 /* If we have an odd number of words of pretend arguments written to
2936 the stack, then the FR save area will be unaligned. We round the
2937 size of this area up to keep things 16 byte aligned. */
2938 if (spilled_fr_p)
2939 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2940 else
2941 pretend_args_size = crtl->args.pretend_args_size;
2942
2943 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2944 + crtl->outgoing_args_size);
2945 total_size = IA64_STACK_ALIGN (total_size);
2946
2947 /* We always use the 16-byte scratch area provided by the caller, but
2948 if we are a leaf function, there's no one to which we need to provide
2949 a scratch area. However, if the function allocates dynamic stack space,
2950 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2951 so we need to cope. */
2952 if (crtl->is_leaf && !cfun->calls_alloca)
2953 total_size = MAX (0, total_size - 16);
2954
2955 current_frame_info.total_size = total_size;
2956 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2957 current_frame_info.spill_size = spill_size;
2958 current_frame_info.extra_spill_size = extra_spill_size;
2959 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2960 current_frame_info.n_spilled = n_spilled;
2961 current_frame_info.initialized = reload_completed;
2962 }
2963
2964 /* Worker function for TARGET_CAN_ELIMINATE. */
2965
2966 bool
2967 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2968 {
2969 return (to == BR_REG (0) ? crtl->is_leaf : true);
2970 }
2971
2972 /* Compute the initial difference between the specified pair of registers. */
2973
2974 HOST_WIDE_INT
2975 ia64_initial_elimination_offset (int from, int to)
2976 {
2977 HOST_WIDE_INT offset;
2978
2979 ia64_compute_frame_size (get_frame_size ());
2980 switch (from)
2981 {
2982 case FRAME_POINTER_REGNUM:
2983 switch (to)
2984 {
2985 case HARD_FRAME_POINTER_REGNUM:
2986 offset = -current_frame_info.total_size;
2987 if (!crtl->is_leaf || cfun->calls_alloca)
2988 offset += 16 + crtl->outgoing_args_size;
2989 break;
2990
2991 case STACK_POINTER_REGNUM:
2992 offset = 0;
2993 if (!crtl->is_leaf || cfun->calls_alloca)
2994 offset += 16 + crtl->outgoing_args_size;
2995 break;
2996
2997 default:
2998 gcc_unreachable ();
2999 }
3000 break;
3001
3002 case ARG_POINTER_REGNUM:
3003 /* Arguments start above the 16 byte save area, unless this is a stdarg
3004 function, in which case we store through the 16 byte save area. */
3005 switch (to)
3006 {
3007 case HARD_FRAME_POINTER_REGNUM:
3008 offset = 16 - crtl->args.pretend_args_size;
3009 break;
3010
3011 case STACK_POINTER_REGNUM:
3012 offset = (current_frame_info.total_size
3013 + 16 - crtl->args.pretend_args_size);
3014 break;
3015
3016 default:
3017 gcc_unreachable ();
3018 }
3019 break;
3020
3021 default:
3022 gcc_unreachable ();
3023 }
3024
3025 return offset;
3026 }
3027
3028 /* If there are more than a trivial number of register spills, we use
3029 two interleaved iterators so that we can get two memory references
3030 per insn group.
3031
3032 In order to simplify things in the prologue and epilogue expanders,
3033 we use helper functions to fix up the memory references after the
3034 fact with the appropriate offsets to a POST_MODIFY memory mode.
3035 The following data structure tracks the state of the two iterators
3036 while insns are being emitted. */
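/* In particular, when a later reference goes through the same iterator,
the previous memory reference is rewritten in place into a POST_MODIFY
whose displacement advances the iterator to the new offset (or, if the
displacement does not fit, a separate add is emitted instead). */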
3037
3038 struct spill_fill_data
3039 {
3040 rtx_insn *init_after; /* point at which to emit initializations */
3041 rtx init_reg[2]; /* initial base register */
3042 rtx iter_reg[2]; /* the iterator registers */
3043 rtx *prev_addr[2]; /* address of last memory use */
3044 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
3045 HOST_WIDE_INT prev_off[2]; /* last offset */
3046 int n_iter; /* number of iterators in use */
3047 int next_iter; /* next iterator to use */
3048 unsigned int save_gr_used_mask;
3049 };
3050
3051 static struct spill_fill_data spill_fill_data;
3052
3053 static void
3054 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3055 {
3056 int i;
3057
3058 spill_fill_data.init_after = get_last_insn ();
3059 spill_fill_data.init_reg[0] = init_reg;
3060 spill_fill_data.init_reg[1] = init_reg;
3061 spill_fill_data.prev_addr[0] = NULL;
3062 spill_fill_data.prev_addr[1] = NULL;
3063 spill_fill_data.prev_insn[0] = NULL;
3064 spill_fill_data.prev_insn[1] = NULL;
3065 spill_fill_data.prev_off[0] = cfa_off;
3066 spill_fill_data.prev_off[1] = cfa_off;
3067 spill_fill_data.next_iter = 0;
3068 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3069
3070 spill_fill_data.n_iter = 1 + (n_spills > 2);
3071 for (i = 0; i < spill_fill_data.n_iter; ++i)
3072 {
3073 int regno = next_scratch_gr_reg ();
3074 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3075 current_frame_info.gr_used_mask |= 1 << regno;
3076 }
3077 }
3078
3079 static void
3080 finish_spill_pointers (void)
3081 {
3082 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3083 }
3084
3085 static rtx
3086 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3087 {
3088 int iter = spill_fill_data.next_iter;
3089 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3090 rtx disp_rtx = GEN_INT (disp);
3091 rtx mem;
3092
3093 if (spill_fill_data.prev_addr[iter])
3094 {
3095 if (satisfies_constraint_N (disp_rtx))
3096 {
3097 *spill_fill_data.prev_addr[iter]
3098 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3099 gen_rtx_PLUS (DImode,
3100 spill_fill_data.iter_reg[iter],
3101 disp_rtx));
3102 add_reg_note (spill_fill_data.prev_insn[iter],
3103 REG_INC, spill_fill_data.iter_reg[iter]);
3104 }
3105 else
3106 {
3107 /* ??? Could use register post_modify for loads. */
3108 if (!satisfies_constraint_I (disp_rtx))
3109 {
3110 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3111 emit_move_insn (tmp, disp_rtx);
3112 disp_rtx = tmp;
3113 }
3114 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3115 spill_fill_data.iter_reg[iter], disp_rtx));
3116 }
3117 }
3118 /* Micro-optimization: if we've created a frame pointer, it's at
3119 CFA 0, which may allow the real iterator to be initialized lower,
3120 slightly increasing parallelism. Also, if there are few saves
3121 it may eliminate the iterator entirely. */
3122 else if (disp == 0
3123 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3124 && frame_pointer_needed)
3125 {
3126 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3127 set_mem_alias_set (mem, get_varargs_alias_set ());
3128 return mem;
3129 }
3130 else
3131 {
3132 rtx seq;
3133 rtx_insn *insn;
3134
3135 if (disp == 0)
3136 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3137 spill_fill_data.init_reg[iter]);
3138 else
3139 {
3140 start_sequence ();
3141
3142 if (!satisfies_constraint_I (disp_rtx))
3143 {
3144 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3145 emit_move_insn (tmp, disp_rtx);
3146 disp_rtx = tmp;
3147 }
3148
3149 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3150 spill_fill_data.init_reg[iter],
3151 disp_rtx));
3152
3153 seq = get_insns ();
3154 end_sequence ();
3155 }
3156
3157 /* Be careful in case this is the first insn in a sequence. */
3158 if (spill_fill_data.init_after)
3159 insn = emit_insn_after (seq, spill_fill_data.init_after);
3160 else
3161 {
3162 rtx_insn *first = get_insns ();
3163 if (first)
3164 insn = emit_insn_before (seq, first);
3165 else
3166 insn = emit_insn (seq);
3167 }
3168 spill_fill_data.init_after = insn;
3169 }
3170
3171 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3172
3173 /* ??? Not all of the spills are for varargs, but some of them are.
3174 The rest of the spills belong in an alias set of their own. But
3175 it doesn't actually hurt to include them here. */
3176 set_mem_alias_set (mem, get_varargs_alias_set ());
3177
3178 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3179 spill_fill_data.prev_off[iter] = cfa_off;
3180
3181 if (++iter >= spill_fill_data.n_iter)
3182 iter = 0;
3183 spill_fill_data.next_iter = iter;
3184
3185 return mem;
3186 }
3187
3188 static void
3189 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3190 rtx frame_reg)
3191 {
3192 int iter = spill_fill_data.next_iter;
3193 rtx mem;
3194 rtx_insn *insn;
3195
3196 mem = spill_restore_mem (reg, cfa_off);
3197 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3198 spill_fill_data.prev_insn[iter] = insn;
3199
3200 if (frame_reg)
3201 {
3202 rtx base;
3203 HOST_WIDE_INT off;
3204
3205 RTX_FRAME_RELATED_P (insn) = 1;
3206
3207 /* Don't even pretend that the unwind code can intuit its way
3208 through a pair of interleaved post_modify iterators. Just
3209 provide the correct answer. */
3210
3211 if (frame_pointer_needed)
3212 {
3213 base = hard_frame_pointer_rtx;
3214 off = - cfa_off;
3215 }
3216 else
3217 {
3218 base = stack_pointer_rtx;
3219 off = current_frame_info.total_size - cfa_off;
3220 }
3221
3222 add_reg_note (insn, REG_CFA_OFFSET,
3223 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3224 plus_constant (Pmode,
3225 base, off)),
3226 frame_reg));
3227 }
3228 }
3229
3230 static void
3231 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3232 {
3233 int iter = spill_fill_data.next_iter;
3234 rtx_insn *insn;
3235
3236 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3237 GEN_INT (cfa_off)));
3238 spill_fill_data.prev_insn[iter] = insn;
3239 }
3240
3241 /* Wrapper functions that discard the CONST_INT spill offset. These
3242 exist so that we can give gr_spill/gr_fill the offset they need and
3243 use a consistent function interface. */
3244
3245 static rtx
3246 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3247 {
3248 return gen_movdi (dest, src);
3249 }
3250
3251 static rtx
3252 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3253 {
3254 return gen_fr_spill (dest, src);
3255 }
3256
3257 static rtx
3258 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3259 {
3260 return gen_fr_restore (dest, src);
3261 }
3262
3263 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3264
3265 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3266 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
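/* Roughly: each stacked register occupies 8 bytes on the backing store,
the RSE interleaves one NaT collection doubleword per group of 63
registers, and the trailing +1 allows for a partially filled group. */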
3267
3268 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3269 inclusive. These are offsets from the current stack pointer. BS_SIZE
3270 is the size of the backing store. ??? This clobbers r2 and r3. */
3271
3272 static void
3273 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3274 int bs_size)
3275 {
3276 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3277 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3278 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3279
3280 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3281 of the Register Stack Engine. We also need to probe it after checking
3282 that the 2 stacks don't overlap. */
3283 emit_insn (gen_bsp_value (r3));
3284 emit_move_insn (r2, GEN_INT (-(first + size)));
3285
3286 /* Compare current value of BSP and SP registers. */
3287 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3288 r3, stack_pointer_rtx)));
3289
3290 /* Compute the address of the probe for the Backing Store (which grows
3291 towards higher addresses). We probe only at the first offset of
3292 the next page because some OSes (e.g. Linux/ia64) only extend the
3293 backing store when this specific address is hit (but generate a SEGV
3294 on other addresses). Page size is the worst case (4KB). The reserve
3295 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3296 Also compute the address of the last probe for the memory stack
3297 (which grows towards lower addresses). */
3298 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3299 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3300
3301 /* Compare them and raise SEGV if the former has topped the latter. */
3302 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3303 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3304 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3305 r3, r2))));
3306 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3307 const0_rtx),
3308 const0_rtx));
3309 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3310 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3311 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3312 GEN_INT (11))));
3313
3314 /* Probe the Backing Store if necessary. */
3315 if (bs_size > 0)
3316 emit_stack_probe (r3);
3317
3318 /* Probe the memory stack if necessary. */
3319 if (size == 0)
3320 ;
3321
3322 /* See if we have a constant small number of probes to generate. If so,
3323 that's the easy case. */
3324 else if (size <= PROBE_INTERVAL)
3325 emit_stack_probe (r2);
3326
3327 /* The run-time loop is made up of 9 insns in the generic case while this
3328 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
3329 else if (size <= 4 * PROBE_INTERVAL)
3330 {
3331 HOST_WIDE_INT i;
3332
3333 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3334 emit_insn (gen_rtx_SET (r2,
3335 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3336 emit_stack_probe (r2);
3337
3338 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3339 it exceeds SIZE. If only two probes are needed, this will not
3340 generate any code. Then probe at FIRST + SIZE. */
3341 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3342 {
3343 emit_insn (gen_rtx_SET (r2,
3344 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3345 emit_stack_probe (r2);
3346 }
3347
3348 emit_insn (gen_rtx_SET (r2,
3349 plus_constant (Pmode, r2,
3350 (i - PROBE_INTERVAL) - size)));
3351 emit_stack_probe (r2);
3352 }
3353
3354 /* Otherwise, do the same as above, but in a loop. Note that we must be
3355 extra careful with variables wrapping around because we might be at
3356 the very top (or the very bottom) of the address space and we have
3357 to be able to handle this case properly; in particular, we use an
3358 equality test for the loop condition. */
3359 else
3360 {
3361 HOST_WIDE_INT rounded_size;
3362
3363 emit_move_insn (r2, GEN_INT (-first));
3364
3365
3366 /* Step 1: round SIZE to the previous multiple of the interval. */
3367
3368 rounded_size = size & -PROBE_INTERVAL;
3369
3370
3371 /* Step 2: compute initial and final value of the loop counter. */
3372
3373 /* TEST_ADDR = SP + FIRST. */
3374 emit_insn (gen_rtx_SET (r2,
3375 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3376
3377 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3378 if (rounded_size > (1 << 21))
3379 {
3380 emit_move_insn (r3, GEN_INT (-rounded_size));
3381 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3382 }
3383 else
3384 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3385 GEN_INT (-rounded_size))));
3386
3387
3388 /* Step 3: the loop
3389
3390 do
3391 {
3392 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3393 probe at TEST_ADDR
3394 }
3395 while (TEST_ADDR != LAST_ADDR)
3396
3397 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3398 until it is equal to ROUNDED_SIZE. */
3399
3400 emit_insn (gen_probe_stack_range (r2, r2, r3));
3401
3402
3403 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3404 that SIZE is equal to ROUNDED_SIZE. */
3405
3406 /* TEMP = SIZE - ROUNDED_SIZE. */
3407 if (size != rounded_size)
3408 {
3409 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3410 rounded_size - size)));
3411 emit_stack_probe (r2);
3412 }
3413 }
3414
3415 /* Make sure nothing is scheduled before we are done. */
3416 emit_insn (gen_blockage ());
3417 }
3418
3419 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3420 absolute addresses. */
3421
3422 const char *
3423 output_probe_stack_range (rtx reg1, rtx reg2)
3424 {
3425 static int labelno = 0;
3426 char loop_lab[32];
3427 rtx xops[3];
3428
3429 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3430
3431 /* Loop. */
3432 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3433
3434 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3435 xops[0] = reg1;
3436 xops[1] = GEN_INT (-PROBE_INTERVAL);
3437 output_asm_insn ("addl %0 = %1, %0", xops);
3438 fputs ("\t;;\n", asm_out_file);
3439
3440 /* Probe at TEST_ADDR. */
3441 output_asm_insn ("probe.w.fault %0, 0", xops);
3442
3443 /* Test if TEST_ADDR == LAST_ADDR. */
3444 xops[1] = reg2;
3445 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3446 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3447
3448 /* Branch. */
3449 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3450 assemble_name_raw (asm_out_file, loop_lab);
3451 fputc ('\n', asm_out_file);
3452
3453 return "";
3454 }
3455
3456 /* Called after register allocation to add any instructions needed for the
3457 prologue. Using a prologue insn is favored compared to putting all of the
3458 instructions in output_function_prologue(), since it allows the scheduler
3459 to intermix instructions with the saves of the caller saved registers. In
3460 some cases, it might be necessary to emit a barrier instruction as the last
3461 insn to prevent such scheduling.
3462
3463 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3464 so that the debug info generation code can handle them properly.
3465
3466 The register save area is laid out like so:
3467 cfa+16
3468 [ varargs spill area ]
3469 [ fr register spill area ]
3470 [ br register spill area ]
3471 [ ar register spill area ]
3472 [ pr register spill area ]
3473 [ gr register spill area ] */
3474
3475 /* ??? Get inefficient code when the frame size is larger than can fit in an
3476 adds instruction. */
3477
3478 void
3479 ia64_expand_prologue (void)
3480 {
3481 rtx_insn *insn;
3482 rtx ar_pfs_save_reg, ar_unat_save_reg;
3483 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3484 rtx reg, alt_reg;
3485
3486 ia64_compute_frame_size (get_frame_size ());
3487 last_scratch_gr_reg = 15;
3488
3489 if (flag_stack_usage_info)
3490 current_function_static_stack_size = current_frame_info.total_size;
3491
3492 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3493 {
3494 HOST_WIDE_INT size = current_frame_info.total_size;
3495 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3496 + current_frame_info.n_local_regs);
3497
3498 if (crtl->is_leaf && !cfun->calls_alloca)
3499 {
3500 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3501 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3502 size - STACK_CHECK_PROTECT,
3503 bs_size);
3504 else if (size + bs_size > STACK_CHECK_PROTECT)
3505 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3506 }
3507 else if (size + bs_size > 0)
3508 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3509 }
3510
3511 if (dump_file)
3512 {
3513 fprintf (dump_file, "ia64 frame related registers "
3514 "recorded in current_frame_info.r[]:\n");
3515 #define PRINTREG(a) if (current_frame_info.r[a]) \
3516 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3517 PRINTREG(reg_fp);
3518 PRINTREG(reg_save_b0);
3519 PRINTREG(reg_save_pr);
3520 PRINTREG(reg_save_ar_pfs);
3521 PRINTREG(reg_save_ar_unat);
3522 PRINTREG(reg_save_ar_lc);
3523 PRINTREG(reg_save_gp);
3524 #undef PRINTREG
3525 }
3526
3527   /* If there is no epilogue, then some prologue insns are not needed.
3528      We must avoid emitting such dead prologue insns, because flow
3529      will complain about them.  */
3530 if (optimize)
3531 {
3532 edge e;
3533 edge_iterator ei;
3534
3535 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3536 if ((e->flags & EDGE_FAKE) == 0
3537 && (e->flags & EDGE_FALLTHRU) != 0)
3538 break;
3539 epilogue_p = (e != NULL);
3540 }
3541 else
3542 epilogue_p = 1;
3543
3544 /* Set the local, input, and output register names. We need to do this
3545 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3546 half. If we use in/loc/out register names, then we get assembler errors
3547 in crtn.S because there is no alloc insn or regstk directive in there. */
3548 if (! TARGET_REG_NAMES)
3549 {
3550 int inputs = current_frame_info.n_input_regs;
3551 int locals = current_frame_info.n_local_regs;
3552 int outputs = current_frame_info.n_output_regs;
3553
3554 for (i = 0; i < inputs; i++)
3555 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3556 for (i = 0; i < locals; i++)
3557 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3558 for (i = 0; i < outputs; i++)
3559 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3560 }
3561
3562 /* Set the frame pointer register name. The regnum is logically loc79,
3563 but of course we'll not have allocated that many locals. Rather than
3564 worrying about renumbering the existing rtxs, we adjust the name. */
3565 /* ??? This code means that we can never use one local register when
3566 there is a frame pointer. loc79 gets wasted in this case, as it is
3567 renamed to a register that will never be used. See also the try_locals
3568 code in find_gr_spill. */
3569 if (current_frame_info.r[reg_fp])
3570 {
3571 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3572 reg_names[HARD_FRAME_POINTER_REGNUM]
3573 = reg_names[current_frame_info.r[reg_fp]];
3574 reg_names[current_frame_info.r[reg_fp]] = tmp;
3575 }
3576
3577 /* We don't need an alloc instruction if we've used no outputs or locals. */
3578 if (current_frame_info.n_local_regs == 0
3579 && current_frame_info.n_output_regs == 0
3580 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3581 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3582 {
3583 /* If there is no alloc, but there are input registers used, then we
3584 need a .regstk directive. */
3585 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3586 ar_pfs_save_reg = NULL_RTX;
3587 }
3588 else
3589 {
3590 current_frame_info.need_regstk = 0;
3591
3592 if (current_frame_info.r[reg_save_ar_pfs])
3593 {
3594 regno = current_frame_info.r[reg_save_ar_pfs];
3595 reg_emitted (reg_save_ar_pfs);
3596 }
3597 else
3598 regno = next_scratch_gr_reg ();
3599 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3600
3601 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3602 GEN_INT (current_frame_info.n_input_regs),
3603 GEN_INT (current_frame_info.n_local_regs),
3604 GEN_INT (current_frame_info.n_output_regs),
3605 GEN_INT (current_frame_info.n_rotate_regs)));
3606 if (current_frame_info.r[reg_save_ar_pfs])
3607 {
3608 RTX_FRAME_RELATED_P (insn) = 1;
3609 add_reg_note (insn, REG_CFA_REGISTER,
3610 gen_rtx_SET (ar_pfs_save_reg,
3611 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3612 }
3613 }
3614
3615 /* Set up frame pointer, stack pointer, and spill iterators. */
3616
3617 n_varargs = cfun->machine->n_varargs;
3618 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3619 stack_pointer_rtx, 0);
3620
3621 if (frame_pointer_needed)
3622 {
3623 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3624 RTX_FRAME_RELATED_P (insn) = 1;
3625
3626 /* Force the unwind info to recognize this as defining a new CFA,
3627 rather than some temp register setup. */
3628 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3629 }
3630
3631 if (current_frame_info.total_size != 0)
3632 {
3633 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3634 rtx offset;
3635
3636 if (satisfies_constraint_I (frame_size_rtx))
3637 offset = frame_size_rtx;
3638 else
3639 {
3640 regno = next_scratch_gr_reg ();
3641 offset = gen_rtx_REG (DImode, regno);
3642 emit_move_insn (offset, frame_size_rtx);
3643 }
3644
3645 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3646 stack_pointer_rtx, offset));
3647
3648 if (! frame_pointer_needed)
3649 {
3650 RTX_FRAME_RELATED_P (insn) = 1;
3651 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3652 gen_rtx_SET (stack_pointer_rtx,
3653 gen_rtx_PLUS (DImode,
3654 stack_pointer_rtx,
3655 frame_size_rtx)));
3656 }
3657
3658 /* ??? At this point we must generate a magic insn that appears to
3659 modify the stack pointer, the frame pointer, and all spill
3660 iterators. This would allow the most scheduling freedom. For
3661 now, just hard stop. */
3662 emit_insn (gen_blockage ());
3663 }
3664
3665 /* Must copy out ar.unat before doing any integer spills. */
3666 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3667 {
3668 if (current_frame_info.r[reg_save_ar_unat])
3669 {
3670 ar_unat_save_reg
3671 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3672 reg_emitted (reg_save_ar_unat);
3673 }
3674 else
3675 {
3676 alt_regno = next_scratch_gr_reg ();
3677 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3678 current_frame_info.gr_used_mask |= 1 << alt_regno;
3679 }
3680
3681 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3682 insn = emit_move_insn (ar_unat_save_reg, reg);
3683 if (current_frame_info.r[reg_save_ar_unat])
3684 {
3685 RTX_FRAME_RELATED_P (insn) = 1;
3686 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3687 }
3688
3689 /* Even if we're not going to generate an epilogue, we still
3690 need to save the register so that EH works. */
3691 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3692 emit_insn (gen_prologue_use (ar_unat_save_reg));
3693 }
3694 else
3695 ar_unat_save_reg = NULL_RTX;
3696
3697 /* Spill all varargs registers. Do this before spilling any GR registers,
3698 since we want the UNAT bits for the GR registers to override the UNAT
3699 bits from varargs, which we don't care about. */
3700
3701 cfa_off = -16;
3702 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3703 {
3704 reg = gen_rtx_REG (DImode, regno);
3705 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3706 }
3707
3708 /* Locate the bottom of the register save area. */
3709 cfa_off = (current_frame_info.spill_cfa_off
3710 + current_frame_info.spill_size
3711 + current_frame_info.extra_spill_size);
3712
3713 /* Save the predicate register block either in a register or in memory. */
3714 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3715 {
3716 reg = gen_rtx_REG (DImode, PR_REG (0));
3717 if (current_frame_info.r[reg_save_pr] != 0)
3718 {
3719 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3720 reg_emitted (reg_save_pr);
3721 insn = emit_move_insn (alt_reg, reg);
3722
3723 /* ??? Denote pr spill/fill by a DImode move that modifies all
3724 64 hard registers. */
3725 RTX_FRAME_RELATED_P (insn) = 1;
3726 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3727
3728 /* Even if we're not going to generate an epilogue, we still
3729 need to save the register so that EH works. */
3730 if (! epilogue_p)
3731 emit_insn (gen_prologue_use (alt_reg));
3732 }
3733 else
3734 {
3735 alt_regno = next_scratch_gr_reg ();
3736 alt_reg = gen_rtx_REG (DImode, alt_regno);
3737 insn = emit_move_insn (alt_reg, reg);
3738 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3739 cfa_off -= 8;
3740 }
3741 }
3742
3743 /* Handle AR regs in numerical order. All of them get special handling. */
3744 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3745 && current_frame_info.r[reg_save_ar_unat] == 0)
3746 {
3747 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3748 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3749 cfa_off -= 8;
3750 }
3751
3752 /* The alloc insn already copied ar.pfs into a general register. The
3753 only thing we have to do now is copy that register to a stack slot
3754 if we'd not allocated a local register for the job. */
3755 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3756 && current_frame_info.r[reg_save_ar_pfs] == 0)
3757 {
3758 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3759 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3760 cfa_off -= 8;
3761 }
3762
3763 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3764 {
3765 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3766 if (current_frame_info.r[reg_save_ar_lc] != 0)
3767 {
3768 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3769 reg_emitted (reg_save_ar_lc);
3770 insn = emit_move_insn (alt_reg, reg);
3771 RTX_FRAME_RELATED_P (insn) = 1;
3772 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3773
3774 /* Even if we're not going to generate an epilogue, we still
3775 need to save the register so that EH works. */
3776 if (! epilogue_p)
3777 emit_insn (gen_prologue_use (alt_reg));
3778 }
3779 else
3780 {
3781 alt_regno = next_scratch_gr_reg ();
3782 alt_reg = gen_rtx_REG (DImode, alt_regno);
3783 emit_move_insn (alt_reg, reg);
3784 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3785 cfa_off -= 8;
3786 }
3787 }
3788
3789 /* Save the return pointer. */
3790 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3791 {
3792 reg = gen_rtx_REG (DImode, BR_REG (0));
3793 if (current_frame_info.r[reg_save_b0] != 0)
3794 {
3795 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3796 reg_emitted (reg_save_b0);
3797 insn = emit_move_insn (alt_reg, reg);
3798 RTX_FRAME_RELATED_P (insn) = 1;
3799 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3800
3801 /* Even if we're not going to generate an epilogue, we still
3802 need to save the register so that EH works. */
3803 if (! epilogue_p)
3804 emit_insn (gen_prologue_use (alt_reg));
3805 }
3806 else
3807 {
3808 alt_regno = next_scratch_gr_reg ();
3809 alt_reg = gen_rtx_REG (DImode, alt_regno);
3810 emit_move_insn (alt_reg, reg);
3811 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3812 cfa_off -= 8;
3813 }
3814 }
3815
3816 if (current_frame_info.r[reg_save_gp])
3817 {
3818 reg_emitted (reg_save_gp);
3819 insn = emit_move_insn (gen_rtx_REG (DImode,
3820 current_frame_info.r[reg_save_gp]),
3821 pic_offset_table_rtx);
3822 }
3823
3824 /* We should now be at the base of the gr/br/fr spill area. */
3825 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3826 + current_frame_info.spill_size));
3827
3828 /* Spill all general registers. */
3829 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3830 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3831 {
3832 reg = gen_rtx_REG (DImode, regno);
3833 do_spill (gen_gr_spill, reg, cfa_off, reg);
3834 cfa_off -= 8;
3835 }
3836
3837 /* Spill the rest of the BR registers. */
3838 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3839 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3840 {
3841 alt_regno = next_scratch_gr_reg ();
3842 alt_reg = gen_rtx_REG (DImode, alt_regno);
3843 reg = gen_rtx_REG (DImode, regno);
3844 emit_move_insn (alt_reg, reg);
3845 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3846 cfa_off -= 8;
3847 }
3848
3849 /* Align the frame and spill all FR registers. */
3850 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3851 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3852 {
3853 gcc_assert (!(cfa_off & 15));
3854 reg = gen_rtx_REG (XFmode, regno);
3855 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3856 cfa_off -= 16;
3857 }
3858
3859 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3860
3861 finish_spill_pointers ();
3862 }
3863
3864 /* Output the textual info surrounding the prologue. */
3865
3866 void
3867 ia64_start_function (FILE *file, const char *fnname,
3868 tree decl ATTRIBUTE_UNUSED)
3869 {
3870 #if TARGET_ABI_OPEN_VMS
3871 vms_start_function (fnname);
3872 #endif
3873
3874 fputs ("\t.proc ", file);
3875 assemble_name (file, fnname);
3876 fputc ('\n', file);
3877 ASM_OUTPUT_LABEL (file, fnname);
3878 }
3879
3880 /* Called after register allocation to add any instructions needed for the
3881 epilogue. Using an epilogue insn is favored compared to putting all of the
3882    instructions in output_function_epilogue(), since it allows the scheduler
3883    to intermix instructions with the restores of the caller saved registers.  In
3884 some cases, it might be necessary to emit a barrier instruction as the last
3885 insn to prevent such scheduling. */
3886
3887 void
3888 ia64_expand_epilogue (int sibcall_p)
3889 {
3890 rtx_insn *insn;
3891 rtx reg, alt_reg, ar_unat_save_reg;
3892 int regno, alt_regno, cfa_off;
3893
3894 ia64_compute_frame_size (get_frame_size ());
3895
3896 /* If there is a frame pointer, then we use it instead of the stack
3897 pointer, so that the stack pointer does not need to be valid when
3898 the epilogue starts. See EXIT_IGNORE_STACK. */
3899 if (frame_pointer_needed)
3900 setup_spill_pointers (current_frame_info.n_spilled,
3901 hard_frame_pointer_rtx, 0);
3902 else
3903 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3904 current_frame_info.total_size);
3905
3906 if (current_frame_info.total_size != 0)
3907 {
3908 /* ??? At this point we must generate a magic insn that appears to
3909 modify the spill iterators and the frame pointer. This would
3910 allow the most scheduling freedom. For now, just hard stop. */
3911 emit_insn (gen_blockage ());
3912 }
3913
3914 /* Locate the bottom of the register save area. */
3915 cfa_off = (current_frame_info.spill_cfa_off
3916 + current_frame_info.spill_size
3917 + current_frame_info.extra_spill_size);
3918
3919 /* Restore the predicate registers. */
3920 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3921 {
3922 if (current_frame_info.r[reg_save_pr] != 0)
3923 {
3924 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3925 reg_emitted (reg_save_pr);
3926 }
3927 else
3928 {
3929 alt_regno = next_scratch_gr_reg ();
3930 alt_reg = gen_rtx_REG (DImode, alt_regno);
3931 do_restore (gen_movdi_x, alt_reg, cfa_off);
3932 cfa_off -= 8;
3933 }
3934 reg = gen_rtx_REG (DImode, PR_REG (0));
3935 emit_move_insn (reg, alt_reg);
3936 }
3937
3938 /* Restore the application registers. */
3939
3940 /* Load the saved unat from the stack, but do not restore it until
3941 after the GRs have been restored. */
3942 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3943 {
3944 if (current_frame_info.r[reg_save_ar_unat] != 0)
3945 {
3946 ar_unat_save_reg
3947 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3948 reg_emitted (reg_save_ar_unat);
3949 }
3950 else
3951 {
3952 alt_regno = next_scratch_gr_reg ();
3953 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3954 current_frame_info.gr_used_mask |= 1 << alt_regno;
3955 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3956 cfa_off -= 8;
3957 }
3958 }
3959 else
3960 ar_unat_save_reg = NULL_RTX;
3961
3962 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3963 {
3964 reg_emitted (reg_save_ar_pfs);
3965 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3966 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3967 emit_move_insn (reg, alt_reg);
3968 }
3969 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3970 {
3971 alt_regno = next_scratch_gr_reg ();
3972 alt_reg = gen_rtx_REG (DImode, alt_regno);
3973 do_restore (gen_movdi_x, alt_reg, cfa_off);
3974 cfa_off -= 8;
3975 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3976 emit_move_insn (reg, alt_reg);
3977 }
3978
3979 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3980 {
3981 if (current_frame_info.r[reg_save_ar_lc] != 0)
3982 {
3983 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3984 reg_emitted (reg_save_ar_lc);
3985 }
3986 else
3987 {
3988 alt_regno = next_scratch_gr_reg ();
3989 alt_reg = gen_rtx_REG (DImode, alt_regno);
3990 do_restore (gen_movdi_x, alt_reg, cfa_off);
3991 cfa_off -= 8;
3992 }
3993 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3994 emit_move_insn (reg, alt_reg);
3995 }
3996
3997 /* Restore the return pointer. */
3998 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3999 {
4000 if (current_frame_info.r[reg_save_b0] != 0)
4001 {
4002 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4003 reg_emitted (reg_save_b0);
4004 }
4005 else
4006 {
4007 alt_regno = next_scratch_gr_reg ();
4008 alt_reg = gen_rtx_REG (DImode, alt_regno);
4009 do_restore (gen_movdi_x, alt_reg, cfa_off);
4010 cfa_off -= 8;
4011 }
4012 reg = gen_rtx_REG (DImode, BR_REG (0));
4013 emit_move_insn (reg, alt_reg);
4014 }
4015
4016 /* We should now be at the base of the gr/br/fr spill area. */
4017 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4018 + current_frame_info.spill_size));
4019
4020 /* The GP may be stored on the stack in the prologue, but it's
4021 never restored in the epilogue. Skip the stack slot. */
4022 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4023 cfa_off -= 8;
4024
4025 /* Restore all general registers. */
4026 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4027 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4028 {
4029 reg = gen_rtx_REG (DImode, regno);
4030 do_restore (gen_gr_restore, reg, cfa_off);
4031 cfa_off -= 8;
4032 }
4033
4034 /* Restore the branch registers. */
4035 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4036 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4037 {
4038 alt_regno = next_scratch_gr_reg ();
4039 alt_reg = gen_rtx_REG (DImode, alt_regno);
4040 do_restore (gen_movdi_x, alt_reg, cfa_off);
4041 cfa_off -= 8;
4042 reg = gen_rtx_REG (DImode, regno);
4043 emit_move_insn (reg, alt_reg);
4044 }
4045
4046 /* Restore floating point registers. */
4047 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4048 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4049 {
4050 gcc_assert (!(cfa_off & 15));
4051 reg = gen_rtx_REG (XFmode, regno);
4052 do_restore (gen_fr_restore_x, reg, cfa_off);
4053 cfa_off -= 16;
4054 }
4055
4056 /* Restore ar.unat for real. */
4057 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4058 {
4059 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4060 emit_move_insn (reg, ar_unat_save_reg);
4061 }
4062
4063 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4064
4065 finish_spill_pointers ();
4066
4067 if (current_frame_info.total_size
4068 || cfun->machine->ia64_eh_epilogue_sp
4069 || frame_pointer_needed)
4070 {
4071 /* ??? At this point we must generate a magic insn that appears to
4072 modify the spill iterators, the stack pointer, and the frame
4073 pointer. This would allow the most scheduling freedom. For now,
4074 just hard stop. */
4075 emit_insn (gen_blockage ());
4076 }
4077
4078 if (cfun->machine->ia64_eh_epilogue_sp)
4079 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4080 else if (frame_pointer_needed)
4081 {
4082 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4083 RTX_FRAME_RELATED_P (insn) = 1;
4084 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4085 }
4086 else if (current_frame_info.total_size)
4087 {
4088 rtx offset, frame_size_rtx;
4089
4090 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4091 if (satisfies_constraint_I (frame_size_rtx))
4092 offset = frame_size_rtx;
4093 else
4094 {
4095 regno = next_scratch_gr_reg ();
4096 offset = gen_rtx_REG (DImode, regno);
4097 emit_move_insn (offset, frame_size_rtx);
4098 }
4099
4100 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4101 offset));
4102
4103 RTX_FRAME_RELATED_P (insn) = 1;
4104 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4105 gen_rtx_SET (stack_pointer_rtx,
4106 gen_rtx_PLUS (DImode,
4107 stack_pointer_rtx,
4108 frame_size_rtx)));
4109 }
4110
4111 if (cfun->machine->ia64_eh_epilogue_bsp)
4112 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4113
4114 if (! sibcall_p)
4115 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4116 else
4117 {
4118 int fp = GR_REG (2);
4119 /* We need a throw away register here, r0 and r1 are reserved,
4120 so r2 is the first available call clobbered register. If
4121 there was a frame_pointer register, we may have swapped the
4122 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4123 sure we're using the string "r2" when emitting the register
4124 name for the assembler. */
4125 if (current_frame_info.r[reg_fp]
4126 && current_frame_info.r[reg_fp] == GR_REG (2))
4127 fp = HARD_FRAME_POINTER_REGNUM;
4128
4129 /* We must emit an alloc to force the input registers to become output
4130 registers. Otherwise, if the callee tries to pass its parameters
4131 through to another call without an intervening alloc, then these
4132 values get lost. */
4133 /* ??? We don't need to preserve all input registers. We only need to
4134 preserve those input registers used as arguments to the sibling call.
4135 It is unclear how to compute that number here. */
4136 if (current_frame_info.n_input_regs != 0)
4137 {
4138 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4139
4140 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4141 const0_rtx, const0_rtx,
4142 n_inputs, const0_rtx));
4143 RTX_FRAME_RELATED_P (insn) = 1;
4144
4145 /* ??? We need to mark the alloc as frame-related so that it gets
4146 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4147 But there's nothing dwarf2 related to be done wrt the register
4148 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4149 the empty parallel means dwarf2out will not see anything. */
4150 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4151 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4152 }
4153 }
4154 }
4155
4156 /* Return 1 if br.ret can do all the work required to return from a
4157 function. */
4158
4159 int
4160 ia64_direct_return (void)
4161 {
4162 if (reload_completed && ! frame_pointer_needed)
4163 {
4164 ia64_compute_frame_size (get_frame_size ());
4165
4166 return (current_frame_info.total_size == 0
4167 && current_frame_info.n_spilled == 0
4168 && current_frame_info.r[reg_save_b0] == 0
4169 && current_frame_info.r[reg_save_pr] == 0
4170 && current_frame_info.r[reg_save_ar_pfs] == 0
4171 && current_frame_info.r[reg_save_ar_unat] == 0
4172 && current_frame_info.r[reg_save_ar_lc] == 0);
4173 }
4174 return 0;
4175 }
4176
4177 /* Return the magic cookie that we use to hold the return address
4178 during early compilation. */
4179
4180 rtx
4181 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4182 {
4183 if (count != 0)
4184 return NULL;
4185 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4186 }
4187
4188 /* Split this value after reload, now that we know where the return
4189 address is saved. */
4190
4191 void
4192 ia64_split_return_addr_rtx (rtx dest)
4193 {
4194 rtx src;
4195
4196 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4197 {
4198 if (current_frame_info.r[reg_save_b0] != 0)
4199 {
4200 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4201 reg_emitted (reg_save_b0);
4202 }
4203 else
4204 {
4205 HOST_WIDE_INT off;
4206 unsigned int regno;
4207 rtx off_r;
4208
4209 /* Compute offset from CFA for BR0. */
4210 /* ??? Must be kept in sync with ia64_expand_prologue. */
4211 off = (current_frame_info.spill_cfa_off
4212 + current_frame_info.spill_size);
4213 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4214 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4215 off -= 8;
4216
4217 /* Convert CFA offset to a register based offset. */
4218 if (frame_pointer_needed)
4219 src = hard_frame_pointer_rtx;
4220 else
4221 {
4222 src = stack_pointer_rtx;
4223 off += current_frame_info.total_size;
4224 }
4225
4226 /* Load address into scratch register. */
4227 off_r = GEN_INT (off);
4228 if (satisfies_constraint_I (off_r))
4229 emit_insn (gen_adddi3 (dest, src, off_r));
4230 else
4231 {
4232 emit_move_insn (dest, off_r);
4233 emit_insn (gen_adddi3 (dest, src, dest));
4234 }
4235
4236 src = gen_rtx_MEM (Pmode, dest);
4237 }
4238 }
4239 else
4240 src = gen_rtx_REG (DImode, BR_REG (0));
4241
4242 emit_move_insn (dest, src);
4243 }
4244
4245 int
4246 ia64_hard_regno_rename_ok (int from, int to)
4247 {
4248 /* Don't clobber any of the registers we reserved for the prologue. */
4249 unsigned int r;
4250
4251 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4252 if (to == current_frame_info.r[r]
4253 || from == current_frame_info.r[r]
4254 || to == emitted_frame_related_regs[r]
4255 || from == emitted_frame_related_regs[r])
4256 return 0;
4257
4258 /* Don't use output registers outside the register frame. */
4259 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4260 return 0;
4261
4262 /* Retain even/oddness on predicate register pairs. */
4263 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4264 return (from & 1) == (to & 1);
4265
4266 return 1;
4267 }
4268
4269 /* Implement TARGET_HARD_REGNO_NREGS.
4270
4271 ??? We say that BImode PR values require two registers. This allows us to
4272 easily store the normal and inverted values. We use CCImode to indicate
4273 a single predicate register. */
4274
4275 static unsigned int
4276 ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4277 {
4278 if (regno == PR_REG (0) && mode == DImode)
4279 return 64;
4280 if (PR_REGNO_P (regno) && (mode) == BImode)
4281 return 2;
4282 if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4283 return 1;
4284 if (FR_REGNO_P (regno) && mode == XFmode)
4285 return 1;
4286 if (FR_REGNO_P (regno) && mode == RFmode)
4287 return 1;
4288 if (FR_REGNO_P (regno) && mode == XCmode)
4289 return 2;
4290 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4291 }
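
/* For example (a sketch of the rules above, not extra cases): with
   8-byte words a TFmode value in a general register needs
   CEIL (16, UNITS_PER_WORD) == 2 registers, an XFmode or RFmode value
   fits in a single FP register, and a BImode predicate occupies two PR
   registers (the normal and inverted bits).  */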
4292
4293 /* Implement TARGET_HARD_REGNO_MODE_OK. */
4294
4295 static bool
4296 ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4297 {
4298 if (FR_REGNO_P (regno))
4299 return (GET_MODE_CLASS (mode) != MODE_CC
4300 && mode != BImode
4301 && mode != TFmode);
4302
4303 if (PR_REGNO_P (regno))
4304 return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4305
4306 if (GR_REGNO_P (regno))
4307 return mode != XFmode && mode != XCmode && mode != RFmode;
4308
4309 if (AR_REGNO_P (regno))
4310 return mode == DImode;
4311
4312 if (BR_REGNO_P (regno))
4313 return mode == DImode;
4314
4315 return false;
4316 }
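
/* For example, XFmode and RFmode values are accepted only by the FP
   registers, TFmode is rejected by the FP registers and must live in
   general registers, BImode is rejected by the FP registers but accepted
   by the predicate registers, and the application and branch registers
   accept DImode only.  */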
4317
4318 /* Implement TARGET_MODES_TIEABLE_P.
4319
4320 Don't tie integer and FP modes, as that causes us to get integer registers
4321 allocated for FP instructions. XFmode only supported in FP registers so
4322 we can't tie it with any other modes. */
4323
4324 static bool
4325 ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4326 {
4327 return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4328 && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4329 == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4330 && (mode1 == BImode) == (mode2 == BImode));
4331 }
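
/* For example, DImode ties with SImode and DFmode ties with SFmode
   (same class, and neither side is XFmode/XCmode/RFmode or BImode),
   but DFmode does not tie with XFmode, and integer modes never tie
   with FP modes.  */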
4332
4333 /* Target hook for assembling integer objects. Handle word-sized
4334 aligned objects and detect the cases when @fptr is needed. */
4335
4336 static bool
4337 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4338 {
4339 if (size == POINTER_SIZE / BITS_PER_UNIT
4340 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4341 && GET_CODE (x) == SYMBOL_REF
4342 && SYMBOL_REF_FUNCTION_P (x))
4343 {
4344 static const char * const directive[2][2] = {
4345 /* 64-bit pointer */ /* 32-bit pointer */
4346 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4347 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4348 };
4349 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4350 output_addr_const (asm_out_file, x);
4351 fputs (")\n", asm_out_file);
4352 return true;
4353 }
4354 return default_assemble_integer (x, size, aligned_p);
4355 }
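
/* For example, assembling an aligned word-sized pointer to a function
   foo under the default (PIC) model emits "data8\t@fptr(foo)"; the
   unaligned and 32-bit-pointer cases use the data8.ua/data4 variants
   from the table above.  */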
4356
4357 /* Emit the function prologue. */
4358
4359 static void
4360 ia64_output_function_prologue (FILE *file)
4361 {
4362 int mask, grsave, grsave_prev;
4363
4364 if (current_frame_info.need_regstk)
4365 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4366 current_frame_info.n_input_regs,
4367 current_frame_info.n_local_regs,
4368 current_frame_info.n_output_regs,
4369 current_frame_info.n_rotate_regs);
4370
4371 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4372 return;
4373
4374 /* Emit the .prologue directive. */
4375
4376 mask = 0;
4377 grsave = grsave_prev = 0;
4378 if (current_frame_info.r[reg_save_b0] != 0)
4379 {
4380 mask |= 8;
4381 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4382 }
4383 if (current_frame_info.r[reg_save_ar_pfs] != 0
4384 && (grsave_prev == 0
4385 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4386 {
4387 mask |= 4;
4388 if (grsave_prev == 0)
4389 grsave = current_frame_info.r[reg_save_ar_pfs];
4390 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4391 }
4392 if (current_frame_info.r[reg_fp] != 0
4393 && (grsave_prev == 0
4394 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4395 {
4396 mask |= 2;
4397 if (grsave_prev == 0)
4398 grsave = HARD_FRAME_POINTER_REGNUM;
4399 grsave_prev = current_frame_info.r[reg_fp];
4400 }
4401 if (current_frame_info.r[reg_save_pr] != 0
4402 && (grsave_prev == 0
4403 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4404 {
4405 mask |= 1;
4406 if (grsave_prev == 0)
4407 grsave = current_frame_info.r[reg_save_pr];
4408 }
4409
4410 if (mask && TARGET_GNU_AS)
4411 fprintf (file, "\t.prologue %d, %d\n", mask,
4412 ia64_dbx_register_number (grsave));
4413 else
4414 fputs ("\t.prologue\n", file);
4415
4416 /* Emit a .spill directive, if necessary, to relocate the base of
4417 the register spill area. */
4418 if (current_frame_info.spill_cfa_off != -16)
4419 fprintf (file, "\t.spill %ld\n",
4420 (long) (current_frame_info.spill_cfa_off
4421 + current_frame_info.spill_size));
4422 }
4423
4424 /* Emit the .body directive at the scheduled end of the prologue. */
4425
4426 static void
4427 ia64_output_function_end_prologue (FILE *file)
4428 {
4429 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4430 return;
4431
4432 fputs ("\t.body\n", file);
4433 }
4434
4435 /* Emit the function epilogue. */
4436
4437 static void
4438 ia64_output_function_epilogue (FILE *)
4439 {
4440 int i;
4441
4442 if (current_frame_info.r[reg_fp])
4443 {
4444 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4445 reg_names[HARD_FRAME_POINTER_REGNUM]
4446 = reg_names[current_frame_info.r[reg_fp]];
4447 reg_names[current_frame_info.r[reg_fp]] = tmp;
4448 reg_emitted (reg_fp);
4449 }
4450 if (! TARGET_REG_NAMES)
4451 {
4452 for (i = 0; i < current_frame_info.n_input_regs; i++)
4453 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4454 for (i = 0; i < current_frame_info.n_local_regs; i++)
4455 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4456 for (i = 0; i < current_frame_info.n_output_regs; i++)
4457 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4458 }
4459
4460 current_frame_info.initialized = 0;
4461 }
4462
4463 int
4464 ia64_dbx_register_number (int regno)
4465 {
4466 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4467 from its home at loc79 to something inside the register frame. We
4468 must perform the same renumbering here for the debug info. */
4469 if (current_frame_info.r[reg_fp])
4470 {
4471 if (regno == HARD_FRAME_POINTER_REGNUM)
4472 regno = current_frame_info.r[reg_fp];
4473 else if (regno == current_frame_info.r[reg_fp])
4474 regno = HARD_FRAME_POINTER_REGNUM;
4475 }
4476
4477 if (IN_REGNO_P (regno))
4478 return 32 + regno - IN_REG (0);
4479 else if (LOC_REGNO_P (regno))
4480 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4481 else if (OUT_REGNO_P (regno))
4482 return (32 + current_frame_info.n_input_regs
4483 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4484 else
4485 return regno;
4486 }
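
/* Worked example (hypothetical frame, for illustration only): with two
   input and three local registers, in1 maps to debug register 33,
   loc0 to 32 + 2 == 34, out0 to 32 + 2 + 3 == 37, and registers outside
   the stacked partition are returned unchanged.  */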
4487
4488 /* Implement TARGET_TRAMPOLINE_INIT.
4489
4490 The trampoline should set the static chain pointer to value placed
4491 into the trampoline and should branch to the specified routine.
4492 To make the normal indirect-subroutine calling convention work,
4493 the trampoline must look like a function descriptor; the first
4494 word being the target address and the second being the target's
4495 global pointer.
4496
4497 We abuse the concept of a global pointer by arranging for it
4498 to point to the data we need to load. The complete trampoline
4499 has the following form:
4500
4501 +-------------------+ \
4502 TRAMP: | __ia64_trampoline | |
4503 +-------------------+ > fake function descriptor
4504 | TRAMP+16 | |
4505 +-------------------+ /
4506 | target descriptor |
4507 +-------------------+
4508 | static link |
4509 +-------------------+
4510 */
4511
4512 static void
4513 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4514 {
4515 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4516 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4517
4518 /* The Intel assembler requires that the global __ia64_trampoline symbol
4519      be declared explicitly.  */
4520 if (!TARGET_GNU_AS)
4521 {
4522 static bool declared_ia64_trampoline = false;
4523
4524 if (!declared_ia64_trampoline)
4525 {
4526 declared_ia64_trampoline = true;
4527 (*targetm.asm_out.globalize_label) (asm_out_file,
4528 "__ia64_trampoline");
4529 }
4530 }
4531
4532 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4533 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4534 fnaddr = convert_memory_address (Pmode, fnaddr);
4535 static_chain = convert_memory_address (Pmode, static_chain);
4536
4537 /* Load up our iterator. */
4538 addr_reg = copy_to_reg (addr);
4539 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4540
4541 /* The first two words are the fake descriptor:
4542 __ia64_trampoline, ADDR+16. */
4543 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4544 if (TARGET_ABI_OPEN_VMS)
4545 {
4546 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4547 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4548 relocation against function symbols to make it identical to the
4549 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4550 strict ELF and dereference to get the bare code address. */
4551 rtx reg = gen_reg_rtx (Pmode);
4552 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4553 emit_move_insn (reg, tramp);
4554 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4555 tramp = reg;
4556 }
4557 emit_move_insn (m_tramp, tramp);
4558 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4559 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4560
4561 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4562 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4563 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4564
4565 /* The third word is the target descriptor. */
4566 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4567 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4568 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4569
4570 /* The fourth word is the static chain. */
4571 emit_move_insn (m_tramp, static_chain);
4572 }
4573 \f
4574 /* Do any needed setup for a variadic function. CUM has not been updated
4575 for the last named argument which has type TYPE and mode MODE.
4576
4577 We generate the actual spill instructions during prologue generation. */
4578
4579 static void
4580 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4581 tree type, int * pretend_size,
4582 int second_time ATTRIBUTE_UNUSED)
4583 {
4584 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4585
4586 /* Skip the current argument. */
4587 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4588
4589 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4590 {
4591 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4592 *pretend_size = n * UNITS_PER_WORD;
4593 cfun->machine->n_varargs = n;
4594 }
4595 }
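
/* Worked example (illustration only): for int f (int a, int b, ...),
   NEXT_CUM.words is 2 once the last named argument has been skipped,
   so n == 6, *PRETEND_SIZE becomes 48 bytes and the prologue later
   spills the remaining six argument registers to the varargs area.  */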
4596
4597 /* Check whether TYPE is a homogeneous floating point aggregate. If
4598 it is, return the mode of the floating point type that appears
4599    in all leaves.  If it is not, return VOIDmode.
4600 
4601    An aggregate is a homogeneous floating point aggregate if all
4602    fields/elements in it have the same floating point type (e.g.,
4603 SFmode). 128-bit quad-precision floats are excluded.
4604
4605 Variable sized aggregates should never arrive here, since we should
4606 have already decided to pass them by reference. Top-level zero-sized
4607 aggregates are excluded because our parallels crash the middle-end. */
4608
4609 static machine_mode
4610 hfa_element_mode (const_tree type, bool nested)
4611 {
4612 machine_mode element_mode = VOIDmode;
4613 machine_mode mode;
4614 enum tree_code code = TREE_CODE (type);
4615 int know_element_mode = 0;
4616 tree t;
4617
4618 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4619 return VOIDmode;
4620
4621 switch (code)
4622 {
4623 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4624 case BOOLEAN_TYPE: case POINTER_TYPE:
4625 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4626 case LANG_TYPE: case FUNCTION_TYPE:
4627 return VOIDmode;
4628
4629 /* Fortran complex types are supposed to be HFAs, so we need to handle
4630 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4631 types though. */
4632 case COMPLEX_TYPE:
4633 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4634 && TYPE_MODE (type) != TCmode)
4635 return GET_MODE_INNER (TYPE_MODE (type));
4636 else
4637 return VOIDmode;
4638
4639 case REAL_TYPE:
4640 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4641 mode if this is contained within an aggregate. */
4642 if (nested && TYPE_MODE (type) != TFmode)
4643 return TYPE_MODE (type);
4644 else
4645 return VOIDmode;
4646
4647 case ARRAY_TYPE:
4648 return hfa_element_mode (TREE_TYPE (type), 1);
4649
4650 case RECORD_TYPE:
4651 case UNION_TYPE:
4652 case QUAL_UNION_TYPE:
4653 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4654 {
4655 if (TREE_CODE (t) != FIELD_DECL)
4656 continue;
4657
4658 mode = hfa_element_mode (TREE_TYPE (t), 1);
4659 if (know_element_mode)
4660 {
4661 if (mode != element_mode)
4662 return VOIDmode;
4663 }
4664 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4665 return VOIDmode;
4666 else
4667 {
4668 know_element_mode = 1;
4669 element_mode = mode;
4670 }
4671 }
4672 return element_mode;
4673
4674 default:
4675 /* If we reach here, we probably have some front-end specific type
4676 that the backend doesn't know about. This can happen via the
4677 aggregate_value_p call in init_function_start. All we can do is
4678 ignore unknown tree types. */
4679 return VOIDmode;
4680 }
4681
4682 return VOIDmode;
4683 }
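
/* Examples of the definition above (illustrative types only):
   struct { float x, y, z; } and float[4] are HFAs with element mode
   SFmode, _Complex double is treated as a DFmode HFA, while
   struct { float x; double y; } and struct { float x; int n; } are not
   HFAs and yield VOIDmode.  */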
4684
4685 /* Return the number of words required to hold a quantity of TYPE and MODE
4686 when passed as an argument. */
4687 static int
4688 ia64_function_arg_words (const_tree type, machine_mode mode)
4689 {
4690 int words;
4691
4692 if (mode == BLKmode)
4693 words = int_size_in_bytes (type);
4694 else
4695 words = GET_MODE_SIZE (mode);
4696
4697 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4698 }
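
/* For example, with 8-byte words a 12-byte BLKmode aggregate occupies
   (12 + 7) / 8 == 2 argument slots, while a DFmode scalar occupies 1.  */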
4699
4700 /* Return the number of registers that should be skipped so the current
4701 argument (described by TYPE and WORDS) will be properly aligned.
4702
4703 Integer and float arguments larger than 8 bytes start at the next
4704 even boundary. Aggregates larger than 8 bytes start at the next
4705 even boundary if the aggregate has 16 byte alignment. Note that
4706 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4707 but are still to be aligned in registers.
4708
4709 ??? The ABI does not specify how to handle aggregates with
4710 alignment from 9 to 15 bytes, or greater than 16. We handle them
4711 all as if they had 16 byte alignment. Such aggregates can occur
4712 only if gcc extensions are used. */
4713 static int
4714 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4715 const_tree type, int words)
4716 {
4717 /* No registers are skipped on VMS. */
4718 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4719 return 0;
4720
4721 if (type
4722 && TREE_CODE (type) != INTEGER_TYPE
4723 && TREE_CODE (type) != REAL_TYPE)
4724 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4725 else
4726 return words > 1;
4727 }
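
/* For example, when CUM->words is odd (the next free slot is odd), a
   16-byte-aligned aggregate or a two-slot integer argument skips one
   slot to restore even alignment, whereas an 8-byte-aligned aggregate
   or a single-slot scalar does not.  */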
4728
4729 /* Return rtx for register where argument is passed, or zero if it is passed
4730 on the stack. */
4731 /* ??? 128-bit quad-precision floats are always passed in general
4732 registers. */
4733
4734 static rtx
4735 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4736 const_tree type, bool named, bool incoming)
4737 {
4738 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4739
4740 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4741 int words = ia64_function_arg_words (type, mode);
4742 int offset = ia64_function_arg_offset (cum, type, words);
4743 machine_mode hfa_mode = VOIDmode;
4744
4745   /* For OpenVMS, emit the instruction setting up the argument register here,
4746      where we know it will be emitted together with the other argument-setup
4747      insns.  This is not conceptually the best place to do it, but it is the
4748      easiest, as we have convenient access to the cumulative args info.  */
4749
4750 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4751 && named == 1)
4752 {
4753 unsigned HOST_WIDE_INT regval = cum->words;
4754 int i;
4755
4756 for (i = 0; i < 8; i++)
4757 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4758
4759 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4760 GEN_INT (regval));
4761 }
4762
4763 /* If all argument slots are used, then it must go on the stack. */
4764 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4765 return 0;
4766
4767 /* On OpenVMS argument is either in Rn or Fn. */
4768 if (TARGET_ABI_OPEN_VMS)
4769 {
4770 if (FLOAT_MODE_P (mode))
4771 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4772 else
4773 return gen_rtx_REG (mode, basereg + cum->words);
4774 }
4775
4776 /* Check for and handle homogeneous FP aggregates. */
4777 if (type)
4778 hfa_mode = hfa_element_mode (type, 0);
4779
4780 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4781 and unprototyped hfas are passed specially. */
4782 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4783 {
4784 rtx loc[16];
4785 int i = 0;
4786 int fp_regs = cum->fp_regs;
4787 int int_regs = cum->words + offset;
4788 int hfa_size = GET_MODE_SIZE (hfa_mode);
4789 int byte_size;
4790 int args_byte_size;
4791
4792 /* If prototyped, pass it in FR regs then GR regs.
4793 If not prototyped, pass it in both FR and GR regs.
4794
4795 If this is an SFmode aggregate, then it is possible to run out of
4796 FR regs while GR regs are still left. In that case, we pass the
4797 remaining part in the GR regs. */
4798
4799 /* Fill the FP regs. We do this always. We stop if we reach the end
4800 of the argument, the last FP register, or the last argument slot. */
4801
4802 byte_size = ((mode == BLKmode)
4803 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4804 args_byte_size = int_regs * UNITS_PER_WORD;
4805 offset = 0;
4806 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4807 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4808 {
4809 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4810 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4811 + fp_regs)),
4812 GEN_INT (offset));
4813 offset += hfa_size;
4814 args_byte_size += hfa_size;
4815 fp_regs++;
4816 }
4817
4818 /* If no prototype, then the whole thing must go in GR regs. */
4819 if (! cum->prototype)
4820 offset = 0;
4821 /* If this is an SFmode aggregate, then we might have some left over
4822 that needs to go in GR regs. */
4823 else if (byte_size != offset)
4824 int_regs += offset / UNITS_PER_WORD;
4825
4826 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4827
4828 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4829 {
4830 machine_mode gr_mode = DImode;
4831 unsigned int gr_size;
4832
4833 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4834 then this goes in a GR reg left adjusted/little endian, right
4835 adjusted/big endian. */
4836 /* ??? Currently this is handled wrong, because 4-byte hunks are
4837 always right adjusted/little endian. */
4838 if (offset & 0x4)
4839 gr_mode = SImode;
4840 /* If we have an even 4 byte hunk because the aggregate is a
4841 multiple of 4 bytes in size, then this goes in a GR reg right
4842 adjusted/little endian. */
4843 else if (byte_size - offset == 4)
4844 gr_mode = SImode;
4845
4846 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4847 gen_rtx_REG (gr_mode, (basereg
4848 + int_regs)),
4849 GEN_INT (offset));
4850
4851 gr_size = GET_MODE_SIZE (gr_mode);
4852 offset += gr_size;
4853 if (gr_size == UNITS_PER_WORD
4854 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4855 int_regs++;
4856 else if (gr_size > UNITS_PER_WORD)
4857 int_regs += gr_size / UNITS_PER_WORD;
4858 }
4859 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4860 }
4861
4862 /* Integral and aggregates go in general registers. If we have run out of
4863 FR registers, then FP values must also go in general registers. This can
4864 happen when we have a SFmode HFA. */
4865 else if (mode == TFmode || mode == TCmode
4866 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4867 {
4868 int byte_size = ((mode == BLKmode)
4869 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4870 if (BYTES_BIG_ENDIAN
4871 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4872 && byte_size < UNITS_PER_WORD
4873 && byte_size > 0)
4874 {
4875 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4876 gen_rtx_REG (DImode,
4877 (basereg + cum->words
4878 + offset)),
4879 const0_rtx);
4880 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4881 }
4882 else
4883 return gen_rtx_REG (mode, basereg + cum->words + offset);
4884
4885 }
4886
4887 /* If there is a prototype, then FP values go in a FR register when
4888 named, and in a GR register when unnamed. */
4889 else if (cum->prototype)
4890 {
4891 if (named)
4892 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4893 /* In big-endian mode, an anonymous SFmode value must be represented
4894 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4895 the value into the high half of the general register. */
4896 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4897 return gen_rtx_PARALLEL (mode,
4898 gen_rtvec (1,
4899 gen_rtx_EXPR_LIST (VOIDmode,
4900 gen_rtx_REG (DImode, basereg + cum->words + offset),
4901 const0_rtx)));
4902 else
4903 return gen_rtx_REG (mode, basereg + cum->words + offset);
4904 }
4905 /* If there is no prototype, then FP values go in both FR and GR
4906 registers. */
4907 else
4908 {
4909 /* See comment above. */
4910 machine_mode inner_mode =
4911 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4912
4913 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4914 gen_rtx_REG (mode, (FR_ARG_FIRST
4915 + cum->fp_regs)),
4916 const0_rtx);
4917 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4918 gen_rtx_REG (inner_mode,
4919 (basereg + cum->words
4920 + offset)),
4921 const0_rtx);
4922
4923 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4924 }
4925 }
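
/* Worked example (illustration only): a named, prototyped argument of
   type struct { float x, y, z; } with free FP and GR slots is passed as
   a PARALLEL of three SFmode FP argument registers at byte offsets
   0, 4 and 8; with no prototype the aggregate is additionally described
   in GR slots, as the comments above explain.  */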
4926
4927 /* Implement TARGET_FUNCTION_ARG target hook.  */
4928
4929 static rtx
4930 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4931 const_tree type, bool named)
4932 {
4933 return ia64_function_arg_1 (cum, mode, type, named, false);
4934 }
4935
4936 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4937
4938 static rtx
4939 ia64_function_incoming_arg (cumulative_args_t cum,
4940 machine_mode mode,
4941 const_tree type, bool named)
4942 {
4943 return ia64_function_arg_1 (cum, mode, type, named, true);
4944 }
4945
4946 /* Return number of bytes, at the beginning of the argument, that must be
4947    put in registers.  0 if the argument is entirely in registers or entirely
4948 in memory. */
4949
4950 static int
4951 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4952 tree type, bool named ATTRIBUTE_UNUSED)
4953 {
4954 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4955
4956 int words = ia64_function_arg_words (type, mode);
4957 int offset = ia64_function_arg_offset (cum, type, words);
4958
4959 /* If all argument slots are used, then it must go on the stack. */
4960 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4961 return 0;
4962
4963 /* It doesn't matter whether the argument goes in FR or GR regs. If
4964 it fits within the 8 argument slots, then it goes entirely in
4965 registers. If it extends past the last argument slot, then the rest
4966 goes on the stack. */
4967
4968 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4969 return 0;
4970
4971 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4972 }
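
/* Worked example (hypothetical call, for illustration only): a four-slot
   aggregate arriving with CUM->words == 6 and no alignment padding gets
   its first two slots in registers and the rest on the stack, so we
   return (8 - 6) * 8 == 16 bytes.  */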
4973
4974 /* Return ivms_arg_type based on machine_mode. */
4975
4976 static enum ivms_arg_type
4977 ia64_arg_type (machine_mode mode)
4978 {
4979 switch (mode)
4980 {
4981 case E_SFmode:
4982 return FS;
4983 case E_DFmode:
4984 return FT;
4985 default:
4986 return I64;
4987 }
4988 }
4989
4990 /* Update CUM to point after this argument. This is patterned after
4991 ia64_function_arg. */
4992
4993 static void
4994 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4995 const_tree type, bool named)
4996 {
4997 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4998 int words = ia64_function_arg_words (type, mode);
4999 int offset = ia64_function_arg_offset (cum, type, words);
5000 machine_mode hfa_mode = VOIDmode;
5001
5002 /* If all arg slots are already full, then there is nothing to do. */
5003 if (cum->words >= MAX_ARGUMENT_SLOTS)
5004 {
5005 cum->words += words + offset;
5006 return;
5007 }
5008
5009 cum->atypes[cum->words] = ia64_arg_type (mode);
5010 cum->words += words + offset;
5011
5012 /* On OpenVMS argument is either in Rn or Fn. */
5013 if (TARGET_ABI_OPEN_VMS)
5014 {
5015 cum->int_regs = cum->words;
5016 cum->fp_regs = cum->words;
5017 return;
5018 }
5019
5020 /* Check for and handle homogeneous FP aggregates. */
5021 if (type)
5022 hfa_mode = hfa_element_mode (type, 0);
5023
5024 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
5025 and unprototyped hfas are passed specially. */
5026 if (hfa_mode != VOIDmode && (! cum->prototype || named))
5027 {
5028 int fp_regs = cum->fp_regs;
5029 /* This is the original value of cum->words + offset. */
5030 int int_regs = cum->words - words;
5031 int hfa_size = GET_MODE_SIZE (hfa_mode);
5032 int byte_size;
5033 int args_byte_size;
5034
5035 /* If prototyped, pass it in FR regs then GR regs.
5036 If not prototyped, pass it in both FR and GR regs.
5037
5038 If this is an SFmode aggregate, then it is possible to run out of
5039 FR regs while GR regs are still left. In that case, we pass the
5040 remaining part in the GR regs. */
5041
5042 /* Fill the FP regs. We do this always. We stop if we reach the end
5043 of the argument, the last FP register, or the last argument slot. */
5044
5045 byte_size = ((mode == BLKmode)
5046 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
5047 args_byte_size = int_regs * UNITS_PER_WORD;
5048 offset = 0;
5049 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5050 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5051 {
5052 offset += hfa_size;
5053 args_byte_size += hfa_size;
5054 fp_regs++;
5055 }
5056
5057 cum->fp_regs = fp_regs;
5058 }
5059
5060 /* Integral and aggregates go in general registers. So do TFmode FP values.
5061 If we have run out of FR registers, then other FP values must also go in
5062 general registers. This can happen when we have a SFmode HFA. */
5063 else if (mode == TFmode || mode == TCmode
5064 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
5065 cum->int_regs = cum->words;
5066
5067 /* If there is a prototype, then FP values go in a FR register when
5068 named, and in a GR register when unnamed. */
5069 else if (cum->prototype)
5070 {
5071 if (! named)
5072 cum->int_regs = cum->words;
5073 else
5074 /* ??? Complex types should not reach here. */
5075 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5076 }
5077 /* If there is no prototype, then FP values go in both FR and GR
5078 registers. */
5079 else
5080 {
5081 /* ??? Complex types should not reach here. */
5082 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5083 cum->int_regs = cum->words;
5084 }
5085 }
5086
5087 /* Arguments with alignment larger than 8 bytes start at the next even
5088    boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
5089 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5090
5091 static unsigned int
5092 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5093 {
5094 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5095 return PARM_BOUNDARY * 2;
5096
5097 if (type)
5098 {
5099 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5100 return PARM_BOUNDARY * 2;
5101 else
5102 return PARM_BOUNDARY;
5103 }
5104
5105 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5106 return PARM_BOUNDARY * 2;
5107 else
5108 return PARM_BOUNDARY;
5109 }
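
/* For example, an int or double argument gets the normal PARM_BOUNDARY
   alignment, while a 16-byte-aligned aggregate, or a TFmode argument on
   ILP32 HPUX, is given PARM_BOUNDARY * 2 alignment.  */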
5110
5111 /* True if it is OK to do sibling call optimization for the specified
5112 call expression EXP. DECL will be the called function, or NULL if
5113 this is an indirect call. */
5114 static bool
5115 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5116 {
5117 /* We can't perform a sibcall if the current function has the syscall_linkage
5118 attribute. */
5119 if (lookup_attribute ("syscall_linkage",
5120 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5121 return false;
5122
5123 /* We must always return with our current GP. This means we can
5124 only sibcall to functions defined in the current module unless
5125 TARGET_CONST_GP is set to true. */
5126 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5127 }
5128 \f
5129
5130 /* Implement va_arg. */
5131
5132 static tree
5133 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5134 gimple_seq *post_p)
5135 {
5136 /* Variable sized types are passed by reference. */
5137 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5138 {
5139 tree ptrtype = build_pointer_type (type);
5140 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5141 return build_va_arg_indirect_ref (addr);
5142 }
5143
5144 /* Aggregate arguments with alignment larger than 8 bytes start at
5145 the next even boundary. Integer and floating point arguments
5146 do so if they are larger than 8 bytes, whether or not they are
5147 also aligned larger than 8 bytes. */
5148 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5149 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5150 {
5151 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5152 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5153 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5154 gimplify_assign (unshare_expr (valist), t, pre_p);
5155 }
5156
5157 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5158 }
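
/* For example, for a 16-byte-aligned aggregate the code above first
   rounds VALIST up to an even word boundary, i.e. with 8-byte words it
   computes valist = (valist + 15) & -16 before falling back to the
   standard va_arg expansion.  */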
5159 \f
5160 /* Return true if the function return value is returned in memory.  Return
5161    false if it is in a register.  */
5162
5163 static bool
5164 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5165 {
5166 machine_mode mode;
5167 machine_mode hfa_mode;
5168 HOST_WIDE_INT byte_size;
5169
5170 mode = TYPE_MODE (valtype);
5171 byte_size = GET_MODE_SIZE (mode);
5172 if (mode == BLKmode)
5173 {
5174 byte_size = int_size_in_bytes (valtype);
5175 if (byte_size < 0)
5176 return true;
5177 }
5178
5179   /* HFAs with up to 8 elements are returned in the FP argument registers.  */
5180
5181 hfa_mode = hfa_element_mode (valtype, 0);
5182 if (hfa_mode != VOIDmode)
5183 {
5184 int hfa_size = GET_MODE_SIZE (hfa_mode);
5185
5186 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5187 return true;
5188 else
5189 return false;
5190 }
5191 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5192 return true;
5193 else
5194 return false;
5195 }
5196
5197 /* Return rtx for register that holds the function return value. */
5198
5199 static rtx
5200 ia64_function_value (const_tree valtype,
5201 const_tree fn_decl_or_type,
5202 bool outgoing ATTRIBUTE_UNUSED)
5203 {
5204 machine_mode mode;
5205 machine_mode hfa_mode;
5206 int unsignedp;
5207 const_tree func = fn_decl_or_type;
5208
5209 if (fn_decl_or_type
5210 && !DECL_P (fn_decl_or_type))
5211 func = NULL;
5212
5213 mode = TYPE_MODE (valtype);
5214 hfa_mode = hfa_element_mode (valtype, 0);
5215
5216 if (hfa_mode != VOIDmode)
5217 {
5218 rtx loc[8];
5219 int i;
5220 int hfa_size;
5221 int byte_size;
5222 int offset;
5223
5224 hfa_size = GET_MODE_SIZE (hfa_mode);
5225 byte_size = ((mode == BLKmode)
5226 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5227 offset = 0;
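/* Build one EXPR_LIST per HFA element, assigning consecutive FP result
   registers at increasing byte offsets.  */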
5228 for (i = 0; offset < byte_size; i++)
5229 {
5230 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5231 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5232 GEN_INT (offset));
5233 offset += hfa_size;
5234 }
5235 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5236 }
5237 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5238 return gen_rtx_REG (mode, FR_ARG_FIRST);
5239 else
5240 {
5241 bool need_parallel = false;
5242
5243 /* In big-endian mode, we need to manage the layout of aggregates
5244 in the registers so that we get the bits properly aligned in
5245 the highpart of the registers. */
5246 if (BYTES_BIG_ENDIAN
5247 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5248 need_parallel = true;
5249
5250 /* Something like struct S { long double x; char a[0]; } is not an
5251 HFA structure, and therefore doesn't go in fp registers. But
5252 the middle-end will give it XFmode anyway, and XFmode values
5253 don't normally fit in integer registers. So we need to smuggle
5254 the value inside a parallel. */
5255 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5256 need_parallel = true;
5257
5258 if (need_parallel)
5259 {
5260 rtx loc[8];
5261 int offset;
5262 int bytesize;
5263 int i;
5264
5265 offset = 0;
5266 bytesize = int_size_in_bytes (valtype);
5267 /* An empty PARALLEL is invalid here, but the return value
5268 doesn't matter for empty structs. */
5269 if (bytesize == 0)
5270 return gen_rtx_REG (mode, GR_RET_FIRST);
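/* Otherwise spread the value across consecutive general return registers,
   one word per register.  */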
5271 for (i = 0; offset < bytesize; i++)
5272 {
5273 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5274 gen_rtx_REG (DImode,
5275 GR_RET_FIRST + i),
5276 GEN_INT (offset));
5277 offset += UNITS_PER_WORD;
5278 }
5279 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5280 }
5281
5282 mode = promote_function_mode (valtype, mode, &unsignedp,
5283 func ? TREE_TYPE (func) : NULL_TREE,
5284 true);
5285
5286 return gen_rtx_REG (mode, GR_RET_FIRST);
5287 }
5288 }
5289
5290 /* Worker function for TARGET_LIBCALL_VALUE. */
5291
5292 static rtx
5293 ia64_libcall_value (machine_mode mode,
5294 const_rtx fun ATTRIBUTE_UNUSED)
5295 {
5296 return gen_rtx_REG (mode,
5297 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5298 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5299 && (mode) != TFmode)
5300 ? FR_RET_FIRST : GR_RET_FIRST));
5301 }
5302
5303 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5304
5305 static bool
5306 ia64_function_value_regno_p (const unsigned int regno)
5307 {
5308 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5309 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5310 }
5311
5312 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5313 We need to emit DTP-relative relocations. */
5314
5315 static void
5316 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5317 {
5318 gcc_assert (size == 4 || size == 8);
5319 if (size == 4)
5320 fputs ("\tdata4.ua\t@dtprel(", file);
5321 else
5322 fputs ("\tdata8.ua\t@dtprel(", file);
5323 output_addr_const (file, x);
5324 fputs (")", file);
5325 }
5326
5327 /* Print a memory address as an operand to reference that memory location. */
5328
5329 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5330 also call this from ia64_print_operand for memory addresses. */
5331
5332 static void
5333 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5334 machine_mode /*mode*/,
5335 rtx address ATTRIBUTE_UNUSED)
5336 {
5337 }
5338
5339 /* Print an operand to an assembler instruction.
5340 C Swap and print a comparison operator.
5341 D Print an FP comparison operator.
5342 E Print 32 - constant, for SImode shifts as extract.
5343 e Print 64 - constant, for DImode rotates.
5344 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5345 a floating point register emitted normally.
5346 G A floating point constant.
5347 I Invert a predicate register by adding 1.
5348 J Select the proper predicate register for a condition.
5349 j Select the inverse predicate register for a condition.
5350 O Append .acq for volatile load.
5351 P Postincrement of a MEM.
5352 Q Append .rel for volatile store.
5353 R Print .s .d or nothing for a single, double or no truncation.
5354 S Shift amount for shladd instruction.
5355 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5356 for Intel assembler.
5357 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5358 for Intel assembler.
5359 X A pair of floating point registers.
5360 r Print register name, or constant 0 as r0. HP compatibility for
5361 Linux kernel.
5362 v Print vector constant value as an 8-byte integer value. */
5363
5364 static void
5365 ia64_print_operand (FILE * file, rtx x, int code)
5366 {
5367 const char *str;
5368
5369 switch (code)
5370 {
5371 case 0:
5372 /* Handled below. */
5373 break;
5374
5375 case 'C':
5376 {
5377 enum rtx_code c = swap_condition (GET_CODE (x));
5378 fputs (GET_RTX_NAME (c), file);
5379 return;
5380 }
5381
5382 case 'D':
5383 switch (GET_CODE (x))
5384 {
5385 case NE:
5386 str = "neq";
5387 break;
5388 case UNORDERED:
5389 str = "unord";
5390 break;
5391 case ORDERED:
5392 str = "ord";
5393 break;
5394 case UNLT:
5395 str = "nge";
5396 break;
5397 case UNLE:
5398 str = "ngt";
5399 break;
5400 case UNGT:
5401 str = "nle";
5402 break;
5403 case UNGE:
5404 str = "nlt";
5405 break;
5406 case UNEQ:
5407 case LTGT:
5408 gcc_unreachable ();
5409 default:
5410 str = GET_RTX_NAME (GET_CODE (x));
5411 break;
5412 }
5413 fputs (str, file);
5414 return;
5415
5416 case 'E':
5417 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5418 return;
5419
5420 case 'e':
5421 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5422 return;
5423
5424 case 'F':
5425 if (x == CONST0_RTX (GET_MODE (x)))
5426 str = reg_names [FR_REG (0)];
5427 else if (x == CONST1_RTX (GET_MODE (x)))
5428 str = reg_names [FR_REG (1)];
5429 else
5430 {
5431 gcc_assert (GET_CODE (x) == REG);
5432 str = reg_names [REGNO (x)];
5433 }
5434 fputs (str, file);
5435 return;
5436
5437 case 'G':
5438 {
5439 long val[4];
5440 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5441 if (GET_MODE (x) == SFmode)
5442 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5443 else if (GET_MODE (x) == DFmode)
5444 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5445 & 0xffffffff,
5446 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5447 & 0xffffffff);
5448 else
5449 output_operand_lossage ("invalid %%G mode");
5450 }
5451 return;
5452
5453 case 'I':
5454 fputs (reg_names [REGNO (x) + 1], file);
5455 return;
5456
5457 case 'J':
5458 case 'j':
5459 {
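/* Complementary predicates live in adjacent registers: an EQ condition
   selects regno + 1, and %j then flips the final choice.  */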
5460 unsigned int regno = REGNO (XEXP (x, 0));
5461 if (GET_CODE (x) == EQ)
5462 regno += 1;
5463 if (code == 'j')
5464 regno ^= 1;
5465 fputs (reg_names [regno], file);
5466 }
5467 return;
5468
5469 case 'O':
5470 if (MEM_VOLATILE_P (x))
5471 fputs(".acq", file);
5472 return;
5473
5474 case 'P':
5475 {
5476 HOST_WIDE_INT value;
5477
5478 switch (GET_CODE (XEXP (x, 0)))
5479 {
5480 default:
5481 return;
5482
5483 case POST_MODIFY:
5484 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5485 if (GET_CODE (x) == CONST_INT)
5486 value = INTVAL (x);
5487 else
5488 {
5489 gcc_assert (GET_CODE (x) == REG);
5490 fprintf (file, ", %s", reg_names[REGNO (x)]);
5491 return;
5492 }
5493 break;
5494
5495 case POST_INC:
5496 value = GET_MODE_SIZE (GET_MODE (x));
5497 break;
5498
5499 case POST_DEC:
5500 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5501 break;
5502 }
5503
5504 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5505 return;
5506 }
5507
5508 case 'Q':
5509 if (MEM_VOLATILE_P (x))
5510 fputs(".rel", file);
5511 return;
5512
5513 case 'R':
5514 if (x == CONST0_RTX (GET_MODE (x)))
5515 fputs(".s", file);
5516 else if (x == CONST1_RTX (GET_MODE (x)))
5517 fputs(".d", file);
5518 else if (x == CONST2_RTX (GET_MODE (x)))
5519 ;
5520 else
5521 output_operand_lossage ("invalid %%R value");
5522 return;
5523
5524 case 'S':
5525 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5526 return;
5527
5528 case 'T':
5529 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5530 {
5531 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5532 return;
5533 }
5534 break;
5535
5536 case 'U':
5537 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5538 {
5539 const char *prefix = "0x";
5540 if (INTVAL (x) & 0x80000000)
5541 {
5542 fprintf (file, "0xffffffff");
5543 prefix = "";
5544 }
5545 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5546 return;
5547 }
5548 break;
5549
5550 case 'X':
5551 {
5552 unsigned int regno = REGNO (x);
5553 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5554 }
5555 return;
5556
5557 case 'r':
5558 /* If this operand is the constant zero, write it as register zero.
5559 Any register, zero, or CONST_INT value is OK here. */
5560 if (GET_CODE (x) == REG)
5561 fputs (reg_names[REGNO (x)], file);
5562 else if (x == CONST0_RTX (GET_MODE (x)))
5563 fputs ("r0", file);
5564 else if (GET_CODE (x) == CONST_INT)
5565 output_addr_const (file, x);
5566 else
5567 output_operand_lossage ("invalid %%r value");
5568 return;
5569
5570 case 'v':
5571 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5572 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5573 break;
5574
5575 case '+':
5576 {
5577 const char *which;
5578
5579 /* For conditional branches, returns or calls, substitute
5580 sptk, dptk, dpnt, or spnt for %s. */
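/* .sptk/.spnt are the static predict-taken/not-taken hints; .dptk/.dpnt
   are their dynamically-predicted counterparts.  */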
5581 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5582 if (x)
5583 {
5584 int pred_val = profile_probability::from_reg_br_prob_note
5585 (XINT (x, 0)).to_reg_br_prob_base ();
5586
5587 /* Guess the top and bottom 2% statically predicted. */
5588 if (pred_val < REG_BR_PROB_BASE / 50
5589 && br_prob_note_reliable_p (x))
5590 which = ".spnt";
5591 else if (pred_val < REG_BR_PROB_BASE / 2)
5592 which = ".dpnt";
5593 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5594 || !br_prob_note_reliable_p (x))
5595 which = ".dptk";
5596 else
5597 which = ".sptk";
5598 }
5599 else if (CALL_P (current_output_insn))
5600 which = ".sptk";
5601 else
5602 which = ".dptk";
5603
5604 fputs (which, file);
5605 return;
5606 }
5607
5608 case ',':
5609 x = current_insn_predicate;
5610 if (x)
5611 {
5612 unsigned int regno = REGNO (XEXP (x, 0));
5613 if (GET_CODE (x) == EQ)
5614 regno += 1;
5615 fprintf (file, "(%s) ", reg_names [regno]);
5616 }
5617 return;
5618
5619 default:
5620 output_operand_lossage ("ia64_print_operand: unknown code");
5621 return;
5622 }
5623
5624 switch (GET_CODE (x))
5625 {
5626 /* This happens for the spill/restore instructions. */
5627 case POST_INC:
5628 case POST_DEC:
5629 case POST_MODIFY:
5630 x = XEXP (x, 0);
5631 /* fall through */
5632
5633 case REG:
5634 fputs (reg_names [REGNO (x)], file);
5635 break;
5636
5637 case MEM:
5638 {
5639 rtx addr = XEXP (x, 0);
5640 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5641 addr = XEXP (addr, 0);
5642 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5643 break;
5644 }
5645
5646 default:
5647 output_addr_const (file, x);
5648 break;
5649 }
5650
5651 return;
5652 }
5653
5654 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5655
5656 static bool
5657 ia64_print_operand_punct_valid_p (unsigned char code)
5658 {
5659 return (code == '+' || code == ',');
5660 }
5661 \f
5662 /* Compute a (partial) cost for rtx X. Return true if the complete
5663 cost has been computed, and false if subexpressions should be
5664 scanned. In either case, *TOTAL contains the cost result. */
5665 /* ??? This is incomplete. */
5666
5667 static bool
5668 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5669 int opno ATTRIBUTE_UNUSED,
5670 int *total, bool speed ATTRIBUTE_UNUSED)
5671 {
5672 int code = GET_CODE (x);
5673
5674 switch (code)
5675 {
5676 case CONST_INT:
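/* Immediates that can be encoded directly in the consuming instruction
   are essentially free; larger constants cost an extra instruction to
   materialize.  */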
5677 switch (outer_code)
5678 {
5679 case SET:
5680 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5681 return true;
5682 case PLUS:
5683 if (satisfies_constraint_I (x))
5684 *total = 0;
5685 else if (satisfies_constraint_J (x))
5686 *total = 1;
5687 else
5688 *total = COSTS_N_INSNS (1);
5689 return true;
5690 default:
5691 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5692 *total = 0;
5693 else
5694 *total = COSTS_N_INSNS (1);
5695 return true;
5696 }
5697
5698 case CONST_DOUBLE:
5699 *total = COSTS_N_INSNS (1);
5700 return true;
5701
5702 case CONST:
5703 case SYMBOL_REF:
5704 case LABEL_REF:
5705 *total = COSTS_N_INSNS (3);
5706 return true;
5707
5708 case FMA:
5709 *total = COSTS_N_INSNS (4);
5710 return true;
5711
5712 case MULT:
5713 /* For multiplies wider than HImode, we have to go to the FPU,
5714 which normally involves copies. Plus there's the latency
5715 of the multiply itself, and the latency of the instructions to
5716 transfer integer regs to FP regs. */
5717 if (FLOAT_MODE_P (mode))
5718 *total = COSTS_N_INSNS (4);
5719 else if (GET_MODE_SIZE (mode) > 2)
5720 *total = COSTS_N_INSNS (10);
5721 else
5722 *total = COSTS_N_INSNS (2);
5723 return true;
5724
5725 case PLUS:
5726 case MINUS:
5727 if (FLOAT_MODE_P (mode))
5728 {
5729 *total = COSTS_N_INSNS (4);
5730 return true;
5731 }
5732 /* FALLTHRU */
5733
5734 case ASHIFT:
5735 case ASHIFTRT:
5736 case LSHIFTRT:
5737 *total = COSTS_N_INSNS (1);
5738 return true;
5739
5740 case DIV:
5741 case UDIV:
5742 case MOD:
5743 case UMOD:
5744 /* We make divide expensive, so that divide-by-constant will be
5745 optimized to a multiply. */
5746 *total = COSTS_N_INSNS (60);
5747 return true;
5748
5749 default:
5750 return false;
5751 }
5752 }
5753
5754 /* Calculate the cost of moving data from a register in class FROM to
5755 one in class TO, using MODE. */
5756
5757 static int
5758 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5759 reg_class_t to)
5760 {
5761 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5762 if (to == ADDL_REGS)
5763 to = GR_REGS;
5764 if (from == ADDL_REGS)
5765 from = GR_REGS;
5766
5767 /* All costs are symmetric, so reduce cases by putting the
5768 lower number class as the destination. */
5769 if (from < to)
5770 {
5771 reg_class_t tmp = to;
5772 to = from, from = tmp;
5773 }
5774
5775 /* Moving between FR and GR in XFmode must be more expensive than 2,
5776 so that we get secondary memory reloads. Between FR_REGS,
5777 we have to make this at least as expensive as memory_move_cost
5778 to avoid spectacularly poor register class preferencing. */
5779 if (mode == XFmode || mode == RFmode)
5780 {
5781 if (to != GR_REGS || from != GR_REGS)
5782 return memory_move_cost (mode, to, false);
5783 else
5784 return 3;
5785 }
5786
5787 switch (to)
5788 {
5789 case PR_REGS:
5790 /* Moving between PR registers takes two insns. */
5791 if (from == PR_REGS)
5792 return 3;
5793 /* Moving between PR and anything but GR is impossible. */
5794 if (from != GR_REGS)
5795 return memory_move_cost (mode, to, false);
5796 break;
5797
5798 case BR_REGS:
5799 /* Moving between BR and anything but GR is impossible. */
5800 if (from != GR_REGS && from != GR_AND_BR_REGS)
5801 return memory_move_cost (mode, to, false);
5802 break;
5803
5804 case AR_I_REGS:
5805 case AR_M_REGS:
5806 /* Moving between AR and anything but GR is impossible. */
5807 if (from != GR_REGS)
5808 return memory_move_cost (mode, to, false);
5809 break;
5810
5811 case GR_REGS:
5812 case FR_REGS:
5813 case FP_REGS:
5814 case GR_AND_FR_REGS:
5815 case GR_AND_BR_REGS:
5816 case ALL_REGS:
5817 break;
5818
5819 default:
5820 gcc_unreachable ();
5821 }
5822
5823 return 2;
5824 }
5825
5826 /* Calculate the cost of moving data of MODE from a register to or from
5827 memory. */
5828
5829 static int
5830 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5831 reg_class_t rclass,
5832 bool in ATTRIBUTE_UNUSED)
5833 {
5834 if (rclass == GENERAL_REGS
5835 || rclass == FR_REGS
5836 || rclass == FP_REGS
5837 || rclass == GR_AND_FR_REGS)
5838 return 4;
5839 else
5840 return 10;
5841 }
5842
5843 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5844 on RCLASS to use when copying X into that class. */
5845
5846 static reg_class_t
5847 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5848 {
5849 switch (rclass)
5850 {
5851 case FR_REGS:
5852 case FP_REGS:
5853 /* Don't allow volatile mem reloads into floating point registers.
5854 This is defined to force reload to choose the r/m case instead
5855 of the f/f case when reloading (set (reg fX) (mem/v)). */
5856 if (MEM_P (x) && MEM_VOLATILE_P (x))
5857 return NO_REGS;
5858
5859 /* Force all unrecognized constants into the constant pool. */
5860 if (CONSTANT_P (x))
5861 return NO_REGS;
5862 break;
5863
5864 case AR_M_REGS:
5865 case AR_I_REGS:
5866 if (!OBJECT_P (x))
5867 return NO_REGS;
5868 break;
5869
5870 default:
5871 break;
5872 }
5873
5874 return rclass;
5875 }
5876
5877 /* This function returns the register class required for a secondary
5878 register when copying between one of the registers in RCLASS, and X,
5879 using MODE. A return value of NO_REGS means that no secondary register
5880 is required. */
5881
5882 enum reg_class
5883 ia64_secondary_reload_class (enum reg_class rclass,
5884 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5885 {
5886 int regno = -1;
5887
5888 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5889 regno = true_regnum (x);
5890
5891 switch (rclass)
5892 {
5893 case BR_REGS:
5894 case AR_M_REGS:
5895 case AR_I_REGS:
5896 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5897 interaction. We end up with two pseudos with overlapping lifetimes
5898 both of which are equivalent to the same constant, and both of which need
5899 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5900 changes depending on the path length, which means the qty_first_reg
5901 check in make_regs_eqv can give different answers at different times.
5902 At some point I'll probably need a reload_indi pattern to handle
5903 this.
5904
5905 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5906 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5907 non-general registers for good measure. */
5908 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5909 return GR_REGS;
5910
5911 /* This is needed if a pseudo used as a call_operand gets spilled to a
5912 stack slot. */
5913 if (GET_CODE (x) == MEM)
5914 return GR_REGS;
5915 break;
5916
5917 case FR_REGS:
5918 case FP_REGS:
5919 /* Need to go through general registers to get to other class regs. */
5920 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5921 return GR_REGS;
5922
5923 /* This can happen when a paradoxical subreg is an operand to the
5924 muldi3 pattern. */
5925 /* ??? This shouldn't be necessary after instruction scheduling is
5926 enabled, because paradoxical subregs are not accepted by
5927 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5928 stop the paradoxical subreg stupidity in the *_operand functions
5929 in recog.c. */
5930 if (GET_CODE (x) == MEM
5931 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5932 || GET_MODE (x) == QImode))
5933 return GR_REGS;
5934
5935 /* This can happen because of the ior/and/etc patterns that accept FP
5936 registers as operands. If the third operand is a constant, then it
5937 needs to be reloaded into a FP register. */
5938 if (GET_CODE (x) == CONST_INT)
5939 return GR_REGS;
5940
5941 /* This can happen because of register elimination in a muldi3 insn.
5942 E.g. `26107 * (unsigned long)&u'. */
5943 if (GET_CODE (x) == PLUS)
5944 return GR_REGS;
5945 break;
5946
5947 case PR_REGS:
5948 /* ??? This happens if we cse/gcse a BImode value across a call,
5949 and the function has a nonlocal goto. This is because global
5950 does not allocate call crossing pseudos to hard registers when
5951 crtl->has_nonlocal_goto is true. This is relatively
5952 common for C++ programs that use exceptions. To reproduce,
5953 return NO_REGS and compile libstdc++. */
5954 if (GET_CODE (x) == MEM)
5955 return GR_REGS;
5956
5957 /* This can happen when we take a BImode subreg of a DImode value,
5958 and that DImode value winds up in some non-GR register. */
5959 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5960 return GR_REGS;
5961 break;
5962
5963 default:
5964 break;
5965 }
5966
5967 return NO_REGS;
5968 }
5969
5970 \f
5971 /* Implement targetm.unspec_may_trap_p hook. */
5972 static int
5973 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5974 {
5975 switch (XINT (x, 1))
5976 {
5977 case UNSPEC_LDA:
5978 case UNSPEC_LDS:
5979 case UNSPEC_LDSA:
5980 case UNSPEC_LDCCLR:
5981 case UNSPEC_CHKACLR:
5982 case UNSPEC_CHKS:
5983 /* These unspecs are just wrappers. */
5984 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5985 }
5986
5987 return default_unspec_may_trap_p (x, flags);
5988 }
5989
5990 \f
5991 /* Parse the -mfixed-range= option string. */
5992
5993 static void
5994 fix_range (const char *const_str)
5995 {
5996 int i, first, last;
5997 char *str, *dash, *comma;
5998
5999 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
6000 REG2 are either register names or register numbers. The effect
6001 of this option is to mark the registers in the range from REG1 to
6002 REG2 as ``fixed'' so they won't be used by the compiler. This is
6003 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
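/* For example, -mfixed-range=f12-f15,f32-f127 marks both ranges as fixed;
   multiple ranges are separated by commas.  */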
6004
6005 i = strlen (const_str);
6006 str = (char *) alloca (i + 1);
6007 memcpy (str, const_str, i + 1);
6008
6009 while (1)
6010 {
6011 dash = strchr (str, '-');
6012 if (!dash)
6013 {
6014 warning (0, "value of -mfixed-range must have form REG1-REG2");
6015 return;
6016 }
6017 *dash = '\0';
6018
6019 comma = strchr (dash + 1, ',');
6020 if (comma)
6021 *comma = '\0';
6022
6023 first = decode_reg_name (str);
6024 if (first < 0)
6025 {
6026 warning (0, "unknown register name: %s", str);
6027 return;
6028 }
6029
6030 last = decode_reg_name (dash + 1);
6031 if (last < 0)
6032 {
6033 warning (0, "unknown register name: %s", dash + 1);
6034 return;
6035 }
6036
6037 *dash = '-';
6038
6039 if (first > last)
6040 {
6041 warning (0, "%s-%s is an empty range", str, dash + 1);
6042 return;
6043 }
6044
6045 for (i = first; i <= last; ++i)
6046 fixed_regs[i] = call_used_regs[i] = 1;
6047
6048 if (!comma)
6049 break;
6050
6051 *comma = ',';
6052 str = comma + 1;
6053 }
6054 }
6055
6056 /* Implement TARGET_OPTION_OVERRIDE. */
6057
6058 static void
6059 ia64_option_override (void)
6060 {
6061 unsigned int i;
6062 cl_deferred_option *opt;
6063 vec<cl_deferred_option> *v
6064 = (vec<cl_deferred_option> *) ia64_deferred_options;
6065
6066 if (v)
6067 FOR_EACH_VEC_ELT (*v, i, opt)
6068 {
6069 switch (opt->opt_index)
6070 {
6071 case OPT_mfixed_range_:
6072 fix_range (opt->arg);
6073 break;
6074
6075 default:
6076 gcc_unreachable ();
6077 }
6078 }
6079
6080 if (TARGET_AUTO_PIC)
6081 target_flags |= MASK_CONST_GP;
6082
6083 /* Numerous experiments show that IRA-based loop pressure
6084 calculation works better for RTL loop invariant motion on targets
6085 with enough (>= 32) registers. It is an expensive optimization,
6086 so it is enabled only when optimizing for peak performance. */
6087 if (optimize >= 3)
6088 flag_ira_loop_pressure = 1;
6089
6090
6091 ia64_section_threshold = (global_options_set.x_g_switch_value
6092 ? g_switch_value
6093 : IA64_DEFAULT_GVALUE);
6094
6095 init_machine_status = ia64_init_machine_status;
6096
6097 if (align_functions <= 0)
6098 align_functions = 64;
6099 if (align_loops <= 0)
6100 align_loops = 32;
6101 if (TARGET_ABI_OPEN_VMS)
6102 flag_no_common = 1;
6103
6104 ia64_override_options_after_change();
6105 }
6106
6107 /* Implement targetm.override_options_after_change. */
6108
6109 static void
6110 ia64_override_options_after_change (void)
6111 {
6112 if (optimize >= 3
6113 && !global_options_set.x_flag_selective_scheduling
6114 && !global_options_set.x_flag_selective_scheduling2)
6115 {
6116 flag_selective_scheduling2 = 1;
6117 flag_sel_sched_pipelining = 1;
6118 }
6119 if (mflag_sched_control_spec == 2)
6120 {
6121 /* Control speculation is on by default for the selective scheduler,
6122 but not for the Haifa scheduler. */
6123 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6124 }
6125 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6126 {
6127 /* FIXME: remove this once breaking auto-increment insns apart is
6128 implemented as a transformation. */
6129 flag_auto_inc_dec = 0;
6130 }
6131 }
6132
6133 /* Initialize the record of emitted frame related registers. */
6134
6135 void ia64_init_expanders (void)
6136 {
6137 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6138 }
6139
6140 static struct machine_function *
6141 ia64_init_machine_status (void)
6142 {
6143 return ggc_cleared_alloc<machine_function> ();
6144 }
6145 \f
6146 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6147 static enum attr_type ia64_safe_type (rtx_insn *);
6148
6149 static enum attr_itanium_class
6150 ia64_safe_itanium_class (rtx_insn *insn)
6151 {
6152 if (recog_memoized (insn) >= 0)
6153 return get_attr_itanium_class (insn);
6154 else if (DEBUG_INSN_P (insn))
6155 return ITANIUM_CLASS_IGNORE;
6156 else
6157 return ITANIUM_CLASS_UNKNOWN;
6158 }
6159
6160 static enum attr_type
6161 ia64_safe_type (rtx_insn *insn)
6162 {
6163 if (recog_memoized (insn) >= 0)
6164 return get_attr_type (insn);
6165 else
6166 return TYPE_UNKNOWN;
6167 }
6168 \f
6169 /* The following collection of routines emit instruction group stop bits as
6170 necessary to avoid dependencies. */
6171
6172 /* Need to track some additional registers as far as serialization is
6173 concerned so we can properly handle br.call and br.ret. We could
6174 make these registers visible to gcc, but since these registers are
6175 never explicitly used in gcc generated code, it seems wasteful to
6176 do so (plus it would make the call and return patterns needlessly
6177 complex). */
6178 #define REG_RP (BR_REG (0))
6179 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6180 /* This is used for volatile asms which may require a stop bit immediately
6181 before and after them. */
6182 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6183 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6184 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6185
6186 /* For each register, we keep track of how it has been written in the
6187 current instruction group.
6188
6189 If a register is written unconditionally (no qualifying predicate),
6190 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6191
6192 If a register is written if its qualifying predicate P is true, we
6193 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6194 may be written again by the complement of P (P^1) and when this happens,
6195 WRITE_COUNT gets set to 2.
6196
6197 The result of this is that whenever an insn attempts to write a register
6198 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6199
6200 If a predicate register is written by a floating-point insn, we set
6201 WRITTEN_BY_FP to true.
6202
6203 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6204 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6205
6206 #if GCC_VERSION >= 4000
6207 #define RWS_FIELD_TYPE __extension__ unsigned short
6208 #else
6209 #define RWS_FIELD_TYPE unsigned int
6210 #endif
6211 struct reg_write_state
6212 {
6213 RWS_FIELD_TYPE write_count : 2;
6214 RWS_FIELD_TYPE first_pred : 10;
6215 RWS_FIELD_TYPE written_by_fp : 1;
6216 RWS_FIELD_TYPE written_by_and : 1;
6217 RWS_FIELD_TYPE written_by_or : 1;
6218 };
6219
6220 /* Cumulative info for the current instruction group. */
6221 struct reg_write_state rws_sum[NUM_REGS];
6222 #if CHECKING_P
6223 /* Bitmap whether a register has been written in the current insn. */
6224 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6225 / HOST_BITS_PER_WIDEST_FAST_INT];
6226
6227 static inline void
6228 rws_insn_set (int regno)
6229 {
6230 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6231 SET_HARD_REG_BIT (rws_insn, regno);
6232 }
6233
6234 static inline int
6235 rws_insn_test (int regno)
6236 {
6237 return TEST_HARD_REG_BIT (rws_insn, regno);
6238 }
6239 #else
6240 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6241 unsigned char rws_insn[2];
6242
6243 static inline void
6244 rws_insn_set (int regno)
6245 {
6246 if (regno == REG_AR_CFM)
6247 rws_insn[0] = 1;
6248 else if (regno == REG_VOLATILE)
6249 rws_insn[1] = 1;
6250 }
6251
6252 static inline int
6253 rws_insn_test (int regno)
6254 {
6255 if (regno == REG_AR_CFM)
6256 return rws_insn[0];
6257 if (regno == REG_VOLATILE)
6258 return rws_insn[1];
6259 return 0;
6260 }
6261 #endif
6262
6263 /* Indicates whether this is the first instruction after a stop bit,
6264 in which case we don't need another stop bit. Without this,
6265 ia64_variable_issue will die when scheduling an alloc. */
6266 static int first_instruction;
6267
6268 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6269 RTL for one instruction. */
6270 struct reg_flags
6271 {
6272 unsigned int is_write : 1; /* Is register being written? */
6273 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6274 unsigned int is_branch : 1; /* Is register used as part of a branch? */
6275 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6276 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
6277 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
6278 };
6279
6280 static void rws_update (int, struct reg_flags, int);
6281 static int rws_access_regno (int, struct reg_flags, int);
6282 static int rws_access_reg (rtx, struct reg_flags, int);
6283 static void update_set_flags (rtx, struct reg_flags *);
6284 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6285 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6286 static void init_insn_group_barriers (void);
6287 static int group_barrier_needed (rtx_insn *);
6288 static int safe_group_barrier_needed (rtx_insn *);
6289 static int in_safe_group_barrier;
6290
6291 /* Update *RWS for REGNO, which is being written by the current instruction,
6292 with predicate PRED, and associated register flags in FLAGS. */
6293
6294 static void
6295 rws_update (int regno, struct reg_flags flags, int pred)
6296 {
6297 if (pred)
6298 rws_sum[regno].write_count++;
6299 else
6300 rws_sum[regno].write_count = 2;
6301 rws_sum[regno].written_by_fp |= flags.is_fp;
6302 /* ??? Not tracking and/or across differing predicates. */
6303 rws_sum[regno].written_by_and = flags.is_and;
6304 rws_sum[regno].written_by_or = flags.is_or;
6305 rws_sum[regno].first_pred = pred;
6306 }
6307
6308 /* Handle an access to register REGNO of type FLAGS using predicate register
6309 PRED. Update rws_sum array. Return 1 if this access creates
6310 a dependency with an earlier instruction in the same group. */
6311
6312 static int
6313 rws_access_regno (int regno, struct reg_flags flags, int pred)
6314 {
6315 int need_barrier = 0;
6316
6317 gcc_assert (regno < NUM_REGS);
6318
6319 if (! PR_REGNO_P (regno))
6320 flags.is_and = flags.is_or = 0;
6321
6322 if (flags.is_write)
6323 {
6324 int write_count;
6325
6326 rws_insn_set (regno);
6327 write_count = rws_sum[regno].write_count;
6328
6329 switch (write_count)
6330 {
6331 case 0:
6332 /* The register has not been written yet. */
6333 if (!in_safe_group_barrier)
6334 rws_update (regno, flags, pred);
6335 break;
6336
6337 case 1:
6338 /* The register has been written via a predicate. Treat
6339 it like an unconditional write and do not try to check
6340 for a complementary predicate register in the earlier write. */
6341 if (flags.is_and && rws_sum[regno].written_by_and)
6342 ;
6343 else if (flags.is_or && rws_sum[regno].written_by_or)
6344 ;
6345 else
6346 need_barrier = 1;
6347 if (!in_safe_group_barrier)
6348 rws_update (regno, flags, pred);
6349 break;
6350
6351 case 2:
6352 /* The register has been unconditionally written already. We
6353 need a barrier. */
6354 if (flags.is_and && rws_sum[regno].written_by_and)
6355 ;
6356 else if (flags.is_or && rws_sum[regno].written_by_or)
6357 ;
6358 else
6359 need_barrier = 1;
6360 if (!in_safe_group_barrier)
6361 {
6362 rws_sum[regno].written_by_and = flags.is_and;
6363 rws_sum[regno].written_by_or = flags.is_or;
6364 }
6365 break;
6366
6367 default:
6368 gcc_unreachable ();
6369 }
6370 }
6371 else
6372 {
6373 if (flags.is_branch)
6374 {
6375 /* Branches have several RAW exceptions that allow us to avoid
6376 barriers. */
6377
6378 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6379 /* RAW dependencies on branch regs are permissible as long
6380 as the writer is a non-branch instruction. Since we
6381 never generate code that uses a branch register written
6382 by a branch instruction, handling this case is
6383 easy. */
6384 return 0;
6385
6386 if (REGNO_REG_CLASS (regno) == PR_REGS
6387 && ! rws_sum[regno].written_by_fp)
6388 /* The predicates of a branch are available within the
6389 same insn group as long as the predicate was written by
6390 something other than a floating-point instruction. */
6391 return 0;
6392 }
6393
6394 if (flags.is_and && rws_sum[regno].written_by_and)
6395 return 0;
6396 if (flags.is_or && rws_sum[regno].written_by_or)
6397 return 0;
6398
6399 switch (rws_sum[regno].write_count)
6400 {
6401 case 0:
6402 /* The register has not been written yet. */
6403 break;
6404
6405 case 1:
6406 /* The register has been written via a predicate, assume we
6407 need a barrier (don't check for complementary regs). */
6408 need_barrier = 1;
6409 break;
6410
6411 case 2:
6412 /* The register has been unconditionally written already. We
6413 need a barrier. */
6414 need_barrier = 1;
6415 break;
6416
6417 default:
6418 gcc_unreachable ();
6419 }
6420 }
6421
6422 return need_barrier;
6423 }
6424
6425 static int
6426 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6427 {
6428 int regno = REGNO (reg);
6429 int n = REG_NREGS (reg);
6430
6431 if (n == 1)
6432 return rws_access_regno (regno, flags, pred);
6433 else
6434 {
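/* A multi-word value occupies consecutive hard registers; record an
   access to each of them.  */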
6435 int need_barrier = 0;
6436 while (--n >= 0)
6437 need_barrier |= rws_access_regno (regno + n, flags, pred);
6438 return need_barrier;
6439 }
6440 }
6441
6442 /* Examine X, which is a SET rtx, and update the register flags
6443 stored in *PFLAGS. */
6444
6445 static void
6446 update_set_flags (rtx x, struct reg_flags *pflags)
6447 {
6448 rtx src = SET_SRC (x);
6449
6450 switch (GET_CODE (src))
6451 {
6452 case CALL:
6453 return;
6454
6455 case IF_THEN_ELSE:
6456 /* There are four cases here:
6457 (1) The destination is (pc), in which case this is a branch,
6458 nothing here applies.
6459 (2) The destination is ar.lc, in which case this is a
6460 doloop_end_internal,
6461 (3) The destination is an fp register, in which case this is
6462 an fselect instruction.
6463 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6464 this is a check load.
6465 In all cases, nothing we do in this function applies. */
6466 return;
6467
6468 default:
6469 if (COMPARISON_P (src)
6470 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6471 /* Set pflags->is_fp to 1 so that we know we're dealing
6472 with a floating point comparison when processing the
6473 destination of the SET. */
6474 pflags->is_fp = 1;
6475
6476 /* Discover if this is a parallel comparison. We only handle
6477 and.orcm and or.andcm at present, since we must retain a
6478 strict inverse on the predicate pair. */
6479 else if (GET_CODE (src) == AND)
6480 pflags->is_and = 1;
6481 else if (GET_CODE (src) == IOR)
6482 pflags->is_or = 1;
6483
6484 break;
6485 }
6486 }
6487
6488 /* Subroutine of rtx_needs_barrier; this function determines whether the
6489 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6490 are as in rtx_needs_barrier. */
6492
6493 static int
6494 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6495 {
6496 int need_barrier = 0;
6497 rtx dst;
6498 rtx src = SET_SRC (x);
6499
6500 if (GET_CODE (src) == CALL)
6501 /* We don't need to worry about the result registers that
6502 get written by subroutine call. */
6503 return rtx_needs_barrier (src, flags, pred);
6504 else if (SET_DEST (x) == pc_rtx)
6505 {
6506 /* X is a conditional branch. */
6507 /* ??? This seems redundant, as the caller sets this bit for
6508 all JUMP_INSNs. */
6509 if (!ia64_spec_check_src_p (src))
6510 flags.is_branch = 1;
6511 return rtx_needs_barrier (src, flags, pred);
6512 }
6513
6514 if (ia64_spec_check_src_p (src))
6515 /* Avoid checking one register twice (in condition
6516 and in 'then' section) for ldc pattern. */
6517 {
6518 gcc_assert (REG_P (XEXP (src, 2)));
6519 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6520
6521 /* We process MEM below. */
6522 src = XEXP (src, 1);
6523 }
6524
6525 need_barrier |= rtx_needs_barrier (src, flags, pred);
6526
6527 dst = SET_DEST (x);
6528 if (GET_CODE (dst) == ZERO_EXTRACT)
6529 {
6530 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6531 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6532 }
6533 return need_barrier;
6534 }
6535
6536 /* Handle an access to rtx X of type FLAGS using predicate register
6537 PRED. Return 1 if this access creates a dependency with an earlier
6538 instruction in the same group. */
6539
6540 static int
6541 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6542 {
6543 int i, j;
6544 int is_complemented = 0;
6545 int need_barrier = 0;
6546 const char *format_ptr;
6547 struct reg_flags new_flags;
6548 rtx cond;
6549
6550 if (! x)
6551 return 0;
6552
6553 new_flags = flags;
6554
6555 switch (GET_CODE (x))
6556 {
6557 case SET:
6558 update_set_flags (x, &new_flags);
6559 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6560 if (GET_CODE (SET_SRC (x)) != CALL)
6561 {
6562 new_flags.is_write = 1;
6563 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6564 }
6565 break;
6566
6567 case CALL:
6568 new_flags.is_write = 0;
6569 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6570
6571 /* Avoid multiple register writes, in case this is a pattern with
6572 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6573 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6574 {
6575 new_flags.is_write = 1;
6576 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6577 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6578 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6579 }
6580 break;
6581
6582 case COND_EXEC:
6583 /* X is a predicated instruction. */
6584
6585 cond = COND_EXEC_TEST (x);
6586 gcc_assert (!pred);
6587 need_barrier = rtx_needs_barrier (cond, flags, 0);
6588
6589 if (GET_CODE (cond) == EQ)
6590 is_complemented = 1;
6591 cond = XEXP (cond, 0);
6592 gcc_assert (GET_CODE (cond) == REG
6593 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6594 pred = REGNO (cond);
6595 if (is_complemented)
6596 ++pred;
6597
6598 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6599 return need_barrier;
6600
6601 case CLOBBER:
6602 case USE:
6603 /* Clobber & use are for earlier compiler-phases only. */
6604 break;
6605
6606 case ASM_OPERANDS:
6607 case ASM_INPUT:
6608 /* We always emit stop bits for traditional asms. We emit stop bits
6609 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6610 if (GET_CODE (x) != ASM_OPERANDS
6611 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6612 {
6613 /* Avoid writing the register multiple times if we have multiple
6614 asm outputs. This avoids a failure in rws_access_reg. */
6615 if (! rws_insn_test (REG_VOLATILE))
6616 {
6617 new_flags.is_write = 1;
6618 rws_access_regno (REG_VOLATILE, new_flags, pred);
6619 }
6620 return 1;
6621 }
6622
6623 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6624 We cannot just fall through here since then we would be confused
6625 by the ASM_INPUT rtxes inside ASM_OPERANDS, which, unlike their
6626 normal usage, do not indicate traditional asms. */
6627
6628 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6629 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6630 need_barrier = 1;
6631 break;
6632
6633 case PARALLEL:
6634 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6635 {
6636 rtx pat = XVECEXP (x, 0, i);
6637 switch (GET_CODE (pat))
6638 {
6639 case SET:
6640 update_set_flags (pat, &new_flags);
6641 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6642 break;
6643
6644 case USE:
6645 case CALL:
6646 case ASM_OPERANDS:
6647 case ASM_INPUT:
6648 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6649 break;
6650
6651 case CLOBBER:
6652 if (REG_P (XEXP (pat, 0))
6653 && extract_asm_operands (x) != NULL_RTX
6654 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6655 {
6656 new_flags.is_write = 1;
6657 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6658 new_flags, pred);
6659 new_flags = flags;
6660 }
6661 break;
6662
6663 case RETURN:
6664 break;
6665
6666 default:
6667 gcc_unreachable ();
6668 }
6669 }
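/* Process the destinations in a second pass: a PARALLEL performs all of
   its sets simultaneously, so every source is read before any
   destination is written.  */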
6670 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6671 {
6672 rtx pat = XVECEXP (x, 0, i);
6673 if (GET_CODE (pat) == SET)
6674 {
6675 if (GET_CODE (SET_SRC (pat)) != CALL)
6676 {
6677 new_flags.is_write = 1;
6678 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6679 pred);
6680 }
6681 }
6682 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6683 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6684 }
6685 break;
6686
6687 case SUBREG:
6688 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6689 break;
6690 case REG:
6691 if (REGNO (x) == AR_UNAT_REGNUM)
6692 {
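/* ar.unat is modeled as 64 individual bits (AR_UNAT_BIT_0 + 0..63) so
   that spills and restores of separate stack slots can be tracked
   independently.  */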
6693 for (i = 0; i < 64; ++i)
6694 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6695 }
6696 else
6697 need_barrier = rws_access_reg (x, flags, pred);
6698 break;
6699
6700 case MEM:
6701 /* Find the regs used in memory address computation. */
6702 new_flags.is_write = 0;
6703 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6704 break;
6705
6706 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6707 case SYMBOL_REF: case LABEL_REF: case CONST:
6708 break;
6709
6710 /* Operators with side-effects. */
6711 case POST_INC: case POST_DEC:
6712 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6713
6714 new_flags.is_write = 0;
6715 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6716 new_flags.is_write = 1;
6717 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6718 break;
6719
6720 case POST_MODIFY:
6721 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6722
6723 new_flags.is_write = 0;
6724 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6725 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6726 new_flags.is_write = 1;
6727 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6728 break;
6729
6730 /* Handle common unary and binary ops for efficiency. */
6731 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6732 case MOD: case UDIV: case UMOD: case AND: case IOR:
6733 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6734 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6735 case NE: case EQ: case GE: case GT: case LE:
6736 case LT: case GEU: case GTU: case LEU: case LTU:
6737 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6738 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6739 break;
6740
6741 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6742 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6743 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6744 case SQRT: case FFS: case POPCOUNT:
6745 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6746 break;
6747
6748 case VEC_SELECT:
6749 /* VEC_SELECT's second argument is a PARALLEL with integers that
6750 describe the elements selected. On ia64, those integers are
6751 always constants. Avoid walking the PARALLEL so that we don't
6752 get confused with "normal" parallels and then die. */
6753 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6754 break;
6755
6756 case UNSPEC:
6757 switch (XINT (x, 1))
6758 {
6759 case UNSPEC_LTOFF_DTPMOD:
6760 case UNSPEC_LTOFF_DTPREL:
6761 case UNSPEC_DTPREL:
6762 case UNSPEC_LTOFF_TPREL:
6763 case UNSPEC_TPREL:
6764 case UNSPEC_PRED_REL_MUTEX:
6765 case UNSPEC_PIC_CALL:
6766 case UNSPEC_MF:
6767 case UNSPEC_FETCHADD_ACQ:
6768 case UNSPEC_FETCHADD_REL:
6769 case UNSPEC_BSP_VALUE:
6770 case UNSPEC_FLUSHRS:
6771 case UNSPEC_BUNDLE_SELECTOR:
6772 break;
6773
6774 case UNSPEC_GR_SPILL:
6775 case UNSPEC_GR_RESTORE:
6776 {
6777 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6778 HOST_WIDE_INT bit = (offset >> 3) & 63;
6779
6780 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6781 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6782 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6783 new_flags, pred);
6784 break;
6785 }
6786
6787 case UNSPEC_FR_SPILL:
6788 case UNSPEC_FR_RESTORE:
6789 case UNSPEC_GETF_EXP:
6790 case UNSPEC_SETF_EXP:
6791 case UNSPEC_ADDP4:
6792 case UNSPEC_FR_SQRT_RECIP_APPROX:
6793 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6794 case UNSPEC_LDA:
6795 case UNSPEC_LDS:
6796 case UNSPEC_LDS_A:
6797 case UNSPEC_LDSA:
6798 case UNSPEC_CHKACLR:
6799 case UNSPEC_CHKS:
6800 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6801 break;
6802
6803 case UNSPEC_FR_RECIP_APPROX:
6804 case UNSPEC_SHRP:
6805 case UNSPEC_COPYSIGN:
6806 case UNSPEC_FR_RECIP_APPROX_RES:
6807 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6808 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6809 break;
6810
6811 case UNSPEC_CMPXCHG_ACQ:
6812 case UNSPEC_CMPXCHG_REL:
6813 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6814 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6815 break;
6816
6817 default:
6818 gcc_unreachable ();
6819 }
6820 break;
6821
6822 case UNSPEC_VOLATILE:
6823 switch (XINT (x, 1))
6824 {
6825 case UNSPECV_ALLOC:
6826 /* Alloc must always be the first instruction of a group.
6827 We force this by always returning true. */
6828 /* ??? We might get better scheduling if we explicitly check for
6829 input/local/output register dependencies, and modify the
6830 scheduler so that alloc is always reordered to the start of
6831 the current group. We could then eliminate all of the
6832 first_instruction code. */
6833 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6834
6835 new_flags.is_write = 1;
6836 rws_access_regno (REG_AR_CFM, new_flags, pred);
6837 return 1;
6838
6839 case UNSPECV_SET_BSP:
6840 case UNSPECV_PROBE_STACK_RANGE:
6841 need_barrier = 1;
6842 break;
6843
6844 case UNSPECV_BLOCKAGE:
6845 case UNSPECV_INSN_GROUP_BARRIER:
6846 case UNSPECV_BREAK:
6847 case UNSPECV_PSAC_ALL:
6848 case UNSPECV_PSAC_NORMAL:
6849 return 0;
6850
6851 case UNSPECV_PROBE_STACK_ADDRESS:
6852 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6853 break;
6854
6855 default:
6856 gcc_unreachable ();
6857 }
6858 break;
6859
6860 case RETURN:
6861 new_flags.is_write = 0;
6862 need_barrier = rws_access_regno (REG_RP, flags, pred);
6863 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6864
6865 new_flags.is_write = 1;
6866 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6867 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6868 break;
6869
6870 default:
6871 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6872 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6873 switch (format_ptr[i])
6874 {
6875 case '0': /* unused field */
6876 case 'i': /* integer */
6877 case 'n': /* note */
6878 case 'w': /* wide integer */
6879 case 's': /* pointer to string */
6880 case 'S': /* optional pointer to string */
6881 break;
6882
6883 case 'e':
6884 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6885 need_barrier = 1;
6886 break;
6887
6888 case 'E':
6889 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6890 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6891 need_barrier = 1;
6892 break;
6893
6894 default:
6895 gcc_unreachable ();
6896 }
6897 break;
6898 }
6899 return need_barrier;
6900 }
6901
6902 /* Clear out the state for group_barrier_needed at the start of a
6903 sequence of insns. */
6904
6905 static void
6906 init_insn_group_barriers (void)
6907 {
6908 memset (rws_sum, 0, sizeof (rws_sum));
6909 first_instruction = 1;
6910 }
6911
6912 /* Given the current state, determine whether a group barrier (a stop bit) is
6913 necessary before INSN. Return nonzero if so. This modifies the state to
6914 include the effects of INSN as a side-effect. */
6915
6916 static int
6917 group_barrier_needed (rtx_insn *insn)
6918 {
6919 rtx pat;
6920 int need_barrier = 0;
6921 struct reg_flags flags;
6922
6923 memset (&flags, 0, sizeof (flags));
6924 switch (GET_CODE (insn))
6925 {
6926 case NOTE:
6927 case DEBUG_INSN:
6928 break;
6929
6930 case BARRIER:
6931 /* A barrier doesn't imply an instruction group boundary. */
6932 break;
6933
6934 case CODE_LABEL:
6935 memset (rws_insn, 0, sizeof (rws_insn));
6936 return 1;
6937
6938 case CALL_INSN:
6939 flags.is_branch = 1;
6940 flags.is_sibcall = SIBLING_CALL_P (insn);
6941 memset (rws_insn, 0, sizeof (rws_insn));
6942
6943 /* Don't bundle a call following another call. */
6944 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6945 {
6946 need_barrier = 1;
6947 break;
6948 }
6949
6950 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6951 break;
6952
6953 case JUMP_INSN:
6954 if (!ia64_spec_check_p (insn))
6955 flags.is_branch = 1;
6956
6957 /* Don't bundle a jump following a call. */
6958 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6959 {
6960 need_barrier = 1;
6961 break;
6962 }
6963 /* FALLTHRU */
6964
6965 case INSN:
6966 if (GET_CODE (PATTERN (insn)) == USE
6967 || GET_CODE (PATTERN (insn)) == CLOBBER)
6968 /* Don't care about USE and CLOBBER "insns"---those are used to
6969 indicate to the optimizer that it shouldn't get rid of
6970 certain operations. */
6971 break;
6972
6973 pat = PATTERN (insn);
6974
6975 /* Ug. Hack hacks hacked elsewhere. */
6976 switch (recog_memoized (insn))
6977 {
6978 /* We play dependency tricks with the epilogue in order
6979 to get proper schedules. Undo this for dv analysis. */
6980 case CODE_FOR_epilogue_deallocate_stack:
6981 case CODE_FOR_prologue_allocate_stack:
6982 pat = XVECEXP (pat, 0, 0);
6983 break;
6984
6985 /* The pattern we use for br.cloop confuses the code above.
6986 The second element of the vector is representative. */
6987 case CODE_FOR_doloop_end_internal:
6988 pat = XVECEXP (pat, 0, 1);
6989 break;
6990
6991 /* Doesn't generate code. */
6992 case CODE_FOR_pred_rel_mutex:
6993 case CODE_FOR_prologue_use:
6994 return 0;
6995
6996 default:
6997 break;
6998 }
6999
7000 memset (rws_insn, 0, sizeof (rws_insn));
7001 need_barrier = rtx_needs_barrier (pat, flags, 0);
7002
7003 /* Check to see if the previous instruction was a volatile
7004 asm. */
7005 if (! need_barrier)
7006 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
7007
7008 break;
7009
7010 default:
7011 gcc_unreachable ();
7012 }
7013
7014 if (first_instruction && important_for_bundling_p (insn))
7015 {
7016 need_barrier = 0;
7017 first_instruction = 0;
7018 }
7019
7020 return need_barrier;
7021 }
7022
7023 /* Like group_barrier_needed, but do not clobber the current state. */
7024
7025 static int
7026 safe_group_barrier_needed (rtx_insn *insn)
7027 {
7028 int saved_first_instruction;
7029 int t;
7030
7031 saved_first_instruction = first_instruction;
7032 in_safe_group_barrier = 1;
7033
7034 t = group_barrier_needed (insn);
7035
7036 first_instruction = saved_first_instruction;
7037 in_safe_group_barrier = 0;
7038
7039 return t;
7040 }
7041
7042 /* Scan the current function and insert stop bits as necessary to
7043 eliminate dependencies. This function assumes that a final
7044 instruction scheduling pass has been run which has already
7045 inserted most of the necessary stop bits. This function only
7046 inserts new ones at basic block boundaries, since these are
7047 invisible to the scheduler. */
7048
7049 static void
7050 emit_insn_group_barriers (FILE *dump)
7051 {
7052 rtx_insn *insn;
7053 rtx_insn *last_label = 0;
7054 int insns_since_last_label = 0;
7055
7056 init_insn_group_barriers ();
7057
7058 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7059 {
7060 if (LABEL_P (insn))
7061 {
7062 if (insns_since_last_label)
7063 last_label = insn;
7064 insns_since_last_label = 0;
7065 }
7066 else if (NOTE_P (insn)
7067 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7068 {
7069 if (insns_since_last_label)
7070 last_label = insn;
7071 insns_since_last_label = 0;
7072 }
7073 else if (NONJUMP_INSN_P (insn)
7074 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7075 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7076 {
7077 init_insn_group_barriers ();
7078 last_label = 0;
7079 }
7080 else if (NONDEBUG_INSN_P (insn))
7081 {
7082 insns_since_last_label = 1;
7083
7084 if (group_barrier_needed (insn))
7085 {
7086 if (last_label)
7087 {
7088 if (dump)
7089 fprintf (dump, "Emitting stop before label %d\n",
7090 INSN_UID (last_label));
7091 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7092 insn = last_label;
7093
7094 init_insn_group_barriers ();
7095 last_label = 0;
7096 }
7097 }
7098 }
7099 }
7100 }
7101
7102 /* Like emit_insn_group_barriers, but run when no final scheduling pass has been run.
7103 This function has to emit all necessary group barriers. */
7104
7105 static void
7106 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7107 {
7108 rtx_insn *insn;
7109
7110 init_insn_group_barriers ();
7111
7112 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7113 {
7114 if (BARRIER_P (insn))
7115 {
7116 rtx_insn *last = prev_active_insn (insn);
7117
7118 if (! last)
7119 continue;
7120 if (JUMP_TABLE_DATA_P (last))
7121 last = prev_active_insn (last);
7122 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7123 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7124
7125 init_insn_group_barriers ();
7126 }
7127 else if (NONDEBUG_INSN_P (insn))
7128 {
7129 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7130 init_insn_group_barriers ();
7131 else if (group_barrier_needed (insn))
7132 {
7133 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7134 init_insn_group_barriers ();
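/* Re-scan INSN so that its own register writes are recorded in the
   freshly cleared state.  */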
7135 group_barrier_needed (insn);
7136 }
7137 }
7138 }
7139 }
7140
7141 \f
7142
7143 /* Instruction scheduling support. */
7144
7145 #define NR_BUNDLES 10
7146
7147 /* A list of names of all available bundles. */
7148
7149 static const char *bundle_name [NR_BUNDLES] =
7150 {
7151 ".mii",
7152 ".mmi",
7153 ".mfi",
7154 ".mmf",
7155 #if NR_BUNDLES == 10
7156 ".bbb",
7157 ".mbb",
7158 #endif
7159 ".mib",
7160 ".mmb",
7161 ".mfb",
7162 ".mlx"
7163 };
7164
7165 /* Nonzero if we should insert stop bits into the schedule. */
7166
7167 int ia64_final_schedule = 0;
7168
7169 /* Codes of the corresponding queried units: */
7170
7171 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7172 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7173
7174 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7175 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7176
7177 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7178
7179 /* The following variable value is an insn group barrier. */
7180
7181 static rtx_insn *dfa_stop_insn;
7182
7183 /* The following variable value is the last issued insn. */
7184
7185 static rtx_insn *last_scheduled_insn;
7186
7187 /* The following variable value is a pointer to a DFA state used as a
7188 temporary variable. */
7189
7190 static state_t temp_dfa_state = NULL;
7191
7192 /* The following variable value is the DFA state after issuing the last
7193 insn. */
7194
7195 static state_t prev_cycle_state = NULL;
7196
7197 /* The following array element values are TRUE if the corresponding
7198 insn requires stop bits to be added before it. */
7199
7200 static char *stops_p = NULL;
7201
7202 /* The following variable is used to set up the array mentioned above. */
7203
7204 static int stop_before_p = 0;
7205
7206 /* The following variable value is the allocated length of the
7207 `stops_p' array. */
7208
7209 static int clocks_length;
7210
7211 /* The following variable value is number of data speculations in progress. */
7212 static int pending_data_specs = 0;
7213
7214 /* Number of memory references on current and three future processor cycles. */
7215 static char mem_ops_in_group[4];
7216
7217 /* Number of current processor cycle (from scheduler's point of view). */
7218 static int current_cycle;
7219
7220 static rtx ia64_single_set (rtx_insn *);
7221 static void ia64_emit_insn_before (rtx, rtx_insn *);
7222
7223 /* Map a bundle number to its pseudo-op. */
7224
7225 const char *
7226 get_bundle_name (int b)
7227 {
7228 return bundle_name[b];
7229 }
7230
7231
7232 /* Return the maximum number of instructions a cpu can issue. */
7233
7234 static int
7235 ia64_issue_rate (void)
7236 {
7237 return 6;
7238 }
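
/* The value 6 corresponds to the two 3-slot bundles that Itanium and
   Itanium 2 can issue per clock cycle.  */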
7239
7240 /* Helper function - like single_set, but look inside COND_EXEC. */
7241
7242 static rtx
7243 ia64_single_set (rtx_insn *insn)
7244 {
7245 rtx x = PATTERN (insn), ret;
7246 if (GET_CODE (x) == COND_EXEC)
7247 x = COND_EXEC_CODE (x);
7248 if (GET_CODE (x) == SET)
7249 return x;
7250
7251 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
7252 Although they are not a classical single set, the second set is there just
7253 to protect the insn from moving past FP-relative stack accesses. */
7254 switch (recog_memoized (insn))
7255 {
7256 case CODE_FOR_prologue_allocate_stack:
7257 case CODE_FOR_prologue_allocate_stack_pr:
7258 case CODE_FOR_epilogue_deallocate_stack:
7259 case CODE_FOR_epilogue_deallocate_stack_pr:
7260 ret = XVECEXP (x, 0, 0);
7261 break;
7262
7263 default:
7264 ret = single_set_2 (insn, x);
7265 break;
7266 }
7267
7268 return ret;
7269 }
7270
7271 /* Adjust the cost of a scheduling dependency.
7272 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
7273 COST is the current cost and DW is the dependency weakness. */
7274 static int
7275 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7276 int cost, dw_t dw)
7277 {
7278 enum reg_note dep_type = (enum reg_note) dep_type1;
7279 enum attr_itanium_class dep_class;
7280 enum attr_itanium_class insn_class;
7281
7282 insn_class = ia64_safe_itanium_class (insn);
7283 dep_class = ia64_safe_itanium_class (dep_insn);
7284
7285 /* Treat true memory dependencies separately. Ignore the apparent true
7286 dependence between a store and a call (a call contains a SYMBOL_REF inside a MEM). */
7287 if (dep_type == REG_DEP_TRUE
7288 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7289 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7290 return 0;
7291
7292 if (dw == MIN_DEP_WEAK)
7293 /* Store and load are likely to alias, use higher cost to avoid stall. */
7294 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7295 else if (dw > MIN_DEP_WEAK)
7296 {
7297 /* Store and load are less likely to alias. */
7298 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7299 /* Assume there will be no cache conflict for floating-point data.
7300 For integer data, L1 conflict penalty is huge (17 cycles), so we
7301 never assume it will not cause a conflict. */
7302 return 0;
7303 else
7304 return cost;
7305 }
7306
7307 if (dep_type != REG_DEP_OUTPUT)
7308 return cost;
7309
7310 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7311 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7312 return 0;
7313
7314 return cost;
7315 }
7316
7317 /* Like emit_insn_before, but skip cycle_display notes.
7318 ??? When cycle display notes are implemented, update this. */
7319
7320 static void
7321 ia64_emit_insn_before (rtx insn, rtx_insn *before)
7322 {
7323 emit_insn_before (insn, before);
7324 }
7325
7326 /* The following function marks insns that produce addresses for load
7327 and store insns. Such insns will be placed into M slots because that
7328 decreases latency for Itanium 1 (see function
7329 `ia64_produce_address_p' and the DFA descriptions). */
7330
7331 static void
7332 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7333 {
7334 rtx_insn *insn, *next, *next_tail;
7335
7336 /* Before reload, which_alternative is not set, which means that
7337 ia64_safe_itanium_class will produce wrong results for (at least)
7338 move instructions. */
7339 if (!reload_completed)
7340 return;
7341
7342 next_tail = NEXT_INSN (tail);
7343 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7344 if (INSN_P (insn))
7345 insn->call = 0;
7346 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7347 if (INSN_P (insn)
7348 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7349 {
7350 sd_iterator_def sd_it;
7351 dep_t dep;
7352 bool has_mem_op_consumer_p = false;
7353
7354 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7355 {
7356 enum attr_itanium_class c;
7357
7358 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7359 continue;
7360
7361 next = DEP_CON (dep);
7362 c = ia64_safe_itanium_class (next);
7363 if ((c == ITANIUM_CLASS_ST
7364 || c == ITANIUM_CLASS_STF)
7365 && ia64_st_address_bypass_p (insn, next))
7366 {
7367 has_mem_op_consumer_p = true;
7368 break;
7369 }
7370 else if ((c == ITANIUM_CLASS_LD
7371 || c == ITANIUM_CLASS_FLD
7372 || c == ITANIUM_CLASS_FLDP)
7373 && ia64_ld_address_bypass_p (insn, next))
7374 {
7375 has_mem_op_consumer_p = true;
7376 break;
7377 }
7378 }
7379
7380 insn->call = has_mem_op_consumer_p;
7381 }
7382 }
7383
7384 /* We're beginning a new block. Initialize data structures as necessary. */
7385
7386 static void
7387 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7388 int sched_verbose ATTRIBUTE_UNUSED,
7389 int max_ready ATTRIBUTE_UNUSED)
7390 {
7391 if (flag_checking && !sel_sched_p () && reload_completed)
7392 {
7393 for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7394 insn != current_sched_info->next_tail;
7395 insn = NEXT_INSN (insn))
7396 gcc_assert (!SCHED_GROUP_P (insn));
7397 }
7398 last_scheduled_insn = NULL;
7399 init_insn_group_barriers ();
7400
7401 current_cycle = 0;
7402 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7403 }
7404
7405 /* We're beginning a scheduling pass. Check assertion. */
7406
7407 static void
7408 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7409 int sched_verbose ATTRIBUTE_UNUSED,
7410 int max_ready ATTRIBUTE_UNUSED)
7411 {
7412 gcc_assert (pending_data_specs == 0);
7413 }
7414
7415 /* Scheduling pass is now finished. Free/reset static variable. */
7416 static void
7417 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7418 int sched_verbose ATTRIBUTE_UNUSED)
7419 {
7420 gcc_assert (pending_data_specs == 0);
7421 }
7422
7423 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7424 speculation check), FALSE otherwise. */
7425 static bool
7426 is_load_p (rtx_insn *insn)
7427 {
7428 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7429
7430 return
7431 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7432 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7433 }
7434
7435 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global array
7436 (taking into account the 3-cycle cache reference postponement for stores: Intel
7437 Itanium 2 Reference Manual for Software Development and Optimization,
7438 6.7.3.1). */
7439 static void
7440 record_memory_reference (rtx_insn *insn)
7441 {
7442 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7443
7444 switch (insn_class) {
7445 case ITANIUM_CLASS_FLD:
7446 case ITANIUM_CLASS_LD:
7447 mem_ops_in_group[current_cycle % 4]++;
7448 break;
7449 case ITANIUM_CLASS_STF:
7450 case ITANIUM_CLASS_ST:
7451 mem_ops_in_group[(current_cycle + 3) % 4]++;
7452 break;
7453 default:;
7454 }
7455 }
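
/* For illustration: with current_cycle == 5, a load increments
   mem_ops_in_group[5 % 4] == mem_ops_in_group[1], while a store increments
   mem_ops_in_group[(5 + 3) % 4] == mem_ops_in_group[0], i.e. the store is
   charged to the cycle three steps ahead in the rotating 4-entry window.  */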
7456
7457 /* We are about to begin issuing insns for this clock cycle.
7458 Override the default sort algorithm to better slot instructions. */
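/* A note on the READY array layout assumed by the code below: as the
   `highest'/`lowest' temporaries suggest, the most desirable candidate sits
   at ready[n_ready - 1], so moving an insn towards ready[0] with memmove
   deprioritizes it while keeping the relative order of the moved insns.  */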
7459
7460 static int
7461 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7462 int *pn_ready, int clock_var,
7463 int reorder_type)
7464 {
7465 int n_asms;
7466 int n_ready = *pn_ready;
7467 rtx_insn **e_ready = ready + n_ready;
7468 rtx_insn **insnp;
7469
7470 if (sched_verbose)
7471 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7472
7473 if (reorder_type == 0)
7474 {
7475 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7476 n_asms = 0;
7477 for (insnp = ready; insnp < e_ready; insnp++)
7478 if (insnp < e_ready)
7479 {
7480 rtx_insn *insn = *insnp;
7481 enum attr_type t = ia64_safe_type (insn);
7482 if (t == TYPE_UNKNOWN)
7483 {
7484 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7485 || asm_noperands (PATTERN (insn)) >= 0)
7486 {
7487 rtx_insn *lowest = ready[n_asms];
7488 ready[n_asms] = insn;
7489 *insnp = lowest;
7490 n_asms++;
7491 }
7492 else
7493 {
7494 rtx_insn *highest = ready[n_ready - 1];
7495 ready[n_ready - 1] = insn;
7496 *insnp = highest;
7497 return 1;
7498 }
7499 }
7500 }
7501
7502 if (n_asms < n_ready)
7503 {
7504 /* Some normal insns to process. Skip the asms. */
7505 ready += n_asms;
7506 n_ready -= n_asms;
7507 }
7508 else if (n_ready > 0)
7509 return 1;
7510 }
7511
7512 if (ia64_final_schedule)
7513 {
7514 int deleted = 0;
7515 int nr_need_stop = 0;
7516
7517 for (insnp = ready; insnp < e_ready; insnp++)
7518 if (safe_group_barrier_needed (*insnp))
7519 nr_need_stop++;
7520
7521 if (reorder_type == 1 && n_ready == nr_need_stop)
7522 return 0;
7523 if (reorder_type == 0)
7524 return 1;
7525 insnp = e_ready;
7526 /* Move down everything that needs a stop bit, preserving
7527 relative order. */
7528 while (insnp-- > ready + deleted)
7529 while (insnp >= ready + deleted)
7530 {
7531 rtx_insn *insn = *insnp;
7532 if (! safe_group_barrier_needed (insn))
7533 break;
7534 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7535 *ready = insn;
7536 deleted++;
7537 }
7538 n_ready -= deleted;
7539 ready += deleted;
7540 }
7541
7542 current_cycle = clock_var;
7543 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7544 {
7545 int moved = 0;
7546
7547 insnp = e_ready;
7548 /* Move down loads/stores, preserving relative order. */
7549 while (insnp-- > ready + moved)
7550 while (insnp >= ready + moved)
7551 {
7552 rtx_insn *insn = *insnp;
7553 if (! is_load_p (insn))
7554 break;
7555 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7556 *ready = insn;
7557 moved++;
7558 }
7559 n_ready -= moved;
7560 ready += moved;
7561 }
7562
7563 return 1;
7564 }
7565
7566 /* We are about to begin issuing insns for this clock cycle. Override
7567 the default sort algorithm to better slot instructions. */
7568
7569 static int
7570 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7571 int *pn_ready, int clock_var)
7572 {
7573 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7574 pn_ready, clock_var, 0);
7575 }
7576
7577 /* Like ia64_sched_reorder, but called after issuing each insn.
7578 Override the default sort algorithm to better slot instructions. */
7579
7580 static int
7581 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7582 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7583 int *pn_ready, int clock_var)
7584 {
7585 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7586 clock_var, 1);
7587 }
7588
7589 /* We are about to issue INSN. Return the number of insns left on the
7590 ready queue that can be issued this cycle. */
7591
7592 static int
7593 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7594 int sched_verbose ATTRIBUTE_UNUSED,
7595 rtx_insn *insn,
7596 int can_issue_more ATTRIBUTE_UNUSED)
7597 {
7598 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7599 /* Modulo scheduling does not extend h_i_d when emitting
7600 new instructions. Don't use h_i_d, if we don't have to. */
7601 {
7602 if (DONE_SPEC (insn) & BEGIN_DATA)
7603 pending_data_specs++;
7604 if (CHECK_SPEC (insn) & BEGIN_DATA)
7605 pending_data_specs--;
7606 }
7607
7608 if (DEBUG_INSN_P (insn))
7609 return 1;
7610
7611 last_scheduled_insn = insn;
7612 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7613 if (reload_completed)
7614 {
7615 int needed = group_barrier_needed (insn);
7616
7617 gcc_assert (!needed);
7618 if (CALL_P (insn))
7619 init_insn_group_barriers ();
7620 stops_p [INSN_UID (insn)] = stop_before_p;
7621 stop_before_p = 0;
7622
7623 record_memory_reference (insn);
7624 }
7625 return 1;
7626 }
7627
7628 /* We are choosing an insn from the ready queue. Return zero if INSN
7629 can be chosen. */
7630
7631 static int
7632 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7633 {
7634 gcc_assert (insn && INSN_P (insn));
7635
7636 /* The ALAT has 32 entries. Since we perform conservative
7637 data speculation, we keep the ALAT half-empty. */
7638 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7639 return ready_index == 0 ? -1 : 1;
7640
7641 if (ready_index == 0)
7642 return 0;
7643
7644 if ((!reload_completed
7645 || !safe_group_barrier_needed (insn))
7646 && (!mflag_sched_mem_insns_hard_limit
7647 || !is_load_p (insn)
7648 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7649 return 0;
7650
7651 return 1;
7652 }
7653
7654 /* The following variable value is a pseudo-insn used by the DFA insn
7655 scheduler to change the DFA state when the simulated clock is
7656 increased. */
7657
7658 static rtx_insn *dfa_pre_cycle_insn;
7659
7660 /* Returns 1 when a meaningful insn was scheduled between the last group
7661 barrier and LAST. */
7662 static int
7663 scheduled_good_insn (rtx_insn *last)
7664 {
7665 if (last && recog_memoized (last) >= 0)
7666 return 1;
7667
7668 for ( ;
7669 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7670 && !stops_p[INSN_UID (last)];
7671 last = PREV_INSN (last))
7672 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7673 the ebb we're scheduling. */
7674 if (INSN_P (last) && recog_memoized (last) >= 0)
7675 return 1;
7676
7677 return 0;
7678 }
7679
7680 /* We are about to begin issuing INSN. Return nonzero if we cannot
7681 issue it on the given cycle CLOCK; *SORT_P is cleared if the ready
7682 queue should not be sorted on the next clock start. */
7683
7684 static int
7685 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7686 int clock, int *sort_p)
7687 {
7688 gcc_assert (insn && INSN_P (insn));
7689
7690 if (DEBUG_INSN_P (insn))
7691 return 0;
7692
7693 /* When a group barrier is needed for insn, last_scheduled_insn
7694 should be set. */
7695 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7696 || last_scheduled_insn);
7697
7698 if ((reload_completed
7699 && (safe_group_barrier_needed (insn)
7700 || (mflag_sched_stop_bits_after_every_cycle
7701 && last_clock != clock
7702 && last_scheduled_insn
7703 && scheduled_good_insn (last_scheduled_insn))))
7704 || (last_scheduled_insn
7705 && (CALL_P (last_scheduled_insn)
7706 || unknown_for_bundling_p (last_scheduled_insn))))
7707 {
7708 init_insn_group_barriers ();
7709
7710 if (verbose && dump)
7711 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7712 last_clock == clock ? " + cycle advance" : "");
7713
7714 stop_before_p = 1;
7715 current_cycle = clock;
7716 mem_ops_in_group[current_cycle % 4] = 0;
7717
7718 if (last_clock == clock)
7719 {
7720 state_transition (curr_state, dfa_stop_insn);
7721 if (TARGET_EARLY_STOP_BITS)
7722 *sort_p = (last_scheduled_insn == NULL_RTX
7723 || ! CALL_P (last_scheduled_insn));
7724 else
7725 *sort_p = 0;
7726 return 1;
7727 }
7728
7729 if (last_scheduled_insn)
7730 {
7731 if (unknown_for_bundling_p (last_scheduled_insn))
7732 state_reset (curr_state);
7733 else
7734 {
7735 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7736 state_transition (curr_state, dfa_stop_insn);
7737 state_transition (curr_state, dfa_pre_cycle_insn);
7738 state_transition (curr_state, NULL);
7739 }
7740 }
7741 }
7742 return 0;
7743 }
7744
7745 /* Implement targetm.sched.h_i_d_extended hook.
7746 Extend internal data structures. */
7747 static void
7748 ia64_h_i_d_extended (void)
7749 {
7750 if (stops_p != NULL)
7751 {
7752 int new_clocks_length = get_max_uid () * 3 / 2;
7753 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7754 clocks_length = new_clocks_length;
7755 }
7756 }
7757 \f
7758
7759 /* This structure describes the data used by the backend to guide scheduling.
7760 When the current scheduling point is switched, this data should be saved
7761 and restored later, if the scheduler returns to this point. */
7762 struct _ia64_sched_context
7763 {
7764 state_t prev_cycle_state;
7765 rtx_insn *last_scheduled_insn;
7766 struct reg_write_state rws_sum[NUM_REGS];
7767 struct reg_write_state rws_insn[NUM_REGS];
7768 int first_instruction;
7769 int pending_data_specs;
7770 int current_cycle;
7771 char mem_ops_in_group[4];
7772 };
7773 typedef struct _ia64_sched_context *ia64_sched_context_t;
7774
7775 /* Allocates a scheduling context. */
7776 static void *
7777 ia64_alloc_sched_context (void)
7778 {
7779 return xmalloc (sizeof (struct _ia64_sched_context));
7780 }
7781
7782 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7783 the global context otherwise. */
7784 static void
7785 ia64_init_sched_context (void *_sc, bool clean_p)
7786 {
7787 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7788
7789 sc->prev_cycle_state = xmalloc (dfa_state_size);
7790 if (clean_p)
7791 {
7792 state_reset (sc->prev_cycle_state);
7793 sc->last_scheduled_insn = NULL;
7794 memset (sc->rws_sum, 0, sizeof (rws_sum));
7795 memset (sc->rws_insn, 0, sizeof (rws_insn));
7796 sc->first_instruction = 1;
7797 sc->pending_data_specs = 0;
7798 sc->current_cycle = 0;
7799 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7800 }
7801 else
7802 {
7803 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7804 sc->last_scheduled_insn = last_scheduled_insn;
7805 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7806 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7807 sc->first_instruction = first_instruction;
7808 sc->pending_data_specs = pending_data_specs;
7809 sc->current_cycle = current_cycle;
7810 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7811 }
7812 }
7813
7814 /* Sets the global scheduling context to the one pointed to by _SC. */
7815 static void
7816 ia64_set_sched_context (void *_sc)
7817 {
7818 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7819
7820 gcc_assert (sc != NULL);
7821
7822 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7823 last_scheduled_insn = sc->last_scheduled_insn;
7824 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7825 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7826 first_instruction = sc->first_instruction;
7827 pending_data_specs = sc->pending_data_specs;
7828 current_cycle = sc->current_cycle;
7829 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7830 }
7831
7832 /* Clears the data in the _SC scheduling context. */
7833 static void
7834 ia64_clear_sched_context (void *_sc)
7835 {
7836 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7837
7838 free (sc->prev_cycle_state);
7839 sc->prev_cycle_state = NULL;
7840 }
7841
7842 /* Frees the _SC scheduling context. */
7843 static void
7844 ia64_free_sched_context (void *_sc)
7845 {
7846 gcc_assert (_sc != NULL);
7847
7848 free (_sc);
7849 }
7850
7851 typedef rtx (* gen_func_t) (rtx, rtx);
7852
7853 /* Return a function that will generate a load of mode MODE_NO
7854 with speculation types TS. */
7855 static gen_func_t
7856 get_spec_load_gen_function (ds_t ts, int mode_no)
7857 {
7858 static gen_func_t gen_ld_[] = {
7859 gen_movbi,
7860 gen_movqi_internal,
7861 gen_movhi_internal,
7862 gen_movsi_internal,
7863 gen_movdi_internal,
7864 gen_movsf_internal,
7865 gen_movdf_internal,
7866 gen_movxf_internal,
7867 gen_movti_internal,
7868 gen_zero_extendqidi2,
7869 gen_zero_extendhidi2,
7870 gen_zero_extendsidi2,
7871 };
7872
7873 static gen_func_t gen_ld_a[] = {
7874 gen_movbi_advanced,
7875 gen_movqi_advanced,
7876 gen_movhi_advanced,
7877 gen_movsi_advanced,
7878 gen_movdi_advanced,
7879 gen_movsf_advanced,
7880 gen_movdf_advanced,
7881 gen_movxf_advanced,
7882 gen_movti_advanced,
7883 gen_zero_extendqidi2_advanced,
7884 gen_zero_extendhidi2_advanced,
7885 gen_zero_extendsidi2_advanced,
7886 };
7887 static gen_func_t gen_ld_s[] = {
7888 gen_movbi_speculative,
7889 gen_movqi_speculative,
7890 gen_movhi_speculative,
7891 gen_movsi_speculative,
7892 gen_movdi_speculative,
7893 gen_movsf_speculative,
7894 gen_movdf_speculative,
7895 gen_movxf_speculative,
7896 gen_movti_speculative,
7897 gen_zero_extendqidi2_speculative,
7898 gen_zero_extendhidi2_speculative,
7899 gen_zero_extendsidi2_speculative,
7900 };
7901 static gen_func_t gen_ld_sa[] = {
7902 gen_movbi_speculative_advanced,
7903 gen_movqi_speculative_advanced,
7904 gen_movhi_speculative_advanced,
7905 gen_movsi_speculative_advanced,
7906 gen_movdi_speculative_advanced,
7907 gen_movsf_speculative_advanced,
7908 gen_movdf_speculative_advanced,
7909 gen_movxf_speculative_advanced,
7910 gen_movti_speculative_advanced,
7911 gen_zero_extendqidi2_speculative_advanced,
7912 gen_zero_extendhidi2_speculative_advanced,
7913 gen_zero_extendsidi2_speculative_advanced,
7914 };
7915 static gen_func_t gen_ld_s_a[] = {
7916 gen_movbi_speculative_a,
7917 gen_movqi_speculative_a,
7918 gen_movhi_speculative_a,
7919 gen_movsi_speculative_a,
7920 gen_movdi_speculative_a,
7921 gen_movsf_speculative_a,
7922 gen_movdf_speculative_a,
7923 gen_movxf_speculative_a,
7924 gen_movti_speculative_a,
7925 gen_zero_extendqidi2_speculative_a,
7926 gen_zero_extendhidi2_speculative_a,
7927 gen_zero_extendsidi2_speculative_a,
7928 };
7929
7930 gen_func_t *gen_ld;
7931
7932 if (ts & BEGIN_DATA)
7933 {
7934 if (ts & BEGIN_CONTROL)
7935 gen_ld = gen_ld_sa;
7936 else
7937 gen_ld = gen_ld_a;
7938 }
7939 else if (ts & BEGIN_CONTROL)
7940 {
7941 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7942 || ia64_needs_block_p (ts))
7943 gen_ld = gen_ld_s;
7944 else
7945 gen_ld = gen_ld_s_a;
7946 }
7947 else if (ts == 0)
7948 gen_ld = gen_ld_;
7949 else
7950 gcc_unreachable ();
7951
7952 return gen_ld[mode_no];
7953 }
7954
7955 /* Constants that help map 'machine_mode' to int. */
7956 enum SPEC_MODES
7957 {
7958 SPEC_MODE_INVALID = -1,
7959 SPEC_MODE_FIRST = 0,
7960 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7961 SPEC_MODE_FOR_EXTEND_LAST = 3,
7962 SPEC_MODE_LAST = 8
7963 };
7964
7965 enum
7966 {
7967 /* Offset to reach ZERO_EXTEND patterns. */
7968 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7969 };
7970
7971 /* Return the index of MODE. */
7972 static int
7973 ia64_mode_to_int (machine_mode mode)
7974 {
7975 switch (mode)
7976 {
7977 case E_BImode: return 0; /* SPEC_MODE_FIRST */
7978 case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7979 case E_HImode: return 2;
7980 case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7981 case E_DImode: return 4;
7982 case E_SFmode: return 5;
7983 case E_DFmode: return 6;
7984 case E_XFmode: return 7;
7985 case E_TImode:
7986 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7987 mentioned in itanium[12].md. Predicate fp_register_operand also
7988 needs to be defined. Bottom line: better disable for now. */
7989 return SPEC_MODE_INVALID;
7990 default: return SPEC_MODE_INVALID;
7991 }
7992 }
7993
7994 /* Provide information about speculation capabilities. */
7995 static void
7996 ia64_set_sched_flags (spec_info_t spec_info)
7997 {
7998 unsigned int *flags = &(current_sched_info->flags);
7999
8000 if (*flags & SCHED_RGN
8001 || *flags & SCHED_EBB
8002 || *flags & SEL_SCHED)
8003 {
8004 int mask = 0;
8005
8006 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
8007 || (mflag_sched_ar_data_spec && reload_completed))
8008 {
8009 mask |= BEGIN_DATA;
8010
8011 if (!sel_sched_p ()
8012 && ((mflag_sched_br_in_data_spec && !reload_completed)
8013 || (mflag_sched_ar_in_data_spec && reload_completed)))
8014 mask |= BE_IN_DATA;
8015 }
8016
8017 if (mflag_sched_control_spec
8018 && (!sel_sched_p ()
8019 || reload_completed))
8020 {
8021 mask |= BEGIN_CONTROL;
8022
8023 if (!sel_sched_p () && mflag_sched_in_control_spec)
8024 mask |= BE_IN_CONTROL;
8025 }
8026
8027 spec_info->mask = mask;
8028
8029 if (mask)
8030 {
8031 *flags |= USE_DEPS_LIST | DO_SPECULATION;
8032
8033 if (mask & BE_IN_SPEC)
8034 *flags |= NEW_BBS;
8035
8036 spec_info->flags = 0;
8037
8038 if ((mask & CONTROL_SPEC)
8039 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8040 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
8041
8042 if (sched_verbose >= 1)
8043 spec_info->dump = sched_dump;
8044 else
8045 spec_info->dump = 0;
8046
8047 if (mflag_sched_count_spec_in_critical_path)
8048 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8049 }
8050 }
8051 else
8052 spec_info->mask = 0;
8053 }
8054
8055 /* If INSN is an appropriate load, return its mode index.
8056 Return -1 otherwise. */
8057 static int
8058 get_mode_no_for_insn (rtx_insn *insn)
8059 {
8060 rtx reg, mem, mode_rtx;
8061 int mode_no;
8062 bool extend_p;
8063
8064 extract_insn_cached (insn);
8065
8066 /* We use WHICH_ALTERNATIVE only after reload. This will
8067 guarantee that reload won't touch a speculative insn. */
8068
8069 if (recog_data.n_operands != 2)
8070 return -1;
8071
8072 reg = recog_data.operand[0];
8073 mem = recog_data.operand[1];
8074
8075 /* We should use MEM's mode since REG's mode in presence of
8076 ZERO_EXTEND will always be DImode. */
8077 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8078 /* Process non-speculative ld. */
8079 {
8080 if (!reload_completed)
8081 {
8082 /* Do not speculate into regs like ar.lc. */
8083 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8084 return -1;
8085
8086 if (!MEM_P (mem))
8087 return -1;
8088
8089 {
8090 rtx mem_reg = XEXP (mem, 0);
8091
8092 if (!REG_P (mem_reg))
8093 return -1;
8094 }
8095
8096 mode_rtx = mem;
8097 }
8098 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8099 {
8100 gcc_assert (REG_P (reg) && MEM_P (mem));
8101 mode_rtx = mem;
8102 }
8103 else
8104 return -1;
8105 }
8106 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8107 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8108 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8109 /* Process speculative ld or ld.c. */
8110 {
8111 gcc_assert (REG_P (reg) && MEM_P (mem));
8112 mode_rtx = mem;
8113 }
8114 else
8115 {
8116 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8117
8118 if (attr_class == ITANIUM_CLASS_CHK_A
8119 || attr_class == ITANIUM_CLASS_CHK_S_I
8120 || attr_class == ITANIUM_CLASS_CHK_S_F)
8121 /* Process chk. */
8122 mode_rtx = reg;
8123 else
8124 return -1;
8125 }
8126
8127 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8128
8129 if (mode_no == SPEC_MODE_INVALID)
8130 return -1;
8131
8132 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8133
8134 if (extend_p)
8135 {
8136 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8137 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8138 return -1;
8139
8140 mode_no += SPEC_GEN_EXTEND_OFFSET;
8141 }
8142
8143 return mode_no;
8144 }
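
/* Worked example: for a load of the form (zero_extend:DI (mem:SI ...)),
   GET_MODE of the MEM is SImode, so ia64_mode_to_int returns 3; the
   destination register mode (DImode) differs, so SPEC_GEN_EXTEND_OFFSET (8)
   is added, giving mode_no 11 -- the slot of the gen_zero_extendsidi2
   variants in the generator tables above.  */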
8145
8146 /* If X is an unspec part of a speculative load, return its code.
8147 Return -1 otherwise. */
8148 static int
8149 get_spec_unspec_code (const_rtx x)
8150 {
8151 if (GET_CODE (x) != UNSPEC)
8152 return -1;
8153
8154 {
8155 int code;
8156
8157 code = XINT (x, 1);
8158
8159 switch (code)
8160 {
8161 case UNSPEC_LDA:
8162 case UNSPEC_LDS:
8163 case UNSPEC_LDS_A:
8164 case UNSPEC_LDSA:
8165 return code;
8166
8167 default:
8168 return -1;
8169 }
8170 }
8171 }
8172
8173 /* Implement skip_rtx_p hook. */
8174 static bool
8175 ia64_skip_rtx_p (const_rtx x)
8176 {
8177 return get_spec_unspec_code (x) != -1;
8178 }
8179
8180 /* If INSN is a speculative load, return its UNSPEC code.
8181 Return -1 otherwise. */
8182 static int
8183 get_insn_spec_code (const_rtx insn)
8184 {
8185 rtx pat, reg, mem;
8186
8187 pat = PATTERN (insn);
8188
8189 if (GET_CODE (pat) == COND_EXEC)
8190 pat = COND_EXEC_CODE (pat);
8191
8192 if (GET_CODE (pat) != SET)
8193 return -1;
8194
8195 reg = SET_DEST (pat);
8196 if (!REG_P (reg))
8197 return -1;
8198
8199 mem = SET_SRC (pat);
8200 if (GET_CODE (mem) == ZERO_EXTEND)
8201 mem = XEXP (mem, 0);
8202
8203 return get_spec_unspec_code (mem);
8204 }
8205
8206 /* If INSN is a speculative load, return a ds with the speculation types.
8207 Otherwise [if INSN is a normal instruction] return 0. */
8208 static ds_t
8209 ia64_get_insn_spec_ds (rtx_insn *insn)
8210 {
8211 int code = get_insn_spec_code (insn);
8212
8213 switch (code)
8214 {
8215 case UNSPEC_LDA:
8216 return BEGIN_DATA;
8217
8218 case UNSPEC_LDS:
8219 case UNSPEC_LDS_A:
8220 return BEGIN_CONTROL;
8221
8222 case UNSPEC_LDSA:
8223 return BEGIN_DATA | BEGIN_CONTROL;
8224
8225 default:
8226 return 0;
8227 }
8228 }
8229
8230 /* If INSN is a speculative load return a ds with the speculation types that
8231 will be checked.
8232 Otherwise [if INSN is a normal instruction] return 0. */
8233 static ds_t
8234 ia64_get_insn_checked_ds (rtx_insn *insn)
8235 {
8236 int code = get_insn_spec_code (insn);
8237
8238 switch (code)
8239 {
8240 case UNSPEC_LDA:
8241 return BEGIN_DATA | BEGIN_CONTROL;
8242
8243 case UNSPEC_LDS:
8244 return BEGIN_CONTROL;
8245
8246 case UNSPEC_LDS_A:
8247 case UNSPEC_LDSA:
8248 return BEGIN_DATA | BEGIN_CONTROL;
8249
8250 default:
8251 return 0;
8252 }
8253 }
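
/* For reference (an interpretation, not stated elsewhere in this file): the
   UNSPEC codes above correspond to the IA-64 load flavours -- UNSPEC_LDA is
   ld.a (advanced, data-speculative), UNSPEC_LDS is ld.s
   (control-speculative), UNSPEC_LDSA is ld.sa (both), and UNSPEC_LDS_A
   appears to be the control-speculative form that also allocates an ALAT
   entry so that it can be checked without a branchy check.  */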
8254
8255 /* Return a speculative load pattern for INSN with speculation types TS
8256 and machine mode index MODE_NO, preserving INSN's COND_EXEC predicate
8257 if any. The new pattern is built from the operands recorded in
8258 recog_data. */
8259 static rtx
8260 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8261 {
8262 rtx pat, new_pat;
8263 gen_func_t gen_load;
8264
8265 gen_load = get_spec_load_gen_function (ts, mode_no);
8266
8267 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8268 copy_rtx (recog_data.operand[1]));
8269
8270 pat = PATTERN (insn);
8271 if (GET_CODE (pat) == COND_EXEC)
8272 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8273 new_pat);
8274
8275 return new_pat;
8276 }
8277
8278 static bool
8279 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8280 ds_t ds ATTRIBUTE_UNUSED)
8281 {
8282 return false;
8283 }
8284
8285 /* Implement targetm.sched.speculate_insn hook.
8286 Check whether INSN can be made TS speculative.
8287 If not, return -1.
8288 If so, generate the speculative pattern in *NEW_PAT and return 1.
8289 If the current pattern of INSN already provides TS speculation,
8290 return 0. */
8291 static int
8292 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8293 {
8294 int mode_no;
8295 int res;
8296
8297 gcc_assert (!(ts & ~SPECULATIVE));
8298
8299 if (ia64_spec_check_p (insn))
8300 return -1;
8301
8302 if ((ts & BE_IN_SPEC)
8303 && !insn_can_be_in_speculative_p (insn, ts))
8304 return -1;
8305
8306 mode_no = get_mode_no_for_insn (insn);
8307
8308 if (mode_no != SPEC_MODE_INVALID)
8309 {
8310 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8311 res = 0;
8312 else
8313 {
8314 res = 1;
8315 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8316 }
8317 }
8318 else
8319 res = -1;
8320
8321 return res;
8322 }
8323
8324 /* Return a function that will generate a check for speculation TS with mode
8325 MODE_NO.
8326 If a simple check is needed, pass true for SIMPLE_CHECK_P.
8327 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
8328 static gen_func_t
8329 get_spec_check_gen_function (ds_t ts, int mode_no,
8330 bool simple_check_p, bool clearing_check_p)
8331 {
8332 static gen_func_t gen_ld_c_clr[] = {
8333 gen_movbi_clr,
8334 gen_movqi_clr,
8335 gen_movhi_clr,
8336 gen_movsi_clr,
8337 gen_movdi_clr,
8338 gen_movsf_clr,
8339 gen_movdf_clr,
8340 gen_movxf_clr,
8341 gen_movti_clr,
8342 gen_zero_extendqidi2_clr,
8343 gen_zero_extendhidi2_clr,
8344 gen_zero_extendsidi2_clr,
8345 };
8346 static gen_func_t gen_ld_c_nc[] = {
8347 gen_movbi_nc,
8348 gen_movqi_nc,
8349 gen_movhi_nc,
8350 gen_movsi_nc,
8351 gen_movdi_nc,
8352 gen_movsf_nc,
8353 gen_movdf_nc,
8354 gen_movxf_nc,
8355 gen_movti_nc,
8356 gen_zero_extendqidi2_nc,
8357 gen_zero_extendhidi2_nc,
8358 gen_zero_extendsidi2_nc,
8359 };
8360 static gen_func_t gen_chk_a_clr[] = {
8361 gen_advanced_load_check_clr_bi,
8362 gen_advanced_load_check_clr_qi,
8363 gen_advanced_load_check_clr_hi,
8364 gen_advanced_load_check_clr_si,
8365 gen_advanced_load_check_clr_di,
8366 gen_advanced_load_check_clr_sf,
8367 gen_advanced_load_check_clr_df,
8368 gen_advanced_load_check_clr_xf,
8369 gen_advanced_load_check_clr_ti,
8370 gen_advanced_load_check_clr_di,
8371 gen_advanced_load_check_clr_di,
8372 gen_advanced_load_check_clr_di,
8373 };
8374 static gen_func_t gen_chk_a_nc[] = {
8375 gen_advanced_load_check_nc_bi,
8376 gen_advanced_load_check_nc_qi,
8377 gen_advanced_load_check_nc_hi,
8378 gen_advanced_load_check_nc_si,
8379 gen_advanced_load_check_nc_di,
8380 gen_advanced_load_check_nc_sf,
8381 gen_advanced_load_check_nc_df,
8382 gen_advanced_load_check_nc_xf,
8383 gen_advanced_load_check_nc_ti,
8384 gen_advanced_load_check_nc_di,
8385 gen_advanced_load_check_nc_di,
8386 gen_advanced_load_check_nc_di,
8387 };
8388 static gen_func_t gen_chk_s[] = {
8389 gen_speculation_check_bi,
8390 gen_speculation_check_qi,
8391 gen_speculation_check_hi,
8392 gen_speculation_check_si,
8393 gen_speculation_check_di,
8394 gen_speculation_check_sf,
8395 gen_speculation_check_df,
8396 gen_speculation_check_xf,
8397 gen_speculation_check_ti,
8398 gen_speculation_check_di,
8399 gen_speculation_check_di,
8400 gen_speculation_check_di,
8401 };
8402
8403 gen_func_t *gen_check;
8404
8405 if (ts & BEGIN_DATA)
8406 {
8407 /* We don't need recovery because even if this is ld.sa, an
8408 ALAT entry will be allocated only if the NAT bit is set to zero.
8409 So it is enough to use ld.c here. */
8410
8411 if (simple_check_p)
8412 {
8413 gcc_assert (mflag_sched_spec_ldc);
8414
8415 if (clearing_check_p)
8416 gen_check = gen_ld_c_clr;
8417 else
8418 gen_check = gen_ld_c_nc;
8419 }
8420 else
8421 {
8422 if (clearing_check_p)
8423 gen_check = gen_chk_a_clr;
8424 else
8425 gen_check = gen_chk_a_nc;
8426 }
8427 }
8428 else if (ts & BEGIN_CONTROL)
8429 {
8430 if (simple_check_p)
8431 /* We might want to use ld.sa -> ld.c instead of
8432 ld.s -> chk.s. */
8433 {
8434 gcc_assert (!ia64_needs_block_p (ts));
8435
8436 if (clearing_check_p)
8437 gen_check = gen_ld_c_clr;
8438 else
8439 gen_check = gen_ld_c_nc;
8440 }
8441 else
8442 {
8443 gen_check = gen_chk_s;
8444 }
8445 }
8446 else
8447 gcc_unreachable ();
8448
8449 gcc_assert (mode_no >= 0);
8450 return gen_check[mode_no];
8451 }
8452
8453 /* Return nonzero if a speculation of types TS needs a branchy recovery check. */
8454 static bool
8455 ia64_needs_block_p (ds_t ts)
8456 {
8457 if (ts & BEGIN_DATA)
8458 return !mflag_sched_spec_ldc;
8459
8460 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8461
8462 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8463 }
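
/* "Branchy" here means a chk.a/chk.s style check that branches to recovery
   code, as opposed to the ld.c form that simply reloads the value in place;
   which form is available is governed by mflag_sched_spec_ldc and
   mflag_sched_spec_control_ldc above.  */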
8464
8465 /* Generate (or regenerate) a recovery check for INSN. */
8466 static rtx
8467 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8468 {
8469 rtx op1, pat, check_pat;
8470 gen_func_t gen_check;
8471 int mode_no;
8472
8473 mode_no = get_mode_no_for_insn (insn);
8474 gcc_assert (mode_no >= 0);
8475
8476 if (label)
8477 op1 = label;
8478 else
8479 {
8480 gcc_assert (!ia64_needs_block_p (ds));
8481 op1 = copy_rtx (recog_data.operand[1]);
8482 }
8483
8484 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8485 true);
8486
8487 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8488
8489 pat = PATTERN (insn);
8490 if (GET_CODE (pat) == COND_EXEC)
8491 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8492 check_pat);
8493
8494 return check_pat;
8495 }
8496
8497 /* Return nonzero if X is a branchy recovery check. */
8498 static int
8499 ia64_spec_check_p (rtx x)
8500 {
8501 x = PATTERN (x);
8502 if (GET_CODE (x) == COND_EXEC)
8503 x = COND_EXEC_CODE (x);
8504 if (GET_CODE (x) == SET)
8505 return ia64_spec_check_src_p (SET_SRC (x));
8506 return 0;
8507 }
8508
8509 /* Return nonzero if SRC belongs to a recovery check. */
8510 static int
8511 ia64_spec_check_src_p (rtx src)
8512 {
8513 if (GET_CODE (src) == IF_THEN_ELSE)
8514 {
8515 rtx t;
8516
8517 t = XEXP (src, 0);
8518 if (GET_CODE (t) == NE)
8519 {
8520 t = XEXP (t, 0);
8521
8522 if (GET_CODE (t) == UNSPEC)
8523 {
8524 int code;
8525
8526 code = XINT (t, 1);
8527
8528 if (code == UNSPEC_LDCCLR
8529 || code == UNSPEC_LDCNC
8530 || code == UNSPEC_CHKACLR
8531 || code == UNSPEC_CHKANC
8532 || code == UNSPEC_CHKS)
8533 {
8534 gcc_assert (code != 0);
8535 return code;
8536 }
8537 }
8538 }
8539 }
8540 return 0;
8541 }
8542 \f
8543
8544 /* The following page contains abstract data `bundle states' which are
8545 used for bundling insns (inserting nops and template generation). */
8546
8547 /* The following describes state of insn bundling. */
8548
8549 struct bundle_state
8550 {
8551 /* Unique bundle state number, used to identify states in the debugging
8552 output. */
8553 int unique_num;
8554 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8555 /* number of nops before and after the insn */
8556 short before_nops_num, after_nops_num;
8557 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8558 insn) */
8559 int cost; /* cost of the state in cycles */
8560 int accumulated_insns_num; /* number of all previous insns including
8561 nops. L is considered as 2 insns */
8562 int branch_deviation; /* deviation of previous branches from 3rd slots */
8563 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8564 struct bundle_state *next; /* next state with the same insn_num */
8565 struct bundle_state *originator; /* originator (previous insn state) */
8566 /* All bundle states are in the following chain. */
8567 struct bundle_state *allocated_states_chain;
8568 /* The DFA State after issuing the insn and the nops. */
8569 state_t dfa_state;
8570 };
8571
8572 /* The following maps an insn number to the corresponding bundle state. */
8573
8574 static struct bundle_state **index_to_bundle_states;
8575
8576 /* The unique number of next bundle state. */
8577
8578 static int bundle_states_num;
8579
8580 /* All allocated bundle states are in the following chain. */
8581
8582 static struct bundle_state *allocated_bundle_states_chain;
8583
8584 /* All allocated but not used bundle states are in the following
8585 chain. */
8586
8587 static struct bundle_state *free_bundle_state_chain;
8588
8589
8590 /* The following function returns a free bundle state. */
8591
8592 static struct bundle_state *
8593 get_free_bundle_state (void)
8594 {
8595 struct bundle_state *result;
8596
8597 if (free_bundle_state_chain != NULL)
8598 {
8599 result = free_bundle_state_chain;
8600 free_bundle_state_chain = result->next;
8601 }
8602 else
8603 {
8604 result = XNEW (struct bundle_state);
8605 result->dfa_state = xmalloc (dfa_state_size);
8606 result->allocated_states_chain = allocated_bundle_states_chain;
8607 allocated_bundle_states_chain = result;
8608 }
8609 result->unique_num = bundle_states_num++;
8610 return result;
8611
8612 }
8613
8614 /* The following function frees given bundle state. */
8615
8616 static void
8617 free_bundle_state (struct bundle_state *state)
8618 {
8619 state->next = free_bundle_state_chain;
8620 free_bundle_state_chain = state;
8621 }
8622
8623 /* Start work with abstract data `bundle states'. */
8624
8625 static void
8626 initiate_bundle_states (void)
8627 {
8628 bundle_states_num = 0;
8629 free_bundle_state_chain = NULL;
8630 allocated_bundle_states_chain = NULL;
8631 }
8632
8633 /* Finish work with abstract data `bundle states'. */
8634
8635 static void
8636 finish_bundle_states (void)
8637 {
8638 struct bundle_state *curr_state, *next_state;
8639
8640 for (curr_state = allocated_bundle_states_chain;
8641 curr_state != NULL;
8642 curr_state = next_state)
8643 {
8644 next_state = curr_state->allocated_states_chain;
8645 free (curr_state->dfa_state);
8646 free (curr_state);
8647 }
8648 }
8649
8650 /* Hashtable helpers. */
8651
8652 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8653 {
8654 static inline hashval_t hash (const bundle_state *);
8655 static inline bool equal (const bundle_state *, const bundle_state *);
8656 };
8657
8658 /* The function returns hash of BUNDLE_STATE. */
8659
8660 inline hashval_t
8661 bundle_state_hasher::hash (const bundle_state *state)
8662 {
8663 unsigned result, i;
8664
8665 for (result = i = 0; i < dfa_state_size; i++)
8666 result += (((unsigned char *) state->dfa_state) [i]
8667 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8668 return result + state->insn_num;
8669 }
8670
8671 /* The function returns nonzero if the bundle state keys are equal. */
8672
8673 inline bool
8674 bundle_state_hasher::equal (const bundle_state *state1,
8675 const bundle_state *state2)
8676 {
8677 return (state1->insn_num == state2->insn_num
8678 && memcmp (state1->dfa_state, state2->dfa_state,
8679 dfa_state_size) == 0);
8680 }
8681
8682 /* Hash table of the bundle states. The key is dfa_state and insn_num
8683 of the bundle states. */
8684
8685 static hash_table<bundle_state_hasher> *bundle_state_table;
8686
8687 /* The function inserts BUNDLE_STATE into the hash table. The
8688 function returns nonzero if the bundle has been inserted into the
8689 table. The table contains the best bundle state for each given key. */
8690
8691 static int
8692 insert_bundle_state (struct bundle_state *bundle_state)
8693 {
8694 struct bundle_state **entry_ptr;
8695
8696 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8697 if (*entry_ptr == NULL)
8698 {
8699 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8700 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8701 *entry_ptr = bundle_state;
8702 return TRUE;
8703 }
8704 else if (bundle_state->cost < (*entry_ptr)->cost
8705 || (bundle_state->cost == (*entry_ptr)->cost
8706 && ((*entry_ptr)->accumulated_insns_num
8707 > bundle_state->accumulated_insns_num
8708 || ((*entry_ptr)->accumulated_insns_num
8709 == bundle_state->accumulated_insns_num
8710 && ((*entry_ptr)->branch_deviation
8711 > bundle_state->branch_deviation
8712 || ((*entry_ptr)->branch_deviation
8713 == bundle_state->branch_deviation
8714 && (*entry_ptr)->middle_bundle_stops
8715 > bundle_state->middle_bundle_stops))))))
8716
8717 {
8718 struct bundle_state temp;
8719
8720 temp = **entry_ptr;
8721 **entry_ptr = *bundle_state;
8722 (*entry_ptr)->next = temp.next;
8723 *bundle_state = temp;
8724 }
8725 return FALSE;
8726 }
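
/* In other words, when two states share a key (dfa_state, insn_num), the one
   kept is the better one under lexicographic comparison of
   (cost, accumulated_insns_num, branch_deviation, middle_bundle_stops),
   all minimized.  */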
8727
8728 /* Start work with the hash table. */
8729
8730 static void
8731 initiate_bundle_state_table (void)
8732 {
8733 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8734 }
8735
8736 /* Finish work with the hash table. */
8737
8738 static void
8739 finish_bundle_state_table (void)
8740 {
8741 delete bundle_state_table;
8742 bundle_state_table = NULL;
8743 }
8744
8745 \f
8746
8747 /* The following variable is an insn `nop' used to check bundle states
8748 with different numbers of inserted nops. */
8749
8750 static rtx_insn *ia64_nop;
8751
8752 /* The following function tries to issue NOPS_NUM nops for the current
8753 state without advancing the processor cycle. If this fails, the
8754 function returns FALSE and frees the current state. */
8755
8756 static int
8757 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8758 {
8759 int i;
8760
8761 for (i = 0; i < nops_num; i++)
8762 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8763 {
8764 free_bundle_state (curr_state);
8765 return FALSE;
8766 }
8767 return TRUE;
8768 }
8769
8770 /* The following function tries to issue INSN for the current
8771 state without advancing the processor cycle. If this fails, the
8772 function returns FALSE and frees the current state. */
8773
8774 static int
8775 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8776 {
8777 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8778 {
8779 free_bundle_state (curr_state);
8780 return FALSE;
8781 }
8782 return TRUE;
8783 }
8784
8785 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8786 starting from ORIGINATOR without advancing the processor cycle. If
8787 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8788 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8789 If successful, the function creates a new bundle state and
8790 inserts it into the hash table and into `index_to_bundle_states'. */
8791
8792 static void
8793 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8794 rtx_insn *insn, int try_bundle_end_p,
8795 int only_bundle_end_p)
8796 {
8797 struct bundle_state *curr_state;
8798
8799 curr_state = get_free_bundle_state ();
8800 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8801 curr_state->insn = insn;
8802 curr_state->insn_num = originator->insn_num + 1;
8803 curr_state->cost = originator->cost;
8804 curr_state->originator = originator;
8805 curr_state->before_nops_num = before_nops_num;
8806 curr_state->after_nops_num = 0;
8807 curr_state->accumulated_insns_num
8808 = originator->accumulated_insns_num + before_nops_num;
8809 curr_state->branch_deviation = originator->branch_deviation;
8810 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8811 gcc_assert (insn);
8812 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8813 {
8814 gcc_assert (GET_MODE (insn) != TImode);
8815 if (!try_issue_nops (curr_state, before_nops_num))
8816 return;
8817 if (!try_issue_insn (curr_state, insn))
8818 return;
8819 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8820 if (curr_state->accumulated_insns_num % 3 != 0)
8821 curr_state->middle_bundle_stops++;
8822 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8823 && curr_state->accumulated_insns_num % 3 != 0)
8824 {
8825 free_bundle_state (curr_state);
8826 return;
8827 }
8828 }
8829 else if (GET_MODE (insn) != TImode)
8830 {
8831 if (!try_issue_nops (curr_state, before_nops_num))
8832 return;
8833 if (!try_issue_insn (curr_state, insn))
8834 return;
8835 curr_state->accumulated_insns_num++;
8836 gcc_assert (!unknown_for_bundling_p (insn));
8837
8838 if (ia64_safe_type (insn) == TYPE_L)
8839 curr_state->accumulated_insns_num++;
8840 }
8841 else
8842 {
8843 /* If this is an insn that must be first in a group, then don't allow
8844 nops to be emitted before it. Currently, alloc is the only such
8845 supported instruction. */
8846 /* ??? The bundling automatons should handle this for us, but they do
8847 not yet have support for the first_insn attribute. */
8848 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8849 {
8850 free_bundle_state (curr_state);
8851 return;
8852 }
8853
8854 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8855 state_transition (curr_state->dfa_state, NULL);
8856 curr_state->cost++;
8857 if (!try_issue_nops (curr_state, before_nops_num))
8858 return;
8859 if (!try_issue_insn (curr_state, insn))
8860 return;
8861 curr_state->accumulated_insns_num++;
8862 if (unknown_for_bundling_p (insn))
8863 {
8864 /* Finish bundle containing asm insn. */
8865 curr_state->after_nops_num
8866 = 3 - curr_state->accumulated_insns_num % 3;
8867 curr_state->accumulated_insns_num
8868 += 3 - curr_state->accumulated_insns_num % 3;
8869 }
8870 else if (ia64_safe_type (insn) == TYPE_L)
8871 curr_state->accumulated_insns_num++;
8872 }
8873 if (ia64_safe_type (insn) == TYPE_B)
8874 curr_state->branch_deviation
8875 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8876 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8877 {
8878 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8879 {
8880 state_t dfa_state;
8881 struct bundle_state *curr_state1;
8882 struct bundle_state *allocated_states_chain;
8883
8884 curr_state1 = get_free_bundle_state ();
8885 dfa_state = curr_state1->dfa_state;
8886 allocated_states_chain = curr_state1->allocated_states_chain;
8887 *curr_state1 = *curr_state;
8888 curr_state1->dfa_state = dfa_state;
8889 curr_state1->allocated_states_chain = allocated_states_chain;
8890 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8891 dfa_state_size);
8892 curr_state = curr_state1;
8893 }
8894 if (!try_issue_nops (curr_state,
8895 3 - curr_state->accumulated_insns_num % 3))
8896 return;
8897 curr_state->after_nops_num
8898 = 3 - curr_state->accumulated_insns_num % 3;
8899 curr_state->accumulated_insns_num
8900 += 3 - curr_state->accumulated_insns_num % 3;
8901 }
8902 if (!insert_bundle_state (curr_state))
8903 free_bundle_state (curr_state);
8904 return;
8905 }
8906
8907 /* The following function returns the position in the two-bundle window
8908 for the given STATE. */
8909
8910 static int
8911 get_max_pos (state_t state)
8912 {
8913 if (cpu_unit_reservation_p (state, pos_6))
8914 return 6;
8915 else if (cpu_unit_reservation_p (state, pos_5))
8916 return 5;
8917 else if (cpu_unit_reservation_p (state, pos_4))
8918 return 4;
8919 else if (cpu_unit_reservation_p (state, pos_3))
8920 return 3;
8921 else if (cpu_unit_reservation_p (state, pos_2))
8922 return 2;
8923 else if (cpu_unit_reservation_p (state, pos_1))
8924 return 1;
8925 else
8926 return 0;
8927 }
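
/* The pos_1 .. pos_6 units appear to record how many of the six slots of the
   current two-bundle window are occupied; get_template below is then queried
   either at position 3 (first bundle filled) or 6 (both bundles filled).  */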
8928
8929 /* The function returns the code of a possible template for the given
8930 position and state. The function should be called only with
8931 position values of 3 or 6. We avoid generating F NOPs by putting
8932 templates containing F insns at the end of the template search,
8933 because of an undocumented anomaly in McKinley-derived cores that can
8934 cause stalls if an F-unit insn (including a NOP) is issued within a
8935 six-cycle window after reading certain application registers (such
8936 as ar.bsp). Furthermore, power considerations also argue against
8937 the use of F-unit instructions unless they're really needed. */
8938
8939 static int
8940 get_template (state_t state, int pos)
8941 {
8942 switch (pos)
8943 {
8944 case 3:
8945 if (cpu_unit_reservation_p (state, _0mmi_))
8946 return 1;
8947 else if (cpu_unit_reservation_p (state, _0mii_))
8948 return 0;
8949 else if (cpu_unit_reservation_p (state, _0mmb_))
8950 return 7;
8951 else if (cpu_unit_reservation_p (state, _0mib_))
8952 return 6;
8953 else if (cpu_unit_reservation_p (state, _0mbb_))
8954 return 5;
8955 else if (cpu_unit_reservation_p (state, _0bbb_))
8956 return 4;
8957 else if (cpu_unit_reservation_p (state, _0mmf_))
8958 return 3;
8959 else if (cpu_unit_reservation_p (state, _0mfi_))
8960 return 2;
8961 else if (cpu_unit_reservation_p (state, _0mfb_))
8962 return 8;
8963 else if (cpu_unit_reservation_p (state, _0mlx_))
8964 return 9;
8965 else
8966 gcc_unreachable ();
8967 case 6:
8968 if (cpu_unit_reservation_p (state, _1mmi_))
8969 return 1;
8970 else if (cpu_unit_reservation_p (state, _1mii_))
8971 return 0;
8972 else if (cpu_unit_reservation_p (state, _1mmb_))
8973 return 7;
8974 else if (cpu_unit_reservation_p (state, _1mib_))
8975 return 6;
8976 else if (cpu_unit_reservation_p (state, _1mbb_))
8977 return 5;
8978 else if (cpu_unit_reservation_p (state, _1bbb_))
8979 return 4;
8980 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8981 return 3;
8982 else if (cpu_unit_reservation_p (state, _1mfi_))
8983 return 2;
8984 else if (cpu_unit_reservation_p (state, _1mfb_))
8985 return 8;
8986 else if (cpu_unit_reservation_p (state, _1mlx_))
8987 return 9;
8988 else
8989 gcc_unreachable ();
8990 default:
8991 gcc_unreachable ();
8992 }
8993 }
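
/* The value returned here is effectively an index into bundle_name[] (see
   get_bundle_name above): for example, a state whose first three slots
   satisfy the _0mmb_ reservation yields 7, i.e. the ".mmb" template.  */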
8994
8995 /* True when INSN is important for bundling. */
8996
8997 static bool
8998 important_for_bundling_p (rtx_insn *insn)
8999 {
9000 return (INSN_P (insn)
9001 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
9002 && GET_CODE (PATTERN (insn)) != USE
9003 && GET_CODE (PATTERN (insn)) != CLOBBER);
9004 }
9005
9006 /* The following function returns the first insn important for insn
9007 bundling at or after INSN and before TAIL. */
9008
9009 static rtx_insn *
9010 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
9011 {
9012 for (; insn && insn != tail; insn = NEXT_INSN (insn))
9013 if (important_for_bundling_p (insn))
9014 return insn;
9015 return NULL;
9016 }
9017
9018 /* True when INSN is unknown, but important, for bundling. */
9019
9020 static bool
9021 unknown_for_bundling_p (rtx_insn *insn)
9022 {
9023 return (INSN_P (insn)
9024 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
9025 && GET_CODE (PATTERN (insn)) != USE
9026 && GET_CODE (PATTERN (insn)) != CLOBBER);
9027 }
9028
9029 /* Add a bundle selector TEMPLATE0 before INSN. */
9030
9031 static void
9032 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
9033 {
9034 rtx b = gen_bundle_selector (GEN_INT (template0));
9035
9036 ia64_emit_insn_before (b, insn);
9037 #if NR_BUNDLES == 10
9038 if ((template0 == 4 || template0 == 5)
9039 && ia64_except_unwind_info (&global_options) == UI_TARGET)
9040 {
9041 int i;
9042 rtx note = NULL_RTX;
9043
9044 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
9045 first or second slot. If it is and has a REG_EH_REGION note, copy the
9046 note to the following nops, as br.call sets rp to the address of the
9047 following bundle and therefore an EH region end must be on a bundle
9048 boundary. */
9049 insn = PREV_INSN (insn);
9050 for (i = 0; i < 3; i++)
9051 {
9052 do
9053 insn = next_active_insn (insn);
9054 while (NONJUMP_INSN_P (insn)
9055 && get_attr_empty (insn) == EMPTY_YES);
9056 if (CALL_P (insn))
9057 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9058 else if (note)
9059 {
9060 int code;
9061
9062 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9063 || code == CODE_FOR_nop_b);
9064 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9065 note = NULL_RTX;
9066 else
9067 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9068 }
9069 }
9070 }
9071 #endif
9072 }
9073
9074 /* The following function does insn bundling. Bundling means
9075 inserting templates and nop insns to fit insn groups into permitted
9076 templates. Instruction scheduling uses an NDFA (non-deterministic
9077 finite automaton) encoding information about the templates and the
9078 inserted nops. The nondeterminism of the automaton permits following
9079 all possible insn sequences very quickly.
9080
9081 Unfortunately it is not possible to get information about inserting
9082 nop insns and used templates from the automaton states. The
9083 automaton only says that we can issue an insn, possibly inserting
9084 some nops before it and using some template. Therefore insn
9085 bundling in this function is implemented by using a DFA
9086 (deterministic finite automaton). We follow all possible insn
9087 sequences by inserting 0-2 nops (that is what the NDFA describes for
9088 insn scheduling) before/after each insn being bundled. We know the
9089 start of a simulated processor cycle from insn scheduling (an insn
9090 starting a new cycle has TImode).
9091
9092 A simple implementation of insn bundling would create an enormous
9093 number of possible insn sequences satisfying the information about new
9094 cycle ticks taken from the insn scheduling. To make the algorithm
9095 practical we use dynamic programming. Each decision (about
9096 inserting nops and implicitly about previous decisions) is described
9097 by a structure bundle_state (see above). If we generate the same
9098 bundle state (the key is the automaton state after issuing the insns and
9099 nops for it), we reuse the already generated one. As a consequence we
9100 reject some decisions which cannot improve the solution and
9101 reduce the memory used by the algorithm.
9102
9103 When we reach the end of EBB (extended basic block), we choose the
9104 best sequence and then, moving back in EBB, insert templates for
9105 the best alternative. The templates are taken from querying
9106 automaton state for each insn in chosen bundle states.
9107
9108 So the algorithm makes two (forward and backward) passes through
9109 EBB. */
9110
9111 static void
9112 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9113 {
9114 struct bundle_state *curr_state, *next_state, *best_state;
9115 rtx_insn *insn, *next_insn;
9116 int insn_num;
9117 int i, bundle_end_p, only_bundle_end_p, asm_p;
9118 int pos = 0, max_pos, template0, template1;
9119 rtx_insn *b;
9120 enum attr_type type;
9121
9122 insn_num = 0;
9123 /* Count insns in the EBB. */
9124 for (insn = NEXT_INSN (prev_head_insn);
9125 insn && insn != tail;
9126 insn = NEXT_INSN (insn))
9127 if (INSN_P (insn))
9128 insn_num++;
9129 if (insn_num == 0)
9130 return;
9131 bundling_p = 1;
9132 dfa_clean_insn_cache ();
9133 initiate_bundle_state_table ();
9134 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9135 /* First (forward) pass -- generation of bundle states. */
9136 curr_state = get_free_bundle_state ();
9137 curr_state->insn = NULL;
9138 curr_state->before_nops_num = 0;
9139 curr_state->after_nops_num = 0;
9140 curr_state->insn_num = 0;
9141 curr_state->cost = 0;
9142 curr_state->accumulated_insns_num = 0;
9143 curr_state->branch_deviation = 0;
9144 curr_state->middle_bundle_stops = 0;
9145 curr_state->next = NULL;
9146 curr_state->originator = NULL;
9147 state_reset (curr_state->dfa_state);
9148 index_to_bundle_states [0] = curr_state;
9149 insn_num = 0;
9150 /* Shift the cycle mark if it is set on an insn which could be ignored. */
9151 for (insn = NEXT_INSN (prev_head_insn);
9152 insn != tail;
9153 insn = NEXT_INSN (insn))
9154 if (INSN_P (insn)
9155 && !important_for_bundling_p (insn)
9156 && GET_MODE (insn) == TImode)
9157 {
9158 PUT_MODE (insn, VOIDmode);
9159 for (next_insn = NEXT_INSN (insn);
9160 next_insn != tail;
9161 next_insn = NEXT_INSN (next_insn))
9162 if (important_for_bundling_p (next_insn)
9163 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9164 {
9165 PUT_MODE (next_insn, TImode);
9166 break;
9167 }
9168 }
9169 /* Forward pass: generation of bundle states. */
9170 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9171 insn != NULL_RTX;
9172 insn = next_insn)
9173 {
9174 gcc_assert (important_for_bundling_p (insn));
9175 type = ia64_safe_type (insn);
9176 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9177 insn_num++;
9178 index_to_bundle_states [insn_num] = NULL;
9179 for (curr_state = index_to_bundle_states [insn_num - 1];
9180 curr_state != NULL;
9181 curr_state = next_state)
9182 {
9183 pos = curr_state->accumulated_insns_num % 3;
9184 next_state = curr_state->next;
9185 /* We must fill up the current bundle in order to start a
9186 subsequent asm insn in a new bundle. An asm insn is always
9187 placed in a separate bundle. */
9188 only_bundle_end_p
9189 = (next_insn != NULL_RTX
9190 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9191 && unknown_for_bundling_p (next_insn));
9192 /* We may fill up the current bundle if it is the cycle end
9193 without a group barrier. */
9194 bundle_end_p
9195 = (only_bundle_end_p || next_insn == NULL_RTX
9196 || (GET_MODE (next_insn) == TImode
9197 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9198 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9199 || type == TYPE_S)
9200 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9201 only_bundle_end_p);
9202 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9203 only_bundle_end_p);
9204 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9205 only_bundle_end_p);
9206 }
9207 gcc_assert (index_to_bundle_states [insn_num]);
9208 for (curr_state = index_to_bundle_states [insn_num];
9209 curr_state != NULL;
9210 curr_state = curr_state->next)
9211 if (verbose >= 2 && dump)
9212 {
9213 /* This structure is taken from the generated code of the
9214 pipeline hazard recognizer (see file insn-attrtab.c).
9215 Please don't forget to change this structure if a new
9216 automaton is added to the .md file. */
9217 struct DFA_chip
9218 {
9219 unsigned short one_automaton_state;
9220 unsigned short oneb_automaton_state;
9221 unsigned short two_automaton_state;
9222 unsigned short twob_automaton_state;
9223 };
9224
9225 fprintf
9226 (dump,
9227 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9228 curr_state->unique_num,
9229 (curr_state->originator == NULL
9230 ? -1 : curr_state->originator->unique_num),
9231 curr_state->cost,
9232 curr_state->before_nops_num, curr_state->after_nops_num,
9233 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9234 curr_state->middle_bundle_stops,
9235 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9236 INSN_UID (insn));
9237 }
9238 }
9239
9240 /* We should find a solution because the 2nd insn scheduling has
9241 found one. */
9242 gcc_assert (index_to_bundle_states [insn_num]);
9243 /* Find a state corresponding to the best insn sequence. */
9244 best_state = NULL;
9245 for (curr_state = index_to_bundle_states [insn_num];
9246 curr_state != NULL;
9247 curr_state = curr_state->next)
9248 /* We only look at states whose last bundle is completely filled.
9249 Among those, we first prefer insn sequences with minimal cost,
9250 then with minimal inserted nops, and finally with branch insns
9251 placed in the 3rd slots. */
9252 if (curr_state->accumulated_insns_num % 3 == 0
9253 && (best_state == NULL || best_state->cost > curr_state->cost
9254 || (best_state->cost == curr_state->cost
9255 && (curr_state->accumulated_insns_num
9256 < best_state->accumulated_insns_num
9257 || (curr_state->accumulated_insns_num
9258 == best_state->accumulated_insns_num
9259 && (curr_state->branch_deviation
9260 < best_state->branch_deviation
9261 || (curr_state->branch_deviation
9262 == best_state->branch_deviation
9263 && curr_state->middle_bundle_stops
9264 < best_state->middle_bundle_stops)))))))
9265 best_state = curr_state;
9266 /* Second (backward) pass: adding nops and templates. */
9267 gcc_assert (best_state);
9268 insn_num = best_state->before_nops_num;
9269 template0 = template1 = -1;
9270 for (curr_state = best_state;
9271 curr_state->originator != NULL;
9272 curr_state = curr_state->originator)
9273 {
9274 insn = curr_state->insn;
9275 asm_p = unknown_for_bundling_p (insn);
9276 insn_num++;
9277 if (verbose >= 2 && dump)
9278 {
9279 struct DFA_chip
9280 {
9281 unsigned short one_automaton_state;
9282 unsigned short oneb_automaton_state;
9283 unsigned short two_automaton_state;
9284 unsigned short twob_automaton_state;
9285 };
9286
9287 fprintf
9288 (dump,
9289 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9290 curr_state->unique_num,
9291 (curr_state->originator == NULL
9292 ? -1 : curr_state->originator->unique_num),
9293 curr_state->cost,
9294 curr_state->before_nops_num, curr_state->after_nops_num,
9295 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9296 curr_state->middle_bundle_stops,
9297 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9298 INSN_UID (insn));
9299 }
9300 /* Find the position in the current bundle window. The window can
9301 contain at most two bundles. A two-bundle window means that
9302 the processor will make two bundle rotations. */
9303 max_pos = get_max_pos (curr_state->dfa_state);
9304 if (max_pos == 6
9305 /* The following (negative template number) means that the
9306 processor did one bundle rotation. */
9307 || (max_pos == 3 && template0 < 0))
9308 {
9309 /* We are at the end of the window -- find template(s) for
9310 its bundle(s). */
9311 pos = max_pos;
9312 if (max_pos == 3)
9313 template0 = get_template (curr_state->dfa_state, 3);
9314 else
9315 {
9316 template1 = get_template (curr_state->dfa_state, 3);
9317 template0 = get_template (curr_state->dfa_state, 6);
9318 }
9319 }
9320 if (max_pos > 3 && template1 < 0)
9321 /* This may happen when we have a stop inside a bundle. */
9322 {
9323 gcc_assert (pos <= 3);
9324 template1 = get_template (curr_state->dfa_state, 3);
9325 pos += 3;
9326 }
9327 if (!asm_p)
9328 /* Emit nops after the current insn. */
9329 for (i = 0; i < curr_state->after_nops_num; i++)
9330 {
9331 rtx nop_pat = gen_nop ();
9332 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9333 pos--;
9334 gcc_assert (pos >= 0);
9335 if (pos % 3 == 0)
9336 {
9337 /* We are at the start of a bundle: emit the template
9338 (it should be defined). */
9339 gcc_assert (template0 >= 0);
9340 ia64_add_bundle_selector_before (template0, nop);
9341 /* If we have a two-bundle window, we make one bundle
9342 rotation. Otherwise template0 will be undefined
9343 (a negative value). */
9344 template0 = template1;
9345 template1 = -1;
9346 }
9347 }
9348 /* Move the position backward in the window. A group barrier has
9349 no slot. An asm insn takes a whole bundle. */
9350 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9351 && !unknown_for_bundling_p (insn))
9352 pos--;
9353 /* Long insn takes 2 slots. */
9354 if (ia64_safe_type (insn) == TYPE_L)
9355 pos--;
9356 gcc_assert (pos >= 0);
9357 if (pos % 3 == 0
9358 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9359 && !unknown_for_bundling_p (insn))
9360 {
9361 /* The current insn is at the bundle start: emit the
9362 template. */
9363 gcc_assert (template0 >= 0);
9364 ia64_add_bundle_selector_before (template0, insn);
9365 b = PREV_INSN (insn);
9366 insn = b;
9367 /* See comment above in analogous place for emitting nops
9368 after the insn. */
9369 template0 = template1;
9370 template1 = -1;
9371 }
9372 /* Emit nops before the current insn. */
9373 for (i = 0; i < curr_state->before_nops_num; i++)
9374 {
9375 rtx nop_pat = gen_nop ();
9376 ia64_emit_insn_before (nop_pat, insn);
9377 rtx_insn *nop = PREV_INSN (insn);
9378 insn = nop;
9379 pos--;
9380 gcc_assert (pos >= 0);
9381 if (pos % 3 == 0)
9382 {
9383 /* See comment above in analogous place for emitting nops
9384 after the insn. */
9385 gcc_assert (template0 >= 0);
9386 ia64_add_bundle_selector_before (template0, insn);
9387 b = PREV_INSN (insn);
9388 insn = b;
9389 template0 = template1;
9390 template1 = -1;
9391 }
9392 }
9393 }
9394
9395 if (flag_checking)
9396 {
9397 /* Check that middle_bundle_stops was calculated correctly. */
9398 int num = best_state->middle_bundle_stops;
9399 bool start_bundle = true, end_bundle = false;
9400
9401 for (insn = NEXT_INSN (prev_head_insn);
9402 insn && insn != tail;
9403 insn = NEXT_INSN (insn))
9404 {
9405 if (!INSN_P (insn))
9406 continue;
9407 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9408 start_bundle = true;
9409 else
9410 {
9411 rtx_insn *next_insn;
9412
9413 for (next_insn = NEXT_INSN (insn);
9414 next_insn && next_insn != tail;
9415 next_insn = NEXT_INSN (next_insn))
9416 if (INSN_P (next_insn)
9417 && (ia64_safe_itanium_class (next_insn)
9418 != ITANIUM_CLASS_IGNORE
9419 || recog_memoized (next_insn)
9420 == CODE_FOR_bundle_selector)
9421 && GET_CODE (PATTERN (next_insn)) != USE
9422 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9423 break;
9424
9425 end_bundle = next_insn == NULL_RTX
9426 || next_insn == tail
9427 || (INSN_P (next_insn)
9428 && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9429 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9430 && !start_bundle && !end_bundle
9431 && next_insn
9432 && !unknown_for_bundling_p (next_insn))
9433 num--;
9434
9435 start_bundle = false;
9436 }
9437 }
9438
9439 gcc_assert (num == 0);
9440 }
9441
9442 free (index_to_bundle_states);
9443 finish_bundle_state_table ();
9444 bundling_p = 0;
9445 dfa_clean_insn_cache ();
9446 }
9447
9448 /* The following function is called at the end of scheduling BB or
9449 EBB. After reload, it inserts stop bits and does insn bundling. */
9450
9451 static void
9452 ia64_sched_finish (FILE *dump, int sched_verbose)
9453 {
9454 if (sched_verbose)
9455 fprintf (dump, "// Finishing schedule.\n");
9456 if (!reload_completed)
9457 return;
9458 if (reload_completed)
9459 {
9460 final_emit_insn_group_barriers (dump);
9461 bundling (dump, sched_verbose, current_sched_info->prev_head,
9462 current_sched_info->next_tail);
9463 if (sched_verbose && dump)
9464 fprintf (dump, "// finishing %d-%d\n",
9465 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9466 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9467
9468 return;
9469 }
9470 }
9471
9472 /* The following function inserts stop bits in scheduled BB or EBB. */
9473
9474 static void
9475 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9476 {
9477 rtx_insn *insn;
9478 int need_barrier_p = 0;
9479 int seen_good_insn = 0;
9480
9481 init_insn_group_barriers ();
9482
9483 for (insn = NEXT_INSN (current_sched_info->prev_head);
9484 insn != current_sched_info->next_tail;
9485 insn = NEXT_INSN (insn))
9486 {
9487 if (BARRIER_P (insn))
9488 {
9489 rtx_insn *last = prev_active_insn (insn);
9490
9491 if (! last)
9492 continue;
9493 if (JUMP_TABLE_DATA_P (last))
9494 last = prev_active_insn (last);
9495 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9496 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9497
9498 init_insn_group_barriers ();
9499 seen_good_insn = 0;
9500 need_barrier_p = 0;
9501 }
9502 else if (NONDEBUG_INSN_P (insn))
9503 {
9504 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9505 {
9506 init_insn_group_barriers ();
9507 seen_good_insn = 0;
9508 need_barrier_p = 0;
9509 }
9510 else if (need_barrier_p || group_barrier_needed (insn)
9511 || (mflag_sched_stop_bits_after_every_cycle
9512 && GET_MODE (insn) == TImode
9513 && seen_good_insn))
9514 {
9515 if (TARGET_EARLY_STOP_BITS)
9516 {
9517 rtx_insn *last;
9518
9519 for (last = insn;
9520 last != current_sched_info->prev_head;
9521 last = PREV_INSN (last))
9522 if (INSN_P (last) && GET_MODE (last) == TImode
9523 && stops_p [INSN_UID (last)])
9524 break;
9525 if (last == current_sched_info->prev_head)
9526 last = insn;
9527 last = prev_active_insn (last);
9528 if (last
9529 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9530 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9531 last);
9532 init_insn_group_barriers ();
9533 for (last = NEXT_INSN (last);
9534 last != insn;
9535 last = NEXT_INSN (last))
9536 if (INSN_P (last))
9537 {
9538 group_barrier_needed (last);
9539 if (recog_memoized (last) >= 0
9540 && important_for_bundling_p (last))
9541 seen_good_insn = 1;
9542 }
9543 }
9544 else
9545 {
9546 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9547 insn);
9548 init_insn_group_barriers ();
9549 seen_good_insn = 0;
9550 }
9551 group_barrier_needed (insn);
9552 if (recog_memoized (insn) >= 0
9553 && important_for_bundling_p (insn))
9554 seen_good_insn = 1;
9555 }
9556 else if (recog_memoized (insn) >= 0
9557 && important_for_bundling_p (insn))
9558 seen_good_insn = 1;
9559 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9560 }
9561 }
9562 }
9563
9564 \f
9565
9566 /* If the following function returns a positive value, we use the
9567 multipass DFA insn scheduler with that lookahead depth. */
9568
9569 static int
9570 ia64_first_cycle_multipass_dfa_lookahead (void)
9571 {
9572 return (reload_completed ? 6 : 4);
9573 }
9574
9575 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
9576
9577 static void
9578 ia64_init_dfa_pre_cycle_insn (void)
9579 {
9580 if (temp_dfa_state == NULL)
9581 {
9582 dfa_state_size = state_size ();
9583 temp_dfa_state = xmalloc (dfa_state_size);
9584 prev_cycle_state = xmalloc (dfa_state_size);
9585 }
9586 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9587 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9588 recog_memoized (dfa_pre_cycle_insn);
9589 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9590 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9591 recog_memoized (dfa_stop_insn);
9592 }
9593
9594 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9595 used by the DFA insn scheduler. */
9596
9597 static rtx
9598 ia64_dfa_pre_cycle_insn (void)
9599 {
9600 return dfa_pre_cycle_insn;
9601 }
9602
9603 /* The following function returns TRUE if PRODUCER (of type ilog or
9604 ld) produces address for CONSUMER (of type st or stf). */
9605
9606 int
9607 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9608 {
9609 rtx dest, reg, mem;
9610
9611 gcc_assert (producer && consumer);
9612 dest = ia64_single_set (producer);
9613 gcc_assert (dest);
9614 reg = SET_DEST (dest);
9615 gcc_assert (reg);
9616 if (GET_CODE (reg) == SUBREG)
9617 reg = SUBREG_REG (reg);
9618 gcc_assert (GET_CODE (reg) == REG);
9619
9620 dest = ia64_single_set (consumer);
9621 gcc_assert (dest);
9622 mem = SET_DEST (dest);
9623 gcc_assert (mem && GET_CODE (mem) == MEM);
9624 return reg_mentioned_p (reg, mem);
9625 }
9626
9627 /* The following function returns TRUE if PRODUCER (of type ilog or
9628 ld) produces address for CONSUMER (of type ld or fld). */
9629
9630 int
9631 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9632 {
9633 rtx dest, src, reg, mem;
9634
9635 gcc_assert (producer && consumer);
9636 dest = ia64_single_set (producer);
9637 gcc_assert (dest);
9638 reg = SET_DEST (dest);
9639 gcc_assert (reg);
9640 if (GET_CODE (reg) == SUBREG)
9641 reg = SUBREG_REG (reg);
9642 gcc_assert (GET_CODE (reg) == REG);
9643
9644 src = ia64_single_set (consumer);
9645 gcc_assert (src);
9646 mem = SET_SRC (src);
9647 gcc_assert (mem);
9648
9649 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9650 mem = XVECEXP (mem, 0, 0);
9651 else if (GET_CODE (mem) == IF_THEN_ELSE)
9652 /* ??? Is this bypass necessary for ld.c? */
9653 {
9654 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9655 mem = XEXP (mem, 1);
9656 }
9657
9658 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9659 mem = XEXP (mem, 0);
9660
9661 if (GET_CODE (mem) == UNSPEC)
9662 {
9663 int c = XINT (mem, 1);
9664
9665 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9666 || c == UNSPEC_LDSA);
9667 mem = XVECEXP (mem, 0, 0);
9668 }
9669
9670 /* Note that LO_SUM is used for GOT loads. */
9671 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9672
9673 return reg_mentioned_p (reg, mem);
9674 }
9675
9676 /* The following function returns TRUE if INSN produces an address for a
9677 load/store insn. We place such insns into an M slot because that
9678 decreases their latency. */
9679
9680 int
9681 ia64_produce_address_p (rtx insn)
9682 {
9683 return insn->call;
9684 }
9685
9686 \f
9687 /* Emit pseudo-ops for the assembler to describe predicate relations.
9688 At present this assumes that we only consider predicate pairs to
9689 be mutex, and that the assembler can deduce proper values from
9690 straight-line code. */
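
/* Illustration (the exact directive text is produced by the
   pred_rel_mutex pattern in ia64.md and is assumed, not quoted, here).
   Within straight-line code the assembler can see that a compare such as

       cmp.eq  p6, p7 = r32, r0

   makes p6 and p7 complementary, so it will accept

       (p6) adds r8 = 1, r8
       (p7) adds r8 = -1, r8

   in a single instruction group.  After a code label that knowledge
   is lost, which is why a mutex annotation is emitted there for each
   live predicate pair.  */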
9691
9692 static void
9693 emit_predicate_relation_info (void)
9694 {
9695 basic_block bb;
9696
9697 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9698 {
9699 int r;
9700 rtx_insn *head = BB_HEAD (bb);
9701
9702 /* We only need such notes at code labels. */
9703 if (! LABEL_P (head))
9704 continue;
9705 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9706 head = NEXT_INSN (head);
9707
9708 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9709 grabbing the entire block of predicate registers. */
9710 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9711 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9712 {
9713 rtx p = gen_rtx_REG (BImode, r);
9714 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9715 if (head == BB_END (bb))
9716 BB_END (bb) = n;
9717 head = n;
9718 }
9719 }
9720
9721 /* Look for conditional calls that do not return, and protect predicate
9722 relations around them. Otherwise the assembler will assume the call
9723 returns, and complain about uses of call-clobbered predicates after
9724 the call. */
9725 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9726 {
9727 rtx_insn *insn = BB_HEAD (bb);
9728
9729 while (1)
9730 {
9731 if (CALL_P (insn)
9732 && GET_CODE (PATTERN (insn)) == COND_EXEC
9733 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9734 {
9735 rtx_insn *b =
9736 emit_insn_before (gen_safe_across_calls_all (), insn);
9737 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9738 if (BB_HEAD (bb) == insn)
9739 BB_HEAD (bb) = b;
9740 if (BB_END (bb) == insn)
9741 BB_END (bb) = a;
9742 }
9743
9744 if (insn == BB_END (bb))
9745 break;
9746 insn = NEXT_INSN (insn);
9747 }
9748 }
9749 }
9750
9751 /* Perform machine dependent operations on the rtl chain INSNS. */
9752
9753 static void
9754 ia64_reorg (void)
9755 {
9756 /* We are freeing block_for_insn in the toplev to keep compatibility
9757 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9758 compute_bb_for_insn ();
9759
9760 /* If optimizing, we'll have split before scheduling. */
9761 if (optimize == 0)
9762 split_all_insns ();
9763
9764 if (optimize && flag_schedule_insns_after_reload
9765 && dbg_cnt (ia64_sched2))
9766 {
9767 basic_block bb;
9768 timevar_push (TV_SCHED2);
9769 ia64_final_schedule = 1;
9770
9771 /* We can't let modulo-sched prevent us from scheduling any bbs,
9772 since we need the final schedule to produce bundle information. */
9773 FOR_EACH_BB_FN (bb, cfun)
9774 bb->flags &= ~BB_DISABLE_SCHEDULE;
9775
9776 initiate_bundle_states ();
9777 ia64_nop = make_insn_raw (gen_nop ());
9778 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9779 recog_memoized (ia64_nop);
9780 clocks_length = get_max_uid () + 1;
9781 stops_p = XCNEWVEC (char, clocks_length);
9782
9783 if (ia64_tune == PROCESSOR_ITANIUM2)
9784 {
9785 pos_1 = get_cpu_unit_code ("2_1");
9786 pos_2 = get_cpu_unit_code ("2_2");
9787 pos_3 = get_cpu_unit_code ("2_3");
9788 pos_4 = get_cpu_unit_code ("2_4");
9789 pos_5 = get_cpu_unit_code ("2_5");
9790 pos_6 = get_cpu_unit_code ("2_6");
9791 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9792 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9793 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9794 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9795 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9796 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9797 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9798 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9799 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9800 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9801 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9802 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9803 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9804 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9805 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9806 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9807 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9808 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9809 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9810 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9811 }
9812 else
9813 {
9814 pos_1 = get_cpu_unit_code ("1_1");
9815 pos_2 = get_cpu_unit_code ("1_2");
9816 pos_3 = get_cpu_unit_code ("1_3");
9817 pos_4 = get_cpu_unit_code ("1_4");
9818 pos_5 = get_cpu_unit_code ("1_5");
9819 pos_6 = get_cpu_unit_code ("1_6");
9820 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9821 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9822 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9823 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9824 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9825 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9826 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9827 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9828 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9829 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9830 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9831 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9832 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9833 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9834 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9835 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9836 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9837 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9838 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9839 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9840 }
9841
9842 if (flag_selective_scheduling2
9843 && !maybe_skip_selective_scheduling ())
9844 run_selective_scheduling ();
9845 else
9846 schedule_ebbs ();
9847
9848 /* Redo alignment computation, as it might have gone wrong. */
9849 compute_alignments ();
9850
9851 /* We cannot reuse this one because it has been corrupted by the
9852 evil glat. */
9853 finish_bundle_states ();
9854 free (stops_p);
9855 stops_p = NULL;
9856 emit_insn_group_barriers (dump_file);
9857
9858 ia64_final_schedule = 0;
9859 timevar_pop (TV_SCHED2);
9860 }
9861 else
9862 emit_all_insn_group_barriers (dump_file);
9863
9864 df_analyze ();
9865
9866 /* A call must not be the last instruction in a function, so that the
9867 return address is still within the function and unwinding works
9868 properly. Note that IA-64 differs from dwarf2 on this point. */
9869 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9870 {
9871 rtx_insn *insn;
9872 int saw_stop = 0;
9873
9874 insn = get_last_insn ();
9875 if (! INSN_P (insn))
9876 insn = prev_active_insn (insn);
9877 if (insn)
9878 {
9879 /* Skip over insns that expand to nothing. */
9880 while (NONJUMP_INSN_P (insn)
9881 && get_attr_empty (insn) == EMPTY_YES)
9882 {
9883 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9884 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9885 saw_stop = 1;
9886 insn = prev_active_insn (insn);
9887 }
9888 if (CALL_P (insn))
9889 {
9890 if (! saw_stop)
9891 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9892 emit_insn (gen_break_f ());
9893 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9894 }
9895 }
9896 }
9897
9898 emit_predicate_relation_info ();
9899
9900 if (flag_var_tracking)
9901 {
9902 timevar_push (TV_VAR_TRACKING);
9903 variable_tracking_main ();
9904 timevar_pop (TV_VAR_TRACKING);
9905 }
9906 df_finish_pass (false);
9907 }
9908 \f
9909 /* Return true if REGNO is used by the epilogue. */
9910
9911 int
9912 ia64_epilogue_uses (int regno)
9913 {
9914 switch (regno)
9915 {
9916 case R_GR (1):
9917 /* With a call to a function in another module, we will write a new
9918 value to "gp". After returning from such a call, we need to make
9919 sure the function restores the original gp-value, even if the
9920 function itself does not use the gp anymore. */
9921 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9922
9923 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9924 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9925 /* For functions defined with the syscall_linkage attribute, all
9926 input registers are marked as live at all function exits. This
9927 prevents the register allocator from using the input registers,
9928 which in turn makes it possible to restart a system call after
9929 an interrupt without having to save/restore the input registers.
9930 This also prevents kernel data from leaking to application code. */
9931 return lookup_attribute ("syscall_linkage",
9932 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9933
9934 case R_BR (0):
9935 /* Conditional return patterns can't represent the use of `b0' as
9936 the return address, so we force the value live this way. */
9937 return 1;
9938
9939 case AR_PFS_REGNUM:
9940 /* Likewise for ar.pfs, which is used by br.ret. */
9941 return 1;
9942
9943 default:
9944 return 0;
9945 }
9946 }
9947
9948 /* Return true if REGNO is used by the frame unwinder. */
9949
9950 int
9951 ia64_eh_uses (int regno)
9952 {
9953 unsigned int r;
9954
9955 if (! reload_completed)
9956 return 0;
9957
9958 if (regno == 0)
9959 return 0;
9960
9961 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9962 if (regno == current_frame_info.r[r]
9963 || regno == emitted_frame_related_regs[r])
9964 return 1;
9965
9966 return 0;
9967 }
9968 \f
9969 /* Return true if this goes in small data/bss. */
9970
9971 /* ??? We could also support our own long data here: generating movl/add/ld8
9972 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9973 code faster because there is one less load. This also includes incomplete
9974 types which can't go in sdata/sbss. */
9975
9976 static bool
9977 ia64_in_small_data_p (const_tree exp)
9978 {
9979 if (TARGET_NO_SDATA)
9980 return false;
9981
9982 /* We want to merge strings, so we never consider them small data. */
9983 if (TREE_CODE (exp) == STRING_CST)
9984 return false;
9985
9986 /* Functions are never small data. */
9987 if (TREE_CODE (exp) == FUNCTION_DECL)
9988 return false;
9989
9990 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9991 {
9992 const char *section = DECL_SECTION_NAME (exp);
9993
9994 if (strcmp (section, ".sdata") == 0
9995 || strncmp (section, ".sdata.", 7) == 0
9996 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9997 || strcmp (section, ".sbss") == 0
9998 || strncmp (section, ".sbss.", 6) == 0
9999 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
10000 return true;
10001 }
10002 else
10003 {
10004 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
10005
10006 /* If this is an incomplete type with size 0, then we can't put it
10007 in sdata because it might be too big when completed. */
10008 if (size > 0 && size <= ia64_section_threshold)
10009 return true;
10010 }
10011
10012 return false;
10013 }
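
/* Illustrative only (not GCC code).  Assuming a small-data size
   threshold of 8 bytes in ia64_section_threshold, file-scope objects
   would be classified by the predicate above roughly as follows.  */
#if 0
int counter;                 /* 4 bytes -> eligible for .sbss */
static double ratio = 1.5;   /* 8 bytes -> eligible for .sdata */
char log_buf[4096];          /* too large -> ordinary .bss */
const char *greeting = "hi"; /* the 8-byte pointer may be small data,
                                but the string constant itself never
                                is (see the STRING_CST check above) */
#endif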
10014 \f
10015 /* Output assembly directives for prologue regions. */
10016
10017 /* True if the current basic block is the last block of the function. */
10018
10019 static bool last_block;
10020
10021 /* True if we need a copy_state command at the start of the next block. */
10022
10023 static bool need_copy_state;
10024
10025 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
10026 # define MAX_ARTIFICIAL_LABEL_BYTES 30
10027 #endif
10028
10029 /* The function emits unwind directives for the start of an epilogue. */
10030
10031 static void
10032 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
10033 bool unwind, bool frame ATTRIBUTE_UNUSED)
10034 {
10035 /* If this isn't the last block of the function, then we need to label the
10036 current state, and copy it back in at the start of the next block. */
10037
10038 if (!last_block)
10039 {
10040 if (unwind)
10041 fprintf (asm_out_file, "\t.label_state %d\n",
10042 ++cfun->machine->state_num);
10043 need_copy_state = true;
10044 }
10045
10046 if (unwind)
10047 fprintf (asm_out_file, "\t.restore sp\n");
10048 }
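
/* For an epilogue that is not in the last block, the directives
   emitted above combine with those emitted by ia64_asm_unwind_emit
   roughly as follows (the state number is illustrative):

       .label_state 1
       .restore sp
       ...                    // epilogue and branch out
       .body                  // start of the next basic block
       .copy_state 1

   so the unwinder's view of the frame is restored for the code that
   follows the early exit.  */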
10049
10050 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
10051
10052 static void
10053 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10054 bool unwind, bool frame)
10055 {
10056 rtx dest = SET_DEST (pat);
10057 rtx src = SET_SRC (pat);
10058
10059 if (dest == stack_pointer_rtx)
10060 {
10061 if (GET_CODE (src) == PLUS)
10062 {
10063 rtx op0 = XEXP (src, 0);
10064 rtx op1 = XEXP (src, 1);
10065
10066 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10067
10068 if (INTVAL (op1) < 0)
10069 {
10070 gcc_assert (!frame_pointer_needed);
10071 if (unwind)
10072 fprintf (asm_out_file,
10073 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
10074 -INTVAL (op1));
10075 }
10076 else
10077 process_epilogue (asm_out_file, insn, unwind, frame);
10078 }
10079 else
10080 {
10081 gcc_assert (src == hard_frame_pointer_rtx);
10082 process_epilogue (asm_out_file, insn, unwind, frame);
10083 }
10084 }
10085 else if (dest == hard_frame_pointer_rtx)
10086 {
10087 gcc_assert (src == stack_pointer_rtx);
10088 gcc_assert (frame_pointer_needed);
10089
10090 if (unwind)
10091 fprintf (asm_out_file, "\t.vframe r%d\n",
10092 ia64_dbx_register_number (REGNO (dest)));
10093 }
10094 else
10095 gcc_unreachable ();
10096 }
10097
10098 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10099
10100 static void
10101 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10102 {
10103 rtx dest = SET_DEST (pat);
10104 rtx src = SET_SRC (pat);
10105 int dest_regno = REGNO (dest);
10106 int src_regno;
10107
10108 if (src == pc_rtx)
10109 {
10110 /* Saving return address pointer. */
10111 if (unwind)
10112 fprintf (asm_out_file, "\t.save rp, r%d\n",
10113 ia64_dbx_register_number (dest_regno));
10114 return;
10115 }
10116
10117 src_regno = REGNO (src);
10118
10119 switch (src_regno)
10120 {
10121 case PR_REG (0):
10122 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10123 if (unwind)
10124 fprintf (asm_out_file, "\t.save pr, r%d\n",
10125 ia64_dbx_register_number (dest_regno));
10126 break;
10127
10128 case AR_UNAT_REGNUM:
10129 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10130 if (unwind)
10131 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10132 ia64_dbx_register_number (dest_regno));
10133 break;
10134
10135 case AR_LC_REGNUM:
10136 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10137 if (unwind)
10138 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10139 ia64_dbx_register_number (dest_regno));
10140 break;
10141
10142 default:
10143 /* Everything else should indicate being stored to memory. */
10144 gcc_unreachable ();
10145 }
10146 }
10147
10148 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10149
10150 static void
10151 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10152 {
10153 rtx dest = SET_DEST (pat);
10154 rtx src = SET_SRC (pat);
10155 int src_regno = REGNO (src);
10156 const char *saveop;
10157 HOST_WIDE_INT off;
10158 rtx base;
10159
10160 gcc_assert (MEM_P (dest));
10161 if (GET_CODE (XEXP (dest, 0)) == REG)
10162 {
10163 base = XEXP (dest, 0);
10164 off = 0;
10165 }
10166 else
10167 {
10168 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10169 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10170 base = XEXP (XEXP (dest, 0), 0);
10171 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10172 }
10173
10174 if (base == hard_frame_pointer_rtx)
10175 {
10176 saveop = ".savepsp";
10177 off = - off;
10178 }
10179 else
10180 {
10181 gcc_assert (base == stack_pointer_rtx);
10182 saveop = ".savesp";
10183 }
10184
10185 src_regno = REGNO (src);
10186 switch (src_regno)
10187 {
10188 case BR_REG (0):
10189 gcc_assert (!current_frame_info.r[reg_save_b0]);
10190 if (unwind)
10191 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10192 saveop, off);
10193 break;
10194
10195 case PR_REG (0):
10196 gcc_assert (!current_frame_info.r[reg_save_pr]);
10197 if (unwind)
10198 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10199 saveop, off);
10200 break;
10201
10202 case AR_LC_REGNUM:
10203 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10204 if (unwind)
10205 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10206 saveop, off);
10207 break;
10208
10209 case AR_PFS_REGNUM:
10210 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10211 if (unwind)
10212 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10213 saveop, off);
10214 break;
10215
10216 case AR_UNAT_REGNUM:
10217 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10218 if (unwind)
10219 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10220 saveop, off);
10221 break;
10222
10223 case GR_REG (4):
10224 case GR_REG (5):
10225 case GR_REG (6):
10226 case GR_REG (7):
10227 if (unwind)
10228 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10229 1 << (src_regno - GR_REG (4)));
10230 break;
10231
10232 case BR_REG (1):
10233 case BR_REG (2):
10234 case BR_REG (3):
10235 case BR_REG (4):
10236 case BR_REG (5):
10237 if (unwind)
10238 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10239 1 << (src_regno - BR_REG (1)));
10240 break;
10241
10242 case FR_REG (2):
10243 case FR_REG (3):
10244 case FR_REG (4):
10245 case FR_REG (5):
10246 if (unwind)
10247 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10248 1 << (src_regno - FR_REG (2)));
10249 break;
10250
10251 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10252 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10253 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10254 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10255 if (unwind)
10256 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10257 1 << (src_regno - FR_REG (12)));
10258 break;
10259
10260 default:
10261 /* ??? For some reason we mark other general registers, even those
10262 we can't represent in the unwind info. Ignore them. */
10263 break;
10264 }
10265 }
10266
10267 /* This function looks at a single insn and emits any directives
10268 required to unwind this insn. */
10269
10270 static void
10271 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10272 {
10273 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10274 bool frame = dwarf2out_do_frame ();
10275 rtx note, pat;
10276 bool handled_one;
10277
10278 if (!unwind && !frame)
10279 return;
10280
10281 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10282 {
10283 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10284 == EXIT_BLOCK_PTR_FOR_FN (cfun);
10285
10286 /* Restore unwind state from immediately before the epilogue. */
10287 if (need_copy_state)
10288 {
10289 if (unwind)
10290 {
10291 fprintf (asm_out_file, "\t.body\n");
10292 fprintf (asm_out_file, "\t.copy_state %d\n",
10293 cfun->machine->state_num);
10294 }
10295 need_copy_state = false;
10296 }
10297 }
10298
10299 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10300 return;
10301
10302 /* Look for the ALLOC insn. */
10303 if (INSN_CODE (insn) == CODE_FOR_alloc)
10304 {
10305 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10306 int dest_regno = REGNO (dest);
10307
10308 /* If this is the final destination for ar.pfs, then this must
10309 be the alloc in the prologue. */
10310 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10311 {
10312 if (unwind)
10313 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10314 ia64_dbx_register_number (dest_regno));
10315 }
10316 else
10317 {
10318 /* This must be an alloc before a sibcall. We must drop the
10319 old frame info. The easiest way to drop the old frame
10320 info is to ensure we had a ".restore sp" directive
10321 followed by a new prologue. If the procedure doesn't
10322 have a memory-stack frame, we'll issue a dummy ".restore
10323 sp" now. */
10324 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10325 /* If we haven't done process_epilogue () yet, do it now. */
10326 process_epilogue (asm_out_file, insn, unwind, frame);
10327 if (unwind)
10328 fprintf (asm_out_file, "\t.prologue\n");
10329 }
10330 return;
10331 }
10332
10333 handled_one = false;
10334 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10335 switch (REG_NOTE_KIND (note))
10336 {
10337 case REG_CFA_ADJUST_CFA:
10338 pat = XEXP (note, 0);
10339 if (pat == NULL)
10340 pat = PATTERN (insn);
10341 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10342 handled_one = true;
10343 break;
10344
10345 case REG_CFA_OFFSET:
10346 pat = XEXP (note, 0);
10347 if (pat == NULL)
10348 pat = PATTERN (insn);
10349 process_cfa_offset (asm_out_file, pat, unwind);
10350 handled_one = true;
10351 break;
10352
10353 case REG_CFA_REGISTER:
10354 pat = XEXP (note, 0);
10355 if (pat == NULL)
10356 pat = PATTERN (insn);
10357 process_cfa_register (asm_out_file, pat, unwind);
10358 handled_one = true;
10359 break;
10360
10361 case REG_FRAME_RELATED_EXPR:
10362 case REG_CFA_DEF_CFA:
10363 case REG_CFA_EXPRESSION:
10364 case REG_CFA_RESTORE:
10365 case REG_CFA_SET_VDRAP:
10366 /* Not used in the ia64 port. */
10367 gcc_unreachable ();
10368
10369 default:
10370 /* Not a frame-related note. */
10371 break;
10372 }
10373
10374 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10375 explicit action to take. No guessing required. */
10376 gcc_assert (handled_one);
10377 }
10378
10379 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10380
10381 static void
10382 ia64_asm_emit_except_personality (rtx personality)
10383 {
10384 fputs ("\t.personality\t", asm_out_file);
10385 output_addr_const (asm_out_file, personality);
10386 fputc ('\n', asm_out_file);
10387 }
10388
10389 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10390
10391 static void
10392 ia64_asm_init_sections (void)
10393 {
10394 exception_section = get_unnamed_section (0, output_section_asm_op,
10395 "\t.handlerdata");
10396 }
10397
10398 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10399
10400 static enum unwind_info_type
10401 ia64_debug_unwind_info (void)
10402 {
10403 return UI_TARGET;
10404 }
10405 \f
10406 enum ia64_builtins
10407 {
10408 IA64_BUILTIN_BSP,
10409 IA64_BUILTIN_COPYSIGNQ,
10410 IA64_BUILTIN_FABSQ,
10411 IA64_BUILTIN_FLUSHRS,
10412 IA64_BUILTIN_INFQ,
10413 IA64_BUILTIN_HUGE_VALQ,
10414 IA64_BUILTIN_NANQ,
10415 IA64_BUILTIN_NANSQ,
10416 IA64_BUILTIN_max
10417 };
10418
10419 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10420
10421 void
10422 ia64_init_builtins (void)
10423 {
10424 tree fpreg_type;
10425 tree float80_type;
10426 tree decl;
10427
10428 /* The __fpreg type. */
10429 fpreg_type = make_node (REAL_TYPE);
10430 TYPE_PRECISION (fpreg_type) = 82;
10431 layout_type (fpreg_type);
10432 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10433
10434 /* The __float80 type. */
10435 if (float64x_type_node != NULL_TREE
10436 && TYPE_MODE (float64x_type_node) == XFmode)
10437 float80_type = float64x_type_node;
10438 else
10439 {
10440 float80_type = make_node (REAL_TYPE);
10441 TYPE_PRECISION (float80_type) = 80;
10442 layout_type (float80_type);
10443 }
10444 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10445
10446 /* The __float128 type. */
10447 if (!TARGET_HPUX)
10448 {
10449 tree ftype;
10450 tree const_string_type
10451 = build_pointer_type (build_qualified_type
10452 (char_type_node, TYPE_QUAL_CONST));
10453
10454 (*lang_hooks.types.register_builtin_type) (float128_type_node,
10455 "__float128");
10456
10457 /* TFmode support builtins. */
10458 ftype = build_function_type_list (float128_type_node, NULL_TREE);
10459 decl = add_builtin_function ("__builtin_infq", ftype,
10460 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10461 NULL, NULL_TREE);
10462 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10463
10464 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10465 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10466 NULL, NULL_TREE);
10467 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10468
10469 ftype = build_function_type_list (float128_type_node,
10470 const_string_type,
10471 NULL_TREE);
10472 decl = add_builtin_function ("__builtin_nanq", ftype,
10473 IA64_BUILTIN_NANQ, BUILT_IN_MD,
10474 "nanq", NULL_TREE);
10475 TREE_READONLY (decl) = 1;
10476 ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10477
10478 decl = add_builtin_function ("__builtin_nansq", ftype,
10479 IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10480 "nansq", NULL_TREE);
10481 TREE_READONLY (decl) = 1;
10482 ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10483
10484 ftype = build_function_type_list (float128_type_node,
10485 float128_type_node,
10486 NULL_TREE);
10487 decl = add_builtin_function ("__builtin_fabsq", ftype,
10488 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10489 "__fabstf2", NULL_TREE);
10490 TREE_READONLY (decl) = 1;
10491 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10492
10493 ftype = build_function_type_list (float128_type_node,
10494 float128_type_node,
10495 float128_type_node,
10496 NULL_TREE);
10497 decl = add_builtin_function ("__builtin_copysignq", ftype,
10498 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10499 "__copysigntf3", NULL_TREE);
10500 TREE_READONLY (decl) = 1;
10501 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10502 }
10503 else
10504 /* Under HPUX, this is a synonym for "long double". */
10505 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10506 "__float128");
10507
10508 /* Fwrite on VMS is non-standard. */
10509 #if TARGET_ABI_OPEN_VMS
10510 vms_patch_builtins ();
10511 #endif
10512
10513 #define def_builtin(name, type, code) \
10514 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10515 NULL, NULL_TREE)
10516
10517 decl = def_builtin ("__builtin_ia64_bsp",
10518 build_function_type_list (ptr_type_node, NULL_TREE),
10519 IA64_BUILTIN_BSP);
10520 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10521
10522 decl = def_builtin ("__builtin_ia64_flushrs",
10523 build_function_type_list (void_type_node, NULL_TREE),
10524 IA64_BUILTIN_FLUSHRS);
10525 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10526
10527 #undef def_builtin
10528
10529 if (TARGET_HPUX)
10530 {
10531 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10532 set_user_assembler_name (decl, "_Isfinite");
10533 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10534 set_user_assembler_name (decl, "_Isfinitef");
10535 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10536 set_user_assembler_name (decl, "_Isfinitef128");
10537 }
10538 }
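
/* A minimal usage sketch (not part of this file) for two of the
   builtins registered above; the prototypes follow the
   build_function_type_list calls: __builtin_ia64_bsp takes no
   arguments and returns a pointer, __builtin_ia64_flushrs returns
   void.  */
#if 0
void *
current_backing_store_pointer (void)
{
  /* Flush the dirty stacked registers to the backing store, then
     read the current value of ar.bsp.  */
  __builtin_ia64_flushrs ();
  return __builtin_ia64_bsp ();
}
#endif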
10539
10540 static tree
10541 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10542 tree *args, bool ignore ATTRIBUTE_UNUSED)
10543 {
10544 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10545 {
10546 enum ia64_builtins fn_code = (enum ia64_builtins)
10547 DECL_FUNCTION_CODE (fndecl);
10548 switch (fn_code)
10549 {
10550 case IA64_BUILTIN_NANQ:
10551 case IA64_BUILTIN_NANSQ:
10552 {
10553 tree type = TREE_TYPE (TREE_TYPE (fndecl));
10554 const char *str = c_getstr (*args);
10555 int quiet = fn_code == IA64_BUILTIN_NANQ;
10556 REAL_VALUE_TYPE real;
10557
10558 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10559 return build_real (type, real);
10560 return NULL_TREE;
10561 }
10562
10563 default:
10564 break;
10565 }
10566 }
10567
10568 #ifdef SUBTARGET_FOLD_BUILTIN
10569 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10570 #endif
10571
10572 return NULL_TREE;
10573 }
10574
10575 rtx
10576 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10577 machine_mode mode ATTRIBUTE_UNUSED,
10578 int ignore ATTRIBUTE_UNUSED)
10579 {
10580 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10581 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10582
10583 switch (fcode)
10584 {
10585 case IA64_BUILTIN_BSP:
10586 if (! target || ! register_operand (target, DImode))
10587 target = gen_reg_rtx (DImode);
10588 emit_insn (gen_bsp_value (target));
10589 #ifdef POINTERS_EXTEND_UNSIGNED
10590 target = convert_memory_address (ptr_mode, target);
10591 #endif
10592 return target;
10593
10594 case IA64_BUILTIN_FLUSHRS:
10595 emit_insn (gen_flushrs ());
10596 return const0_rtx;
10597
10598 case IA64_BUILTIN_INFQ:
10599 case IA64_BUILTIN_HUGE_VALQ:
10600 {
10601 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10602 REAL_VALUE_TYPE inf;
10603 rtx tmp;
10604
10605 real_inf (&inf);
10606 tmp = const_double_from_real_value (inf, target_mode);
10607
10608 tmp = validize_mem (force_const_mem (target_mode, tmp));
10609
10610 if (target == 0)
10611 target = gen_reg_rtx (target_mode);
10612
10613 emit_move_insn (target, tmp);
10614 return target;
10615 }
10616
10617 case IA64_BUILTIN_NANQ:
10618 case IA64_BUILTIN_NANSQ:
10619 case IA64_BUILTIN_FABSQ:
10620 case IA64_BUILTIN_COPYSIGNQ:
10621 return expand_call (exp, target, ignore);
10622
10623 default:
10624 gcc_unreachable ();
10625 }
10626
10627 return NULL_RTX;
10628 }
10629
10630 /* Return the ia64 builtin for CODE. */
10631
10632 static tree
10633 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10634 {
10635 if (code >= IA64_BUILTIN_max)
10636 return error_mark_node;
10637
10638 return ia64_builtins[code];
10639 }
10640
10641 /* Implement TARGET_FUNCTION_ARG_PADDING.
10642
10643 For HP-UX IA64, aggregate parameters are passed in the
10644 most significant bits of the stack slot. */
10645
10646 static pad_direction
10647 ia64_function_arg_padding (machine_mode mode, const_tree type)
10648 {
10649 /* Exception to normal case for structures/unions/etc. */
10650 if (TARGET_HPUX
10651 && type
10652 && AGGREGATE_TYPE_P (type)
10653 && int_size_in_bytes (type) < UNITS_PER_WORD)
10654 return PAD_UPWARD;
10655
10656 /* Fall back to the default. */
10657 return default_function_arg_padding (mode, type);
10658 }
10659
10660 /* Emit text to declare externally defined variables and functions, because
10661 the Intel assembler does not support undefined externals. */
10662
10663 void
10664 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10665 {
10666 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10667 set in order to avoid putting out names that are never really
10668 used. */
10669 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10670 {
10671 /* maybe_assemble_visibility will return 1 if the assembler
10672 visibility directive is output. */
10673 int need_visibility = ((*targetm.binds_local_p) (decl)
10674 && maybe_assemble_visibility (decl));
10675
10676 /* GNU as does not need anything here, but the HP linker does
10677 need something for external functions. */
10678 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10679 && TREE_CODE (decl) == FUNCTION_DECL)
10680 (*targetm.asm_out.globalize_decl_name) (file, decl);
10681 else if (need_visibility && !TARGET_GNU_AS)
10682 (*targetm.asm_out.globalize_label) (file, name);
10683 }
10684 }
10685
10686 /* Set SImode div/mod functions, since init_integral_libfuncs only
10687 initializes modes of word_mode and larger. Rename the TFmode libfuncs
10688 using the HPUX conventions. __divtf3 is used for XFmode. We need to
10689 keep it for backward compatibility. */
10690
10691 static void
10692 ia64_init_libfuncs (void)
10693 {
10694 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10695 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10696 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10697 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10698
10699 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10700 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10701 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10702 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10703 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10704
10705 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10706 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10707 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10708 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10709 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10710 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10711
10712 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10713 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10714 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10715 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10716 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10717
10718 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10719 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10720 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10721 /* HP-UX 11.23 libc does not have a function for unsigned
10722 SImode-to-TFmode conversion. */
10723 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10724 }
10725
10726 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10727
10728 static void
10729 ia64_hpux_init_libfuncs (void)
10730 {
10731 ia64_init_libfuncs ();
10732
10733 /* The HP SI millicode division and mod functions expect DI arguments.
10734 By turning them off completely we avoid using both libgcc and the
10735 non-standard millicode routines and use the HP DI millicode routines
10736 instead. */
10737
10738 set_optab_libfunc (sdiv_optab, SImode, 0);
10739 set_optab_libfunc (udiv_optab, SImode, 0);
10740 set_optab_libfunc (smod_optab, SImode, 0);
10741 set_optab_libfunc (umod_optab, SImode, 0);
10742
10743 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10744 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10745 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10746 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10747
10748 /* HP-UX libc has TF min/max/abs routines in it. */
10749 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10750 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10751 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10752
10753 /* ia64_expand_compare uses this. */
10754 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10755
10756 /* These should never be used. */
10757 set_optab_libfunc (eq_optab, TFmode, 0);
10758 set_optab_libfunc (ne_optab, TFmode, 0);
10759 set_optab_libfunc (gt_optab, TFmode, 0);
10760 set_optab_libfunc (ge_optab, TFmode, 0);
10761 set_optab_libfunc (lt_optab, TFmode, 0);
10762 set_optab_libfunc (le_optab, TFmode, 0);
10763 }
10764
10765 /* Rename the division and modulus functions in VMS. */
10766
10767 static void
10768 ia64_vms_init_libfuncs (void)
10769 {
10770 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10771 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10772 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10773 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10774 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10775 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10776 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10777 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10778 #ifdef MEM_LIBFUNCS_INIT
10779 MEM_LIBFUNCS_INIT;
10780 #endif
10781 }
10782
10783 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10784 the HPUX conventions. */
10785
10786 static void
10787 ia64_sysv4_init_libfuncs (void)
10788 {
10789 ia64_init_libfuncs ();
10790
10791 /* These functions are not part of the HPUX TFmode interface. We
10792 use them instead of _U_Qfcmp, which doesn't work the way we
10793 expect. */
10794 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10795 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10796 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10797 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10798 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10799 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10800
10801 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10802 glibc doesn't have them. */
10803 }
10804
10805 /* Use soft-fp. */
10806
10807 static void
10808 ia64_soft_fp_init_libfuncs (void)
10809 {
10810 }
10811
10812 static bool
10813 ia64_vms_valid_pointer_mode (scalar_int_mode mode)
10814 {
10815 return (mode == SImode || mode == DImode);
10816 }
10817 \f
10818 /* For HPUX, it is illegal to have relocations in shared segments. */
10819
10820 static int
10821 ia64_hpux_reloc_rw_mask (void)
10822 {
10823 return 3;
10824 }
10825
10826 /* For others, relax this so that relocations to local data go in
10827 read-only segments, but we still cannot allow global relocations
10828 in read-only segments. */
10829
10830 static int
10831 ia64_reloc_rw_mask (void)
10832 {
10833 return flag_pic ? 3 : 2;
10834 }
10835
10836 /* Return the section to use for X. The only special thing we do here
10837 is to honor small data. */
10838
10839 static section *
10840 ia64_select_rtx_section (machine_mode mode, rtx x,
10841 unsigned HOST_WIDE_INT align)
10842 {
10843 if (GET_MODE_SIZE (mode) > 0
10844 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10845 && !TARGET_NO_SDATA)
10846 return sdata_section;
10847 else
10848 return default_elf_select_rtx_section (mode, x, align);
10849 }
10850
10851 static unsigned int
10852 ia64_section_type_flags (tree decl, const char *name, int reloc)
10853 {
10854 unsigned int flags = 0;
10855
10856 if (strcmp (name, ".sdata") == 0
10857 || strncmp (name, ".sdata.", 7) == 0
10858 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10859 || strncmp (name, ".sdata2.", 8) == 0
10860 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10861 || strcmp (name, ".sbss") == 0
10862 || strncmp (name, ".sbss.", 6) == 0
10863 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10864 flags = SECTION_SMALL;
10865
10866 flags |= default_section_type_flags (decl, name, reloc);
10867 return flags;
10868 }
10869
10870 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10871 structure type and that the address of that type should be passed
10872 in out0, rather than in r8. */
10873
10874 static bool
10875 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10876 {
10877 tree ret_type = TREE_TYPE (fntype);
10878
10879 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10880 as the structure return address parameter, if the return value
10881 type has a non-trivial copy constructor or destructor. It is not
10882 clear if this same convention should be used for other
10883 programming languages. Until G++ 3.4, we incorrectly used r8 for
10884 these return values. */
10885 return (abi_version_at_least (2)
10886 && ret_type
10887 && TYPE_MODE (ret_type) == BLKmode
10888 && TREE_ADDRESSABLE (ret_type)
10889 && lang_GNU_CXX ());
10890 }
10891
10892 /* Output the assembler code for a thunk function. THUNK_DECL is the
10893 declaration for the thunk function itself, FUNCTION is the decl for
10894 the target function. DELTA is an immediate constant offset to be
10895 added to THIS. If VCALL_OFFSET is nonzero, the word at
10896 *(*this + vcall_offset) should be added to THIS. */
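/* For example, with DELTA == -8 and VCALL_OFFSET == -24 the code below
   first computes this -= 8 and then this += *(*this - 24), before
   tail-calling FUNCTION.  */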
10897
10898 static void
10899 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10900 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10901 tree function)
10902 {
10903 rtx this_rtx, funexp;
10904 rtx_insn *insn;
10905 unsigned int this_parmno;
10906 unsigned int this_regno;
10907 rtx delta_rtx;
10908
10909 reload_completed = 1;
10910 epilogue_completed = 1;
10911
10912 /* Set things up as ia64_expand_prologue might. */
10913 last_scratch_gr_reg = 15;
10914
10915 memset (&current_frame_info, 0, sizeof (current_frame_info));
10916 current_frame_info.spill_cfa_off = -16;
10917 current_frame_info.n_input_regs = 1;
10918 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10919
10920 /* Mark the end of the (empty) prologue. */
10921 emit_note (NOTE_INSN_PROLOGUE_END);
10922
10923 /* Figure out whether "this" will be the first parameter (the
10924 typical case) or the second parameter (as happens when the
10925 virtual function returns certain class objects). */
10926 this_parmno
10927 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10928 ? 1 : 0);
10929 this_regno = IN_REG (this_parmno);
10930 if (!TARGET_REG_NAMES)
10931 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10932
10933 this_rtx = gen_rtx_REG (Pmode, this_regno);
10934
10935 /* Apply the constant offset, if required. */
10936 delta_rtx = GEN_INT (delta);
10937 if (TARGET_ILP32)
10938 {
10939 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10940 REG_POINTER (tmp) = 1;
10941 if (delta && satisfies_constraint_I (delta_rtx))
10942 {
10943 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10944 delta = 0;
10945 }
10946 else
10947 emit_insn (gen_ptr_extend (this_rtx, tmp));
10948 }
10949 if (delta)
10950 {
10951 if (!satisfies_constraint_I (delta_rtx))
10952 {
10953 rtx tmp = gen_rtx_REG (Pmode, 2);
10954 emit_move_insn (tmp, delta_rtx);
10955 delta_rtx = tmp;
10956 }
10957 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10958 }
10959
10960 /* Apply the offset from the vtable, if required. */
10961 if (vcall_offset)
10962 {
10963 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10964 rtx tmp = gen_rtx_REG (Pmode, 2);
10965
10966 if (TARGET_ILP32)
10967 {
10968 rtx t = gen_rtx_REG (ptr_mode, 2);
10969 REG_POINTER (t) = 1;
10970 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10971 if (satisfies_constraint_I (vcall_offset_rtx))
10972 {
10973 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10974 vcall_offset = 0;
10975 }
10976 else
10977 emit_insn (gen_ptr_extend (tmp, t));
10978 }
10979 else
10980 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10981
10982 if (vcall_offset)
10983 {
10984 if (!satisfies_constraint_J (vcall_offset_rtx))
10985 {
10986 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10987 emit_move_insn (tmp2, vcall_offset_rtx);
10988 vcall_offset_rtx = tmp2;
10989 }
10990 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10991 }
10992
10993 if (TARGET_ILP32)
10994 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10995 else
10996 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10997
10998 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10999 }
11000
11001 /* Generate a tail call to the target function. */
11002 if (! TREE_USED (function))
11003 {
11004 assemble_external (function);
11005 TREE_USED (function) = 1;
11006 }
11007 funexp = XEXP (DECL_RTL (function), 0);
11008 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11009 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
11010 insn = get_last_insn ();
11011 SIBLING_CALL_P (insn) = 1;
11012
11013 /* Code generation for calls relies on splitting. */
11014 reload_completed = 1;
11015 epilogue_completed = 1;
11016 try_split (PATTERN (insn), insn, 0);
11017
11018 emit_barrier ();
11019
11020 /* Run just enough of rest_of_compilation to get the insns emitted.
11021 There's not really enough bulk here to make other passes such as
11022 instruction scheduling worthwhile. Note that use_thunk calls
11023 assemble_start_function and assemble_end_function. */
11024
11025 emit_all_insn_group_barriers (NULL);
11026 insn = get_insns ();
11027 shorten_branches (insn);
11028 final_start_function (insn, file, 1);
11029 final (insn, file, 1);
11030 final_end_function ();
11031
11032 reload_completed = 0;
11033 epilogue_completed = 0;
11034 }
11035
11036 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
11037
11038 static rtx
11039 ia64_struct_value_rtx (tree fntype,
11040 int incoming ATTRIBUTE_UNUSED)
11041 {
11042 if (TARGET_ABI_OPEN_VMS
11043     || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
11044 return NULL_RTX;
11045 return gen_rtx_REG (Pmode, GR_REG (8));
11046 }
11047
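/* Worker for TARGET_SCALAR_MODE_SUPPORTED_P.  Besides the usual integer
   and float modes, the ia64-specific RFmode (__fpreg) and XFmode
   (__float80) are supported, and TFmode (__float128) is always
   supported, via library routines where there is no hardware support.  */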
11048 static bool
11049 ia64_scalar_mode_supported_p (scalar_mode mode)
11050 {
11051 switch (mode)
11052 {
11053 case E_QImode:
11054 case E_HImode:
11055 case E_SImode:
11056 case E_DImode:
11057 case E_TImode:
11058 return true;
11059
11060 case E_SFmode:
11061 case E_DFmode:
11062 case E_XFmode:
11063 case E_RFmode:
11064 return true;
11065
11066 case E_TFmode:
11067 return true;
11068
11069 default:
11070 return false;
11071 }
11072 }
11073
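/* Worker for TARGET_VECTOR_MODE_SUPPORTED_P: the 8-byte integer vector
   modes plus V2SFmode, i.e. those covered by patterns in ia64.md.  */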
11074 static bool
11075 ia64_vector_mode_supported_p (machine_mode mode)
11076 {
11077 switch (mode)
11078 {
11079 case E_V8QImode:
11080 case E_V4HImode:
11081 case E_V2SImode:
11082 return true;
11083
11084 case E_V2SFmode:
11085 return true;
11086
11087 default:
11088 return false;
11089 }
11090 }
11091
11092 /* Implement the FUNCTION_PROFILER macro. */
11093
11094 void
11095 ia64_output_function_profiler (FILE *file, int labelno)
11096 {
11097 bool indirect_call;
11098
11099 /* If the function needs a static chain and the static chain
11100 register is r15, we use an indirect call so as to bypass
11101 the PLT stub in case the executable is dynamically linked,
11102 because the stub clobbers r15 as per 5.3.6 of the psABI.
11103 We don't need to do that in non-canonical PIC mode (no-pic or auto-pic). */
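/* The indirect sequence emitted below loads the address of _mcount's
   function descriptor from the GOT via @ltoff(@fptr(_mcount)), then
   picks up the entry point and gp from that descriptor and calls
   through b6, so the r15-clobbering PLT stub is never executed.  */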
11104
11105 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11106 {
11107 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11108 indirect_call = true;
11109 }
11110 else
11111 indirect_call = false;
11112
11113 if (TARGET_GNU_AS)
11114 fputs ("\t.prologue 4, r40\n", file);
11115 else
11116 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11117 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11118
11119 if (NO_PROFILE_COUNTERS)
11120 fputs ("\tmov out3 = r0\n", file);
11121 else
11122 {
11123 char buf[20];
11124 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11125
11126 if (TARGET_AUTO_PIC)
11127 fputs ("\tmovl out3 = @gprel(", file);
11128 else
11129 fputs ("\taddl out3 = @ltoff(", file);
11130 assemble_name (file, buf);
11131 if (TARGET_AUTO_PIC)
11132 fputs (")\n", file);
11133 else
11134 fputs ("), r1\n", file);
11135 }
11136
11137 if (indirect_call)
11138 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11139 fputs ("\t;;\n", file);
11140
11141 fputs ("\t.save rp, r42\n", file);
11142 fputs ("\tmov out2 = b0\n", file);
11143 if (indirect_call)
11144 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11145 fputs ("\t.body\n", file);
11146 fputs ("\tmov out1 = r1\n", file);
11147 if (indirect_call)
11148 {
11149 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11150 fputs ("\tmov b6 = r16\n", file);
11151 fputs ("\tld8 r1 = [r14]\n", file);
11152 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11153 }
11154 else
11155 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11156 }
11157
11158 static GTY(()) rtx mcount_func_rtx;
11159 static rtx
11160 gen_mcount_func_rtx (void)
11161 {
11162 if (!mcount_func_rtx)
11163 mcount_func_rtx = init_one_libfunc ("_mcount");
11164 return mcount_func_rtx;
11165 }
11166
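/* Emit rtl to call _mcount when profiling: the arguments are the
   caller's return address (b0), the current instruction pointer, and
   the address of the per-call-site counter label (or zero when profile
   counters are disabled).  */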
11167 void
11168 ia64_profile_hook (int labelno)
11169 {
11170 rtx label, ip;
11171
11172 if (NO_PROFILE_COUNTERS)
11173 label = const0_rtx;
11174 else
11175 {
11176 char buf[30];
11177 const char *label_name;
11178 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11179 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11180 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11181 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11182 }
11183 ip = gen_reg_rtx (Pmode);
11184 emit_insn (gen_ip_value (ip));
11185 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11186 VOIDmode,
11187 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11188 ip, Pmode,
11189 label, Pmode);
11190 }
11191
11192 /* Return the mangling of TYPE if it is an extended fundamental type. */
11193
11194 static const char *
11195 ia64_mangle_type (const_tree type)
11196 {
11197 type = TYPE_MAIN_VARIANT (type);
11198
11199 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11200 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11201 return NULL;
11202
11203 /* On HP-UX, "long double" is TFmode and is already mangled as "e",
11204 so __float128 is likewise mangled as "e" there; elsewhere use "g". */
11205 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11206 return "g";
11207 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11208 an extended mangling. Elsewhere, "e" is available since long
11209 double is 80 bits. */
11210 if (TYPE_MODE (type) == XFmode)
11211 return TARGET_HPUX ? "u9__float80" : "e";
11212 if (TYPE_MODE (type) == RFmode)
11213 return "u7__fpreg";
11214 return NULL;
11215 }
11216
11217 /* Return the diagnostic message string if conversion from FROMTYPE to
11218 TOTYPE is not allowed, NULL otherwise. */
11219 static const char *
11220 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11221 {
11222 /* Reject nontrivial conversion to or from __fpreg. */
11223 if (TYPE_MODE (fromtype) == RFmode
11224 && TYPE_MODE (totype) != RFmode
11225 && TYPE_MODE (totype) != VOIDmode)
11226 return N_("invalid conversion from %<__fpreg%>");
11227 if (TYPE_MODE (totype) == RFmode
11228 && TYPE_MODE (fromtype) != RFmode)
11229 return N_("invalid conversion to %<__fpreg%>");
11230 return NULL;
11231 }
11232
11233 /* Return the diagnostic message string if the unary operation OP is
11234 not permitted on TYPE, NULL otherwise. */
11235 static const char *
11236 ia64_invalid_unary_op (int op, const_tree type)
11237 {
11238 /* Reject operations on __fpreg other than unary + or &. */
11239 if (TYPE_MODE (type) == RFmode
11240 && op != CONVERT_EXPR
11241 && op != ADDR_EXPR)
11242 return N_("invalid operation on %<__fpreg%>");
11243 return NULL;
11244 }
11245
11246 /* Return the diagnostic message string if the binary operation OP is
11247 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11248 static const char *
11249 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11250 {
11251 /* Reject operations on __fpreg. */
11252 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11253 return N_("invalid operation on %<__fpreg%>");
11254 return NULL;
11255 }
11256
11257 /* HP-UX version_id attribute.
11258 For object foo, if the version_id is set to 1234, put out an alias
11259 of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
11260 other than an alias statement because it is an illegal symbol name. */
11261
11262 static tree
11263 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11264 tree name ATTRIBUTE_UNUSED,
11265 tree args,
11266 int flags ATTRIBUTE_UNUSED,
11267 bool *no_add_attrs)
11268 {
11269 tree arg = TREE_VALUE (args);
11270
11271 if (TREE_CODE (arg) != STRING_CST)
11272 {
11273 error ("version attribute is not a string");
11274 *no_add_attrs = true;
11275 return NULL_TREE;
11276 }
11277 return NULL_TREE;
11278 }
11279
11280 /* Target hook for c_mode_for_suffix. */
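/* Constants written with the 'q' suffix get type __float128 (TFmode)
   and those written with the 'w' suffix get type __float80 (XFmode).  */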
11281
11282 static machine_mode
11283 ia64_c_mode_for_suffix (char suffix)
11284 {
11285 if (suffix == 'q')
11286 return TFmode;
11287 if (suffix == 'w')
11288 return XFmode;
11289
11290 return VOIDmode;
11291 }
11292
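/* Lazily-created, GTY-protected DFmode constants 0.5 and 0.375, cached
   so the CONST_DOUBLE rtx is built only once; they feed the inline
   square-root expansion sequences in the machine description.  */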
11293 static GTY(()) rtx ia64_dconst_0_5_rtx;
11294
11295 rtx
11296 ia64_dconst_0_5 (void)
11297 {
11298 if (! ia64_dconst_0_5_rtx)
11299 {
11300 REAL_VALUE_TYPE rv;
11301 real_from_string (&rv, "0.5");
11302 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11303 }
11304 return ia64_dconst_0_5_rtx;
11305 }
11306
11307 static GTY(()) rtx ia64_dconst_0_375_rtx;
11308
11309 rtx
11310 ia64_dconst_0_375 (void)
11311 {
11312 if (! ia64_dconst_0_375_rtx)
11313 {
11314 REAL_VALUE_TYPE rv;
11315 real_from_string (&rv, "0.375");
11316 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11317 }
11318 return ia64_dconst_0_375_rtx;
11319 }
11320
11321 static machine_mode
11322 ia64_get_reg_raw_mode (int regno)
11323 {
11324 if (FR_REGNO_P (regno))
11325 return XFmode;
11326 return default_get_reg_raw_mode (regno);
11327 }
11328
11329 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11330 anymore. */
11331
11332 bool
11333 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11334 {
11335 return TARGET_HPUX && mode == TFmode;
11336 }
11337
11338 /* Always default to .text section until HP-UX linker is fixed. */
11339
11340 ATTRIBUTE_UNUSED static section *
11341 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11342 enum node_frequency freq ATTRIBUTE_UNUSED,
11343 bool startup ATTRIBUTE_UNUSED,
11344 bool exit ATTRIBUTE_UNUSED)
11345 {
11346 return NULL;
11347 }
11348 \f
11349 /* Construct (set target (vec_select op0 (parallel perm))) and
11350 return true if that's a valid instruction in the active ISA. */
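/* If no insn pattern matches the emitted vec_select (recog fails), the
   insn is deleted again and false is returned, so callers can fall back
   to multi-instruction expansions.  */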
11351
11352 static bool
11353 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11354 {
11355 rtx rperm[MAX_VECT_LEN], x;
11356 unsigned i;
11357
11358 for (i = 0; i < nelt; ++i)
11359 rperm[i] = GEN_INT (perm[i]);
11360
11361 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11362 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11363 x = gen_rtx_SET (target, x);
11364
11365 rtx_insn *insn = emit_insn (x);
11366 if (recog_memoized (insn) < 0)
11367 {
11368 remove_insn (insn);
11369 return false;
11370 }
11371 return true;
11372 }
11373
11374 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11375
11376 static bool
11377 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11378 const unsigned char *perm, unsigned nelt)
11379 {
11380 machine_mode v2mode;
11381 rtx x;
11382
11383 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
11384 return false;
11385 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11386 return expand_vselect (target, x, perm, nelt);
11387 }
11388
11389 /* Try to expand a no-op permutation. */
11390
11391 static bool
11392 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11393 {
11394 unsigned i, nelt = d->nelt;
11395
11396 for (i = 0; i < nelt; ++i)
11397 if (d->perm[i] != i)
11398 return false;
11399
11400 if (!d->testing_p)
11401 emit_move_insn (d->target, d->op0);
11402
11403 return true;
11404 }
11405
11406 /* Try to expand D via a shrp instruction. */
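/* For example, on a little-endian target the two-operand V4HImode
   permutation {2, 3, 4, 5} selects a contiguous window from the
   concatenation of the operands, so it can be emitted as a single shrp
   of the two 64-bit halves by 2 * 16 bits.  */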
11407
11408 static bool
11409 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11410 {
11411 unsigned i, nelt = d->nelt, shift, mask;
11412 rtx tmp, hi, lo;
11413
11414 /* ??? Don't force V2SFmode into the integer registers. */
11415 if (d->vmode == V2SFmode)
11416 return false;
11417
11418 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11419
11420 shift = d->perm[0];
11421 if (BYTES_BIG_ENDIAN && shift > nelt)
11422 return false;
11423
11424 for (i = 1; i < nelt; ++i)
11425 if (d->perm[i] != ((shift + i) & mask))
11426 return false;
11427
11428 if (d->testing_p)
11429 return true;
11430
11431 hi = shift < nelt ? d->op1 : d->op0;
11432 lo = shift < nelt ? d->op0 : d->op1;
11433
11434 shift %= nelt;
11435
11436 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11437
11438 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11439 gcc_assert (IN_RANGE (shift, 1, 63));
11440
11441 /* Recall that big-endian elements are numbered starting at the top of
11442 the register. Ideally we'd have a shift-left-pair. But since we
11443 don't, convert to a shift in the other direction. */
11444 if (BYTES_BIG_ENDIAN)
11445 shift = 64 - shift;
11446
11447 tmp = gen_reg_rtx (DImode);
11448 hi = gen_lowpart (DImode, hi);
11449 lo = gen_lowpart (DImode, lo);
11450 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11451
11452 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11453 return true;
11454 }
11455
11456 /* Try to instantiate D in a single instruction. */
11457
11458 static bool
11459 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11460 {
11461 unsigned i, nelt = d->nelt;
11462 unsigned char perm2[MAX_VECT_LEN];
11463
11464 /* Try single-operand selections. */
11465 if (d->one_operand_p)
11466 {
11467 if (expand_vec_perm_identity (d))
11468 return true;
11469 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11470 return true;
11471 }
11472
11473 /* Try two operand selections. */
11474 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11475 return true;
11476
11477 /* Recognize interleave style patterns with reversed operands. */
11478 if (!d->one_operand_p)
11479 {
11480 for (i = 0; i < nelt; ++i)
11481 {
11482 unsigned e = d->perm[i];
11483 if (e >= nelt)
11484 e -= nelt;
11485 else
11486 e += nelt;
11487 perm2[i] = e;
11488 }
11489
11490 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11491 return true;
11492 }
11493
11494 if (expand_vec_perm_shrp (d))
11495 return true;
11496
11497 /* ??? Look for deposit-like permutations where most of the result
11498 comes from one vector unchanged and the rest comes from a
11499 sequential hunk of the other vector. */
11500
11501 return false;
11502 }
11503
11504 /* Pattern match broadcast permutations. */
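/* For example, a one-operand V8QImode permutation {5,5,5,5,5,5,5,5}
   replicates byte 5 into every lane: the byte is pulled out with extzv
   and then splatted with a mux1 broadcast.  V2SImode and V2SFmode
   broadcasts interleave the operand with itself, while V4HImode
   broadcasts should already have been matched as a plain vec_select.  */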
11505
11506 static bool
11507 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11508 {
11509 unsigned i, elt, nelt = d->nelt;
11510 unsigned char perm2[2];
11511 rtx temp;
11512 bool ok;
11513
11514 if (!d->one_operand_p)
11515 return false;
11516
11517 elt = d->perm[0];
11518 for (i = 1; i < nelt; ++i)
11519 if (d->perm[i] != elt)
11520 return false;
11521
11522 switch (d->vmode)
11523 {
11524 case E_V2SImode:
11525 case E_V2SFmode:
11526 /* Implementable by interleave. */
11527 perm2[0] = elt;
11528 perm2[1] = elt + 2;
11529 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11530 gcc_assert (ok);
11531 break;
11532
11533 case E_V8QImode:
11534 /* Implementable by extract + broadcast. */
11535 if (BYTES_BIG_ENDIAN)
11536 elt = 7 - elt;
11537 elt *= BITS_PER_UNIT;
11538 temp = gen_reg_rtx (DImode);
11539 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11540 GEN_INT (8), GEN_INT (elt)));
11541 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11542 break;
11543
11544 case E_V4HImode:
11545 /* Should have been matched directly by vec_select. */
11546 default:
11547 gcc_unreachable ();
11548 }
11549
11550 return true;
11551 }
11552
11553 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11554 two vector permutation into a single vector permutation by using
11555 an interleave operation to merge the vectors. */
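/* For example, the V4HImode permutation {0, 4, 1, 5} uses only the
   first half of each operand, so a single interleave of those halves
   makes every required element available in one register and the
   remaining shuffle becomes a one-operand permutation.  */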
11556
11557 static bool
11558 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11559 {
11560 struct expand_vec_perm_d dremap, dfinal;
11561 unsigned char remap[2 * MAX_VECT_LEN];
11562 unsigned contents, i, nelt, nelt2;
11563 unsigned h0, h1, h2, h3;
11564 rtx_insn *seq;
11565 bool ok;
11566
11567 if (d->one_operand_p)
11568 return false;
11569
11570 nelt = d->nelt;
11571 nelt2 = nelt / 2;
11572
11573 /* Examine where the elements come from. */
11574 contents = 0;
11575 for (i = 0; i < nelt; ++i)
11576 contents |= 1u << d->perm[i];
11577
11578 memset (remap, 0xff, sizeof (remap));
11579 dremap = *d;
11580
11581 h0 = (1u << nelt2) - 1;
11582 h1 = h0 << nelt2;
11583 h2 = h0 << nelt;
11584 h3 = h0 << (nelt + nelt2);
11585
11586 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11587 {
11588 for (i = 0; i < nelt; ++i)
11589 {
11590 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11591 remap[which] = i;
11592 dremap.perm[i] = which;
11593 }
11594 }
11595 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11596 {
11597 for (i = 0; i < nelt; ++i)
11598 {
11599 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11600 remap[which] = i;
11601 dremap.perm[i] = which;
11602 }
11603 }
11604 else if ((contents & 0x5555) == contents) /* mix even elements */
11605 {
11606 for (i = 0; i < nelt; ++i)
11607 {
11608 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11609 remap[which] = i;
11610 dremap.perm[i] = which;
11611 }
11612 }
11613 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11614 {
11615 for (i = 0; i < nelt; ++i)
11616 {
11617 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11618 remap[which] = i;
11619 dremap.perm[i] = which;
11620 }
11621 }
11622 else if (floor_log2 (contents) - ctz_hwi (contents) < (int) nelt) /* shrp */
11623 {
11624 unsigned shift = ctz_hwi (contents);
11625 for (i = 0; i < nelt; ++i)
11626 {
11627 unsigned which = (i + shift) & (2 * nelt - 1);
11628 remap[which] = i;
11629 dremap.perm[i] = which;
11630 }
11631 }
11632 else
11633 return false;
11634
11635 /* Use the remapping array set up above to move the elements from their
11636 swizzled locations into their final destinations. */
11637 dfinal = *d;
11638 for (i = 0; i < nelt; ++i)
11639 {
11640 unsigned e = remap[d->perm[i]];
11641 gcc_assert (e < nelt);
11642 dfinal.perm[i] = e;
11643 }
11644 if (d->testing_p)
11645 dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11646 else
11647 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11648 dfinal.op1 = dfinal.op0;
11649 dfinal.one_operand_p = true;
11650 dremap.target = dfinal.op0;
11651
11652 /* Test if the final remap can be done with a single insn. For V4HImode
11653 this *will* succeed. For V8QImode or V2SImode it may not. */
11654 start_sequence ();
11655 ok = expand_vec_perm_1 (&dfinal);
11656 seq = get_insns ();
11657 end_sequence ();
11658 if (!ok)
11659 return false;
11660 if (d->testing_p)
11661 return true;
11662
11663 ok = expand_vec_perm_1 (&dremap);
11664 gcc_assert (ok);
11665
11666 emit_insn (seq);
11667 return true;
11668 }
11669
11670 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11671 constant permutation via two mux2 and a merge. */
11672
11673 static bool
11674 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11675 {
11676 unsigned char perm2[4];
11677 rtx rmask[4];
11678 unsigned i;
11679 rtx t0, t1, mask, x;
11680 bool ok;
11681
11682 if (d->vmode != V4HImode || d->one_operand_p)
11683 return false;
11684 if (d->testing_p)
11685 return true;
11686
11687 for (i = 0; i < 4; ++i)
11688 {
11689 perm2[i] = d->perm[i] & 3;
11690 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11691 }
11692 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11693 mask = force_reg (V4HImode, mask);
11694
11695 t0 = gen_reg_rtx (V4HImode);
11696 t1 = gen_reg_rtx (V4HImode);
11697
11698 ok = expand_vselect (t0, d->op0, perm2, 4);
11699 gcc_assert (ok);
11700 ok = expand_vselect (t1, d->op1, perm2, 4);
11701 gcc_assert (ok);
11702
11703 x = gen_rtx_AND (V4HImode, mask, t0);
11704 emit_insn (gen_rtx_SET (t0, x));
11705
11706 x = gen_rtx_NOT (V4HImode, mask);
11707 x = gen_rtx_AND (V4HImode, x, t1);
11708 emit_insn (gen_rtx_SET (t1, x));
11709
11710 x = gen_rtx_IOR (V4HImode, t0, t1);
11711 emit_insn (gen_rtx_SET (d->target, x));
11712
11713 return true;
11714 }
11715
11716 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11717 With all of the interface bits taken care of, perform the expansion
11718 in D and return true on success. */
11719
11720 static bool
11721 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11722 {
11723 if (expand_vec_perm_1 (d))
11724 return true;
11725 if (expand_vec_perm_broadcast (d))
11726 return true;
11727 if (expand_vec_perm_interleave_2 (d))
11728 return true;
11729 if (expand_vec_perm_v4hi_5 (d))
11730 return true;
11731 return false;
11732 }
11733
11734 bool
11735 ia64_expand_vec_perm_const (rtx operands[4])
11736 {
11737 struct expand_vec_perm_d d;
11738 unsigned char perm[MAX_VECT_LEN];
11739 int i, nelt, which;
11740 rtx sel;
11741
11742 d.target = operands[0];
11743 d.op0 = operands[1];
11744 d.op1 = operands[2];
11745 sel = operands[3];
11746
11747 d.vmode = GET_MODE (d.target);
11748 gcc_assert (VECTOR_MODE_P (d.vmode));
11749 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11750 d.testing_p = false;
11751
11752 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11753 gcc_assert (XVECLEN (sel, 0) == nelt);
11754 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11755
11756 for (i = which = 0; i < nelt; ++i)
11757 {
11758 rtx e = XVECEXP (sel, 0, i);
11759 int ei = INTVAL (e) & (2 * nelt - 1);
11760
11761 which |= (ei < nelt ? 1 : 2);
11762 d.perm[i] = ei;
11763 perm[i] = ei;
11764 }
11765
11766 switch (which)
11767 {
11768 default:
11769 gcc_unreachable ();
11770
11771 case 3:
11772 if (!rtx_equal_p (d.op0, d.op1))
11773 {
11774 d.one_operand_p = false;
11775 break;
11776 }
11777
11778 /* The elements of PERM reference both operands, but the two
11779 operands are identical. Allow easier matching of the
11780 permutation by folding it onto the
11781 single input vector. */
11782 for (i = 0; i < nelt; ++i)
11783 if (d.perm[i] >= nelt)
11784 d.perm[i] -= nelt;
11785 /* FALLTHRU */
11786
11787 case 1:
11788 d.op1 = d.op0;
11789 d.one_operand_p = true;
11790 break;
11791
11792 case 2:
11793 for (i = 0; i < nelt; ++i)
11794 d.perm[i] -= nelt;
11795 d.op0 = d.op1;
11796 d.one_operand_p = true;
11797 break;
11798 }
11799
11800 if (ia64_expand_vec_perm_const_1 (&d))
11801 return true;
11802
11803 /* If the mask says both arguments are needed, but they are the same,
11804 the above tried to expand with one_operand_p true. If that didn't
11805 work, retry with one_operand_p false, as that's what we used in _ok. */
11806 if (which == 3 && d.one_operand_p)
11807 {
11808 memcpy (d.perm, perm, sizeof (perm));
11809 d.one_operand_p = false;
11810 return ia64_expand_vec_perm_const_1 (&d);
11811 }
11812
11813 return false;
11814 }
11815
11816 /* Implement targetm.vectorize.vec_perm_const_ok. */
11817
11818 static bool
11819 ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
11820 const unsigned char *sel)
11821 {
11822 struct expand_vec_perm_d d;
11823 unsigned int i, nelt, which;
11824 bool ret;
11825
11826 d.vmode = vmode;
11827 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11828 d.testing_p = true;
11829
11830 /* Copy the permutation indices from SEL into the permutation
11831 array in D. */
11832 memcpy (d.perm, sel, nelt);
11833 for (i = which = 0; i < nelt; ++i)
11834 {
11835 unsigned char e = d.perm[i];
11836 gcc_assert (e < 2 * nelt);
11837 which |= (e < nelt ? 1 : 2);
11838 }
11839
11840 /* If all elements come from the second vector, fold them onto the first. */
11841 if (which == 2)
11842 for (i = 0; i < nelt; ++i)
11843 d.perm[i] -= nelt;
11844
11845 /* Record whether the permutation needs one input operand or two. */
11846 d.one_operand_p = (which != 3);
11847
11848 /* Go through the motions and see if we can figure out how to
11849 generate the requested permutation. */
11850 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11851 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11852 if (!d.one_operand_p)
11853 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11854
11855 start_sequence ();
11856 ret = ia64_expand_vec_perm_const_1 (&d);
11857 end_sequence ();
11858
11859 return ret;
11860 }
11861
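/* Expand a V2SFmode vec_set: insert the scalar value OPERANDS[1] into
   the vector OPERANDS[0] at the element index given by OPERANDS[2].
   The scalar is first packed into a fresh V2SF register with fpack and
   then merged in with a two-operand permutation.  */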
11862 void
11863 ia64_expand_vec_setv2sf (rtx operands[3])
11864 {
11865 struct expand_vec_perm_d d;
11866 unsigned int which;
11867 bool ok;
11868
11869 d.target = operands[0];
11870 d.op0 = operands[0];
11871 d.op1 = gen_reg_rtx (V2SFmode);
11872 d.vmode = V2SFmode;
11873 d.nelt = 2;
11874 d.one_operand_p = false;
11875 d.testing_p = false;
11876
11877 which = INTVAL (operands[2]);
11878 gcc_assert (which <= 1);
11879 d.perm[0] = 1 - which;
11880 d.perm[1] = which + 2;
11881
11882 emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11883
11884 ok = ia64_expand_vec_perm_const_1 (&d);
11885 gcc_assert (ok);
11886 }
11887
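/* Expand an extract-even (ODD == 0) or extract-odd (ODD == 1)
   permutation of OP0 and OP1 into TARGET: element I of the result is
   element 2 * I + ODD of the concatenation of the two operands.  */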
11888 void
11889 ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11890 {
11891 struct expand_vec_perm_d d;
11892 machine_mode vmode = GET_MODE (target);
11893 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11894 bool ok;
11895
11896 d.target = target;
11897 d.op0 = op0;
11898 d.op1 = op1;
11899 d.vmode = vmode;
11900 d.nelt = nelt;
11901 d.one_operand_p = false;
11902 d.testing_p = false;
11903
11904 for (i = 0; i < nelt; ++i)
11905 d.perm[i] = i * 2 + odd;
11906
11907 ok = ia64_expand_vec_perm_const_1 (&d);
11908 gcc_assert (ok);
11909 }
11910
11911 #include "gt-ia64.h"