1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stringpool.h"
39 #include "stor-layout.h"
40 #include "calls.h"
41 #include "varasm.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "recog.h"
50 #include "hashtab.h"
51 #include "function.h"
52 #include "statistics.h"
53 #include "real.h"
54 #include "fixed-value.h"
55 #include "expmed.h"
56 #include "dojump.h"
57 #include "explow.h"
58 #include "emit-rtl.h"
59 #include "stmt.h"
60 #include "expr.h"
61 #include "insn-codes.h"
62 #include "optabs.h"
63 #include "except.h"
64 #include "ggc.h"
65 #include "predict.h"
66 #include "dominance.h"
67 #include "cfg.h"
68 #include "cfgrtl.h"
69 #include "cfganal.h"
70 #include "lcm.h"
71 #include "cfgbuild.h"
72 #include "cfgcleanup.h"
73 #include "basic-block.h"
74 #include "libfuncs.h"
75 #include "diagnostic-core.h"
76 #include "sched-int.h"
77 #include "timevar.h"
78 #include "target.h"
79 #include "target-def.h"
80 #include "common/common-target.h"
81 #include "tm_p.h"
82 #include "hash-table.h"
83 #include "langhooks.h"
84 #include "tree-ssa-alias.h"
85 #include "internal-fn.h"
86 #include "gimple-fold.h"
87 #include "tree-eh.h"
88 #include "gimple-expr.h"
89 #include "is-a.h"
90 #include "gimple.h"
91 #include "gimplify.h"
92 #include "intl.h"
93 #include "df.h"
94 #include "debug.h"
95 #include "params.h"
96 #include "dbgcnt.h"
97 #include "tm-constrs.h"
98 #include "sel-sched.h"
99 #include "reload.h"
100 #include "opts.h"
101 #include "dumpfile.h"
102 #include "builtins.h"
103
104 /* This is used for communication between ASM_OUTPUT_LABEL and
105 ASM_OUTPUT_LABELREF. */
106 int ia64_asm_output_label = 0;
107
108 /* Register names for ia64_expand_prologue. */
109 static const char * const ia64_reg_numbers[96] =
110 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
111 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
112 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
113 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
114 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
115 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
116 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
117 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
118 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
119 "r104","r105","r106","r107","r108","r109","r110","r111",
120 "r112","r113","r114","r115","r116","r117","r118","r119",
121 "r120","r121","r122","r123","r124","r125","r126","r127"};
122
123 /* ??? These strings could be shared with REGISTER_NAMES. */
124 static const char * const ia64_input_reg_names[8] =
125 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
126
127 /* ??? These strings could be shared with REGISTER_NAMES. */
128 static const char * const ia64_local_reg_names[80] =
129 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
130 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
131 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
132 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
133 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
134 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
135 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
136 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
137 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
138 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
139
140 /* ??? These strings could be shared with REGISTER_NAMES. */
141 static const char * const ia64_output_reg_names[8] =
142 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
143
144 /* Variables which are this size or smaller are put in the sdata/sbss
145 sections. */
146
147 unsigned int ia64_section_threshold;
148
149 /* The following variable is used by the DFA insn scheduler. The value is
150 TRUE if we do insn bundling instead of insn scheduling. */
151 int bundling_p = 0;
152
153 enum ia64_frame_regs
154 {
155 reg_fp,
156 reg_save_b0,
157 reg_save_pr,
158 reg_save_ar_pfs,
159 reg_save_ar_unat,
160 reg_save_ar_lc,
161 reg_save_gp,
162 number_of_ia64_frame_regs
163 };
164
165 /* Structure to be filled in by ia64_compute_frame_size with register
166 save masks and offsets for the current function. */
167
168 struct ia64_frame_info
169 {
170 HOST_WIDE_INT total_size; /* size of the stack frame, not including
171 the caller's scratch area. */
172 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
173 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
174 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
175 HARD_REG_SET mask; /* mask of saved registers. */
176 unsigned int gr_used_mask; /* mask of registers in use as gr spill
177 registers or long-term scratches. */
178 int n_spilled; /* number of spilled registers. */
179 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
180 int n_input_regs; /* number of input registers used. */
181 int n_local_regs; /* number of local registers used. */
182 int n_output_regs; /* number of output registers used. */
183 int n_rotate_regs; /* number of rotating registers used. */
184
185 char need_regstk; /* true if a .regstk directive needed. */
186 char initialized; /* true if the data is finalized. */
187 };
188
189 /* Current frame information calculated by ia64_compute_frame_size. */
190 static struct ia64_frame_info current_frame_info;
191 /* The actual registers that are emitted. */
192 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
193 \f
194 static int ia64_first_cycle_multipass_dfa_lookahead (void);
195 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
196 static void ia64_init_dfa_pre_cycle_insn (void);
197 static rtx ia64_dfa_pre_cycle_insn (void);
198 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
199 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
200 static void ia64_h_i_d_extended (void);
201 static void * ia64_alloc_sched_context (void);
202 static void ia64_init_sched_context (void *, bool);
203 static void ia64_set_sched_context (void *);
204 static void ia64_clear_sched_context (void *);
205 static void ia64_free_sched_context (void *);
206 static int ia64_mode_to_int (machine_mode);
207 static void ia64_set_sched_flags (spec_info_t);
208 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
209 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
210 static bool ia64_skip_rtx_p (const_rtx);
211 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
212 static bool ia64_needs_block_p (ds_t);
213 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
214 static int ia64_spec_check_p (rtx);
215 static int ia64_spec_check_src_p (rtx);
216 static rtx gen_tls_get_addr (void);
217 static rtx gen_thread_pointer (void);
218 static int find_gr_spill (enum ia64_frame_regs, int);
219 static int next_scratch_gr_reg (void);
220 static void mark_reg_gr_used_mask (rtx, void *);
221 static void ia64_compute_frame_size (HOST_WIDE_INT);
222 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
223 static void finish_spill_pointers (void);
224 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
225 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
226 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
227 static rtx gen_movdi_x (rtx, rtx, rtx);
228 static rtx gen_fr_spill_x (rtx, rtx, rtx);
229 static rtx gen_fr_restore_x (rtx, rtx, rtx);
230
231 static void ia64_option_override (void);
232 static bool ia64_can_eliminate (const int, const int);
233 static machine_mode hfa_element_mode (const_tree, bool);
234 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
235 tree, int *, int);
236 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
237 tree, bool);
238 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
239 const_tree, bool, bool);
240 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
241 const_tree, bool);
242 static rtx ia64_function_incoming_arg (cumulative_args_t,
243 machine_mode, const_tree, bool);
244 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
245 const_tree, bool);
246 static unsigned int ia64_function_arg_boundary (machine_mode,
247 const_tree);
248 static bool ia64_function_ok_for_sibcall (tree, tree);
249 static bool ia64_return_in_memory (const_tree, const_tree);
250 static rtx ia64_function_value (const_tree, const_tree, bool);
251 static rtx ia64_libcall_value (machine_mode, const_rtx);
252 static bool ia64_function_value_regno_p (const unsigned int);
253 static int ia64_register_move_cost (machine_mode, reg_class_t,
254 reg_class_t);
255 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
256 bool);
257 static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
258 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
259 static void fix_range (const char *);
260 static struct machine_function * ia64_init_machine_status (void);
261 static void emit_insn_group_barriers (FILE *);
262 static void emit_all_insn_group_barriers (FILE *);
263 static void final_emit_insn_group_barriers (FILE *);
264 static void emit_predicate_relation_info (void);
265 static void ia64_reorg (void);
266 static bool ia64_in_small_data_p (const_tree);
267 static void process_epilogue (FILE *, rtx, bool, bool);
268
269 static bool ia64_assemble_integer (rtx, unsigned int, int);
270 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
271 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
272 static void ia64_output_function_end_prologue (FILE *);
273
274 static void ia64_print_operand (FILE *, rtx, int);
275 static void ia64_print_operand_address (FILE *, rtx);
276 static bool ia64_print_operand_punct_valid_p (unsigned char code);
277
278 static int ia64_issue_rate (void);
279 static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
280 static void ia64_sched_init (FILE *, int, int);
281 static void ia64_sched_init_global (FILE *, int, int);
282 static void ia64_sched_finish_global (FILE *, int);
283 static void ia64_sched_finish (FILE *, int);
284 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
285 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
286 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
287 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
288
289 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
290 static void ia64_asm_emit_except_personality (rtx);
291 static void ia64_asm_init_sections (void);
292
293 static enum unwind_info_type ia64_debug_unwind_info (void);
294
295 static struct bundle_state *get_free_bundle_state (void);
296 static void free_bundle_state (struct bundle_state *);
297 static void initiate_bundle_states (void);
298 static void finish_bundle_states (void);
299 static int insert_bundle_state (struct bundle_state *);
300 static void initiate_bundle_state_table (void);
301 static void finish_bundle_state_table (void);
302 static int try_issue_nops (struct bundle_state *, int);
303 static int try_issue_insn (struct bundle_state *, rtx);
304 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
305 int, int);
306 static int get_max_pos (state_t);
307 static int get_template (state_t, int);
308
309 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
310 static bool important_for_bundling_p (rtx_insn *);
311 static bool unknown_for_bundling_p (rtx_insn *);
312 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
313
314 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
315 HOST_WIDE_INT, tree);
316 static void ia64_file_start (void);
317 static void ia64_globalize_decl_name (FILE *, tree);
318
319 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
320 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
321 static section *ia64_select_rtx_section (machine_mode, rtx,
322 unsigned HOST_WIDE_INT);
323 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
324 ATTRIBUTE_UNUSED;
325 static unsigned int ia64_section_type_flags (tree, const char *, int);
326 static void ia64_init_libfuncs (void)
327 ATTRIBUTE_UNUSED;
328 static void ia64_hpux_init_libfuncs (void)
329 ATTRIBUTE_UNUSED;
330 static void ia64_sysv4_init_libfuncs (void)
331 ATTRIBUTE_UNUSED;
332 static void ia64_vms_init_libfuncs (void)
333 ATTRIBUTE_UNUSED;
334 static void ia64_soft_fp_init_libfuncs (void)
335 ATTRIBUTE_UNUSED;
336 static bool ia64_vms_valid_pointer_mode (machine_mode mode)
337 ATTRIBUTE_UNUSED;
338 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
339 ATTRIBUTE_UNUSED;
340
341 static bool ia64_attribute_takes_identifier_p (const_tree);
342 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
343 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
344 static void ia64_encode_section_info (tree, rtx, int);
345 static rtx ia64_struct_value_rtx (tree, int);
346 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
347 static bool ia64_scalar_mode_supported_p (machine_mode mode);
348 static bool ia64_vector_mode_supported_p (machine_mode mode);
349 static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
350 static bool ia64_legitimate_constant_p (machine_mode, rtx);
351 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
352 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
353 static const char *ia64_mangle_type (const_tree);
354 static const char *ia64_invalid_conversion (const_tree, const_tree);
355 static const char *ia64_invalid_unary_op (int, const_tree);
356 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
357 static machine_mode ia64_c_mode_for_suffix (char);
358 static void ia64_trampoline_init (rtx, tree, rtx);
359 static void ia64_override_options_after_change (void);
360 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
361
362 static tree ia64_builtin_decl (unsigned, bool);
363
364 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
365 static machine_mode ia64_get_reg_raw_mode (int regno);
366 static section * ia64_hpux_function_section (tree, enum node_frequency,
367 bool, bool);
368
369 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
370 const unsigned char *sel);
371
372 #define MAX_VECT_LEN 8
373
374 struct expand_vec_perm_d
375 {
376 rtx target, op0, op1;
377 unsigned char perm[MAX_VECT_LEN];
378 machine_mode vmode;
379 unsigned char nelt;
380 bool one_operand_p;
381 bool testing_p;
382 };
383
384 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
385
386 \f
387 /* Table of valid machine attributes. */
388 static const struct attribute_spec ia64_attribute_table[] =
389 {
390 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
391 affects_type_identity } */
392 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
393 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
394 false },
395 #if TARGET_ABI_OPEN_VMS
396 { "common_object", 1, 1, true, false, false,
397 ia64_vms_common_object_attribute, false },
398 #endif
399 { "version_id", 1, 1, true, false, false,
400 ia64_handle_version_id_attribute, false },
401 { NULL, 0, 0, false, false, false, NULL, false }
402 };
403
404 /* Initialize the GCC target structure. */
405 #undef TARGET_ATTRIBUTE_TABLE
406 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
407
408 #undef TARGET_INIT_BUILTINS
409 #define TARGET_INIT_BUILTINS ia64_init_builtins
410
411 #undef TARGET_EXPAND_BUILTIN
412 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
413
414 #undef TARGET_BUILTIN_DECL
415 #define TARGET_BUILTIN_DECL ia64_builtin_decl
416
417 #undef TARGET_ASM_BYTE_OP
418 #define TARGET_ASM_BYTE_OP "\tdata1\t"
419 #undef TARGET_ASM_ALIGNED_HI_OP
420 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
421 #undef TARGET_ASM_ALIGNED_SI_OP
422 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
423 #undef TARGET_ASM_ALIGNED_DI_OP
424 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
425 #undef TARGET_ASM_UNALIGNED_HI_OP
426 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
427 #undef TARGET_ASM_UNALIGNED_SI_OP
428 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
429 #undef TARGET_ASM_UNALIGNED_DI_OP
430 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
431 #undef TARGET_ASM_INTEGER
432 #define TARGET_ASM_INTEGER ia64_assemble_integer
433
434 #undef TARGET_OPTION_OVERRIDE
435 #define TARGET_OPTION_OVERRIDE ia64_option_override
436
437 #undef TARGET_ASM_FUNCTION_PROLOGUE
438 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
439 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
440 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
441 #undef TARGET_ASM_FUNCTION_EPILOGUE
442 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
443
444 #undef TARGET_PRINT_OPERAND
445 #define TARGET_PRINT_OPERAND ia64_print_operand
446 #undef TARGET_PRINT_OPERAND_ADDRESS
447 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
448 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
449 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
450
451 #undef TARGET_IN_SMALL_DATA_P
452 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
453
454 #undef TARGET_SCHED_ADJUST_COST_2
455 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
456 #undef TARGET_SCHED_ISSUE_RATE
457 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
458 #undef TARGET_SCHED_VARIABLE_ISSUE
459 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
460 #undef TARGET_SCHED_INIT
461 #define TARGET_SCHED_INIT ia64_sched_init
462 #undef TARGET_SCHED_FINISH
463 #define TARGET_SCHED_FINISH ia64_sched_finish
464 #undef TARGET_SCHED_INIT_GLOBAL
465 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
466 #undef TARGET_SCHED_FINISH_GLOBAL
467 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
468 #undef TARGET_SCHED_REORDER
469 #define TARGET_SCHED_REORDER ia64_sched_reorder
470 #undef TARGET_SCHED_REORDER2
471 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
472
473 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
474 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
475
476 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
477 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
478
479 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
480 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
481 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
482 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
483
484 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
485 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
486 ia64_first_cycle_multipass_dfa_lookahead_guard
487
488 #undef TARGET_SCHED_DFA_NEW_CYCLE
489 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
490
491 #undef TARGET_SCHED_H_I_D_EXTENDED
492 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
493
494 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
495 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
496
497 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
498 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
499
500 #undef TARGET_SCHED_SET_SCHED_CONTEXT
501 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
502
503 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
504 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
505
506 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
507 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
508
509 #undef TARGET_SCHED_SET_SCHED_FLAGS
510 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
511
512 #undef TARGET_SCHED_GET_INSN_SPEC_DS
513 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
514
515 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
516 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
517
518 #undef TARGET_SCHED_SPECULATE_INSN
519 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
520
521 #undef TARGET_SCHED_NEEDS_BLOCK_P
522 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
523
524 #undef TARGET_SCHED_GEN_SPEC_CHECK
525 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
526
527 #undef TARGET_SCHED_SKIP_RTX_P
528 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
529
530 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
531 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
532 #undef TARGET_ARG_PARTIAL_BYTES
533 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
534 #undef TARGET_FUNCTION_ARG
535 #define TARGET_FUNCTION_ARG ia64_function_arg
536 #undef TARGET_FUNCTION_INCOMING_ARG
537 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
538 #undef TARGET_FUNCTION_ARG_ADVANCE
539 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
540 #undef TARGET_FUNCTION_ARG_BOUNDARY
541 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
542
543 #undef TARGET_ASM_OUTPUT_MI_THUNK
544 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
545 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
546 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
547
548 #undef TARGET_ASM_FILE_START
549 #define TARGET_ASM_FILE_START ia64_file_start
550
551 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
552 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
553
554 #undef TARGET_REGISTER_MOVE_COST
555 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
556 #undef TARGET_MEMORY_MOVE_COST
557 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
558 #undef TARGET_RTX_COSTS
559 #define TARGET_RTX_COSTS ia64_rtx_costs
560 #undef TARGET_ADDRESS_COST
561 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
562
563 #undef TARGET_UNSPEC_MAY_TRAP_P
564 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
565
566 #undef TARGET_MACHINE_DEPENDENT_REORG
567 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
568
569 #undef TARGET_ENCODE_SECTION_INFO
570 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
571
572 #undef TARGET_SECTION_TYPE_FLAGS
573 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
574
575 #ifdef HAVE_AS_TLS
576 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
577 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
578 #endif
579
580 /* ??? Investigate. */
581 #if 0
582 #undef TARGET_PROMOTE_PROTOTYPES
583 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
584 #endif
585
586 #undef TARGET_FUNCTION_VALUE
587 #define TARGET_FUNCTION_VALUE ia64_function_value
588 #undef TARGET_LIBCALL_VALUE
589 #define TARGET_LIBCALL_VALUE ia64_libcall_value
590 #undef TARGET_FUNCTION_VALUE_REGNO_P
591 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
592
593 #undef TARGET_STRUCT_VALUE_RTX
594 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
595 #undef TARGET_RETURN_IN_MEMORY
596 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
597 #undef TARGET_SETUP_INCOMING_VARARGS
598 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
599 #undef TARGET_STRICT_ARGUMENT_NAMING
600 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
601 #undef TARGET_MUST_PASS_IN_STACK
602 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
603 #undef TARGET_GET_RAW_RESULT_MODE
604 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
605 #undef TARGET_GET_RAW_ARG_MODE
606 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
607
608 #undef TARGET_MEMBER_TYPE_FORCES_BLK
609 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
610
611 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
612 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
613
614 #undef TARGET_ASM_UNWIND_EMIT
615 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
616 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
617 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
618 #undef TARGET_ASM_INIT_SECTIONS
619 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
620
621 #undef TARGET_DEBUG_UNWIND_INFO
622 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
623
624 #undef TARGET_SCALAR_MODE_SUPPORTED_P
625 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
626 #undef TARGET_VECTOR_MODE_SUPPORTED_P
627 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
628
629 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
630 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
631 ia64_libgcc_floating_mode_supported_p
632
633 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
634 in an order different from the specified program order. */
635 #undef TARGET_RELAXED_ORDERING
636 #define TARGET_RELAXED_ORDERING true
637
638 #undef TARGET_LEGITIMATE_CONSTANT_P
639 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
640 #undef TARGET_LEGITIMATE_ADDRESS_P
641 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
642
643 #undef TARGET_CANNOT_FORCE_CONST_MEM
644 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
645
646 #undef TARGET_MANGLE_TYPE
647 #define TARGET_MANGLE_TYPE ia64_mangle_type
648
649 #undef TARGET_INVALID_CONVERSION
650 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
651 #undef TARGET_INVALID_UNARY_OP
652 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
653 #undef TARGET_INVALID_BINARY_OP
654 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
655
656 #undef TARGET_C_MODE_FOR_SUFFIX
657 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
658
659 #undef TARGET_CAN_ELIMINATE
660 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
661
662 #undef TARGET_TRAMPOLINE_INIT
663 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
664
665 #undef TARGET_CAN_USE_DOLOOP_P
666 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
667 #undef TARGET_INVALID_WITHIN_DOLOOP
668 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
669
670 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
671 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
672
673 #undef TARGET_PREFERRED_RELOAD_CLASS
674 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
675
676 #undef TARGET_DELAY_SCHED2
677 #define TARGET_DELAY_SCHED2 true
678
679 /* Variable tracking should be run after all optimizations which
680 change order of insns. It also needs a valid CFG. */
681 #undef TARGET_DELAY_VARTRACK
682 #define TARGET_DELAY_VARTRACK true
683
684 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
685 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
686
687 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
688 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
689
690 struct gcc_target targetm = TARGET_INITIALIZER;
691 \f
692 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
693 identifier as an argument, so the front end shouldn't look it up. */
694
695 static bool
696 ia64_attribute_takes_identifier_p (const_tree attr_id)
697 {
698 if (is_attribute_p ("model", attr_id))
699 return true;
700 #if TARGET_ABI_OPEN_VMS
701 if (is_attribute_p ("common_object", attr_id))
702 return true;
703 #endif
704 return false;
705 }
706
707 typedef enum
708 {
709 ADDR_AREA_NORMAL, /* normal address area */
710 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
711 }
712 ia64_addr_area;
713
714 static GTY(()) tree small_ident1;
715 static GTY(()) tree small_ident2;
716
717 static void
718 init_idents (void)
719 {
720 if (small_ident1 == 0)
721 {
722 small_ident1 = get_identifier ("small");
723 small_ident2 = get_identifier ("__small__");
724 }
725 }
726
727 /* Retrieve the address area that has been chosen for the given decl. */
728
729 static ia64_addr_area
730 ia64_get_addr_area (tree decl)
731 {
732 tree model_attr;
733
734 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
735 if (model_attr)
736 {
737 tree id;
738
739 init_idents ();
740 id = TREE_VALUE (TREE_VALUE (model_attr));
741 if (id == small_ident1 || id == small_ident2)
742 return ADDR_AREA_SMALL;
743 }
744 return ADDR_AREA_NORMAL;
745 }
746
747 static tree
748 ia64_handle_model_attribute (tree *node, tree name, tree args,
749 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
750 {
751 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
752 ia64_addr_area area;
753 tree arg, decl = *node;
754
755 init_idents ();
756 arg = TREE_VALUE (args);
757 if (arg == small_ident1 || arg == small_ident2)
758 {
759 addr_area = ADDR_AREA_SMALL;
760 }
761 else
762 {
763 warning (OPT_Wattributes, "invalid argument of %qE attribute",
764 name);
765 *no_add_attrs = true;
766 }
767
768 switch (TREE_CODE (decl))
769 {
770 case VAR_DECL:
771 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
772 == FUNCTION_DECL)
773 && !TREE_STATIC (decl))
774 {
775 error_at (DECL_SOURCE_LOCATION (decl),
776 "an address area attribute cannot be specified for "
777 "local variables");
778 *no_add_attrs = true;
779 }
780 area = ia64_get_addr_area (decl);
781 if (area != ADDR_AREA_NORMAL && addr_area != area)
782 {
783 error ("address area of %q+D conflicts with previous "
784 "declaration", decl);
785 *no_add_attrs = true;
786 }
787 break;
788
789 case FUNCTION_DECL:
790 error_at (DECL_SOURCE_LOCATION (decl),
791 "address area attribute cannot be specified for "
792 "functions");
793 *no_add_attrs = true;
794 break;
795
796 default:
797 warning (OPT_Wattributes, "%qE attribute ignored",
798 name);
799 *no_add_attrs = true;
800 break;
801 }
802
803 return NULL_TREE;
804 }
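
/* Illustrative use of the attribute handled above (the declaration name is
   an example only): because ia64_attribute_takes_identifier_p returns true
   for "model", the argument is written as a plain identifier rather than a
   string:

	static int counter __attribute__ ((model (small)));

   which asks for COUNTER to be placed in the small address area so that it
   can be addressed with "addl".  */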
805
806 /* Part of the low level implementation of DEC Ada pragma Common_Object which
807 enables the shared use of variables stored in overlaid linker areas
808 corresponding to the use of Fortran COMMON. */
809
810 static tree
811 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
812 int flags ATTRIBUTE_UNUSED,
813 bool *no_add_attrs)
814 {
815 tree decl = *node;
816 tree id;
817
818 gcc_assert (DECL_P (decl));
819
820 DECL_COMMON (decl) = 1;
821 id = TREE_VALUE (args);
822 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
823 {
824 error ("%qE attribute requires a string constant argument", name);
825 *no_add_attrs = true;
826 return NULL_TREE;
827 }
828 return NULL_TREE;
829 }
830
831 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
832
833 void
834 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
835 unsigned HOST_WIDE_INT size,
836 unsigned int align)
837 {
838 tree attr = DECL_ATTRIBUTES (decl);
839
840 if (attr)
841 attr = lookup_attribute ("common_object", attr);
842 if (attr)
843 {
844 tree id = TREE_VALUE (TREE_VALUE (attr));
845 const char *name;
846
847 if (TREE_CODE (id) == IDENTIFIER_NODE)
848 name = IDENTIFIER_POINTER (id);
849 else if (TREE_CODE (id) == STRING_CST)
850 name = TREE_STRING_POINTER (id);
851 else
852 abort ();
853
854 fprintf (file, "\t.vms_common\t\"%s\",", name);
855 }
856 else
857 fprintf (file, "%s", COMMON_ASM_OP);
858
859 /* Code from elfos.h. */
860 assemble_name (file, name);
861   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u",
862 size, align / BITS_PER_UNIT);
863
864 fputc ('\n', file);
865 }
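
/* For example, given a declaration carrying
   __attribute__ ((common_object ("FOO$COMMON"))) (both names here are
   illustrative), the code above emits something like

	.vms_common	"FOO$COMMON",foo,16,8

   whereas without the attribute it falls back to COMMON_ASM_OP followed by
   the usual name,size,alignment output taken from elfos.h.  */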
866
867 static void
868 ia64_encode_addr_area (tree decl, rtx symbol)
869 {
870 int flags;
871
872 flags = SYMBOL_REF_FLAGS (symbol);
873 switch (ia64_get_addr_area (decl))
874 {
875 case ADDR_AREA_NORMAL: break;
876 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
877 default: gcc_unreachable ();
878 }
879 SYMBOL_REF_FLAGS (symbol) = flags;
880 }
881
882 static void
883 ia64_encode_section_info (tree decl, rtx rtl, int first)
884 {
885 default_encode_section_info (decl, rtl, first);
886
887 /* Careful not to prod global register variables. */
888 if (TREE_CODE (decl) == VAR_DECL
889 && GET_CODE (DECL_RTL (decl)) == MEM
890 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
891 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
892 ia64_encode_addr_area (decl, XEXP (rtl, 0));
893 }
894 \f
895 /* Return 1 if the operands of a move are ok. */
896
897 int
898 ia64_move_ok (rtx dst, rtx src)
899 {
900 /* If we're under init_recog_no_volatile, we'll not be able to use
901 memory_operand. So check the code directly and don't worry about
902 the validity of the underlying address, which should have been
903 checked elsewhere anyway. */
904 if (GET_CODE (dst) != MEM)
905 return 1;
906 if (GET_CODE (src) == MEM)
907 return 0;
908 if (register_operand (src, VOIDmode))
909 return 1;
910
911   /* Otherwise, this must be a constant, and then only 0, 0.0 or 1.0 is allowed.  */
912 if (INTEGRAL_MODE_P (GET_MODE (dst)))
913 return src == const0_rtx;
914 else
915 return satisfies_constraint_G (src);
916 }
917
918 /* Return 1 if the operands are ok for a floating point load pair. */
919
920 int
921 ia64_load_pair_ok (rtx dst, rtx src)
922 {
923 /* ??? There is a thinko in the implementation of the "x" constraint and the
924 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
925 also return false for it. */
926 if (GET_CODE (dst) != REG
927 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
928 return 0;
929 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
930 return 0;
931 switch (GET_CODE (XEXP (src, 0)))
932 {
933 case REG:
934 case POST_INC:
935 break;
936 case POST_DEC:
937 return 0;
938 case POST_MODIFY:
939 {
940 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
941
942 if (GET_CODE (adjust) != CONST_INT
943 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
944 return 0;
945 }
946 break;
947 default:
948 abort ();
949 }
950 return 1;
951 }
952
953 int
954 addp4_optimize_ok (rtx op1, rtx op2)
955 {
956 return (basereg_operand (op1, GET_MODE(op1)) !=
957 basereg_operand (op2, GET_MODE(op2)));
958 }
959
960 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
961 Return the length of the field, or <= 0 on failure. */
962
963 int
964 ia64_depz_field_mask (rtx rop, rtx rshift)
965 {
966 unsigned HOST_WIDE_INT op = INTVAL (rop);
967 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
968
969 /* Get rid of the zero bits we're shifting in. */
970 op >>= shift;
971
972 /* We must now have a solid block of 1's at bit 0. */
973 return exact_log2 (op + 1);
974 }
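
/* Worked example: with ROP = 0xff00 and RSHIFT = 8, the shift leaves
   op = 0xff, and exact_log2 (0x100) yields 8, i.e. an 8-bit field.
   With ROP = 0xf1 and RSHIFT = 0, op + 1 = 0xf2 is not a power of two,
   so exact_log2 returns -1 and the mask is rejected.  */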
975
976 /* Return the TLS model to use for ADDR. */
977
978 static enum tls_model
979 tls_symbolic_operand_type (rtx addr)
980 {
981 enum tls_model tls_kind = TLS_MODEL_NONE;
982
983 if (GET_CODE (addr) == CONST)
984 {
985 if (GET_CODE (XEXP (addr, 0)) == PLUS
986 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
987 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
988 }
989 else if (GET_CODE (addr) == SYMBOL_REF)
990 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
991
992 return tls_kind;
993 }
994
995 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
996 as a base register. */
997
998 static inline bool
999 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
1000 {
1001 if (strict
1002 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
1003 return true;
1004 else if (!strict
1005 && (GENERAL_REGNO_P (REGNO (reg))
1006 || !HARD_REGISTER_P (reg)))
1007 return true;
1008 else
1009 return false;
1010 }
1011
1012 static bool
1013 ia64_legitimate_address_reg (const_rtx reg, bool strict)
1014 {
1015 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
1016 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
1017 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1018 return true;
1019
1020 return false;
1021 }
1022
1023 static bool
1024 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1025 {
1026 if (GET_CODE (disp) == PLUS
1027 && rtx_equal_p (reg, XEXP (disp, 0))
1028 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1029 || (CONST_INT_P (XEXP (disp, 1))
1030 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1031 return true;
1032
1033 return false;
1034 }
1035
1036 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1037
1038 static bool
1039 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1040 rtx x, bool strict)
1041 {
1042 if (ia64_legitimate_address_reg (x, strict))
1043 return true;
1044 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1045 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1046 && XEXP (x, 0) != arg_pointer_rtx)
1047 return true;
1048 else if (GET_CODE (x) == POST_MODIFY
1049 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1050 && XEXP (x, 0) != arg_pointer_rtx
1051 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1052 return true;
1053 else
1054 return false;
1055 }
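
/* To summarize the cases accepted above, ia64 addresses are register
   indirect only; the recognized forms are

	(reg)						a base register
	(post_inc (reg))  /  (post_dec (reg))		auto-increment by the access size
	(post_modify (reg) (plus (reg) (reg)))
	(post_modify (reg) (plus (reg) (const_int -256..255)))

   There is no base+displacement or base+index form, so such addresses must
   first be computed into a register with a separate add.  */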
1056
1057 /* Return true if X is a constant that is valid for some immediate
1058 field in an instruction. */
1059
1060 static bool
1061 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1062 {
1063 switch (GET_CODE (x))
1064 {
1065 case CONST_INT:
1066 case LABEL_REF:
1067 return true;
1068
1069 case CONST_DOUBLE:
1070 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1071 return true;
1072 return satisfies_constraint_G (x);
1073
1074 case CONST:
1075 case SYMBOL_REF:
1076 /* ??? Short term workaround for PR 28490. We must make the code here
1077 match the code in ia64_expand_move and move_operand, even though they
1078 are both technically wrong. */
1079 if (tls_symbolic_operand_type (x) == 0)
1080 {
1081 HOST_WIDE_INT addend = 0;
1082 rtx op = x;
1083
1084 if (GET_CODE (op) == CONST
1085 && GET_CODE (XEXP (op, 0)) == PLUS
1086 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1087 {
1088 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1089 op = XEXP (XEXP (op, 0), 0);
1090 }
1091
1092 if (any_offset_symbol_operand (op, mode)
1093 || function_operand (op, mode))
1094 return true;
1095 if (aligned_offset_symbol_operand (op, mode))
1096 return (addend & 0x3fff) == 0;
1097 return false;
1098 }
1099 return false;
1100
1101 case CONST_VECTOR:
1102 if (mode == V2SFmode)
1103 return satisfies_constraint_Y (x);
1104
1105 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1106 && GET_MODE_SIZE (mode) <= 8);
1107
1108 default:
1109 return false;
1110 }
1111 }
1112
1113 /* Don't allow TLS addresses to get spilled to memory. */
1114
1115 static bool
1116 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1117 {
1118 if (mode == RFmode)
1119 return true;
1120 return tls_symbolic_operand_type (x) != 0;
1121 }
1122
1123 /* Expand a symbolic constant load. */
1124
1125 bool
1126 ia64_expand_load_address (rtx dest, rtx src)
1127 {
1128 gcc_assert (GET_CODE (dest) == REG);
1129
1130   /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
1131 having to pointer-extend the value afterward. Other forms of address
1132 computation below are also more natural to compute as 64-bit quantities.
1133 If we've been given an SImode destination register, change it. */
1134 if (GET_MODE (dest) != Pmode)
1135 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1136 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1137
1138 if (TARGET_NO_PIC)
1139 return false;
1140 if (small_addr_symbolic_operand (src, VOIDmode))
1141 return false;
1142
1143 if (TARGET_AUTO_PIC)
1144 emit_insn (gen_load_gprel64 (dest, src));
1145 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1146 emit_insn (gen_load_fptr (dest, src));
1147 else if (sdata_symbolic_operand (src, VOIDmode))
1148 emit_insn (gen_load_gprel (dest, src));
1149 else
1150 {
1151 HOST_WIDE_INT addend = 0;
1152 rtx tmp;
1153
1154 /* We did split constant offsets in ia64_expand_move, and we did try
1155 to keep them split in move_operand, but we also allowed reload to
1156 rematerialize arbitrary constants rather than spill the value to
1157 the stack and reload it. So we have to be prepared here to split
1158 them apart again. */
1159 if (GET_CODE (src) == CONST)
1160 {
1161 HOST_WIDE_INT hi, lo;
1162
1163 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1164 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1165 hi = hi - lo;
1166
1167 if (lo != 0)
1168 {
1169 addend = lo;
1170 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1171 }
1172 }
1173
1174 tmp = gen_rtx_HIGH (Pmode, src);
1175 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1176 emit_insn (gen_rtx_SET (dest, tmp));
1177
1178 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1179 emit_insn (gen_rtx_SET (dest, tmp));
1180
1181 if (addend)
1182 {
1183 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1184 emit_insn (gen_rtx_SET (dest, tmp));
1185 }
1186 }
1187
1188 return true;
1189 }
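
/* A note on the lo/hi split above: ((hi & 0x3fff) ^ 0x2000) - 0x2000
   sign-extends the low 14 bits of the offset, so HI ends up a multiple of
   0x4000, matching the 16KB alignment that ia64_legitimate_constant_p
   requires of aligned_offset_symbol_operand addends.  For instance an
   offset of 0x12345 splits into lo = -0x1cbb and hi = 0x14000.  */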
1190
1191 static GTY(()) rtx gen_tls_tga;
1192 static rtx
1193 gen_tls_get_addr (void)
1194 {
1195 if (!gen_tls_tga)
1196 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1197 return gen_tls_tga;
1198 }
1199
1200 static GTY(()) rtx thread_pointer_rtx;
1201 static rtx
1202 gen_thread_pointer (void)
1203 {
1204 if (!thread_pointer_rtx)
1205 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1206 return thread_pointer_rtx;
1207 }
1208
1209 static rtx
1210 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1211 rtx orig_op1, HOST_WIDE_INT addend)
1212 {
1213 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1214 rtx_insn *insns;
1215 rtx orig_op0 = op0;
1216 HOST_WIDE_INT addend_lo, addend_hi;
1217
1218 switch (tls_kind)
1219 {
1220 case TLS_MODEL_GLOBAL_DYNAMIC:
1221 start_sequence ();
1222
1223 tga_op1 = gen_reg_rtx (Pmode);
1224 emit_insn (gen_load_dtpmod (tga_op1, op1));
1225
1226 tga_op2 = gen_reg_rtx (Pmode);
1227 emit_insn (gen_load_dtprel (tga_op2, op1));
1228
1229 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1230 LCT_CONST, Pmode, 2, tga_op1,
1231 Pmode, tga_op2, Pmode);
1232
1233 insns = get_insns ();
1234 end_sequence ();
1235
1236 if (GET_MODE (op0) != Pmode)
1237 op0 = tga_ret;
1238 emit_libcall_block (insns, op0, tga_ret, op1);
1239 break;
1240
1241 case TLS_MODEL_LOCAL_DYNAMIC:
1242       /* ??? This isn't the completely proper way to do local-dynamic.
1243 If the call to __tls_get_addr is used only by a single symbol,
1244 then we should (somehow) move the dtprel to the second arg
1245 to avoid the extra add. */
1246 start_sequence ();
1247
1248 tga_op1 = gen_reg_rtx (Pmode);
1249 emit_insn (gen_load_dtpmod (tga_op1, op1));
1250
1251 tga_op2 = const0_rtx;
1252
1253 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1254 LCT_CONST, Pmode, 2, tga_op1,
1255 Pmode, tga_op2, Pmode);
1256
1257 insns = get_insns ();
1258 end_sequence ();
1259
1260 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1261 UNSPEC_LD_BASE);
1262 tmp = gen_reg_rtx (Pmode);
1263 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1264
1265 if (!register_operand (op0, Pmode))
1266 op0 = gen_reg_rtx (Pmode);
1267 if (TARGET_TLS64)
1268 {
1269 emit_insn (gen_load_dtprel (op0, op1));
1270 emit_insn (gen_adddi3 (op0, tmp, op0));
1271 }
1272 else
1273 emit_insn (gen_add_dtprel (op0, op1, tmp));
1274 break;
1275
1276 case TLS_MODEL_INITIAL_EXEC:
1277 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1278 addend_hi = addend - addend_lo;
1279
1280 op1 = plus_constant (Pmode, op1, addend_hi);
1281 addend = addend_lo;
1282
1283 tmp = gen_reg_rtx (Pmode);
1284 emit_insn (gen_load_tprel (tmp, op1));
1285
1286 if (!register_operand (op0, Pmode))
1287 op0 = gen_reg_rtx (Pmode);
1288 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1289 break;
1290
1291 case TLS_MODEL_LOCAL_EXEC:
1292 if (!register_operand (op0, Pmode))
1293 op0 = gen_reg_rtx (Pmode);
1294
1295 op1 = orig_op1;
1296 addend = 0;
1297 if (TARGET_TLS64)
1298 {
1299 emit_insn (gen_load_tprel (op0, op1));
1300 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1301 }
1302 else
1303 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1304 break;
1305
1306 default:
1307 gcc_unreachable ();
1308 }
1309
1310 if (addend)
1311 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1312 orig_op0, 1, OPTAB_DIRECT);
1313 if (orig_op0 == op0)
1314 return NULL_RTX;
1315 if (GET_MODE (orig_op0) == Pmode)
1316 return op0;
1317 return gen_lowpart (GET_MODE (orig_op0), op0);
1318 }
1319
1320 rtx
1321 ia64_expand_move (rtx op0, rtx op1)
1322 {
1323 machine_mode mode = GET_MODE (op0);
1324
1325 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1326 op1 = force_reg (mode, op1);
1327
1328 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1329 {
1330 HOST_WIDE_INT addend = 0;
1331 enum tls_model tls_kind;
1332 rtx sym = op1;
1333
1334 if (GET_CODE (op1) == CONST
1335 && GET_CODE (XEXP (op1, 0)) == PLUS
1336 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1337 {
1338 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1339 sym = XEXP (XEXP (op1, 0), 0);
1340 }
1341
1342 tls_kind = tls_symbolic_operand_type (sym);
1343 if (tls_kind)
1344 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1345
1346 if (any_offset_symbol_operand (sym, mode))
1347 addend = 0;
1348 else if (aligned_offset_symbol_operand (sym, mode))
1349 {
1350 HOST_WIDE_INT addend_lo, addend_hi;
1351
1352 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1353 addend_hi = addend - addend_lo;
1354
1355 if (addend_lo != 0)
1356 {
1357 op1 = plus_constant (mode, sym, addend_hi);
1358 addend = addend_lo;
1359 }
1360 else
1361 addend = 0;
1362 }
1363 else
1364 op1 = sym;
1365
1366 if (reload_completed)
1367 {
1368 /* We really should have taken care of this offset earlier. */
1369 gcc_assert (addend == 0);
1370 if (ia64_expand_load_address (op0, op1))
1371 return NULL_RTX;
1372 }
1373
1374 if (addend)
1375 {
1376 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1377
1378 emit_insn (gen_rtx_SET (subtarget, op1));
1379
1380 op1 = expand_simple_binop (mode, PLUS, subtarget,
1381 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1382 if (op0 == op1)
1383 return NULL_RTX;
1384 }
1385 }
1386
1387 return op1;
1388 }
1389
1390 /* Split a move from OP1 to OP0 conditional on COND. */
1391
1392 void
1393 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1394 {
1395 rtx_insn *insn, *first = get_last_insn ();
1396
1397 emit_move_insn (op0, op1);
1398
1399 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1400 if (INSN_P (insn))
1401 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1402 PATTERN (insn));
1403 }
1404
1405 /* Split a post-reload TImode or TFmode reference into two DImode
1406 components. This is made extra difficult by the fact that we do
1407 not get any scratch registers to work with, because reload cannot
1408 be prevented from giving us a scratch that overlaps the register
1409 pair involved. So instead, when addressing memory, we tweak the
1410 pointer register up and back down with POST_INCs. Or up and not
1411 back down when we can get away with it.
1412
1413 REVERSED is true when the loads must be done in reversed order
1414 (high word first) for correctness. DEAD is true when the pointer
1415 dies with the second insn we generate and therefore the second
1416 address must not carry a postmodify.
1417
1418 May return an insn which is to be emitted after the moves. */
1419
1420 static rtx
1421 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1422 {
1423 rtx fixup = 0;
1424
1425 switch (GET_CODE (in))
1426 {
1427 case REG:
1428 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1429 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1430 break;
1431
1432 case CONST_INT:
1433 case CONST_DOUBLE:
1434 /* Cannot occur reversed. */
1435 gcc_assert (!reversed);
1436
1437 if (GET_MODE (in) != TFmode)
1438 split_double (in, &out[0], &out[1]);
1439 else
1440 /* split_double does not understand how to split a TFmode
1441 quantity into a pair of DImode constants. */
1442 {
1443 REAL_VALUE_TYPE r;
1444 unsigned HOST_WIDE_INT p[2];
1445 long l[4]; /* TFmode is 128 bits */
1446
1447 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1448 real_to_target (l, &r, TFmode);
1449
1450 if (FLOAT_WORDS_BIG_ENDIAN)
1451 {
1452 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1453 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1454 }
1455 else
1456 {
1457 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1458 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1459 }
1460 out[0] = GEN_INT (p[0]);
1461 out[1] = GEN_INT (p[1]);
1462 }
1463 break;
1464
1465 case MEM:
1466 {
1467 rtx base = XEXP (in, 0);
1468 rtx offset;
1469
1470 switch (GET_CODE (base))
1471 {
1472 case REG:
1473 if (!reversed)
1474 {
1475 out[0] = adjust_automodify_address
1476 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1477 out[1] = adjust_automodify_address
1478 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1479 }
1480 else
1481 {
1482 /* Reversal requires a pre-increment, which can only
1483 be done as a separate insn. */
1484 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1485 out[0] = adjust_automodify_address
1486 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1487 out[1] = adjust_address (in, DImode, 0);
1488 }
1489 break;
1490
1491 case POST_INC:
1492 gcc_assert (!reversed && !dead);
1493
1494 /* Just do the increment in two steps. */
1495 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1496 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1497 break;
1498
1499 case POST_DEC:
1500 gcc_assert (!reversed && !dead);
1501
1502 /* Add 8, subtract 24. */
1503 base = XEXP (base, 0);
1504 out[0] = adjust_automodify_address
1505 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1506 out[1] = adjust_automodify_address
1507 (in, DImode,
1508 gen_rtx_POST_MODIFY (Pmode, base,
1509 plus_constant (Pmode, base, -24)),
1510 8);
1511 break;
1512
1513 case POST_MODIFY:
1514 gcc_assert (!reversed && !dead);
1515
1516 /* Extract and adjust the modification. This case is
1517 trickier than the others, because we might have an
1518 index register, or we might have a combined offset that
1519 doesn't fit a signed 9-bit displacement field. We can
1520 assume the incoming expression is already legitimate. */
1521 offset = XEXP (base, 1);
1522 base = XEXP (base, 0);
1523
1524 out[0] = adjust_automodify_address
1525 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1526
1527 if (GET_CODE (XEXP (offset, 1)) == REG)
1528 {
1529 /* Can't adjust the postmodify to match. Emit the
1530 original, then a separate addition insn. */
1531 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1532 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1533 }
1534 else
1535 {
1536 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1537 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1538 {
1539 /* Again the postmodify cannot be made to match,
1540 but in this case it's more efficient to get rid
1541 of the postmodify entirely and fix up with an
1542 add insn. */
1543 out[1] = adjust_automodify_address (in, DImode, base, 8);
1544 fixup = gen_adddi3
1545 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1546 }
1547 else
1548 {
1549 /* Combined offset still fits in the displacement field.
1550 (We cannot overflow it at the high end.) */
1551 out[1] = adjust_automodify_address
1552 (in, DImode, gen_rtx_POST_MODIFY
1553 (Pmode, base, gen_rtx_PLUS
1554 (Pmode, base,
1555 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1556 8);
1557 }
1558 }
1559 break;
1560
1561 default:
1562 gcc_unreachable ();
1563 }
1564 break;
1565 }
1566
1567 default:
1568 gcc_unreachable ();
1569 }
1570
1571 return fixup;
1572 }
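
/* For instance, splitting IN = (mem:TF (reg:DI r14)) with REVERSED and
   DEAD both false produces

	out[0] = (mem:DI (post_inc:DI (reg:DI r14)))	offset 0, r14 += 8
	out[1] = (mem:DI (post_dec:DI (reg:DI r14)))	offset 8, r14 -= 8

   so the pointer is bumped up for the second access and then restored,
   leaving r14 unchanged after both moves.  */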
1573
1574 /* Split a TImode or TFmode move instruction after reload.
1575 This is used by *movtf_internal and *movti_internal. */
1576 void
1577 ia64_split_tmode_move (rtx operands[])
1578 {
1579 rtx in[2], out[2], insn;
1580 rtx fixup[2];
1581 bool dead = false;
1582 bool reversed = false;
1583
1584 /* It is possible for reload to decide to overwrite a pointer with
1585 the value it points to. In that case we have to do the loads in
1586 the appropriate order so that the pointer is not destroyed too
1587 early. Also we must not generate a postmodify for that second
1588 load, or rws_access_regno will die. And we must not generate a
1589 postmodify for the second load if the destination register
1590 overlaps with the base register. */
1591 if (GET_CODE (operands[1]) == MEM
1592 && reg_overlap_mentioned_p (operands[0], operands[1]))
1593 {
1594 rtx base = XEXP (operands[1], 0);
1595 while (GET_CODE (base) != REG)
1596 base = XEXP (base, 0);
1597
1598 if (REGNO (base) == REGNO (operands[0]))
1599 reversed = true;
1600
1601 if (refers_to_regno_p (REGNO (operands[0]),
1602 REGNO (operands[0])+2,
1603 base, 0))
1604 dead = true;
1605 }
1606 /* Another reason to do the moves in reversed order is if the first
1607 element of the target register pair is also the second element of
1608 the source register pair. */
1609 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1610 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1611 reversed = true;
1612
1613 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1614 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1615
1616 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1617 if (GET_CODE (EXP) == MEM \
1618 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1619 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1620 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1621 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1622
1623 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1624 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1625 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1626
1627 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1628 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1629 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1630
1631 if (fixup[0])
1632 emit_insn (fixup[0]);
1633 if (fixup[1])
1634 emit_insn (fixup[1]);
1635
1636 #undef MAYBE_ADD_REG_INC_NOTE
1637 }
1638
1639 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1640 through memory plus an extra GR scratch register. Except that you can
1641 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1642 SECONDARY_RELOAD_CLASS, but not both.
1643
1644 We got into problems in the first place by allowing a construct like
1645 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1646 This solution attempts to prevent this situation from occurring. When
1647 we see something like the above, we spill the inner register to memory. */
1648
1649 static rtx
1650 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1651 {
1652 if (GET_CODE (in) == SUBREG
1653 && GET_MODE (SUBREG_REG (in)) == TImode
1654 && GET_CODE (SUBREG_REG (in)) == REG)
1655 {
1656 rtx memt = assign_stack_temp (TImode, 16);
1657 emit_move_insn (memt, SUBREG_REG (in));
1658 return adjust_address (memt, mode, 0);
1659 }
1660 else if (force && GET_CODE (in) == REG)
1661 {
1662 rtx memx = assign_stack_temp (mode, 16);
1663 emit_move_insn (memx, in);
1664 return memx;
1665 }
1666 else
1667 return in;
1668 }
1669
1670 /* Expand the movxf or movrf pattern (MODE says which) with the given
1671 OPERANDS, returning true if the pattern should then invoke
1672 DONE. */
1673
1674 bool
1675 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1676 {
1677 rtx op0 = operands[0];
1678
1679 if (GET_CODE (op0) == SUBREG)
1680 op0 = SUBREG_REG (op0);
1681
1682 /* We must support XFmode loads into general registers for stdarg/vararg,
1683 unprototyped calls, and a rare case where a long double is passed as
1684 an argument after a float HFA fills the FP registers. We split them into
1685 DImode loads for convenience. We also need to support XFmode stores
1686 for the last case. This case does not happen for stdarg/vararg routines,
1687 because we do a block store to memory of unnamed arguments. */
1688
1689 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1690 {
1691 rtx out[2];
1692
1693 /* We're hoping to transform everything that deals with XFmode
1694 quantities and GR registers early in the compiler. */
1695 gcc_assert (can_create_pseudo_p ());
1696
1697 /* A struct-to-register move can just use TImode instead. */
1698 if ((GET_CODE (operands[1]) == SUBREG
1699 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1700 || (GET_CODE (operands[1]) == REG
1701 && GR_REGNO_P (REGNO (operands[1]))))
1702 {
1703 rtx op1 = operands[1];
1704
1705 if (GET_CODE (op1) == SUBREG)
1706 op1 = SUBREG_REG (op1);
1707 else
1708 op1 = gen_rtx_REG (TImode, REGNO (op1));
1709
1710 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1711 return true;
1712 }
1713
1714 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1715 {
1716 /* Don't word-swap when reading in the constant. */
1717 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1718 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1719 0, mode));
1720 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1721 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1722 0, mode));
1723 return true;
1724 }
1725
1726 /* If the quantity is in a register not known to be GR, spill it. */
1727 if (register_operand (operands[1], mode))
1728 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1729
1730 gcc_assert (GET_CODE (operands[1]) == MEM);
1731
1732 /* Don't word-swap when reading in the value. */
1733 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1734 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1735
1736 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1737 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1738 return true;
1739 }
1740
1741 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1742 {
1743 /* We're hoping to transform everything that deals with XFmode
1744 quantities and GR registers early in the compiler. */
1745 gcc_assert (can_create_pseudo_p ());
1746
1747 /* Op0 can't be a GR_REG here, as that case is handled above.
1748 If op0 is a register, then we spill op1, so that we now have a
1749 MEM operand. This requires creating an XFmode subreg of a TImode reg
1750 to force the spill. */
1751 if (register_operand (operands[0], mode))
1752 {
1753 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1754 op1 = gen_rtx_SUBREG (mode, op1, 0);
1755 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1756 }
1757
1758 else
1759 {
1760 rtx in[2];
1761
1762 gcc_assert (GET_CODE (operands[0]) == MEM);
1763
1764 /* Don't word-swap when writing out the value. */
1765 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1766 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1767
1768 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1769 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1770 return true;
1771 }
1772 }
1773
1774 if (!reload_in_progress && !reload_completed)
1775 {
1776 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1777
1778 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1779 {
1780 rtx memt, memx, in = operands[1];
1781 if (CONSTANT_P (in))
1782 in = validize_mem (force_const_mem (mode, in));
1783 if (GET_CODE (in) == MEM)
1784 memt = adjust_address (in, TImode, 0);
1785 else
1786 {
1787 memt = assign_stack_temp (TImode, 16);
1788 memx = adjust_address (memt, mode, 0);
1789 emit_move_insn (memx, in);
1790 }
1791 emit_move_insn (op0, memt);
1792 return true;
1793 }
1794
1795 if (!ia64_move_ok (operands[0], operands[1]))
1796 operands[1] = force_reg (mode, operands[1]);
1797 }
1798
1799 return false;
1800 }
1801
1802 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1803 with the expression that holds the compare result (in VOIDmode). */
1804
1805 static GTY(()) rtx cmptf_libfunc;
1806
1807 void
1808 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1809 {
1810 enum rtx_code code = GET_CODE (*expr);
1811 rtx cmp;
1812
1813 /* If we have a BImode input, then we already have a compare result, and
1814 do not need to emit another comparison. */
1815 if (GET_MODE (*op0) == BImode)
1816 {
1817 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1818 cmp = *op0;
1819 }
1820 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1821 magic number as its third argument that indicates what to do.
1822 The return value is an integer to be compared against zero. */
1823 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1824 {
1825 enum qfcmp_magic {
1826 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1827 QCMP_UNORD = 2,
1828 QCMP_EQ = 4,
1829 QCMP_LT = 8,
1830 QCMP_GT = 16
1831 };
1832 int magic;
1833 enum rtx_code ncode;
1834 rtx ret, insns;
1835
1836 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1837 switch (code)
1838 {
1839 /* 1 = equal, 0 = not equal. Equality operators do
1840 not raise FP_INVALID when given a NaN operand. */
1841 case EQ: magic = QCMP_EQ; ncode = NE; break;
1842 case NE: magic = QCMP_EQ; ncode = EQ; break;
1843 /* isunordered() from C99. */
1844 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1845 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1846 /* Relational operators raise FP_INVALID when given
1847 a NaN operand. */
1848 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1849 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1850 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1851 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1852 /* Unordered relational operators do not raise FP_INVALID
1853 when given a NaN operand. */
1854 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1855 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1856 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1857 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1858 /* Not supported. */
1859 case UNEQ:
1860 case LTGT:
1861 default: gcc_unreachable ();
1862 }
1863
1864 start_sequence ();
1865
1866 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1867 *op0, TFmode, *op1, TFmode,
1868 GEN_INT (magic), DImode);
1869 cmp = gen_reg_rtx (BImode);
1870 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1871 ret, const0_rtx)));
1872
1873 insns = get_insns ();
1874 end_sequence ();
1875
1876 emit_libcall_block (insns, cmp, cmp,
1877 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1878 code = NE;
1879 }
1880 else
1881 {
1882 cmp = gen_reg_rtx (BImode);
1883 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1884 code = NE;
1885 }
1886
1887 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1888 *op0 = cmp;
1889 *op1 = const0_rtx;
1890 }
1891
1892 /* Generate an integral vector comparison. Return true if the condition has
1893 been reversed, and so the sense of the comparison should be inverted. */
1894
1895 static bool
1896 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1897 rtx dest, rtx op0, rtx op1)
1898 {
1899 bool negate = false;
1900 rtx x;
1901
1902 /* Canonicalize the comparison to EQ, GT, GTU. */
1903 switch (code)
1904 {
1905 case EQ:
1906 case GT:
1907 case GTU:
1908 break;
1909
1910 case NE:
1911 case LE:
1912 case LEU:
1913 code = reverse_condition (code);
1914 negate = true;
1915 break;
1916
1917 case GE:
1918 case GEU:
1919 code = reverse_condition (code);
1920 negate = true;
1921 /* FALLTHRU */
1922
1923 case LT:
1924 case LTU:
1925 code = swap_condition (code);
1926 x = op0, op0 = op1, op1 = x;
1927 break;
1928
1929 default:
1930 gcc_unreachable ();
1931 }
1932
1933 /* Unsigned parallel compare is not supported by the hardware. Play some
1934 tricks to turn this into a signed comparison against 0. */
1935 if (code == GTU)
1936 {
1937 switch (mode)
1938 {
1939 case V2SImode:
1940 {
1941 rtx t1, t2, mask;
1942
1943 /* Subtract (-(INT MAX) - 1) from both operands to make
1944 them signed. */
1945 mask = GEN_INT (0x80000000);
1946 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1947 mask = force_reg (mode, mask);
1948 t1 = gen_reg_rtx (mode);
1949 emit_insn (gen_subv2si3 (t1, op0, mask));
1950 t2 = gen_reg_rtx (mode);
1951 emit_insn (gen_subv2si3 (t2, op1, mask));
1952 op0 = t1;
1953 op1 = t2;
1954 code = GT;
1955 }
1956 break;
1957
1958 case V8QImode:
1959 case V4HImode:
1960 /* Perform a parallel unsigned saturating subtraction. */
1961 x = gen_reg_rtx (mode);
1962 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1963
1964 code = EQ;
1965 op0 = x;
1966 op1 = CONST0_RTX (mode);
1967 negate = !negate;
1968 break;
1969
1970 default:
1971 gcc_unreachable ();
1972 }
1973 }
1974
1975 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1976 emit_insn (gen_rtx_SET (dest, x));
1977
1978 return negate;
1979 }
1980
1981 /* Emit an integral vector conditional move. */
1982
1983 void
1984 ia64_expand_vecint_cmov (rtx operands[])
1985 {
1986 machine_mode mode = GET_MODE (operands[0]);
1987 enum rtx_code code = GET_CODE (operands[3]);
1988 bool negate;
1989 rtx cmp, x, ot, of;
1990
1991 cmp = gen_reg_rtx (mode);
1992 negate = ia64_expand_vecint_compare (code, mode, cmp,
1993 operands[4], operands[5]);
1994
1995 ot = operands[1+negate];
1996 of = operands[2-negate];
1997
1998 if (ot == CONST0_RTX (mode))
1999 {
2000 if (of == CONST0_RTX (mode))
2001 {
2002 emit_move_insn (operands[0], ot);
2003 return;
2004 }
2005
2006 x = gen_rtx_NOT (mode, cmp);
2007 x = gen_rtx_AND (mode, x, of);
2008 emit_insn (gen_rtx_SET (operands[0], x));
2009 }
2010 else if (of == CONST0_RTX (mode))
2011 {
2012 x = gen_rtx_AND (mode, cmp, ot);
2013 emit_insn (gen_rtx_SET (operands[0], x));
2014 }
2015 else
2016 {
2017 rtx t, f;
2018
2019 t = gen_reg_rtx (mode);
2020 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2021 emit_insn (gen_rtx_SET (t, x));
2022
2023 f = gen_reg_rtx (mode);
2024 x = gen_rtx_NOT (mode, cmp);
2025 x = gen_rtx_AND (mode, x, operands[2-negate]);
2026 emit_insn (gen_rtx_SET (f, x));
2027
2028 x = gen_rtx_IOR (mode, t, f);
2029 emit_insn (gen_rtx_SET (operands[0], x));
2030 }
2031 }
2032
2033 /* Emit an integral vector min or max operation. Return true if all done. */
2034
2035 bool
2036 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2037 rtx operands[])
2038 {
2039 rtx xops[6];
2040
2041 /* These four combinations are supported directly. */
2042 if (mode == V8QImode && (code == UMIN || code == UMAX))
2043 return false;
2044 if (mode == V4HImode && (code == SMIN || code == SMAX))
2045 return false;
2046
2047 /* This combination can be implemented with only saturating subtraction. */
2048 if (mode == V4HImode && code == UMAX)
2049 {
2050 rtx x, tmp = gen_reg_rtx (mode);
2051
2052 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2053 emit_insn (gen_rtx_SET (tmp, x));
2054
2055 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2056 return true;
2057 }
2058
2059 /* Everything else is implemented via vector comparisons. */
2060 xops[0] = operands[0];
2061 xops[4] = xops[1] = operands[1];
2062 xops[5] = xops[2] = operands[2];
2063
2064 switch (code)
2065 {
2066 case UMIN:
2067 code = LTU;
2068 break;
2069 case UMAX:
2070 code = GTU;
2071 break;
2072 case SMIN:
2073 code = LT;
2074 break;
2075 case SMAX:
2076 code = GT;
2077 break;
2078 default:
2079 gcc_unreachable ();
2080 }
2081 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2082
2083 ia64_expand_vecint_cmov (xops);
2084 return true;
2085 }
2086
2087 /* The vectors LO and HI each contain N halves of a double-wide vector.
2088 Reassemble either the first N/2 or the second N/2 elements. */
2089
2090 void
2091 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2092 {
2093 machine_mode vmode = GET_MODE (lo);
2094 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2095 struct expand_vec_perm_d d;
2096 bool ok;
2097
2098 d.target = gen_lowpart (vmode, out);
2099 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2100 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2101 d.vmode = vmode;
2102 d.nelt = nelt;
2103 d.one_operand_p = false;
2104 d.testing_p = false;
2105
2106 high = (highp ? nelt / 2 : 0);
2107 for (i = 0; i < nelt / 2; ++i)
2108 {
2109 d.perm[i * 2] = i + high;
2110 d.perm[i * 2 + 1] = i + high + nelt;
2111 }
2112
2113 ok = ia64_expand_vec_perm_const_1 (&d);
2114 gcc_assert (ok);
2115 }
2116
2117 /* Return a vector of the sign-extension of VEC. */
2118
2119 static rtx
2120 ia64_unpack_sign (rtx vec, bool unsignedp)
2121 {
2122 machine_mode mode = GET_MODE (vec);
2123 rtx zero = CONST0_RTX (mode);
2124
2125 if (unsignedp)
2126 return zero;
2127 else
2128 {
2129 rtx sign = gen_reg_rtx (mode);
2130 bool neg;
2131
2132 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2133 gcc_assert (!neg);
2134
2135 return sign;
2136 }
2137 }
2138
2139 /* Emit an integral vector unpack operation. */
2140
2141 void
2142 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2143 {
2144 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2145 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2146 }
2147
2148 /* Emit an integral vector widening sum operation. */
2149
2150 void
2151 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2152 {
2153 machine_mode wmode;
2154 rtx l, h, t, sign;
2155
2156 sign = ia64_unpack_sign (operands[1], unsignedp);
2157
2158 wmode = GET_MODE (operands[0]);
2159 l = gen_reg_rtx (wmode);
2160 h = gen_reg_rtx (wmode);
2161
2162 ia64_unpack_assemble (l, operands[1], sign, false);
2163 ia64_unpack_assemble (h, operands[1], sign, true);
2164
2165 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2166 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2167 if (t != operands[0])
2168 emit_move_insn (operands[0], t);
2169 }
2170
2171 /* Emit the appropriate sequence for a call. */
2172
2173 void
2174 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2175 int sibcall_p)
2176 {
2177 rtx insn, b0;
2178
2179 addr = XEXP (addr, 0);
2180 addr = convert_memory_address (DImode, addr);
2181 b0 = gen_rtx_REG (DImode, R_BR (0));
2182
2183 /* ??? Should do this for functions known to bind local too. */
2184 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2185 {
2186 if (sibcall_p)
2187 insn = gen_sibcall_nogp (addr);
2188 else if (! retval)
2189 insn = gen_call_nogp (addr, b0);
2190 else
2191 insn = gen_call_value_nogp (retval, addr, b0);
2192 insn = emit_call_insn (insn);
2193 }
2194 else
2195 {
2196 if (sibcall_p)
2197 insn = gen_sibcall_gp (addr);
2198 else if (! retval)
2199 insn = gen_call_gp (addr, b0);
2200 else
2201 insn = gen_call_value_gp (retval, addr, b0);
2202 insn = emit_call_insn (insn);
2203
2204 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2205 }
2206
2207 if (sibcall_p)
2208 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2209
2210 if (TARGET_ABI_OPEN_VMS)
2211 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2212 gen_rtx_REG (DImode, GR_REG (25)));
2213 }
2214
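/* Note that frame register R now has code emitted that uses its current
   assignment, so later frame layout computations must not change it. */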
2215 static void
2216 reg_emitted (enum ia64_frame_regs r)
2217 {
2218 if (emitted_frame_related_regs[r] == 0)
2219 emitted_frame_related_regs[r] = current_frame_info.r[r];
2220 else
2221 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2222 }
2223
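/* Return the GR assigned to frame register R, marking it as emitted. */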
2224 static int
2225 get_reg (enum ia64_frame_regs r)
2226 {
2227 reg_emitted (r);
2228 return current_frame_info.r[r];
2229 }
2230
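/* Return true if REGNO has already been assigned to one of the frame
   registers recorded in emitted_frame_related_regs. */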
2231 static bool
2232 is_emitted (int regno)
2233 {
2234 unsigned int r;
2235
2236 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2237 if (emitted_frame_related_regs[r] == regno)
2238 return true;
2239 return false;
2240 }
2241
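/* Restore the global pointer after a call: copy it from its GR save
   register if one was allocated, otherwise reload it from its spill slot
   in the memory frame. */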
2242 void
2243 ia64_reload_gp (void)
2244 {
2245 rtx tmp;
2246
2247 if (current_frame_info.r[reg_save_gp])
2248 {
2249 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2250 }
2251 else
2252 {
2253 HOST_WIDE_INT offset;
2254 rtx offset_r;
2255
2256 offset = (current_frame_info.spill_cfa_off
2257 + current_frame_info.spill_size);
2258 if (frame_pointer_needed)
2259 {
2260 tmp = hard_frame_pointer_rtx;
2261 offset = -offset;
2262 }
2263 else
2264 {
2265 tmp = stack_pointer_rtx;
2266 offset = current_frame_info.total_size - offset;
2267 }
2268
2269 offset_r = GEN_INT (offset);
2270 if (satisfies_constraint_I (offset_r))
2271 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2272 else
2273 {
2274 emit_move_insn (pic_offset_table_rtx, offset_r);
2275 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2276 pic_offset_table_rtx, tmp));
2277 }
2278
2279 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2280 }
2281
2282 emit_move_insn (pic_offset_table_rtx, tmp);
2283 }
2284
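/* Split a call after reload. If ADDR is a general register, the call is
   through a function descriptor: load the entry point into SCRATCH_B (via
   SCRATCH_R) and the new gp from the descriptor, then call through
   SCRATCH_B. Reload our own gp after the call when necessary. */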
2285 void
2286 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2287 rtx scratch_b, int noreturn_p, int sibcall_p)
2288 {
2289 rtx insn;
2290 bool is_desc = false;
2291
2292 /* If we find we're calling through a register, then we're actually
2293 calling through a descriptor, so load up the values. */
2294 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2295 {
2296 rtx tmp;
2297 bool addr_dead_p;
2298
2299 /* ??? We are currently constrained to *not* use peep2, because
2300 we can legitimately change the global lifetime of the GP
2301 (in the form of killing where previously live). This is
2302 because a call through a descriptor doesn't use the previous
2303 value of the GP, while a direct call does, and we do not
2304 commit to either form until the split here.
2305
2306 That said, this means that we lack precise life info for
2307 whether ADDR is dead after this call. This is not terribly
2308 important, since we can fix things up essentially for free
2309 with the POST_DEC below, but it's nice to not use it when we
2310 can immediately tell it's not necessary. */
2311 addr_dead_p = ((noreturn_p || sibcall_p
2312 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2313 REGNO (addr)))
2314 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2315
2316 /* Load the code address into scratch_b. */
2317 tmp = gen_rtx_POST_INC (Pmode, addr);
2318 tmp = gen_rtx_MEM (Pmode, tmp);
2319 emit_move_insn (scratch_r, tmp);
2320 emit_move_insn (scratch_b, scratch_r);
2321
2322 /* Load the GP address. If ADDR is not dead here, then we must
2323 revert the change made above via the POST_INCREMENT. */
2324 if (!addr_dead_p)
2325 tmp = gen_rtx_POST_DEC (Pmode, addr);
2326 else
2327 tmp = addr;
2328 tmp = gen_rtx_MEM (Pmode, tmp);
2329 emit_move_insn (pic_offset_table_rtx, tmp);
2330
2331 is_desc = true;
2332 addr = scratch_b;
2333 }
2334
2335 if (sibcall_p)
2336 insn = gen_sibcall_nogp (addr);
2337 else if (retval)
2338 insn = gen_call_value_nogp (retval, addr, retaddr);
2339 else
2340 insn = gen_call_nogp (addr, retaddr);
2341 emit_call_insn (insn);
2342
2343 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2344 ia64_reload_gp ();
2345 }
2346
2347 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2348
2349 This differs from the generic code in that we know about the zero-extending
2350 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2351 also know that ld.acq+cmpxchg.rel equals a full barrier.
2352
2353 The loop we want to generate looks like
2354
2355 cmp_reg = mem;
2356 label:
2357 old_reg = cmp_reg;
2358 new_reg = cmp_reg op val;
2359 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2360 if (cmp_reg != old_reg)
2361 goto label;
2362
2363 Note that we only do the plain load from memory once. Subsequent
2364 iterations use the value loaded by the compare-and-swap pattern. */
2365
2366 void
2367 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2368 rtx old_dst, rtx new_dst, enum memmodel model)
2369 {
2370 machine_mode mode = GET_MODE (mem);
2371 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2372 enum insn_code icode;
2373
2374 /* Special case for using fetchadd. */
2375 if ((mode == SImode || mode == DImode)
2376 && (code == PLUS || code == MINUS)
2377 && fetchadd_operand (val, mode))
2378 {
2379 if (code == MINUS)
2380 val = GEN_INT (-INTVAL (val));
2381
2382 if (!old_dst)
2383 old_dst = gen_reg_rtx (mode);
2384
2385 switch (model)
2386 {
2387 case MEMMODEL_ACQ_REL:
2388 case MEMMODEL_SEQ_CST:
2389 emit_insn (gen_memory_barrier ());
2390 /* FALLTHRU */
2391 case MEMMODEL_RELAXED:
2392 case MEMMODEL_ACQUIRE:
2393 case MEMMODEL_CONSUME:
2394 if (mode == SImode)
2395 icode = CODE_FOR_fetchadd_acq_si;
2396 else
2397 icode = CODE_FOR_fetchadd_acq_di;
2398 break;
2399 case MEMMODEL_RELEASE:
2400 if (mode == SImode)
2401 icode = CODE_FOR_fetchadd_rel_si;
2402 else
2403 icode = CODE_FOR_fetchadd_rel_di;
2404 break;
2405
2406 default:
2407 gcc_unreachable ();
2408 }
2409
2410 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2411
2412 if (new_dst)
2413 {
2414 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2415 true, OPTAB_WIDEN);
2416 if (new_reg != new_dst)
2417 emit_move_insn (new_dst, new_reg);
2418 }
2419 return;
2420 }
2421
2422 /* Because of the volatile mem read, we get an ld.acq, which is the
2423 front half of the full barrier. The end half is the cmpxchg.rel.
2424 For relaxed and release memory models we don't need this, but we
2425 don't bother trying to prevent it either. */
2426 gcc_assert (model == MEMMODEL_RELAXED
2427 || model == MEMMODEL_RELEASE
2428 || MEM_VOLATILE_P (mem));
2429
2430 old_reg = gen_reg_rtx (DImode);
2431 cmp_reg = gen_reg_rtx (DImode);
2432 label = gen_label_rtx ();
2433
2434 if (mode != DImode)
2435 {
2436 val = simplify_gen_subreg (DImode, val, mode, 0);
2437 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2438 }
2439 else
2440 emit_move_insn (cmp_reg, mem);
2441
2442 emit_label (label);
2443
2444 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2445 emit_move_insn (old_reg, cmp_reg);
2446 emit_move_insn (ar_ccv, cmp_reg);
2447
2448 if (old_dst)
2449 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2450
2451 new_reg = cmp_reg;
2452 if (code == NOT)
2453 {
2454 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2455 true, OPTAB_DIRECT);
2456 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2457 }
2458 else
2459 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2460 true, OPTAB_DIRECT);
2461
2462 if (mode != DImode)
2463 new_reg = gen_lowpart (mode, new_reg);
2464 if (new_dst)
2465 emit_move_insn (new_dst, new_reg);
2466
2467 switch (model)
2468 {
2469 case MEMMODEL_RELAXED:
2470 case MEMMODEL_ACQUIRE:
2471 case MEMMODEL_CONSUME:
2472 switch (mode)
2473 {
2474 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2475 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2476 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2477 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2478 default:
2479 gcc_unreachable ();
2480 }
2481 break;
2482
2483 case MEMMODEL_RELEASE:
2484 case MEMMODEL_ACQ_REL:
2485 case MEMMODEL_SEQ_CST:
2486 switch (mode)
2487 {
2488 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2489 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2490 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2491 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2492 default:
2493 gcc_unreachable ();
2494 }
2495 break;
2496
2497 default:
2498 gcc_unreachable ();
2499 }
2500
2501 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2502
2503 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2504 }
2505 \f
2506 /* Begin the assembly file. */
2507
2508 static void
2509 ia64_file_start (void)
2510 {
2511 default_file_start ();
2512 emit_safe_across_calls ();
2513 }
2514
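/* Emit a .pred.safe_across_calls directive listing the predicate registers
   that are preserved across calls (i.e. not call-used). */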
2515 void
2516 emit_safe_across_calls (void)
2517 {
2518 unsigned int rs, re;
2519 int out_state;
2520
2521 rs = 1;
2522 out_state = 0;
2523 while (1)
2524 {
2525 while (rs < 64 && call_used_regs[PR_REG (rs)])
2526 rs++;
2527 if (rs >= 64)
2528 break;
2529 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2530 continue;
2531 if (out_state == 0)
2532 {
2533 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2534 out_state = 1;
2535 }
2536 else
2537 fputc (',', asm_out_file);
2538 if (re == rs + 1)
2539 fprintf (asm_out_file, "p%u", rs);
2540 else
2541 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2542 rs = re + 1;
2543 }
2544 if (out_state)
2545 fputc ('\n', asm_out_file);
2546 }
2547
2548 /* Globalize a declaration. */
2549
2550 static void
2551 ia64_globalize_decl_name (FILE * stream, tree decl)
2552 {
2553 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2554 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2555 if (version_attr)
2556 {
2557 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2558 const char *p = TREE_STRING_POINTER (v);
2559 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2560 }
2561 targetm.asm_out.globalize_label (stream, name);
2562 if (TREE_CODE (decl) == FUNCTION_DECL)
2563 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2564 }
2565
2566 /* Helper function for ia64_compute_frame_size: find an appropriate general
2567 register to spill some special register to. SPECIAL_SPILL_MASK contains
2568 bits in GR0 to GR31 that have already been allocated by this routine.
2569 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2570
2571 static int
2572 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2573 {
2574 int regno;
2575
2576 if (emitted_frame_related_regs[r] != 0)
2577 {
2578 regno = emitted_frame_related_regs[r];
2579 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2580 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2581 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2582 else if (crtl->is_leaf
2583 && regno >= GR_REG (1) && regno <= GR_REG (31))
2584 current_frame_info.gr_used_mask |= 1 << regno;
2585
2586 return regno;
2587 }
2588
2589 /* If this is a leaf function, first try an otherwise unused
2590 call-clobbered register. */
2591 if (crtl->is_leaf)
2592 {
2593 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2594 if (! df_regs_ever_live_p (regno)
2595 && call_used_regs[regno]
2596 && ! fixed_regs[regno]
2597 && ! global_regs[regno]
2598 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2599 && ! is_emitted (regno))
2600 {
2601 current_frame_info.gr_used_mask |= 1 << regno;
2602 return regno;
2603 }
2604 }
2605
2606 if (try_locals)
2607 {
2608 regno = current_frame_info.n_local_regs;
2609 /* If there is a frame pointer, then we can't use loc79, because
2610 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2611 reg_name switching code in ia64_expand_prologue. */
2612 while (regno < (80 - frame_pointer_needed))
2613 if (! is_emitted (LOC_REG (regno++)))
2614 {
2615 current_frame_info.n_local_regs = regno;
2616 return LOC_REG (regno - 1);
2617 }
2618 }
2619
2620 /* Failed to find a general register to spill to. Must use stack. */
2621 return 0;
2622 }
2623
2624 /* In order to make for nice schedules, we try to allocate every temporary
2625 to a different register. We must of course stay away from call-saved,
2626 fixed, and global registers. We must also stay away from registers
2627 allocated in current_frame_info.gr_used_mask, since those include regs
2628 used all through the prologue.
2629
2630 Any register allocated here must be used immediately. The idea is to
2631 aid scheduling, not to solve data flow problems. */
2632
2633 static int last_scratch_gr_reg;
2634
2635 static int
2636 next_scratch_gr_reg (void)
2637 {
2638 int i, regno;
2639
2640 for (i = 0; i < 32; ++i)
2641 {
2642 regno = (last_scratch_gr_reg + i + 1) & 31;
2643 if (call_used_regs[regno]
2644 && ! fixed_regs[regno]
2645 && ! global_regs[regno]
2646 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2647 {
2648 last_scratch_gr_reg = regno;
2649 return regno;
2650 }
2651 }
2652
2653 /* There must be _something_ available. */
2654 gcc_unreachable ();
2655 }
2656
2657 /* Helper function for ia64_compute_frame_size, called through
2658 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2659
2660 static void
2661 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2662 {
2663 unsigned int regno = REGNO (reg);
2664 if (regno < 32)
2665 {
2666 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2667 for (i = 0; i < n; ++i)
2668 current_frame_info.gr_used_mask |= 1 << (regno + i);
2669 }
2670 }
2671
2672
2673 /* Compute the layout of the current function's frame and record it in
2674 current_frame_info. SIZE is the number of bytes of space needed for
2675 local variables. */
2676
2677 static void
2678 ia64_compute_frame_size (HOST_WIDE_INT size)
2679 {
2680 HOST_WIDE_INT total_size;
2681 HOST_WIDE_INT spill_size = 0;
2682 HOST_WIDE_INT extra_spill_size = 0;
2683 HOST_WIDE_INT pretend_args_size;
2684 HARD_REG_SET mask;
2685 int n_spilled = 0;
2686 int spilled_gr_p = 0;
2687 int spilled_fr_p = 0;
2688 unsigned int regno;
2689 int min_regno;
2690 int max_regno;
2691 int i;
2692
2693 if (current_frame_info.initialized)
2694 return;
2695
2696 memset (&current_frame_info, 0, sizeof current_frame_info);
2697 CLEAR_HARD_REG_SET (mask);
2698
2699 /* Don't allocate scratches to the return register. */
2700 diddle_return_value (mark_reg_gr_used_mask, NULL);
2701
2702 /* Don't allocate scratches to the EH scratch registers. */
2703 if (cfun->machine->ia64_eh_epilogue_sp)
2704 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2705 if (cfun->machine->ia64_eh_epilogue_bsp)
2706 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2707
2708 /* Static stack checking uses r2 and r3. */
2709 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2710 current_frame_info.gr_used_mask |= 0xc;
2711
2712 /* Find the size of the register stack frame. We have only 80 local
2713 registers, because we reserve 8 for the inputs and 8 for the
2714 outputs. */
2715
2716 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2717 since we'll be adjusting that down later. */
2718 regno = LOC_REG (78) + ! frame_pointer_needed;
2719 for (; regno >= LOC_REG (0); regno--)
2720 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2721 break;
2722 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2723
2724 /* For functions marked with the syscall_linkage attribute, we must mark
2725 all eight input registers as in use, so that locals aren't visible to
2726 the caller. */
2727
2728 if (cfun->machine->n_varargs > 0
2729 || lookup_attribute ("syscall_linkage",
2730 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2731 current_frame_info.n_input_regs = 8;
2732 else
2733 {
2734 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2735 if (df_regs_ever_live_p (regno))
2736 break;
2737 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2738 }
2739
2740 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2741 if (df_regs_ever_live_p (regno))
2742 break;
2743 i = regno - OUT_REG (0) + 1;
2744
2745 #ifndef PROFILE_HOOK
2746 /* When -p profiling, we need one output register for the mcount argument.
2747 Likewise for -a profiling for the bb_init_func argument. For -ax
2748 profiling, we need two output registers for the two bb_init_trace_func
2749 arguments. */
2750 if (crtl->profile)
2751 i = MAX (i, 1);
2752 #endif
2753 current_frame_info.n_output_regs = i;
2754
2755 /* ??? No rotating register support yet. */
2756 current_frame_info.n_rotate_regs = 0;
2757
2758 /* Discover which registers need spilling, and how much room that
2759 will take. Begin with floating point and general registers,
2760 which will always wind up on the stack. */
2761
2762 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2763 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2764 {
2765 SET_HARD_REG_BIT (mask, regno);
2766 spill_size += 16;
2767 n_spilled += 1;
2768 spilled_fr_p = 1;
2769 }
2770
2771 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2772 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2773 {
2774 SET_HARD_REG_BIT (mask, regno);
2775 spill_size += 8;
2776 n_spilled += 1;
2777 spilled_gr_p = 1;
2778 }
2779
2780 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2781 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2782 {
2783 SET_HARD_REG_BIT (mask, regno);
2784 spill_size += 8;
2785 n_spilled += 1;
2786 }
2787
2788 /* Now come all special registers that might get saved in other
2789 general registers. */
2790
2791 if (frame_pointer_needed)
2792 {
2793 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2794 /* If we did not get a register, then we take LOC79. This is guaranteed
2795 to be free, even if regs_ever_live is already set, because this is
2796 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2797 as we don't count loc79 above. */
2798 if (current_frame_info.r[reg_fp] == 0)
2799 {
2800 current_frame_info.r[reg_fp] = LOC_REG (79);
2801 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2802 }
2803 }
2804
2805 if (! crtl->is_leaf)
2806 {
2807 /* Emit a save of BR0 if we call other functions. Do this even
2808 if this function doesn't return, as EH depends on this to be
2809 able to unwind the stack. */
2810 SET_HARD_REG_BIT (mask, BR_REG (0));
2811
2812 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2813 if (current_frame_info.r[reg_save_b0] == 0)
2814 {
2815 extra_spill_size += 8;
2816 n_spilled += 1;
2817 }
2818
2819 /* Similarly for ar.pfs. */
2820 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2821 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2822 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2823 {
2824 extra_spill_size += 8;
2825 n_spilled += 1;
2826 }
2827
2828 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2829 registers are clobbered, so we fall back to the stack. */
2830 current_frame_info.r[reg_save_gp]
2831 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2832 if (current_frame_info.r[reg_save_gp] == 0)
2833 {
2834 SET_HARD_REG_BIT (mask, GR_REG (1));
2835 spill_size += 8;
2836 n_spilled += 1;
2837 }
2838 }
2839 else
2840 {
2841 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2842 {
2843 SET_HARD_REG_BIT (mask, BR_REG (0));
2844 extra_spill_size += 8;
2845 n_spilled += 1;
2846 }
2847
2848 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2849 {
2850 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2851 current_frame_info.r[reg_save_ar_pfs]
2852 = find_gr_spill (reg_save_ar_pfs, 1);
2853 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2854 {
2855 extra_spill_size += 8;
2856 n_spilled += 1;
2857 }
2858 }
2859 }
2860
2861 /* Unwind descriptor hackery: things are most efficient if we allocate
2862 consecutive GR save registers for RP, PFS, FP in that order. However,
2863 it is absolutely critical that FP get the only hard register that's
2864 guaranteed to be free, so we allocated it first. If all three did
2865 happen to be allocated hard regs, and are consecutive, rearrange them
2866 into the preferred order now.
2867
2868 If we have already emitted code for any of those registers,
2869 then it's already too late to change. */
2870 min_regno = MIN (current_frame_info.r[reg_fp],
2871 MIN (current_frame_info.r[reg_save_b0],
2872 current_frame_info.r[reg_save_ar_pfs]));
2873 max_regno = MAX (current_frame_info.r[reg_fp],
2874 MAX (current_frame_info.r[reg_save_b0],
2875 current_frame_info.r[reg_save_ar_pfs]));
2876 if (min_regno > 0
2877 && min_regno + 2 == max_regno
2878 && (current_frame_info.r[reg_fp] == min_regno + 1
2879 || current_frame_info.r[reg_save_b0] == min_regno + 1
2880 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2881 && (emitted_frame_related_regs[reg_save_b0] == 0
2882 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2883 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2884 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2885 && (emitted_frame_related_regs[reg_fp] == 0
2886 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2887 {
2888 current_frame_info.r[reg_save_b0] = min_regno;
2889 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2890 current_frame_info.r[reg_fp] = min_regno + 2;
2891 }
2892
2893 /* See if we need to store the predicate register block. */
2894 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2895 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2896 break;
2897 if (regno <= PR_REG (63))
2898 {
2899 SET_HARD_REG_BIT (mask, PR_REG (0));
2900 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2901 if (current_frame_info.r[reg_save_pr] == 0)
2902 {
2903 extra_spill_size += 8;
2904 n_spilled += 1;
2905 }
2906
2907 /* ??? Mark them all as used so that register renaming and such
2908 are free to use them. */
2909 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2910 df_set_regs_ever_live (regno, true);
2911 }
2912
2913 /* If we're forced to use st8.spill, we're forced to save and restore
2914 ar.unat as well. The check for existing liveness allows inline asm
2915 to touch ar.unat. */
2916 if (spilled_gr_p || cfun->machine->n_varargs
2917 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2918 {
2919 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2920 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2921 current_frame_info.r[reg_save_ar_unat]
2922 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2923 if (current_frame_info.r[reg_save_ar_unat] == 0)
2924 {
2925 extra_spill_size += 8;
2926 n_spilled += 1;
2927 }
2928 }
2929
2930 if (df_regs_ever_live_p (AR_LC_REGNUM))
2931 {
2932 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2933 current_frame_info.r[reg_save_ar_lc]
2934 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2935 if (current_frame_info.r[reg_save_ar_lc] == 0)
2936 {
2937 extra_spill_size += 8;
2938 n_spilled += 1;
2939 }
2940 }
2941
2942 /* If we have an odd number of words of pretend arguments written to
2943 the stack, then the FR save area will be unaligned. We round the
2944 size of this area up to keep things 16 byte aligned. */
2945 if (spilled_fr_p)
2946 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2947 else
2948 pretend_args_size = crtl->args.pretend_args_size;
2949
2950 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2951 + crtl->outgoing_args_size);
2952 total_size = IA64_STACK_ALIGN (total_size);
2953
2954 /* We always use the 16-byte scratch area provided by the caller, but
2955 if we are a leaf function, there's no one to which we need to provide
2956 a scratch area. However, if the function allocates dynamic stack space,
2957 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2958 so we need to cope. */
2959 if (crtl->is_leaf && !cfun->calls_alloca)
2960 total_size = MAX (0, total_size - 16);
2961
2962 current_frame_info.total_size = total_size;
2963 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2964 current_frame_info.spill_size = spill_size;
2965 current_frame_info.extra_spill_size = extra_spill_size;
2966 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2967 current_frame_info.n_spilled = n_spilled;
2968 current_frame_info.initialized = reload_completed;
2969 }
2970
2971 /* Worker function for TARGET_CAN_ELIMINATE. */
2972
2973 bool
2974 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2975 {
2976 return (to == BR_REG (0) ? crtl->is_leaf : true);
2977 }
2978
2979 /* Compute the initial difference between the specified pair of registers. */
2980
2981 HOST_WIDE_INT
2982 ia64_initial_elimination_offset (int from, int to)
2983 {
2984 HOST_WIDE_INT offset;
2985
2986 ia64_compute_frame_size (get_frame_size ());
2987 switch (from)
2988 {
2989 case FRAME_POINTER_REGNUM:
2990 switch (to)
2991 {
2992 case HARD_FRAME_POINTER_REGNUM:
2993 offset = -current_frame_info.total_size;
2994 if (!crtl->is_leaf || cfun->calls_alloca)
2995 offset += 16 + crtl->outgoing_args_size;
2996 break;
2997
2998 case STACK_POINTER_REGNUM:
2999 offset = 0;
3000 if (!crtl->is_leaf || cfun->calls_alloca)
3001 offset += 16 + crtl->outgoing_args_size;
3002 break;
3003
3004 default:
3005 gcc_unreachable ();
3006 }
3007 break;
3008
3009 case ARG_POINTER_REGNUM:
3010 /* Arguments start above the 16 byte save area, unless stdarg,
3011 in which case we store through the 16 byte save area. */
3012 switch (to)
3013 {
3014 case HARD_FRAME_POINTER_REGNUM:
3015 offset = 16 - crtl->args.pretend_args_size;
3016 break;
3017
3018 case STACK_POINTER_REGNUM:
3019 offset = (current_frame_info.total_size
3020 + 16 - crtl->args.pretend_args_size);
3021 break;
3022
3023 default:
3024 gcc_unreachable ();
3025 }
3026 break;
3027
3028 default:
3029 gcc_unreachable ();
3030 }
3031
3032 return offset;
3033 }
3034
3035 /* If there are more than a trivial number of register spills, we use
3036 two interleaved iterators so that we can get two memory references
3037 per insn group.
3038
3039 In order to simplify things in the prologue and epilogue expanders,
3040 we use helper functions to fix up the memory references after the
3041 fact with the appropriate offsets to a POST_MODIFY memory mode.
3042 The following data structure tracks the state of the two iterators
3043 while insns are being emitted. */
3044
3045 struct spill_fill_data
3046 {
3047 rtx_insn *init_after; /* point at which to emit initializations */
3048 rtx init_reg[2]; /* initial base register */
3049 rtx iter_reg[2]; /* the iterator registers */
3050 rtx *prev_addr[2]; /* address of last memory use */
3051 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
3052 HOST_WIDE_INT prev_off[2]; /* last offset */
3053 int n_iter; /* number of iterators in use */
3054 int next_iter; /* next iterator to use */
3055 unsigned int save_gr_used_mask;
3056 };
3057
3058 static struct spill_fill_data spill_fill_data;
3059
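/* Initialize the spill/fill state for N_SPILLS register saves starting at
   CFA offset CFA_OFF, using INIT_REG as the initial base register. One
   iterator is used, or two when there are more than two spills. */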
3060 static void
3061 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3062 {
3063 int i;
3064
3065 spill_fill_data.init_after = get_last_insn ();
3066 spill_fill_data.init_reg[0] = init_reg;
3067 spill_fill_data.init_reg[1] = init_reg;
3068 spill_fill_data.prev_addr[0] = NULL;
3069 spill_fill_data.prev_addr[1] = NULL;
3070 spill_fill_data.prev_insn[0] = NULL;
3071 spill_fill_data.prev_insn[1] = NULL;
3072 spill_fill_data.prev_off[0] = cfa_off;
3073 spill_fill_data.prev_off[1] = cfa_off;
3074 spill_fill_data.next_iter = 0;
3075 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3076
3077 spill_fill_data.n_iter = 1 + (n_spills > 2);
3078 for (i = 0; i < spill_fill_data.n_iter; ++i)
3079 {
3080 int regno = next_scratch_gr_reg ();
3081 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3082 current_frame_info.gr_used_mask |= 1 << regno;
3083 }
3084 }
3085
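/* Release the scratch iterator registers by restoring gr_used_mask. */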
3086 static void
3087 finish_spill_pointers (void)
3088 {
3089 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3090 }
3091
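/* Return a memory reference for spilling or restoring REG at CFA offset
   CFA_OFF. Advances the current iterator, turning the previous reference
   into a POST_MODIFY when the displacement allows it. */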
3092 static rtx
3093 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3094 {
3095 int iter = spill_fill_data.next_iter;
3096 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3097 rtx disp_rtx = GEN_INT (disp);
3098 rtx mem;
3099
3100 if (spill_fill_data.prev_addr[iter])
3101 {
3102 if (satisfies_constraint_N (disp_rtx))
3103 {
3104 *spill_fill_data.prev_addr[iter]
3105 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3106 gen_rtx_PLUS (DImode,
3107 spill_fill_data.iter_reg[iter],
3108 disp_rtx));
3109 add_reg_note (spill_fill_data.prev_insn[iter],
3110 REG_INC, spill_fill_data.iter_reg[iter]);
3111 }
3112 else
3113 {
3114 /* ??? Could use register post_modify for loads. */
3115 if (!satisfies_constraint_I (disp_rtx))
3116 {
3117 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3118 emit_move_insn (tmp, disp_rtx);
3119 disp_rtx = tmp;
3120 }
3121 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3122 spill_fill_data.iter_reg[iter], disp_rtx));
3123 }
3124 }
3125 /* Micro-optimization: if we've created a frame pointer, it's at
3126 CFA 0, which may allow the real iterator to be initialized lower,
3127 slightly increasing parallelism. Also, if there are few saves
3128 it may eliminate the iterator entirely. */
3129 else if (disp == 0
3130 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3131 && frame_pointer_needed)
3132 {
3133 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3134 set_mem_alias_set (mem, get_varargs_alias_set ());
3135 return mem;
3136 }
3137 else
3138 {
3139 rtx seq;
3140 rtx_insn *insn;
3141
3142 if (disp == 0)
3143 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3144 spill_fill_data.init_reg[iter]);
3145 else
3146 {
3147 start_sequence ();
3148
3149 if (!satisfies_constraint_I (disp_rtx))
3150 {
3151 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3152 emit_move_insn (tmp, disp_rtx);
3153 disp_rtx = tmp;
3154 }
3155
3156 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3157 spill_fill_data.init_reg[iter],
3158 disp_rtx));
3159
3160 seq = get_insns ();
3161 end_sequence ();
3162 }
3163
3164 /* Be careful in case this is the first insn in a sequence. */
3165 if (spill_fill_data.init_after)
3166 insn = emit_insn_after (seq, spill_fill_data.init_after);
3167 else
3168 {
3169 rtx_insn *first = get_insns ();
3170 if (first)
3171 insn = emit_insn_before (seq, first);
3172 else
3173 insn = emit_insn (seq);
3174 }
3175 spill_fill_data.init_after = insn;
3176 }
3177
3178 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3179
3180 /* ??? Not all of the spills are for varargs, but some of them are.
3181 The rest of the spills belong in an alias set of their own. But
3182 it doesn't actually hurt to include them here. */
3183 set_mem_alias_set (mem, get_varargs_alias_set ());
3184
3185 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3186 spill_fill_data.prev_off[iter] = cfa_off;
3187
3188 if (++iter >= spill_fill_data.n_iter)
3189 iter = 0;
3190 spill_fill_data.next_iter = iter;
3191
3192 return mem;
3193 }
3194
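/* Emit a spill of REG at CFA offset CFA_OFF using MOVE_FN. If FRAME_REG
   is nonnull, mark the insn frame related and attach a REG_CFA_OFFSET note
   recording the save of FRAME_REG for the unwinder. */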
3195 static void
3196 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3197 rtx frame_reg)
3198 {
3199 int iter = spill_fill_data.next_iter;
3200 rtx mem;
3201 rtx_insn *insn;
3202
3203 mem = spill_restore_mem (reg, cfa_off);
3204 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3205 spill_fill_data.prev_insn[iter] = insn;
3206
3207 if (frame_reg)
3208 {
3209 rtx base;
3210 HOST_WIDE_INT off;
3211
3212 RTX_FRAME_RELATED_P (insn) = 1;
3213
3214 /* Don't even pretend that the unwind code can intuit its way
3215 through a pair of interleaved post_modify iterators. Just
3216 provide the correct answer. */
3217
3218 if (frame_pointer_needed)
3219 {
3220 base = hard_frame_pointer_rtx;
3221 off = - cfa_off;
3222 }
3223 else
3224 {
3225 base = stack_pointer_rtx;
3226 off = current_frame_info.total_size - cfa_off;
3227 }
3228
3229 add_reg_note (insn, REG_CFA_OFFSET,
3230 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3231 plus_constant (Pmode,
3232 base, off)),
3233 frame_reg));
3234 }
3235 }
3236
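/* Emit a restore of REG from CFA offset CFA_OFF using MOVE_FN. */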
3237 static void
3238 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3239 {
3240 int iter = spill_fill_data.next_iter;
3241 rtx_insn *insn;
3242
3243 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3244 GEN_INT (cfa_off)));
3245 spill_fill_data.prev_insn[iter] = insn;
3246 }
3247
3248 /* Wrapper functions that discard the CONST_INT spill offset. These
3249 exist so that we can give gr_spill/gr_fill the offset they need and
3250 use a consistent function interface. */
3251
3252 static rtx
3253 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3254 {
3255 return gen_movdi (dest, src);
3256 }
3257
3258 static rtx
3259 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3260 {
3261 return gen_fr_spill (dest, src);
3262 }
3263
3264 static rtx
3265 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3266 {
3267 return gen_fr_restore (dest, src);
3268 }
3269
3270 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3271
3272 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3273 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
3274
3275 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3276 inclusive. These are offsets from the current stack pointer. BS_SIZE
3277 is the size of the backing store. ??? This clobbers r2 and r3. */
3278
3279 static void
3280 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3281 int bs_size)
3282 {
3283 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3284 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3285 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3286
3287 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3288 of the Register Stack Engine. We also need to probe it after checking
3289 that the 2 stacks don't overlap. */
3290 emit_insn (gen_bsp_value (r3));
3291 emit_move_insn (r2, GEN_INT (-(first + size)));
3292
3293 /* Compare current value of BSP and SP registers. */
3294 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3295 r3, stack_pointer_rtx)));
3296
3297 /* Compute the address of the probe for the Backing Store (which grows
3298 towards higher addresses). We probe only at the first offset of
3299 the next page because some OSes (e.g. Linux/ia64) only extend the
3300 backing store when this specific address is hit (but generate a SEGV
3301 on other addresses). Page size is the worst case (4KB). The reserve
3302 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3303 Also compute the address of the last probe for the memory stack
3304 (which grows towards lower addresses). */
3305 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3306 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3307
3308 /* Compare them and raise SEGV if the former has topped the latter. */
3309 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3310 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3311 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3312 r3, r2))));
3313 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3314 const0_rtx),
3315 const0_rtx));
3316 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3317 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3318 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3319 GEN_INT (11))));
3320
3321 /* Probe the Backing Store if necessary. */
3322 if (bs_size > 0)
3323 emit_stack_probe (r3);
3324
3325 /* Probe the memory stack if necessary. */
3326 if (size == 0)
3327 ;
3328
3329 /* See if we have a constant small number of probes to generate. If so,
3330 that's the easy case. */
3331 else if (size <= PROBE_INTERVAL)
3332 emit_stack_probe (r2);
3333
3334 /* The run-time loop is made up of 8 insns in the generic case while this
3335 compile-time loop is made up of 5+2*(n-2) insns for n intervals. */
3336 else if (size <= 4 * PROBE_INTERVAL)
3337 {
3338 HOST_WIDE_INT i;
3339
3340 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3341 emit_insn (gen_rtx_SET (r2,
3342 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3343 emit_stack_probe (r2);
3344
3345 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3346 it exceeds SIZE. If only two probes are needed, this will not
3347 generate any code. Then probe at FIRST + SIZE. */
3348 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3349 {
3350 emit_insn (gen_rtx_SET (r2,
3351 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3352 emit_stack_probe (r2);
3353 }
3354
3355 emit_insn (gen_rtx_SET (r2,
3356 plus_constant (Pmode, r2,
3357 (i - PROBE_INTERVAL) - size)));
3358 emit_stack_probe (r2);
3359 }
3360
3361 /* Otherwise, do the same as above, but in a loop. Note that we must be
3362 extra careful with variables wrapping around because we might be at
3363 the very top (or the very bottom) of the address space and we have
3364 to be able to handle this case properly; in particular, we use an
3365 equality test for the loop condition. */
3366 else
3367 {
3368 HOST_WIDE_INT rounded_size;
3369
3370 emit_move_insn (r2, GEN_INT (-first));
3371
3372
3373 /* Step 1: round SIZE to the previous multiple of the interval. */
3374
3375 rounded_size = size & -PROBE_INTERVAL;
3376
3377
3378 /* Step 2: compute initial and final value of the loop counter. */
3379
3380 /* TEST_ADDR = SP + FIRST. */
3381 emit_insn (gen_rtx_SET (r2,
3382 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3383
3384 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3385 if (rounded_size > (1 << 21))
3386 {
3387 emit_move_insn (r3, GEN_INT (-rounded_size));
3388 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3389 }
3390 else
3391 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3392 GEN_INT (-rounded_size))));
3393
3394
3395 /* Step 3: the loop
3396
3397 while (TEST_ADDR != LAST_ADDR)
3398 {
3399 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3400 probe at TEST_ADDR
3401 }
3402
3403 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3404 until it is equal to ROUNDED_SIZE. */
3405
3406 emit_insn (gen_probe_stack_range (r2, r2, r3));
3407
3408
3409 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3410 that SIZE is equal to ROUNDED_SIZE. */
3411
3412 /* TEMP = SIZE - ROUNDED_SIZE. */
3413 if (size != rounded_size)
3414 {
3415 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3416 rounded_size - size)));
3417 emit_stack_probe (r2);
3418 }
3419 }
3420
3421 /* Make sure nothing is scheduled before we are done. */
3422 emit_insn (gen_blockage ());
3423 }
3424
3425 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3426 absolute addresses. */
3427
3428 const char *
3429 output_probe_stack_range (rtx reg1, rtx reg2)
3430 {
3431 static int labelno = 0;
3432 char loop_lab[32], end_lab[32];
3433 rtx xops[3];
3434
3435 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
3436 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
3437
3438 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3439
3440 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
3441 xops[0] = reg1;
3442 xops[1] = reg2;
3443 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3444 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3445 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
3446 assemble_name_raw (asm_out_file, end_lab);
3447 fputc ('\n', asm_out_file);
3448
3449 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3450 xops[1] = GEN_INT (-PROBE_INTERVAL);
3451 output_asm_insn ("addl %0 = %1, %0", xops);
3452 fputs ("\t;;\n", asm_out_file);
3453
3454 /* Probe at TEST_ADDR and branch. */
3455 output_asm_insn ("probe.w.fault %0, 0", xops);
3456 fprintf (asm_out_file, "\tbr ");
3457 assemble_name_raw (asm_out_file, loop_lab);
3458 fputc ('\n', asm_out_file);
3459
3460 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
3461
3462 return "";
3463 }
3464
3465 /* Called after register allocation to add any instructions needed for the
3466 prologue. Using a prologue insn is favored compared to putting all of the
3467 instructions in output_function_prologue(), since it allows the scheduler
3468 to intermix instructions with the saves of the caller saved registers. In
3469 some cases, it might be necessary to emit a barrier instruction as the last
3470 insn to prevent such scheduling.
3471
3472 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3473 so that the debug info generation code can handle them properly.
3474
3475 The register save area is laid out like so:
3476 cfa+16
3477 [ varargs spill area ]
3478 [ fr register spill area ]
3479 [ br register spill area ]
3480 [ ar register spill area ]
3481 [ pr register spill area ]
3482 [ gr register spill area ] */
3483
3484 /* ??? We get inefficient code when the frame size is larger than can fit in
3485 an adds instruction. */
3486
3487 void
3488 ia64_expand_prologue (void)
3489 {
3490 rtx_insn *insn;
3491 rtx ar_pfs_save_reg, ar_unat_save_reg;
3492 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3493 rtx reg, alt_reg;
3494
3495 ia64_compute_frame_size (get_frame_size ());
3496 last_scratch_gr_reg = 15;
3497
3498 if (flag_stack_usage_info)
3499 current_function_static_stack_size = current_frame_info.total_size;
3500
3501 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3502 {
3503 HOST_WIDE_INT size = current_frame_info.total_size;
3504 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3505 + current_frame_info.n_local_regs);
3506
3507 if (crtl->is_leaf && !cfun->calls_alloca)
3508 {
3509 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3510 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3511 size - STACK_CHECK_PROTECT,
3512 bs_size);
3513 else if (size + bs_size > STACK_CHECK_PROTECT)
3514 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3515 }
3516 else if (size + bs_size > 0)
3517 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3518 }
3519
3520 if (dump_file)
3521 {
3522 fprintf (dump_file, "ia64 frame related registers "
3523 "recorded in current_frame_info.r[]:\n");
3524 #define PRINTREG(a) if (current_frame_info.r[a]) \
3525 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3526 PRINTREG(reg_fp);
3527 PRINTREG(reg_save_b0);
3528 PRINTREG(reg_save_pr);
3529 PRINTREG(reg_save_ar_pfs);
3530 PRINTREG(reg_save_ar_unat);
3531 PRINTREG(reg_save_ar_lc);
3532 PRINTREG(reg_save_gp);
3533 #undef PRINTREG
3534 }
3535
3536 /* If there is no epilogue, then we don't need some prologue insns.
3537 We need to avoid emitting the dead prologue insns, because flow
3538 will complain about them. */
3539 if (optimize)
3540 {
3541 edge e;
3542 edge_iterator ei;
3543
3544 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3545 if ((e->flags & EDGE_FAKE) == 0
3546 && (e->flags & EDGE_FALLTHRU) != 0)
3547 break;
3548 epilogue_p = (e != NULL);
3549 }
3550 else
3551 epilogue_p = 1;
3552
3553 /* Set the local, input, and output register names. We need to do this
3554 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3555 half. If we use in/loc/out register names, then we get assembler errors
3556 in crtn.S because there is no alloc insn or regstk directive in there. */
3557 if (! TARGET_REG_NAMES)
3558 {
3559 int inputs = current_frame_info.n_input_regs;
3560 int locals = current_frame_info.n_local_regs;
3561 int outputs = current_frame_info.n_output_regs;
3562
3563 for (i = 0; i < inputs; i++)
3564 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3565 for (i = 0; i < locals; i++)
3566 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3567 for (i = 0; i < outputs; i++)
3568 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3569 }
3570
3571 /* Set the frame pointer register name. The regnum is logically loc79,
3572 but of course we'll not have allocated that many locals. Rather than
3573 worrying about renumbering the existing rtxs, we adjust the name. */
3574 /* ??? This code means that we can never use one local register when
3575 there is a frame pointer. loc79 gets wasted in this case, as it is
3576 renamed to a register that will never be used. See also the try_locals
3577 code in find_gr_spill. */
3578 if (current_frame_info.r[reg_fp])
3579 {
3580 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3581 reg_names[HARD_FRAME_POINTER_REGNUM]
3582 = reg_names[current_frame_info.r[reg_fp]];
3583 reg_names[current_frame_info.r[reg_fp]] = tmp;
3584 }
3585
3586 /* We don't need an alloc instruction if we've used no outputs or locals. */
3587 if (current_frame_info.n_local_regs == 0
3588 && current_frame_info.n_output_regs == 0
3589 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3590 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3591 {
3592 /* If there is no alloc, but there are input registers used, then we
3593 need a .regstk directive. */
3594 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3595 ar_pfs_save_reg = NULL_RTX;
3596 }
3597 else
3598 {
3599 current_frame_info.need_regstk = 0;
3600
3601 if (current_frame_info.r[reg_save_ar_pfs])
3602 {
3603 regno = current_frame_info.r[reg_save_ar_pfs];
3604 reg_emitted (reg_save_ar_pfs);
3605 }
3606 else
3607 regno = next_scratch_gr_reg ();
3608 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3609
3610 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3611 GEN_INT (current_frame_info.n_input_regs),
3612 GEN_INT (current_frame_info.n_local_regs),
3613 GEN_INT (current_frame_info.n_output_regs),
3614 GEN_INT (current_frame_info.n_rotate_regs)));
3615 if (current_frame_info.r[reg_save_ar_pfs])
3616 {
3617 RTX_FRAME_RELATED_P (insn) = 1;
3618 add_reg_note (insn, REG_CFA_REGISTER,
3619 gen_rtx_SET (ar_pfs_save_reg,
3620 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3621 }
3622 }
3623
3624 /* Set up frame pointer, stack pointer, and spill iterators. */
3625
3626 n_varargs = cfun->machine->n_varargs;
3627 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3628 stack_pointer_rtx, 0);
3629
3630 if (frame_pointer_needed)
3631 {
3632 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3633 RTX_FRAME_RELATED_P (insn) = 1;
3634
3635 /* Force the unwind info to recognize this as defining a new CFA,
3636 rather than some temp register setup. */
3637 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3638 }
3639
3640 if (current_frame_info.total_size != 0)
3641 {
3642 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3643 rtx offset;
3644
3645 if (satisfies_constraint_I (frame_size_rtx))
3646 offset = frame_size_rtx;
3647 else
3648 {
3649 regno = next_scratch_gr_reg ();
3650 offset = gen_rtx_REG (DImode, regno);
3651 emit_move_insn (offset, frame_size_rtx);
3652 }
3653
3654 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3655 stack_pointer_rtx, offset));
3656
3657 if (! frame_pointer_needed)
3658 {
3659 RTX_FRAME_RELATED_P (insn) = 1;
3660 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3661 gen_rtx_SET (stack_pointer_rtx,
3662 gen_rtx_PLUS (DImode,
3663 stack_pointer_rtx,
3664 frame_size_rtx)));
3665 }
3666
3667 /* ??? At this point we must generate a magic insn that appears to
3668 modify the stack pointer, the frame pointer, and all spill
3669 iterators. This would allow the most scheduling freedom. For
3670 now, just hard stop. */
3671 emit_insn (gen_blockage ());
3672 }
3673
3674 /* Must copy out ar.unat before doing any integer spills. */
3675 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3676 {
3677 if (current_frame_info.r[reg_save_ar_unat])
3678 {
3679 ar_unat_save_reg
3680 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3681 reg_emitted (reg_save_ar_unat);
3682 }
3683 else
3684 {
3685 alt_regno = next_scratch_gr_reg ();
3686 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3687 current_frame_info.gr_used_mask |= 1 << alt_regno;
3688 }
3689
3690 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3691 insn = emit_move_insn (ar_unat_save_reg, reg);
3692 if (current_frame_info.r[reg_save_ar_unat])
3693 {
3694 RTX_FRAME_RELATED_P (insn) = 1;
3695 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3696 }
3697
3698 /* Even if we're not going to generate an epilogue, we still
3699 need to save the register so that EH works. */
3700 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3701 emit_insn (gen_prologue_use (ar_unat_save_reg));
3702 }
3703 else
3704 ar_unat_save_reg = NULL_RTX;
3705
3706 /* Spill all varargs registers. Do this before spilling any GR registers,
3707 since we want the UNAT bits for the GR registers to override the UNAT
3708 bits from varargs, which we don't care about. */
3709
3710 cfa_off = -16;
3711 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3712 {
3713 reg = gen_rtx_REG (DImode, regno);
3714 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3715 }
3716
3717 /* Locate the bottom of the register save area. */
3718 cfa_off = (current_frame_info.spill_cfa_off
3719 + current_frame_info.spill_size
3720 + current_frame_info.extra_spill_size);
3721
3722 /* Save the predicate register block either in a register or in memory. */
3723 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3724 {
3725 reg = gen_rtx_REG (DImode, PR_REG (0));
3726 if (current_frame_info.r[reg_save_pr] != 0)
3727 {
3728 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3729 reg_emitted (reg_save_pr);
3730 insn = emit_move_insn (alt_reg, reg);
3731
3732 /* ??? Denote pr spill/fill by a DImode move that modifies all
3733 64 hard registers. */
3734 RTX_FRAME_RELATED_P (insn) = 1;
3735 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3736
3737 /* Even if we're not going to generate an epilogue, we still
3738 need to save the register so that EH works. */
3739 if (! epilogue_p)
3740 emit_insn (gen_prologue_use (alt_reg));
3741 }
3742 else
3743 {
3744 alt_regno = next_scratch_gr_reg ();
3745 alt_reg = gen_rtx_REG (DImode, alt_regno);
3746 insn = emit_move_insn (alt_reg, reg);
3747 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3748 cfa_off -= 8;
3749 }
3750 }
3751
3752 /* Handle AR regs in numerical order. All of them get special handling. */
3753 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3754 && current_frame_info.r[reg_save_ar_unat] == 0)
3755 {
3756 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3757 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3758 cfa_off -= 8;
3759 }
3760
3761 /* The alloc insn already copied ar.pfs into a general register. The
3762 only thing we have to do now is copy that register to a stack slot
3763 if we'd not allocated a local register for the job. */
3764 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3765 && current_frame_info.r[reg_save_ar_pfs] == 0)
3766 {
3767 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3768 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3769 cfa_off -= 8;
3770 }
3771
3772 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3773 {
3774 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3775 if (current_frame_info.r[reg_save_ar_lc] != 0)
3776 {
3777 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3778 reg_emitted (reg_save_ar_lc);
3779 insn = emit_move_insn (alt_reg, reg);
3780 RTX_FRAME_RELATED_P (insn) = 1;
3781 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3782
3783 /* Even if we're not going to generate an epilogue, we still
3784 need to save the register so that EH works. */
3785 if (! epilogue_p)
3786 emit_insn (gen_prologue_use (alt_reg));
3787 }
3788 else
3789 {
3790 alt_regno = next_scratch_gr_reg ();
3791 alt_reg = gen_rtx_REG (DImode, alt_regno);
3792 emit_move_insn (alt_reg, reg);
3793 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3794 cfa_off -= 8;
3795 }
3796 }
3797
3798 /* Save the return pointer. */
3799 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3800 {
3801 reg = gen_rtx_REG (DImode, BR_REG (0));
3802 if (current_frame_info.r[reg_save_b0] != 0)
3803 {
3804 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3805 reg_emitted (reg_save_b0);
3806 insn = emit_move_insn (alt_reg, reg);
3807 RTX_FRAME_RELATED_P (insn) = 1;
3808 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3809
3810 /* Even if we're not going to generate an epilogue, we still
3811 need to save the register so that EH works. */
3812 if (! epilogue_p)
3813 emit_insn (gen_prologue_use (alt_reg));
3814 }
3815 else
3816 {
3817 alt_regno = next_scratch_gr_reg ();
3818 alt_reg = gen_rtx_REG (DImode, alt_regno);
3819 emit_move_insn (alt_reg, reg);
3820 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3821 cfa_off -= 8;
3822 }
3823 }
3824
3825 if (current_frame_info.r[reg_save_gp])
3826 {
3827 reg_emitted (reg_save_gp);
3828 insn = emit_move_insn (gen_rtx_REG (DImode,
3829 current_frame_info.r[reg_save_gp]),
3830 pic_offset_table_rtx);
3831 }
3832
3833 /* We should now be at the base of the gr/br/fr spill area. */
3834 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3835 + current_frame_info.spill_size));
3836
3837 /* Spill all general registers. */
3838 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3839 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3840 {
3841 reg = gen_rtx_REG (DImode, regno);
3842 do_spill (gen_gr_spill, reg, cfa_off, reg);
3843 cfa_off -= 8;
3844 }
3845
3846 /* Spill the rest of the BR registers. */
3847 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3848 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3849 {
3850 alt_regno = next_scratch_gr_reg ();
3851 alt_reg = gen_rtx_REG (DImode, alt_regno);
3852 reg = gen_rtx_REG (DImode, regno);
3853 emit_move_insn (alt_reg, reg);
3854 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3855 cfa_off -= 8;
3856 }
3857
3858 /* Align the frame and spill all FR registers. */
3859 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3860 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3861 {
3862 gcc_assert (!(cfa_off & 15));
3863 reg = gen_rtx_REG (XFmode, regno);
3864 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3865 cfa_off -= 16;
3866 }
3867
3868 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3869
3870 finish_spill_pointers ();
3871 }
3872
3873 /* Output the textual info surrounding the prologue. */
3874
3875 void
3876 ia64_start_function (FILE *file, const char *fnname,
3877 tree decl ATTRIBUTE_UNUSED)
3878 {
3879 #if TARGET_ABI_OPEN_VMS
3880 vms_start_function (fnname);
3881 #endif
3882
3883 fputs ("\t.proc ", file);
3884 assemble_name (file, fnname);
3885 fputc ('\n', file);
3886 ASM_OUTPUT_LABEL (file, fnname);
3887 }
3888
3889 /* Called after register allocation to add any instructions needed for the
3890 epilogue. Using an epilogue insn is preferred to putting all of the
3891 instructions in output_function_epilogue(), since it allows the scheduler
3892 to intermix instructions with the restores of the callee-saved registers. In
3893 some cases, it might be necessary to emit a barrier instruction as the last
3894 insn to prevent such scheduling. */
3895
3896 void
3897 ia64_expand_epilogue (int sibcall_p)
3898 {
3899 rtx_insn *insn;
3900 rtx reg, alt_reg, ar_unat_save_reg;
3901 int regno, alt_regno, cfa_off;
3902
3903 ia64_compute_frame_size (get_frame_size ());
3904
3905 /* If there is a frame pointer, then we use it instead of the stack
3906 pointer, so that the stack pointer does not need to be valid when
3907 the epilogue starts. See EXIT_IGNORE_STACK. */
3908 if (frame_pointer_needed)
3909 setup_spill_pointers (current_frame_info.n_spilled,
3910 hard_frame_pointer_rtx, 0);
3911 else
3912 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3913 current_frame_info.total_size);
3914
3915 if (current_frame_info.total_size != 0)
3916 {
3917 /* ??? At this point we must generate a magic insn that appears to
3918 modify the spill iterators and the frame pointer. This would
3919 allow the most scheduling freedom. For now, just hard stop. */
3920 emit_insn (gen_blockage ());
3921 }
3922
3923 /* Locate the bottom of the register save area. */
3924 cfa_off = (current_frame_info.spill_cfa_off
3925 + current_frame_info.spill_size
3926 + current_frame_info.extra_spill_size);
3927
3928 /* Restore the predicate registers. */
3929 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3930 {
3931 if (current_frame_info.r[reg_save_pr] != 0)
3932 {
3933 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3934 reg_emitted (reg_save_pr);
3935 }
3936 else
3937 {
3938 alt_regno = next_scratch_gr_reg ();
3939 alt_reg = gen_rtx_REG (DImode, alt_regno);
3940 do_restore (gen_movdi_x, alt_reg, cfa_off);
3941 cfa_off -= 8;
3942 }
3943 reg = gen_rtx_REG (DImode, PR_REG (0));
3944 emit_move_insn (reg, alt_reg);
3945 }
3946
3947 /* Restore the application registers. */
3948
3949 /* Load the saved unat from the stack, but do not restore it until
3950 after the GRs have been restored. */
3951 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3952 {
3953 if (current_frame_info.r[reg_save_ar_unat] != 0)
3954 {
3955 ar_unat_save_reg
3956 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3957 reg_emitted (reg_save_ar_unat);
3958 }
3959 else
3960 {
3961 alt_regno = next_scratch_gr_reg ();
3962 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3963 current_frame_info.gr_used_mask |= 1 << alt_regno;
3964 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3965 cfa_off -= 8;
3966 }
3967 }
3968 else
3969 ar_unat_save_reg = NULL_RTX;
3970
3971 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3972 {
3973 reg_emitted (reg_save_ar_pfs);
3974 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3975 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3976 emit_move_insn (reg, alt_reg);
3977 }
3978 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3979 {
3980 alt_regno = next_scratch_gr_reg ();
3981 alt_reg = gen_rtx_REG (DImode, alt_regno);
3982 do_restore (gen_movdi_x, alt_reg, cfa_off);
3983 cfa_off -= 8;
3984 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3985 emit_move_insn (reg, alt_reg);
3986 }
3987
3988 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3989 {
3990 if (current_frame_info.r[reg_save_ar_lc] != 0)
3991 {
3992 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3993 reg_emitted (reg_save_ar_lc);
3994 }
3995 else
3996 {
3997 alt_regno = next_scratch_gr_reg ();
3998 alt_reg = gen_rtx_REG (DImode, alt_regno);
3999 do_restore (gen_movdi_x, alt_reg, cfa_off);
4000 cfa_off -= 8;
4001 }
4002 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4003 emit_move_insn (reg, alt_reg);
4004 }
4005
4006 /* Restore the return pointer. */
4007 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4008 {
4009 if (current_frame_info.r[reg_save_b0] != 0)
4010 {
4011 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4012 reg_emitted (reg_save_b0);
4013 }
4014 else
4015 {
4016 alt_regno = next_scratch_gr_reg ();
4017 alt_reg = gen_rtx_REG (DImode, alt_regno);
4018 do_restore (gen_movdi_x, alt_reg, cfa_off);
4019 cfa_off -= 8;
4020 }
4021 reg = gen_rtx_REG (DImode, BR_REG (0));
4022 emit_move_insn (reg, alt_reg);
4023 }
4024
4025 /* We should now be at the base of the gr/br/fr spill area. */
4026 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4027 + current_frame_info.spill_size));
4028
4029 /* The GP may be stored on the stack in the prologue, but it's
4030 never restored in the epilogue. Skip the stack slot. */
4031 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4032 cfa_off -= 8;
4033
4034 /* Restore all general registers. */
4035 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4036 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4037 {
4038 reg = gen_rtx_REG (DImode, regno);
4039 do_restore (gen_gr_restore, reg, cfa_off);
4040 cfa_off -= 8;
4041 }
4042
4043 /* Restore the branch registers. */
4044 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4045 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4046 {
4047 alt_regno = next_scratch_gr_reg ();
4048 alt_reg = gen_rtx_REG (DImode, alt_regno);
4049 do_restore (gen_movdi_x, alt_reg, cfa_off);
4050 cfa_off -= 8;
4051 reg = gen_rtx_REG (DImode, regno);
4052 emit_move_insn (reg, alt_reg);
4053 }
4054
4055 /* Restore floating point registers. */
4056 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4057 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4058 {
4059 gcc_assert (!(cfa_off & 15));
4060 reg = gen_rtx_REG (XFmode, regno);
4061 do_restore (gen_fr_restore_x, reg, cfa_off);
4062 cfa_off -= 16;
4063 }
4064
4065 /* Restore ar.unat for real. */
4066 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4067 {
4068 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4069 emit_move_insn (reg, ar_unat_save_reg);
4070 }
4071
4072 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4073
4074 finish_spill_pointers ();
4075
4076 if (current_frame_info.total_size
4077 || cfun->machine->ia64_eh_epilogue_sp
4078 || frame_pointer_needed)
4079 {
4080 /* ??? At this point we must generate a magic insn that appears to
4081 modify the spill iterators, the stack pointer, and the frame
4082 pointer. This would allow the most scheduling freedom. For now,
4083 just hard stop. */
4084 emit_insn (gen_blockage ());
4085 }
4086
4087 if (cfun->machine->ia64_eh_epilogue_sp)
4088 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4089 else if (frame_pointer_needed)
4090 {
4091 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4092 RTX_FRAME_RELATED_P (insn) = 1;
4093 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4094 }
4095 else if (current_frame_info.total_size)
4096 {
4097 rtx offset, frame_size_rtx;
4098
4099 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4100 if (satisfies_constraint_I (frame_size_rtx))
4101 offset = frame_size_rtx;
4102 else
4103 {
4104 regno = next_scratch_gr_reg ();
4105 offset = gen_rtx_REG (DImode, regno);
4106 emit_move_insn (offset, frame_size_rtx);
4107 }
4108
4109 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4110 offset));
4111
4112 RTX_FRAME_RELATED_P (insn) = 1;
4113 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4114 gen_rtx_SET (stack_pointer_rtx,
4115 gen_rtx_PLUS (DImode,
4116 stack_pointer_rtx,
4117 frame_size_rtx)));
4118 }
4119
4120 if (cfun->machine->ia64_eh_epilogue_bsp)
4121 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4122
4123 if (! sibcall_p)
4124 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4125 else
4126 {
4127 int fp = GR_REG (2);
4128 /* We need a throw away register here, r0 and r1 are reserved,
4129 so r2 is the first available call clobbered register. If
4130 there was a frame_pointer register, we may have swapped the
4131 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4132 sure we're using the string "r2" when emitting the register
4133 name for the assembler. */
4134 if (current_frame_info.r[reg_fp]
4135 && current_frame_info.r[reg_fp] == GR_REG (2))
4136 fp = HARD_FRAME_POINTER_REGNUM;
4137
4138 /* We must emit an alloc to force the input registers to become output
4139 registers. Otherwise, if the callee tries to pass its parameters
4140 through to another call without an intervening alloc, then these
4141 values get lost. */
4142 /* ??? We don't need to preserve all input registers. We only need to
4143 preserve those input registers used as arguments to the sibling call.
4144 It is unclear how to compute that number here. */
4145 if (current_frame_info.n_input_regs != 0)
4146 {
4147 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4148
4149 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4150 const0_rtx, const0_rtx,
4151 n_inputs, const0_rtx));
4152 RTX_FRAME_RELATED_P (insn) = 1;
4153
4154 /* ??? We need to mark the alloc as frame-related so that it gets
4155 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4156 But there's nothing dwarf2 related to be done wrt the register
4157 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4158 the empty parallel means dwarf2out will not see anything. */
4159 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4160 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4161 }
4162 }
4163 }
4164
4165 /* Return 1 if br.ret can do all the work required to return from a
4166 function. */
4167
4168 int
4169 ia64_direct_return (void)
4170 {
4171 if (reload_completed && ! frame_pointer_needed)
4172 {
4173 ia64_compute_frame_size (get_frame_size ());
4174
4175 return (current_frame_info.total_size == 0
4176 && current_frame_info.n_spilled == 0
4177 && current_frame_info.r[reg_save_b0] == 0
4178 && current_frame_info.r[reg_save_pr] == 0
4179 && current_frame_info.r[reg_save_ar_pfs] == 0
4180 && current_frame_info.r[reg_save_ar_unat] == 0
4181 && current_frame_info.r[reg_save_ar_lc] == 0);
4182 }
4183 return 0;
4184 }
4185
4186 /* Return the magic cookie that we use to hold the return address
4187 during early compilation. */
4188
4189 rtx
4190 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4191 {
4192 if (count != 0)
4193 return NULL;
4194 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4195 }
4196
4197 /* Split this value after reload, now that we know where the return
4198 address is saved. */
4199
4200 void
4201 ia64_split_return_addr_rtx (rtx dest)
4202 {
4203 rtx src;
4204
4205 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4206 {
4207 if (current_frame_info.r[reg_save_b0] != 0)
4208 {
4209 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4210 reg_emitted (reg_save_b0);
4211 }
4212 else
4213 {
4214 HOST_WIDE_INT off;
4215 unsigned int regno;
4216 rtx off_r;
4217
4218 /* Compute offset from CFA for BR0. */
4219 /* ??? Must be kept in sync with ia64_expand_prologue. */
4220 off = (current_frame_info.spill_cfa_off
4221 + current_frame_info.spill_size);
4222 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4223 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4224 off -= 8;
4225
4226 /* Convert CFA offset to a register based offset. */
4227 if (frame_pointer_needed)
4228 src = hard_frame_pointer_rtx;
4229 else
4230 {
4231 src = stack_pointer_rtx;
4232 off += current_frame_info.total_size;
4233 }
4234
4235 /* Load address into scratch register. */
4236 off_r = GEN_INT (off);
4237 if (satisfies_constraint_I (off_r))
4238 emit_insn (gen_adddi3 (dest, src, off_r));
4239 else
4240 {
4241 emit_move_insn (dest, off_r);
4242 emit_insn (gen_adddi3 (dest, src, dest));
4243 }
4244
4245 src = gen_rtx_MEM (Pmode, dest);
4246 }
4247 }
4248 else
4249 src = gen_rtx_REG (DImode, BR_REG (0));
4250
4251 emit_move_insn (dest, src);
4252 }
4253
4254 int
4255 ia64_hard_regno_rename_ok (int from, int to)
4256 {
4257 /* Don't clobber any of the registers we reserved for the prologue. */
4258 unsigned int r;
4259
4260 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4261 if (to == current_frame_info.r[r]
4262 || from == current_frame_info.r[r]
4263 || to == emitted_frame_related_regs[r]
4264 || from == emitted_frame_related_regs[r])
4265 return 0;
4266
4267 /* Don't use output registers outside the register frame. */
4268 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4269 return 0;
4270
4271 /* Retain even/oddness on predicate register pairs. */
4272 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4273 return (from & 1) == (to & 1);
4274
4275 return 1;
4276 }
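
/* For instance, renaming p6 to p8 passes the parity check above, while
   renaming p6 to p7 is rejected, since (6 & 1) != (7 & 1).  */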
4277
4278 /* Target hook for assembling integer objects. Handle word-sized
4279 aligned objects and detect the cases when @fptr is needed. */
4280
4281 static bool
4282 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4283 {
4284 if (size == POINTER_SIZE / BITS_PER_UNIT
4285 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4286 && GET_CODE (x) == SYMBOL_REF
4287 && SYMBOL_REF_FUNCTION_P (x))
4288 {
4289 static const char * const directive[2][2] = {
4290 /* 64-bit pointer */ /* 32-bit pointer */
4291 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4292 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4293 };
4294 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4295 output_addr_const (asm_out_file, x);
4296 fputs (")\n", asm_out_file);
4297 return true;
4298 }
4299 return default_assemble_integer (x, size, aligned_p);
4300 }
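
/* For example, a 64-bit function-pointer initializer referring to a symbol
   foo makes the code above emit roughly

	data8	@fptr(foo)

   (or data8.ua when the object is not known to be aligned), so that the
   linker materializes a function descriptor instead of the raw code
   address.  */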
4301
4302 /* Emit the function prologue. */
4303
4304 static void
4305 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4306 {
4307 int mask, grsave, grsave_prev;
4308
4309 if (current_frame_info.need_regstk)
4310 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4311 current_frame_info.n_input_regs,
4312 current_frame_info.n_local_regs,
4313 current_frame_info.n_output_regs,
4314 current_frame_info.n_rotate_regs);
4315
4316 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4317 return;
4318
4319 /* Emit the .prologue directive. */
4320
4321 mask = 0;
4322 grsave = grsave_prev = 0;
4323 if (current_frame_info.r[reg_save_b0] != 0)
4324 {
4325 mask |= 8;
4326 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4327 }
4328 if (current_frame_info.r[reg_save_ar_pfs] != 0
4329 && (grsave_prev == 0
4330 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4331 {
4332 mask |= 4;
4333 if (grsave_prev == 0)
4334 grsave = current_frame_info.r[reg_save_ar_pfs];
4335 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4336 }
4337 if (current_frame_info.r[reg_fp] != 0
4338 && (grsave_prev == 0
4339 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4340 {
4341 mask |= 2;
4342 if (grsave_prev == 0)
4343 grsave = HARD_FRAME_POINTER_REGNUM;
4344 grsave_prev = current_frame_info.r[reg_fp];
4345 }
4346 if (current_frame_info.r[reg_save_pr] != 0
4347 && (grsave_prev == 0
4348 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4349 {
4350 mask |= 1;
4351 if (grsave_prev == 0)
4352 grsave = current_frame_info.r[reg_save_pr];
4353 }
4354
4355 if (mask && TARGET_GNU_AS)
4356 fprintf (file, "\t.prologue %d, %d\n", mask,
4357 ia64_dbx_register_number (grsave));
4358 else
4359 fputs ("\t.prologue\n", file);
4360
4361 /* Emit a .spill directive, if necessary, to relocate the base of
4362 the register spill area. */
4363 if (current_frame_info.spill_cfa_off != -16)
4364 fprintf (file, "\t.spill %ld\n",
4365 (long) (current_frame_info.spill_cfa_off
4366 + current_frame_info.spill_size));
4367 }
4368
4369 /* Emit the .body directive at the scheduled end of the prologue. */
4370
4371 static void
4372 ia64_output_function_end_prologue (FILE *file)
4373 {
4374 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4375 return;
4376
4377 fputs ("\t.body\n", file);
4378 }
4379
4380 /* Emit the function epilogue. */
4381
4382 static void
4383 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4384 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4385 {
4386 int i;
4387
4388 if (current_frame_info.r[reg_fp])
4389 {
4390 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4391 reg_names[HARD_FRAME_POINTER_REGNUM]
4392 = reg_names[current_frame_info.r[reg_fp]];
4393 reg_names[current_frame_info.r[reg_fp]] = tmp;
4394 reg_emitted (reg_fp);
4395 }
4396 if (! TARGET_REG_NAMES)
4397 {
4398 for (i = 0; i < current_frame_info.n_input_regs; i++)
4399 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4400 for (i = 0; i < current_frame_info.n_local_regs; i++)
4401 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4402 for (i = 0; i < current_frame_info.n_output_regs; i++)
4403 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4404 }
4405
4406 current_frame_info.initialized = 0;
4407 }
4408
4409 int
4410 ia64_dbx_register_number (int regno)
4411 {
4412 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4413 from its home at loc79 to something inside the register frame. We
4414 must perform the same renumbering here for the debug info. */
4415 if (current_frame_info.r[reg_fp])
4416 {
4417 if (regno == HARD_FRAME_POINTER_REGNUM)
4418 regno = current_frame_info.r[reg_fp];
4419 else if (regno == current_frame_info.r[reg_fp])
4420 regno = HARD_FRAME_POINTER_REGNUM;
4421 }
4422
4423 if (IN_REGNO_P (regno))
4424 return 32 + regno - IN_REG (0);
4425 else if (LOC_REGNO_P (regno))
4426 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4427 else if (OUT_REGNO_P (regno))
4428 return (32 + current_frame_info.n_input_regs
4429 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4430 else
4431 return regno;
4432 }
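
/* A worked example of the mapping above: with 2 input and 3 local
   registers, in1 (regno IN_REG (1)) maps to debug register 33, loc0
   (regno LOC_REG (0)) maps to 32 + 2 = 34, and out0 (regno OUT_REG (0))
   maps to 32 + 2 + 3 = 37.  */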
4433
4434 /* Implement TARGET_TRAMPOLINE_INIT.
4435
4436 The trampoline should set the static chain pointer to value placed
4437 into the trampoline and should branch to the specified routine.
4438 To make the normal indirect-subroutine calling convention work,
4439 the trampoline must look like a function descriptor; the first
4440 word being the target address and the second being the target's
4441 global pointer.
4442
4443 We abuse the concept of a global pointer by arranging for it
4444 to point to the data we need to load. The complete trampoline
4445 has the following form:
4446
4447 +-------------------+ \
4448 TRAMP: | __ia64_trampoline | |
4449 +-------------------+ > fake function descriptor
4450 | TRAMP+16 | |
4451 +-------------------+ /
4452 | target descriptor |
4453 +-------------------+
4454 | static link |
4455 +-------------------+
4456 */
4457
4458 static void
4459 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4460 {
4461 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4462 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4463
4464 /* The Intel assembler requires that the global __ia64_trampoline symbol
4465 be declared explicitly. */
4466 if (!TARGET_GNU_AS)
4467 {
4468 static bool declared_ia64_trampoline = false;
4469
4470 if (!declared_ia64_trampoline)
4471 {
4472 declared_ia64_trampoline = true;
4473 (*targetm.asm_out.globalize_label) (asm_out_file,
4474 "__ia64_trampoline");
4475 }
4476 }
4477
4478 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4479 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4480 fnaddr = convert_memory_address (Pmode, fnaddr);
4481 static_chain = convert_memory_address (Pmode, static_chain);
4482
4483 /* Load up our iterator. */
4484 addr_reg = copy_to_reg (addr);
4485 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4486
4487 /* The first two words are the fake descriptor:
4488 __ia64_trampoline, ADDR+16. */
4489 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4490 if (TARGET_ABI_OPEN_VMS)
4491 {
4492 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4493 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4494 relocation against function symbols to make it identical to the
4495 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4496 strict ELF and dereference to get the bare code address. */
4497 rtx reg = gen_reg_rtx (Pmode);
4498 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4499 emit_move_insn (reg, tramp);
4500 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4501 tramp = reg;
4502 }
4503 emit_move_insn (m_tramp, tramp);
4504 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4505 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4506
4507 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4508 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4509 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4510
4511 /* The third word is the target descriptor. */
4512 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4513 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4514 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4515
4516 /* The fourth word is the static chain. */
4517 emit_move_insn (m_tramp, static_chain);
4518 }
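
/* As a usage sketch: when the address of a GNU C nested function escapes,
   for example

	int outer (int x)
	{
	  int inner (int y) { return x + y; }
	  return apply (inner);
	}

   (apply being a made-up callee), gcc materializes the 32-byte block laid
   out above on the stack and uses TRAMP itself as inner's function
   descriptor; an indirect call through it first lands in __ia64_trampoline,
   which picks up the real target descriptor and static link from the two
   trailing words.  */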
4519 \f
4520 /* Do any needed setup for a variadic function. CUM has not been updated
4521 for the last named argument which has type TYPE and mode MODE.
4522
4523 We generate the actual spill instructions during prologue generation. */
4524
4525 static void
4526 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4527 tree type, int * pretend_size,
4528 int second_time ATTRIBUTE_UNUSED)
4529 {
4530 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4531
4532 /* Skip the current argument. */
4533 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4534
4535 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4536 {
4537 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4538 *pretend_size = n * UNITS_PER_WORD;
4539 cfun->machine->n_varargs = n;
4540 }
4541 }
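
/* For illustration, assuming UNITS_PER_WORD is 8 and MAX_ARGUMENT_SLOTS is
   8: if the named arguments occupy three of the eight slots, then
   n = 8 - 3 = 5, *pretend_size becomes 40 bytes, and the varargs loop in
   ia64_expand_prologue spills five GR argument registers.  */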
4542
4543 /* Check whether TYPE is a homogeneous floating point aggregate. If
4544 it is, return the mode of the floating point type that appears
4545 in all leaves. If it is not, return VOIDmode.
4546 
4547 An aggregate is a homogeneous floating point aggregate if all
4548 fields/elements in it have the same floating point type (e.g.,
4549 SFmode). 128-bit quad-precision floats are excluded.
4550
4551 Variable sized aggregates should never arrive here, since we should
4552 have already decided to pass them by reference. Top-level zero-sized
4553 aggregates are excluded because our parallels crash the middle-end. */
4554
4555 static machine_mode
4556 hfa_element_mode (const_tree type, bool nested)
4557 {
4558 machine_mode element_mode = VOIDmode;
4559 machine_mode mode;
4560 enum tree_code code = TREE_CODE (type);
4561 int know_element_mode = 0;
4562 tree t;
4563
4564 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4565 return VOIDmode;
4566
4567 switch (code)
4568 {
4569 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4570 case BOOLEAN_TYPE: case POINTER_TYPE:
4571 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4572 case LANG_TYPE: case FUNCTION_TYPE:
4573 return VOIDmode;
4574
4575 /* Fortran complex types are supposed to be HFAs, so we need to handle
4576 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4577 types though. */
4578 case COMPLEX_TYPE:
4579 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4580 && TYPE_MODE (type) != TCmode)
4581 return GET_MODE_INNER (TYPE_MODE (type));
4582 else
4583 return VOIDmode;
4584
4585 case REAL_TYPE:
4586 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4587 mode if this is contained within an aggregate. */
4588 if (nested && TYPE_MODE (type) != TFmode)
4589 return TYPE_MODE (type);
4590 else
4591 return VOIDmode;
4592
4593 case ARRAY_TYPE:
4594 return hfa_element_mode (TREE_TYPE (type), 1);
4595
4596 case RECORD_TYPE:
4597 case UNION_TYPE:
4598 case QUAL_UNION_TYPE:
4599 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4600 {
4601 if (TREE_CODE (t) != FIELD_DECL)
4602 continue;
4603
4604 mode = hfa_element_mode (TREE_TYPE (t), 1);
4605 if (know_element_mode)
4606 {
4607 if (mode != element_mode)
4608 return VOIDmode;
4609 }
4610 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4611 return VOIDmode;
4612 else
4613 {
4614 know_element_mode = 1;
4615 element_mode = mode;
4616 }
4617 }
4618 return element_mode;
4619
4620 default:
4621 /* If we reach here, we probably have some front-end specific type
4622 that the backend doesn't know about. This can happen via the
4623 aggregate_value_p call in init_function_start. All we can do is
4624 ignore unknown tree types. */
4625 return VOIDmode;
4626 }
4627
4628 return VOIDmode;
4629 }
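
/* Some examples of this classification: struct { float x, y, z; } yields
   SFmode, since every leaf is SFmode; struct { float x; double y; } yields
   VOIDmode because the leaf modes differ; and an aggregate whose leaves are
   TFmode also yields VOIDmode, since 128-bit quad-precision floats are
   excluded.  */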
4630
4631 /* Return the number of words required to hold a quantity of TYPE and MODE
4632 when passed as an argument. */
4633 static int
4634 ia64_function_arg_words (const_tree type, machine_mode mode)
4635 {
4636 int words;
4637
4638 if (mode == BLKmode)
4639 words = int_size_in_bytes (type);
4640 else
4641 words = GET_MODE_SIZE (mode);
4642
4643 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4644 }
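
/* For example, with 8-byte words a 12-byte BLKmode aggregate occupies
   (12 + 7) / 8 = 2 argument words, and a DImode scalar occupies 1.  */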
4645
4646 /* Return the number of registers that should be skipped so the current
4647 argument (described by TYPE and WORDS) will be properly aligned.
4648
4649 Integer and float arguments larger than 8 bytes start at the next
4650 even boundary. Aggregates larger than 8 bytes start at the next
4651 even boundary if the aggregate has 16 byte alignment. Note that
4652 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4653 but are still to be aligned in registers.
4654
4655 ??? The ABI does not specify how to handle aggregates with
4656 alignment from 9 to 15 bytes, or greater than 16. We handle them
4657 all as if they had 16 byte alignment. Such aggregates can occur
4658 only if gcc extensions are used. */
4659 static int
4660 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4661 const_tree type, int words)
4662 {
4663 /* No registers are skipped on VMS. */
4664 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4665 return 0;
4666
4667 if (type
4668 && TREE_CODE (type) != INTEGER_TYPE
4669 && TREE_CODE (type) != REAL_TYPE)
4670 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4671 else
4672 return words > 1;
4673 }
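
/* For example (outside the VMS ABI): if cum->words is odd and the next
   argument is either a 16-byte-aligned aggregate or a scalar wider than one
   word, a single slot is skipped so the argument starts on an even slot;
   if cum->words is already even, nothing is skipped.  */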
4674
4675 /* Return rtx for register where argument is passed, or zero if it is passed
4676 on the stack. */
4677 /* ??? 128-bit quad-precision floats are always passed in general
4678 registers. */
4679
4680 static rtx
4681 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4682 const_tree type, bool named, bool incoming)
4683 {
4684 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4685
4686 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4687 int words = ia64_function_arg_words (type, mode);
4688 int offset = ia64_function_arg_offset (cum, type, words);
4689 machine_mode hfa_mode = VOIDmode;
4690
4691 /* For OPEN VMS, emit the instruction setting up the argument register here,
4692 when we know it will be emitted together with the other argument-setup
4693 insns. This is not conceptually the best place to do this, but it is
4694 the easiest as we have convenient access to cumulative args info. */
4695
4696 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4697 && named == 1)
4698 {
4699 unsigned HOST_WIDE_INT regval = cum->words;
4700 int i;
4701
4702 for (i = 0; i < 8; i++)
4703 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4704
4705 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4706 GEN_INT (regval));
4707 }
4708
4709 /* If all argument slots are used, then it must go on the stack. */
4710 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4711 return 0;
4712
4713 /* On OpenVMS argument is either in Rn or Fn. */
4714 if (TARGET_ABI_OPEN_VMS)
4715 {
4716 if (FLOAT_MODE_P (mode))
4717 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4718 else
4719 return gen_rtx_REG (mode, basereg + cum->words);
4720 }
4721
4722 /* Check for and handle homogeneous FP aggregates. */
4723 if (type)
4724 hfa_mode = hfa_element_mode (type, 0);
4725
4726 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4727 and unprototyped hfas are passed specially. */
4728 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4729 {
4730 rtx loc[16];
4731 int i = 0;
4732 int fp_regs = cum->fp_regs;
4733 int int_regs = cum->words + offset;
4734 int hfa_size = GET_MODE_SIZE (hfa_mode);
4735 int byte_size;
4736 int args_byte_size;
4737
4738 /* If prototyped, pass it in FR regs then GR regs.
4739 If not prototyped, pass it in both FR and GR regs.
4740
4741 If this is an SFmode aggregate, then it is possible to run out of
4742 FR regs while GR regs are still left. In that case, we pass the
4743 remaining part in the GR regs. */
4744
4745 /* Fill the FP regs. We do this always. We stop if we reach the end
4746 of the argument, the last FP register, or the last argument slot. */
4747
4748 byte_size = ((mode == BLKmode)
4749 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4750 args_byte_size = int_regs * UNITS_PER_WORD;
4751 offset = 0;
4752 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4753 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4754 {
4755 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4756 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4757 + fp_regs)),
4758 GEN_INT (offset));
4759 offset += hfa_size;
4760 args_byte_size += hfa_size;
4761 fp_regs++;
4762 }
4763
4764 /* If no prototype, then the whole thing must go in GR regs. */
4765 if (! cum->prototype)
4766 offset = 0;
4767 /* If this is an SFmode aggregate, then we might have some left over
4768 that needs to go in GR regs. */
4769 else if (byte_size != offset)
4770 int_regs += offset / UNITS_PER_WORD;
4771
4772 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4773
4774 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4775 {
4776 machine_mode gr_mode = DImode;
4777 unsigned int gr_size;
4778
4779 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4780 then this goes in a GR reg left adjusted/little endian, right
4781 adjusted/big endian. */
4782 /* ??? Currently this is handled wrong, because 4-byte hunks are
4783 always right adjusted/little endian. */
4784 if (offset & 0x4)
4785 gr_mode = SImode;
4786 /* If we have an even 4 byte hunk because the aggregate is a
4787 multiple of 4 bytes in size, then this goes in a GR reg right
4788 adjusted/little endian. */
4789 else if (byte_size - offset == 4)
4790 gr_mode = SImode;
4791
4792 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4793 gen_rtx_REG (gr_mode, (basereg
4794 + int_regs)),
4795 GEN_INT (offset));
4796
4797 gr_size = GET_MODE_SIZE (gr_mode);
4798 offset += gr_size;
4799 if (gr_size == UNITS_PER_WORD
4800 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4801 int_regs++;
4802 else if (gr_size > UNITS_PER_WORD)
4803 int_regs += gr_size / UNITS_PER_WORD;
4804 }
4805 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4806 }
4807
4808 /* Integral and aggregates go in general registers. If we have run out of
4809 FR registers, then FP values must also go in general registers. This can
4810 happen when we have a SFmode HFA. */
4811 else if (mode == TFmode || mode == TCmode
4812 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4813 {
4814 int byte_size = ((mode == BLKmode)
4815 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4816 if (BYTES_BIG_ENDIAN
4817 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4818 && byte_size < UNITS_PER_WORD
4819 && byte_size > 0)
4820 {
4821 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4822 gen_rtx_REG (DImode,
4823 (basereg + cum->words
4824 + offset)),
4825 const0_rtx);
4826 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4827 }
4828 else
4829 return gen_rtx_REG (mode, basereg + cum->words + offset);
4830
4831 }
4832
4833 /* If there is a prototype, then FP values go in a FR register when
4834 named, and in a GR register when unnamed. */
4835 else if (cum->prototype)
4836 {
4837 if (named)
4838 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4839 /* In big-endian mode, an anonymous SFmode value must be represented
4840 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4841 the value into the high half of the general register. */
4842 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4843 return gen_rtx_PARALLEL (mode,
4844 gen_rtvec (1,
4845 gen_rtx_EXPR_LIST (VOIDmode,
4846 gen_rtx_REG (DImode, basereg + cum->words + offset),
4847 const0_rtx)));
4848 else
4849 return gen_rtx_REG (mode, basereg + cum->words + offset);
4850 }
4851 /* If there is no prototype, then FP values go in both FR and GR
4852 registers. */
4853 else
4854 {
4855 /* See comment above. */
4856 machine_mode inner_mode =
4857 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4858
4859 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4860 gen_rtx_REG (mode, (FR_ARG_FIRST
4861 + cum->fp_regs)),
4862 const0_rtx);
4863 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4864 gen_rtx_REG (inner_mode,
4865 (basereg + cum->words
4866 + offset)),
4867 const0_rtx);
4868
4869 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4870 }
4871 }
4872
4873 /* Implement TARGET_FUNCTION_ARG target hook. */
4874
4875 static rtx
4876 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4877 const_tree type, bool named)
4878 {
4879 return ia64_function_arg_1 (cum, mode, type, named, false);
4880 }
4881
4882 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4883
4884 static rtx
4885 ia64_function_incoming_arg (cumulative_args_t cum,
4886 machine_mode mode,
4887 const_tree type, bool named)
4888 {
4889 return ia64_function_arg_1 (cum, mode, type, named, true);
4890 }
4891
4892 /* Return number of bytes, at the beginning of the argument, that must be
4893 put in registers. 0 if the argument is entirely in registers or entirely
4894 in memory. */
4895
4896 static int
4897 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4898 tree type, bool named ATTRIBUTE_UNUSED)
4899 {
4900 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4901
4902 int words = ia64_function_arg_words (type, mode);
4903 int offset = ia64_function_arg_offset (cum, type, words);
4904
4905 /* If all argument slots are used, then it must go on the stack. */
4906 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4907 return 0;
4908
4909 /* It doesn't matter whether the argument goes in FR or GR regs. If
4910 it fits within the 8 argument slots, then it goes entirely in
4911 registers. If it extends past the last argument slot, then the rest
4912 goes on the stack. */
4913
4914 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4915 return 0;
4916
4917 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4918 }
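
/* Worked example (assuming 8 argument slots of 8 bytes each): a 4-word
   argument starting at cum->words == 6 has 6 + 4 > 8, so only
   (8 - 6) * 8 = 16 bytes are passed in registers and the remaining 16
   bytes go on the stack.  */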
4919
4920 /* Return ivms_arg_type based on machine_mode. */
4921
4922 static enum ivms_arg_type
4923 ia64_arg_type (machine_mode mode)
4924 {
4925 switch (mode)
4926 {
4927 case SFmode:
4928 return FS;
4929 case DFmode:
4930 return FT;
4931 default:
4932 return I64;
4933 }
4934 }
4935
4936 /* Update CUM to point after this argument. This is patterned after
4937 ia64_function_arg. */
4938
4939 static void
4940 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4941 const_tree type, bool named)
4942 {
4943 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4944 int words = ia64_function_arg_words (type, mode);
4945 int offset = ia64_function_arg_offset (cum, type, words);
4946 machine_mode hfa_mode = VOIDmode;
4947
4948 /* If all arg slots are already full, then there is nothing to do. */
4949 if (cum->words >= MAX_ARGUMENT_SLOTS)
4950 {
4951 cum->words += words + offset;
4952 return;
4953 }
4954
4955 cum->atypes[cum->words] = ia64_arg_type (mode);
4956 cum->words += words + offset;
4957
4958 /* On OpenVMS argument is either in Rn or Fn. */
4959 if (TARGET_ABI_OPEN_VMS)
4960 {
4961 cum->int_regs = cum->words;
4962 cum->fp_regs = cum->words;
4963 return;
4964 }
4965
4966 /* Check for and handle homogeneous FP aggregates. */
4967 if (type)
4968 hfa_mode = hfa_element_mode (type, 0);
4969
4970 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4971 and unprototyped hfas are passed specially. */
4972 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4973 {
4974 int fp_regs = cum->fp_regs;
4975 /* This is the original value of cum->words + offset. */
4976 int int_regs = cum->words - words;
4977 int hfa_size = GET_MODE_SIZE (hfa_mode);
4978 int byte_size;
4979 int args_byte_size;
4980
4981 /* If prototyped, pass it in FR regs then GR regs.
4982 If not prototyped, pass it in both FR and GR regs.
4983
4984 If this is an SFmode aggregate, then it is possible to run out of
4985 FR regs while GR regs are still left. In that case, we pass the
4986 remaining part in the GR regs. */
4987
4988 /* Fill the FP regs. We do this always. We stop if we reach the end
4989 of the argument, the last FP register, or the last argument slot. */
4990
4991 byte_size = ((mode == BLKmode)
4992 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4993 args_byte_size = int_regs * UNITS_PER_WORD;
4994 offset = 0;
4995 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4996 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4997 {
4998 offset += hfa_size;
4999 args_byte_size += hfa_size;
5000 fp_regs++;
5001 }
5002
5003 cum->fp_regs = fp_regs;
5004 }
5005
5006 /* Integral and aggregates go in general registers. So do TFmode FP values.
5007 If we have run out of FR registers, then other FP values must also go in
5008 general registers. This can happen when we have a SFmode HFA. */
5009 else if (mode == TFmode || mode == TCmode
5010 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
5011 cum->int_regs = cum->words;
5012
5013 /* If there is a prototype, then FP values go in a FR register when
5014 named, and in a GR register when unnamed. */
5015 else if (cum->prototype)
5016 {
5017 if (! named)
5018 cum->int_regs = cum->words;
5019 else
5020 /* ??? Complex types should not reach here. */
5021 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5022 }
5023 /* If there is no prototype, then FP values go in both FR and GR
5024 registers. */
5025 else
5026 {
5027 /* ??? Complex types should not reach here. */
5028 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5029 cum->int_regs = cum->words;
5030 }
5031 }
5032
5033 /* Arguments with alignment larger than 8 bytes start at the next even
5034 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
5035 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5036
5037 static unsigned int
5038 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5039 {
5040 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5041 return PARM_BOUNDARY * 2;
5042
5043 if (type)
5044 {
5045 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5046 return PARM_BOUNDARY * 2;
5047 else
5048 return PARM_BOUNDARY;
5049 }
5050
5051 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5052 return PARM_BOUNDARY * 2;
5053 else
5054 return PARM_BOUNDARY;
5055 }
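
/* Concretely, with ia64's 64-bit PARM_BOUNDARY: an int or a double is
   aligned to 64 bits, while a 16-byte-aligned aggregate, a TImode value,
   or a TFmode value on ILP32 HP-UX is aligned to 128 bits.  */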
5056
5057 /* True if it is OK to do sibling call optimization for the specified
5058 call expression EXP. DECL will be the called function, or NULL if
5059 this is an indirect call. */
5060 static bool
5061 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5062 {
5063 /* We can't perform a sibcall if the current function has the syscall_linkage
5064 attribute. */
5065 if (lookup_attribute ("syscall_linkage",
5066 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5067 return false;
5068
5069 /* We must always return with our current GP. This means we can
5070 only sibcall to functions defined in the current module unless
5071 TARGET_CONST_GP is set to true. */
5072 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5073 }
5074 \f
5075
5076 /* Implement va_arg. */
5077
5078 static tree
5079 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5080 gimple_seq *post_p)
5081 {
5082 /* Variable sized types are passed by reference. */
5083 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5084 {
5085 tree ptrtype = build_pointer_type (type);
5086 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5087 return build_va_arg_indirect_ref (addr);
5088 }
5089
5090 /* Aggregate arguments with alignment larger than 8 bytes start at
5091 the next even boundary. Integer and floating point arguments
5092 do so if they are larger than 8 bytes, whether or not they are
5093 also aligned larger than 8 bytes. */
5094 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5095 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5096 {
5097 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5098 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5099 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5100 gimplify_assign (unshare_expr (valist), t, pre_p);
5101 }
5102
5103 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5104 }
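
/* The round-up above is the usual align-to-16 idiom: with 8-byte words it
   computes valist = (valist + 15) & -16, so a valist of, say, 0x...28 is
   bumped to 0x...30 before the standard va_arg expansion runs.  */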
5105 \f
5106 /* Return 1 if the function return value is returned in memory. Return 0 if it is
5107 in a register. */
5108
5109 static bool
5110 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5111 {
5112 machine_mode mode;
5113 machine_mode hfa_mode;
5114 HOST_WIDE_INT byte_size;
5115
5116 mode = TYPE_MODE (valtype);
5117 byte_size = GET_MODE_SIZE (mode);
5118 if (mode == BLKmode)
5119 {
5120 byte_size = int_size_in_bytes (valtype);
5121 if (byte_size < 0)
5122 return true;
5123 }
5124
5125 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5126
5127 hfa_mode = hfa_element_mode (valtype, 0);
5128 if (hfa_mode != VOIDmode)
5129 {
5130 int hfa_size = GET_MODE_SIZE (hfa_mode);
5131
5132 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5133 return true;
5134 else
5135 return false;
5136 }
5137 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5138 return true;
5139 else
5140 return false;
5141 }
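
/* Illustrative cases: an HFA of 8 doubles is returned in the FP argument
   registers, an HFA of 10 floats exceeds MAX_ARGUMENT_SLOTS elements and so
   goes in memory, and a 24-byte non-HFA aggregate stays in general
   registers provided it fits in UNITS_PER_WORD * MAX_INT_RETURN_SLOTS
   bytes.  */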
5142
5143 /* Return rtx for register that holds the function return value. */
5144
5145 static rtx
5146 ia64_function_value (const_tree valtype,
5147 const_tree fn_decl_or_type,
5148 bool outgoing ATTRIBUTE_UNUSED)
5149 {
5150 machine_mode mode;
5151 machine_mode hfa_mode;
5152 int unsignedp;
5153 const_tree func = fn_decl_or_type;
5154
5155 if (fn_decl_or_type
5156 && !DECL_P (fn_decl_or_type))
5157 func = NULL;
5158
5159 mode = TYPE_MODE (valtype);
5160 hfa_mode = hfa_element_mode (valtype, 0);
5161
5162 if (hfa_mode != VOIDmode)
5163 {
5164 rtx loc[8];
5165 int i;
5166 int hfa_size;
5167 int byte_size;
5168 int offset;
5169
5170 hfa_size = GET_MODE_SIZE (hfa_mode);
5171 byte_size = ((mode == BLKmode)
5172 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5173 offset = 0;
5174 for (i = 0; offset < byte_size; i++)
5175 {
5176 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5177 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5178 GEN_INT (offset));
5179 offset += hfa_size;
5180 }
5181 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5182 }
5183 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5184 return gen_rtx_REG (mode, FR_ARG_FIRST);
5185 else
5186 {
5187 bool need_parallel = false;
5188
5189 /* In big-endian mode, we need to manage the layout of aggregates
5190 in the registers so that we get the bits properly aligned in
5191 the highpart of the registers. */
5192 if (BYTES_BIG_ENDIAN
5193 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5194 need_parallel = true;
5195
5196 /* Something like struct S { long double x; char a[0] } is not an
5197 HFA structure, and therefore doesn't go in fp registers. But
5198 the middle-end will give it XFmode anyway, and XFmode values
5199 don't normally fit in integer registers. So we need to smuggle
5200 the value inside a parallel. */
5201 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5202 need_parallel = true;
5203
5204 if (need_parallel)
5205 {
5206 rtx loc[8];
5207 int offset;
5208 int bytesize;
5209 int i;
5210
5211 offset = 0;
5212 bytesize = int_size_in_bytes (valtype);
5213 /* An empty PARALLEL is invalid here, but the return value
5214 doesn't matter for empty structs. */
5215 if (bytesize == 0)
5216 return gen_rtx_REG (mode, GR_RET_FIRST);
5217 for (i = 0; offset < bytesize; i++)
5218 {
5219 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5220 gen_rtx_REG (DImode,
5221 GR_RET_FIRST + i),
5222 GEN_INT (offset));
5223 offset += UNITS_PER_WORD;
5224 }
5225 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5226 }
5227
5228 mode = promote_function_mode (valtype, mode, &unsignedp,
5229 func ? TREE_TYPE (func) : NULL_TREE,
5230 true);
5231
5232 return gen_rtx_REG (mode, GR_RET_FIRST);
5233 }
5234 }
5235
5236 /* Worker function for TARGET_LIBCALL_VALUE. */
5237
5238 static rtx
5239 ia64_libcall_value (machine_mode mode,
5240 const_rtx fun ATTRIBUTE_UNUSED)
5241 {
5242 return gen_rtx_REG (mode,
5243 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5244 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5245 && (mode) != TFmode)
5246 ? FR_RET_FIRST : GR_RET_FIRST));
5247 }
5248
5249 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5250
5251 static bool
5252 ia64_function_value_regno_p (const unsigned int regno)
5253 {
5254 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5255 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5256 }
5257
5258 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5259 We need to emit DTP-relative relocations. */
5260
5261 static void
5262 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5263 {
5264 gcc_assert (size == 4 || size == 8);
5265 if (size == 4)
5266 fputs ("\tdata4.ua\t@dtprel(", file);
5267 else
5268 fputs ("\tdata8.ua\t@dtprel(", file);
5269 output_addr_const (file, x);
5270 fputs (")", file);
5271 }
5272
5273 /* Print a memory address as an operand to reference that memory location. */
5274
5275 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5276 also call this from ia64_print_operand for memory addresses. */
5277
5278 static void
5279 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5280 rtx address ATTRIBUTE_UNUSED)
5281 {
5282 }
5283
5284 /* Print an operand to an assembler instruction.
5285 C Swap and print a comparison operator.
5286 D Print an FP comparison operator.
5287 E Print 32 - constant, for SImode shifts as extract.
5288 e Print 64 - constant, for DImode rotates.
5289 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5290 a floating point register emitted normally.
5291 G A floating point constant.
5292 I Invert a predicate register by adding 1.
5293 J Select the proper predicate register for a condition.
5294 j Select the inverse predicate register for a condition.
5295 O Append .acq for a volatile load.
5296 P Postincrement of a MEM.
5297 Q Append .rel for a volatile store.
5298 R Print .s, .d, or nothing for a single, double or no truncation.
5299 S Shift amount for shladd instruction.
5300 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5301 for Intel assembler.
5302 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5303 for Intel assembler.
5304 X A pair of floating point registers.
5305 r Print register name, or constant 0 as r0. HP compatibility for
5306 Linux kernel.
5307 v Print vector constant value as an 8-byte integer value. */
5308
5309 static void
5310 ia64_print_operand (FILE * file, rtx x, int code)
5311 {
5312 const char *str;
5313
5314 switch (code)
5315 {
5316 case 0:
5317 /* Handled below. */
5318 break;
5319
5320 case 'C':
5321 {
5322 enum rtx_code c = swap_condition (GET_CODE (x));
5323 fputs (GET_RTX_NAME (c), file);
5324 return;
5325 }
5326
5327 case 'D':
5328 switch (GET_CODE (x))
5329 {
5330 case NE:
5331 str = "neq";
5332 break;
5333 case UNORDERED:
5334 str = "unord";
5335 break;
5336 case ORDERED:
5337 str = "ord";
5338 break;
5339 case UNLT:
5340 str = "nge";
5341 break;
5342 case UNLE:
5343 str = "ngt";
5344 break;
5345 case UNGT:
5346 str = "nle";
5347 break;
5348 case UNGE:
5349 str = "nlt";
5350 break;
5351 case UNEQ:
5352 case LTGT:
5353 gcc_unreachable ();
5354 default:
5355 str = GET_RTX_NAME (GET_CODE (x));
5356 break;
5357 }
5358 fputs (str, file);
5359 return;
5360
5361 case 'E':
5362 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5363 return;
5364
5365 case 'e':
5366 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5367 return;
5368
5369 case 'F':
5370 if (x == CONST0_RTX (GET_MODE (x)))
5371 str = reg_names [FR_REG (0)];
5372 else if (x == CONST1_RTX (GET_MODE (x)))
5373 str = reg_names [FR_REG (1)];
5374 else
5375 {
5376 gcc_assert (GET_CODE (x) == REG);
5377 str = reg_names [REGNO (x)];
5378 }
5379 fputs (str, file);
5380 return;
5381
5382 case 'G':
5383 {
5384 long val[4];
5385 REAL_VALUE_TYPE rv;
5386 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5387 real_to_target (val, &rv, GET_MODE (x));
5388 if (GET_MODE (x) == SFmode)
5389 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5390 else if (GET_MODE (x) == DFmode)
5391 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5392 & 0xffffffff,
5393 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5394 & 0xffffffff);
5395 else
5396 output_operand_lossage ("invalid %%G mode");
5397 }
5398 return;
5399
5400 case 'I':
5401 fputs (reg_names [REGNO (x) + 1], file);
5402 return;
5403
5404 case 'J':
5405 case 'j':
5406 {
5407 unsigned int regno = REGNO (XEXP (x, 0));
5408 if (GET_CODE (x) == EQ)
5409 regno += 1;
5410 if (code == 'j')
5411 regno ^= 1;
5412 fputs (reg_names [regno], file);
5413 }
5414 return;
5415
5416 case 'O':
5417 if (MEM_VOLATILE_P (x))
5418 fputs(".acq", file);
5419 return;
5420
5421 case 'P':
5422 {
5423 HOST_WIDE_INT value;
5424
5425 switch (GET_CODE (XEXP (x, 0)))
5426 {
5427 default:
5428 return;
5429
5430 case POST_MODIFY:
5431 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5432 if (GET_CODE (x) == CONST_INT)
5433 value = INTVAL (x);
5434 else
5435 {
5436 gcc_assert (GET_CODE (x) == REG);
5437 fprintf (file, ", %s", reg_names[REGNO (x)]);
5438 return;
5439 }
5440 break;
5441
5442 case POST_INC:
5443 value = GET_MODE_SIZE (GET_MODE (x));
5444 break;
5445
5446 case POST_DEC:
5447 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5448 break;
5449 }
5450
5451 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5452 return;
5453 }
5454
5455 case 'Q':
5456 if (MEM_VOLATILE_P (x))
5457 fputs(".rel", file);
5458 return;
5459
5460 case 'R':
5461 if (x == CONST0_RTX (GET_MODE (x)))
5462 fputs(".s", file);
5463 else if (x == CONST1_RTX (GET_MODE (x)))
5464 fputs(".d", file);
5465 else if (x == CONST2_RTX (GET_MODE (x)))
5466 ;
5467 else
5468 output_operand_lossage ("invalid %%R value");
5469 return;
5470
5471 case 'S':
5472 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5473 return;
5474
5475 case 'T':
5476 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5477 {
5478 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5479 return;
5480 }
5481 break;
5482
5483 case 'U':
5484 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5485 {
5486 const char *prefix = "0x";
5487 if (INTVAL (x) & 0x80000000)
5488 {
5489 fprintf (file, "0xffffffff");
5490 prefix = "";
5491 }
5492 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5493 return;
5494 }
5495 break;
5496
5497 case 'X':
5498 {
5499 unsigned int regno = REGNO (x);
5500 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5501 }
5502 return;
5503
5504 case 'r':
5505 /* If this operand is the constant zero, write it as register zero.
5506 Any register, zero, or CONST_INT value is OK here. */
5507 if (GET_CODE (x) == REG)
5508 fputs (reg_names[REGNO (x)], file);
5509 else if (x == CONST0_RTX (GET_MODE (x)))
5510 fputs ("r0", file);
5511 else if (GET_CODE (x) == CONST_INT)
5512 output_addr_const (file, x);
5513 else
5514 output_operand_lossage ("invalid %%r value");
5515 return;
5516
5517 case 'v':
5518 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5519 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5520 break;
5521
5522 case '+':
5523 {
5524 const char *which;
5525
5526 /* For conditional branches, returns or calls, substitute
5527 sptk, dptk, dpnt, or spnt for %+. */
5528 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5529 if (x)
5530 {
5531 int pred_val = XINT (x, 0);
5532
5533 /* Guess the top and bottom 2% statically predicted. */
5534 if (pred_val < REG_BR_PROB_BASE / 50
5535 && br_prob_note_reliable_p (x))
5536 which = ".spnt";
5537 else if (pred_val < REG_BR_PROB_BASE / 2)
5538 which = ".dpnt";
5539 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5540 || !br_prob_note_reliable_p (x))
5541 which = ".dptk";
5542 else
5543 which = ".sptk";
5544 }
5545 else if (CALL_P (current_output_insn))
5546 which = ".sptk";
5547 else
5548 which = ".dptk";
5549
5550 fputs (which, file);
5551 return;
5552 }
5553
5554 case ',':
5555 x = current_insn_predicate;
5556 if (x)
5557 {
5558 unsigned int regno = REGNO (XEXP (x, 0));
5559 if (GET_CODE (x) == EQ)
5560 regno += 1;
5561 fprintf (file, "(%s) ", reg_names [regno]);
5562 }
5563 return;
5564
5565 default:
5566 output_operand_lossage ("ia64_print_operand: unknown code");
5567 return;
5568 }
5569
5570 switch (GET_CODE (x))
5571 {
5572 /* This happens for the spill/restore instructions. */
5573 case POST_INC:
5574 case POST_DEC:
5575 case POST_MODIFY:
5576 x = XEXP (x, 0);
5577 /* ... fall through ... */
5578
5579 case REG:
5580 fputs (reg_names [REGNO (x)], file);
5581 break;
5582
5583 case MEM:
5584 {
5585 rtx addr = XEXP (x, 0);
5586 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5587 addr = XEXP (addr, 0);
5588 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5589 break;
5590 }
5591
5592 default:
5593 output_addr_const (file, x);
5594 break;
5595 }
5596
5597 return;
5598 }
5599
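/* A hypothetical output template (not taken from ia64.md) showing how the
   '%,' and '%+' punctuation handled above reaches the assembly: for a branch
   predicated on p6 whose REG_BR_PROB note is reliable and close to
   REG_BR_PROB_BASE, a template such as

       "%,br.cond%+ %l0"

   would be printed as

       (p6) br.cond.sptk .L42  */
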
5600 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5601
5602 static bool
5603 ia64_print_operand_punct_valid_p (unsigned char code)
5604 {
5605 return (code == '+' || code == ',');
5606 }
5607 \f
5608 /* Compute a (partial) cost for rtx X. Return true if the complete
5609 cost has been computed, and false if subexpressions should be
5610 scanned. In either case, *TOTAL contains the cost result. */
5611 /* ??? This is incomplete. */
5612
5613 static bool
5614 ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5615 int *total, bool speed ATTRIBUTE_UNUSED)
5616 {
5617 switch (code)
5618 {
5619 case CONST_INT:
5620 switch (outer_code)
5621 {
5622 case SET:
5623 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5624 return true;
5625 case PLUS:
5626 if (satisfies_constraint_I (x))
5627 *total = 0;
5628 else if (satisfies_constraint_J (x))
5629 *total = 1;
5630 else
5631 *total = COSTS_N_INSNS (1);
5632 return true;
5633 default:
5634 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5635 *total = 0;
5636 else
5637 *total = COSTS_N_INSNS (1);
5638 return true;
5639 }
5640
5641 case CONST_DOUBLE:
5642 *total = COSTS_N_INSNS (1);
5643 return true;
5644
5645 case CONST:
5646 case SYMBOL_REF:
5647 case LABEL_REF:
5648 *total = COSTS_N_INSNS (3);
5649 return true;
5650
5651 case FMA:
5652 *total = COSTS_N_INSNS (4);
5653 return true;
5654
5655 case MULT:
5656 /* For multiplies wider than HImode, we have to go to the FPU,
5657 which normally involves copies. Plus there's the latency
5658 of the multiply itself, and the latency of the instructions to
5659 transfer integer regs to FP regs. */
5660 if (FLOAT_MODE_P (GET_MODE (x)))
5661 *total = COSTS_N_INSNS (4);
5662 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5663 *total = COSTS_N_INSNS (10);
5664 else
5665 *total = COSTS_N_INSNS (2);
5666 return true;
5667
5668 case PLUS:
5669 case MINUS:
5670 if (FLOAT_MODE_P (GET_MODE (x)))
5671 {
5672 *total = COSTS_N_INSNS (4);
5673 return true;
5674 }
5675 /* FALLTHRU */
5676
5677 case ASHIFT:
5678 case ASHIFTRT:
5679 case LSHIFTRT:
5680 *total = COSTS_N_INSNS (1);
5681 return true;
5682
5683 case DIV:
5684 case UDIV:
5685 case MOD:
5686 case UMOD:
5687 /* We make divide expensive, so that divide-by-constant will be
5688 optimized to a multiply. */
5689 *total = COSTS_N_INSNS (60);
5690 return true;
5691
5692 default:
5693 return false;
5694 }
5695 }
5696
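/* Why a non-HImode integer MULT costs COSTS_N_INSNS (10) above: Itanium has
   no ALU multiply, so the operands are moved to FP registers, multiplied
   there, and moved back, roughly as in this hand-written assembly (an
   illustration, not output of this file):

       setf.sig f6 = r32      // GR -> FR transfer
       setf.sig f7 = r33
       ;;
       xmpy.l   f6 = f6, f7   // low 64 bits of the product
       ;;
       getf.sig r8 = f6       // FR -> GR transfer  */
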
5697 /* Calculate the cost of moving data from a register in class FROM to
5698 one in class TO, using MODE. */
5699
5700 static int
5701 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5702 reg_class_t to)
5703 {
5704 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5705 if (to == ADDL_REGS)
5706 to = GR_REGS;
5707 if (from == ADDL_REGS)
5708 from = GR_REGS;
5709
5710 /* All costs are symmetric, so reduce cases by putting the
5711 lower-numbered class as the destination. */
5712 if (from < to)
5713 {
5714 reg_class_t tmp = to;
5715 to = from, from = tmp;
5716 }
5717
5718 /* Moving between FR and GR registers in XFmode must be more expensive
5719 than 2, so that we get secondary memory reloads. For moves within
5720 FR_REGS, we have to make this at least as expensive as memory_move_cost
5721 to avoid spectacularly poor register class preferencing. */
5722 if (mode == XFmode || mode == RFmode)
5723 {
5724 if (to != GR_REGS || from != GR_REGS)
5725 return memory_move_cost (mode, to, false);
5726 else
5727 return 3;
5728 }
5729
5730 switch (to)
5731 {
5732 case PR_REGS:
5733 /* Moving between PR registers takes two insns. */
5734 if (from == PR_REGS)
5735 return 3;
5736 /* Moving between PR and anything but GR is impossible. */
5737 if (from != GR_REGS)
5738 return memory_move_cost (mode, to, false);
5739 break;
5740
5741 case BR_REGS:
5742 /* Moving between BR and anything but GR is impossible. */
5743 if (from != GR_REGS && from != GR_AND_BR_REGS)
5744 return memory_move_cost (mode, to, false);
5745 break;
5746
5747 case AR_I_REGS:
5748 case AR_M_REGS:
5749 /* Moving between AR and anything but GR is impossible. */
5750 if (from != GR_REGS)
5751 return memory_move_cost (mode, to, false);
5752 break;
5753
5754 case GR_REGS:
5755 case FR_REGS:
5756 case FP_REGS:
5757 case GR_AND_FR_REGS:
5758 case GR_AND_BR_REGS:
5759 case ALL_REGS:
5760 break;
5761
5762 default:
5763 gcc_unreachable ();
5764 }
5765
5766 return 2;
5767 }
5768
5769 /* Calculate the cost of moving data of MODE from a register to or from
5770 memory. */
5771
5772 static int
5773 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5774 reg_class_t rclass,
5775 bool in ATTRIBUTE_UNUSED)
5776 {
5777 if (rclass == GENERAL_REGS
5778 || rclass == FR_REGS
5779 || rclass == FP_REGS
5780 || rclass == GR_AND_FR_REGS)
5781 return 4;
5782 else
5783 return 10;
5784 }
5785
5786 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5787 on RCLASS to use when copying X into that class. */
5788
5789 static reg_class_t
5790 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5791 {
5792 switch (rclass)
5793 {
5794 case FR_REGS:
5795 case FP_REGS:
5796 /* Don't allow volatile mem reloads into floating point registers.
5797 This is defined to force reload to choose the r/m case instead
5798 of the f/f case when reloading (set (reg fX) (mem/v)). */
5799 if (MEM_P (x) && MEM_VOLATILE_P (x))
5800 return NO_REGS;
5801
5802 /* Force all unrecognized constants into the constant pool. */
5803 if (CONSTANT_P (x))
5804 return NO_REGS;
5805 break;
5806
5807 case AR_M_REGS:
5808 case AR_I_REGS:
5809 if (!OBJECT_P (x))
5810 return NO_REGS;
5811 break;
5812
5813 default:
5814 break;
5815 }
5816
5817 return rclass;
5818 }
5819
5820 /* This function returns the register class required for a secondary
5821 register when copying between one of the registers in RCLASS, and X,
5822 using MODE. A return value of NO_REGS means that no secondary register
5823 is required. */
5824
5825 enum reg_class
5826 ia64_secondary_reload_class (enum reg_class rclass,
5827 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5828 {
5829 int regno = -1;
5830
5831 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5832 regno = true_regnum (x);
5833
5834 switch (rclass)
5835 {
5836 case BR_REGS:
5837 case AR_M_REGS:
5838 case AR_I_REGS:
5839 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5840 interaction. We end up with two pseudos with overlapping lifetimes
5841 both of which are equiv to the same constant, and both of which need
5842 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5843 changes depending on the path length, which means the qty_first_reg
5844 check in make_regs_eqv can give different answers at different times.
5845 At some point I'll probably need a reload_indi pattern to handle
5846 this.
5847
5848 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5849 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5850 non-general registers for good measure. */
5851 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5852 return GR_REGS;
5853
5854 /* This is needed if a pseudo used as a call_operand gets spilled to a
5855 stack slot. */
5856 if (GET_CODE (x) == MEM)
5857 return GR_REGS;
5858 break;
5859
5860 case FR_REGS:
5861 case FP_REGS:
5862 /* Need to go through general registers to get to other class regs. */
5863 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5864 return GR_REGS;
5865
5866 /* This can happen when a paradoxical subreg is an operand to the
5867 muldi3 pattern. */
5868 /* ??? This shouldn't be necessary after instruction scheduling is
5869 enabled, because paradoxical subregs are not accepted by
5870 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5871 stop the paradoxical subreg stupidity in the *_operand functions
5872 in recog.c. */
5873 if (GET_CODE (x) == MEM
5874 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5875 || GET_MODE (x) == QImode))
5876 return GR_REGS;
5877
5878 /* This can happen because of the ior/and/etc patterns that accept FP
5879 registers as operands. If the third operand is a constant, then it
5880 needs to be reloaded into a FP register. */
5881 if (GET_CODE (x) == CONST_INT)
5882 return GR_REGS;
5883
5884 /* This can happen because of register elimination in a muldi3 insn.
5885 E.g. `26107 * (unsigned long)&u'. */
5886 if (GET_CODE (x) == PLUS)
5887 return GR_REGS;
5888 break;
5889
5890 case PR_REGS:
5891 /* ??? This happens if we cse/gcse a BImode value across a call,
5892 and the function has a nonlocal goto. This is because global
5893 does not allocate call crossing pseudos to hard registers when
5894 crtl->has_nonlocal_goto is true. This is relatively
5895 common for C++ programs that use exceptions. To reproduce,
5896 return NO_REGS and compile libstdc++. */
5897 if (GET_CODE (x) == MEM)
5898 return GR_REGS;
5899
5900 /* This can happen when we take a BImode subreg of a DImode value,
5901 and that DImode value winds up in some non-GR register. */
5902 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5903 return GR_REGS;
5904 break;
5905
5906 default:
5907 break;
5908 }
5909
5910 return NO_REGS;
5911 }
5912
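/* Illustration of the BR_REGS case above (hand-written assembly, not emitted
   by this file): there is no instruction that loads memory directly into a
   branch register, so a spilled branch target is reloaded through a general
   register, which is why GR_REGS is returned as the secondary reload class:

       ld8 r14 = [r12]        // reload into the secondary GR
       ;;
       mov b6 = r14           // then move GR -> BR  */
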
5913 \f
5914 /* Implement targetm.unspec_may_trap_p hook. */
5915 static int
5916 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5917 {
5918 switch (XINT (x, 1))
5919 {
5920 case UNSPEC_LDA:
5921 case UNSPEC_LDS:
5922 case UNSPEC_LDSA:
5923 case UNSPEC_LDCCLR:
5924 case UNSPEC_CHKACLR:
5925 case UNSPEC_CHKS:
5926 /* These unspecs are just wrappers. */
5927 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5928 }
5929
5930 return default_unspec_may_trap_p (x, flags);
5931 }
5932
5933 \f
5934 /* Parse the -mfixed-range= option string. */
5935
5936 static void
5937 fix_range (const char *const_str)
5938 {
5939 int i, first, last;
5940 char *str, *dash, *comma;
5941
5942 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5943 REG2 are either register names or register numbers. The effect
5944 of this option is to mark the registers in the range from REG1 to
5945 REG2 as ``fixed'' so they won't be used by the compiler. This is
5946 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5947
5948 i = strlen (const_str);
5949 str = (char *) alloca (i + 1);
5950 memcpy (str, const_str, i + 1);
5951
5952 while (1)
5953 {
5954 dash = strchr (str, '-');
5955 if (!dash)
5956 {
5957 warning (0, "value of -mfixed-range must have form REG1-REG2");
5958 return;
5959 }
5960 *dash = '\0';
5961
5962 comma = strchr (dash + 1, ',');
5963 if (comma)
5964 *comma = '\0';
5965
5966 first = decode_reg_name (str);
5967 if (first < 0)
5968 {
5969 warning (0, "unknown register name: %s", str);
5970 return;
5971 }
5972
5973 last = decode_reg_name (dash + 1);
5974 if (last < 0)
5975 {
5976 warning (0, "unknown register name: %s", dash + 1);
5977 return;
5978 }
5979
5980 *dash = '-';
5981
5982 if (first > last)
5983 {
5984 warning (0, "%s-%s is an empty range", str, dash + 1);
5985 return;
5986 }
5987
5988 for (i = first; i <= last; ++i)
5989 fixed_regs[i] = call_used_regs[i] = 1;
5990
5991 if (!comma)
5992 break;
5993
5994 *comma = ',';
5995 str = comma + 1;
5996 }
5997 }
5998
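/* Example of the option syntax parsed above (the register ranges are
   illustrative values, not defaults):

       -mfixed-range=f32-f127          // single range, as in kernel builds
       -mfixed-range=f32-f63,f96-f127  // comma-separated list of ranges

   Each register in the named ranges becomes both fixed and call-used.  */
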
5999 /* Implement TARGET_OPTION_OVERRIDE. */
6000
6001 static void
6002 ia64_option_override (void)
6003 {
6004 unsigned int i;
6005 cl_deferred_option *opt;
6006 vec<cl_deferred_option> *v
6007 = (vec<cl_deferred_option> *) ia64_deferred_options;
6008
6009 if (v)
6010 FOR_EACH_VEC_ELT (*v, i, opt)
6011 {
6012 switch (opt->opt_index)
6013 {
6014 case OPT_mfixed_range_:
6015 fix_range (opt->arg);
6016 break;
6017
6018 default:
6019 gcc_unreachable ();
6020 }
6021 }
6022
6023 if (TARGET_AUTO_PIC)
6024 target_flags |= MASK_CONST_GP;
6025
6026 /* Numerous experiments show that IRA-based loop pressure
6027 calculation works better for RTL loop invariant motion on targets
6028 with enough (>= 32) registers. It is an expensive optimization,
6029 so it is enabled only when optimizing for peak performance. */
6030 if (optimize >= 3)
6031 flag_ira_loop_pressure = 1;
6032
6033
6034 ia64_section_threshold = (global_options_set.x_g_switch_value
6035 ? g_switch_value
6036 : IA64_DEFAULT_GVALUE);
6037
6038 init_machine_status = ia64_init_machine_status;
6039
6040 if (align_functions <= 0)
6041 align_functions = 64;
6042 if (align_loops <= 0)
6043 align_loops = 32;
6044 if (TARGET_ABI_OPEN_VMS)
6045 flag_no_common = 1;
6046
6047 ia64_override_options_after_change();
6048 }
6049
6050 /* Implement targetm.override_options_after_change. */
6051
6052 static void
6053 ia64_override_options_after_change (void)
6054 {
6055 if (optimize >= 3
6056 && !global_options_set.x_flag_selective_scheduling
6057 && !global_options_set.x_flag_selective_scheduling2)
6058 {
6059 flag_selective_scheduling2 = 1;
6060 flag_sel_sched_pipelining = 1;
6061 }
6062 if (mflag_sched_control_spec == 2)
6063 {
6064 /* Control speculation is on by default for the selective scheduler,
6065 but not for the Haifa scheduler. */
6066 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6067 }
6068 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6069 {
6070 /* FIXME: remove this once breaking auto-inc insns apart is
6071 implemented as a transformation. */
6072 flag_auto_inc_dec = 0;
6073 }
6074 }
6075
6076 /* Initialize the record of emitted frame related registers. */
6077
6078 void ia64_init_expanders (void)
6079 {
6080 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6081 }
6082
6083 static struct machine_function *
6084 ia64_init_machine_status (void)
6085 {
6086 return ggc_cleared_alloc<machine_function> ();
6087 }
6088 \f
6089 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6090 static enum attr_type ia64_safe_type (rtx_insn *);
6091
6092 static enum attr_itanium_class
6093 ia64_safe_itanium_class (rtx_insn *insn)
6094 {
6095 if (recog_memoized (insn) >= 0)
6096 return get_attr_itanium_class (insn);
6097 else if (DEBUG_INSN_P (insn))
6098 return ITANIUM_CLASS_IGNORE;
6099 else
6100 return ITANIUM_CLASS_UNKNOWN;
6101 }
6102
6103 static enum attr_type
6104 ia64_safe_type (rtx_insn *insn)
6105 {
6106 if (recog_memoized (insn) >= 0)
6107 return get_attr_type (insn);
6108 else
6109 return TYPE_UNKNOWN;
6110 }
6111 \f
6112 /* The following collection of routines emit instruction group stop bits as
6113 necessary to avoid dependencies. */
6114
6115 /* Need to track some additional registers as far as serialization is
6116 concerned so we can properly handle br.call and br.ret. We could
6117 make these registers visible to gcc, but since these registers are
6118 never explicitly used in gcc generated code, it seems wasteful to
6119 do so (plus it would make the call and return patterns needlessly
6120 complex). */
6121 #define REG_RP (BR_REG (0))
6122 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6123 /* This is used for volatile asms which may require a stop bit immediately
6124 before and after them. */
6125 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6126 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6127 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6128
6129 /* For each register, we keep track of how it has been written in the
6130 current instruction group.
6131
6132 If a register is written unconditionally (no qualifying predicate),
6133 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6134
6135 If a register is written if its qualifying predicate P is true, we
6136 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6137 may be written again by the complement of P (P^1) and when this happens,
6138 WRITE_COUNT gets set to 2.
6139
6140 The result of this is that whenever an insn attempts to write a register
6141 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6142
6143 If a predicate register is written by a floating-point insn, we set
6144 WRITTEN_BY_FP to true.
6145
6146 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6147 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6148
6149 #if GCC_VERSION >= 4000
6150 #define RWS_FIELD_TYPE __extension__ unsigned short
6151 #else
6152 #define RWS_FIELD_TYPE unsigned int
6153 #endif
6154 struct reg_write_state
6155 {
6156 RWS_FIELD_TYPE write_count : 2;
6157 RWS_FIELD_TYPE first_pred : 10;
6158 RWS_FIELD_TYPE written_by_fp : 1;
6159 RWS_FIELD_TYPE written_by_and : 1;
6160 RWS_FIELD_TYPE written_by_or : 1;
6161 };
6162
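/* Illustration (hand-written assembly, not emitted by this file): two
   unpredicated writes to the same register inside one instruction group are
   a WAW violation, so a stop bit (";;") must separate them:

       mov r14 = r15
       ;;               // required: write_count for r14 is already 2
       mov r14 = r16

   Complementary and.orcm / or.andcm writes to a predicate register are the
   exception tracked by written_by_and / written_by_or.  */
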
6163 /* Cumulative info for the current instruction group. */
6164 struct reg_write_state rws_sum[NUM_REGS];
6165 #ifdef ENABLE_CHECKING
6166 /* Bitmap whether a register has been written in the current insn. */
6167 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6168 / HOST_BITS_PER_WIDEST_FAST_INT];
6169
6170 static inline void
6171 rws_insn_set (int regno)
6172 {
6173 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6174 SET_HARD_REG_BIT (rws_insn, regno);
6175 }
6176
6177 static inline int
6178 rws_insn_test (int regno)
6179 {
6180 return TEST_HARD_REG_BIT (rws_insn, regno);
6181 }
6182 #else
6183 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6184 unsigned char rws_insn[2];
6185
6186 static inline void
6187 rws_insn_set (int regno)
6188 {
6189 if (regno == REG_AR_CFM)
6190 rws_insn[0] = 1;
6191 else if (regno == REG_VOLATILE)
6192 rws_insn[1] = 1;
6193 }
6194
6195 static inline int
6196 rws_insn_test (int regno)
6197 {
6198 if (regno == REG_AR_CFM)
6199 return rws_insn[0];
6200 if (regno == REG_VOLATILE)
6201 return rws_insn[1];
6202 return 0;
6203 }
6204 #endif
6205
6206 /* Indicates whether this is the first instruction after a stop bit,
6207 in which case we don't need another stop bit. Without this,
6208 ia64_variable_issue will die when scheduling an alloc. */
6209 static int first_instruction;
6210
6211 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6212 RTL for one instruction. */
6213 struct reg_flags
6214 {
6215 unsigned int is_write : 1; /* Is register being written? */
6216 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6217 unsigned int is_branch : 1; /* Is register used as part of a branch? */
6218 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6219 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
6220 unsigned int is_sibcall : 1; /* Is this a sibling call (vs. a normal call)? */
6221 };
6222
6223 static void rws_update (int, struct reg_flags, int);
6224 static int rws_access_regno (int, struct reg_flags, int);
6225 static int rws_access_reg (rtx, struct reg_flags, int);
6226 static void update_set_flags (rtx, struct reg_flags *);
6227 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6228 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6229 static void init_insn_group_barriers (void);
6230 static int group_barrier_needed (rtx_insn *);
6231 static int safe_group_barrier_needed (rtx_insn *);
6232 static int in_safe_group_barrier;
6233
6234 /* Update *RWS for REGNO, which is being written by the current instruction,
6235 with predicate PRED, and associated register flags in FLAGS. */
6236
6237 static void
6238 rws_update (int regno, struct reg_flags flags, int pred)
6239 {
6240 if (pred)
6241 rws_sum[regno].write_count++;
6242 else
6243 rws_sum[regno].write_count = 2;
6244 rws_sum[regno].written_by_fp |= flags.is_fp;
6245 /* ??? Not tracking and/or across differing predicates. */
6246 rws_sum[regno].written_by_and = flags.is_and;
6247 rws_sum[regno].written_by_or = flags.is_or;
6248 rws_sum[regno].first_pred = pred;
6249 }
6250
6251 /* Handle an access to register REGNO of type FLAGS using predicate register
6252 PRED. Update rws_sum array. Return 1 if this access creates
6253 a dependency with an earlier instruction in the same group. */
6254
6255 static int
6256 rws_access_regno (int regno, struct reg_flags flags, int pred)
6257 {
6258 int need_barrier = 0;
6259
6260 gcc_assert (regno < NUM_REGS);
6261
6262 if (! PR_REGNO_P (regno))
6263 flags.is_and = flags.is_or = 0;
6264
6265 if (flags.is_write)
6266 {
6267 int write_count;
6268
6269 rws_insn_set (regno);
6270 write_count = rws_sum[regno].write_count;
6271
6272 switch (write_count)
6273 {
6274 case 0:
6275 /* The register has not been written yet. */
6276 if (!in_safe_group_barrier)
6277 rws_update (regno, flags, pred);
6278 break;
6279
6280 case 1:
6281 /* The register has been written via a predicate. Treat
6282 it like an unconditional write and do not try to check
6283 for a complementary predicate register in the earlier write. */
6284 if (flags.is_and && rws_sum[regno].written_by_and)
6285 ;
6286 else if (flags.is_or && rws_sum[regno].written_by_or)
6287 ;
6288 else
6289 need_barrier = 1;
6290 if (!in_safe_group_barrier)
6291 rws_update (regno, flags, pred);
6292 break;
6293
6294 case 2:
6295 /* The register has been unconditionally written already. We
6296 need a barrier. */
6297 if (flags.is_and && rws_sum[regno].written_by_and)
6298 ;
6299 else if (flags.is_or && rws_sum[regno].written_by_or)
6300 ;
6301 else
6302 need_barrier = 1;
6303 if (!in_safe_group_barrier)
6304 {
6305 rws_sum[regno].written_by_and = flags.is_and;
6306 rws_sum[regno].written_by_or = flags.is_or;
6307 }
6308 break;
6309
6310 default:
6311 gcc_unreachable ();
6312 }
6313 }
6314 else
6315 {
6316 if (flags.is_branch)
6317 {
6318 /* Branches have several RAW exceptions that allow us to avoid
6319 barriers. */
6320
6321 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6322 /* RAW dependencies on branch regs are permissible as long
6323 as the writer is a non-branch instruction. Since we
6324 never generate code that uses a branch register written
6325 by a branch instruction, handling this case is
6326 easy. */
6327 return 0;
6328
6329 if (REGNO_REG_CLASS (regno) == PR_REGS
6330 && ! rws_sum[regno].written_by_fp)
6331 /* The predicates of a branch are available within the
6332 same insn group as long as the predicate was written by
6333 something other than a floating-point instruction. */
6334 return 0;
6335 }
6336
6337 if (flags.is_and && rws_sum[regno].written_by_and)
6338 return 0;
6339 if (flags.is_or && rws_sum[regno].written_by_or)
6340 return 0;
6341
6342 switch (rws_sum[regno].write_count)
6343 {
6344 case 0:
6345 /* The register has not been written yet. */
6346 break;
6347
6348 case 1:
6349 /* The register has been written via a predicate, assume we
6350 need a barrier (don't check for complementary regs). */
6351 need_barrier = 1;
6352 break;
6353
6354 case 2:
6355 /* The register has been unconditionally written already. We
6356 need a barrier. */
6357 need_barrier = 1;
6358 break;
6359
6360 default:
6361 gcc_unreachable ();
6362 }
6363 }
6364
6365 return need_barrier;
6366 }
6367
6368 static int
6369 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6370 {
6371 int regno = REGNO (reg);
6372 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6373
6374 if (n == 1)
6375 return rws_access_regno (regno, flags, pred);
6376 else
6377 {
6378 int need_barrier = 0;
6379 while (--n >= 0)
6380 need_barrier |= rws_access_regno (regno + n, flags, pred);
6381 return need_barrier;
6382 }
6383 }
6384
6385 /* Examine X, which is a SET rtx, and update the register flags
6386 stored in *PFLAGS. */
6387
6388 static void
6389 update_set_flags (rtx x, struct reg_flags *pflags)
6390 {
6391 rtx src = SET_SRC (x);
6392
6393 switch (GET_CODE (src))
6394 {
6395 case CALL:
6396 return;
6397
6398 case IF_THEN_ELSE:
6399 /* There are four cases here:
6400 (1) The destination is (pc), in which case this is a branch,
6401 nothing here applies.
6402 (2) The destination is ar.lc, in which case this is a
6403 doloop_end_internal,
6404 (3) The destination is an fp register, in which case this is
6405 an fselect instruction.
6406 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6407 this is a check load.
6408 In all cases, nothing we do in this function applies. */
6409 return;
6410
6411 default:
6412 if (COMPARISON_P (src)
6413 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6414 /* Set pflags->is_fp to 1 so that we know we're dealing
6415 with a floating point comparison when processing the
6416 destination of the SET. */
6417 pflags->is_fp = 1;
6418
6419 /* Discover if this is a parallel comparison. We only handle
6420 and.orcm and or.andcm at present, since we must retain a
6421 strict inverse on the predicate pair. */
6422 else if (GET_CODE (src) == AND)
6423 pflags->is_and = 1;
6424 else if (GET_CODE (src) == IOR)
6425 pflags->is_or = 1;
6426
6427 break;
6428 }
6429 }
6430
6431 /* Subroutine of rtx_needs_barrier; this function determines whether the
6432 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6433 are as in rtx_needs_barrier. */
6435
6436 static int
6437 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6438 {
6439 int need_barrier = 0;
6440 rtx dst;
6441 rtx src = SET_SRC (x);
6442
6443 if (GET_CODE (src) == CALL)
6444 /* We don't need to worry about the result registers that
6445 get written by subroutine call. */
6446 return rtx_needs_barrier (src, flags, pred);
6447 else if (SET_DEST (x) == pc_rtx)
6448 {
6449 /* X is a conditional branch. */
6450 /* ??? This seems redundant, as the caller sets this bit for
6451 all JUMP_INSNs. */
6452 if (!ia64_spec_check_src_p (src))
6453 flags.is_branch = 1;
6454 return rtx_needs_barrier (src, flags, pred);
6455 }
6456
6457 if (ia64_spec_check_src_p (src))
6458 /* Avoid checking one register twice (in condition
6459 and in 'then' section) for ldc pattern. */
6460 {
6461 gcc_assert (REG_P (XEXP (src, 2)));
6462 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6463
6464 /* We process MEM below. */
6465 src = XEXP (src, 1);
6466 }
6467
6468 need_barrier |= rtx_needs_barrier (src, flags, pred);
6469
6470 dst = SET_DEST (x);
6471 if (GET_CODE (dst) == ZERO_EXTRACT)
6472 {
6473 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6474 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6475 }
6476 return need_barrier;
6477 }
6478
6479 /* Handle an access to rtx X of type FLAGS using predicate register
6480 PRED. Return 1 if this access creates a dependency with an earlier
6481 instruction in the same group. */
6482
6483 static int
6484 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6485 {
6486 int i, j;
6487 int is_complemented = 0;
6488 int need_barrier = 0;
6489 const char *format_ptr;
6490 struct reg_flags new_flags;
6491 rtx cond;
6492
6493 if (! x)
6494 return 0;
6495
6496 new_flags = flags;
6497
6498 switch (GET_CODE (x))
6499 {
6500 case SET:
6501 update_set_flags (x, &new_flags);
6502 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6503 if (GET_CODE (SET_SRC (x)) != CALL)
6504 {
6505 new_flags.is_write = 1;
6506 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6507 }
6508 break;
6509
6510 case CALL:
6511 new_flags.is_write = 0;
6512 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6513
6514 /* Avoid multiple register writes, in case this is a pattern with
6515 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6516 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6517 {
6518 new_flags.is_write = 1;
6519 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6520 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6521 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6522 }
6523 break;
6524
6525 case COND_EXEC:
6526 /* X is a predicated instruction. */
6527
6528 cond = COND_EXEC_TEST (x);
6529 gcc_assert (!pred);
6530 need_barrier = rtx_needs_barrier (cond, flags, 0);
6531
6532 if (GET_CODE (cond) == EQ)
6533 is_complemented = 1;
6534 cond = XEXP (cond, 0);
6535 gcc_assert (GET_CODE (cond) == REG
6536 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6537 pred = REGNO (cond);
6538 if (is_complemented)
6539 ++pred;
6540
6541 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6542 return need_barrier;
6543
6544 case CLOBBER:
6545 case USE:
6546 /* Clobber & use are for earlier compiler phases only. */
6547 break;
6548
6549 case ASM_OPERANDS:
6550 case ASM_INPUT:
6551 /* We always emit stop bits for traditional asms. We emit stop bits
6552 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6553 if (GET_CODE (x) != ASM_OPERANDS
6554 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6555 {
6556 /* Avoid writing the register multiple times if we have multiple
6557 asm outputs. This avoids a failure in rws_access_reg. */
6558 if (! rws_insn_test (REG_VOLATILE))
6559 {
6560 new_flags.is_write = 1;
6561 rws_access_regno (REG_VOLATILE, new_flags, pred);
6562 }
6563 return 1;
6564 }
6565
6566 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6567 We cannot just fall through here since then we would be confused
6568 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6569 a traditional asm, unlike its normal usage. */
6570
6571 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6572 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6573 need_barrier = 1;
6574 break;
6575
6576 case PARALLEL:
6577 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6578 {
6579 rtx pat = XVECEXP (x, 0, i);
6580 switch (GET_CODE (pat))
6581 {
6582 case SET:
6583 update_set_flags (pat, &new_flags);
6584 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6585 break;
6586
6587 case USE:
6588 case CALL:
6589 case ASM_OPERANDS:
6590 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6591 break;
6592
6593 case CLOBBER:
6594 if (REG_P (XEXP (pat, 0))
6595 && extract_asm_operands (x) != NULL_RTX
6596 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6597 {
6598 new_flags.is_write = 1;
6599 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6600 new_flags, pred);
6601 new_flags = flags;
6602 }
6603 break;
6604
6605 case RETURN:
6606 break;
6607
6608 default:
6609 gcc_unreachable ();
6610 }
6611 }
6612 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6613 {
6614 rtx pat = XVECEXP (x, 0, i);
6615 if (GET_CODE (pat) == SET)
6616 {
6617 if (GET_CODE (SET_SRC (pat)) != CALL)
6618 {
6619 new_flags.is_write = 1;
6620 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6621 pred);
6622 }
6623 }
6624 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6625 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6626 }
6627 break;
6628
6629 case SUBREG:
6630 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6631 break;
6632 case REG:
6633 if (REGNO (x) == AR_UNAT_REGNUM)
6634 {
6635 for (i = 0; i < 64; ++i)
6636 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6637 }
6638 else
6639 need_barrier = rws_access_reg (x, flags, pred);
6640 break;
6641
6642 case MEM:
6643 /* Find the regs used in memory address computation. */
6644 new_flags.is_write = 0;
6645 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6646 break;
6647
6648 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6649 case SYMBOL_REF: case LABEL_REF: case CONST:
6650 break;
6651
6652 /* Operators with side-effects. */
6653 case POST_INC: case POST_DEC:
6654 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6655
6656 new_flags.is_write = 0;
6657 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6658 new_flags.is_write = 1;
6659 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6660 break;
6661
6662 case POST_MODIFY:
6663 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6664
6665 new_flags.is_write = 0;
6666 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6667 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6668 new_flags.is_write = 1;
6669 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6670 break;
6671
6672 /* Handle common unary and binary ops for efficiency. */
6673 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6674 case MOD: case UDIV: case UMOD: case AND: case IOR:
6675 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6676 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6677 case NE: case EQ: case GE: case GT: case LE:
6678 case LT: case GEU: case GTU: case LEU: case LTU:
6679 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6680 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6681 break;
6682
6683 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6684 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6685 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6686 case SQRT: case FFS: case POPCOUNT:
6687 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6688 break;
6689
6690 case VEC_SELECT:
6691 /* VEC_SELECT's second argument is a PARALLEL with integers that
6692 describe the elements selected. On ia64, those integers are
6693 always constants. Avoid walking the PARALLEL so that we don't
6694 get confused with "normal" parallels and then die. */
6695 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6696 break;
6697
6698 case UNSPEC:
6699 switch (XINT (x, 1))
6700 {
6701 case UNSPEC_LTOFF_DTPMOD:
6702 case UNSPEC_LTOFF_DTPREL:
6703 case UNSPEC_DTPREL:
6704 case UNSPEC_LTOFF_TPREL:
6705 case UNSPEC_TPREL:
6706 case UNSPEC_PRED_REL_MUTEX:
6707 case UNSPEC_PIC_CALL:
6708 case UNSPEC_MF:
6709 case UNSPEC_FETCHADD_ACQ:
6710 case UNSPEC_FETCHADD_REL:
6711 case UNSPEC_BSP_VALUE:
6712 case UNSPEC_FLUSHRS:
6713 case UNSPEC_BUNDLE_SELECTOR:
6714 break;
6715
6716 case UNSPEC_GR_SPILL:
6717 case UNSPEC_GR_RESTORE:
6718 {
6719 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6720 HOST_WIDE_INT bit = (offset >> 3) & 63;
6721
6722 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6723 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6724 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6725 new_flags, pred);
6726 break;
6727 }
6728
6729 case UNSPEC_FR_SPILL:
6730 case UNSPEC_FR_RESTORE:
6731 case UNSPEC_GETF_EXP:
6732 case UNSPEC_SETF_EXP:
6733 case UNSPEC_ADDP4:
6734 case UNSPEC_FR_SQRT_RECIP_APPROX:
6735 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6736 case UNSPEC_LDA:
6737 case UNSPEC_LDS:
6738 case UNSPEC_LDS_A:
6739 case UNSPEC_LDSA:
6740 case UNSPEC_CHKACLR:
6741 case UNSPEC_CHKS:
6742 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6743 break;
6744
6745 case UNSPEC_FR_RECIP_APPROX:
6746 case UNSPEC_SHRP:
6747 case UNSPEC_COPYSIGN:
6748 case UNSPEC_FR_RECIP_APPROX_RES:
6749 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6750 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6751 break;
6752
6753 case UNSPEC_CMPXCHG_ACQ:
6754 case UNSPEC_CMPXCHG_REL:
6755 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6756 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6757 break;
6758
6759 default:
6760 gcc_unreachable ();
6761 }
6762 break;
6763
6764 case UNSPEC_VOLATILE:
6765 switch (XINT (x, 1))
6766 {
6767 case UNSPECV_ALLOC:
6768 /* Alloc must always be the first instruction of a group.
6769 We force this by always returning true. */
6770 /* ??? We might get better scheduling if we explicitly check for
6771 input/local/output register dependencies, and modify the
6772 scheduler so that alloc is always reordered to the start of
6773 the current group. We could then eliminate all of the
6774 first_instruction code. */
6775 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6776
6777 new_flags.is_write = 1;
6778 rws_access_regno (REG_AR_CFM, new_flags, pred);
6779 return 1;
6780
6781 case UNSPECV_SET_BSP:
6782 case UNSPECV_PROBE_STACK_RANGE:
6783 need_barrier = 1;
6784 break;
6785
6786 case UNSPECV_BLOCKAGE:
6787 case UNSPECV_INSN_GROUP_BARRIER:
6788 case UNSPECV_BREAK:
6789 case UNSPECV_PSAC_ALL:
6790 case UNSPECV_PSAC_NORMAL:
6791 return 0;
6792
6793 case UNSPECV_PROBE_STACK_ADDRESS:
6794 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6795 break;
6796
6797 default:
6798 gcc_unreachable ();
6799 }
6800 break;
6801
6802 case RETURN:
6803 new_flags.is_write = 0;
6804 need_barrier = rws_access_regno (REG_RP, flags, pred);
6805 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6806
6807 new_flags.is_write = 1;
6808 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6809 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6810 break;
6811
6812 default:
6813 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6814 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6815 switch (format_ptr[i])
6816 {
6817 case '0': /* unused field */
6818 case 'i': /* integer */
6819 case 'n': /* note */
6820 case 'w': /* wide integer */
6821 case 's': /* pointer to string */
6822 case 'S': /* optional pointer to string */
6823 break;
6824
6825 case 'e':
6826 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6827 need_barrier = 1;
6828 break;
6829
6830 case 'E':
6831 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6832 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6833 need_barrier = 1;
6834 break;
6835
6836 default:
6837 gcc_unreachable ();
6838 }
6839 break;
6840 }
6841 return need_barrier;
6842 }
6843
6844 /* Clear out the state for group_barrier_needed at the start of a
6845 sequence of insns. */
6846
6847 static void
6848 init_insn_group_barriers (void)
6849 {
6850 memset (rws_sum, 0, sizeof (rws_sum));
6851 first_instruction = 1;
6852 }
6853
6854 /* Given the current state, determine whether a group barrier (a stop bit) is
6855 necessary before INSN. Return nonzero if so. This modifies the state to
6856 include the effects of INSN as a side-effect. */
6857
6858 static int
6859 group_barrier_needed (rtx_insn *insn)
6860 {
6861 rtx pat;
6862 int need_barrier = 0;
6863 struct reg_flags flags;
6864
6865 memset (&flags, 0, sizeof (flags));
6866 switch (GET_CODE (insn))
6867 {
6868 case NOTE:
6869 case DEBUG_INSN:
6870 break;
6871
6872 case BARRIER:
6873 /* A barrier doesn't imply an instruction group boundary. */
6874 break;
6875
6876 case CODE_LABEL:
6877 memset (rws_insn, 0, sizeof (rws_insn));
6878 return 1;
6879
6880 case CALL_INSN:
6881 flags.is_branch = 1;
6882 flags.is_sibcall = SIBLING_CALL_P (insn);
6883 memset (rws_insn, 0, sizeof (rws_insn));
6884
6885 /* Don't bundle a call following another call. */
6886 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6887 {
6888 need_barrier = 1;
6889 break;
6890 }
6891
6892 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6893 break;
6894
6895 case JUMP_INSN:
6896 if (!ia64_spec_check_p (insn))
6897 flags.is_branch = 1;
6898
6899 /* Don't bundle a jump following a call. */
6900 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6901 {
6902 need_barrier = 1;
6903 break;
6904 }
6905 /* FALLTHRU */
6906
6907 case INSN:
6908 if (GET_CODE (PATTERN (insn)) == USE
6909 || GET_CODE (PATTERN (insn)) == CLOBBER)
6910 /* Don't care about USE and CLOBBER "insns"---those are used to
6911 indicate to the optimizer that it shouldn't get rid of
6912 certain operations. */
6913 break;
6914
6915 pat = PATTERN (insn);
6916
6917 /* Ug. Hack hacks hacked elsewhere. */
6918 switch (recog_memoized (insn))
6919 {
6920 /* We play dependency tricks with the epilogue in order
6921 to get proper schedules. Undo this for dv analysis. */
6922 case CODE_FOR_epilogue_deallocate_stack:
6923 case CODE_FOR_prologue_allocate_stack:
6924 pat = XVECEXP (pat, 0, 0);
6925 break;
6926
6927 /* The pattern we use for br.cloop confuses the code above.
6928 The second element of the vector is representative. */
6929 case CODE_FOR_doloop_end_internal:
6930 pat = XVECEXP (pat, 0, 1);
6931 break;
6932
6933 /* Doesn't generate code. */
6934 case CODE_FOR_pred_rel_mutex:
6935 case CODE_FOR_prologue_use:
6936 return 0;
6937
6938 default:
6939 break;
6940 }
6941
6942 memset (rws_insn, 0, sizeof (rws_insn));
6943 need_barrier = rtx_needs_barrier (pat, flags, 0);
6944
6945 /* Check to see if the previous instruction was a volatile
6946 asm. */
6947 if (! need_barrier)
6948 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6949
6950 break;
6951
6952 default:
6953 gcc_unreachable ();
6954 }
6955
6956 if (first_instruction && important_for_bundling_p (insn))
6957 {
6958 need_barrier = 0;
6959 first_instruction = 0;
6960 }
6961
6962 return need_barrier;
6963 }
6964
6965 /* Like group_barrier_needed, but do not clobber the current state. */
6966
6967 static int
6968 safe_group_barrier_needed (rtx_insn *insn)
6969 {
6970 int saved_first_instruction;
6971 int t;
6972
6973 saved_first_instruction = first_instruction;
6974 in_safe_group_barrier = 1;
6975
6976 t = group_barrier_needed (insn);
6977
6978 first_instruction = saved_first_instruction;
6979 in_safe_group_barrier = 0;
6980
6981 return t;
6982 }
6983
6984 /* Scan the current function and insert stop bits as necessary to
6985 eliminate dependencies. This function assumes that a final
6986 instruction scheduling pass has been run which has already
6987 inserted most of the necessary stop bits. This function only
6988 inserts new ones at basic block boundaries, since these are
6989 invisible to the scheduler. */
6990
6991 static void
6992 emit_insn_group_barriers (FILE *dump)
6993 {
6994 rtx_insn *insn;
6995 rtx_insn *last_label = 0;
6996 int insns_since_last_label = 0;
6997
6998 init_insn_group_barriers ();
6999
7000 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7001 {
7002 if (LABEL_P (insn))
7003 {
7004 if (insns_since_last_label)
7005 last_label = insn;
7006 insns_since_last_label = 0;
7007 }
7008 else if (NOTE_P (insn)
7009 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7010 {
7011 if (insns_since_last_label)
7012 last_label = insn;
7013 insns_since_last_label = 0;
7014 }
7015 else if (NONJUMP_INSN_P (insn)
7016 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7017 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7018 {
7019 init_insn_group_barriers ();
7020 last_label = 0;
7021 }
7022 else if (NONDEBUG_INSN_P (insn))
7023 {
7024 insns_since_last_label = 1;
7025
7026 if (group_barrier_needed (insn))
7027 {
7028 if (last_label)
7029 {
7030 if (dump)
7031 fprintf (dump, "Emitting stop before label %d\n",
7032 INSN_UID (last_label));
7033 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7034 insn = last_label;
7035
7036 init_insn_group_barriers ();
7037 last_label = 0;
7038 }
7039 }
7040 }
7041 }
7042 }
7043
7044 /* Like emit_insn_group_barriers, but used when no final scheduling pass
7045 was run. This function has to emit all necessary group barriers. */
7046
7047 static void
7048 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7049 {
7050 rtx_insn *insn;
7051
7052 init_insn_group_barriers ();
7053
7054 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7055 {
7056 if (BARRIER_P (insn))
7057 {
7058 rtx_insn *last = prev_active_insn (insn);
7059
7060 if (! last)
7061 continue;
7062 if (JUMP_TABLE_DATA_P (last))
7063 last = prev_active_insn (last);
7064 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7065 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7066
7067 init_insn_group_barriers ();
7068 }
7069 else if (NONDEBUG_INSN_P (insn))
7070 {
7071 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7072 init_insn_group_barriers ();
7073 else if (group_barrier_needed (insn))
7074 {
7075 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7076 init_insn_group_barriers ();
7077 group_barrier_needed (insn);
7078 }
7079 }
7080 }
7081 }
7082
7083 \f
7084
7085 /* Instruction scheduling support. */
7086
7087 #define NR_BUNDLES 10
7088
7089 /* A list of names of all available bundles. */
7090
7091 static const char *bundle_name [NR_BUNDLES] =
7092 {
7093 ".mii",
7094 ".mmi",
7095 ".mfi",
7096 ".mmf",
7097 #if NR_BUNDLES == 10
7098 ".bbb",
7099 ".mbb",
7100 #endif
7101 ".mib",
7102 ".mmb",
7103 ".mfb",
7104 ".mlx"
7105 };
7106
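/* Illustration (hand-written assembly, not produced by this file): the
   bundle selector pseudo-op names one of the templates above, e.g.

       .mmi
       ld8 r14 = [r32]
       ld8 r15 = [r33]
       nop.i 0 ;;

   which packs two M-unit slots and one I-unit slot into a single bundle.  */
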
7107 /* Nonzero if we should insert stop bits into the schedule. */
7108
7109 int ia64_final_schedule = 0;
7110
7111 /* Codes of the corresponding queried units: */
7112
7113 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7114 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7115
7116 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7117 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7118
7119 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7120
7121 /* The following variable value is an insn group barrier. */
7122
7123 static rtx_insn *dfa_stop_insn;
7124
7125 /* The following variable value is the last issued insn. */
7126
7127 static rtx_insn *last_scheduled_insn;
7128
7129 /* The following variable value is pointer to a DFA state used as
7130 temporary variable. */
7131
7132 static state_t temp_dfa_state = NULL;
7133
7134 /* The following variable value is DFA state after issuing the last
7135 insn. */
7136
7137 static state_t prev_cycle_state = NULL;
7138
7139 /* The following array element values are TRUE if the corresponding
7140 insn requires stop bits to be added before it. */
7141
7142 static char *stops_p = NULL;
7143
7144 /* The following variable is used to set up the above-mentioned array. */
7145
7146 static int stop_before_p = 0;
7147
7148 /* The following variable value is the length of the arrays `clocks' and
7149 `add_cycles'. */
7150
7151 static int clocks_length;
7152
7153 /* The following variable value is number of data speculations in progress. */
7154 static int pending_data_specs = 0;
7155
7156 /* Number of memory references on the current and three future processor cycles. */
7157 static char mem_ops_in_group[4];
7158
7159 /* Number of current processor cycle (from scheduler's point of view). */
7160 static int current_cycle;
7161
7162 static rtx ia64_single_set (rtx_insn *);
7163 static void ia64_emit_insn_before (rtx, rtx);
7164
7165 /* Map a bundle number to its pseudo-op. */
7166
7167 const char *
7168 get_bundle_name (int b)
7169 {
7170 return bundle_name[b];
7171 }
7172
7173
7174 /* Return the maximum number of instructions a cpu can issue. */
7175
7176 static int
7177 ia64_issue_rate (void)
7178 {
7179 return 6;
7180 }
7181
7182 /* Helper function - like single_set, but look inside COND_EXEC. */
7183
7184 static rtx
7185 ia64_single_set (rtx_insn *insn)
7186 {
7187 rtx x = PATTERN (insn), ret;
7188 if (GET_CODE (x) == COND_EXEC)
7189 x = COND_EXEC_CODE (x);
7190 if (GET_CODE (x) == SET)
7191 return x;
7192
7193 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
7194 Although they are not a classical single set, the second set is there just
7195 to protect the first from moving past FP-relative stack accesses. */
7196 switch (recog_memoized (insn))
7197 {
7198 case CODE_FOR_prologue_allocate_stack:
7199 case CODE_FOR_prologue_allocate_stack_pr:
7200 case CODE_FOR_epilogue_deallocate_stack:
7201 case CODE_FOR_epilogue_deallocate_stack_pr:
7202 ret = XVECEXP (x, 0, 0);
7203 break;
7204
7205 default:
7206 ret = single_set_2 (insn, x);
7207 break;
7208 }
7209
7210 return ret;
7211 }
7212
7213 /* Adjust the cost of a scheduling dependency.
7214 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7215 COST is the current cost, DW is dependency weakness. */
7216 static int
7217 ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7218 int cost, dw_t dw)
7219 {
7220 enum reg_note dep_type = (enum reg_note) dep_type1;
7221 enum attr_itanium_class dep_class;
7222 enum attr_itanium_class insn_class;
7223
7224 insn_class = ia64_safe_itanium_class (insn);
7225 dep_class = ia64_safe_itanium_class (dep_insn);
7226
7227 /* Treat true memory dependencies separately. Ignore apparent true
7228 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7229 if (dep_type == REG_DEP_TRUE
7230 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7231 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7232 return 0;
7233
7234 if (dw == MIN_DEP_WEAK)
7235 /* Store and load are likely to alias, use higher cost to avoid stall. */
7236 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7237 else if (dw > MIN_DEP_WEAK)
7238 {
7239 /* Store and load are less likely to alias. */
7240 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7241 /* Assume there will be no cache conflict for floating-point data.
7242 For integer data, L1 conflict penalty is huge (17 cycles), so we
7243 never assume it will not cause a conflict. */
7244 return 0;
7245 else
7246 return cost;
7247 }
7248
7249 if (dep_type != REG_DEP_OUTPUT)
7250 return cost;
7251
7252 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7253 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7254 return 0;
7255
7256 return cost;
7257 }
7258
7259 /* Like emit_insn_before, but skip cycle_display notes.
7260 ??? When cycle display notes are implemented, update this. */
7261
7262 static void
7263 ia64_emit_insn_before (rtx insn, rtx before)
7264 {
7265 emit_insn_before (insn, before);
7266 }
7267
7268 /* The following function marks insns that produce addresses for load
7269 and store insns. Such insns will be placed into M slots because this
7270 decreases latency time for Itanium 1 (see function
7271 `ia64_produce_address_p' and the DFA descriptions). */
7272
7273 static void
7274 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7275 {
7276 rtx_insn *insn, *next, *next_tail;
7277
7278 /* Before reload, which_alternative is not set, which means that
7279 ia64_safe_itanium_class will produce wrong results for (at least)
7280 move instructions. */
7281 if (!reload_completed)
7282 return;
7283
7284 next_tail = NEXT_INSN (tail);
7285 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7286 if (INSN_P (insn))
7287 insn->call = 0;
7288 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7289 if (INSN_P (insn)
7290 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7291 {
7292 sd_iterator_def sd_it;
7293 dep_t dep;
7294 bool has_mem_op_consumer_p = false;
7295
7296 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7297 {
7298 enum attr_itanium_class c;
7299
7300 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7301 continue;
7302
7303 next = DEP_CON (dep);
7304 c = ia64_safe_itanium_class (next);
7305 if ((c == ITANIUM_CLASS_ST
7306 || c == ITANIUM_CLASS_STF)
7307 && ia64_st_address_bypass_p (insn, next))
7308 {
7309 has_mem_op_consumer_p = true;
7310 break;
7311 }
7312 else if ((c == ITANIUM_CLASS_LD
7313 || c == ITANIUM_CLASS_FLD
7314 || c == ITANIUM_CLASS_FLDP)
7315 && ia64_ld_address_bypass_p (insn, next))
7316 {
7317 has_mem_op_consumer_p = true;
7318 break;
7319 }
7320 }
7321
7322 insn->call = has_mem_op_consumer_p;
7323 }
7324 }
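/* Note that the insn's CALL bit is reused here as a scratch flag: it is
   first cleared for every insn in the region and then set for IALU insns
   that feed a memory address, to be consumed later by
   `ia64_produce_address_p' (referenced from the DFA descriptions).  */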
7325
7326 /* We're beginning a new block. Initialize data structures as necessary. */
7327
7328 static void
7329 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7330 int sched_verbose ATTRIBUTE_UNUSED,
7331 int max_ready ATTRIBUTE_UNUSED)
7332 {
7333 #ifdef ENABLE_CHECKING
7334 rtx_insn *insn;
7335
7336 if (!sel_sched_p () && reload_completed)
7337 for (insn = NEXT_INSN (current_sched_info->prev_head);
7338 insn != current_sched_info->next_tail;
7339 insn = NEXT_INSN (insn))
7340 gcc_assert (!SCHED_GROUP_P (insn));
7341 #endif
7342 last_scheduled_insn = NULL;
7343 init_insn_group_barriers ();
7344
7345 current_cycle = 0;
7346 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7347 }
7348
7349 /* We're beginning a scheduling pass. Check assertion. */
7350
7351 static void
7352 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7353 int sched_verbose ATTRIBUTE_UNUSED,
7354 int max_ready ATTRIBUTE_UNUSED)
7355 {
7356 gcc_assert (pending_data_specs == 0);
7357 }
7358
7359 /* Scheduling pass is now finished. Free/reset static variable. */
7360 static void
7361 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7362 int sched_verbose ATTRIBUTE_UNUSED)
7363 {
7364 gcc_assert (pending_data_specs == 0);
7365 }
7366
7367 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7368 speculation check), FALSE otherwise. */
7369 static bool
7370 is_load_p (rtx_insn *insn)
7371 {
7372 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7373
7374 return
7375 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7376 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7377 }
7378
7379 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
7380 array (taking into account the 3-cycle cache reference postponing for stores:
7381 Intel Itanium 2 Reference Manual for Software Development and Optimization,
7382 6.7.3.1). */
7383 static void
7384 record_memory_reference (rtx_insn *insn)
7385 {
7386 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7387
7388 switch (insn_class) {
7389 case ITANIUM_CLASS_FLD:
7390 case ITANIUM_CLASS_LD:
7391 mem_ops_in_group[current_cycle % 4]++;
7392 break;
7393 case ITANIUM_CLASS_STF:
7394 case ITANIUM_CLASS_ST:
7395 mem_ops_in_group[(current_cycle + 3) % 4]++;
7396 break;
7397 default:;
7398 }
7399 }
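/* For illustration (arbitrary cycle number): with current_cycle == 5, a
   load issued now is counted in mem_ops_in_group[5 % 4], i.e. slot 1,
   while a store is charged three cycles later, in
   mem_ops_in_group[(5 + 3) % 4], i.e. slot 0, which models the 3-cycle
   cache reference postponing for stores mentioned above.  */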
7400
7401 /* We are about to begin issuing insns for this clock cycle.
7402 Override the default sort algorithm to better slot instructions. */
7403
7404 static int
7405 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7406 int *pn_ready, int clock_var,
7407 int reorder_type)
7408 {
7409 int n_asms;
7410 int n_ready = *pn_ready;
7411 rtx_insn **e_ready = ready + n_ready;
7412 rtx_insn **insnp;
7413
7414 if (sched_verbose)
7415 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7416
7417 if (reorder_type == 0)
7418 {
7419 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7420 n_asms = 0;
7421 for (insnp = ready; insnp < e_ready; insnp++)
7422 if (insnp < e_ready)
7423 {
7424 rtx_insn *insn = *insnp;
7425 enum attr_type t = ia64_safe_type (insn);
7426 if (t == TYPE_UNKNOWN)
7427 {
7428 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7429 || asm_noperands (PATTERN (insn)) >= 0)
7430 {
7431 rtx_insn *lowest = ready[n_asms];
7432 ready[n_asms] = insn;
7433 *insnp = lowest;
7434 n_asms++;
7435 }
7436 else
7437 {
7438 rtx_insn *highest = ready[n_ready - 1];
7439 ready[n_ready - 1] = insn;
7440 *insnp = highest;
7441 return 1;
7442 }
7443 }
7444 }
7445
7446 if (n_asms < n_ready)
7447 {
7448 /* Some normal insns to process. Skip the asms. */
7449 ready += n_asms;
7450 n_ready -= n_asms;
7451 }
7452 else if (n_ready > 0)
7453 return 1;
7454 }
7455
7456 if (ia64_final_schedule)
7457 {
7458 int deleted = 0;
7459 int nr_need_stop = 0;
7460
7461 for (insnp = ready; insnp < e_ready; insnp++)
7462 if (safe_group_barrier_needed (*insnp))
7463 nr_need_stop++;
7464
7465 if (reorder_type == 1 && n_ready == nr_need_stop)
7466 return 0;
7467 if (reorder_type == 0)
7468 return 1;
7469 insnp = e_ready;
7470 /* Move down everything that needs a stop bit, preserving
7471 relative order. */
7472 while (insnp-- > ready + deleted)
7473 while (insnp >= ready + deleted)
7474 {
7475 rtx_insn *insn = *insnp;
7476 if (! safe_group_barrier_needed (insn))
7477 break;
7478 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7479 *ready = insn;
7480 deleted++;
7481 }
7482 n_ready -= deleted;
7483 ready += deleted;
7484 }
7485
7486 current_cycle = clock_var;
7487 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7488 {
7489 int moved = 0;
7490
7491 insnp = e_ready;
7492 /* Move down loads/stores, preserving relative order. */
7493 while (insnp-- > ready + moved)
7494 while (insnp >= ready + moved)
7495 {
7496 rtx_insn *insn = *insnp;
7497 if (! is_load_p (insn))
7498 break;
7499 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7500 *ready = insn;
7501 moved++;
7502 }
7503 n_ready -= moved;
7504 ready += moved;
7505 }
7506
7507 return 1;
7508 }
7509
7510 /* We are about to begin issuing insns for this clock cycle. Override
7511 the default sort algorithm to better slot instructions. */
7512
7513 static int
7514 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7515 int *pn_ready, int clock_var)
7516 {
7517 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7518 pn_ready, clock_var, 0);
7519 }
7520
7521 /* Like ia64_sched_reorder, but called after issuing each insn.
7522 Override the default sort algorithm to better slot instructions. */
7523
7524 static int
7525 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7526 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7527 int *pn_ready, int clock_var)
7528 {
7529 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7530 clock_var, 1);
7531 }
7532
7533 /* We are about to issue INSN. Return the number of insns left on the
7534 ready queue that can be issued this cycle. */
7535
7536 static int
7537 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7538 int sched_verbose ATTRIBUTE_UNUSED,
7539 rtx_insn *insn,
7540 int can_issue_more ATTRIBUTE_UNUSED)
7541 {
7542 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7543 /* Modulo scheduling does not extend h_i_d when emitting
7544 new instructions. Don't use h_i_d, if we don't have to. */
7545 {
7546 if (DONE_SPEC (insn) & BEGIN_DATA)
7547 pending_data_specs++;
7548 if (CHECK_SPEC (insn) & BEGIN_DATA)
7549 pending_data_specs--;
7550 }
7551
7552 if (DEBUG_INSN_P (insn))
7553 return 1;
7554
7555 last_scheduled_insn = insn;
7556 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7557 if (reload_completed)
7558 {
7559 int needed = group_barrier_needed (insn);
7560
7561 gcc_assert (!needed);
7562 if (CALL_P (insn))
7563 init_insn_group_barriers ();
7564 stops_p [INSN_UID (insn)] = stop_before_p;
7565 stop_before_p = 0;
7566
7567 record_memory_reference (insn);
7568 }
7569 return 1;
7570 }
7571
7572 /* We are choosing insn from the ready queue. Return zero if INSN
7573 can be chosen. */
7574
7575 static int
7576 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7577 {
7578 gcc_assert (insn && INSN_P (insn));
7579
7580 /* The size of the ALAT is 32. Since we perform conservative
7581 data speculation, we keep the ALAT half-empty. */
7582 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7583 return ready_index == 0 ? -1 : 1;
7584
7585 if (ready_index == 0)
7586 return 0;
7587
7588 if ((!reload_completed
7589 || !safe_group_barrier_needed (insn))
7590 && (!mflag_sched_mem_insns_hard_limit
7591 || !is_load_p (insn)
7592 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7593 return 0;
7594
7595 return 1;
7596 }
7597
7598 /* The following variable value is a pseudo-insn used by the DFA insn
7599 scheduler to change the DFA state when the simulated clock is
7600 increased. */
7601
7602 static rtx_insn *dfa_pre_cycle_insn;
7603
7604 /* Returns 1 when a meaningful insn was scheduled between the last group
7605 barrier and LAST. */
7606 static int
7607 scheduled_good_insn (rtx_insn *last)
7608 {
7609 if (last && recog_memoized (last) >= 0)
7610 return 1;
7611
7612 for ( ;
7613 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7614 && !stops_p[INSN_UID (last)];
7615 last = PREV_INSN (last))
7616 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7617 the ebb we're scheduling. */
7618 if (INSN_P (last) && recog_memoized (last) >= 0)
7619 return 1;
7620
7621 return 0;
7622 }
7623
7624 /* We are about to begin issuing INSN. Return nonzero if we cannot
7625 issue it on the given cycle CLOCK; *SORT_P is set to zero if the ready
7626 queue should not be sorted on the next clock start. */
7627
7628 static int
7629 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7630 int clock, int *sort_p)
7631 {
7632 gcc_assert (insn && INSN_P (insn));
7633
7634 if (DEBUG_INSN_P (insn))
7635 return 0;
7636
7637 /* When a group barrier is needed for insn, last_scheduled_insn
7638 should be set. */
7639 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7640 || last_scheduled_insn);
7641
7642 if ((reload_completed
7643 && (safe_group_barrier_needed (insn)
7644 || (mflag_sched_stop_bits_after_every_cycle
7645 && last_clock != clock
7646 && last_scheduled_insn
7647 && scheduled_good_insn (last_scheduled_insn))))
7648 || (last_scheduled_insn
7649 && (CALL_P (last_scheduled_insn)
7650 || unknown_for_bundling_p (last_scheduled_insn))))
7651 {
7652 init_insn_group_barriers ();
7653
7654 if (verbose && dump)
7655 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7656 last_clock == clock ? " + cycle advance" : "");
7657
7658 stop_before_p = 1;
7659 current_cycle = clock;
7660 mem_ops_in_group[current_cycle % 4] = 0;
7661
7662 if (last_clock == clock)
7663 {
7664 state_transition (curr_state, dfa_stop_insn);
7665 if (TARGET_EARLY_STOP_BITS)
7666 *sort_p = (last_scheduled_insn == NULL_RTX
7667 || ! CALL_P (last_scheduled_insn));
7668 else
7669 *sort_p = 0;
7670 return 1;
7671 }
7672
7673 if (last_scheduled_insn)
7674 {
7675 if (unknown_for_bundling_p (last_scheduled_insn))
7676 state_reset (curr_state);
7677 else
7678 {
7679 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7680 state_transition (curr_state, dfa_stop_insn);
7681 state_transition (curr_state, dfa_pre_cycle_insn);
7682 state_transition (curr_state, NULL);
7683 }
7684 }
7685 }
7686 return 0;
7687 }
7688
7689 /* Implement targetm.sched.h_i_d_extended hook.
7690 Extend internal data structures. */
7691 static void
7692 ia64_h_i_d_extended (void)
7693 {
7694 if (stops_p != NULL)
7695 {
7696 int new_clocks_length = get_max_uid () * 3 / 2;
7697 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7698 clocks_length = new_clocks_length;
7699 }
7700 }
7701 \f
7702
7703 /* This structure describes the data used by the backend to guide scheduling.
7704 When the current scheduling point is switched, this data should be saved
7705 and restored later, if the scheduler returns to this point. */
7706 struct _ia64_sched_context
7707 {
7708 state_t prev_cycle_state;
7709 rtx_insn *last_scheduled_insn;
7710 struct reg_write_state rws_sum[NUM_REGS];
7711 struct reg_write_state rws_insn[NUM_REGS];
7712 int first_instruction;
7713 int pending_data_specs;
7714 int current_cycle;
7715 char mem_ops_in_group[4];
7716 };
7717 typedef struct _ia64_sched_context *ia64_sched_context_t;
7718
7719 /* Allocates a scheduling context. */
7720 static void *
7721 ia64_alloc_sched_context (void)
7722 {
7723 return xmalloc (sizeof (struct _ia64_sched_context));
7724 }
7725
7726 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7727 the global context otherwise. */
7728 static void
7729 ia64_init_sched_context (void *_sc, bool clean_p)
7730 {
7731 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7732
7733 sc->prev_cycle_state = xmalloc (dfa_state_size);
7734 if (clean_p)
7735 {
7736 state_reset (sc->prev_cycle_state);
7737 sc->last_scheduled_insn = NULL;
7738 memset (sc->rws_sum, 0, sizeof (rws_sum));
7739 memset (sc->rws_insn, 0, sizeof (rws_insn));
7740 sc->first_instruction = 1;
7741 sc->pending_data_specs = 0;
7742 sc->current_cycle = 0;
7743 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7744 }
7745 else
7746 {
7747 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7748 sc->last_scheduled_insn = last_scheduled_insn;
7749 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7750 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7751 sc->first_instruction = first_instruction;
7752 sc->pending_data_specs = pending_data_specs;
7753 sc->current_cycle = current_cycle;
7754 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7755 }
7756 }
7757
7758 /* Sets the global scheduling context to the one pointed to by _SC. */
7759 static void
7760 ia64_set_sched_context (void *_sc)
7761 {
7762 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7763
7764 gcc_assert (sc != NULL);
7765
7766 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7767 last_scheduled_insn = sc->last_scheduled_insn;
7768 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7769 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7770 first_instruction = sc->first_instruction;
7771 pending_data_specs = sc->pending_data_specs;
7772 current_cycle = sc->current_cycle;
7773 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7774 }
7775
7776 /* Clears the data in the _SC scheduling context. */
7777 static void
7778 ia64_clear_sched_context (void *_sc)
7779 {
7780 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7781
7782 free (sc->prev_cycle_state);
7783 sc->prev_cycle_state = NULL;
7784 }
7785
7786 /* Frees the _SC scheduling context. */
7787 static void
7788 ia64_free_sched_context (void *_sc)
7789 {
7790 gcc_assert (_sc != NULL);
7791
7792 free (_sc);
7793 }
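/* A rough usage sketch of the five context hooks above (hypothetical
   caller; in GCC they are reached through the corresponding
   targetm.sched.* hooks rather than called directly):

       void *ctx = ia64_alloc_sched_context ();
       ia64_init_sched_context (ctx, false);   -- snapshot global state
       ... schedule at another point ...
       ia64_set_sched_context (ctx);           -- restore the snapshot
       ia64_clear_sched_context (ctx);
       ia64_free_sched_context (ctx);
*/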
7794
7795 typedef rtx (* gen_func_t) (rtx, rtx);
7796
7797 /* Return a function that will generate a load of mode MODE_NO
7798 with speculation types TS. */
7799 static gen_func_t
7800 get_spec_load_gen_function (ds_t ts, int mode_no)
7801 {
7802 static gen_func_t gen_ld_[] = {
7803 gen_movbi,
7804 gen_movqi_internal,
7805 gen_movhi_internal,
7806 gen_movsi_internal,
7807 gen_movdi_internal,
7808 gen_movsf_internal,
7809 gen_movdf_internal,
7810 gen_movxf_internal,
7811 gen_movti_internal,
7812 gen_zero_extendqidi2,
7813 gen_zero_extendhidi2,
7814 gen_zero_extendsidi2,
7815 };
7816
7817 static gen_func_t gen_ld_a[] = {
7818 gen_movbi_advanced,
7819 gen_movqi_advanced,
7820 gen_movhi_advanced,
7821 gen_movsi_advanced,
7822 gen_movdi_advanced,
7823 gen_movsf_advanced,
7824 gen_movdf_advanced,
7825 gen_movxf_advanced,
7826 gen_movti_advanced,
7827 gen_zero_extendqidi2_advanced,
7828 gen_zero_extendhidi2_advanced,
7829 gen_zero_extendsidi2_advanced,
7830 };
7831 static gen_func_t gen_ld_s[] = {
7832 gen_movbi_speculative,
7833 gen_movqi_speculative,
7834 gen_movhi_speculative,
7835 gen_movsi_speculative,
7836 gen_movdi_speculative,
7837 gen_movsf_speculative,
7838 gen_movdf_speculative,
7839 gen_movxf_speculative,
7840 gen_movti_speculative,
7841 gen_zero_extendqidi2_speculative,
7842 gen_zero_extendhidi2_speculative,
7843 gen_zero_extendsidi2_speculative,
7844 };
7845 static gen_func_t gen_ld_sa[] = {
7846 gen_movbi_speculative_advanced,
7847 gen_movqi_speculative_advanced,
7848 gen_movhi_speculative_advanced,
7849 gen_movsi_speculative_advanced,
7850 gen_movdi_speculative_advanced,
7851 gen_movsf_speculative_advanced,
7852 gen_movdf_speculative_advanced,
7853 gen_movxf_speculative_advanced,
7854 gen_movti_speculative_advanced,
7855 gen_zero_extendqidi2_speculative_advanced,
7856 gen_zero_extendhidi2_speculative_advanced,
7857 gen_zero_extendsidi2_speculative_advanced,
7858 };
7859 static gen_func_t gen_ld_s_a[] = {
7860 gen_movbi_speculative_a,
7861 gen_movqi_speculative_a,
7862 gen_movhi_speculative_a,
7863 gen_movsi_speculative_a,
7864 gen_movdi_speculative_a,
7865 gen_movsf_speculative_a,
7866 gen_movdf_speculative_a,
7867 gen_movxf_speculative_a,
7868 gen_movti_speculative_a,
7869 gen_zero_extendqidi2_speculative_a,
7870 gen_zero_extendhidi2_speculative_a,
7871 gen_zero_extendsidi2_speculative_a,
7872 };
7873
7874 gen_func_t *gen_ld;
7875
7876 if (ts & BEGIN_DATA)
7877 {
7878 if (ts & BEGIN_CONTROL)
7879 gen_ld = gen_ld_sa;
7880 else
7881 gen_ld = gen_ld_a;
7882 }
7883 else if (ts & BEGIN_CONTROL)
7884 {
7885 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7886 || ia64_needs_block_p (ts))
7887 gen_ld = gen_ld_s;
7888 else
7889 gen_ld = gen_ld_s_a;
7890 }
7891 else if (ts == 0)
7892 gen_ld = gen_ld_;
7893 else
7894 gcc_unreachable ();
7895
7896 return gen_ld[mode_no];
7897 }
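/* For example, a purely data-speculative DImode load (TS with BEGIN_DATA
   set and BEGIN_CONTROL clear) selects gen_ld_a[4], i.e.
   gen_movdi_advanced, which presumably expands to the ld8.a form of the
   move; the _sa, _s and _s_a tables are chosen analogously when control
   speculation is involved.  */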
7898
7899 /* Constants that help map 'machine_mode' to int. */
7900 enum SPEC_MODES
7901 {
7902 SPEC_MODE_INVALID = -1,
7903 SPEC_MODE_FIRST = 0,
7904 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7905 SPEC_MODE_FOR_EXTEND_LAST = 3,
7906 SPEC_MODE_LAST = 8
7907 };
7908
7909 enum
7910 {
7911 /* Offset to reach ZERO_EXTEND patterns. */
7912 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7913 };
7914
7915 /* Return index of the MODE. */
7916 static int
7917 ia64_mode_to_int (machine_mode mode)
7918 {
7919 switch (mode)
7920 {
7921 case BImode: return 0; /* SPEC_MODE_FIRST */
7922 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7923 case HImode: return 2;
7924 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7925 case DImode: return 4;
7926 case SFmode: return 5;
7927 case DFmode: return 6;
7928 case XFmode: return 7;
7929 case TImode:
7930 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7931 mentioned in itanium[12].md. Predicate fp_register_operand also
7932 needs to be defined. Bottom line: better disable for now. */
7933 return SPEC_MODE_INVALID;
7934 default: return SPEC_MODE_INVALID;
7935 }
7936 }
7937
7938 /* Provide information about speculation capabilities. */
7939 static void
7940 ia64_set_sched_flags (spec_info_t spec_info)
7941 {
7942 unsigned int *flags = &(current_sched_info->flags);
7943
7944 if (*flags & SCHED_RGN
7945 || *flags & SCHED_EBB
7946 || *flags & SEL_SCHED)
7947 {
7948 int mask = 0;
7949
7950 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7951 || (mflag_sched_ar_data_spec && reload_completed))
7952 {
7953 mask |= BEGIN_DATA;
7954
7955 if (!sel_sched_p ()
7956 && ((mflag_sched_br_in_data_spec && !reload_completed)
7957 || (mflag_sched_ar_in_data_spec && reload_completed)))
7958 mask |= BE_IN_DATA;
7959 }
7960
7961 if (mflag_sched_control_spec
7962 && (!sel_sched_p ()
7963 || reload_completed))
7964 {
7965 mask |= BEGIN_CONTROL;
7966
7967 if (!sel_sched_p () && mflag_sched_in_control_spec)
7968 mask |= BE_IN_CONTROL;
7969 }
7970
7971 spec_info->mask = mask;
7972
7973 if (mask)
7974 {
7975 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7976
7977 if (mask & BE_IN_SPEC)
7978 *flags |= NEW_BBS;
7979
7980 spec_info->flags = 0;
7981
7982 if ((mask & CONTROL_SPEC)
7983 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7984 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7985
7986 if (sched_verbose >= 1)
7987 spec_info->dump = sched_dump;
7988 else
7989 spec_info->dump = 0;
7990
7991 if (mflag_sched_count_spec_in_critical_path)
7992 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7993 }
7994 }
7995 else
7996 spec_info->mask = 0;
7997 }
7998
7999 /* If INSN is an appropriate load, return its mode index (adjusted for
8000 ZERO_EXTEND, if any). Return -1 otherwise. */
8001 static int
8002 get_mode_no_for_insn (rtx_insn *insn)
8003 {
8004 rtx reg, mem, mode_rtx;
8005 int mode_no;
8006 bool extend_p;
8007
8008 extract_insn_cached (insn);
8009
8010 /* We use WHICH_ALTERNATIVE only after reload. This will
8011 guarantee that reload won't touch a speculative insn. */
8012
8013 if (recog_data.n_operands != 2)
8014 return -1;
8015
8016 reg = recog_data.operand[0];
8017 mem = recog_data.operand[1];
8018
8019 /* We should use MEM's mode since REG's mode in the presence of
8020 ZERO_EXTEND will always be DImode. */
8021 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8022 /* Process non-speculative ld. */
8023 {
8024 if (!reload_completed)
8025 {
8026 /* Do not speculate into regs like ar.lc. */
8027 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8028 return -1;
8029
8030 if (!MEM_P (mem))
8031 return -1;
8032
8033 {
8034 rtx mem_reg = XEXP (mem, 0);
8035
8036 if (!REG_P (mem_reg))
8037 return -1;
8038 }
8039
8040 mode_rtx = mem;
8041 }
8042 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8043 {
8044 gcc_assert (REG_P (reg) && MEM_P (mem));
8045 mode_rtx = mem;
8046 }
8047 else
8048 return -1;
8049 }
8050 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8051 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8052 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8053 /* Process speculative ld or ld.c. */
8054 {
8055 gcc_assert (REG_P (reg) && MEM_P (mem));
8056 mode_rtx = mem;
8057 }
8058 else
8059 {
8060 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8061
8062 if (attr_class == ITANIUM_CLASS_CHK_A
8063 || attr_class == ITANIUM_CLASS_CHK_S_I
8064 || attr_class == ITANIUM_CLASS_CHK_S_F)
8065 /* Process chk. */
8066 mode_rtx = reg;
8067 else
8068 return -1;
8069 }
8070
8071 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8072
8073 if (mode_no == SPEC_MODE_INVALID)
8074 return -1;
8075
8076 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8077
8078 if (extend_p)
8079 {
8080 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8081 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8082 return -1;
8083
8084 mode_no += SPEC_GEN_EXTEND_OFFSET;
8085 }
8086
8087 return mode_no;
8088 }
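/* A worked example of the mapping above: a QImode memory zero-extended
   into a DImode register gives ia64_mode_to_int (QImode) == 1; since the
   modes differ, SPEC_GEN_EXTEND_OFFSET == SPEC_MODE_LAST
   - SPEC_MODE_FOR_EXTEND_FIRST + 1 == 8 is added, so the returned mode
   number is 9, which selects the gen_zero_extendqidi2* entries of the
   tables in get_spec_load_gen_function.  */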
8089
8090 /* If X is an unspec part of a speculative load, return its code.
8091 Return -1 otherwise. */
8092 static int
8093 get_spec_unspec_code (const_rtx x)
8094 {
8095 if (GET_CODE (x) != UNSPEC)
8096 return -1;
8097
8098 {
8099 int code;
8100
8101 code = XINT (x, 1);
8102
8103 switch (code)
8104 {
8105 case UNSPEC_LDA:
8106 case UNSPEC_LDS:
8107 case UNSPEC_LDS_A:
8108 case UNSPEC_LDSA:
8109 return code;
8110
8111 default:
8112 return -1;
8113 }
8114 }
8115 }
8116
8117 /* Implement skip_rtx_p hook. */
8118 static bool
8119 ia64_skip_rtx_p (const_rtx x)
8120 {
8121 return get_spec_unspec_code (x) != -1;
8122 }
8123
8124 /* If INSN is a speculative load, return its UNSPEC code.
8125 Return -1 otherwise. */
8126 static int
8127 get_insn_spec_code (const_rtx insn)
8128 {
8129 rtx pat, reg, mem;
8130
8131 pat = PATTERN (insn);
8132
8133 if (GET_CODE (pat) == COND_EXEC)
8134 pat = COND_EXEC_CODE (pat);
8135
8136 if (GET_CODE (pat) != SET)
8137 return -1;
8138
8139 reg = SET_DEST (pat);
8140 if (!REG_P (reg))
8141 return -1;
8142
8143 mem = SET_SRC (pat);
8144 if (GET_CODE (mem) == ZERO_EXTEND)
8145 mem = XEXP (mem, 0);
8146
8147 return get_spec_unspec_code (mem);
8148 }
8149
8150 /* If INSN is a speculative load, return a ds with the speculation types.
8151 Otherwise [if INSN is a normal instruction] return 0. */
8152 static ds_t
8153 ia64_get_insn_spec_ds (rtx_insn *insn)
8154 {
8155 int code = get_insn_spec_code (insn);
8156
8157 switch (code)
8158 {
8159 case UNSPEC_LDA:
8160 return BEGIN_DATA;
8161
8162 case UNSPEC_LDS:
8163 case UNSPEC_LDS_A:
8164 return BEGIN_CONTROL;
8165
8166 case UNSPEC_LDSA:
8167 return BEGIN_DATA | BEGIN_CONTROL;
8168
8169 default:
8170 return 0;
8171 }
8172 }
8173
8174 /* If INSN is a speculative load return a ds with the speculation types that
8175 will be checked.
8176 Otherwise [if INSN is a normal instruction] return 0. */
8177 static ds_t
8178 ia64_get_insn_checked_ds (rtx_insn *insn)
8179 {
8180 int code = get_insn_spec_code (insn);
8181
8182 switch (code)
8183 {
8184 case UNSPEC_LDA:
8185 return BEGIN_DATA | BEGIN_CONTROL;
8186
8187 case UNSPEC_LDS:
8188 return BEGIN_CONTROL;
8189
8190 case UNSPEC_LDS_A:
8191 case UNSPEC_LDSA:
8192 return BEGIN_DATA | BEGIN_CONTROL;
8193
8194 default:
8195 return 0;
8196 }
8197 }
8198
8199 /* Return a speculative load pattern for INSN with speculation types TS
8200 and machine mode MODE_NO. The new pattern reuses INSN's current
8201 operands and preserves a COND_EXEC wrapper, if any. Speculation
8202 checks are generated separately by ia64_gen_spec_check. */
8203 static rtx
8204 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8205 {
8206 rtx pat, new_pat;
8207 gen_func_t gen_load;
8208
8209 gen_load = get_spec_load_gen_function (ts, mode_no);
8210
8211 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8212 copy_rtx (recog_data.operand[1]));
8213
8214 pat = PATTERN (insn);
8215 if (GET_CODE (pat) == COND_EXEC)
8216 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8217 new_pat);
8218
8219 return new_pat;
8220 }
8221
8222 static bool
8223 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8224 ds_t ds ATTRIBUTE_UNUSED)
8225 {
8226 return false;
8227 }
8228
8229 /* Implement targetm.sched.speculate_insn hook.
8230 Check if the INSN can be TS speculative.
8231 If 'no' - return -1.
8232 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8233 If current pattern of the INSN already provides TS speculation,
8234 return 0. */
8235 static int
8236 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8237 {
8238 int mode_no;
8239 int res;
8240
8241 gcc_assert (!(ts & ~SPECULATIVE));
8242
8243 if (ia64_spec_check_p (insn))
8244 return -1;
8245
8246 if ((ts & BE_IN_SPEC)
8247 && !insn_can_be_in_speculative_p (insn, ts))
8248 return -1;
8249
8250 mode_no = get_mode_no_for_insn (insn);
8251
8252 if (mode_no != SPEC_MODE_INVALID)
8253 {
8254 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8255 res = 0;
8256 else
8257 {
8258 res = 1;
8259 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8260 }
8261 }
8262 else
8263 res = -1;
8264
8265 return res;
8266 }
8267
8268 /* Return a function that will generate a check for speculation TS with mode
8269 MODE_NO.
8270 If a simple check is needed, pass true for SIMPLE_CHECK_P.
8271 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
8272 static gen_func_t
8273 get_spec_check_gen_function (ds_t ts, int mode_no,
8274 bool simple_check_p, bool clearing_check_p)
8275 {
8276 static gen_func_t gen_ld_c_clr[] = {
8277 gen_movbi_clr,
8278 gen_movqi_clr,
8279 gen_movhi_clr,
8280 gen_movsi_clr,
8281 gen_movdi_clr,
8282 gen_movsf_clr,
8283 gen_movdf_clr,
8284 gen_movxf_clr,
8285 gen_movti_clr,
8286 gen_zero_extendqidi2_clr,
8287 gen_zero_extendhidi2_clr,
8288 gen_zero_extendsidi2_clr,
8289 };
8290 static gen_func_t gen_ld_c_nc[] = {
8291 gen_movbi_nc,
8292 gen_movqi_nc,
8293 gen_movhi_nc,
8294 gen_movsi_nc,
8295 gen_movdi_nc,
8296 gen_movsf_nc,
8297 gen_movdf_nc,
8298 gen_movxf_nc,
8299 gen_movti_nc,
8300 gen_zero_extendqidi2_nc,
8301 gen_zero_extendhidi2_nc,
8302 gen_zero_extendsidi2_nc,
8303 };
8304 static gen_func_t gen_chk_a_clr[] = {
8305 gen_advanced_load_check_clr_bi,
8306 gen_advanced_load_check_clr_qi,
8307 gen_advanced_load_check_clr_hi,
8308 gen_advanced_load_check_clr_si,
8309 gen_advanced_load_check_clr_di,
8310 gen_advanced_load_check_clr_sf,
8311 gen_advanced_load_check_clr_df,
8312 gen_advanced_load_check_clr_xf,
8313 gen_advanced_load_check_clr_ti,
8314 gen_advanced_load_check_clr_di,
8315 gen_advanced_load_check_clr_di,
8316 gen_advanced_load_check_clr_di,
8317 };
8318 static gen_func_t gen_chk_a_nc[] = {
8319 gen_advanced_load_check_nc_bi,
8320 gen_advanced_load_check_nc_qi,
8321 gen_advanced_load_check_nc_hi,
8322 gen_advanced_load_check_nc_si,
8323 gen_advanced_load_check_nc_di,
8324 gen_advanced_load_check_nc_sf,
8325 gen_advanced_load_check_nc_df,
8326 gen_advanced_load_check_nc_xf,
8327 gen_advanced_load_check_nc_ti,
8328 gen_advanced_load_check_nc_di,
8329 gen_advanced_load_check_nc_di,
8330 gen_advanced_load_check_nc_di,
8331 };
8332 static gen_func_t gen_chk_s[] = {
8333 gen_speculation_check_bi,
8334 gen_speculation_check_qi,
8335 gen_speculation_check_hi,
8336 gen_speculation_check_si,
8337 gen_speculation_check_di,
8338 gen_speculation_check_sf,
8339 gen_speculation_check_df,
8340 gen_speculation_check_xf,
8341 gen_speculation_check_ti,
8342 gen_speculation_check_di,
8343 gen_speculation_check_di,
8344 gen_speculation_check_di,
8345 };
8346
8347 gen_func_t *gen_check;
8348
8349 if (ts & BEGIN_DATA)
8350 {
8351 /* We don't need recovery because even if this is ld.sa, the
8352 ALAT entry will be allocated only if the NAT bit is set to zero.
8353 So it is enough to use ld.c here. */
8354
8355 if (simple_check_p)
8356 {
8357 gcc_assert (mflag_sched_spec_ldc);
8358
8359 if (clearing_check_p)
8360 gen_check = gen_ld_c_clr;
8361 else
8362 gen_check = gen_ld_c_nc;
8363 }
8364 else
8365 {
8366 if (clearing_check_p)
8367 gen_check = gen_chk_a_clr;
8368 else
8369 gen_check = gen_chk_a_nc;
8370 }
8371 }
8372 else if (ts & BEGIN_CONTROL)
8373 {
8374 if (simple_check_p)
8375 /* We might want to use ld.sa -> ld.c instead of
8376 ld.s -> chk.s. */
8377 {
8378 gcc_assert (!ia64_needs_block_p (ts));
8379
8380 if (clearing_check_p)
8381 gen_check = gen_ld_c_clr;
8382 else
8383 gen_check = gen_ld_c_nc;
8384 }
8385 else
8386 {
8387 gen_check = gen_chk_s;
8388 }
8389 }
8390 else
8391 gcc_unreachable ();
8392
8393 gcc_assert (mode_no >= 0);
8394 return gen_check[mode_no];
8395 }
8396
8397 /* Return nonzero if speculation TS needs a branchy recovery check. */
8398 static bool
8399 ia64_needs_block_p (ds_t ts)
8400 {
8401 if (ts & BEGIN_DATA)
8402 return !mflag_sched_spec_ldc;
8403
8404 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8405
8406 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8407 }
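/* For example, if -msched-spec-ldc is enabled, a data-speculative load
   can be verified with the non-branchy ld.c, so no recovery block is
   needed; otherwise a chk.a that branches to recovery code must be
   generated, and ia64_gen_spec_check below receives a LABEL operand.  */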
8408
8409 /* Generate (or regenerate) a recovery check for INSN. */
8410 static rtx
8411 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8412 {
8413 rtx op1, pat, check_pat;
8414 gen_func_t gen_check;
8415 int mode_no;
8416
8417 mode_no = get_mode_no_for_insn (insn);
8418 gcc_assert (mode_no >= 0);
8419
8420 if (label)
8421 op1 = label;
8422 else
8423 {
8424 gcc_assert (!ia64_needs_block_p (ds));
8425 op1 = copy_rtx (recog_data.operand[1]);
8426 }
8427
8428 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8429 true);
8430
8431 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8432
8433 pat = PATTERN (insn);
8434 if (GET_CODE (pat) == COND_EXEC)
8435 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8436 check_pat);
8437
8438 return check_pat;
8439 }
8440
8441 /* Return nonzero if X is a branchy recovery check. */
8442 static int
8443 ia64_spec_check_p (rtx x)
8444 {
8445 x = PATTERN (x);
8446 if (GET_CODE (x) == COND_EXEC)
8447 x = COND_EXEC_CODE (x);
8448 if (GET_CODE (x) == SET)
8449 return ia64_spec_check_src_p (SET_SRC (x));
8450 return 0;
8451 }
8452
8453 /* Return nonzero if SRC belongs to a recovery check. */
8454 static int
8455 ia64_spec_check_src_p (rtx src)
8456 {
8457 if (GET_CODE (src) == IF_THEN_ELSE)
8458 {
8459 rtx t;
8460
8461 t = XEXP (src, 0);
8462 if (GET_CODE (t) == NE)
8463 {
8464 t = XEXP (t, 0);
8465
8466 if (GET_CODE (t) == UNSPEC)
8467 {
8468 int code;
8469
8470 code = XINT (t, 1);
8471
8472 if (code == UNSPEC_LDCCLR
8473 || code == UNSPEC_LDCNC
8474 || code == UNSPEC_CHKACLR
8475 || code == UNSPEC_CHKANC
8476 || code == UNSPEC_CHKS)
8477 {
8478 gcc_assert (code != 0);
8479 return code;
8480 }
8481 }
8482 }
8483 }
8484 return 0;
8485 }
8486 \f
8487
8488 /* The following page contains abstract data `bundle states' which are
8489 used for bundling insns (inserting nops and template generation). */
8490
8491 /* The following describes state of insn bundling. */
8492
8493 struct bundle_state
8494 {
8495 /* Unique bundle state number to identify them in the debugging
8496 output */
8497 int unique_num;
8498 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8499 /* number of nops before and after the insn */
8500 short before_nops_num, after_nops_num;
8501 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8502 insn) */
8503 int cost; /* cost of the state in cycles */
8504 int accumulated_insns_num; /* number of all previous insns including
8505 nops. L is considered as 2 insns */
8506 int branch_deviation; /* deviation of previous branches from 3rd slots */
8507 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8508 struct bundle_state *next; /* next state with the same insn_num */
8509 struct bundle_state *originator; /* originator (previous insn state) */
8510 /* All bundle states are in the following chain. */
8511 struct bundle_state *allocated_states_chain;
8512 /* The DFA State after issuing the insn and the nops. */
8513 state_t dfa_state;
8514 };
8515
8516 /* The following maps an insn number to the corresponding bundle state. */
8517
8518 static struct bundle_state **index_to_bundle_states;
8519
8520 /* The unique number of next bundle state. */
8521
8522 static int bundle_states_num;
8523
8524 /* All allocated bundle states are in the following chain. */
8525
8526 static struct bundle_state *allocated_bundle_states_chain;
8527
8528 /* All allocated but not used bundle states are in the following
8529 chain. */
8530
8531 static struct bundle_state *free_bundle_state_chain;
8532
8533
8534 /* The following function returns a free bundle state. */
8535
8536 static struct bundle_state *
8537 get_free_bundle_state (void)
8538 {
8539 struct bundle_state *result;
8540
8541 if (free_bundle_state_chain != NULL)
8542 {
8543 result = free_bundle_state_chain;
8544 free_bundle_state_chain = result->next;
8545 }
8546 else
8547 {
8548 result = XNEW (struct bundle_state);
8549 result->dfa_state = xmalloc (dfa_state_size);
8550 result->allocated_states_chain = allocated_bundle_states_chain;
8551 allocated_bundle_states_chain = result;
8552 }
8553 result->unique_num = bundle_states_num++;
8554 return result;
8555
8556 }
8557
8558 /* The following function frees given bundle state. */
8559
8560 static void
8561 free_bundle_state (struct bundle_state *state)
8562 {
8563 state->next = free_bundle_state_chain;
8564 free_bundle_state_chain = state;
8565 }
8566
8567 /* Start work with abstract data `bundle states'. */
8568
8569 static void
8570 initiate_bundle_states (void)
8571 {
8572 bundle_states_num = 0;
8573 free_bundle_state_chain = NULL;
8574 allocated_bundle_states_chain = NULL;
8575 }
8576
8577 /* Finish work with abstract data `bundle states'. */
8578
8579 static void
8580 finish_bundle_states (void)
8581 {
8582 struct bundle_state *curr_state, *next_state;
8583
8584 for (curr_state = allocated_bundle_states_chain;
8585 curr_state != NULL;
8586 curr_state = next_state)
8587 {
8588 next_state = curr_state->allocated_states_chain;
8589 free (curr_state->dfa_state);
8590 free (curr_state);
8591 }
8592 }
8593
8594 /* Hashtable helpers. */
8595
8596 struct bundle_state_hasher : typed_noop_remove <bundle_state>
8597 {
8598 typedef bundle_state *value_type;
8599 typedef bundle_state *compare_type;
8600 static inline hashval_t hash (const bundle_state *);
8601 static inline bool equal (const bundle_state *, const bundle_state *);
8602 };
8603
8604 /* The function returns hash of BUNDLE_STATE. */
8605
8606 inline hashval_t
8607 bundle_state_hasher::hash (const bundle_state *state)
8608 {
8609 unsigned result, i;
8610
8611 for (result = i = 0; i < dfa_state_size; i++)
8612 result += (((unsigned char *) state->dfa_state) [i]
8613 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8614 return result + state->insn_num;
8615 }
8616
8617 /* The function returns nonzero if the bundle state keys are equal. */
8618
8619 inline bool
8620 bundle_state_hasher::equal (const bundle_state *state1,
8621 const bundle_state *state2)
8622 {
8623 return (state1->insn_num == state2->insn_num
8624 && memcmp (state1->dfa_state, state2->dfa_state,
8625 dfa_state_size) == 0);
8626 }
8627
8628 /* Hash table of the bundle states. The key is dfa_state and insn_num
8629 of the bundle states. */
8630
8631 static hash_table<bundle_state_hasher> *bundle_state_table;
8632
8633 /* The function inserts the BUNDLE_STATE into the hash table. The
8634 function returns nonzero if the bundle has been inserted into the
8635 table. The table contains the best bundle state with given key. */
8636
8637 static int
8638 insert_bundle_state (struct bundle_state *bundle_state)
8639 {
8640 struct bundle_state **entry_ptr;
8641
8642 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8643 if (*entry_ptr == NULL)
8644 {
8645 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8646 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8647 *entry_ptr = bundle_state;
8648 return TRUE;
8649 }
8650 else if (bundle_state->cost < (*entry_ptr)->cost
8651 || (bundle_state->cost == (*entry_ptr)->cost
8652 && ((*entry_ptr)->accumulated_insns_num
8653 > bundle_state->accumulated_insns_num
8654 || ((*entry_ptr)->accumulated_insns_num
8655 == bundle_state->accumulated_insns_num
8656 && ((*entry_ptr)->branch_deviation
8657 > bundle_state->branch_deviation
8658 || ((*entry_ptr)->branch_deviation
8659 == bundle_state->branch_deviation
8660 && (*entry_ptr)->middle_bundle_stops
8661 > bundle_state->middle_bundle_stops))))))
8662
8663 {
8664 struct bundle_state temp;
8665
8666 temp = **entry_ptr;
8667 **entry_ptr = *bundle_state;
8668 (*entry_ptr)->next = temp.next;
8669 *bundle_state = temp;
8670 }
8671 return FALSE;
8672 }
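/* The chained comparison above keeps the state that is smaller in the
   lexicographic order (cost, accumulated_insns_num, branch_deviation,
   middle_bundle_stops); for example, of two states with equal cost, the
   one with fewer accumulated insns (i.e. fewer inserted nops) replaces
   the one stored in the table.  */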
8673
8674 /* Start work with the hash table. */
8675
8676 static void
8677 initiate_bundle_state_table (void)
8678 {
8679 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8680 }
8681
8682 /* Finish work with the hash table. */
8683
8684 static void
8685 finish_bundle_state_table (void)
8686 {
8687 delete bundle_state_table;
8688 bundle_state_table = NULL;
8689 }
8690
8691 \f
8692
8693 /* The following variable is an insn `nop' used to check bundle states
8694 with different numbers of inserted nops. */
8695
8696 static rtx_insn *ia64_nop;
8697
8698 /* The following function tries to issue NOPS_NUM nops for the current
8699 state without advancing the processor cycle. If this fails, the
8700 function returns FALSE and frees the current state. */
8701
8702 static int
8703 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8704 {
8705 int i;
8706
8707 for (i = 0; i < nops_num; i++)
8708 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8709 {
8710 free_bundle_state (curr_state);
8711 return FALSE;
8712 }
8713 return TRUE;
8714 }
8715
8716 /* The following function tries to issue INSN for the current
8717 state without advancing the processor cycle. If this fails, the
8718 function returns FALSE and frees the current state. */
8719
8720 static int
8721 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8722 {
8723 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8724 {
8725 free_bundle_state (curr_state);
8726 return FALSE;
8727 }
8728 return TRUE;
8729 }
8730
8731 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8732 starting from the state ORIGINATOR without advancing the processor
8733 cycle. If TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8734 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8735 If successful, the function creates a new bundle state and inserts it
8736 into the hash table and into `index_to_bundle_states'. */
8737
8738 static void
8739 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8740 rtx_insn *insn, int try_bundle_end_p,
8741 int only_bundle_end_p)
8742 {
8743 struct bundle_state *curr_state;
8744
8745 curr_state = get_free_bundle_state ();
8746 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8747 curr_state->insn = insn;
8748 curr_state->insn_num = originator->insn_num + 1;
8749 curr_state->cost = originator->cost;
8750 curr_state->originator = originator;
8751 curr_state->before_nops_num = before_nops_num;
8752 curr_state->after_nops_num = 0;
8753 curr_state->accumulated_insns_num
8754 = originator->accumulated_insns_num + before_nops_num;
8755 curr_state->branch_deviation = originator->branch_deviation;
8756 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8757 gcc_assert (insn);
8758 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8759 {
8760 gcc_assert (GET_MODE (insn) != TImode);
8761 if (!try_issue_nops (curr_state, before_nops_num))
8762 return;
8763 if (!try_issue_insn (curr_state, insn))
8764 return;
8765 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8766 if (curr_state->accumulated_insns_num % 3 != 0)
8767 curr_state->middle_bundle_stops++;
8768 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8769 && curr_state->accumulated_insns_num % 3 != 0)
8770 {
8771 free_bundle_state (curr_state);
8772 return;
8773 }
8774 }
8775 else if (GET_MODE (insn) != TImode)
8776 {
8777 if (!try_issue_nops (curr_state, before_nops_num))
8778 return;
8779 if (!try_issue_insn (curr_state, insn))
8780 return;
8781 curr_state->accumulated_insns_num++;
8782 gcc_assert (!unknown_for_bundling_p (insn));
8783
8784 if (ia64_safe_type (insn) == TYPE_L)
8785 curr_state->accumulated_insns_num++;
8786 }
8787 else
8788 {
8789 /* If this is an insn that must be first in a group, then don't allow
8790 nops to be emitted before it. Currently, alloc is the only such
8791 supported instruction. */
8792 /* ??? The bundling automatons should handle this for us, but they do
8793 not yet have support for the first_insn attribute. */
8794 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8795 {
8796 free_bundle_state (curr_state);
8797 return;
8798 }
8799
8800 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8801 state_transition (curr_state->dfa_state, NULL);
8802 curr_state->cost++;
8803 if (!try_issue_nops (curr_state, before_nops_num))
8804 return;
8805 if (!try_issue_insn (curr_state, insn))
8806 return;
8807 curr_state->accumulated_insns_num++;
8808 if (unknown_for_bundling_p (insn))
8809 {
8810 /* Finish bundle containing asm insn. */
8811 curr_state->after_nops_num
8812 = 3 - curr_state->accumulated_insns_num % 3;
8813 curr_state->accumulated_insns_num
8814 += 3 - curr_state->accumulated_insns_num % 3;
8815 }
8816 else if (ia64_safe_type (insn) == TYPE_L)
8817 curr_state->accumulated_insns_num++;
8818 }
8819 if (ia64_safe_type (insn) == TYPE_B)
8820 curr_state->branch_deviation
8821 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8822 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8823 {
8824 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8825 {
8826 state_t dfa_state;
8827 struct bundle_state *curr_state1;
8828 struct bundle_state *allocated_states_chain;
8829
8830 curr_state1 = get_free_bundle_state ();
8831 dfa_state = curr_state1->dfa_state;
8832 allocated_states_chain = curr_state1->allocated_states_chain;
8833 *curr_state1 = *curr_state;
8834 curr_state1->dfa_state = dfa_state;
8835 curr_state1->allocated_states_chain = allocated_states_chain;
8836 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8837 dfa_state_size);
8838 curr_state = curr_state1;
8839 }
8840 if (!try_issue_nops (curr_state,
8841 3 - curr_state->accumulated_insns_num % 3))
8842 return;
8843 curr_state->after_nops_num
8844 = 3 - curr_state->accumulated_insns_num % 3;
8845 curr_state->accumulated_insns_num
8846 += 3 - curr_state->accumulated_insns_num % 3;
8847 }
8848 if (!insert_bundle_state (curr_state))
8849 free_bundle_state (curr_state);
8850 return;
8851 }
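/* A worked example of the branch_deviation update above: a TYPE_B insn
   that ends up as the 3rd insn of its bundle has
   accumulated_insns_num % 3 == 0 afterwards, so 2 - (n - 1) % 3 adds 0;
   placed in the 2nd slot it adds 1, and in the 1st slot it adds 2, i.e.
   the field accumulates the distance of branches from the 3rd slot.  */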
8852
8853 /* The following function returns the position in the two-bundle window
8854 for the given STATE. */
8855
8856 static int
8857 get_max_pos (state_t state)
8858 {
8859 if (cpu_unit_reservation_p (state, pos_6))
8860 return 6;
8861 else if (cpu_unit_reservation_p (state, pos_5))
8862 return 5;
8863 else if (cpu_unit_reservation_p (state, pos_4))
8864 return 4;
8865 else if (cpu_unit_reservation_p (state, pos_3))
8866 return 3;
8867 else if (cpu_unit_reservation_p (state, pos_2))
8868 return 2;
8869 else if (cpu_unit_reservation_p (state, pos_1))
8870 return 1;
8871 else
8872 return 0;
8873 }
8874
8875 /* The function returns the code of a possible template for the given
8876 position and state. The function should be called only with two values
8877 of position: 3 or 6. We avoid generating F NOPs by putting templates
8878 containing F insns at the end of the template search, because of an
8879 undocumented anomaly in McKinley-derived cores which can cause stalls
8880 if an F-unit insn (including a NOP) is issued within a six-cycle window
8881 after reading certain application registers (such as ar.bsp).
8882 Furthermore, power considerations also argue against the use of F-unit
8883 instructions unless they're really needed. */
8884
8885 static int
8886 get_template (state_t state, int pos)
8887 {
8888 switch (pos)
8889 {
8890 case 3:
8891 if (cpu_unit_reservation_p (state, _0mmi_))
8892 return 1;
8893 else if (cpu_unit_reservation_p (state, _0mii_))
8894 return 0;
8895 else if (cpu_unit_reservation_p (state, _0mmb_))
8896 return 7;
8897 else if (cpu_unit_reservation_p (state, _0mib_))
8898 return 6;
8899 else if (cpu_unit_reservation_p (state, _0mbb_))
8900 return 5;
8901 else if (cpu_unit_reservation_p (state, _0bbb_))
8902 return 4;
8903 else if (cpu_unit_reservation_p (state, _0mmf_))
8904 return 3;
8905 else if (cpu_unit_reservation_p (state, _0mfi_))
8906 return 2;
8907 else if (cpu_unit_reservation_p (state, _0mfb_))
8908 return 8;
8909 else if (cpu_unit_reservation_p (state, _0mlx_))
8910 return 9;
8911 else
8912 gcc_unreachable ();
8913 case 6:
8914 if (cpu_unit_reservation_p (state, _1mmi_))
8915 return 1;
8916 else if (cpu_unit_reservation_p (state, _1mii_))
8917 return 0;
8918 else if (cpu_unit_reservation_p (state, _1mmb_))
8919 return 7;
8920 else if (cpu_unit_reservation_p (state, _1mib_))
8921 return 6;
8922 else if (cpu_unit_reservation_p (state, _1mbb_))
8923 return 5;
8924 else if (cpu_unit_reservation_p (state, _1bbb_))
8925 return 4;
8926 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8927 return 3;
8928 else if (cpu_unit_reservation_p (state, _1mfi_))
8929 return 2;
8930 else if (cpu_unit_reservation_p (state, _1mfb_))
8931 return 8;
8932 else if (cpu_unit_reservation_p (state, _1mlx_))
8933 return 9;
8934 else
8935 gcc_unreachable ();
8936 default:
8937 gcc_unreachable ();
8938 }
8939 }
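/* The codes returned above are indices into bundle_name[]: 0 is .mii,
   1 .mmi, 2 .mfi, 3 .mmf, 4 .bbb, 5 .mbb, 6 .mib, 7 .mmb, 8 .mfb and
   9 .mlx.  get_bundle_name () maps them back to the pseudo-ops, and
   ia64_add_bundle_selector_before () treats codes 4 and 5 (the .bbb and
   .mbb templates) specially when target unwind info is used.  */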
8940
8941 /* True when INSN is important for bundling. */
8942
8943 static bool
8944 important_for_bundling_p (rtx_insn *insn)
8945 {
8946 return (INSN_P (insn)
8947 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8948 && GET_CODE (PATTERN (insn)) != USE
8949 && GET_CODE (PATTERN (insn)) != CLOBBER);
8950 }
8951
8952 /* The following function returns the first insn important for insn
8953 bundling starting at INSN and before TAIL. */
8954
8955 static rtx_insn *
8956 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
8957 {
8958 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8959 if (important_for_bundling_p (insn))
8960 return insn;
8961 return NULL;
8962 }
8963
8964 /* True when INSN is unknown, but important, for bundling. */
8965
8966 static bool
8967 unknown_for_bundling_p (rtx_insn *insn)
8968 {
8969 return (INSN_P (insn)
8970 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8971 && GET_CODE (PATTERN (insn)) != USE
8972 && GET_CODE (PATTERN (insn)) != CLOBBER);
8973 }
8974
8975 /* Add a bundle selector TEMPLATE0 before INSN. */
8976
8977 static void
8978 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
8979 {
8980 rtx b = gen_bundle_selector (GEN_INT (template0));
8981
8982 ia64_emit_insn_before (b, insn);
8983 #if NR_BUNDLES == 10
8984 if ((template0 == 4 || template0 == 5)
8985 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8986 {
8987 int i;
8988 rtx note = NULL_RTX;
8989
8990 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8991 first or second slot. If it is and has a REG_EH_REGION note, copy the
8992 note to the following nops, as br.call sets rp to the address of the
8993 following bundle and therefore an EH region end must be on a bundle
8994 boundary. */
8995 insn = PREV_INSN (insn);
8996 for (i = 0; i < 3; i++)
8997 {
8998 do
8999 insn = next_active_insn (insn);
9000 while (NONJUMP_INSN_P (insn)
9001 && get_attr_empty (insn) == EMPTY_YES);
9002 if (CALL_P (insn))
9003 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9004 else if (note)
9005 {
9006 int code;
9007
9008 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9009 || code == CODE_FOR_nop_b);
9010 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9011 note = NULL_RTX;
9012 else
9013 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9014 }
9015 }
9016 }
9017 #endif
9018 }
9019
9020 /* The following function does insn bundling. Bundling means
9021 inserting templates and nop insns to fit insn groups into permitted
9022 templates. Instruction scheduling uses an NDFA (non-deterministic
9023 finite automaton) encoding information about the templates and the
9024 inserted nops. The nondeterminism of the automaton makes it possible
9025 to follow all possible insn sequences very quickly.
9026
9027 Unfortunately it is not possible to get information about inserting
9028 nop insns and used templates from the automaton states. The
9029 automaton only says that we can issue an insn, possibly inserting
9030 some nops before it and using some template. Therefore insn
9031 bundling in this function is implemented by using a DFA
9032 (deterministic finite automaton). We follow all possible insn
9033 sequences by inserting 0-2 nops (that is what the NDFA describes for
9034 insn scheduling) before/after each insn being bundled. We know the
9035 start of simulated processor cycle from insn scheduling (insn
9036 starting a new cycle has TImode).
9037
9038 A simple implementation of insn bundling would create an enormous
9039 number of possible insn sequences satisfying the information about new
9040 cycle ticks taken from the insn scheduling. To make the algorithm
9041 practical we use dynamic programming. Each decision (about
9042 inserting nops and, implicitly, about previous decisions) is described
9043 by the structure bundle_state (see above). If we generate the same
9044 bundle state (the key is the automaton state after issuing the insns
9045 and nops for it), we reuse the already generated one. As a consequence
9046 we reject some decisions which cannot improve the solution and
9047 reduce the memory needed by the algorithm.
9048
9049 When we reach the end of EBB (extended basic block), we choose the
9050 best sequence and then, moving back through the EBB, insert templates
9051 for the best alternative. The templates are obtained by querying the
9052 automaton state for each insn in the chosen bundle states.
9053
9054 So the algorithm makes two (forward and backward) passes through
9055 EBB. */
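/* In outline, the code below implements the scheme described above
   (a sketch of the existing code, not additional functionality):

       forward pass:
         for each important insn I, numbered n = 1, 2, ...
           for each state S in index_to_bundle_states[n - 1]
             for each allowed number of nops (0-2)
               issue_nops_and_insn (S, nops, I, ...);
                 -- the result is kept only if insert_bundle_state ()
                    finds no better state with the same
                    (DFA state, insn number) key
       backward pass:
         pick the best state for the last insn and walk its
         ->originator chain, emitting bundle selectors and nops.
*/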
9056
9057 static void
9058 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9059 {
9060 struct bundle_state *curr_state, *next_state, *best_state;
9061 rtx_insn *insn, *next_insn;
9062 int insn_num;
9063 int i, bundle_end_p, only_bundle_end_p, asm_p;
9064 int pos = 0, max_pos, template0, template1;
9065 rtx_insn *b;
9066 enum attr_type type;
9067
9068 insn_num = 0;
9069 /* Count insns in the EBB. */
9070 for (insn = NEXT_INSN (prev_head_insn);
9071 insn && insn != tail;
9072 insn = NEXT_INSN (insn))
9073 if (INSN_P (insn))
9074 insn_num++;
9075 if (insn_num == 0)
9076 return;
9077 bundling_p = 1;
9078 dfa_clean_insn_cache ();
9079 initiate_bundle_state_table ();
9080 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9081 /* First (forward) pass -- generation of bundle states. */
9082 curr_state = get_free_bundle_state ();
9083 curr_state->insn = NULL;
9084 curr_state->before_nops_num = 0;
9085 curr_state->after_nops_num = 0;
9086 curr_state->insn_num = 0;
9087 curr_state->cost = 0;
9088 curr_state->accumulated_insns_num = 0;
9089 curr_state->branch_deviation = 0;
9090 curr_state->middle_bundle_stops = 0;
9091 curr_state->next = NULL;
9092 curr_state->originator = NULL;
9093 state_reset (curr_state->dfa_state);
9094 index_to_bundle_states [0] = curr_state;
9095 insn_num = 0;
9096 /* Shift the cycle mark if it is put on an insn which could be ignored. */
9097 for (insn = NEXT_INSN (prev_head_insn);
9098 insn != tail;
9099 insn = NEXT_INSN (insn))
9100 if (INSN_P (insn)
9101 && !important_for_bundling_p (insn)
9102 && GET_MODE (insn) == TImode)
9103 {
9104 PUT_MODE (insn, VOIDmode);
9105 for (next_insn = NEXT_INSN (insn);
9106 next_insn != tail;
9107 next_insn = NEXT_INSN (next_insn))
9108 if (important_for_bundling_p (next_insn)
9109 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9110 {
9111 PUT_MODE (next_insn, TImode);
9112 break;
9113 }
9114 }
9115 /* Forward pass: generation of bundle states. */
9116 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9117 insn != NULL_RTX;
9118 insn = next_insn)
9119 {
9120 gcc_assert (important_for_bundling_p (insn));
9121 type = ia64_safe_type (insn);
9122 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9123 insn_num++;
9124 index_to_bundle_states [insn_num] = NULL;
9125 for (curr_state = index_to_bundle_states [insn_num - 1];
9126 curr_state != NULL;
9127 curr_state = next_state)
9128 {
9129 pos = curr_state->accumulated_insns_num % 3;
9130 next_state = curr_state->next;
9131 /* We must fill up the current bundle in order to start a
9132 subsequent asm insn in a new bundle. An asm insn is always
9133 placed in a separate bundle. */
9134 only_bundle_end_p
9135 = (next_insn != NULL_RTX
9136 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9137 && unknown_for_bundling_p (next_insn));
9138 /* We may fill up the current bundle if it is the cycle end
9139 without a group barrier. */
9140 bundle_end_p
9141 = (only_bundle_end_p || next_insn == NULL_RTX
9142 || (GET_MODE (next_insn) == TImode
9143 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9144 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9145 || type == TYPE_S)
9146 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9147 only_bundle_end_p);
9148 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9149 only_bundle_end_p);
9150 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9151 only_bundle_end_p);
9152 }
9153 gcc_assert (index_to_bundle_states [insn_num]);
9154 for (curr_state = index_to_bundle_states [insn_num];
9155 curr_state != NULL;
9156 curr_state = curr_state->next)
9157 if (verbose >= 2 && dump)
9158 {
9159 /* This structure is taken from the generated code of the
9160 pipeline hazard recognizer (see file insn-attrtab.c).
9161 Please don't forget to change the structure if a new
9162 automaton is added to the .md file. */
9163 struct DFA_chip
9164 {
9165 unsigned short one_automaton_state;
9166 unsigned short oneb_automaton_state;
9167 unsigned short two_automaton_state;
9168 unsigned short twob_automaton_state;
9169 };
9170
9171 fprintf
9172 (dump,
9173 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9174 curr_state->unique_num,
9175 (curr_state->originator == NULL
9176 ? -1 : curr_state->originator->unique_num),
9177 curr_state->cost,
9178 curr_state->before_nops_num, curr_state->after_nops_num,
9179 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9180 curr_state->middle_bundle_stops,
9181 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9182 INSN_UID (insn));
9183 }
9184 }
9185
9186 /* We should find a solution because the 2nd insn scheduling has
9187 found one. */
9188 gcc_assert (index_to_bundle_states [insn_num]);
9189 /* Find a state corresponding to the best insn sequence. */
9190 best_state = NULL;
9191 for (curr_state = index_to_bundle_states [insn_num];
9192 curr_state != NULL;
9193 curr_state = curr_state->next)
9194 /* We consider only states whose last bundle is completely filled.
9195 Among them we prefer insn sequences with minimal cost, then with
9196 minimal inserted nops, and finally with branch insns placed in
9197 the 3rd slots. */
9198 if (curr_state->accumulated_insns_num % 3 == 0
9199 && (best_state == NULL || best_state->cost > curr_state->cost
9200 || (best_state->cost == curr_state->cost
9201 && (curr_state->accumulated_insns_num
9202 < best_state->accumulated_insns_num
9203 || (curr_state->accumulated_insns_num
9204 == best_state->accumulated_insns_num
9205 && (curr_state->branch_deviation
9206 < best_state->branch_deviation
9207 || (curr_state->branch_deviation
9208 == best_state->branch_deviation
9209 && curr_state->middle_bundle_stops
9210 < best_state->middle_bundle_stops)))))))
9211 best_state = curr_state;
9212 /* Second (backward) pass: adding nops and templates. */
9213 gcc_assert (best_state);
9214 insn_num = best_state->before_nops_num;
9215 template0 = template1 = -1;
9216 for (curr_state = best_state;
9217 curr_state->originator != NULL;
9218 curr_state = curr_state->originator)
9219 {
9220 insn = curr_state->insn;
9221 asm_p = unknown_for_bundling_p (insn);
9222 insn_num++;
9223 if (verbose >= 2 && dump)
9224 {
9225 struct DFA_chip
9226 {
9227 unsigned short one_automaton_state;
9228 unsigned short oneb_automaton_state;
9229 unsigned short two_automaton_state;
9230 unsigned short twob_automaton_state;
9231 };
9232
9233 fprintf
9234 (dump,
9235 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9236 curr_state->unique_num,
9237 (curr_state->originator == NULL
9238 ? -1 : curr_state->originator->unique_num),
9239 curr_state->cost,
9240 curr_state->before_nops_num, curr_state->after_nops_num,
9241 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9242 curr_state->middle_bundle_stops,
9243 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9244 INSN_UID (insn));
9245 }
9246 /* Find the position in the current bundle window. The window can
9247 contain at most two bundles. A two-bundle window means that
9248 the processor will make two bundle rotations. */
9249 max_pos = get_max_pos (curr_state->dfa_state);
9250 if (max_pos == 6
9251 /* A negative template number here means that the
9252 processor did one bundle rotation. */
9253 || (max_pos == 3 && template0 < 0))
9254 {
9255 /* We are at the end of the window -- find template(s) for
9256 its bundle(s). */
9257 pos = max_pos;
9258 if (max_pos == 3)
9259 template0 = get_template (curr_state->dfa_state, 3);
9260 else
9261 {
9262 template1 = get_template (curr_state->dfa_state, 3);
9263 template0 = get_template (curr_state->dfa_state, 6);
9264 }
9265 }
9266 if (max_pos > 3 && template1 < 0)
9267 /* This may happen when we have a stop inside a bundle. */
9268 {
9269 gcc_assert (pos <= 3);
9270 template1 = get_template (curr_state->dfa_state, 3);
9271 pos += 3;
9272 }
9273 if (!asm_p)
9274 /* Emit nops after the current insn. */
9275 for (i = 0; i < curr_state->after_nops_num; i++)
9276 {
9277 rtx nop_pat = gen_nop ();
9278 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9279 pos--;
9280 gcc_assert (pos >= 0);
9281 if (pos % 3 == 0)
9282 {
9283 /* We are at the start of a bundle: emit the template
9284 (it should be defined). */
9285 gcc_assert (template0 >= 0);
9286 ia64_add_bundle_selector_before (template0, nop);
9287 /* If we have a two-bundle window, we make one bundle
9288 rotation. Otherwise template0 will be undefined
9289 (negative value). */
9290 template0 = template1;
9291 template1 = -1;
9292 }
9293 }
9294 /* Move the position backward in the window. A group barrier has
9295 no slot. An asm insn takes a whole bundle. */
9296 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9297 && !unknown_for_bundling_p (insn))
9298 pos--;
9299 /* Long insn takes 2 slots. */
9300 if (ia64_safe_type (insn) == TYPE_L)
9301 pos--;
9302 gcc_assert (pos >= 0);
9303 if (pos % 3 == 0
9304 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9305 && !unknown_for_bundling_p (insn))
9306 {
9307 /* The current insn is at the bundle start: emit the
9308 template. */
9309 gcc_assert (template0 >= 0);
9310 ia64_add_bundle_selector_before (template0, insn);
9311 b = PREV_INSN (insn);
9312 insn = b;
9313 /* See comment above in analogous place for emitting nops
9314 after the insn. */
9315 template0 = template1;
9316 template1 = -1;
9317 }
9318 /* Emit nops before the current insn. */
9319 for (i = 0; i < curr_state->before_nops_num; i++)
9320 {
9321 rtx nop_pat = gen_nop ();
9322 ia64_emit_insn_before (nop_pat, insn);
9323 rtx_insn *nop = PREV_INSN (insn);
9324 insn = nop;
9325 pos--;
9326 gcc_assert (pos >= 0);
9327 if (pos % 3 == 0)
9328 {
9329 /* See comment above in analogous place for emitting nops
9330 after the insn. */
9331 gcc_assert (template0 >= 0);
9332 ia64_add_bundle_selector_before (template0, insn);
9333 b = PREV_INSN (insn);
9334 insn = b;
9335 template0 = template1;
9336 template1 = -1;
9337 }
9338 }
9339 }
9340
9341 #ifdef ENABLE_CHECKING
9342 {
9343 /* Assert that middle_bundle_stops was calculated correctly. */
9344 int num = best_state->middle_bundle_stops;
9345 bool start_bundle = true, end_bundle = false;
9346
9347 for (insn = NEXT_INSN (prev_head_insn);
9348 insn && insn != tail;
9349 insn = NEXT_INSN (insn))
9350 {
9351 if (!INSN_P (insn))
9352 continue;
9353 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9354 start_bundle = true;
9355 else
9356 {
9357 rtx_insn *next_insn;
9358
9359 for (next_insn = NEXT_INSN (insn);
9360 next_insn && next_insn != tail;
9361 next_insn = NEXT_INSN (next_insn))
9362 if (INSN_P (next_insn)
9363 && (ia64_safe_itanium_class (next_insn)
9364 != ITANIUM_CLASS_IGNORE
9365 || recog_memoized (next_insn)
9366 == CODE_FOR_bundle_selector)
9367 && GET_CODE (PATTERN (next_insn)) != USE
9368 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9369 break;
9370
9371 end_bundle = next_insn == NULL_RTX
9372 || next_insn == tail
9373 || (INSN_P (next_insn)
9374 && recog_memoized (next_insn)
9375 == CODE_FOR_bundle_selector);
9376 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9377 && !start_bundle && !end_bundle
9378 && next_insn
9379 && !unknown_for_bundling_p (next_insn))
9380 num--;
9381
9382 start_bundle = false;
9383 }
9384 }
9385
9386 gcc_assert (num == 0);
9387 }
9388 #endif
9389
9390 free (index_to_bundle_states);
9391 finish_bundle_state_table ();
9392 bundling_p = 0;
9393 dfa_clean_insn_cache ();
9394 }
9395
9396 /* The following function is called at the end of scheduling a BB or
9397 an EBB. After reload, it inserts stop bits and does insn bundling. */
9398
9399 static void
9400 ia64_sched_finish (FILE *dump, int sched_verbose)
9401 {
9402 if (sched_verbose)
9403 fprintf (dump, "// Finishing schedule.\n");
9404 if (!reload_completed)
9405 return;
9406 if (reload_completed)
9407 {
9408 final_emit_insn_group_barriers (dump);
9409 bundling (dump, sched_verbose, current_sched_info->prev_head,
9410 current_sched_info->next_tail);
9411 if (sched_verbose && dump)
9412 fprintf (dump, "// finishing %d-%d\n",
9413 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9414 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9415
9416 return;
9417 }
9418 }
9419
9420 /* The following function inserts stop bits in a scheduled BB or EBB. */
9421
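/* In the emitted assembly a stop bit appears as ";;" at the end of an
   instruction group, e.g. (operands illustrative only):

       ld8 r14 = [r32] ;;
       add r15 = r14, r33

   where the stop separates the load from the dependent add. In RTL
   the stop is represented by the insn_group_barrier pattern.  */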
9422 static void
9423 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9424 {
9425 rtx_insn *insn;
9426 int need_barrier_p = 0;
9427 int seen_good_insn = 0;
9428
9429 init_insn_group_barriers ();
9430
9431 for (insn = NEXT_INSN (current_sched_info->prev_head);
9432 insn != current_sched_info->next_tail;
9433 insn = NEXT_INSN (insn))
9434 {
9435 if (BARRIER_P (insn))
9436 {
9437 rtx_insn *last = prev_active_insn (insn);
9438
9439 if (! last)
9440 continue;
9441 if (JUMP_TABLE_DATA_P (last))
9442 last = prev_active_insn (last);
9443 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9444 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9445
9446 init_insn_group_barriers ();
9447 seen_good_insn = 0;
9448 need_barrier_p = 0;
9449 }
9450 else if (NONDEBUG_INSN_P (insn))
9451 {
9452 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9453 {
9454 init_insn_group_barriers ();
9455 seen_good_insn = 0;
9456 need_barrier_p = 0;
9457 }
9458 else if (need_barrier_p || group_barrier_needed (insn)
9459 || (mflag_sched_stop_bits_after_every_cycle
9460 && GET_MODE (insn) == TImode
9461 && seen_good_insn))
9462 {
9463 if (TARGET_EARLY_STOP_BITS)
9464 {
9465 rtx_insn *last;
9466
9467 for (last = insn;
9468 last != current_sched_info->prev_head;
9469 last = PREV_INSN (last))
9470 if (INSN_P (last) && GET_MODE (last) == TImode
9471 && stops_p [INSN_UID (last)])
9472 break;
9473 if (last == current_sched_info->prev_head)
9474 last = insn;
9475 last = prev_active_insn (last);
9476 if (last
9477 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9478 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9479 last);
9480 init_insn_group_barriers ();
9481 for (last = NEXT_INSN (last);
9482 last != insn;
9483 last = NEXT_INSN (last))
9484 if (INSN_P (last))
9485 {
9486 group_barrier_needed (last);
9487 if (recog_memoized (last) >= 0
9488 && important_for_bundling_p (last))
9489 seen_good_insn = 1;
9490 }
9491 }
9492 else
9493 {
9494 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9495 insn);
9496 init_insn_group_barriers ();
9497 seen_good_insn = 0;
9498 }
9499 group_barrier_needed (insn);
9500 if (recog_memoized (insn) >= 0
9501 && important_for_bundling_p (insn))
9502 seen_good_insn = 1;
9503 }
9504 else if (recog_memoized (insn) >= 0
9505 && important_for_bundling_p (insn))
9506 seen_good_insn = 1;
9507 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9508 }
9509 }
9510 }
9511
9512 \f
9513
9514 /* The following function returns the lookahead depth used by the
9515 first-cycle multipass DFA insn scheduling; zero would disable it. */
9516
9517 static int
9518 ia64_first_cycle_multipass_dfa_lookahead (void)
9519 {
9520 return (reload_completed ? 6 : 4);
9521 }
9522
9523 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
9524
9525 static void
9526 ia64_init_dfa_pre_cycle_insn (void)
9527 {
9528 if (temp_dfa_state == NULL)
9529 {
9530 dfa_state_size = state_size ();
9531 temp_dfa_state = xmalloc (dfa_state_size);
9532 prev_cycle_state = xmalloc (dfa_state_size);
9533 }
9534 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9535 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9536 recog_memoized (dfa_pre_cycle_insn);
9537 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9538 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9539 recog_memoized (dfa_stop_insn);
9540 }
9541
9542 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9543 used by the DFA insn scheduler. */
9544
9545 static rtx
9546 ia64_dfa_pre_cycle_insn (void)
9547 {
9548 return dfa_pre_cycle_insn;
9549 }
9550
9551 /* The following function returns TRUE if PRODUCER (of type ilog or
9552 ld) produces an address for CONSUMER (of type st or stf). */
9553
9554 int
9555 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9556 {
9557 rtx dest, reg, mem;
9558
9559 gcc_assert (producer && consumer);
9560 dest = ia64_single_set (producer);
9561 gcc_assert (dest);
9562 reg = SET_DEST (dest);
9563 gcc_assert (reg);
9564 if (GET_CODE (reg) == SUBREG)
9565 reg = SUBREG_REG (reg);
9566 gcc_assert (GET_CODE (reg) == REG);
9567
9568 dest = ia64_single_set (consumer);
9569 gcc_assert (dest);
9570 mem = SET_DEST (dest);
9571 gcc_assert (mem && GET_CODE (mem) == MEM);
9572 return reg_mentioned_p (reg, mem);
9573 }
9574
9575 /* The following function returns TRUE if PRODUCER (of type ilog or
9576 ld) produces an address for CONSUMER (of type ld or fld). */
9577
9578 int
9579 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9580 {
9581 rtx dest, src, reg, mem;
9582
9583 gcc_assert (producer && consumer);
9584 dest = ia64_single_set (producer);
9585 gcc_assert (dest);
9586 reg = SET_DEST (dest);
9587 gcc_assert (reg);
9588 if (GET_CODE (reg) == SUBREG)
9589 reg = SUBREG_REG (reg);
9590 gcc_assert (GET_CODE (reg) == REG);
9591
9592 src = ia64_single_set (consumer);
9593 gcc_assert (src);
9594 mem = SET_SRC (src);
9595 gcc_assert (mem);
9596
9597 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9598 mem = XVECEXP (mem, 0, 0);
9599 else if (GET_CODE (mem) == IF_THEN_ELSE)
9600 /* ??? Is this bypass necessary for ld.c? */
9601 {
9602 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9603 mem = XEXP (mem, 1);
9604 }
9605
9606 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9607 mem = XEXP (mem, 0);
9608
9609 if (GET_CODE (mem) == UNSPEC)
9610 {
9611 int c = XINT (mem, 1);
9612
9613 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9614 || c == UNSPEC_LDSA);
9615 mem = XVECEXP (mem, 0, 0);
9616 }
9617
9618 /* Note that LO_SUM is used for GOT loads. */
9619 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9620
9621 return reg_mentioned_p (reg, mem);
9622 }
9623
9624 /* The following function returns TRUE if INSN produces an address for a
9625 load/store insn. We place such insns in an M slot because that
9626 decreases their latency. */
9627
9628 int
9629 ia64_produce_address_p (rtx insn)
9630 {
9631 return insn->call;
9632 }
9633
9634 \f
9635 /* Emit pseudo-ops for the assembler to describe predicate relations.
9636 At present this assumes that we only consider predicate pairs to
9637 be mutex, and that the assembler can deduce proper values from
9638 straight-line code. */
9639
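/* For a live predicate pair this emits, via the pred_rel_mutex
   pattern, an assembler annotation of roughly the form (illustrative):

       .pred.rel "mutex", p6, p7

   telling the assembler that the two predicates are never true at the
   same time; the exact directive text comes from ia64.md.  */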
9640 static void
9641 emit_predicate_relation_info (void)
9642 {
9643 basic_block bb;
9644
9645 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9646 {
9647 int r;
9648 rtx_insn *head = BB_HEAD (bb);
9649
9650 /* We only need such notes at code labels. */
9651 if (! LABEL_P (head))
9652 continue;
9653 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9654 head = NEXT_INSN (head);
9655
9656 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9657 grabbing the entire block of predicate registers. */
9658 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9659 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9660 {
9661 rtx p = gen_rtx_REG (BImode, r);
9662 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9663 if (head == BB_END (bb))
9664 BB_END (bb) = n;
9665 head = n;
9666 }
9667 }
9668
9669 /* Look for conditional calls that do not return, and protect predicate
9670 relations around them. Otherwise the assembler will assume the call
9671 returns, and complain about uses of call-clobbered predicates after
9672 the call. */
9673 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9674 {
9675 rtx_insn *insn = BB_HEAD (bb);
9676
9677 while (1)
9678 {
9679 if (CALL_P (insn)
9680 && GET_CODE (PATTERN (insn)) == COND_EXEC
9681 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9682 {
9683 rtx_insn *b =
9684 emit_insn_before (gen_safe_across_calls_all (), insn);
9685 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9686 if (BB_HEAD (bb) == insn)
9687 BB_HEAD (bb) = b;
9688 if (BB_END (bb) == insn)
9689 BB_END (bb) = a;
9690 }
9691
9692 if (insn == BB_END (bb))
9693 break;
9694 insn = NEXT_INSN (insn);
9695 }
9696 }
9697 }
9698
9699 /* Perform machine dependent operations on the rtl chain INSNS. */
9700
9701 static void
9702 ia64_reorg (void)
9703 {
9704 /* We are freeing block_for_insn in the toplev to keep compatibility
9705 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9706 compute_bb_for_insn ();
9707
9708 /* If optimizing, we'll have split before scheduling. */
9709 if (optimize == 0)
9710 split_all_insns ();
9711
9712 if (optimize && flag_schedule_insns_after_reload
9713 && dbg_cnt (ia64_sched2))
9714 {
9715 basic_block bb;
9716 timevar_push (TV_SCHED2);
9717 ia64_final_schedule = 1;
9718
9719 /* We can't let modulo-sched prevent us from scheduling any bbs,
9720 since we need the final schedule to produce bundle information. */
9721 FOR_EACH_BB_FN (bb, cfun)
9722 bb->flags &= ~BB_DISABLE_SCHEDULE;
9723
9724 initiate_bundle_states ();
9725 ia64_nop = make_insn_raw (gen_nop ());
9726 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9727 recog_memoized (ia64_nop);
9728 clocks_length = get_max_uid () + 1;
9729 stops_p = XCNEWVEC (char, clocks_length);
9730
9731 if (ia64_tune == PROCESSOR_ITANIUM2)
9732 {
9733 pos_1 = get_cpu_unit_code ("2_1");
9734 pos_2 = get_cpu_unit_code ("2_2");
9735 pos_3 = get_cpu_unit_code ("2_3");
9736 pos_4 = get_cpu_unit_code ("2_4");
9737 pos_5 = get_cpu_unit_code ("2_5");
9738 pos_6 = get_cpu_unit_code ("2_6");
9739 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9740 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9741 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9742 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9743 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9744 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9745 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9746 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9747 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9748 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9749 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9750 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9751 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9752 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9753 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9754 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9755 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9756 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9757 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9758 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9759 }
9760 else
9761 {
9762 pos_1 = get_cpu_unit_code ("1_1");
9763 pos_2 = get_cpu_unit_code ("1_2");
9764 pos_3 = get_cpu_unit_code ("1_3");
9765 pos_4 = get_cpu_unit_code ("1_4");
9766 pos_5 = get_cpu_unit_code ("1_5");
9767 pos_6 = get_cpu_unit_code ("1_6");
9768 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9769 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9770 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9771 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9772 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9773 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9774 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9775 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9776 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9777 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9778 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9779 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9780 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9781 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9782 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9783 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9784 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9785 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9786 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9787 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9788 }
9789
9790 if (flag_selective_scheduling2
9791 && !maybe_skip_selective_scheduling ())
9792 run_selective_scheduling ();
9793 else
9794 schedule_ebbs ();
9795
9796 /* Redo the alignment computation, as it might have gone wrong. */
9797 compute_alignments ();
9798
9799 /* We cannot reuse this one because it has been corrupted by the
9800 evil glat. */
9801 finish_bundle_states ();
9802 free (stops_p);
9803 stops_p = NULL;
9804 emit_insn_group_barriers (dump_file);
9805
9806 ia64_final_schedule = 0;
9807 timevar_pop (TV_SCHED2);
9808 }
9809 else
9810 emit_all_insn_group_barriers (dump_file);
9811
9812 df_analyze ();
9813
9814 /* A call must not be the last instruction in a function, so that the
9815 return address is still within the function and unwinding works
9816 properly. Note that IA-64 differs from dwarf2 on this point. */
9817 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9818 {
9819 rtx_insn *insn;
9820 int saw_stop = 0;
9821
9822 insn = get_last_insn ();
9823 if (! INSN_P (insn))
9824 insn = prev_active_insn (insn);
9825 if (insn)
9826 {
9827 /* Skip over insns that expand to nothing. */
9828 while (NONJUMP_INSN_P (insn)
9829 && get_attr_empty (insn) == EMPTY_YES)
9830 {
9831 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9832 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9833 saw_stop = 1;
9834 insn = prev_active_insn (insn);
9835 }
9836 if (CALL_P (insn))
9837 {
9838 if (! saw_stop)
9839 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9840 emit_insn (gen_break_f ());
9841 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9842 }
9843 }
9844 }
9845
9846 emit_predicate_relation_info ();
9847
9848 if (flag_var_tracking)
9849 {
9850 timevar_push (TV_VAR_TRACKING);
9851 variable_tracking_main ();
9852 timevar_pop (TV_VAR_TRACKING);
9853 }
9854 df_finish_pass (false);
9855 }
9856 \f
9857 /* Return true if REGNO is used by the epilogue. */
9858
9859 int
9860 ia64_epilogue_uses (int regno)
9861 {
9862 switch (regno)
9863 {
9864 case R_GR (1):
9865 /* With a call to a function in another module, we will write a new
9866 value to "gp". After returning from such a call, we need to make
9867 sure the function restores the original gp-value, even if the
9868 function itself does not use the gp anymore. */
9869 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9870
9871 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9872 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9873 /* For functions defined with the syscall_linkage attribute, all
9874 input registers are marked as live at all function exits. This
9875 prevents the register allocator from using the input registers,
9876 which in turn makes it possible to restart a system call after
9877 an interrupt without having to save/restore the input registers.
9878 This also prevents kernel data from leaking to application code. */
9879 return lookup_attribute ("syscall_linkage",
9880 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9881
9882 case R_BR (0):
9883 /* Conditional return patterns can't represent the use of `b0' as
9884 the return address, so we force the value live this way. */
9885 return 1;
9886
9887 case AR_PFS_REGNUM:
9888 /* Likewise for ar.pfs, which is used by br.ret. */
9889 return 1;
9890
9891 default:
9892 return 0;
9893 }
9894 }
9895
9896 /* Return true if REGNO is used by the frame unwinder. */
9897
9898 int
9899 ia64_eh_uses (int regno)
9900 {
9901 unsigned int r;
9902
9903 if (! reload_completed)
9904 return 0;
9905
9906 if (regno == 0)
9907 return 0;
9908
9909 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9910 if (regno == current_frame_info.r[r]
9911 || regno == emitted_frame_related_regs[r])
9912 return 1;
9913
9914 return 0;
9915 }
9916 \f
9917 /* Return true if this goes in small data/bss. */
9918
9919 /* ??? We could also support our own long data here, generating movl/add/ld8
9920 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9921 code faster because there is one less load. This would also cover incomplete
9922 types, which can't go in sdata/sbss. */
9923
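/* Illustrative comparison of the two access sequences mentioned above
   (registers, relocation operators and the symbol are made up):

       // through the linkage table (addl,ld8/ld8 -- two loads):
       addl r14 = @ltoff(var), gp ;;
       ld8  r14 = [r14] ;;
       ld8  r15 = [r14]

       // hypothetical "own long data" form (movl/add/ld8 -- one load):
       movl r14 = @gprel(var) ;;
       add  r14 = r14, gp ;;
       ld8  r15 = [r14]  */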
9924 static bool
9925 ia64_in_small_data_p (const_tree exp)
9926 {
9927 if (TARGET_NO_SDATA)
9928 return false;
9929
9930 /* We want to merge strings, so we never consider them small data. */
9931 if (TREE_CODE (exp) == STRING_CST)
9932 return false;
9933
9934 /* Functions are never small data. */
9935 if (TREE_CODE (exp) == FUNCTION_DECL)
9936 return false;
9937
9938 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9939 {
9940 const char *section = DECL_SECTION_NAME (exp);
9941
9942 if (strcmp (section, ".sdata") == 0
9943 || strncmp (section, ".sdata.", 7) == 0
9944 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9945 || strcmp (section, ".sbss") == 0
9946 || strncmp (section, ".sbss.", 6) == 0
9947 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9948 return true;
9949 }
9950 else
9951 {
9952 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9953
9954 /* If this is an incomplete type with size 0, then we can't put it
9955 in sdata because it might be too big when completed. */
9956 if (size > 0 && size <= ia64_section_threshold)
9957 return true;
9958 }
9959
9960 return false;
9961 }
9962 \f
9963 /* Output assembly directives for prologue regions. */
9964
9965 /* The current basic block number. */
9966
9967 static bool last_block;
9968
9969 /* True if we need a copy_state command at the start of the next block. */
9970
9971 static bool need_copy_state;
9972
9973 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9974 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9975 #endif
9976
9977 /* The function emits unwind directives for the start of an epilogue. */
9978
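/* For an epilogue that is not in the last block this produces, e.g.
   (state number illustrative):

       .label_state 1
       .restore sp

   and ia64_asm_unwind_emit later emits the matching ".copy_state 1"
   at the start of the next block.  */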
9979 static void
9980 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9981 bool unwind, bool frame ATTRIBUTE_UNUSED)
9982 {
9983 /* If this isn't the last block of the function, then we need to label the
9984 current state, and copy it back in at the start of the next block. */
9985
9986 if (!last_block)
9987 {
9988 if (unwind)
9989 fprintf (asm_out_file, "\t.label_state %d\n",
9990 ++cfun->machine->state_num);
9991 need_copy_state = true;
9992 }
9993
9994 if (unwind)
9995 fprintf (asm_out_file, "\t.restore sp\n");
9996 }
9997
9998 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9999
10000 static void
10001 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10002 bool unwind, bool frame)
10003 {
10004 rtx dest = SET_DEST (pat);
10005 rtx src = SET_SRC (pat);
10006
10007 if (dest == stack_pointer_rtx)
10008 {
10009 if (GET_CODE (src) == PLUS)
10010 {
10011 rtx op0 = XEXP (src, 0);
10012 rtx op1 = XEXP (src, 1);
10013
10014 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10015
10016 if (INTVAL (op1) < 0)
10017 {
10018 gcc_assert (!frame_pointer_needed);
10019 if (unwind)
10020 fprintf (asm_out_file,
10021 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
10022 -INTVAL (op1));
10023 }
10024 else
10025 process_epilogue (asm_out_file, insn, unwind, frame);
10026 }
10027 else
10028 {
10029 gcc_assert (src == hard_frame_pointer_rtx);
10030 process_epilogue (asm_out_file, insn, unwind, frame);
10031 }
10032 }
10033 else if (dest == hard_frame_pointer_rtx)
10034 {
10035 gcc_assert (src == stack_pointer_rtx);
10036 gcc_assert (frame_pointer_needed);
10037
10038 if (unwind)
10039 fprintf (asm_out_file, "\t.vframe r%d\n",
10040 ia64_dbx_register_number (REGNO (dest)));
10041 }
10042 else
10043 gcc_unreachable ();
10044 }
10045
10046 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10047
10048 static void
10049 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10050 {
10051 rtx dest = SET_DEST (pat);
10052 rtx src = SET_SRC (pat);
10053 int dest_regno = REGNO (dest);
10054 int src_regno;
10055
10056 if (src == pc_rtx)
10057 {
10058 /* Saving return address pointer. */
10059 if (unwind)
10060 fprintf (asm_out_file, "\t.save rp, r%d\n",
10061 ia64_dbx_register_number (dest_regno));
10062 return;
10063 }
10064
10065 src_regno = REGNO (src);
10066
10067 switch (src_regno)
10068 {
10069 case PR_REG (0):
10070 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10071 if (unwind)
10072 fprintf (asm_out_file, "\t.save pr, r%d\n",
10073 ia64_dbx_register_number (dest_regno));
10074 break;
10075
10076 case AR_UNAT_REGNUM:
10077 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10078 if (unwind)
10079 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10080 ia64_dbx_register_number (dest_regno));
10081 break;
10082
10083 case AR_LC_REGNUM:
10084 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10085 if (unwind)
10086 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10087 ia64_dbx_register_number (dest_regno));
10088 break;
10089
10090 default:
10091 /* Everything else should indicate being stored to memory. */
10092 gcc_unreachable ();
10093 }
10094 }
10095
10096 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10097
10098 static void
10099 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10100 {
10101 rtx dest = SET_DEST (pat);
10102 rtx src = SET_SRC (pat);
10103 int src_regno = REGNO (src);
10104 const char *saveop;
10105 HOST_WIDE_INT off;
10106 rtx base;
10107
10108 gcc_assert (MEM_P (dest));
10109 if (GET_CODE (XEXP (dest, 0)) == REG)
10110 {
10111 base = XEXP (dest, 0);
10112 off = 0;
10113 }
10114 else
10115 {
10116 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10117 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10118 base = XEXP (XEXP (dest, 0), 0);
10119 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10120 }
10121
10122 if (base == hard_frame_pointer_rtx)
10123 {
10124 saveop = ".savepsp";
10125 off = - off;
10126 }
10127 else
10128 {
10129 gcc_assert (base == stack_pointer_rtx);
10130 saveop = ".savesp";
10131 }
10132
10133 src_regno = REGNO (src);
10134 switch (src_regno)
10135 {
10136 case BR_REG (0):
10137 gcc_assert (!current_frame_info.r[reg_save_b0]);
10138 if (unwind)
10139 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10140 saveop, off);
10141 break;
10142
10143 case PR_REG (0):
10144 gcc_assert (!current_frame_info.r[reg_save_pr]);
10145 if (unwind)
10146 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10147 saveop, off);
10148 break;
10149
10150 case AR_LC_REGNUM:
10151 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10152 if (unwind)
10153 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10154 saveop, off);
10155 break;
10156
10157 case AR_PFS_REGNUM:
10158 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10159 if (unwind)
10160 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10161 saveop, off);
10162 break;
10163
10164 case AR_UNAT_REGNUM:
10165 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10166 if (unwind)
10167 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10168 saveop, off);
10169 break;
10170
10171 case GR_REG (4):
10172 case GR_REG (5):
10173 case GR_REG (6):
10174 case GR_REG (7):
10175 if (unwind)
10176 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10177 1 << (src_regno - GR_REG (4)));
10178 break;
10179
10180 case BR_REG (1):
10181 case BR_REG (2):
10182 case BR_REG (3):
10183 case BR_REG (4):
10184 case BR_REG (5):
10185 if (unwind)
10186 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10187 1 << (src_regno - BR_REG (1)));
10188 break;
10189
10190 case FR_REG (2):
10191 case FR_REG (3):
10192 case FR_REG (4):
10193 case FR_REG (5):
10194 if (unwind)
10195 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10196 1 << (src_regno - FR_REG (2)));
10197 break;
10198
10199 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10200 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10201 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10202 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10203 if (unwind)
10204 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10205 1 << (src_regno - FR_REG (12)));
10206 break;
10207
10208 default:
10209 /* ??? For some reason we mark other general registers, even those
10210 we can't represent in the unwind info. Ignore them. */
10211 break;
10212 }
10213 }
10214
10215 /* This function looks at a single insn and emits any directives
10216 required to unwind this insn. */
10217
10218 static void
10219 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10220 {
10221 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10222 bool frame = dwarf2out_do_frame ();
10223 rtx note, pat;
10224 bool handled_one;
10225
10226 if (!unwind && !frame)
10227 return;
10228
10229 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10230 {
10231 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10232 == EXIT_BLOCK_PTR_FOR_FN (cfun);
10233
10234 /* Restore unwind state from immediately before the epilogue. */
10235 if (need_copy_state)
10236 {
10237 if (unwind)
10238 {
10239 fprintf (asm_out_file, "\t.body\n");
10240 fprintf (asm_out_file, "\t.copy_state %d\n",
10241 cfun->machine->state_num);
10242 }
10243 need_copy_state = false;
10244 }
10245 }
10246
10247 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10248 return;
10249
10250 /* Look for the ALLOC insn. */
10251 if (INSN_CODE (insn) == CODE_FOR_alloc)
10252 {
10253 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10254 int dest_regno = REGNO (dest);
10255
10256 /* If this is the final destination for ar.pfs, then this must
10257 be the alloc in the prologue. */
10258 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10259 {
10260 if (unwind)
10261 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10262 ia64_dbx_register_number (dest_regno));
10263 }
10264 else
10265 {
10266 /* This must be an alloc before a sibcall. We must drop the
10267 old frame info. The easiest way to drop the old frame
10268 info is to ensure we had a ".restore sp" directive
10269 followed by a new prologue. If the procedure doesn't
10270 have a memory-stack frame, we'll issue a dummy ".restore
10271 sp" now. */
10272 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10273 /* If we haven't done process_epilogue () yet, do it now. */
10274 process_epilogue (asm_out_file, insn, unwind, frame);
10275 if (unwind)
10276 fprintf (asm_out_file, "\t.prologue\n");
10277 }
10278 return;
10279 }
10280
10281 handled_one = false;
10282 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10283 switch (REG_NOTE_KIND (note))
10284 {
10285 case REG_CFA_ADJUST_CFA:
10286 pat = XEXP (note, 0);
10287 if (pat == NULL)
10288 pat = PATTERN (insn);
10289 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10290 handled_one = true;
10291 break;
10292
10293 case REG_CFA_OFFSET:
10294 pat = XEXP (note, 0);
10295 if (pat == NULL)
10296 pat = PATTERN (insn);
10297 process_cfa_offset (asm_out_file, pat, unwind);
10298 handled_one = true;
10299 break;
10300
10301 case REG_CFA_REGISTER:
10302 pat = XEXP (note, 0);
10303 if (pat == NULL)
10304 pat = PATTERN (insn);
10305 process_cfa_register (asm_out_file, pat, unwind);
10306 handled_one = true;
10307 break;
10308
10309 case REG_FRAME_RELATED_EXPR:
10310 case REG_CFA_DEF_CFA:
10311 case REG_CFA_EXPRESSION:
10312 case REG_CFA_RESTORE:
10313 case REG_CFA_SET_VDRAP:
10314 /* Not used in the ia64 port. */
10315 gcc_unreachable ();
10316
10317 default:
10318 /* Not a frame-related note. */
10319 break;
10320 }
10321
10322 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10323 explicit action to take. No guessing required. */
10324 gcc_assert (handled_one);
10325 }
10326
10327 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10328
10329 static void
10330 ia64_asm_emit_except_personality (rtx personality)
10331 {
10332 fputs ("\t.personality\t", asm_out_file);
10333 output_addr_const (asm_out_file, personality);
10334 fputc ('\n', asm_out_file);
10335 }
10336
10337 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10338
10339 static void
10340 ia64_asm_init_sections (void)
10341 {
10342 exception_section = get_unnamed_section (0, output_section_asm_op,
10343 "\t.handlerdata");
10344 }
10345
10346 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10347
10348 static enum unwind_info_type
10349 ia64_debug_unwind_info (void)
10350 {
10351 return UI_TARGET;
10352 }
10353 \f
10354 enum ia64_builtins
10355 {
10356 IA64_BUILTIN_BSP,
10357 IA64_BUILTIN_COPYSIGNQ,
10358 IA64_BUILTIN_FABSQ,
10359 IA64_BUILTIN_FLUSHRS,
10360 IA64_BUILTIN_INFQ,
10361 IA64_BUILTIN_HUGE_VALQ,
10362 IA64_BUILTIN_max
10363 };
10364
10365 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10366
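/* From user code these builtins look like, e.g. (illustrative):

     void *bsp = __builtin_ia64_bsp ();
     __builtin_ia64_flushrs ();
     __float128 x = __builtin_fabsq (y);  // only when not on HP-UX

   The decls created in ia64_init_builtins are stored in the array
   above so that ia64_builtin_decl can return them by code.  */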
10367 void
10368 ia64_init_builtins (void)
10369 {
10370 tree fpreg_type;
10371 tree float80_type;
10372 tree decl;
10373
10374 /* The __fpreg type. */
10375 fpreg_type = make_node (REAL_TYPE);
10376 TYPE_PRECISION (fpreg_type) = 82;
10377 layout_type (fpreg_type);
10378 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10379
10380 /* The __float80 type. */
10381 float80_type = make_node (REAL_TYPE);
10382 TYPE_PRECISION (float80_type) = 80;
10383 layout_type (float80_type);
10384 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10385
10386 /* The __float128 type. */
10387 if (!TARGET_HPUX)
10388 {
10389 tree ftype;
10390 tree float128_type = make_node (REAL_TYPE);
10391
10392 TYPE_PRECISION (float128_type) = 128;
10393 layout_type (float128_type);
10394 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10395
10396 /* TFmode support builtins. */
10397 ftype = build_function_type_list (float128_type, NULL_TREE);
10398 decl = add_builtin_function ("__builtin_infq", ftype,
10399 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10400 NULL, NULL_TREE);
10401 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10402
10403 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10404 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10405 NULL, NULL_TREE);
10406 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10407
10408 ftype = build_function_type_list (float128_type,
10409 float128_type,
10410 NULL_TREE);
10411 decl = add_builtin_function ("__builtin_fabsq", ftype,
10412 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10413 "__fabstf2", NULL_TREE);
10414 TREE_READONLY (decl) = 1;
10415 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10416
10417 ftype = build_function_type_list (float128_type,
10418 float128_type,
10419 float128_type,
10420 NULL_TREE);
10421 decl = add_builtin_function ("__builtin_copysignq", ftype,
10422 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10423 "__copysigntf3", NULL_TREE);
10424 TREE_READONLY (decl) = 1;
10425 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10426 }
10427 else
10428 /* Under HPUX, this is a synonym for "long double". */
10429 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10430 "__float128");
10431
10432 /* Fwrite on VMS is non-standard. */
10433 #if TARGET_ABI_OPEN_VMS
10434 vms_patch_builtins ();
10435 #endif
10436
10437 #define def_builtin(name, type, code) \
10438 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10439 NULL, NULL_TREE)
10440
10441 decl = def_builtin ("__builtin_ia64_bsp",
10442 build_function_type_list (ptr_type_node, NULL_TREE),
10443 IA64_BUILTIN_BSP);
10444 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10445
10446 decl = def_builtin ("__builtin_ia64_flushrs",
10447 build_function_type_list (void_type_node, NULL_TREE),
10448 IA64_BUILTIN_FLUSHRS);
10449 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10450
10451 #undef def_builtin
10452
10453 if (TARGET_HPUX)
10454 {
10455 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10456 set_user_assembler_name (decl, "_Isfinite");
10457 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10458 set_user_assembler_name (decl, "_Isfinitef");
10459 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10460 set_user_assembler_name (decl, "_Isfinitef128");
10461 }
10462 }
10463
10464 rtx
10465 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10466 machine_mode mode ATTRIBUTE_UNUSED,
10467 int ignore ATTRIBUTE_UNUSED)
10468 {
10469 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10470 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10471
10472 switch (fcode)
10473 {
10474 case IA64_BUILTIN_BSP:
10475 if (! target || ! register_operand (target, DImode))
10476 target = gen_reg_rtx (DImode);
10477 emit_insn (gen_bsp_value (target));
10478 #ifdef POINTERS_EXTEND_UNSIGNED
10479 target = convert_memory_address (ptr_mode, target);
10480 #endif
10481 return target;
10482
10483 case IA64_BUILTIN_FLUSHRS:
10484 emit_insn (gen_flushrs ());
10485 return const0_rtx;
10486
10487 case IA64_BUILTIN_INFQ:
10488 case IA64_BUILTIN_HUGE_VALQ:
10489 {
10490 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10491 REAL_VALUE_TYPE inf;
10492 rtx tmp;
10493
10494 real_inf (&inf);
10495 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10496
10497 tmp = validize_mem (force_const_mem (target_mode, tmp));
10498
10499 if (target == 0)
10500 target = gen_reg_rtx (target_mode);
10501
10502 emit_move_insn (target, tmp);
10503 return target;
10504 }
10505
10506 case IA64_BUILTIN_FABSQ:
10507 case IA64_BUILTIN_COPYSIGNQ:
10508 return expand_call (exp, target, ignore);
10509
10510 default:
10511 gcc_unreachable ();
10512 }
10513
10514 return NULL_RTX;
10515 }
10516
10517 /* Return the ia64 builtin for CODE. */
10518
10519 static tree
10520 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10521 {
10522 if (code >= IA64_BUILTIN_max)
10523 return error_mark_node;
10524
10525 return ia64_builtins[code];
10526 }
10527
10528 /* On HP-UX IA64, aggregate parameters are passed in the
10529 most significant bits of the stack slot. */
10530
10531 enum direction
10532 ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
10533 {
10534 /* Exception to normal case for structures/unions/etc. */
10535
10536 if (type && AGGREGATE_TYPE_P (type)
10537 && int_size_in_bytes (type) < UNITS_PER_WORD)
10538 return upward;
10539
10540 /* Fall back to the default. */
10541 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10542 }
10543
10544 /* Emit text to declare externally defined variables and functions, because
10545 the Intel assembler does not support undefined externals. */
10546
10547 void
10548 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10549 {
10550 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10551 set in order to avoid putting out names that are never really
10552 used. */
10553 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10554 {
10555 /* maybe_assemble_visibility will return 1 if the assembler
10556 visibility directive is output. */
10557 int need_visibility = ((*targetm.binds_local_p) (decl)
10558 && maybe_assemble_visibility (decl));
10559
10560 /* GNU as does not need anything here, but the HP linker does
10561 need something for external functions. */
10562 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10563 && TREE_CODE (decl) == FUNCTION_DECL)
10564 (*targetm.asm_out.globalize_decl_name) (file, decl);
10565 else if (need_visibility && !TARGET_GNU_AS)
10566 (*targetm.asm_out.globalize_label) (file, name);
10567 }
10568 }
10569
10570 /* Set the SImode div/mod functions, since init_integral_libfuncs only
10571 initializes modes of word_mode and larger. Rename the TFmode libfuncs
10572 using the HPUX conventions. __divtf3 is used for XFmode; we need to
10573 keep it for backward compatibility. */
10574
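/* After this setup a TFmode operation in user code, e.g.
   (illustrative)

     __float128 f (__float128 a, __float128 b) { return a * b; }

   is compiled to a call to _U_Qfmpy instead of the default soft-fp
   name __multf3; the other optab entries below work the same way.  */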
10575 static void
10576 ia64_init_libfuncs (void)
10577 {
10578 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10579 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10580 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10581 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10582
10583 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10584 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10585 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10586 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10587 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10588
10589 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10590 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10591 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10592 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10593 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10594 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10595
10596 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10597 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10598 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10599 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10600 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10601
10602 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10603 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10604 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10605 /* HP-UX 11.23 libc does not have a function for unsigned
10606 SImode-to-TFmode conversion. */
10607 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10608 }
10609
10610 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10611
10612 static void
10613 ia64_hpux_init_libfuncs (void)
10614 {
10615 ia64_init_libfuncs ();
10616
10617 /* The HP SI millicode division and mod functions expect DI arguments.
10618 By turning them off completely we avoid using both libgcc and the
10619 non-standard millicode routines and use the HP DI millicode routines
10620 instead. */
10621
10622 set_optab_libfunc (sdiv_optab, SImode, 0);
10623 set_optab_libfunc (udiv_optab, SImode, 0);
10624 set_optab_libfunc (smod_optab, SImode, 0);
10625 set_optab_libfunc (umod_optab, SImode, 0);
10626
10627 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10628 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10629 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10630 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10631
10632 /* HP-UX libc has TF min/max/abs routines in it. */
10633 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10634 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10635 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10636
10637 /* ia64_expand_compare uses this. */
10638 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10639
10640 /* These should never be used. */
10641 set_optab_libfunc (eq_optab, TFmode, 0);
10642 set_optab_libfunc (ne_optab, TFmode, 0);
10643 set_optab_libfunc (gt_optab, TFmode, 0);
10644 set_optab_libfunc (ge_optab, TFmode, 0);
10645 set_optab_libfunc (lt_optab, TFmode, 0);
10646 set_optab_libfunc (le_optab, TFmode, 0);
10647 }
10648
10649 /* Rename the division and modulus functions in VMS. */
10650
10651 static void
10652 ia64_vms_init_libfuncs (void)
10653 {
10654 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10655 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10656 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10657 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10658 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10659 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10660 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10661 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10662 abort_libfunc = init_one_libfunc ("decc$abort");
10663 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10664 #ifdef MEM_LIBFUNCS_INIT
10665 MEM_LIBFUNCS_INIT;
10666 #endif
10667 }
10668
10669 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10670 the HPUX conventions. */
10671
10672 static void
10673 ia64_sysv4_init_libfuncs (void)
10674 {
10675 ia64_init_libfuncs ();
10676
10677 /* These functions are not part of the HPUX TFmode interface. We
10678 use them instead of _U_Qfcmp, which doesn't work the way we
10679 expect. */
10680 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10681 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10682 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10683 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10684 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10685 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10686
10687 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10688 glibc doesn't have them. */
10689 }
10690
10691 /* Use soft-fp. */
10692
10693 static void
10694 ia64_soft_fp_init_libfuncs (void)
10695 {
10696 }
10697
10698 static bool
10699 ia64_vms_valid_pointer_mode (machine_mode mode)
10700 {
10701 return (mode == SImode || mode == DImode);
10702 }
10703 \f
10704 /* For HPUX, it is illegal to have relocations in shared segments. */
10705
10706 static int
10707 ia64_hpux_reloc_rw_mask (void)
10708 {
10709 return 3;
10710 }
10711
10712 /* For others, relax this so that relocations to local data go in
10713 read-only segments, but we still cannot allow global relocations
10714 in read-only segments. */
10715
10716 static int
10717 ia64_reloc_rw_mask (void)
10718 {
10719 return flag_pic ? 3 : 2;
10720 }
10721
10722 /* Return the section to use for X. The only special thing we do here
10723 is to honor small data. */
10724
10725 static section *
10726 ia64_select_rtx_section (machine_mode mode, rtx x,
10727 unsigned HOST_WIDE_INT align)
10728 {
10729 if (GET_MODE_SIZE (mode) > 0
10730 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10731 && !TARGET_NO_SDATA)
10732 return sdata_section;
10733 else
10734 return default_elf_select_rtx_section (mode, x, align);
10735 }
10736
10737 static unsigned int
10738 ia64_section_type_flags (tree decl, const char *name, int reloc)
10739 {
10740 unsigned int flags = 0;
10741
10742 if (strcmp (name, ".sdata") == 0
10743 || strncmp (name, ".sdata.", 7) == 0
10744 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10745 || strncmp (name, ".sdata2.", 8) == 0
10746 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10747 || strcmp (name, ".sbss") == 0
10748 || strncmp (name, ".sbss.", 6) == 0
10749 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10750 flags = SECTION_SMALL;
10751
10752 flags |= default_section_type_flags (decl, name, reloc);
10753 return flags;
10754 }
10755
10756 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10757 structure type and the address of the return value should be passed
10758 in out0, rather than in r8. */
10759
10760 static bool
10761 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10762 {
10763 tree ret_type = TREE_TYPE (fntype);
10764
10765 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10766 as the structure return address parameter, if the return value
10767 type has a non-trivial copy constructor or destructor. It is not
10768 clear if this same convention should be used for other
10769 programming languages. Until G++ 3.4, we incorrectly used r8 for
10770 these return values. */
10771 return (abi_version_at_least (2)
10772 && ret_type
10773 && TYPE_MODE (ret_type) == BLKmode
10774 && TREE_ADDRESSABLE (ret_type)
10775 && lang_GNU_CXX ());
10776 }
10777
10778 /* Output the assembler code for a thunk function. THUNK_DECL is the
10779 declaration for the thunk function itself, FUNCTION is the decl for
10780 the target function. DELTA is an immediate constant offset to be
10781 added to THIS. If VCALL_OFFSET is nonzero, the word at
10782 *(*this + vcall_offset) should be added to THIS. */
10783
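/* In C-like pseudo code the thunk emitted below amounts to
   (illustrative only):

     this += delta;
     if (vcall_offset)
       this += *(long *) (*(char **) this + vcall_offset);
     tail call FUNCTION with the adjusted this;

   the code below expands this with the IA-64 calling convention and
   the ILP32 pointer extensions taken into account.  */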
10784 static void
10785 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10786 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10787 tree function)
10788 {
10789 rtx this_rtx, funexp;
10790 rtx_insn *insn;
10791 unsigned int this_parmno;
10792 unsigned int this_regno;
10793 rtx delta_rtx;
10794
10795 reload_completed = 1;
10796 epilogue_completed = 1;
10797
10798 /* Set things up as ia64_expand_prologue might. */
10799 last_scratch_gr_reg = 15;
10800
10801 memset (&current_frame_info, 0, sizeof (current_frame_info));
10802 current_frame_info.spill_cfa_off = -16;
10803 current_frame_info.n_input_regs = 1;
10804 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10805
10806 /* Mark the end of the (empty) prologue. */
10807 emit_note (NOTE_INSN_PROLOGUE_END);
10808
10809 /* Figure out whether "this" will be the first parameter (the
10810 typical case) or the second parameter (as happens when the
10811 virtual function returns certain class objects). */
10812 this_parmno
10813 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10814 ? 1 : 0);
10815 this_regno = IN_REG (this_parmno);
10816 if (!TARGET_REG_NAMES)
10817 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10818
10819 this_rtx = gen_rtx_REG (Pmode, this_regno);
10820
10821 /* Apply the constant offset, if required. */
10822 delta_rtx = GEN_INT (delta);
10823 if (TARGET_ILP32)
10824 {
10825 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10826 REG_POINTER (tmp) = 1;
10827 if (delta && satisfies_constraint_I (delta_rtx))
10828 {
10829 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10830 delta = 0;
10831 }
10832 else
10833 emit_insn (gen_ptr_extend (this_rtx, tmp));
10834 }
10835 if (delta)
10836 {
10837 if (!satisfies_constraint_I (delta_rtx))
10838 {
10839 rtx tmp = gen_rtx_REG (Pmode, 2);
10840 emit_move_insn (tmp, delta_rtx);
10841 delta_rtx = tmp;
10842 }
10843 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10844 }
10845
10846 /* Apply the offset from the vtable, if required. */
10847 if (vcall_offset)
10848 {
10849 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10850 rtx tmp = gen_rtx_REG (Pmode, 2);
10851
10852 if (TARGET_ILP32)
10853 {
10854 rtx t = gen_rtx_REG (ptr_mode, 2);
10855 REG_POINTER (t) = 1;
10856 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10857 if (satisfies_constraint_I (vcall_offset_rtx))
10858 {
10859 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10860 vcall_offset = 0;
10861 }
10862 else
10863 emit_insn (gen_ptr_extend (tmp, t));
10864 }
10865 else
10866 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10867
10868 if (vcall_offset)
10869 {
10870 if (!satisfies_constraint_J (vcall_offset_rtx))
10871 {
10872 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10873 emit_move_insn (tmp2, vcall_offset_rtx);
10874 vcall_offset_rtx = tmp2;
10875 }
10876 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10877 }
10878
10879 if (TARGET_ILP32)
10880 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10881 else
10882 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10883
10884 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10885 }
10886
10887 /* Generate a tail call to the target function. */
10888 if (! TREE_USED (function))
10889 {
10890 assemble_external (function);
10891 TREE_USED (function) = 1;
10892 }
10893 funexp = XEXP (DECL_RTL (function), 0);
10894 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10895 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10896 insn = get_last_insn ();
10897 SIBLING_CALL_P (insn) = 1;
10898
10899 /* Code generation for calls relies on splitting. */
10900 reload_completed = 1;
10901 epilogue_completed = 1;
10902 try_split (PATTERN (insn), insn, 0);
10903
10904 emit_barrier ();
10905
10906 /* Run just enough of rest_of_compilation to get the insns emitted.
10907 There's not really enough bulk here to make other passes such as
10908 instruction scheduling worthwhile.  Note that use_thunk calls
10909 assemble_start_function and assemble_end_function. */
10910
10911 emit_all_insn_group_barriers (NULL);
10912 insn = get_insns ();
10913 shorten_branches (insn);
10914 final_start_function (insn, file, 1);
10915 final (insn, file, 1);
10916 final_end_function ();
10917
10918 reload_completed = 0;
10919 epilogue_completed = 0;
10920 }
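
/* For illustration only (a rough C-level sketch of what the code above
   emits, not anything taken from GCC's output): with p initially the
   incoming "this" pointer,

     p = p + DELTA;
     if (VCALL_OFFSET != 0)
       p = p + *(ptrdiff_t *) (*(char **) p + VCALL_OFFSET);
     return FUNCTION (p, ...);

   where the final call is emitted as a sibling call, and "this" may
   arrive in the second parameter slot when FUNCTION returns an
   aggregate through a hidden first argument.  */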
10921
10922 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10923
10924 static rtx
10925 ia64_struct_value_rtx (tree fntype,
10926 int incoming ATTRIBUTE_UNUSED)
10927 {
10928 if (TARGET_ABI_OPEN_VMS
10929 || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10930 return NULL_RTX;
10931 return gen_rtx_REG (Pmode, GR_REG (8));
10932 }
10933
10934 static bool
10935 ia64_scalar_mode_supported_p (machine_mode mode)
10936 {
10937 switch (mode)
10938 {
10939 case QImode:
10940 case HImode:
10941 case SImode:
10942 case DImode:
10943 case TImode:
10944 return true;
10945
10946 case SFmode:
10947 case DFmode:
10948 case XFmode:
10949 case RFmode:
10950 return true;
10951
10952 case TFmode:
10953 return true;
10954
10955 default:
10956 return false;
10957 }
10958 }
10959
10960 static bool
10961 ia64_vector_mode_supported_p (machine_mode mode)
10962 {
10963 switch (mode)
10964 {
10965 case V8QImode:
10966 case V4HImode:
10967 case V2SImode:
10968 return true;
10969
10970 case V2SFmode:
10971 return true;
10972
10973 default:
10974 return false;
10975 }
10976 }
10977
10978 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P. */
10979
10980 static bool
10981 ia64_libgcc_floating_mode_supported_p (machine_mode mode)
10982 {
10983 switch (mode)
10984 {
10985 case SFmode:
10986 case DFmode:
10987 return true;
10988
10989 case XFmode:
10990 #ifdef IA64_NO_LIBGCC_XFMODE
10991 return false;
10992 #else
10993 return true;
10994 #endif
10995
10996 case TFmode:
10997 #ifdef IA64_NO_LIBGCC_TFMODE
10998 return false;
10999 #else
11000 return true;
11001 #endif
11002
11003 default:
11004 return false;
11005 }
11006 }
11007
11008 /* Implement the FUNCTION_PROFILER macro. */
11009
11010 void
11011 ia64_output_function_profiler (FILE *file, int labelno)
11012 {
11013 bool indirect_call;
11014
11015 /* If the function needs a static chain and the static chain
11016 register is r15, we use an indirect call so as to bypass
11017 the PLT stub in case the executable is dynamically linked,
11018 because the stub clobbers r15 as per 5.3.6 of the psABI.
11019 We don't need to do that in non-canonical PIC mode.  */
11020
11021 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11022 {
11023 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11024 indirect_call = true;
11025 }
11026 else
11027 indirect_call = false;
11028
11029 if (TARGET_GNU_AS)
11030 fputs ("\t.prologue 4, r40\n", file);
11031 else
11032 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11033 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11034
11035 if (NO_PROFILE_COUNTERS)
11036 fputs ("\tmov out3 = r0\n", file);
11037 else
11038 {
11039 char buf[20];
11040 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11041
11042 if (TARGET_AUTO_PIC)
11043 fputs ("\tmovl out3 = @gprel(", file);
11044 else
11045 fputs ("\taddl out3 = @ltoff(", file);
11046 assemble_name (file, buf);
11047 if (TARGET_AUTO_PIC)
11048 fputs (")\n", file);
11049 else
11050 fputs ("), r1\n", file);
11051 }
11052
11053 if (indirect_call)
11054 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11055 fputs ("\t;;\n", file);
11056
11057 fputs ("\t.save rp, r42\n", file);
11058 fputs ("\tmov out2 = b0\n", file);
11059 if (indirect_call)
11060 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11061 fputs ("\t.body\n", file);
11062 fputs ("\tmov out1 = r1\n", file);
11063 if (indirect_call)
11064 {
11065 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11066 fputs ("\tmov b6 = r16\n", file);
11067 fputs ("\tld8 r1 = [r14]\n", file);
11068 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11069 }
11070 else
11071 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11072 }
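
/* To summarize the sequence emitted above: _mcount receives out0 = the
   caller's ar.pfs (written by the alloc), out1 = the caller's gp (r1),
   out2 = the caller's return address (b0), and out3 = a reference to
   the per-call-site counter label via @gprel or @ltoff, or r0 when
   NO_PROFILE_COUNTERS is set.  When a static chain lives in r15, the
   call goes indirectly through the function descriptor loaded via r14
   so that the PLT stub cannot clobber r15.  */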
11073
11074 static GTY(()) rtx mcount_func_rtx;
11075 static rtx
11076 gen_mcount_func_rtx (void)
11077 {
11078 if (!mcount_func_rtx)
11079 mcount_func_rtx = init_one_libfunc ("_mcount");
11080 return mcount_func_rtx;
11081 }
11082
11083 void
11084 ia64_profile_hook (int labelno)
11085 {
11086 rtx label, ip;
11087
11088 if (NO_PROFILE_COUNTERS)
11089 label = const0_rtx;
11090 else
11091 {
11092 char buf[30];
11093 const char *label_name;
11094 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11095 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11096 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11097 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11098 }
11099 ip = gen_reg_rtx (Pmode);
11100 emit_insn (gen_ip_value (ip));
11101 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11102 VOIDmode, 3,
11103 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11104 ip, Pmode,
11105 label, Pmode);
11106 }
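
/* Note that the library call above passes _mcount three arguments:
   the return branch register b0, the current instruction pointer
   (obtained through gen_ip_value), and the counter label, which is
   const0_rtx when NO_PROFILE_COUNTERS is set.  */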
11107
11108 /* Return the mangling of TYPE if it is an extended fundamental type. */
11109
11110 static const char *
11111 ia64_mangle_type (const_tree type)
11112 {
11113 type = TYPE_MAIN_VARIANT (type);
11114
11115 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11116 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11117 return NULL;
11118
11119 /* On HP-UX, "long double" is mangled as "e", so __float128 is also
11120 mangled as "e" there; elsewhere, mangle TFmode as "g".  */
11121 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11122 return "g";
11123 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11124 an extended mangling. Elsewhere, "e" is available since long
11125 double is 80 bits. */
11126 if (TYPE_MODE (type) == XFmode)
11127 return TARGET_HPUX ? "u9__float80" : "e";
11128 if (TYPE_MODE (type) == RFmode)
11129 return "u7__fpreg";
11130 return NULL;
11131 }
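
/* In summary, the extended manglings chosen above are:
     TFmode (__float128)  "g", except on HP-UX where the default "e"
                          mangling of long double applies;
     XFmode (__float80)   "u9__float80" on HP-UX, "e" elsewhere;
     RFmode (__fpreg)     "u7__fpreg" everywhere.  */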
11132
11133 /* Return the diagnostic message string if conversion from FROMTYPE to
11134 TOTYPE is not allowed, NULL otherwise. */
11135 static const char *
11136 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11137 {
11138 /* Reject nontrivial conversion to or from __fpreg. */
11139 if (TYPE_MODE (fromtype) == RFmode
11140 && TYPE_MODE (totype) != RFmode
11141 && TYPE_MODE (totype) != VOIDmode)
11142 return N_("invalid conversion from %<__fpreg%>");
11143 if (TYPE_MODE (totype) == RFmode
11144 && TYPE_MODE (fromtype) != RFmode)
11145 return N_("invalid conversion to %<__fpreg%>");
11146 return NULL;
11147 }
11148
11149 /* Return the diagnostic message string if the unary operation OP is
11150 not permitted on TYPE, NULL otherwise. */
11151 static const char *
11152 ia64_invalid_unary_op (int op, const_tree type)
11153 {
11154 /* Reject operations on __fpreg other than unary + or &. */
11155 if (TYPE_MODE (type) == RFmode
11156 && op != CONVERT_EXPR
11157 && op != ADDR_EXPR)
11158 return N_("invalid operation on %<__fpreg%>");
11159 return NULL;
11160 }
11161
11162 /* Return the diagnostic message string if the binary operation OP is
11163 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11164 static const char *
11165 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11166 {
11167 /* Reject operations on __fpreg. */
11168 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11169 return N_("invalid operation on %<__fpreg%>");
11170 return NULL;
11171 }
11172
11173 /* HP-UX version_id attribute.
11174 For object foo, if the version_id is set to 1234, put out an alias
11175 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11176 other than an alias statement because it is an illegal symbol name.  */
11177
11178 static tree
11179 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11180 tree name ATTRIBUTE_UNUSED,
11181 tree args,
11182 int flags ATTRIBUTE_UNUSED,
11183 bool *no_add_attrs)
11184 {
11185 tree arg = TREE_VALUE (args);
11186
11187 if (TREE_CODE (arg) != STRING_CST)
11188 {
11189 error ("version attribute is not a string");
11190 *no_add_attrs = true;
11191 return NULL_TREE;
11192 }
11193 return NULL_TREE;
11194 }
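
/* An illustrative (hypothetical) use of the attribute:

     extern int foo (void) __attribute__ ((version_id ("1234")));

   after which foo is referenced through the '.alias foo "foo{1234}"'
   directive described above.  */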
11195
11196 /* Target hook for c_mode_for_suffix. */
11197
11198 static machine_mode
11199 ia64_c_mode_for_suffix (char suffix)
11200 {
11201 if (suffix == 'q')
11202 return TFmode;
11203 if (suffix == 'w')
11204 return XFmode;
11205
11206 return VOIDmode;
11207 }
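
/* For example, with this hook a floating constant written as 1.5q has
   type __float128 (TFmode) and one written as 1.5w has type __float80
   (XFmode), matching the documented 'q'/'w' constant suffixes.  */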
11208
11209 static GTY(()) rtx ia64_dconst_0_5_rtx;
11210
11211 rtx
11212 ia64_dconst_0_5 (void)
11213 {
11214 if (! ia64_dconst_0_5_rtx)
11215 {
11216 REAL_VALUE_TYPE rv;
11217 real_from_string (&rv, "0.5");
11218 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11219 }
11220 return ia64_dconst_0_5_rtx;
11221 }
11222
11223 static GTY(()) rtx ia64_dconst_0_375_rtx;
11224
11225 rtx
11226 ia64_dconst_0_375 (void)
11227 {
11228 if (! ia64_dconst_0_375_rtx)
11229 {
11230 REAL_VALUE_TYPE rv;
11231 real_from_string (&rv, "0.375");
11232 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11233 }
11234 return ia64_dconst_0_375_rtx;
11235 }
11236
11237 static machine_mode
11238 ia64_get_reg_raw_mode (int regno)
11239 {
11240 if (FR_REGNO_P (regno))
11241 return XFmode;
11242 return default_get_reg_raw_mode (regno);
11243 }
11244
11245 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11246 anymore. */
11247
11248 bool
11249 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11250 {
11251 return TARGET_HPUX && mode == TFmode;
11252 }
11253
11254 /* Always default to .text section until HP-UX linker is fixed. */
11255
11256 ATTRIBUTE_UNUSED static section *
11257 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11258 enum node_frequency freq ATTRIBUTE_UNUSED,
11259 bool startup ATTRIBUTE_UNUSED,
11260 bool exit ATTRIBUTE_UNUSED)
11261 {
11262 return NULL;
11263 }
11264 \f
11265 /* Construct (set target (vec_select op0 (parallel perm))) and
11266 return true if that's a valid instruction in the active ISA. */
11267
11268 static bool
11269 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11270 {
11271 rtx rperm[MAX_VECT_LEN], x;
11272 unsigned i;
11273
11274 for (i = 0; i < nelt; ++i)
11275 rperm[i] = GEN_INT (perm[i]);
11276
11277 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11278 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11279 x = gen_rtx_SET (target, x);
11280
11281 rtx_insn *insn = emit_insn (x);
11282 if (recog_memoized (insn) < 0)
11283 {
11284 remove_insn (insn);
11285 return false;
11286 }
11287 return true;
11288 }
11289
11290 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11291
11292 static bool
11293 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11294 const unsigned char *perm, unsigned nelt)
11295 {
11296 machine_mode v2mode;
11297 rtx x;
11298
11299 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11300 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11301 return expand_vselect (target, x, perm, nelt);
11302 }
11303
11304 /* Try to expand a no-op permutation. */
11305
11306 static bool
11307 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11308 {
11309 unsigned i, nelt = d->nelt;
11310
11311 for (i = 0; i < nelt; ++i)
11312 if (d->perm[i] != i)
11313 return false;
11314
11315 if (!d->testing_p)
11316 emit_move_insn (d->target, d->op0);
11317
11318 return true;
11319 }
11320
11321 /* Try to expand D via a shrp instruction. */
11322
11323 static bool
11324 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11325 {
11326 unsigned i, nelt = d->nelt, shift, mask;
11327 rtx tmp, hi, lo;
11328
11329 /* ??? Don't force V2SFmode into the integer registers. */
11330 if (d->vmode == V2SFmode)
11331 return false;
11332
11333 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11334
11335 shift = d->perm[0];
11336 if (BYTES_BIG_ENDIAN && shift > nelt)
11337 return false;
11338
11339 for (i = 1; i < nelt; ++i)
11340 if (d->perm[i] != ((shift + i) & mask))
11341 return false;
11342
11343 if (d->testing_p)
11344 return true;
11345
11346 hi = shift < nelt ? d->op1 : d->op0;
11347 lo = shift < nelt ? d->op0 : d->op1;
11348
11349 shift %= nelt;
11350
11351 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11352
11353 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11354 gcc_assert (IN_RANGE (shift, 1, 63));
11355
11356 /* Recall that big-endian elements are numbered starting at the top of
11357 the register. Ideally we'd have a shift-left-pair. But since we
11358 don't, convert to a shift the other direction. */
11359 if (BYTES_BIG_ENDIAN)
11360 shift = 64 - shift;
11361
11362 tmp = gen_reg_rtx (DImode);
11363 hi = gen_lowpart (DImode, hi);
11364 lo = gen_lowpart (DImode, lo);
11365 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11366
11367 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11368 return true;
11369 }
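
/* A worked example, assuming a little-endian target: for the
   two-operand V8QImode permutation { 2, 3, 4, 5, 6, 7, 8, 9 }
   (elements 0-7 index op0, 8-15 index op1), shift is 2 and mask is 15,
   so every element matches (shift + i) & mask; hi = op1, lo = op0, and
   the shrp emitted above shifts the hi:lo pair right by 2 * 8 = 16
   bits, yielding bytes 2..7 of op0 followed by bytes 0..1 of op1.  */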
11370
11371 /* Try to instantiate D in a single instruction. */
11372
11373 static bool
11374 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11375 {
11376 unsigned i, nelt = d->nelt;
11377 unsigned char perm2[MAX_VECT_LEN];
11378
11379 /* Try single-operand selections. */
11380 if (d->one_operand_p)
11381 {
11382 if (expand_vec_perm_identity (d))
11383 return true;
11384 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11385 return true;
11386 }
11387
11388 /* Try two operand selections. */
11389 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11390 return true;
11391
11392 /* Recognize interleave style patterns with reversed operands. */
11393 if (!d->one_operand_p)
11394 {
11395 for (i = 0; i < nelt; ++i)
11396 {
11397 unsigned e = d->perm[i];
11398 if (e >= nelt)
11399 e -= nelt;
11400 else
11401 e += nelt;
11402 perm2[i] = e;
11403 }
11404
11405 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11406 return true;
11407 }
11408
11409 if (expand_vec_perm_shrp (d))
11410 return true;
11411
11412 /* ??? Look for deposit-like permutations where most of the result
11413 comes from one vector unchanged and the rest comes from a
11414 sequential hunk of the other vector. */
11415
11416 return false;
11417 }
11418
11419 /* Pattern match broadcast permutations. */
11420
11421 static bool
11422 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11423 {
11424 unsigned i, elt, nelt = d->nelt;
11425 unsigned char perm2[2];
11426 rtx temp;
11427 bool ok;
11428
11429 if (!d->one_operand_p)
11430 return false;
11431
11432 elt = d->perm[0];
11433 for (i = 1; i < nelt; ++i)
11434 if (d->perm[i] != elt)
11435 return false;
11436
11437 switch (d->vmode)
11438 {
11439 case V2SImode:
11440 case V2SFmode:
11441 /* Implementable by interleave. */
11442 perm2[0] = elt;
11443 perm2[1] = elt + 2;
11444 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11445 gcc_assert (ok);
11446 break;
11447
11448 case V8QImode:
11449 /* Implementable by extract + broadcast. */
11450 if (BYTES_BIG_ENDIAN)
11451 elt = 7 - elt;
11452 elt *= BITS_PER_UNIT;
11453 temp = gen_reg_rtx (DImode);
11454 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11455 GEN_INT (8), GEN_INT (elt)));
11456 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11457 break;
11458
11459 case V4HImode:
11460 /* Should have been matched directly by vec_select. */
11461 default:
11462 gcc_unreachable ();
11463 }
11464
11465 return true;
11466 }
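
/* For example, broadcasting element 3 of a V8QImode vector on a
   little-endian target extracts bits [24, 32) of the underlying
   DImode value with extzv and then replicates that byte across the
   result through the mux1_brcst_qi pattern.  */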
11467
11468 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11469 two vector permutation into a single vector permutation by using
11470 an interleave operation to merge the vectors. */
11471
11472 static bool
11473 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11474 {
11475 struct expand_vec_perm_d dremap, dfinal;
11476 unsigned char remap[2 * MAX_VECT_LEN];
11477 unsigned contents, i, nelt, nelt2;
11478 unsigned h0, h1, h2, h3;
11479 rtx_insn *seq;
11480 bool ok;
11481
11482 if (d->one_operand_p)
11483 return false;
11484
11485 nelt = d->nelt;
11486 nelt2 = nelt / 2;
11487
11488 /* Examine whence the elements come.  */
11489 contents = 0;
11490 for (i = 0; i < nelt; ++i)
11491 contents |= 1u << d->perm[i];
11492
11493 memset (remap, 0xff, sizeof (remap));
11494 dremap = *d;
11495
11496 h0 = (1u << nelt2) - 1;
11497 h1 = h0 << nelt2;
11498 h2 = h0 << nelt;
11499 h3 = h0 << (nelt + nelt2);
11500
11501 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11502 {
11503 for (i = 0; i < nelt; ++i)
11504 {
11505 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11506 remap[which] = i;
11507 dremap.perm[i] = which;
11508 }
11509 }
11510 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11511 {
11512 for (i = 0; i < nelt; ++i)
11513 {
11514 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11515 remap[which] = i;
11516 dremap.perm[i] = which;
11517 }
11518 }
11519 else if ((contents & 0x5555) == contents) /* mix even elements */
11520 {
11521 for (i = 0; i < nelt; ++i)
11522 {
11523 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11524 remap[which] = i;
11525 dremap.perm[i] = which;
11526 }
11527 }
11528 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11529 {
11530 for (i = 0; i < nelt; ++i)
11531 {
11532 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11533 remap[which] = i;
11534 dremap.perm[i] = which;
11535 }
11536 }
11537 else if (floor_log2 (contents) - ctz_hwi (contents) < (int) nelt) /* shrp */
11538 {
11539 unsigned shift = ctz_hwi (contents);
11540 for (i = 0; i < nelt; ++i)
11541 {
11542 unsigned which = (i + shift) & (2 * nelt - 1);
11543 remap[which] = i;
11544 dremap.perm[i] = which;
11545 }
11546 }
11547 else
11548 return false;
11549
11550 /* Use the remapping array set up above to move the elements from their
11551 swizzled locations into their final destinations. */
11552 dfinal = *d;
11553 for (i = 0; i < nelt; ++i)
11554 {
11555 unsigned e = remap[d->perm[i]];
11556 gcc_assert (e < nelt);
11557 dfinal.perm[i] = e;
11558 }
11559 if (d->testing_p)
11560 dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11561 else
11562 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11563 dfinal.op1 = dfinal.op0;
11564 dfinal.one_operand_p = true;
11565 dremap.target = dfinal.op0;
11566
11567 /* Test if the final remap can be done with a single insn. For V4HImode
11568 this *will* succeed. For V8QImode or V2SImode it may not. */
11569 start_sequence ();
11570 ok = expand_vec_perm_1 (&dfinal);
11571 seq = get_insns ();
11572 end_sequence ();
11573 if (!ok)
11574 return false;
11575 if (d->testing_p)
11576 return true;
11577
11578 ok = expand_vec_perm_1 (&dremap);
11579 gcc_assert (ok);
11580
11581 emit_insn (seq);
11582 return true;
11583 }
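
/* A worked V4HImode example: for the two-operand permutation
   { 1, 5, 0, 4 }, every used element comes from the low half of one
   input (elements 0, 1, 4, 5), so DREMAP becomes the interleave
   { 0, 4, 1, 5 } of the two low halves and DFINAL reduces to the
   single-operand permutation { 2, 3, 0, 1 } of that interleaved
   vector, which expand_vec_perm_1 can handle.  */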
11584
11585 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11586 constant permutation via two mux2 and a merge. */
11587
11588 static bool
11589 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11590 {
11591 unsigned char perm2[4];
11592 rtx rmask[4];
11593 unsigned i;
11594 rtx t0, t1, mask, x;
11595 bool ok;
11596
11597 if (d->vmode != V4HImode || d->one_operand_p)
11598 return false;
11599 if (d->testing_p)
11600 return true;
11601
11602 for (i = 0; i < 4; ++i)
11603 {
11604 perm2[i] = d->perm[i] & 3;
11605 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11606 }
11607 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11608 mask = force_reg (V4HImode, mask);
11609
11610 t0 = gen_reg_rtx (V4HImode);
11611 t1 = gen_reg_rtx (V4HImode);
11612
11613 ok = expand_vselect (t0, d->op0, perm2, 4);
11614 gcc_assert (ok);
11615 ok = expand_vselect (t1, d->op1, perm2, 4);
11616 gcc_assert (ok);
11617
11618 x = gen_rtx_AND (V4HImode, mask, t0);
11619 emit_insn (gen_rtx_SET (t0, x));
11620
11621 x = gen_rtx_NOT (V4HImode, mask);
11622 x = gen_rtx_AND (V4HImode, x, t1);
11623 emit_insn (gen_rtx_SET (t1, x));
11624
11625 x = gen_rtx_IOR (V4HImode, t0, t1);
11626 emit_insn (gen_rtx_SET (d->target, x));
11627
11628 return true;
11629 }
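
/* A worked example: for the V4HImode permutation { 0, 5, 2, 7 },
   PERM2 is { 0, 1, 2, 3 } and MASK is { -1, 0, -1, 0 }, so T0 and T1
   are simply the two inputs and the final merge selects
   { op0[0], op1[1], op0[2], op1[3] }.  */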
11630
11631 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11632 With all of the interface bits taken care of, perform the expansion
11633 in D and return true on success. */
11634
11635 static bool
11636 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11637 {
11638 if (expand_vec_perm_1 (d))
11639 return true;
11640 if (expand_vec_perm_broadcast (d))
11641 return true;
11642 if (expand_vec_perm_interleave_2 (d))
11643 return true;
11644 if (expand_vec_perm_v4hi_5 (d))
11645 return true;
11646 return false;
11647 }
11648
11649 bool
11650 ia64_expand_vec_perm_const (rtx operands[4])
11651 {
11652 struct expand_vec_perm_d d;
11653 unsigned char perm[MAX_VECT_LEN];
11654 int i, nelt, which;
11655 rtx sel;
11656
11657 d.target = operands[0];
11658 d.op0 = operands[1];
11659 d.op1 = operands[2];
11660 sel = operands[3];
11661
11662 d.vmode = GET_MODE (d.target);
11663 gcc_assert (VECTOR_MODE_P (d.vmode));
11664 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11665 d.testing_p = false;
11666
11667 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11668 gcc_assert (XVECLEN (sel, 0) == nelt);
11669 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11670
11671 for (i = which = 0; i < nelt; ++i)
11672 {
11673 rtx e = XVECEXP (sel, 0, i);
11674 int ei = INTVAL (e) & (2 * nelt - 1);
11675
11676 which |= (ei < nelt ? 1 : 2);
11677 d.perm[i] = ei;
11678 perm[i] = ei;
11679 }
11680
11681 switch (which)
11682 {
11683 default:
11684 gcc_unreachable ();
11685
11686 case 3:
11687 if (!rtx_equal_p (d.op0, d.op1))
11688 {
11689 d.one_operand_p = false;
11690 break;
11691 }
11692
11693 /* The elements of PERM do not suggest that only the first operand
11694 is used, but both operands are identical. Allow easier matching
11695 of the permutation by folding the permutation into the single
11696 input vector. */
11697 for (i = 0; i < nelt; ++i)
11698 if (d.perm[i] >= nelt)
11699 d.perm[i] -= nelt;
11700 /* FALLTHRU */
11701
11702 case 1:
11703 d.op1 = d.op0;
11704 d.one_operand_p = true;
11705 break;
11706
11707 case 2:
11708 for (i = 0; i < nelt; ++i)
11709 d.perm[i] -= nelt;
11710 d.op0 = d.op1;
11711 d.one_operand_p = true;
11712 break;
11713 }
11714
11715 if (ia64_expand_vec_perm_const_1 (&d))
11716 return true;
11717
11718 /* If the mask says both arguments are needed, but they are the same,
11719 the above tried to expand with one_operand_p true. If that didn't
11720 work, retry with one_operand_p false, as that's what we used in _ok. */
11721 if (which == 3 && d.one_operand_p)
11722 {
11723 memcpy (d.perm, perm, sizeof (perm));
11724 d.one_operand_p = false;
11725 return ia64_expand_vec_perm_const_1 (&d);
11726 }
11727
11728 return false;
11729 }
11730
11731 /* Implement targetm.vectorize.vec_perm_const_ok. */
11732
11733 static bool
11734 ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
11735 const unsigned char *sel)
11736 {
11737 struct expand_vec_perm_d d;
11738 unsigned int i, nelt, which;
11739 bool ret;
11740
11741 d.vmode = vmode;
11742 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11743 d.testing_p = true;
11744
11745 /* Extract the values from the vector CST into the permutation
11746 array in D. */
11747 memcpy (d.perm, sel, nelt);
11748 for (i = which = 0; i < nelt; ++i)
11749 {
11750 unsigned char e = d.perm[i];
11751 gcc_assert (e < 2 * nelt);
11752 which |= (e < nelt ? 1 : 2);
11753 }
11754
11755 /* If all elements come from the second vector, fold them onto the first.  */
11756 if (which == 2)
11757 for (i = 0; i < nelt; ++i)
11758 d.perm[i] -= nelt;
11759
11760 /* Check whether the mask can be applied to the vector type. */
11761 d.one_operand_p = (which != 3);
11762
11763 /* Otherwise we have to go through the motions and see if we can
11764 figure out how to generate the requested permutation. */
11765 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11766 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11767 if (!d.one_operand_p)
11768 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11769
11770 start_sequence ();
11771 ret = ia64_expand_vec_perm_const_1 (&d);
11772 end_sequence ();
11773
11774 return ret;
11775 }
11776
11777 void
11778 ia64_expand_vec_setv2sf (rtx operands[3])
11779 {
11780 struct expand_vec_perm_d d;
11781 unsigned int which;
11782 bool ok;
11783
11784 d.target = operands[0];
11785 d.op0 = operands[0];
11786 d.op1 = gen_reg_rtx (V2SFmode);
11787 d.vmode = V2SFmode;
11788 d.nelt = 2;
11789 d.one_operand_p = false;
11790 d.testing_p = false;
11791
11792 which = INTVAL (operands[2]);
11793 gcc_assert (which <= 1);
11794 d.perm[0] = 1 - which;
11795 d.perm[1] = which + 2;
11796
11797 emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11798
11799 ok = ia64_expand_vec_perm_const_1 (&d);
11800 gcc_assert (ok);
11801 }
11802
11803 void
11804 ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11805 {
11806 struct expand_vec_perm_d d;
11807 machine_mode vmode = GET_MODE (target);
11808 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11809 bool ok;
11810
11811 d.target = target;
11812 d.op0 = op0;
11813 d.op1 = op1;
11814 d.vmode = vmode;
11815 d.nelt = nelt;
11816 d.one_operand_p = false;
11817 d.testing_p = false;
11818
11819 for (i = 0; i < nelt; ++i)
11820 d.perm[i] = i * 2 + odd;
11821
11822 ok = ia64_expand_vec_perm_const_1 (&d);
11823 gcc_assert (ok);
11824 }
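
/* For example, with V4HImode inputs and ODD == 1 this builds the
   permutation { 1, 3, 5, 7 }, i.e. the odd-numbered elements of the
   op0/op1 concatenation.  */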
11825
11826 #include "gt-ia64.h"