1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
55
56 /* This is used for communication between ASM_OUTPUT_LABEL and
57 ASM_OUTPUT_LABELREF. */
58 int ia64_asm_output_label = 0;
59
60 /* Define the information needed to generate branch and scc insns. This is
61 stored from the compare operation. */
62 struct rtx_def * ia64_compare_op0;
63 struct rtx_def * ia64_compare_op1;
64
65 /* Register names for ia64_expand_prologue. */
66 static const char * const ia64_reg_numbers[96] =
67 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
68 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
69 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
70 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
71 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
72 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
73 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
74 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
75 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
76 "r104","r105","r106","r107","r108","r109","r110","r111",
77 "r112","r113","r114","r115","r116","r117","r118","r119",
78 "r120","r121","r122","r123","r124","r125","r126","r127"};
79
80 /* ??? These strings could be shared with REGISTER_NAMES. */
81 static const char * const ia64_input_reg_names[8] =
82 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
83
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_local_reg_names[80] =
86 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
87 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
88 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
89 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
90 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
91 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
92 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
93 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
94 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
95 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
96
97 /* ??? These strings could be shared with REGISTER_NAMES. */
98 static const char * const ia64_output_reg_names[8] =
99 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
100
101 /* String used with the -mfixed-range= option. */
102 const char *ia64_fixed_range_string;
103
104 /* Determines whether we use adds, addl, or movl to generate our
105 TLS immediate offsets. */
106 int ia64_tls_size = 22;
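/* Editor's note (illustrative, not from the original source): the three
   instruction choices correspond to the values accepted by -mtls-size=:
   14-bit offsets fit an "adds" immediate, 22-bit offsets an "addl"
   immediate, and 64-bit offsets require a "movl"; 22 is the default
   initialized above.  */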
107
108 /* String used with the -mtls-size= option. */
109 const char *ia64_tls_size_string;
110
   111 /* Which CPU we are scheduling for.  */
112 enum processor_type ia64_tune;
113
   115 /* String used with the -mtune= option.  */
115 const char *ia64_tune_string;
116
117 /* Determines whether we run our final scheduling pass or not. We always
118 avoid the normal second scheduling pass. */
119 static int ia64_flag_schedule_insns2;
120
121 /* Determines whether we run variable tracking in machine dependent
122 reorganization. */
123 static int ia64_flag_var_tracking;
124
125 /* Variables which are this size or smaller are put in the sdata/sbss
126 sections. */
127
128 unsigned int ia64_section_threshold;
129
130 /* The following variable is used by the DFA insn scheduler. The value is
131 TRUE if we do insn bundling instead of insn scheduling. */
132 int bundling_p = 0;
133
134 /* Structure to be filled in by ia64_compute_frame_size with register
135 save masks and offsets for the current function. */
136
137 struct ia64_frame_info
138 {
139 HOST_WIDE_INT total_size; /* size of the stack frame, not including
140 the caller's scratch area. */
141 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
142 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
143 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
144 HARD_REG_SET mask; /* mask of saved registers. */
145 unsigned int gr_used_mask; /* mask of registers in use as gr spill
146 registers or long-term scratches. */
147 int n_spilled; /* number of spilled registers. */
148 int reg_fp; /* register for fp. */
149 int reg_save_b0; /* save register for b0. */
150 int reg_save_pr; /* save register for prs. */
151 int reg_save_ar_pfs; /* save register for ar.pfs. */
152 int reg_save_ar_unat; /* save register for ar.unat. */
153 int reg_save_ar_lc; /* save register for ar.lc. */
154 int reg_save_gp; /* save register for gp. */
155 int n_input_regs; /* number of input registers used. */
156 int n_local_regs; /* number of local registers used. */
157 int n_output_regs; /* number of output registers used. */
158 int n_rotate_regs; /* number of rotating registers used. */
159
160 char need_regstk; /* true if a .regstk directive needed. */
161 char initialized; /* true if the data is finalized. */
162 };
163
164 /* Current frame information calculated by ia64_compute_frame_size. */
165 static struct ia64_frame_info current_frame_info;
166 \f
167 static int ia64_first_cycle_multipass_dfa_lookahead (void);
168 static void ia64_dependencies_evaluation_hook (rtx, rtx);
169 static void ia64_init_dfa_pre_cycle_insn (void);
170 static rtx ia64_dfa_pre_cycle_insn (void);
171 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
172 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
173 static rtx gen_tls_get_addr (void);
174 static rtx gen_thread_pointer (void);
175 static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
176 static int find_gr_spill (int);
177 static int next_scratch_gr_reg (void);
178 static void mark_reg_gr_used_mask (rtx, void *);
179 static void ia64_compute_frame_size (HOST_WIDE_INT);
180 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
181 static void finish_spill_pointers (void);
182 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
183 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
184 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
185 static rtx gen_movdi_x (rtx, rtx, rtx);
186 static rtx gen_fr_spill_x (rtx, rtx, rtx);
187 static rtx gen_fr_restore_x (rtx, rtx, rtx);
188
189 static enum machine_mode hfa_element_mode (tree, int);
190 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
191 tree, int *, int);
192 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
193 tree, bool);
194 static bool ia64_function_ok_for_sibcall (tree, tree);
195 static bool ia64_return_in_memory (tree, tree);
196 static bool ia64_rtx_costs (rtx, int, int, int *);
197 static void fix_range (const char *);
198 static struct machine_function * ia64_init_machine_status (void);
199 static void emit_insn_group_barriers (FILE *);
200 static void emit_all_insn_group_barriers (FILE *);
201 static void final_emit_insn_group_barriers (FILE *);
202 static void emit_predicate_relation_info (void);
203 static void ia64_reorg (void);
204 static bool ia64_in_small_data_p (tree);
205 static void process_epilogue (void);
206 static int process_set (FILE *, rtx);
207
208 static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
209 static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
210 static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
211 int, tree, rtx);
212 static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
213 static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
214 static bool ia64_assemble_integer (rtx, unsigned int, int);
215 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
216 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
217 static void ia64_output_function_end_prologue (FILE *);
218
219 static int ia64_issue_rate (void);
220 static int ia64_adjust_cost (rtx, rtx, rtx, int);
221 static void ia64_sched_init (FILE *, int, int);
222 static void ia64_sched_finish (FILE *, int);
223 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
224 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
225 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
226 static int ia64_variable_issue (FILE *, int, rtx, int);
227
228 static struct bundle_state *get_free_bundle_state (void);
229 static void free_bundle_state (struct bundle_state *);
230 static void initiate_bundle_states (void);
231 static void finish_bundle_states (void);
232 static unsigned bundle_state_hash (const void *);
233 static int bundle_state_eq_p (const void *, const void *);
234 static int insert_bundle_state (struct bundle_state *);
235 static void initiate_bundle_state_table (void);
236 static void finish_bundle_state_table (void);
237 static int try_issue_nops (struct bundle_state *, int);
238 static int try_issue_insn (struct bundle_state *, rtx);
239 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
240 static int get_max_pos (state_t);
241 static int get_template (state_t, int);
242
243 static rtx get_next_important_insn (rtx, rtx);
244 static void bundling (FILE *, int, rtx, rtx);
245
246 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
247 HOST_WIDE_INT, tree);
248 static void ia64_file_start (void);
249
250 static void ia64_select_rtx_section (enum machine_mode, rtx,
251 unsigned HOST_WIDE_INT);
252 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
253 ATTRIBUTE_UNUSED;
254 static void ia64_rwreloc_unique_section (tree, int)
255 ATTRIBUTE_UNUSED;
256 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
257 unsigned HOST_WIDE_INT)
258 ATTRIBUTE_UNUSED;
259 static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
260 ATTRIBUTE_UNUSED;
261
262 static void ia64_hpux_add_extern_decl (tree decl)
263 ATTRIBUTE_UNUSED;
264 static void ia64_hpux_file_end (void)
265 ATTRIBUTE_UNUSED;
266 static void ia64_init_libfuncs (void)
267 ATTRIBUTE_UNUSED;
268 static void ia64_hpux_init_libfuncs (void)
269 ATTRIBUTE_UNUSED;
270 static void ia64_sysv4_init_libfuncs (void)
271 ATTRIBUTE_UNUSED;
272 static void ia64_vms_init_libfuncs (void)
273 ATTRIBUTE_UNUSED;
274
275 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
276 static void ia64_encode_section_info (tree, rtx, int);
277 static rtx ia64_struct_value_rtx (tree, int);
278 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
279 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
280
281 \f
282 /* Table of valid machine attributes. */
283 static const struct attribute_spec ia64_attribute_table[] =
284 {
285 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
286 { "syscall_linkage", 0, 0, false, true, true, NULL },
287 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
288 { NULL, 0, 0, false, false, false, NULL }
289 };
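/* Editor's note (illustrative, not from the original file): the "model"
   attribute above is what users write to place an object in the small
   address area, e.g.

     static int counter __attribute__ ((model ("small")));

   while "syscall_linkage" takes no arguments and, per the table, applies
   to function types.  */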
290
291 /* Initialize the GCC target structure. */
292 #undef TARGET_ATTRIBUTE_TABLE
293 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
294
295 #undef TARGET_INIT_BUILTINS
296 #define TARGET_INIT_BUILTINS ia64_init_builtins
297
298 #undef TARGET_EXPAND_BUILTIN
299 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
300
301 #undef TARGET_ASM_BYTE_OP
302 #define TARGET_ASM_BYTE_OP "\tdata1\t"
303 #undef TARGET_ASM_ALIGNED_HI_OP
304 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
305 #undef TARGET_ASM_ALIGNED_SI_OP
306 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
307 #undef TARGET_ASM_ALIGNED_DI_OP
308 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
309 #undef TARGET_ASM_UNALIGNED_HI_OP
310 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
311 #undef TARGET_ASM_UNALIGNED_SI_OP
312 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
313 #undef TARGET_ASM_UNALIGNED_DI_OP
314 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
315 #undef TARGET_ASM_INTEGER
316 #define TARGET_ASM_INTEGER ia64_assemble_integer
317
318 #undef TARGET_ASM_FUNCTION_PROLOGUE
319 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
320 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
321 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
322 #undef TARGET_ASM_FUNCTION_EPILOGUE
323 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
324
325 #undef TARGET_IN_SMALL_DATA_P
326 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
327
328 #undef TARGET_SCHED_ADJUST_COST
329 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
330 #undef TARGET_SCHED_ISSUE_RATE
331 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
332 #undef TARGET_SCHED_VARIABLE_ISSUE
333 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
334 #undef TARGET_SCHED_INIT
335 #define TARGET_SCHED_INIT ia64_sched_init
336 #undef TARGET_SCHED_FINISH
337 #define TARGET_SCHED_FINISH ia64_sched_finish
338 #undef TARGET_SCHED_REORDER
339 #define TARGET_SCHED_REORDER ia64_sched_reorder
340 #undef TARGET_SCHED_REORDER2
341 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
342
343 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
344 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
345
346 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
347 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
348
349 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
350 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
351 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
352 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
353
354 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
355 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
356 ia64_first_cycle_multipass_dfa_lookahead_guard
357
358 #undef TARGET_SCHED_DFA_NEW_CYCLE
359 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
360
361 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
362 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
363 #undef TARGET_PASS_BY_REFERENCE
364 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
365
366 #undef TARGET_ASM_OUTPUT_MI_THUNK
367 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
368 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
369 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
370
371 #undef TARGET_ASM_FILE_START
372 #define TARGET_ASM_FILE_START ia64_file_start
373
374 #undef TARGET_RTX_COSTS
375 #define TARGET_RTX_COSTS ia64_rtx_costs
376 #undef TARGET_ADDRESS_COST
377 #define TARGET_ADDRESS_COST hook_int_rtx_0
378
379 #undef TARGET_MACHINE_DEPENDENT_REORG
380 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
381
382 #undef TARGET_ENCODE_SECTION_INFO
383 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
384
385 /* ??? ABI doesn't allow us to define this. */
386 #if 0
387 #undef TARGET_PROMOTE_FUNCTION_ARGS
388 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
389 #endif
390
391 /* ??? ABI doesn't allow us to define this. */
392 #if 0
393 #undef TARGET_PROMOTE_FUNCTION_RETURN
394 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
395 #endif
396
397 /* ??? Investigate. */
398 #if 0
399 #undef TARGET_PROMOTE_PROTOTYPES
400 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
401 #endif
402
403 #undef TARGET_STRUCT_VALUE_RTX
404 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
405 #undef TARGET_RETURN_IN_MEMORY
406 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
407 #undef TARGET_SETUP_INCOMING_VARARGS
408 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
409 #undef TARGET_STRICT_ARGUMENT_NAMING
410 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
411 #undef TARGET_MUST_PASS_IN_STACK
412 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
413
414 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
415 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
416
417 #undef TARGET_UNWIND_EMIT
418 #define TARGET_UNWIND_EMIT process_for_unwind_directive
419
420 #undef TARGET_SCALAR_MODE_SUPPORTED_P
421 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
422
423 struct gcc_target targetm = TARGET_INITIALIZER;
424 \f
425 typedef enum
426 {
427 ADDR_AREA_NORMAL, /* normal address area */
428 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
429 }
430 ia64_addr_area;
431
432 static GTY(()) tree small_ident1;
433 static GTY(()) tree small_ident2;
434
435 static void
436 init_idents (void)
437 {
438 if (small_ident1 == 0)
439 {
440 small_ident1 = get_identifier ("small");
441 small_ident2 = get_identifier ("__small__");
442 }
443 }
444
445 /* Retrieve the address area that has been chosen for the given decl. */
446
447 static ia64_addr_area
448 ia64_get_addr_area (tree decl)
449 {
450 tree model_attr;
451
452 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
453 if (model_attr)
454 {
455 tree id;
456
457 init_idents ();
458 id = TREE_VALUE (TREE_VALUE (model_attr));
459 if (id == small_ident1 || id == small_ident2)
460 return ADDR_AREA_SMALL;
461 }
462 return ADDR_AREA_NORMAL;
463 }
464
465 static tree
466 ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
467 {
468 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
469 ia64_addr_area area;
470 tree arg, decl = *node;
471
472 init_idents ();
473 arg = TREE_VALUE (args);
474 if (arg == small_ident1 || arg == small_ident2)
475 {
476 addr_area = ADDR_AREA_SMALL;
477 }
478 else
479 {
480 warning ("invalid argument of `%s' attribute",
481 IDENTIFIER_POINTER (name));
482 *no_add_attrs = true;
483 }
484
485 switch (TREE_CODE (decl))
486 {
487 case VAR_DECL:
488 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
489 == FUNCTION_DECL)
490 && !TREE_STATIC (decl))
491 {
492 error ("%Jan address area attribute cannot be specified for "
493 "local variables", decl, decl);
494 *no_add_attrs = true;
495 }
496 area = ia64_get_addr_area (decl);
497 if (area != ADDR_AREA_NORMAL && addr_area != area)
498 {
499 error ("%Jaddress area of '%s' conflicts with previous "
500 "declaration", decl, decl);
501 *no_add_attrs = true;
502 }
503 break;
504
505 case FUNCTION_DECL:
506 error ("%Jaddress area attribute cannot be specified for functions",
507 decl, decl);
508 *no_add_attrs = true;
509 break;
510
511 default:
512 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
513 *no_add_attrs = true;
514 break;
515 }
516
517 return NULL_TREE;
518 }
519
520 static void
521 ia64_encode_addr_area (tree decl, rtx symbol)
522 {
523 int flags;
524
525 flags = SYMBOL_REF_FLAGS (symbol);
526 switch (ia64_get_addr_area (decl))
527 {
528 case ADDR_AREA_NORMAL: break;
529 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
530 default: abort ();
531 }
532 SYMBOL_REF_FLAGS (symbol) = flags;
533 }
534
535 static void
536 ia64_encode_section_info (tree decl, rtx rtl, int first)
537 {
538 default_encode_section_info (decl, rtl, first);
539
540 /* Careful not to prod global register variables. */
541 if (TREE_CODE (decl) == VAR_DECL
542 && GET_CODE (DECL_RTL (decl)) == MEM
543 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
544 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
545 ia64_encode_addr_area (decl, XEXP (rtl, 0));
546 }
547 \f
548 /* Return 1 if the operands of a move are ok. */
549
550 int
551 ia64_move_ok (rtx dst, rtx src)
552 {
553 /* If we're under init_recog_no_volatile, we'll not be able to use
554 memory_operand. So check the code directly and don't worry about
555 the validity of the underlying address, which should have been
556 checked elsewhere anyway. */
557 if (GET_CODE (dst) != MEM)
558 return 1;
559 if (GET_CODE (src) == MEM)
560 return 0;
561 if (register_operand (src, VOIDmode))
562 return 1;
563
   564   /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0.  */
565 if (INTEGRAL_MODE_P (GET_MODE (dst)))
566 return src == const0_rtx;
567 else
568 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
569 }
570
571 int
572 addp4_optimize_ok (rtx op1, rtx op2)
573 {
574 return (basereg_operand (op1, GET_MODE(op1)) !=
575 basereg_operand (op2, GET_MODE(op2)));
576 }
577
578 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
579 Return the length of the field, or <= 0 on failure. */
580
581 int
582 ia64_depz_field_mask (rtx rop, rtx rshift)
583 {
584 unsigned HOST_WIDE_INT op = INTVAL (rop);
585 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
586
587 /* Get rid of the zero bits we're shifting in. */
588 op >>= shift;
589
590 /* We must now have a solid block of 1's at bit 0. */
591 return exact_log2 (op + 1);
592 }
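/* Editor's sketch (illustrative, not part of the original file): the test
   above amounts to "after the shift, the operand must be a contiguous block
   of 1s starting at bit 0", i.e. op + 1 must be a power of two.  A
   standalone rendering of the same arithmetic:  */
#if 0
#include <stdio.h>

static int
depz_field_width (unsigned long long op, unsigned int shift)
{
  int width = 0;

  op >>= shift;                 /* drop the zero bits being shifted in */
  if ((op & (op + 1)) != 0)     /* not a solid block of 1s at bit 0 */
    return -1;
  while (op)
    {
      op >>= 1;
      width++;
    }
  return width;                 /* 0 counts as failure, matching "<= 0" */
}

int
main (void)
{
  printf ("%d\n", depz_field_width (0xff0, 4));   /* prints 8 */
  printf ("%d\n", depz_field_width (0x5, 0));     /* prints -1 */
  return 0;
}
#endif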
593
594 /* Expand a symbolic constant load. */
595
596 void
597 ia64_expand_load_address (rtx dest, rtx src)
598 {
599 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (src))
600 abort ();
601 if (GET_CODE (dest) != REG)
602 abort ();
603
   604   /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
605 having to pointer-extend the value afterward. Other forms of address
606 computation below are also more natural to compute as 64-bit quantities.
607 If we've been given an SImode destination register, change it. */
608 if (GET_MODE (dest) != Pmode)
609 dest = gen_rtx_REG (Pmode, REGNO (dest));
610
611 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
612 {
613 emit_insn (gen_rtx_SET (VOIDmode, dest, src));
614 return;
615 }
616 else if (TARGET_AUTO_PIC)
617 {
618 emit_insn (gen_load_gprel64 (dest, src));
619 return;
620 }
621 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
622 {
623 emit_insn (gen_load_fptr (dest, src));
624 return;
625 }
626 else if (sdata_symbolic_operand (src, VOIDmode))
627 {
628 emit_insn (gen_load_gprel (dest, src));
629 return;
630 }
631
632 if (GET_CODE (src) == CONST
633 && GET_CODE (XEXP (src, 0)) == PLUS
634 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
635 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x3fff) != 0)
636 {
637 rtx sym = XEXP (XEXP (src, 0), 0);
638 HOST_WIDE_INT ofs, hi, lo;
639
640 /* Split the offset into a sign extended 14-bit low part
641 and a complementary high part. */
642 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
643 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
644 hi = ofs - lo;
645
646 ia64_expand_load_address (dest, plus_constant (sym, hi));
647 emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
648 }
649 else
650 {
651 rtx tmp;
652
653 tmp = gen_rtx_HIGH (Pmode, src);
654 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
655 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
656
657 tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
658 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
659 }
660 }
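/* Editor's sketch (illustrative, not part of the original file): the split
   above decomposes an arbitrary offset into a 14-bit sign-extended low part
   (which fits an "adds" immediate) plus a complementary high part folded
   into the symbol.  Standalone:  */
#if 0
#include <assert.h>
#include <stdio.h>

int
main (void)
{
  long long ofs = 0x12345;
  long long lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;   /* sign-extend low 14 bits */
  long long hi = ofs - lo;

  assert (hi + lo == ofs);
  assert (lo >= -8192 && lo <= 8191);
  assert ((hi & 0x3fff) == 0);
  printf ("hi=%#llx lo=%lld\n", hi, lo);   /* hi=0x14000 lo=-7355 */
  return 0;
}
#endif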
661
662 static GTY(()) rtx gen_tls_tga;
663 static rtx
664 gen_tls_get_addr (void)
665 {
666 if (!gen_tls_tga)
667 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
668 return gen_tls_tga;
669 }
670
671 static GTY(()) rtx thread_pointer_rtx;
672 static rtx
673 gen_thread_pointer (void)
674 {
675 if (!thread_pointer_rtx)
676 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
677 return thread_pointer_rtx;
678 }
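/* Editor's note: hard register 13 above is the IA-64 ABI thread pointer
   (tp), so gen_thread_pointer simply materializes a reference to r13.  */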
679
680 static rtx
681 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
682 {
683 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
684 rtx orig_op0 = op0;
685
686 switch (tls_kind)
687 {
688 case TLS_MODEL_GLOBAL_DYNAMIC:
689 start_sequence ();
690
691 tga_op1 = gen_reg_rtx (Pmode);
692 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
693 tga_op1 = gen_const_mem (Pmode, tga_op1);
694
695 tga_op2 = gen_reg_rtx (Pmode);
696 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
697 tga_op2 = gen_const_mem (Pmode, tga_op2);
698
699 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
700 LCT_CONST, Pmode, 2, tga_op1,
701 Pmode, tga_op2, Pmode);
702
703 insns = get_insns ();
704 end_sequence ();
705
706 if (GET_MODE (op0) != Pmode)
707 op0 = tga_ret;
708 emit_libcall_block (insns, op0, tga_ret, op1);
709 break;
710
711 case TLS_MODEL_LOCAL_DYNAMIC:
   712     case TLS_MODEL_LOCAL_DYNAMIC:
   713       /* ??? This isn't the completely proper way to do local-dynamic.
   714          If the call to __tls_get_addr is used only by a single symbol,
714 then we should (somehow) move the dtprel to the second arg
715 to avoid the extra add. */
716 start_sequence ();
717
718 tga_op1 = gen_reg_rtx (Pmode);
719 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
720 tga_op1 = gen_const_mem (Pmode, tga_op1);
721
722 tga_op2 = const0_rtx;
723
724 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
725 LCT_CONST, Pmode, 2, tga_op1,
726 Pmode, tga_op2, Pmode);
727
728 insns = get_insns ();
729 end_sequence ();
730
731 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
732 UNSPEC_LD_BASE);
733 tmp = gen_reg_rtx (Pmode);
734 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
735
736 if (!register_operand (op0, Pmode))
737 op0 = gen_reg_rtx (Pmode);
738 if (TARGET_TLS64)
739 {
740 emit_insn (gen_load_dtprel (op0, op1));
741 emit_insn (gen_adddi3 (op0, tmp, op0));
742 }
743 else
744 emit_insn (gen_add_dtprel (op0, tmp, op1));
745 break;
746
747 case TLS_MODEL_INITIAL_EXEC:
748 tmp = gen_reg_rtx (Pmode);
749 emit_insn (gen_load_ltoff_tprel (tmp, op1));
750 tmp = gen_const_mem (Pmode, tmp);
751 tmp = force_reg (Pmode, tmp);
752
753 if (!register_operand (op0, Pmode))
754 op0 = gen_reg_rtx (Pmode);
755 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
756 break;
757
758 case TLS_MODEL_LOCAL_EXEC:
759 if (!register_operand (op0, Pmode))
760 op0 = gen_reg_rtx (Pmode);
761 if (TARGET_TLS64)
762 {
763 emit_insn (gen_load_tprel (op0, op1));
764 emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
765 }
766 else
767 emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
768 break;
769
770 default:
771 abort ();
772 }
773
774 if (orig_op0 == op0)
775 return NULL_RTX;
776 if (GET_MODE (orig_op0) == Pmode)
777 return op0;
778 return gen_lowpart (GET_MODE (orig_op0), op0);
779 }
780
781 rtx
782 ia64_expand_move (rtx op0, rtx op1)
783 {
784 enum machine_mode mode = GET_MODE (op0);
785
786 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
787 op1 = force_reg (mode, op1);
788
789 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
790 {
791 enum tls_model tls_kind;
792 if (GET_CODE (op1) == SYMBOL_REF
793 && (tls_kind = SYMBOL_REF_TLS_MODEL (op1)))
794 return ia64_expand_tls_address (tls_kind, op0, op1);
795
796 if (!TARGET_NO_PIC && reload_completed)
797 {
798 ia64_expand_load_address (op0, op1);
799 return NULL_RTX;
800 }
801 }
802
803 return op1;
804 }
805
806 /* Split a move from OP1 to OP0 conditional on COND. */
807
808 void
809 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
810 {
811 rtx insn, first = get_last_insn ();
812
813 emit_move_insn (op0, op1);
814
815 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
816 if (INSN_P (insn))
817 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
818 PATTERN (insn));
819 }
820
821 /* Split a post-reload TImode or TFmode reference into two DImode
822 components. This is made extra difficult by the fact that we do
823 not get any scratch registers to work with, because reload cannot
824 be prevented from giving us a scratch that overlaps the register
825 pair involved. So instead, when addressing memory, we tweak the
826 pointer register up and back down with POST_INCs. Or up and not
827 back down when we can get away with it.
828
829 REVERSED is true when the loads must be done in reversed order
830 (high word first) for correctness. DEAD is true when the pointer
831 dies with the second insn we generate and therefore the second
832 address must not carry a postmodify.
833
834 May return an insn which is to be emitted after the moves. */
835
836 static rtx
837 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
838 {
839 rtx fixup = 0;
840
841 switch (GET_CODE (in))
842 {
843 case REG:
844 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
845 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
846 break;
847
848 case CONST_INT:
849 case CONST_DOUBLE:
850 /* Cannot occur reversed. */
851 if (reversed) abort ();
852
853 if (GET_MODE (in) != TFmode)
854 split_double (in, &out[0], &out[1]);
855 else
856 /* split_double does not understand how to split a TFmode
857 quantity into a pair of DImode constants. */
858 {
859 REAL_VALUE_TYPE r;
860 unsigned HOST_WIDE_INT p[2];
861 long l[4]; /* TFmode is 128 bits */
862
863 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
864 real_to_target (l, &r, TFmode);
865
866 if (FLOAT_WORDS_BIG_ENDIAN)
867 {
868 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
869 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
870 }
871 else
872 {
873 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
874 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
875 }
876 out[0] = GEN_INT (p[0]);
877 out[1] = GEN_INT (p[1]);
878 }
879 break;
880
881 case MEM:
882 {
883 rtx base = XEXP (in, 0);
884 rtx offset;
885
886 switch (GET_CODE (base))
887 {
888 case REG:
889 if (!reversed)
890 {
891 out[0] = adjust_automodify_address
892 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
893 out[1] = adjust_automodify_address
894 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
895 }
896 else
897 {
898 /* Reversal requires a pre-increment, which can only
899 be done as a separate insn. */
900 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
901 out[0] = adjust_automodify_address
902 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
903 out[1] = adjust_address (in, DImode, 0);
904 }
905 break;
906
907 case POST_INC:
908 if (reversed || dead) abort ();
909 /* Just do the increment in two steps. */
910 out[0] = adjust_automodify_address (in, DImode, 0, 0);
911 out[1] = adjust_automodify_address (in, DImode, 0, 8);
912 break;
913
914 case POST_DEC:
915 if (reversed || dead) abort ();
916 /* Add 8, subtract 24. */
917 base = XEXP (base, 0);
918 out[0] = adjust_automodify_address
919 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
920 out[1] = adjust_automodify_address
921 (in, DImode,
922 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
923 8);
924 break;
925
926 case POST_MODIFY:
927 if (reversed || dead) abort ();
928 /* Extract and adjust the modification. This case is
929 trickier than the others, because we might have an
930 index register, or we might have a combined offset that
931 doesn't fit a signed 9-bit displacement field. We can
932 assume the incoming expression is already legitimate. */
933 offset = XEXP (base, 1);
934 base = XEXP (base, 0);
935
936 out[0] = adjust_automodify_address
937 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
938
939 if (GET_CODE (XEXP (offset, 1)) == REG)
940 {
941 /* Can't adjust the postmodify to match. Emit the
942 original, then a separate addition insn. */
943 out[1] = adjust_automodify_address (in, DImode, 0, 8);
944 fixup = gen_adddi3 (base, base, GEN_INT (-8));
945 }
946 else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
947 abort ();
948 else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
949 {
950 /* Again the postmodify cannot be made to match, but
951 in this case it's more efficient to get rid of the
952 postmodify entirely and fix up with an add insn. */
953 out[1] = adjust_automodify_address (in, DImode, base, 8);
954 fixup = gen_adddi3 (base, base,
955 GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
956 }
957 else
958 {
959 /* Combined offset still fits in the displacement field.
960 (We cannot overflow it at the high end.) */
961 out[1] = adjust_automodify_address
962 (in, DImode,
963 gen_rtx_POST_MODIFY (Pmode, base,
964 gen_rtx_PLUS (Pmode, base,
965 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
966 8);
967 }
968 break;
969
970 default:
971 abort ();
972 }
973 break;
974 }
975
976 default:
977 abort ();
978 }
979
980 return fixup;
981 }
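/* Editor's sketch (illustrative, not part of the original file): the
   CONST_DOUBLE branch above packs the four 32-bit words produced by
   real_to_target into two 64-bit halves, pairing adjacent words in an
   order that depends on FLOAT_WORDS_BIG_ENDIAN.  The same packing
   arithmetic, standalone, for the non-big-endian branch:  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long long l[4] = { 0x11111111, 0x22222222, 0x33333333, 0x44444444 };
  unsigned long long p[2];

  p[0] = (l[3] << 32) + l[2];   /* 0x4444444433333333 */
  p[1] = (l[1] << 32) + l[0];   /* 0x2222222211111111 */

  printf ("%016llx %016llx\n", p[0], p[1]);
  return 0;
}
#endif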
982
983 /* Split a TImode or TFmode move instruction after reload.
984 This is used by *movtf_internal and *movti_internal. */
985 void
986 ia64_split_tmode_move (rtx operands[])
987 {
988 rtx in[2], out[2], insn;
989 rtx fixup[2];
990 bool dead = false;
991 bool reversed = false;
992
993 /* It is possible for reload to decide to overwrite a pointer with
994 the value it points to. In that case we have to do the loads in
995 the appropriate order so that the pointer is not destroyed too
996 early. Also we must not generate a postmodify for that second
997 load, or rws_access_regno will abort. */
998 if (GET_CODE (operands[1]) == MEM
999 && reg_overlap_mentioned_p (operands[0], operands[1]))
1000 {
1001 rtx base = XEXP (operands[1], 0);
1002 while (GET_CODE (base) != REG)
1003 base = XEXP (base, 0);
1004
1005 if (REGNO (base) == REGNO (operands[0]))
1006 reversed = true;
1007 dead = true;
1008 }
1009 /* Another reason to do the moves in reversed order is if the first
1010 element of the target register pair is also the second element of
1011 the source register pair. */
1012 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1013 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1014 reversed = true;
1015
1016 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1017 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1018
1019 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1020 if (GET_CODE (EXP) == MEM \
1021 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1022 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1023 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1024 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1025 XEXP (XEXP (EXP, 0), 0), \
1026 REG_NOTES (INSN))
1027
1028 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1029 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1030 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1031
1032 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1033 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1034 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1035
1036 if (fixup[0])
1037 emit_insn (fixup[0]);
1038 if (fixup[1])
1039 emit_insn (fixup[1]);
1040
1041 #undef MAYBE_ADD_REG_INC_NOTE
1042 }
1043
1044 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1045 through memory plus an extra GR scratch register. Except that you can
1046 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1047 SECONDARY_RELOAD_CLASS, but not both.
1048
1049 We got into problems in the first place by allowing a construct like
1050 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1051 This solution attempts to prevent this situation from occurring. When
1052 we see something like the above, we spill the inner register to memory. */
1053
1054 rtx
1055 spill_xfmode_operand (rtx in, int force)
1056 {
1057 if (GET_CODE (in) == SUBREG
1058 && GET_MODE (SUBREG_REG (in)) == TImode
1059 && GET_CODE (SUBREG_REG (in)) == REG)
1060 {
1061 rtx memt = assign_stack_temp (TImode, 16, 0);
1062 emit_move_insn (memt, SUBREG_REG (in));
1063 return adjust_address (memt, XFmode, 0);
1064 }
1065 else if (force && GET_CODE (in) == REG)
1066 {
1067 rtx memx = assign_stack_temp (XFmode, 16, 0);
1068 emit_move_insn (memx, in);
1069 return memx;
1070 }
1071 else
1072 return in;
1073 }
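/* Editor's note (purely illustrative, not from the original file): the kind
   of source construct referred to above is a union that overlays a long
   double with an integer member, e.g.

     union { long double ld; __int128 i; } u;   (hypothetical example)

   where reading u.ld after writing u.i can surface as
   (subreg:XF (reg:TI ...)), which this function spills to a stack
   temporary instead.  */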
1074
1075 /* Emit comparison instruction if necessary, returning the expression
1076 that holds the compare result in the proper mode. */
1077
1078 static GTY(()) rtx cmptf_libfunc;
1079
1080 rtx
1081 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1082 {
1083 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1084 rtx cmp;
1085
1086 /* If we have a BImode input, then we already have a compare result, and
1087 do not need to emit another comparison. */
1088 if (GET_MODE (op0) == BImode)
1089 {
1090 if ((code == NE || code == EQ) && op1 == const0_rtx)
1091 cmp = op0;
1092 else
1093 abort ();
1094 }
1095 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
  1096      magic number as its third argument that indicates what to do.
1097 The return value is an integer to be compared against zero. */
1098 else if (GET_MODE (op0) == TFmode)
1099 {
1100 enum qfcmp_magic {
1101 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1102 QCMP_UNORD = 2,
1103 QCMP_EQ = 4,
1104 QCMP_LT = 8,
1105 QCMP_GT = 16
1106 } magic;
1107 enum rtx_code ncode;
1108 rtx ret, insns;
1109 if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
1110 abort ();
1111 switch (code)
1112 {
1113 /* 1 = equal, 0 = not equal. Equality operators do
1114 not raise FP_INVALID when given an SNaN operand. */
1115 case EQ: magic = QCMP_EQ; ncode = NE; break;
1116 case NE: magic = QCMP_EQ; ncode = EQ; break;
1117 /* isunordered() from C99. */
1118 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1119 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1120 /* Relational operators raise FP_INVALID when given
1121 an SNaN operand. */
1122 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1123 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1124 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1125 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1126 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
  1127              Expanders for buneq etc. would have to be added to ia64.md
1128 for this to be useful. */
1129 default: abort ();
1130 }
1131
1132 start_sequence ();
1133
1134 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1135 op0, TFmode, op1, TFmode,
1136 GEN_INT (magic), DImode);
1137 cmp = gen_reg_rtx (BImode);
1138 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1139 gen_rtx_fmt_ee (ncode, BImode,
1140 ret, const0_rtx)));
1141
1142 insns = get_insns ();
1143 end_sequence ();
1144
1145 emit_libcall_block (insns, cmp, cmp,
1146 gen_rtx_fmt_ee (code, BImode, op0, op1));
1147 code = NE;
1148 }
1149 else
1150 {
1151 cmp = gen_reg_rtx (BImode);
1152 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1153 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1154 code = NE;
1155 }
1156
1157 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1158 }
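/* Editor's note (illustrative, not from the original file): for example a
   TFmode "a <= b" is lowered, conceptually, to

     _U_Qfcmp (a, b, QCMP_LT | QCMP_EQ | QCMP_INV) != 0

   i.e. magic = 8 + 4 + 1 = 13, with the integer result then compared
   against zero via ncode = NE exactly as arranged above.  */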
1159
1160 /* Emit the appropriate sequence for a call. */
1161
1162 void
1163 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1164 int sibcall_p)
1165 {
1166 rtx insn, b0;
1167
1168 addr = XEXP (addr, 0);
1169 addr = convert_memory_address (DImode, addr);
1170 b0 = gen_rtx_REG (DImode, R_BR (0));
1171
1172 /* ??? Should do this for functions known to bind local too. */
1173 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1174 {
1175 if (sibcall_p)
1176 insn = gen_sibcall_nogp (addr);
1177 else if (! retval)
1178 insn = gen_call_nogp (addr, b0);
1179 else
1180 insn = gen_call_value_nogp (retval, addr, b0);
1181 insn = emit_call_insn (insn);
1182 }
1183 else
1184 {
1185 if (sibcall_p)
1186 insn = gen_sibcall_gp (addr);
1187 else if (! retval)
1188 insn = gen_call_gp (addr, b0);
1189 else
1190 insn = gen_call_value_gp (retval, addr, b0);
1191 insn = emit_call_insn (insn);
1192
1193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1194 }
1195
1196 if (sibcall_p)
1197 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1198 }
1199
1200 void
1201 ia64_reload_gp (void)
1202 {
1203 rtx tmp;
1204
1205 if (current_frame_info.reg_save_gp)
1206 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1207 else
1208 {
1209 HOST_WIDE_INT offset;
1210
1211 offset = (current_frame_info.spill_cfa_off
1212 + current_frame_info.spill_size);
1213 if (frame_pointer_needed)
1214 {
1215 tmp = hard_frame_pointer_rtx;
1216 offset = -offset;
1217 }
1218 else
1219 {
1220 tmp = stack_pointer_rtx;
1221 offset = current_frame_info.total_size - offset;
1222 }
1223
1224 if (CONST_OK_FOR_I (offset))
1225 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1226 tmp, GEN_INT (offset)));
1227 else
1228 {
1229 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1230 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1231 pic_offset_table_rtx, tmp));
1232 }
1233
1234 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1235 }
1236
1237 emit_move_insn (pic_offset_table_rtx, tmp);
1238 }
1239
1240 void
1241 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1242 rtx scratch_b, int noreturn_p, int sibcall_p)
1243 {
1244 rtx insn;
1245 bool is_desc = false;
1246
1247 /* If we find we're calling through a register, then we're actually
1248 calling through a descriptor, so load up the values. */
1249 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1250 {
1251 rtx tmp;
1252 bool addr_dead_p;
1253
1254 /* ??? We are currently constrained to *not* use peep2, because
1255 we can legitimately change the global lifetime of the GP
1256 (in the form of killing where previously live). This is
1257 because a call through a descriptor doesn't use the previous
1258 value of the GP, while a direct call does, and we do not
1259 commit to either form until the split here.
1260
1261 That said, this means that we lack precise life info for
1262 whether ADDR is dead after this call. This is not terribly
1263 important, since we can fix things up essentially for free
1264 with the POST_DEC below, but it's nice to not use it when we
1265 can immediately tell it's not necessary. */
1266 addr_dead_p = ((noreturn_p || sibcall_p
1267 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1268 REGNO (addr)))
1269 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1270
1271 /* Load the code address into scratch_b. */
1272 tmp = gen_rtx_POST_INC (Pmode, addr);
1273 tmp = gen_rtx_MEM (Pmode, tmp);
1274 emit_move_insn (scratch_r, tmp);
1275 emit_move_insn (scratch_b, scratch_r);
1276
1277 /* Load the GP address. If ADDR is not dead here, then we must
1278 revert the change made above via the POST_INCREMENT. */
1279 if (!addr_dead_p)
1280 tmp = gen_rtx_POST_DEC (Pmode, addr);
1281 else
1282 tmp = addr;
1283 tmp = gen_rtx_MEM (Pmode, tmp);
1284 emit_move_insn (pic_offset_table_rtx, tmp);
1285
1286 is_desc = true;
1287 addr = scratch_b;
1288 }
1289
1290 if (sibcall_p)
1291 insn = gen_sibcall_nogp (addr);
1292 else if (retval)
1293 insn = gen_call_value_nogp (retval, addr, retaddr);
1294 else
1295 insn = gen_call_nogp (addr, retaddr);
1296 emit_call_insn (insn);
1297
1298 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1299 ia64_reload_gp ();
1300 }
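/* Editor's sketch (illustrative, not part of the original file): an IA-64
   function descriptor is a two-word object, and a call through one loads
   both words, which is what the code above emits:  */
#if 0
struct ia64_fdesc           /* hypothetical name, for illustration only */
{
  unsigned long code_addr;  /* entry point, loaded into a branch register */
  unsigned long gp;         /* callee's global pointer, loaded into r1 */
};

/* An indirect call through descriptor DESC, in pseudo-C:
     b6 = desc->code_addr;
     r1 = desc->gp;
     br.call b0 = b6;  */
#endif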
1301 \f
1302 /* Begin the assembly file. */
1303
1304 static void
1305 ia64_file_start (void)
1306 {
1307 default_file_start ();
1308 emit_safe_across_calls ();
1309 }
1310
1311 void
1312 emit_safe_across_calls (void)
1313 {
1314 unsigned int rs, re;
1315 int out_state;
1316
1317 rs = 1;
1318 out_state = 0;
1319 while (1)
1320 {
1321 while (rs < 64 && call_used_regs[PR_REG (rs)])
1322 rs++;
1323 if (rs >= 64)
1324 break;
1325 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1326 continue;
1327 if (out_state == 0)
1328 {
1329 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1330 out_state = 1;
1331 }
1332 else
1333 fputc (',', asm_out_file);
1334 if (re == rs + 1)
1335 fprintf (asm_out_file, "p%u", rs);
1336 else
1337 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1338 rs = re + 1;
1339 }
1340 if (out_state)
1341 fputc ('\n', asm_out_file);
1342 }
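/* Editor's note (illustrative, not from the original file): with the usual
   IA-64 partition in which p6-p15 are call-clobbered and the remaining
   predicates are preserved, the loop above emits something like

     .pred.safe_across_calls p1-p5,p16-p63

   naming the predicate ranges that are safe across calls.  */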
1343
1344 /* Helper function for ia64_compute_frame_size: find an appropriate general
1345 register to spill some special register to. SPECIAL_SPILL_MASK contains
1346 bits in GR0 to GR31 that have already been allocated by this routine.
1347 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1348
1349 static int
1350 find_gr_spill (int try_locals)
1351 {
1352 int regno;
1353
1354 /* If this is a leaf function, first try an otherwise unused
1355 call-clobbered register. */
1356 if (current_function_is_leaf)
1357 {
1358 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1359 if (! regs_ever_live[regno]
1360 && call_used_regs[regno]
1361 && ! fixed_regs[regno]
1362 && ! global_regs[regno]
1363 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1364 {
1365 current_frame_info.gr_used_mask |= 1 << regno;
1366 return regno;
1367 }
1368 }
1369
1370 if (try_locals)
1371 {
1372 regno = current_frame_info.n_local_regs;
1373 /* If there is a frame pointer, then we can't use loc79, because
1374 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1375 reg_name switching code in ia64_expand_prologue. */
1376 if (regno < (80 - frame_pointer_needed))
1377 {
1378 current_frame_info.n_local_regs = regno + 1;
1379 return LOC_REG (0) + regno;
1380 }
1381 }
1382
1383 /* Failed to find a general register to spill to. Must use stack. */
1384 return 0;
1385 }
1386
1387 /* In order to make for nice schedules, we try to allocate every temporary
1388 to a different register. We must of course stay away from call-saved,
1389 fixed, and global registers. We must also stay away from registers
1390 allocated in current_frame_info.gr_used_mask, since those include regs
1391 used all through the prologue.
1392
1393 Any register allocated here must be used immediately. The idea is to
1394 aid scheduling, not to solve data flow problems. */
1395
1396 static int last_scratch_gr_reg;
1397
1398 static int
1399 next_scratch_gr_reg (void)
1400 {
1401 int i, regno;
1402
1403 for (i = 0; i < 32; ++i)
1404 {
1405 regno = (last_scratch_gr_reg + i + 1) & 31;
1406 if (call_used_regs[regno]
1407 && ! fixed_regs[regno]
1408 && ! global_regs[regno]
1409 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1410 {
1411 last_scratch_gr_reg = regno;
1412 return regno;
1413 }
1414 }
1415
1416 /* There must be _something_ available. */
1417 abort ();
1418 }
1419
1420 /* Helper function for ia64_compute_frame_size, called through
1421 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1422
1423 static void
1424 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
1425 {
1426 unsigned int regno = REGNO (reg);
1427 if (regno < 32)
1428 {
1429 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1430 for (i = 0; i < n; ++i)
1431 current_frame_info.gr_used_mask |= 1 << (regno + i);
1432 }
1433 }
1434
  1435 /* Compute the size of the frame and the register save masks and offsets
  1436    for the current function, filling in current_frame_info.  SIZE is the
  1437    number of bytes of space needed for local variables.  */
1438
1439 static void
1440 ia64_compute_frame_size (HOST_WIDE_INT size)
1441 {
1442 HOST_WIDE_INT total_size;
1443 HOST_WIDE_INT spill_size = 0;
1444 HOST_WIDE_INT extra_spill_size = 0;
1445 HOST_WIDE_INT pretend_args_size;
1446 HARD_REG_SET mask;
1447 int n_spilled = 0;
1448 int spilled_gr_p = 0;
1449 int spilled_fr_p = 0;
1450 unsigned int regno;
1451 int i;
1452
1453 if (current_frame_info.initialized)
1454 return;
1455
1456 memset (&current_frame_info, 0, sizeof current_frame_info);
1457 CLEAR_HARD_REG_SET (mask);
1458
1459 /* Don't allocate scratches to the return register. */
1460 diddle_return_value (mark_reg_gr_used_mask, NULL);
1461
1462 /* Don't allocate scratches to the EH scratch registers. */
1463 if (cfun->machine->ia64_eh_epilogue_sp)
1464 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1465 if (cfun->machine->ia64_eh_epilogue_bsp)
1466 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1467
1468 /* Find the size of the register stack frame. We have only 80 local
1469 registers, because we reserve 8 for the inputs and 8 for the
1470 outputs. */
1471
1472 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1473 since we'll be adjusting that down later. */
1474 regno = LOC_REG (78) + ! frame_pointer_needed;
1475 for (; regno >= LOC_REG (0); regno--)
1476 if (regs_ever_live[regno])
1477 break;
1478 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1479
1480 /* For functions marked with the syscall_linkage attribute, we must mark
1481 all eight input registers as in use, so that locals aren't visible to
1482 the caller. */
1483
1484 if (cfun->machine->n_varargs > 0
1485 || lookup_attribute ("syscall_linkage",
1486 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1487 current_frame_info.n_input_regs = 8;
1488 else
1489 {
1490 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1491 if (regs_ever_live[regno])
1492 break;
1493 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1494 }
1495
1496 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1497 if (regs_ever_live[regno])
1498 break;
1499 i = regno - OUT_REG (0) + 1;
1500
1501 /* When -p profiling, we need one output register for the mcount argument.
1502 Likewise for -a profiling for the bb_init_func argument. For -ax
1503 profiling, we need two output registers for the two bb_init_trace_func
1504 arguments. */
1505 if (current_function_profile)
1506 i = MAX (i, 1);
1507 current_frame_info.n_output_regs = i;
1508
1509 /* ??? No rotating register support yet. */
1510 current_frame_info.n_rotate_regs = 0;
1511
1512 /* Discover which registers need spilling, and how much room that
1513 will take. Begin with floating point and general registers,
1514 which will always wind up on the stack. */
1515
1516 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1517 if (regs_ever_live[regno] && ! call_used_regs[regno])
1518 {
1519 SET_HARD_REG_BIT (mask, regno);
1520 spill_size += 16;
1521 n_spilled += 1;
1522 spilled_fr_p = 1;
1523 }
1524
1525 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1526 if (regs_ever_live[regno] && ! call_used_regs[regno])
1527 {
1528 SET_HARD_REG_BIT (mask, regno);
1529 spill_size += 8;
1530 n_spilled += 1;
1531 spilled_gr_p = 1;
1532 }
1533
1534 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1535 if (regs_ever_live[regno] && ! call_used_regs[regno])
1536 {
1537 SET_HARD_REG_BIT (mask, regno);
1538 spill_size += 8;
1539 n_spilled += 1;
1540 }
1541
1542 /* Now come all special registers that might get saved in other
1543 general registers. */
1544
1545 if (frame_pointer_needed)
1546 {
1547 current_frame_info.reg_fp = find_gr_spill (1);
1548 /* If we did not get a register, then we take LOC79. This is guaranteed
1549 to be free, even if regs_ever_live is already set, because this is
1550 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1551 as we don't count loc79 above. */
1552 if (current_frame_info.reg_fp == 0)
1553 {
1554 current_frame_info.reg_fp = LOC_REG (79);
1555 current_frame_info.n_local_regs++;
1556 }
1557 }
1558
1559 if (! current_function_is_leaf)
1560 {
1561 /* Emit a save of BR0 if we call other functions. Do this even
1562 if this function doesn't return, as EH depends on this to be
1563 able to unwind the stack. */
1564 SET_HARD_REG_BIT (mask, BR_REG (0));
1565
1566 current_frame_info.reg_save_b0 = find_gr_spill (1);
1567 if (current_frame_info.reg_save_b0 == 0)
1568 {
1569 spill_size += 8;
1570 n_spilled += 1;
1571 }
1572
1573 /* Similarly for ar.pfs. */
1574 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1575 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1576 if (current_frame_info.reg_save_ar_pfs == 0)
1577 {
1578 extra_spill_size += 8;
1579 n_spilled += 1;
1580 }
1581
1582 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1583 registers are clobbered, so we fall back to the stack. */
1584 current_frame_info.reg_save_gp
1585 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1586 if (current_frame_info.reg_save_gp == 0)
1587 {
1588 SET_HARD_REG_BIT (mask, GR_REG (1));
1589 spill_size += 8;
1590 n_spilled += 1;
1591 }
1592 }
1593 else
1594 {
1595 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1596 {
1597 SET_HARD_REG_BIT (mask, BR_REG (0));
1598 spill_size += 8;
1599 n_spilled += 1;
1600 }
1601
1602 if (regs_ever_live[AR_PFS_REGNUM])
1603 {
1604 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1605 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1606 if (current_frame_info.reg_save_ar_pfs == 0)
1607 {
1608 extra_spill_size += 8;
1609 n_spilled += 1;
1610 }
1611 }
1612 }
1613
1614 /* Unwind descriptor hackery: things are most efficient if we allocate
1615 consecutive GR save registers for RP, PFS, FP in that order. However,
1616 it is absolutely critical that FP get the only hard register that's
1617 guaranteed to be free, so we allocated it first. If all three did
1618 happen to be allocated hard regs, and are consecutive, rearrange them
1619 into the preferred order now. */
1620 if (current_frame_info.reg_fp != 0
1621 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1622 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1623 {
1624 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1625 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1626 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1627 }
1628
1629 /* See if we need to store the predicate register block. */
1630 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1631 if (regs_ever_live[regno] && ! call_used_regs[regno])
1632 break;
1633 if (regno <= PR_REG (63))
1634 {
1635 SET_HARD_REG_BIT (mask, PR_REG (0));
1636 current_frame_info.reg_save_pr = find_gr_spill (1);
1637 if (current_frame_info.reg_save_pr == 0)
1638 {
1639 extra_spill_size += 8;
1640 n_spilled += 1;
1641 }
1642
1643 /* ??? Mark them all as used so that register renaming and such
1644 are free to use them. */
1645 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1646 regs_ever_live[regno] = 1;
1647 }
1648
1649 /* If we're forced to use st8.spill, we're forced to save and restore
1650 ar.unat as well. The check for existing liveness allows inline asm
1651 to touch ar.unat. */
1652 if (spilled_gr_p || cfun->machine->n_varargs
1653 || regs_ever_live[AR_UNAT_REGNUM])
1654 {
1655 regs_ever_live[AR_UNAT_REGNUM] = 1;
1656 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1657 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1658 if (current_frame_info.reg_save_ar_unat == 0)
1659 {
1660 extra_spill_size += 8;
1661 n_spilled += 1;
1662 }
1663 }
1664
1665 if (regs_ever_live[AR_LC_REGNUM])
1666 {
1667 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1668 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1669 if (current_frame_info.reg_save_ar_lc == 0)
1670 {
1671 extra_spill_size += 8;
1672 n_spilled += 1;
1673 }
1674 }
1675
1676 /* If we have an odd number of words of pretend arguments written to
1677 the stack, then the FR save area will be unaligned. We round the
1678 size of this area up to keep things 16 byte aligned. */
1679 if (spilled_fr_p)
1680 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1681 else
1682 pretend_args_size = current_function_pretend_args_size;
1683
1684 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1685 + current_function_outgoing_args_size);
1686 total_size = IA64_STACK_ALIGN (total_size);
1687
1688 /* We always use the 16-byte scratch area provided by the caller, but
1689 if we are a leaf function, there's no one to which we need to provide
1690 a scratch area. */
1691 if (current_function_is_leaf)
1692 total_size = MAX (0, total_size - 16);
1693
1694 current_frame_info.total_size = total_size;
1695 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1696 current_frame_info.spill_size = spill_size;
1697 current_frame_info.extra_spill_size = extra_spill_size;
1698 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1699 current_frame_info.n_spilled = n_spilled;
1700 current_frame_info.initialized = reload_completed;
1701 }
1702
1703 /* Compute the initial difference between the specified pair of registers. */
1704
1705 HOST_WIDE_INT
1706 ia64_initial_elimination_offset (int from, int to)
1707 {
1708 HOST_WIDE_INT offset;
1709
1710 ia64_compute_frame_size (get_frame_size ());
1711 switch (from)
1712 {
1713 case FRAME_POINTER_REGNUM:
1714 if (to == HARD_FRAME_POINTER_REGNUM)
1715 {
1716 if (current_function_is_leaf)
1717 offset = -current_frame_info.total_size;
1718 else
1719 offset = -(current_frame_info.total_size
1720 - current_function_outgoing_args_size - 16);
1721 }
1722 else if (to == STACK_POINTER_REGNUM)
1723 {
1724 if (current_function_is_leaf)
1725 offset = 0;
1726 else
1727 offset = 16 + current_function_outgoing_args_size;
1728 }
1729 else
1730 abort ();
1731 break;
1732
1733 case ARG_POINTER_REGNUM:
1734 /* Arguments start above the 16 byte save area, unless stdarg
1735 in which case we store through the 16 byte save area. */
1736 if (to == HARD_FRAME_POINTER_REGNUM)
1737 offset = 16 - current_function_pretend_args_size;
1738 else if (to == STACK_POINTER_REGNUM)
1739 offset = (current_frame_info.total_size
1740 + 16 - current_function_pretend_args_size);
1741 else
1742 abort ();
1743 break;
1744
1745 default:
1746 abort ();
1747 }
1748
1749 return offset;
1750 }
1751
1752 /* If there are more than a trivial number of register spills, we use
1753 two interleaved iterators so that we can get two memory references
1754 per insn group.
1755
1756 In order to simplify things in the prologue and epilogue expanders,
1757 we use helper functions to fix up the memory references after the
1758 fact with the appropriate offsets to a POST_MODIFY memory mode.
1759 The following data structure tracks the state of the two iterators
1760 while insns are being emitted. */
1761
1762 struct spill_fill_data
1763 {
1764 rtx init_after; /* point at which to emit initializations */
1765 rtx init_reg[2]; /* initial base register */
1766 rtx iter_reg[2]; /* the iterator registers */
1767 rtx *prev_addr[2]; /* address of last memory use */
1768 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1769 HOST_WIDE_INT prev_off[2]; /* last offset */
1770 int n_iter; /* number of iterators in use */
1771 int next_iter; /* next iterator to use */
1772 unsigned int save_gr_used_mask;
1773 };
1774
1775 static struct spill_fill_data spill_fill_data;
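/* A minimal sketch of how the expanders below use these helpers
   (see ia64_expand_prologue and ia64_expand_epilogue):

     setup_spill_pointers (n_spills, base_reg, cfa_off);
     do_spill (gen_movdi_x, reg, cfa_off, frame_reg);
     ...                          (or do_restore (...) in the epilogue)
     finish_spill_pointers ();

   Each do_spill/do_restore consumes one slot via spill_restore_mem, which
   round-robins between the iterators when two are in use.  */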
1776
1777 static void
1778 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
1779 {
1780 int i;
1781
1782 spill_fill_data.init_after = get_last_insn ();
1783 spill_fill_data.init_reg[0] = init_reg;
1784 spill_fill_data.init_reg[1] = init_reg;
1785 spill_fill_data.prev_addr[0] = NULL;
1786 spill_fill_data.prev_addr[1] = NULL;
1787 spill_fill_data.prev_insn[0] = NULL;
1788 spill_fill_data.prev_insn[1] = NULL;
1789 spill_fill_data.prev_off[0] = cfa_off;
1790 spill_fill_data.prev_off[1] = cfa_off;
1791 spill_fill_data.next_iter = 0;
1792 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1793
1794 spill_fill_data.n_iter = 1 + (n_spills > 2);
1795 for (i = 0; i < spill_fill_data.n_iter; ++i)
1796 {
1797 int regno = next_scratch_gr_reg ();
1798 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1799 current_frame_info.gr_used_mask |= 1 << regno;
1800 }
1801 }
1802
1803 static void
1804 finish_spill_pointers (void)
1805 {
1806 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1807 }
1808
1809 static rtx
1810 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
1811 {
1812 int iter = spill_fill_data.next_iter;
1813 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1814 rtx disp_rtx = GEN_INT (disp);
1815 rtx mem;
1816
1817 if (spill_fill_data.prev_addr[iter])
1818 {
1819 if (CONST_OK_FOR_N (disp))
1820 {
1821 *spill_fill_data.prev_addr[iter]
1822 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1823 gen_rtx_PLUS (DImode,
1824 spill_fill_data.iter_reg[iter],
1825 disp_rtx));
1826 REG_NOTES (spill_fill_data.prev_insn[iter])
1827 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1828 REG_NOTES (spill_fill_data.prev_insn[iter]));
1829 }
1830 else
1831 {
1832 /* ??? Could use register post_modify for loads. */
1833 if (! CONST_OK_FOR_I (disp))
1834 {
1835 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1836 emit_move_insn (tmp, disp_rtx);
1837 disp_rtx = tmp;
1838 }
1839 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1840 spill_fill_data.iter_reg[iter], disp_rtx));
1841 }
1842 }
1843 /* Micro-optimization: if we've created a frame pointer, it's at
1844 CFA 0, which may allow the real iterator to be initialized lower,
1845 slightly increasing parallelism. Also, if there are few saves
1846 it may eliminate the iterator entirely. */
1847 else if (disp == 0
1848 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1849 && frame_pointer_needed)
1850 {
1851 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1852 set_mem_alias_set (mem, get_varargs_alias_set ());
1853 return mem;
1854 }
1855 else
1856 {
1857 rtx seq, insn;
1858
1859 if (disp == 0)
1860 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1861 spill_fill_data.init_reg[iter]);
1862 else
1863 {
1864 start_sequence ();
1865
1866 if (! CONST_OK_FOR_I (disp))
1867 {
1868 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1869 emit_move_insn (tmp, disp_rtx);
1870 disp_rtx = tmp;
1871 }
1872
1873 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1874 spill_fill_data.init_reg[iter],
1875 disp_rtx));
1876
1877 seq = get_insns ();
1878 end_sequence ();
1879 }
1880
1881 /* Be careful in case this is the first insn in the sequence. */
1882 if (spill_fill_data.init_after)
1883 insn = emit_insn_after (seq, spill_fill_data.init_after);
1884 else
1885 {
1886 rtx first = get_insns ();
1887 if (first)
1888 insn = emit_insn_before (seq, first);
1889 else
1890 insn = emit_insn (seq);
1891 }
1892 spill_fill_data.init_after = insn;
1893
1894 /* If DISP is 0, we may or may not have a further adjustment
1895 afterward. If we do, then the load/store insn may be modified
1896 to be a post-modify. If we don't, then this copy may be
1897 eliminated by copyprop_hardreg_forward, which makes this
1898 insn garbage, which runs afoul of the sanity check in
1899 propagate_one_insn. So mark this insn as legal to delete. */
1900 if (disp == 0)
1901 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1902 REG_NOTES (insn));
1903 }
1904
1905 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1906
1907 /* ??? Not all of the spills are for varargs, but some of them are.
1908 The rest of the spills belong in an alias set of their own. But
1909 it doesn't actually hurt to include them here. */
1910 set_mem_alias_set (mem, get_varargs_alias_set ());
1911
1912 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1913 spill_fill_data.prev_off[iter] = cfa_off;
1914
1915 if (++iter >= spill_fill_data.n_iter)
1916 iter = 0;
1917 spill_fill_data.next_iter = iter;
1918
1919 return mem;
1920 }
1921
1922 static void
1923 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
1924 rtx frame_reg)
1925 {
1926 int iter = spill_fill_data.next_iter;
1927 rtx mem, insn;
1928
1929 mem = spill_restore_mem (reg, cfa_off);
1930 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1931 spill_fill_data.prev_insn[iter] = insn;
1932
1933 if (frame_reg)
1934 {
1935 rtx base;
1936 HOST_WIDE_INT off;
1937
1938 RTX_FRAME_RELATED_P (insn) = 1;
1939
1940 /* Don't even pretend that the unwind code can intuit its way
1941 through a pair of interleaved post_modify iterators. Just
1942 provide the correct answer. */
1943
1944 if (frame_pointer_needed)
1945 {
1946 base = hard_frame_pointer_rtx;
1947 off = - cfa_off;
1948 }
1949 else
1950 {
1951 base = stack_pointer_rtx;
1952 off = current_frame_info.total_size - cfa_off;
1953 }
1954
1955 REG_NOTES (insn)
1956 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1957 gen_rtx_SET (VOIDmode,
1958 gen_rtx_MEM (GET_MODE (reg),
1959 plus_constant (base, off)),
1960 frame_reg),
1961 REG_NOTES (insn));
1962 }
1963 }
1964
1965 static void
1966 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
1967 {
1968 int iter = spill_fill_data.next_iter;
1969 rtx insn;
1970
1971 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1972 GEN_INT (cfa_off)));
1973 spill_fill_data.prev_insn[iter] = insn;
1974 }
1975
1976 /* Wrapper functions that discard the CONST_INT spill offset. These
1977 exist so that we can give gr_spill/gr_fill the offset they need and
1978 use a consistent function interface. */
1979
1980 static rtx
1981 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1982 {
1983 return gen_movdi (dest, src);
1984 }
1985
1986 static rtx
1987 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1988 {
1989 return gen_fr_spill (dest, src);
1990 }
1991
1992 static rtx
1993 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1994 {
1995 return gen_fr_restore (dest, src);
1996 }
1997
1998 /* Called after register allocation to add any instructions needed for the
1999 prologue. Using a prologue insn is favored compared to putting all of the
2000 instructions in output_function_prologue(), since it allows the scheduler
2001 to intermix instructions with the saves of the caller saved registers. In
2002 some cases, it might be necessary to emit a barrier instruction as the last
2003 insn to prevent such scheduling.
2004
2005 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2006 so that the debug info generation code can handle them properly.
2007
2008 The register save area is laid out like so:
2009 cfa+16
2010 [ varargs spill area ]
2011 [ fr register spill area ]
2012 [ br register spill area ]
2013 [ ar register spill area ]
2014 [ pr register spill area ]
2015 [ gr register spill area ] */
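/* Note that the prologue below emits the spills varargs first, then the
   predicate block and the ar registers, and finally the general, branch
   and floating point registers; the epilogue restores the same slots in
   the same order.  */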
2016
2017 /* ??? We get inefficient code when the frame size is larger than can fit
2018 in an adds instruction. */
2019
2020 void
2021 ia64_expand_prologue (void)
2022 {
2023 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2024 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2025 rtx reg, alt_reg;
2026
2027 ia64_compute_frame_size (get_frame_size ());
2028 last_scratch_gr_reg = 15;
2029
2030 /* If there is no epilogue, then we can omit some prologue insns.
2031 We need to avoid emitting the dead prologue insns, because flow
2032 will complain about them. */
2033 if (optimize)
2034 {
2035 edge e;
2036 edge_iterator ei;
2037 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2038 if ((e->flags & EDGE_FAKE) == 0
2039 && (e->flags & EDGE_FALLTHRU) != 0)
2040 break;
2041 epilogue_p = (e != NULL);
2042 }
2043 else
2044 epilogue_p = 1;
2045
2046 /* Set the local, input, and output register names. We need to do this
2047 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2048 half. If we use in/loc/out register names, then we get assembler errors
2049 in crtn.S because there is no alloc insn or regstk directive in there. */
2050 if (! TARGET_REG_NAMES)
2051 {
2052 int inputs = current_frame_info.n_input_regs;
2053 int locals = current_frame_info.n_local_regs;
2054 int outputs = current_frame_info.n_output_regs;
2055
2056 for (i = 0; i < inputs; i++)
2057 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2058 for (i = 0; i < locals; i++)
2059 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2060 for (i = 0; i < outputs; i++)
2061 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2062 }
2063
2064 /* Set the frame pointer register name. The regnum is logically loc79,
2065 but of course we'll not have allocated that many locals. Rather than
2066 worrying about renumbering the existing rtxs, we adjust the name. */
2067 /* ??? This code means that we can never use one local register when
2068 there is a frame pointer. loc79 gets wasted in this case, as it is
2069 renamed to a register that will never be used. See also the try_locals
2070 code in find_gr_spill. */
2071 if (current_frame_info.reg_fp)
2072 {
2073 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2074 reg_names[HARD_FRAME_POINTER_REGNUM]
2075 = reg_names[current_frame_info.reg_fp];
2076 reg_names[current_frame_info.reg_fp] = tmp;
2077 }
2078
2079 /* We don't need an alloc instruction if we've used no outputs or locals. */
2080 if (current_frame_info.n_local_regs == 0
2081 && current_frame_info.n_output_regs == 0
2082 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2083 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2084 {
2085 /* If there is no alloc, but there are input registers used, then we
2086 need a .regstk directive. */
2087 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2088 ar_pfs_save_reg = NULL_RTX;
2089 }
2090 else
2091 {
2092 current_frame_info.need_regstk = 0;
2093
2094 if (current_frame_info.reg_save_ar_pfs)
2095 regno = current_frame_info.reg_save_ar_pfs;
2096 else
2097 regno = next_scratch_gr_reg ();
2098 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2099
2100 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2101 GEN_INT (current_frame_info.n_input_regs),
2102 GEN_INT (current_frame_info.n_local_regs),
2103 GEN_INT (current_frame_info.n_output_regs),
2104 GEN_INT (current_frame_info.n_rotate_regs)));
2105 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2106 }
2107
2108 /* Set up frame pointer, stack pointer, and spill iterators. */
2109
2110 n_varargs = cfun->machine->n_varargs;
2111 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2112 stack_pointer_rtx, 0);
2113
2114 if (frame_pointer_needed)
2115 {
2116 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2117 RTX_FRAME_RELATED_P (insn) = 1;
2118 }
2119
2120 if (current_frame_info.total_size != 0)
2121 {
2122 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2123 rtx offset;
2124
2125 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2126 offset = frame_size_rtx;
2127 else
2128 {
2129 regno = next_scratch_gr_reg ();
2130 offset = gen_rtx_REG (DImode, regno);
2131 emit_move_insn (offset, frame_size_rtx);
2132 }
2133
2134 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2135 stack_pointer_rtx, offset));
2136
2137 if (! frame_pointer_needed)
2138 {
2139 RTX_FRAME_RELATED_P (insn) = 1;
2140 if (GET_CODE (offset) != CONST_INT)
2141 {
2142 REG_NOTES (insn)
2143 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2144 gen_rtx_SET (VOIDmode,
2145 stack_pointer_rtx,
2146 gen_rtx_PLUS (DImode,
2147 stack_pointer_rtx,
2148 frame_size_rtx)),
2149 REG_NOTES (insn));
2150 }
2151 }
2152
2153 /* ??? At this point we must generate a magic insn that appears to
2154 modify the stack pointer, the frame pointer, and all spill
2155 iterators. This would allow the most scheduling freedom. For
2156 now, just hard stop. */
2157 emit_insn (gen_blockage ());
2158 }
2159
2160 /* Must copy out ar.unat before doing any integer spills. */
2161 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2162 {
2163 if (current_frame_info.reg_save_ar_unat)
2164 ar_unat_save_reg
2165 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2166 else
2167 {
2168 alt_regno = next_scratch_gr_reg ();
2169 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2170 current_frame_info.gr_used_mask |= 1 << alt_regno;
2171 }
2172
2173 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2174 insn = emit_move_insn (ar_unat_save_reg, reg);
2175 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2176
2177 /* Even if we're not going to generate an epilogue, we still
2178 need to save the register so that EH works. */
2179 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2180 emit_insn (gen_prologue_use (ar_unat_save_reg));
2181 }
2182 else
2183 ar_unat_save_reg = NULL_RTX;
2184
2185 /* Spill all varargs registers. Do this before spilling any GR registers,
2186 since we want the UNAT bits for the GR registers to override the UNAT
2187 bits from varargs, which we don't care about. */
2188
2189 cfa_off = -16;
2190 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2191 {
2192 reg = gen_rtx_REG (DImode, regno);
2193 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2194 }
2195
2196 /* Locate the bottom of the register save area. */
2197 cfa_off = (current_frame_info.spill_cfa_off
2198 + current_frame_info.spill_size
2199 + current_frame_info.extra_spill_size);
2200
2201 /* Save the predicate register block either in a register or in memory. */
2202 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2203 {
2204 reg = gen_rtx_REG (DImode, PR_REG (0));
2205 if (current_frame_info.reg_save_pr != 0)
2206 {
2207 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2208 insn = emit_move_insn (alt_reg, reg);
2209
2210 /* ??? Denote pr spill/fill by a DImode move that modifies all
2211 64 hard registers. */
2212 RTX_FRAME_RELATED_P (insn) = 1;
2213 REG_NOTES (insn)
2214 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2215 gen_rtx_SET (VOIDmode, alt_reg, reg),
2216 REG_NOTES (insn));
2217
2218 /* Even if we're not going to generate an epilogue, we still
2219 need to save the register so that EH works. */
2220 if (! epilogue_p)
2221 emit_insn (gen_prologue_use (alt_reg));
2222 }
2223 else
2224 {
2225 alt_regno = next_scratch_gr_reg ();
2226 alt_reg = gen_rtx_REG (DImode, alt_regno);
2227 insn = emit_move_insn (alt_reg, reg);
2228 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2229 cfa_off -= 8;
2230 }
2231 }
2232
2233 /* Handle AR regs in numerical order. All of them get special handling. */
2234 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2235 && current_frame_info.reg_save_ar_unat == 0)
2236 {
2237 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2238 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2239 cfa_off -= 8;
2240 }
2241
2242 /* The alloc insn already copied ar.pfs into a general register. The
2243 only thing we have to do now is copy that register to a stack slot
2244 if we'd not allocated a local register for the job. */
2245 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2246 && current_frame_info.reg_save_ar_pfs == 0)
2247 {
2248 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2249 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2250 cfa_off -= 8;
2251 }
2252
2253 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2254 {
2255 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2256 if (current_frame_info.reg_save_ar_lc != 0)
2257 {
2258 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2259 insn = emit_move_insn (alt_reg, reg);
2260 RTX_FRAME_RELATED_P (insn) = 1;
2261
2262 /* Even if we're not going to generate an epilogue, we still
2263 need to save the register so that EH works. */
2264 if (! epilogue_p)
2265 emit_insn (gen_prologue_use (alt_reg));
2266 }
2267 else
2268 {
2269 alt_regno = next_scratch_gr_reg ();
2270 alt_reg = gen_rtx_REG (DImode, alt_regno);
2271 emit_move_insn (alt_reg, reg);
2272 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2273 cfa_off -= 8;
2274 }
2275 }
2276
2277 if (current_frame_info.reg_save_gp)
2278 {
2279 insn = emit_move_insn (gen_rtx_REG (DImode,
2280 current_frame_info.reg_save_gp),
2281 pic_offset_table_rtx);
2282 /* We don't know for sure yet if this is actually needed, since
2283 we've not split the PIC call patterns. If all of the calls
2284 are indirect, and not followed by any uses of the gp, then
2285 this save is dead. Allow it to go away. */
2286 REG_NOTES (insn)
2287 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2288 }
2289
2290 /* We should now be at the base of the gr/br/fr spill area. */
2291 if (cfa_off != (current_frame_info.spill_cfa_off
2292 + current_frame_info.spill_size))
2293 abort ();
2294
2295 /* Spill all general registers. */
2296 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2297 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2298 {
2299 reg = gen_rtx_REG (DImode, regno);
2300 do_spill (gen_gr_spill, reg, cfa_off, reg);
2301 cfa_off -= 8;
2302 }
2303
2304 /* Handle BR0 specially -- it may be getting stored permanently in
2305 some GR register. */
2306 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2307 {
2308 reg = gen_rtx_REG (DImode, BR_REG (0));
2309 if (current_frame_info.reg_save_b0 != 0)
2310 {
2311 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2312 insn = emit_move_insn (alt_reg, reg);
2313 RTX_FRAME_RELATED_P (insn) = 1;
2314
2315 /* Even if we're not going to generate an epilogue, we still
2316 need to save the register so that EH works. */
2317 if (! epilogue_p)
2318 emit_insn (gen_prologue_use (alt_reg));
2319 }
2320 else
2321 {
2322 alt_regno = next_scratch_gr_reg ();
2323 alt_reg = gen_rtx_REG (DImode, alt_regno);
2324 emit_move_insn (alt_reg, reg);
2325 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2326 cfa_off -= 8;
2327 }
2328 }
2329
2330 /* Spill the rest of the BR registers. */
2331 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2332 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2333 {
2334 alt_regno = next_scratch_gr_reg ();
2335 alt_reg = gen_rtx_REG (DImode, alt_regno);
2336 reg = gen_rtx_REG (DImode, regno);
2337 emit_move_insn (alt_reg, reg);
2338 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2339 cfa_off -= 8;
2340 }
2341
2342 /* Align the frame and spill all FR registers. */
2343 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2344 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2345 {
2346 if (cfa_off & 15)
2347 abort ();
2348 reg = gen_rtx_REG (XFmode, regno);
2349 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2350 cfa_off -= 16;
2351 }
2352
2353 if (cfa_off != current_frame_info.spill_cfa_off)
2354 abort ();
2355
2356 finish_spill_pointers ();
2357 }
2358
2359 /* Called after register allocation to add any instructions needed for the
2360 epilogue. Using an epilogue insn is favored compared to putting all of the
2361 instructions in output_function_epilogue(), since it allows the scheduler
2362 to intermix instructions with the saves of the caller saved registers. In
2363 some cases, it might be necessary to emit a barrier instruction as the last
2364 insn to prevent such scheduling. */
2365
2366 void
2367 ia64_expand_epilogue (int sibcall_p)
2368 {
2369 rtx insn, reg, alt_reg, ar_unat_save_reg;
2370 int regno, alt_regno, cfa_off;
2371
2372 ia64_compute_frame_size (get_frame_size ());
2373
2374 /* If there is a frame pointer, then we use it instead of the stack
2375 pointer, so that the stack pointer does not need to be valid when
2376 the epilogue starts. See EXIT_IGNORE_STACK. */
2377 if (frame_pointer_needed)
2378 setup_spill_pointers (current_frame_info.n_spilled,
2379 hard_frame_pointer_rtx, 0);
2380 else
2381 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2382 current_frame_info.total_size);
2383
2384 if (current_frame_info.total_size != 0)
2385 {
2386 /* ??? At this point we must generate a magic insn that appears to
2387 modify the spill iterators and the frame pointer. This would
2388 allow the most scheduling freedom. For now, just hard stop. */
2389 emit_insn (gen_blockage ());
2390 }
2391
2392 /* Locate the bottom of the register save area. */
2393 cfa_off = (current_frame_info.spill_cfa_off
2394 + current_frame_info.spill_size
2395 + current_frame_info.extra_spill_size);
2396
2397 /* Restore the predicate registers. */
2398 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2399 {
2400 if (current_frame_info.reg_save_pr != 0)
2401 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2402 else
2403 {
2404 alt_regno = next_scratch_gr_reg ();
2405 alt_reg = gen_rtx_REG (DImode, alt_regno);
2406 do_restore (gen_movdi_x, alt_reg, cfa_off);
2407 cfa_off -= 8;
2408 }
2409 reg = gen_rtx_REG (DImode, PR_REG (0));
2410 emit_move_insn (reg, alt_reg);
2411 }
2412
2413 /* Restore the application registers. */
2414
2415 /* Load the saved unat from the stack, but do not restore it until
2416 after the GRs have been restored. */
2417 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2418 {
2419 if (current_frame_info.reg_save_ar_unat != 0)
2420 ar_unat_save_reg
2421 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2422 else
2423 {
2424 alt_regno = next_scratch_gr_reg ();
2425 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2426 current_frame_info.gr_used_mask |= 1 << alt_regno;
2427 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2428 cfa_off -= 8;
2429 }
2430 }
2431 else
2432 ar_unat_save_reg = NULL_RTX;
2433
2434 if (current_frame_info.reg_save_ar_pfs != 0)
2435 {
2436 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2437 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2438 emit_move_insn (reg, alt_reg);
2439 }
2440 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2441 {
2442 alt_regno = next_scratch_gr_reg ();
2443 alt_reg = gen_rtx_REG (DImode, alt_regno);
2444 do_restore (gen_movdi_x, alt_reg, cfa_off);
2445 cfa_off -= 8;
2446 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2447 emit_move_insn (reg, alt_reg);
2448 }
2449
2450 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2451 {
2452 if (current_frame_info.reg_save_ar_lc != 0)
2453 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2454 else
2455 {
2456 alt_regno = next_scratch_gr_reg ();
2457 alt_reg = gen_rtx_REG (DImode, alt_regno);
2458 do_restore (gen_movdi_x, alt_reg, cfa_off);
2459 cfa_off -= 8;
2460 }
2461 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2462 emit_move_insn (reg, alt_reg);
2463 }
2464
2465 /* We should now be at the base of the gr/br/fr spill area. */
2466 if (cfa_off != (current_frame_info.spill_cfa_off
2467 + current_frame_info.spill_size))
2468 abort ();
2469
2470 /* The GP may be stored on the stack in the prologue, but it's
2471 never restored in the epilogue. Skip the stack slot. */
2472 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2473 cfa_off -= 8;
2474
2475 /* Restore all general registers. */
2476 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2477 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2478 {
2479 reg = gen_rtx_REG (DImode, regno);
2480 do_restore (gen_gr_restore, reg, cfa_off);
2481 cfa_off -= 8;
2482 }
2483
2484 /* Restore the branch registers. Handle B0 specially, as it may
2485 have gotten stored in some GR register. */
2486 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2487 {
2488 if (current_frame_info.reg_save_b0 != 0)
2489 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2490 else
2491 {
2492 alt_regno = next_scratch_gr_reg ();
2493 alt_reg = gen_rtx_REG (DImode, alt_regno);
2494 do_restore (gen_movdi_x, alt_reg, cfa_off);
2495 cfa_off -= 8;
2496 }
2497 reg = gen_rtx_REG (DImode, BR_REG (0));
2498 emit_move_insn (reg, alt_reg);
2499 }
2500
2501 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2502 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2503 {
2504 alt_regno = next_scratch_gr_reg ();
2505 alt_reg = gen_rtx_REG (DImode, alt_regno);
2506 do_restore (gen_movdi_x, alt_reg, cfa_off);
2507 cfa_off -= 8;
2508 reg = gen_rtx_REG (DImode, regno);
2509 emit_move_insn (reg, alt_reg);
2510 }
2511
2512 /* Restore floating point registers. */
2513 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2514 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2515 {
2516 if (cfa_off & 15)
2517 abort ();
2518 reg = gen_rtx_REG (XFmode, regno);
2519 do_restore (gen_fr_restore_x, reg, cfa_off);
2520 cfa_off -= 16;
2521 }
2522
2523 /* Restore ar.unat for real. */
2524 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2525 {
2526 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2527 emit_move_insn (reg, ar_unat_save_reg);
2528 }
2529
2530 if (cfa_off != current_frame_info.spill_cfa_off)
2531 abort ();
2532
2533 finish_spill_pointers ();
2534
2535 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2536 {
2537 /* ??? At this point we must generate a magic insn that appears to
2538 modify the spill iterators, the stack pointer, and the frame
2539 pointer. This would allow the most scheduling freedom. For now,
2540 just hard stop. */
2541 emit_insn (gen_blockage ());
2542 }
2543
2544 if (cfun->machine->ia64_eh_epilogue_sp)
2545 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2546 else if (frame_pointer_needed)
2547 {
2548 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2549 RTX_FRAME_RELATED_P (insn) = 1;
2550 }
2551 else if (current_frame_info.total_size)
2552 {
2553 rtx offset, frame_size_rtx;
2554
2555 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2556 if (CONST_OK_FOR_I (current_frame_info.total_size))
2557 offset = frame_size_rtx;
2558 else
2559 {
2560 regno = next_scratch_gr_reg ();
2561 offset = gen_rtx_REG (DImode, regno);
2562 emit_move_insn (offset, frame_size_rtx);
2563 }
2564
2565 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2566 offset));
2567
2568 RTX_FRAME_RELATED_P (insn) = 1;
2569 if (GET_CODE (offset) != CONST_INT)
2570 {
2571 REG_NOTES (insn)
2572 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2573 gen_rtx_SET (VOIDmode,
2574 stack_pointer_rtx,
2575 gen_rtx_PLUS (DImode,
2576 stack_pointer_rtx,
2577 frame_size_rtx)),
2578 REG_NOTES (insn));
2579 }
2580 }
2581
2582 if (cfun->machine->ia64_eh_epilogue_bsp)
2583 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2584
2585 if (! sibcall_p)
2586 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2587 else
2588 {
2589 int fp = GR_REG (2);
2590 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2591 first available call-clobbered register. If there was a frame pointer
2592 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2593 so we have to make sure we're using the string "r2" when emitting
2594 the register name for the assembler. */
2595 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2596 fp = HARD_FRAME_POINTER_REGNUM;
2597
2598 /* We must emit an alloc to force the input registers to become output
2599 registers. Otherwise, if the callee tries to pass its parameters
2600 through to another call without an intervening alloc, then these
2601 values get lost. */
2602 /* ??? We don't need to preserve all input registers. We only need to
2603 preserve those input registers used as arguments to the sibling call.
2604 It is unclear how to compute that number here. */
2605 if (current_frame_info.n_input_regs != 0)
2606 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2607 const0_rtx, const0_rtx,
2608 GEN_INT (current_frame_info.n_input_regs),
2609 const0_rtx));
2610 }
2611 }
2612
2613 /* Return 1 if br.ret can do all the work required to return from a
2614 function. */
2615
2616 int
2617 ia64_direct_return (void)
2618 {
2619 if (reload_completed && ! frame_pointer_needed)
2620 {
2621 ia64_compute_frame_size (get_frame_size ());
2622
2623 return (current_frame_info.total_size == 0
2624 && current_frame_info.n_spilled == 0
2625 && current_frame_info.reg_save_b0 == 0
2626 && current_frame_info.reg_save_pr == 0
2627 && current_frame_info.reg_save_ar_pfs == 0
2628 && current_frame_info.reg_save_ar_unat == 0
2629 && current_frame_info.reg_save_ar_lc == 0);
2630 }
2631 return 0;
2632 }
2633
2634 /* Return the magic cookie that we use to hold the return address
2635 during early compilation. */
2636
2637 rtx
2638 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
2639 {
2640 if (count != 0)
2641 return NULL;
2642 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
2643 }
2644
2645 /* Split this value after reload, now that we know where the return
2646 address is saved. */
2647
2648 void
2649 ia64_split_return_addr_rtx (rtx dest)
2650 {
2651 rtx src;
2652
2653 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2654 {
2655 if (current_frame_info.reg_save_b0 != 0)
2656 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2657 else
2658 {
2659 HOST_WIDE_INT off;
2660 unsigned int regno;
2661
2662 /* Compute offset from CFA for BR0. */
2663 /* ??? Must be kept in sync with ia64_expand_prologue. */
2664 off = (current_frame_info.spill_cfa_off
2665 + current_frame_info.spill_size);
2666 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2667 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2668 off -= 8;
2669
2670 /* Convert CFA offset to a register based offset. */
2671 if (frame_pointer_needed)
2672 src = hard_frame_pointer_rtx;
2673 else
2674 {
2675 src = stack_pointer_rtx;
2676 off += current_frame_info.total_size;
2677 }
2678
2679 /* Load address into scratch register. */
2680 if (CONST_OK_FOR_I (off))
2681 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
2682 else
2683 {
2684 emit_move_insn (dest, GEN_INT (off));
2685 emit_insn (gen_adddi3 (dest, src, dest));
2686 }
2687
2688 src = gen_rtx_MEM (Pmode, dest);
2689 }
2690 }
2691 else
2692 src = gen_rtx_REG (DImode, BR_REG (0));
2693
2694 emit_move_insn (dest, src);
2695 }
2696
2697 int
2698 ia64_hard_regno_rename_ok (int from, int to)
2699 {
2700 /* Don't clobber any of the registers we reserved for the prologue. */
2701 if (to == current_frame_info.reg_fp
2702 || to == current_frame_info.reg_save_b0
2703 || to == current_frame_info.reg_save_pr
2704 || to == current_frame_info.reg_save_ar_pfs
2705 || to == current_frame_info.reg_save_ar_unat
2706 || to == current_frame_info.reg_save_ar_lc)
2707 return 0;
2708
2709 if (from == current_frame_info.reg_fp
2710 || from == current_frame_info.reg_save_b0
2711 || from == current_frame_info.reg_save_pr
2712 || from == current_frame_info.reg_save_ar_pfs
2713 || from == current_frame_info.reg_save_ar_unat
2714 || from == current_frame_info.reg_save_ar_lc)
2715 return 0;
2716
2717 /* Don't use output registers outside the register frame. */
2718 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2719 return 0;
2720
2721 /* Retain even/oddness on predicate register pairs. */
2722 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2723 return (from & 1) == (to & 1);
2724
2725 return 1;
2726 }
2727
2728 /* Target hook for assembling integer objects. Handle word-sized
2729 aligned objects and detect the cases when @fptr is needed. */
2730
2731 static bool
2732 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
2733 {
2734 if (size == POINTER_SIZE / BITS_PER_UNIT
2735 && aligned_p
2736 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2737 && GET_CODE (x) == SYMBOL_REF
2738 && SYMBOL_REF_FUNCTION_P (x))
2739 {
2740 if (POINTER_SIZE == 32)
2741 fputs ("\tdata4\t@fptr(", asm_out_file);
2742 else
2743 fputs ("\tdata8\t@fptr(", asm_out_file);
2744 output_addr_const (asm_out_file, x);
2745 fputs (")\n", asm_out_file);
2746 return true;
2747 }
2748 return default_assemble_integer (x, size, aligned_p);
2749 }
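/* For example, emitting the address of a function `foo' (name purely
   illustrative) into static data under the default 64-bit configuration
   produces

       data8 @fptr(foo)

   which resolves to the address of foo's official function descriptor
   rather than its raw code address.  */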
2750
2751 /* Emit the function prologue. */
2752
2753 static void
2754 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2755 {
2756 int mask, grsave, grsave_prev;
2757
2758 if (current_frame_info.need_regstk)
2759 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2760 current_frame_info.n_input_regs,
2761 current_frame_info.n_local_regs,
2762 current_frame_info.n_output_regs,
2763 current_frame_info.n_rotate_regs);
2764
2765 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2766 return;
2767
2768 /* Emit the .prologue directive. */
2769
2770 mask = 0;
2771 grsave = grsave_prev = 0;
2772 if (current_frame_info.reg_save_b0 != 0)
2773 {
2774 mask |= 8;
2775 grsave = grsave_prev = current_frame_info.reg_save_b0;
2776 }
2777 if (current_frame_info.reg_save_ar_pfs != 0
2778 && (grsave_prev == 0
2779 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2780 {
2781 mask |= 4;
2782 if (grsave_prev == 0)
2783 grsave = current_frame_info.reg_save_ar_pfs;
2784 grsave_prev = current_frame_info.reg_save_ar_pfs;
2785 }
2786 if (current_frame_info.reg_fp != 0
2787 && (grsave_prev == 0
2788 || current_frame_info.reg_fp == grsave_prev + 1))
2789 {
2790 mask |= 2;
2791 if (grsave_prev == 0)
2792 grsave = HARD_FRAME_POINTER_REGNUM;
2793 grsave_prev = current_frame_info.reg_fp;
2794 }
2795 if (current_frame_info.reg_save_pr != 0
2796 && (grsave_prev == 0
2797 || current_frame_info.reg_save_pr == grsave_prev + 1))
2798 {
2799 mask |= 1;
2800 if (grsave_prev == 0)
2801 grsave = current_frame_info.reg_save_pr;
2802 }
2803
2804 if (mask && TARGET_GNU_AS)
2805 fprintf (file, "\t.prologue %d, %d\n", mask,
2806 ia64_dbx_register_number (grsave));
2807 else
2808 fputs ("\t.prologue\n", file);
2809
2810 /* Emit a .spill directive, if necessary, to relocate the base of
2811 the register spill area. */
2812 if (current_frame_info.spill_cfa_off != -16)
2813 fprintf (file, "\t.spill %ld\n",
2814 (long) (current_frame_info.spill_cfa_off
2815 + current_frame_info.spill_size));
2816 }
2817
2818 /* Emit the .body directive at the scheduled end of the prologue. */
2819
2820 static void
2821 ia64_output_function_end_prologue (FILE *file)
2822 {
2823 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2824 return;
2825
2826 fputs ("\t.body\n", file);
2827 }
2828
2829 /* Emit the function epilogue. */
2830
2831 static void
2832 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
2833 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2834 {
2835 int i;
2836
2837 if (current_frame_info.reg_fp)
2838 {
2839 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2840 reg_names[HARD_FRAME_POINTER_REGNUM]
2841 = reg_names[current_frame_info.reg_fp];
2842 reg_names[current_frame_info.reg_fp] = tmp;
2843 }
2844 if (! TARGET_REG_NAMES)
2845 {
2846 for (i = 0; i < current_frame_info.n_input_regs; i++)
2847 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2848 for (i = 0; i < current_frame_info.n_local_regs; i++)
2849 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2850 for (i = 0; i < current_frame_info.n_output_regs; i++)
2851 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2852 }
2853
2854 current_frame_info.initialized = 0;
2855 }
2856
2857 int
2858 ia64_dbx_register_number (int regno)
2859 {
2860 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2861 from its home at loc79 to something inside the register frame. We
2862 must perform the same renumbering here for the debug info. */
2863 if (current_frame_info.reg_fp)
2864 {
2865 if (regno == HARD_FRAME_POINTER_REGNUM)
2866 regno = current_frame_info.reg_fp;
2867 else if (regno == current_frame_info.reg_fp)
2868 regno = HARD_FRAME_POINTER_REGNUM;
2869 }
2870
2871 if (IN_REGNO_P (regno))
2872 return 32 + regno - IN_REG (0);
2873 else if (LOC_REGNO_P (regno))
2874 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2875 else if (OUT_REGNO_P (regno))
2876 return (32 + current_frame_info.n_input_regs
2877 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2878 else
2879 return regno;
2880 }
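/* For example, with two input and three local registers: in0 and in1 map
   to 32 and 33, loc0..loc2 to 34..36, and out0 to 37.  */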
2881
2882 void
2883 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
2884 {
2885 rtx addr_reg, eight = GEN_INT (8);
2886
2887 /* The Intel assembler requires that the global __ia64_trampoline symbol
2888 be declared explicitly. */
2889 if (!TARGET_GNU_AS)
2890 {
2891 static bool declared_ia64_trampoline = false;
2892
2893 if (!declared_ia64_trampoline)
2894 {
2895 declared_ia64_trampoline = true;
2896 (*targetm.asm_out.globalize_label) (asm_out_file,
2897 "__ia64_trampoline");
2898 }
2899 }
2900
2901 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2902 addr = convert_memory_address (Pmode, addr);
2903 fnaddr = convert_memory_address (Pmode, fnaddr);
2904 static_chain = convert_memory_address (Pmode, static_chain);
2905
2906 /* Load up our iterator. */
2907 addr_reg = gen_reg_rtx (Pmode);
2908 emit_move_insn (addr_reg, addr);
2909
2910 /* The first two words are the fake descriptor:
2911 __ia64_trampoline, ADDR+16. */
2912 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2913 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2914 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2915
2916 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2917 copy_to_reg (plus_constant (addr, 16)));
2918 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2919
2920 /* The third word is the target descriptor. */
2921 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2922 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2923
2924 /* The fourth word is the static chain. */
2925 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
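/* The trampoline block written above is thus laid out as:

       [addr+ 0]  __ia64_trampoline   \  fake function descriptor
       [addr+ 8]  addr + 16           /
       [addr+16]  fnaddr                 the real target descriptor
       [addr+24]  static_chain                                       */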
2926 }
2927 \f
2928 /* Do any needed setup for a variadic function. CUM has not been updated
2929 for the last named argument which has type TYPE and mode MODE.
2930
2931 We generate the actual spill instructions during prologue generation. */
2932
2933 static void
2934 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2935 tree type, int * pretend_size,
2936 int second_time ATTRIBUTE_UNUSED)
2937 {
2938 CUMULATIVE_ARGS next_cum = *cum;
2939
2940 /* Skip the current argument. */
2941 ia64_function_arg_advance (&next_cum, mode, type, 1);
2942
2943 if (next_cum.words < MAX_ARGUMENT_SLOTS)
2944 {
2945 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
2946 *pretend_size = n * UNITS_PER_WORD;
2947 cfun->machine->n_varargs = n;
2948 }
2949 }
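/* For example (hypothetical prototype): for `int f (int a, int b, ...)'
   the two named arguments occupy two slots, so n is 6, *pretend_size
   becomes 48 bytes, and the prologue will spill the remaining six
   argument registers.  */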
2950
2951 /* Check whether TYPE is a homogeneous floating point aggregate. If
2952 it is, return the mode of the floating point type that appears
2953 in all leaves. If it is not, return VOIDmode.
2954
2955 An aggregate is a homogeneous floating point aggregate if all
2956 fields/elements in it have the same floating point type (e.g.,
2957 SFmode). 128-bit quad-precision floats are excluded. */
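/* For example, under this definition:
     struct { float x, y, z; }         is an HFA with element mode SFmode,
     struct { double re, im; }         is an HFA with element mode DFmode,
     struct { float x; double y; }     is not an HFA (mixed leaf modes),
   and an aggregate containing a TFmode (128-bit quad) field is never an
   HFA because of the quad-precision exclusion.  */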
2958
2959 static enum machine_mode
2960 hfa_element_mode (tree type, int nested)
2961 {
2962 enum machine_mode element_mode = VOIDmode;
2963 enum machine_mode mode;
2964 enum tree_code code = TREE_CODE (type);
2965 int know_element_mode = 0;
2966 tree t;
2967
2968 switch (code)
2969 {
2970 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2971 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2972 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2973 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2974 case FUNCTION_TYPE:
2975 return VOIDmode;
2976
2977 /* Fortran complex types are supposed to be HFAs, so we need to handle
2978 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2979 types though. */
2980 case COMPLEX_TYPE:
2981 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
2982 && TYPE_MODE (type) != TCmode)
2983 return GET_MODE_INNER (TYPE_MODE (type));
2984 else
2985 return VOIDmode;
2986
2987 case REAL_TYPE:
2988 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2989 mode if this is contained within an aggregate. */
2990 if (nested && TYPE_MODE (type) != TFmode)
2991 return TYPE_MODE (type);
2992 else
2993 return VOIDmode;
2994
2995 case ARRAY_TYPE:
2996 return hfa_element_mode (TREE_TYPE (type), 1);
2997
2998 case RECORD_TYPE:
2999 case UNION_TYPE:
3000 case QUAL_UNION_TYPE:
3001 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3002 {
3003 if (TREE_CODE (t) != FIELD_DECL)
3004 continue;
3005
3006 mode = hfa_element_mode (TREE_TYPE (t), 1);
3007 if (know_element_mode)
3008 {
3009 if (mode != element_mode)
3010 return VOIDmode;
3011 }
3012 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3013 return VOIDmode;
3014 else
3015 {
3016 know_element_mode = 1;
3017 element_mode = mode;
3018 }
3019 }
3020 return element_mode;
3021
3022 default:
3023 /* If we reach here, we probably have some front-end specific type
3024 that the backend doesn't know about. This can happen via the
3025 aggregate_value_p call in init_function_start. All we can do is
3026 ignore unknown tree types. */
3027 return VOIDmode;
3028 }
3029
3030 return VOIDmode;
3031 }
3032
3033 /* Return the number of words required to hold a quantity of TYPE and MODE
3034 when passed as an argument. */
3035 static int
3036 ia64_function_arg_words (tree type, enum machine_mode mode)
3037 {
3038 int words;
3039
3040 if (mode == BLKmode)
3041 words = int_size_in_bytes (type);
3042 else
3043 words = GET_MODE_SIZE (mode);
3044
3045 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3046 }
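/* For example, a 12 byte BLKmode aggregate yields (12 + 8 - 1) / 8 == 2,
   i.e. two argument slots.  */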
3047
3048 /* Return the number of registers that should be skipped so the current
3049 argument (described by TYPE and WORDS) will be properly aligned.
3050
3051 Integer and float arguments larger than 8 bytes start at the next
3052 even boundary. Aggregates larger than 8 bytes start at the next
3053 even boundary if the aggregate has 16 byte alignment. Note that
3054 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3055 but are still to be aligned in registers.
3056
3057 ??? The ABI does not specify how to handle aggregates with
3058 alignment from 9 to 15 bytes, or greater than 16. We handle them
3059 all as if they had 16 byte alignment. Such aggregates can occur
3060 only if gcc extensions are used. */
3061 static int
3062 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3063 {
3064 if ((cum->words & 1) == 0)
3065 return 0;
3066
3067 if (type
3068 && TREE_CODE (type) != INTEGER_TYPE
3069 && TREE_CODE (type) != REAL_TYPE)
3070 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3071 else
3072 return words > 1;
3073 }
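/* For example, when cum->words is odd and the next argument is an
   aggregate with 16 byte alignment, this returns 1 and one slot is
   skipped so the aggregate starts on an even slot boundary; an 8 byte
   integer in the same position returns 0 and gets no padding.  */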
3074
3075 /* Return rtx for register where argument is passed, or zero if it is passed
3076 on the stack. */
3077 /* ??? 128-bit quad-precision floats are always passed in general
3078 registers. */
3079
3080 rtx
3081 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3082 int named, int incoming)
3083 {
3084 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3085 int words = ia64_function_arg_words (type, mode);
3086 int offset = ia64_function_arg_offset (cum, type, words);
3087 enum machine_mode hfa_mode = VOIDmode;
3088
3089 /* If all argument slots are used, then it must go on the stack. */
3090 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3091 return 0;
3092
3093 /* Check for and handle homogeneous FP aggregates. */
3094 if (type)
3095 hfa_mode = hfa_element_mode (type, 0);
3096
3097 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3098 and unprototyped hfas are passed specially. */
3099 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3100 {
3101 rtx loc[16];
3102 int i = 0;
3103 int fp_regs = cum->fp_regs;
3104 int int_regs = cum->words + offset;
3105 int hfa_size = GET_MODE_SIZE (hfa_mode);
3106 int byte_size;
3107 int args_byte_size;
3108
3109 /* If prototyped, pass it in FR regs then GR regs.
3110 If not prototyped, pass it in both FR and GR regs.
3111
3112 If this is an SFmode aggregate, then it is possible to run out of
3113 FR regs while GR regs are still left. In that case, we pass the
3114 remaining part in the GR regs. */
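/* As an illustration (assuming the usual mapping of FR_ARG_FIRST to f8,
   and that this is the first argument of the call): a named, prototyped
   struct of four floats is built below as a PARALLEL of four SFmode
   registers f8..f11 carrying byte offsets 0, 4, 8 and 12.  */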
3115
3116 /* Fill the FP regs. We do this always. We stop if we reach the end
3117 of the argument, the last FP register, or the last argument slot. */
3118
3119 byte_size = ((mode == BLKmode)
3120 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3121 args_byte_size = int_regs * UNITS_PER_WORD;
3122 offset = 0;
3123 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3124 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3125 {
3126 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3127 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3128 + fp_regs)),
3129 GEN_INT (offset));
3130 offset += hfa_size;
3131 args_byte_size += hfa_size;
3132 fp_regs++;
3133 }
3134
3135 /* If no prototype, then the whole thing must go in GR regs. */
3136 if (! cum->prototype)
3137 offset = 0;
3138 /* If this is an SFmode aggregate, then we might have some left over
3139 that needs to go in GR regs. */
3140 else if (byte_size != offset)
3141 int_regs += offset / UNITS_PER_WORD;
3142
3143 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3144
3145 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3146 {
3147 enum machine_mode gr_mode = DImode;
3148 unsigned int gr_size;
3149
3150 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3151 then this goes in a GR reg left adjusted/little endian, right
3152 adjusted/big endian. */
3153 /* ??? Currently this is handled wrong, because 4-byte hunks are
3154 always right adjusted/little endian. */
3155 if (offset & 0x4)
3156 gr_mode = SImode;
3157 /* If we have an even 4 byte hunk because the aggregate is a
3158 multiple of 4 bytes in size, then this goes in a GR reg right
3159 adjusted/little endian. */
3160 else if (byte_size - offset == 4)
3161 gr_mode = SImode;
3162
3163 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3164 gen_rtx_REG (gr_mode, (basereg
3165 + int_regs)),
3166 GEN_INT (offset));
3167
3168 gr_size = GET_MODE_SIZE (gr_mode);
3169 offset += gr_size;
3170 if (gr_size == UNITS_PER_WORD
3171 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3172 int_regs++;
3173 else if (gr_size > UNITS_PER_WORD)
3174 int_regs += gr_size / UNITS_PER_WORD;
3175 }
3176 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3177 }
3178
3179 /* Integral and aggregates go in general registers. If we have run out of
3180 FR registers, then FP values must also go in general registers. This can
3181 happen when we have a SFmode HFA. */
3182 else if (mode == TFmode || mode == TCmode
3183 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3184 {
3185 int byte_size = ((mode == BLKmode)
3186 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3187 if (BYTES_BIG_ENDIAN
3188 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3189 && byte_size < UNITS_PER_WORD
3190 && byte_size > 0)
3191 {
3192 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3193 gen_rtx_REG (DImode,
3194 (basereg + cum->words
3195 + offset)),
3196 const0_rtx);
3197 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3198 }
3199 else
3200 return gen_rtx_REG (mode, basereg + cum->words + offset);
3201
3202 }
3203
3204 /* If there is a prototype, then FP values go in a FR register when
3205 named, and in a GR register when unnamed. */
3206 else if (cum->prototype)
3207 {
3208 if (named)
3209 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3210 /* In big-endian mode, an anonymous SFmode value must be represented
3211 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3212 the value into the high half of the general register. */
3213 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3214 return gen_rtx_PARALLEL (mode,
3215 gen_rtvec (1,
3216 gen_rtx_EXPR_LIST (VOIDmode,
3217 gen_rtx_REG (DImode, basereg + cum->words + offset),
3218 const0_rtx)));
3219 else
3220 return gen_rtx_REG (mode, basereg + cum->words + offset);
3221 }
3222 /* If there is no prototype, then FP values go in both FR and GR
3223 registers. */
3224 else
3225 {
3226 /* See comment above. */
3227 enum machine_mode inner_mode =
3228 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3229
3230 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3231 gen_rtx_REG (mode, (FR_ARG_FIRST
3232 + cum->fp_regs)),
3233 const0_rtx);
3234 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3235 gen_rtx_REG (inner_mode,
3236 (basereg + cum->words
3237 + offset)),
3238 const0_rtx);
3239
3240 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3241 }
3242 }
3243
3244 /* Return number of words, at the beginning of the argument, that must be
3245 put in registers. 0 if the argument is entirely in registers or entirely
3246 in memory. */
3247
3248 int
3249 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3250 tree type, int named ATTRIBUTE_UNUSED)
3251 {
3252 int words = ia64_function_arg_words (type, mode);
3253 int offset = ia64_function_arg_offset (cum, type, words);
3254
3255 /* If all argument slots are used, then it must go on the stack. */
3256 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3257 return 0;
3258
3259 /* It doesn't matter whether the argument goes in FR or GR regs. If
3260 it fits within the 8 argument slots, then it goes entirely in
3261 registers. If it extends past the last argument slot, then the rest
3262 goes on the stack. */
3263
3264 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3265 return 0;
3266
3267 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3268 }
3269
3270 /* Update CUM to point after this argument. This is patterned after
3271 ia64_function_arg. */
3272
3273 void
3274 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3275 tree type, int named)
3276 {
3277 int words = ia64_function_arg_words (type, mode);
3278 int offset = ia64_function_arg_offset (cum, type, words);
3279 enum machine_mode hfa_mode = VOIDmode;
3280
3281 /* If all arg slots are already full, then there is nothing to do. */
3282 if (cum->words >= MAX_ARGUMENT_SLOTS)
3283 return;
3284
3285 cum->words += words + offset;
3286
3287 /* Check for and handle homogeneous FP aggregates. */
3288 if (type)
3289 hfa_mode = hfa_element_mode (type, 0);
3290
3291 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3292 and unprototyped hfas are passed specially. */
3293 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3294 {
3295 int fp_regs = cum->fp_regs;
3296 /* This is the original value of cum->words + offset. */
3297 int int_regs = cum->words - words;
3298 int hfa_size = GET_MODE_SIZE (hfa_mode);
3299 int byte_size;
3300 int args_byte_size;
3301
3302 /* If prototyped, pass it in FR regs then GR regs.
3303 If not prototyped, pass it in both FR and GR regs.
3304
3305 If this is an SFmode aggregate, then it is possible to run out of
3306 FR regs while GR regs are still left. In that case, we pass the
3307 remaining part in the GR regs. */
3308
3309 /* Fill the FP regs. We do this always. We stop if we reach the end
3310 of the argument, the last FP register, or the last argument slot. */
3311
3312 byte_size = ((mode == BLKmode)
3313 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3314 args_byte_size = int_regs * UNITS_PER_WORD;
3315 offset = 0;
3316 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3317 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3318 {
3319 offset += hfa_size;
3320 args_byte_size += hfa_size;
3321 fp_regs++;
3322 }
3323
3324 cum->fp_regs = fp_regs;
3325 }
3326
3327 /* Integral and aggregates go in general registers. If we have run out of
3328 FR registers, then FP values must also go in general registers. This can
3329 happen when we have a SFmode HFA. */
3330 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3331 cum->int_regs = cum->words;
3332
3333 /* If there is a prototype, then FP values go in a FR register when
3334 named, and in a GR register when unnamed. */
3335 else if (cum->prototype)
3336 {
3337 if (! named)
3338 cum->int_regs = cum->words;
3339 else
3340 /* ??? Complex types should not reach here. */
3341 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3342 }
3343 /* If there is no prototype, then FP values go in both FR and GR
3344 registers. */
3345 else
3346 {
3347 /* ??? Complex types should not reach here. */
3348 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3349 cum->int_regs = cum->words;
3350 }
3351 }
3352
3353 /* Variable sized types are passed by reference. */
3354 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3355
3356 static bool
3357 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3358 enum machine_mode mode ATTRIBUTE_UNUSED,
3359 tree type, bool named ATTRIBUTE_UNUSED)
3360 {
3361 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3362 }
3363
3364 /* True if it is OK to do sibling call optimization for the specified
3365 call expression EXP. DECL will be the called function, or NULL if
3366 this is an indirect call. */
3367 static bool
3368 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3369 {
3370 /* We can't perform a sibcall if the current function has the syscall_linkage
3371 attribute. */
3372 if (lookup_attribute ("syscall_linkage",
3373 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
3374 return false;
3375
3376 /* We must always return with our current GP. This means we can
3377 only sibcall to functions defined in the current module. */
3378 return decl && (*targetm.binds_local_p) (decl);
3379 }
3380 \f
3381
3382 /* Implement va_arg. */
3383
3384 static tree
3385 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3386 {
3387 /* Variable sized types are passed by reference. */
3388 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
3389 {
3390 tree ptrtype = build_pointer_type (type);
3391 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3392 return build_va_arg_indirect_ref (addr);
3393 }
3394
3395 /* Aggregate arguments with alignment larger than 8 bytes start at
3396 the next even boundary. Integer and floating point arguments
3397 do so if they are larger than 8 bytes, whether or not their
3398 alignment also exceeds 8 bytes. */
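/* With UNITS_PER_WORD == 8 the statements below compute
   valist = (valist + 15) & -16, rounding the argument pointer up to the
   next 16-byte (even argument slot) boundary. */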
3399 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3400 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3401 {
3402 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3403 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
3404 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3405 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
3406 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3407 gimplify_and_add (t, pre_p);
3408 }
3409
3410 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3411 }
3412 \f
3413 /* Return 1 if the function's return value is returned in memory. Return 0
3414 if it is returned in a register. */
3415
3416 static bool
3417 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3418 {
3419 enum machine_mode mode;
3420 enum machine_mode hfa_mode;
3421 HOST_WIDE_INT byte_size;
3422
3423 mode = TYPE_MODE (valtype);
3424 byte_size = GET_MODE_SIZE (mode);
3425 if (mode == BLKmode)
3426 {
3427 byte_size = int_size_in_bytes (valtype);
3428 if (byte_size < 0)
3429 return true;
3430 }
3431
3432 /* HFAs with up to 8 elements are returned in the FP argument registers. */
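/* As a sketch of the thresholds below, assuming MAX_ARGUMENT_SLOTS == 8 and
   MAX_INT_RETURN_SLOTS == 4 for this target: a structure of four doubles is
   an HFA of four DFmode elements and is returned in FP registers, while a
   non-HFA aggregate larger than 32 bytes (UNITS_PER_WORD *
   MAX_INT_RETURN_SLOTS) is returned in memory. */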
3433
3434 hfa_mode = hfa_element_mode (valtype, 0);
3435 if (hfa_mode != VOIDmode)
3436 {
3437 int hfa_size = GET_MODE_SIZE (hfa_mode);
3438
3439 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3440 return true;
3441 else
3442 return false;
3443 }
3444 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3445 return true;
3446 else
3447 return false;
3448 }
3449
3450 /* Return rtx for register that holds the function return value. */
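/* For example, assuming FR_ARG_FIRST is f8 and GR_RET_FIRST is r8 on this
   target: a structure of four floats is returned as a PARALLEL of SFmode
   registers f8, f9, f10 and f11 at byte offsets 0, 4, 8 and 12; a scalar
   double is returned in f8; an integer or pointer is returned in r8. */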
3451
3452 rtx
3453 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
3454 {
3455 enum machine_mode mode;
3456 enum machine_mode hfa_mode;
3457
3458 mode = TYPE_MODE (valtype);
3459 hfa_mode = hfa_element_mode (valtype, 0);
3460
3461 if (hfa_mode != VOIDmode)
3462 {
3463 rtx loc[8];
3464 int i;
3465 int hfa_size;
3466 int byte_size;
3467 int offset;
3468
3469 hfa_size = GET_MODE_SIZE (hfa_mode);
3470 byte_size = ((mode == BLKmode)
3471 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3472 offset = 0;
3473 for (i = 0; offset < byte_size; i++)
3474 {
3475 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3476 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3477 GEN_INT (offset));
3478 offset += hfa_size;
3479 }
3480 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3481 }
3482 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
3483 return gen_rtx_REG (mode, FR_ARG_FIRST);
3484 else
3485 {
3486 if (BYTES_BIG_ENDIAN
3487 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3488 {
3489 rtx loc[8];
3490 int offset;
3491 int bytesize;
3492 int i;
3493
3494 offset = 0;
3495 bytesize = int_size_in_bytes (valtype);
3496 for (i = 0; offset < bytesize; i++)
3497 {
3498 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3499 gen_rtx_REG (DImode,
3500 GR_RET_FIRST + i),
3501 GEN_INT (offset));
3502 offset += UNITS_PER_WORD;
3503 }
3504 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3505 }
3506 else
3507 return gen_rtx_REG (mode, GR_RET_FIRST);
3508 }
3509 }
3510
3511 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3512 We need to emit DTP-relative relocations. */
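/* For a symbol foo the code below emits:  data8.ua  @dtprel(foo)  */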
3513
3514 void
3515 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
3516 {
3517 if (size != 8)
3518 abort ();
3519 fputs ("\tdata8.ua\t@dtprel(", file);
3520 output_addr_const (file, x);
3521 fputs (")", file);
3522 }
3523
3524 /* Print a memory address as an operand to reference that memory location. */
3525
3526 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3527 also call this from ia64_print_operand for memory addresses. */
3528
3529 void
3530 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
3531 rtx address ATTRIBUTE_UNUSED)
3532 {
3533 }
3534
3535 /* Print an operand to an assembler instruction.
3536 C Swap and print a comparison operator.
3537 D Print an FP comparison operator.
3538 E Print 32 - constant, for SImode shifts as extract.
3539 e Print 64 - constant, for DImode rotates.
3540 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3541 a floating point register emitted normally.
3542 I Invert a predicate register by adding 1.
3543 J Select the proper predicate register for a condition.
3544 j Select the inverse predicate register for a condition.
3545 O Append .acq for volatile load.
3546 P Postincrement of a MEM.
3547 Q Append .rel for volatile store.
3548 S Shift amount for shladd instruction.
3549 T Print an 8-bit sign-extended number (K) as a 32-bit unsigned number
3550 for the Intel assembler.
3551 U Print an 8-bit sign-extended number (K) as a 64-bit unsigned number
3552 for the Intel assembler.
3553 r Print register name, or constant 0 as r0. HP compatibility for
3554 Linux kernel. */
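/* A few examples of the codes above: %E applied to the constant 5 prints 27
   (32 - 5), %e applied to 5 prints 59 (64 - 5), and %r applied to a zero
   constant prints "r0". */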
3555 void
3556 ia64_print_operand (FILE * file, rtx x, int code)
3557 {
3558 const char *str;
3559
3560 switch (code)
3561 {
3562 case 0:
3563 /* Handled below. */
3564 break;
3565
3566 case 'C':
3567 {
3568 enum rtx_code c = swap_condition (GET_CODE (x));
3569 fputs (GET_RTX_NAME (c), file);
3570 return;
3571 }
3572
3573 case 'D':
3574 switch (GET_CODE (x))
3575 {
3576 case NE:
3577 str = "neq";
3578 break;
3579 case UNORDERED:
3580 str = "unord";
3581 break;
3582 case ORDERED:
3583 str = "ord";
3584 break;
3585 default:
3586 str = GET_RTX_NAME (GET_CODE (x));
3587 break;
3588 }
3589 fputs (str, file);
3590 return;
3591
3592 case 'E':
3593 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3594 return;
3595
3596 case 'e':
3597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3598 return;
3599
3600 case 'F':
3601 if (x == CONST0_RTX (GET_MODE (x)))
3602 str = reg_names [FR_REG (0)];
3603 else if (x == CONST1_RTX (GET_MODE (x)))
3604 str = reg_names [FR_REG (1)];
3605 else if (GET_CODE (x) == REG)
3606 str = reg_names [REGNO (x)];
3607 else
3608 abort ();
3609 fputs (str, file);
3610 return;
3611
3612 case 'I':
3613 fputs (reg_names [REGNO (x) + 1], file);
3614 return;
3615
3616 case 'J':
3617 case 'j':
3618 {
3619 unsigned int regno = REGNO (XEXP (x, 0));
3620 if (GET_CODE (x) == EQ)
3621 regno += 1;
3622 if (code == 'j')
3623 regno ^= 1;
3624 fputs (reg_names [regno], file);
3625 }
3626 return;
3627
3628 case 'O':
3629 if (MEM_VOLATILE_P (x))
3630 fputs(".acq", file);
3631 return;
3632
3633 case 'P':
3634 {
3635 HOST_WIDE_INT value;
3636
3637 switch (GET_CODE (XEXP (x, 0)))
3638 {
3639 default:
3640 return;
3641
3642 case POST_MODIFY:
3643 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3644 if (GET_CODE (x) == CONST_INT)
3645 value = INTVAL (x);
3646 else if (GET_CODE (x) == REG)
3647 {
3648 fprintf (file, ", %s", reg_names[REGNO (x)]);
3649 return;
3650 }
3651 else
3652 abort ();
3653 break;
3654
3655 case POST_INC:
3656 value = GET_MODE_SIZE (GET_MODE (x));
3657 break;
3658
3659 case POST_DEC:
3660 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3661 break;
3662 }
3663
3664 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
3665 return;
3666 }
3667
3668 case 'Q':
3669 if (MEM_VOLATILE_P (x))
3670 fputs(".rel", file);
3671 return;
3672
3673 case 'S':
3674 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3675 return;
3676
3677 case 'T':
3678 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3679 {
3680 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3681 return;
3682 }
3683 break;
3684
3685 case 'U':
3686 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3687 {
3688 const char *prefix = "0x";
3689 if (INTVAL (x) & 0x80000000)
3690 {
3691 fprintf (file, "0xffffffff");
3692 prefix = "";
3693 }
3694 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3695 return;
3696 }
3697 break;
3698
3699 case 'r':
3700 /* If this operand is the constant zero, write it as register zero.
3701 Any register, zero, or CONST_INT value is OK here. */
3702 if (GET_CODE (x) == REG)
3703 fputs (reg_names[REGNO (x)], file);
3704 else if (x == CONST0_RTX (GET_MODE (x)))
3705 fputs ("r0", file);
3706 else if (GET_CODE (x) == CONST_INT)
3707 output_addr_const (file, x);
3708 else
3709 output_operand_lossage ("invalid %%r value");
3710 return;
3711
3712 case '+':
3713 {
3714 const char *which;
3715
3716 /* For conditional branches, returns, or calls, substitute
3717 sptk, dptk, dpnt, or spnt for %+. */
3718 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3719 if (x)
3720 {
3721 int pred_val = INTVAL (XEXP (x, 0));
3722
3723 /* Guess that the top and bottom 2% are statically predicted. */
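/* With REG_BR_PROB_BASE == 10000, the cutoffs below are 200 (.spnt),
   5000 (.dpnt) and 9800 (.dptk); probabilities of 9800 and above get
   .sptk. */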
3724 if (pred_val < REG_BR_PROB_BASE / 50)
3725 which = ".spnt";
3726 else if (pred_val < REG_BR_PROB_BASE / 2)
3727 which = ".dpnt";
3728 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3729 which = ".dptk";
3730 else
3731 which = ".sptk";
3732 }
3733 else if (GET_CODE (current_output_insn) == CALL_INSN)
3734 which = ".sptk";
3735 else
3736 which = ".dptk";
3737
3738 fputs (which, file);
3739 return;
3740 }
3741
3742 case ',':
3743 x = current_insn_predicate;
3744 if (x)
3745 {
3746 unsigned int regno = REGNO (XEXP (x, 0));
3747 if (GET_CODE (x) == EQ)
3748 regno += 1;
3749 fprintf (file, "(%s) ", reg_names [regno]);
3750 }
3751 return;
3752
3753 default:
3754 output_operand_lossage ("ia64_print_operand: unknown code");
3755 return;
3756 }
3757
3758 switch (GET_CODE (x))
3759 {
3760 /* This happens for the spill/restore instructions. */
3761 case POST_INC:
3762 case POST_DEC:
3763 case POST_MODIFY:
3764 x = XEXP (x, 0);
3765 /* ... fall through ... */
3766
3767 case REG:
3768 fputs (reg_names [REGNO (x)], file);
3769 break;
3770
3771 case MEM:
3772 {
3773 rtx addr = XEXP (x, 0);
3774 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
3775 addr = XEXP (addr, 0);
3776 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3777 break;
3778 }
3779
3780 default:
3781 output_addr_const (file, x);
3782 break;
3783 }
3784
3785 return;
3786 }
3787 \f
3788 /* Compute a (partial) cost for rtx X. Return true if the complete
3789 cost has been computed, and false if subexpressions should be
3790 scanned. In either case, *TOTAL contains the cost result. */
3791 /* ??? This is incomplete. */
3792
3793 static bool
3794 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
3795 {
3796 switch (code)
3797 {
3798 case CONST_INT:
3799 switch (outer_code)
3800 {
3801 case SET:
3802 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
3803 return true;
3804 case PLUS:
3805 if (CONST_OK_FOR_I (INTVAL (x)))
3806 *total = 0;
3807 else if (CONST_OK_FOR_J (INTVAL (x)))
3808 *total = 1;
3809 else
3810 *total = COSTS_N_INSNS (1);
3811 return true;
3812 default:
3813 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
3814 *total = 0;
3815 else
3816 *total = COSTS_N_INSNS (1);
3817 return true;
3818 }
3819
3820 case CONST_DOUBLE:
3821 *total = COSTS_N_INSNS (1);
3822 return true;
3823
3824 case CONST:
3825 case SYMBOL_REF:
3826 case LABEL_REF:
3827 *total = COSTS_N_INSNS (3);
3828 return true;
3829
3830 case MULT:
3831 /* For multiplies wider than HImode, we have to go to the FPU,
3832 which normally involves copies. Plus there's the latency
3833 of the multiply itself, and the latency of the instructions to
3834 transfer integer regs to FP regs. */
3835 /* ??? Check for FP mode. */
3836 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
3837 *total = COSTS_N_INSNS (10);
3838 else
3839 *total = COSTS_N_INSNS (2);
3840 return true;
3841
3842 case PLUS:
3843 case MINUS:
3844 case ASHIFT:
3845 case ASHIFTRT:
3846 case LSHIFTRT:
3847 *total = COSTS_N_INSNS (1);
3848 return true;
3849
3850 case DIV:
3851 case UDIV:
3852 case MOD:
3853 case UMOD:
3854 /* We make divide expensive, so that divide-by-constant will be
3855 optimized to a multiply. */
3856 *total = COSTS_N_INSNS (60);
3857 return true;
3858
3859 default:
3860 return false;
3861 }
3862 }
3863
3864 /* Calculate the cost of moving data from a register in class FROM to
3865 one in class TO, using MODE. */
3866
3867 int
3868 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
3869 enum reg_class to)
3870 {
3871 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3872 if (to == ADDL_REGS)
3873 to = GR_REGS;
3874 if (from == ADDL_REGS)
3875 from = GR_REGS;
3876
3877 /* All costs are symmetric, so reduce cases by putting the
3878 lower number class as the destination. */
3879 if (from < to)
3880 {
3881 enum reg_class tmp = to;
3882 to = from, from = tmp;
3883 }
3884
3885 /* Moving from FR<->GR in XFmode must be more expensive than 2,
3886 so that we get secondary memory reloads. Between FR_REGS,
3887 we have to make this at least as expensive as MEMORY_MOVE_COST
3888 to avoid spectacularly poor register class preferencing. */
3889 if (mode == XFmode)
3890 {
3891 if (to != GR_REGS || from != GR_REGS)
3892 return MEMORY_MOVE_COST (mode, to, 0);
3893 else
3894 return 3;
3895 }
3896
3897 switch (to)
3898 {
3899 case PR_REGS:
3900 /* Moving between PR registers takes two insns. */
3901 if (from == PR_REGS)
3902 return 3;
3903 /* Moving between PR and anything but GR is impossible. */
3904 if (from != GR_REGS)
3905 return MEMORY_MOVE_COST (mode, to, 0);
3906 break;
3907
3908 case BR_REGS:
3909 /* Moving between BR and anything but GR is impossible. */
3910 if (from != GR_REGS && from != GR_AND_BR_REGS)
3911 return MEMORY_MOVE_COST (mode, to, 0);
3912 break;
3913
3914 case AR_I_REGS:
3915 case AR_M_REGS:
3916 /* Moving between AR and anything but GR is impossible. */
3917 if (from != GR_REGS)
3918 return MEMORY_MOVE_COST (mode, to, 0);
3919 break;
3920
3921 case GR_REGS:
3922 case FR_REGS:
3923 case GR_AND_FR_REGS:
3924 case GR_AND_BR_REGS:
3925 case ALL_REGS:
3926 break;
3927
3928 default:
3929 abort ();
3930 }
3931
3932 return 2;
3933 }
3934
3935 /* This function returns the register class required for a secondary
3936 register when copying between one of the registers in CLASS, and X,
3937 using MODE. A return value of NO_REGS means that no secondary register
3938 is required. */
3939
3940 enum reg_class
3941 ia64_secondary_reload_class (enum reg_class class,
3942 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3943 {
3944 int regno = -1;
3945
3946 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3947 regno = true_regnum (x);
3948
3949 switch (class)
3950 {
3951 case BR_REGS:
3952 case AR_M_REGS:
3953 case AR_I_REGS:
3954 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3955 interaction. We end up with two pseudos with overlapping lifetimes,
3956 both of which are equiv to the same constant and both of which need
3957 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3958 changes depending on the path length, which means the qty_first_reg
3959 check in make_regs_eqv can give different answers at different times.
3960 At some point I'll probably need a reload_indi pattern to handle
3961 this.
3962
3963 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3964 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3965 non-general registers for good measure. */
3966 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3967 return GR_REGS;
3968
3969 /* This is needed if a pseudo used as a call_operand gets spilled to a
3970 stack slot. */
3971 if (GET_CODE (x) == MEM)
3972 return GR_REGS;
3973 break;
3974
3975 case FR_REGS:
3976 /* Need to go through general registers to get to other class regs. */
3977 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3978 return GR_REGS;
3979
3980 /* This can happen when a paradoxical subreg is an operand to the
3981 muldi3 pattern. */
3982 /* ??? This shouldn't be necessary after instruction scheduling is
3983 enabled, because paradoxical subregs are not accepted by
3984 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3985 stop the paradoxical subreg stupidity in the *_operand functions
3986 in recog.c. */
3987 if (GET_CODE (x) == MEM
3988 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3989 || GET_MODE (x) == QImode))
3990 return GR_REGS;
3991
3992 /* This can happen because of the ior/and/etc patterns that accept FP
3993 registers as operands. If the third operand is a constant, then it
3994 needs to be reloaded into an FP register. */
3995 if (GET_CODE (x) == CONST_INT)
3996 return GR_REGS;
3997
3998 /* This can happen because of register elimination in a muldi3 insn.
3999 E.g. `26107 * (unsigned long)&u'. */
4000 if (GET_CODE (x) == PLUS)
4001 return GR_REGS;
4002 break;
4003
4004 case PR_REGS:
4005 /* ??? This happens if we cse/gcse a BImode value across a call,
4006 and the function has a nonlocal goto. This is because global
4007 does not allocate call crossing pseudos to hard registers when
4008 current_function_has_nonlocal_goto is true. This is relatively
4009 common for C++ programs that use exceptions. To reproduce,
4010 return NO_REGS and compile libstdc++. */
4011 if (GET_CODE (x) == MEM)
4012 return GR_REGS;
4013
4014 /* This can happen when we take a BImode subreg of a DImode value,
4015 and that DImode value winds up in some non-GR register. */
4016 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4017 return GR_REGS;
4018 break;
4019
4020 default:
4021 break;
4022 }
4023
4024 return NO_REGS;
4025 }
4026
4027 \f
4028 /* Emit text to declare externally defined variables and functions, because
4029 the Intel assembler does not support undefined externals. */
4030
4031 void
4032 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4033 {
4034 int save_referenced;
4035
4036 /* GNU as does not need anything here, but the HP linker does need
4037 something for external functions. */
4038
4039 if (TARGET_GNU_AS
4040 && (!TARGET_HPUX_LD
4041 || TREE_CODE (decl) != FUNCTION_DECL
4042 || strstr (name, "__builtin_") == name))
4043 return;
4044
4045 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4046 the linker when we do this, so we need to be careful not to do this for
4047 builtin functions which have no library equivalent. Unfortunately, we
4048 can't tell here whether or not a function will actually be called by
4049 expand_expr, so we pull in library functions even if we may not need
4050 them later. */
4051 if (! strcmp (name, "__builtin_next_arg")
4052 || ! strcmp (name, "alloca")
4053 || ! strcmp (name, "__builtin_constant_p")
4054 || ! strcmp (name, "__builtin_args_info"))
4055 return;
4056
4057 if (TARGET_HPUX_LD)
4058 ia64_hpux_add_extern_decl (decl);
4059 else
4060 {
4061 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4062 restore it. */
4063 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4064 if (TREE_CODE (decl) == FUNCTION_DECL)
4065 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4066 (*targetm.asm_out.globalize_label) (file, name);
4067 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4068 }
4069 }
4070 \f
4071 /* Parse the -mfixed-range= option string. */
4072
4073 static void
4074 fix_range (const char *const_str)
4075 {
4076 int i, first, last;
4077 char *str, *dash, *comma;
4078
4079 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4080 REG2 are either register names or register numbers. The effect
4081 of this option is to mark the registers in the range from REG1 to
4082 REG2 as ``fixed'' so they won't be used by the compiler. This is
4083 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
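/* For example, -mfixed-range=f32-f127 fixes the upper floating point
   registers; several ranges may be given, as in the illustrative
   -mfixed-range=f32-f63,f64-f127. */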
4084
4085 i = strlen (const_str);
4086 str = (char *) alloca (i + 1);
4087 memcpy (str, const_str, i + 1);
4088
4089 while (1)
4090 {
4091 dash = strchr (str, '-');
4092 if (!dash)
4093 {
4094 warning ("value of -mfixed-range must have form REG1-REG2");
4095 return;
4096 }
4097 *dash = '\0';
4098
4099 comma = strchr (dash + 1, ',');
4100 if (comma)
4101 *comma = '\0';
4102
4103 first = decode_reg_name (str);
4104 if (first < 0)
4105 {
4106 warning ("unknown register name: %s", str);
4107 return;
4108 }
4109
4110 last = decode_reg_name (dash + 1);
4111 if (last < 0)
4112 {
4113 warning ("unknown register name: %s", dash + 1);
4114 return;
4115 }
4116
4117 *dash = '-';
4118
4119 if (first > last)
4120 {
4121 warning ("%s-%s is an empty range", str, dash + 1);
4122 return;
4123 }
4124
4125 for (i = first; i <= last; ++i)
4126 fixed_regs[i] = call_used_regs[i] = 1;
4127
4128 if (!comma)
4129 break;
4130
4131 *comma = ',';
4132 str = comma + 1;
4133 }
4134 }
4135
4136 static struct machine_function *
4137 ia64_init_machine_status (void)
4138 {
4139 return ggc_alloc_cleared (sizeof (struct machine_function));
4140 }
4141
4142 /* Handle TARGET_OPTIONS switches. */
4143
4144 void
4145 ia64_override_options (void)
4146 {
4147 static struct pta
4148 {
4149 const char *const name; /* processor name or nickname. */
4150 const enum processor_type processor;
4151 }
4152 const processor_alias_table[] =
4153 {
4154 {"itanium", PROCESSOR_ITANIUM},
4155 {"itanium1", PROCESSOR_ITANIUM},
4156 {"merced", PROCESSOR_ITANIUM},
4157 {"itanium2", PROCESSOR_ITANIUM2},
4158 {"mckinley", PROCESSOR_ITANIUM2},
4159 };
4160
4161 int const pta_size = ARRAY_SIZE (processor_alias_table);
4162 int i;
4163
4164 if (TARGET_AUTO_PIC)
4165 target_flags |= MASK_CONST_GP;
4166
4167 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4168 {
4169 if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
4170 && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
4171 {
4172 warning ("cannot optimize floating point division for both latency and throughput");
4173 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4174 }
4175 else
4176 {
4177 if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
4178 target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
4179 else
4180 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4181 }
4182 }
4183
4184 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4185 {
4186 if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
4187 && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
4188 {
4189 warning ("cannot optimize integer division for both latency and throughput");
4190 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4191 }
4192 else
4193 {
4194 if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
4195 target_flags &= ~MASK_INLINE_INT_DIV_LAT;
4196 else
4197 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4198 }
4199 }
4200
4201 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4202 {
4203 if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
4204 && (target_flags_explicit & MASK_INLINE_SQRT_THR))
4205 {
4206 warning ("cannot optimize square root for both latency and throughput");
4207 target_flags &= ~MASK_INLINE_SQRT_THR;
4208 }
4209 else
4210 {
4211 if (target_flags_explicit & MASK_INLINE_SQRT_THR)
4212 target_flags &= ~MASK_INLINE_SQRT_LAT;
4213 else
4214 target_flags &= ~MASK_INLINE_SQRT_THR;
4215 }
4216 }
4217
4218 if (TARGET_INLINE_SQRT_LAT)
4219 {
4220 warning ("not yet implemented: latency-optimized inline square root");
4221 target_flags &= ~MASK_INLINE_SQRT_LAT;
4222 }
4223
4224 if (ia64_fixed_range_string)
4225 fix_range (ia64_fixed_range_string);
4226
4227 if (ia64_tls_size_string)
4228 {
4229 char *end;
4230 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4231 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4232 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4233 else
4234 ia64_tls_size = tmp;
4235 }
4236
4237 if (!ia64_tune_string)
4238 ia64_tune_string = "itanium2";
4239
4240 for (i = 0; i < pta_size; i++)
4241 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4242 {
4243 ia64_tune = processor_alias_table[i].processor;
4244 break;
4245 }
4246
4247 if (i == pta_size)
4248 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4249
4250 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4251 flag_schedule_insns_after_reload = 0;
4252
4253 /* Variable tracking should be run after all optimizations which change the
4254 order of insns. It also needs a valid CFG. */
4255 ia64_flag_var_tracking = flag_var_tracking;
4256 flag_var_tracking = 0;
4257
4258 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4259
4260 init_machine_status = ia64_init_machine_status;
4261 }
4262 \f
4263 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4264 static enum attr_type ia64_safe_type (rtx);
4265
4266 static enum attr_itanium_class
4267 ia64_safe_itanium_class (rtx insn)
4268 {
4269 if (recog_memoized (insn) >= 0)
4270 return get_attr_itanium_class (insn);
4271 else
4272 return ITANIUM_CLASS_UNKNOWN;
4273 }
4274
4275 static enum attr_type
4276 ia64_safe_type (rtx insn)
4277 {
4278 if (recog_memoized (insn) >= 0)
4279 return get_attr_type (insn);
4280 else
4281 return TYPE_UNKNOWN;
4282 }
4283 \f
4284 /* The following collection of routines emit instruction group stop bits as
4285 necessary to avoid dependencies. */
4286
4287 /* Need to track some additional registers as far as serialization is
4288 concerned so we can properly handle br.call and br.ret. We could
4289 make these registers visible to gcc, but since these registers are
4290 never explicitly used in gcc generated code, it seems wasteful to
4291 do so (plus it would make the call and return patterns needlessly
4292 complex). */
4293 #define REG_RP (BR_REG (0))
4294 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4295 /* This is used for volatile asms which may require a stop bit immediately
4296 before and after them. */
4297 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4298 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4299 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4300
4301 /* For each register, we keep track of how it has been written in the
4302 current instruction group.
4303
4304 If a register is written unconditionally (no qualifying predicate),
4305 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4306
4307 If a register is written if its qualifying predicate P is true, we
4308 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4309 may be written again by the complement of P (P^1) and when this happens,
4310 WRITE_COUNT gets set to 2.
4311
4312 The result of this is that whenever an insn attempts to write a register
4313 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4314
4315 If a predicate register is written by a floating-point insn, we set
4316 WRITTEN_BY_FP to true.
4317
4318 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4319 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4320
4321 struct reg_write_state
4322 {
4323 unsigned int write_count : 2;
4324 unsigned int first_pred : 16;
4325 unsigned int written_by_fp : 1;
4326 unsigned int written_by_and : 1;
4327 unsigned int written_by_or : 1;
4328 };
4329
4330 /* Cumulative info for the current instruction group. */
4331 struct reg_write_state rws_sum[NUM_REGS];
4332 /* Info for the current instruction. This gets copied to rws_sum after a
4333 stop bit is emitted. */
4334 struct reg_write_state rws_insn[NUM_REGS];
4335
4336 /* Indicates whether this is the first instruction after a stop bit,
4337 in which case we don't need another stop bit. Without this, we hit
4338 the abort in ia64_variable_issue when scheduling an alloc. */
4339 static int first_instruction;
4340
4341 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4342 RTL for one instruction. */
4343 struct reg_flags
4344 {
4345 unsigned int is_write : 1; /* Is register being written? */
4346 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4347 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4348 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4349 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4350 unsigned int is_sibcall : 1; /* Is this a sibling call (rather than a normal call)? */
4351 };
4352
4353 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4354 static int rws_access_regno (int, struct reg_flags, int);
4355 static int rws_access_reg (rtx, struct reg_flags, int);
4356 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4357 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4358 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4359 static void init_insn_group_barriers (void);
4360 static int group_barrier_needed_p (rtx);
4361 static int safe_group_barrier_needed_p (rtx);
4362
4363 /* Update *RWS for REGNO, which is being written by the current instruction,
4364 with predicate PRED, and associated register flags in FLAGS. */
4365
4366 static void
4367 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4368 {
4369 if (pred)
4370 rws[regno].write_count++;
4371 else
4372 rws[regno].write_count = 2;
4373 rws[regno].written_by_fp |= flags.is_fp;
4374 /* ??? Not tracking and/or across differing predicates. */
4375 rws[regno].written_by_and = flags.is_and;
4376 rws[regno].written_by_or = flags.is_or;
4377 rws[regno].first_pred = pred;
4378 }
4379
4380 /* Handle an access to register REGNO of type FLAGS using predicate register
4381 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4382 a dependency with an earlier instruction in the same group. */
4383
4384 static int
4385 rws_access_regno (int regno, struct reg_flags flags, int pred)
4386 {
4387 int need_barrier = 0;
4388
4389 if (regno >= NUM_REGS)
4390 abort ();
4391
4392 if (! PR_REGNO_P (regno))
4393 flags.is_and = flags.is_or = 0;
4394
4395 if (flags.is_write)
4396 {
4397 int write_count;
4398
4399 /* One insn writes same reg multiple times? */
4400 if (rws_insn[regno].write_count > 0)
4401 abort ();
4402
4403 /* Update info for current instruction. */
4404 rws_update (rws_insn, regno, flags, pred);
4405 write_count = rws_sum[regno].write_count;
4406
4407 switch (write_count)
4408 {
4409 case 0:
4410 /* The register has not been written yet. */
4411 rws_update (rws_sum, regno, flags, pred);
4412 break;
4413
4414 case 1:
4415 /* The register has been written via a predicate. If this is
4416 not a complementary predicate, then we need a barrier. */
4417 /* ??? This assumes that P and P+1 are always complementary
4418 predicates for P even. */
4419 if (flags.is_and && rws_sum[regno].written_by_and)
4420 ;
4421 else if (flags.is_or && rws_sum[regno].written_by_or)
4422 ;
4423 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4424 need_barrier = 1;
4425 rws_update (rws_sum, regno, flags, pred);
4426 break;
4427
4428 case 2:
4429 /* The register has been unconditionally written already. We
4430 need a barrier. */
4431 if (flags.is_and && rws_sum[regno].written_by_and)
4432 ;
4433 else if (flags.is_or && rws_sum[regno].written_by_or)
4434 ;
4435 else
4436 need_barrier = 1;
4437 rws_sum[regno].written_by_and = flags.is_and;
4438 rws_sum[regno].written_by_or = flags.is_or;
4439 break;
4440
4441 default:
4442 abort ();
4443 }
4444 }
4445 else
4446 {
4447 if (flags.is_branch)
4448 {
4449 /* Branches have several RAW exceptions that let us avoid
4450 barriers. */
4451
4452 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4453 /* RAW dependencies on branch regs are permissible as long
4454 as the writer is a non-branch instruction. Since we
4455 never generate code that uses a branch register written
4456 by a branch instruction, handling this case is
4457 easy. */
4458 return 0;
4459
4460 if (REGNO_REG_CLASS (regno) == PR_REGS
4461 && ! rws_sum[regno].written_by_fp)
4462 /* The predicates of a branch are available within the
4463 same insn group as long as the predicate was written by
4464 something other than a floating-point instruction. */
4465 return 0;
4466 }
4467
4468 if (flags.is_and && rws_sum[regno].written_by_and)
4469 return 0;
4470 if (flags.is_or && rws_sum[regno].written_by_or)
4471 return 0;
4472
4473 switch (rws_sum[regno].write_count)
4474 {
4475 case 0:
4476 /* The register has not been written yet. */
4477 break;
4478
4479 case 1:
4480 /* The register has been written via a predicate. If this is
4481 not a complementary predicate, then we need a barrier. */
4482 /* ??? This assumes that P and P+1 are always complementary
4483 predicates for P even. */
4484 if ((rws_sum[regno].first_pred ^ 1) != pred)
4485 need_barrier = 1;
4486 break;
4487
4488 case 2:
4489 /* The register has been unconditionally written already. We
4490 need a barrier. */
4491 need_barrier = 1;
4492 break;
4493
4494 default:
4495 abort ();
4496 }
4497 }
4498
4499 return need_barrier;
4500 }
4501
4502 static int
4503 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
4504 {
4505 int regno = REGNO (reg);
4506 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4507
4508 if (n == 1)
4509 return rws_access_regno (regno, flags, pred);
4510 else
4511 {
4512 int need_barrier = 0;
4513 while (--n >= 0)
4514 need_barrier |= rws_access_regno (regno + n, flags, pred);
4515 return need_barrier;
4516 }
4517 }
4518
4519 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4520 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4521
4522 static void
4523 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
4524 {
4525 rtx src = SET_SRC (x);
4526
4527 *pcond = 0;
4528
4529 switch (GET_CODE (src))
4530 {
4531 case CALL:
4532 return;
4533
4534 case IF_THEN_ELSE:
4535 if (SET_DEST (x) == pc_rtx)
4536 /* X is a conditional branch. */
4537 return;
4538 else
4539 {
4540 int is_complemented = 0;
4541
4542 /* X is a conditional move. */
4543 rtx cond = XEXP (src, 0);
4544 if (GET_CODE (cond) == EQ)
4545 is_complemented = 1;
4546 cond = XEXP (cond, 0);
4547 if (GET_CODE (cond) != REG
4548 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4549 abort ();
4550 *pcond = cond;
4551 if (XEXP (src, 1) == SET_DEST (x)
4552 || XEXP (src, 2) == SET_DEST (x))
4553 {
4554 /* X is a conditional move that conditionally writes the
4555 destination. */
4556
4557 /* We need another complement in this case. */
4558 if (XEXP (src, 1) == SET_DEST (x))
4559 is_complemented = ! is_complemented;
4560
4561 *ppred = REGNO (cond);
4562 if (is_complemented)
4563 ++*ppred;
4564 }
4565
4566 /* ??? If this is a conditional write to the dest, then this
4567 instruction does not actually read one source. This probably
4568 doesn't matter, because that source is also the dest. */
4569 /* ??? Multiple writes to predicate registers are allowed
4570 if they are all AND type compares, or if they are all OR
4571 type compares. We do not generate such instructions
4572 currently. */
4573 }
4574 /* ... fall through ... */
4575
4576 default:
4577 if (COMPARISON_P (src)
4578 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4579 /* Set pflags->is_fp to 1 so that we know we're dealing
4580 with a floating point comparison when processing the
4581 destination of the SET. */
4582 pflags->is_fp = 1;
4583
4584 /* Discover if this is a parallel comparison. We only handle
4585 and.orcm and or.andcm at present, since we must retain a
4586 strict inverse on the predicate pair. */
4587 else if (GET_CODE (src) == AND)
4588 pflags->is_and = 1;
4589 else if (GET_CODE (src) == IOR)
4590 pflags->is_or = 1;
4591
4592 break;
4593 }
4594 }
4595
4596 /* Subroutine of rtx_needs_barrier; this function determines whether the
4597 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4598 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4599 for this insn. */
4600
4601 static int
4602 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
4603 {
4604 int need_barrier = 0;
4605 rtx dst;
4606 rtx src = SET_SRC (x);
4607
4608 if (GET_CODE (src) == CALL)
4609 /* We don't need to worry about the result registers that
4610 get written by subroutine call. */
4611 return rtx_needs_barrier (src, flags, pred);
4612 else if (SET_DEST (x) == pc_rtx)
4613 {
4614 /* X is a conditional branch. */
4615 /* ??? This seems redundant, as the caller sets this bit for
4616 all JUMP_INSNs. */
4617 flags.is_branch = 1;
4618 return rtx_needs_barrier (src, flags, pred);
4619 }
4620
4621 need_barrier = rtx_needs_barrier (src, flags, pred);
4622
4623 /* This instruction unconditionally uses a predicate register. */
4624 if (cond)
4625 need_barrier |= rws_access_reg (cond, flags, 0);
4626
4627 dst = SET_DEST (x);
4628 if (GET_CODE (dst) == ZERO_EXTRACT)
4629 {
4630 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4631 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4632 dst = XEXP (dst, 0);
4633 }
4634 return need_barrier;
4635 }
4636
4637 /* Handle an access to rtx X of type FLAGS using predicate register
4638 PRED. Return 1 if this access creates a dependency with an earlier
4639 instruction in the same group. */
4640
4641 static int
4642 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
4643 {
4644 int i, j;
4645 int is_complemented = 0;
4646 int need_barrier = 0;
4647 const char *format_ptr;
4648 struct reg_flags new_flags;
4649 rtx cond = 0;
4650
4651 if (! x)
4652 return 0;
4653
4654 new_flags = flags;
4655
4656 switch (GET_CODE (x))
4657 {
4658 case SET:
4659 update_set_flags (x, &new_flags, &pred, &cond);
4660 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4661 if (GET_CODE (SET_SRC (x)) != CALL)
4662 {
4663 new_flags.is_write = 1;
4664 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4665 }
4666 break;
4667
4668 case CALL:
4669 new_flags.is_write = 0;
4670 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4671
4672 /* Avoid multiple register writes, in case this is a pattern with
4673 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4674 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4675 {
4676 new_flags.is_write = 1;
4677 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4678 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4679 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4680 }
4681 break;
4682
4683 case COND_EXEC:
4684 /* X is a predicated instruction. */
4685
4686 cond = COND_EXEC_TEST (x);
4687 if (pred)
4688 abort ();
4689 need_barrier = rtx_needs_barrier (cond, flags, 0);
4690
4691 if (GET_CODE (cond) == EQ)
4692 is_complemented = 1;
4693 cond = XEXP (cond, 0);
4694 if (GET_CODE (cond) != REG
4695 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4696 abort ();
4697 pred = REGNO (cond);
4698 if (is_complemented)
4699 ++pred;
4700
4701 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4702 return need_barrier;
4703
4704 case CLOBBER:
4705 case USE:
4706 /* Clobber & use are for earlier compiler-phases only. */
4707 break;
4708
4709 case ASM_OPERANDS:
4710 case ASM_INPUT:
4711 /* We always emit stop bits for traditional asms. We emit stop bits
4712 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4713 if (GET_CODE (x) != ASM_OPERANDS
4714 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4715 {
4716 /* Avoid writing the register multiple times if we have multiple
4717 asm outputs. This avoids an abort in rws_access_reg. */
4718 if (! rws_insn[REG_VOLATILE].write_count)
4719 {
4720 new_flags.is_write = 1;
4721 rws_access_regno (REG_VOLATILE, new_flags, pred);
4722 }
4723 return 1;
4724 }
4725
4726 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4727 We cannot just fall through here since then we would be confused
4728 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4729 a traditional asm, unlike its normal usage. */
4730
4731 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4732 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4733 need_barrier = 1;
4734 break;
4735
4736 case PARALLEL:
4737 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4738 {
4739 rtx pat = XVECEXP (x, 0, i);
4740 if (GET_CODE (pat) == SET)
4741 {
4742 update_set_flags (pat, &new_flags, &pred, &cond);
4743 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4744 }
4745 else if (GET_CODE (pat) == USE
4746 || GET_CODE (pat) == CALL
4747 || GET_CODE (pat) == ASM_OPERANDS)
4748 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4749 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4750 abort ();
4751 }
4752 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4753 {
4754 rtx pat = XVECEXP (x, 0, i);
4755 if (GET_CODE (pat) == SET)
4756 {
4757 if (GET_CODE (SET_SRC (pat)) != CALL)
4758 {
4759 new_flags.is_write = 1;
4760 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4761 pred);
4762 }
4763 }
4764 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4765 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4766 }
4767 break;
4768
4769 case SUBREG:
4770 x = SUBREG_REG (x);
4771 /* FALLTHRU */
4772 case REG:
4773 if (REGNO (x) == AR_UNAT_REGNUM)
4774 {
4775 for (i = 0; i < 64; ++i)
4776 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4777 }
4778 else
4779 need_barrier = rws_access_reg (x, flags, pred);
4780 break;
4781
4782 case MEM:
4783 /* Find the regs used in memory address computation. */
4784 new_flags.is_write = 0;
4785 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4786 break;
4787
4788 case CONST_INT: case CONST_DOUBLE:
4789 case SYMBOL_REF: case LABEL_REF: case CONST:
4790 break;
4791
4792 /* Operators with side-effects. */
4793 case POST_INC: case POST_DEC:
4794 if (GET_CODE (XEXP (x, 0)) != REG)
4795 abort ();
4796
4797 new_flags.is_write = 0;
4798 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4799 new_flags.is_write = 1;
4800 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4801 break;
4802
4803 case POST_MODIFY:
4804 if (GET_CODE (XEXP (x, 0)) != REG)
4805 abort ();
4806
4807 new_flags.is_write = 0;
4808 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4809 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4810 new_flags.is_write = 1;
4811 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4812 break;
4813
4814 /* Handle common unary and binary ops for efficiency. */
4815 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4816 case MOD: case UDIV: case UMOD: case AND: case IOR:
4817 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4818 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4819 case NE: case EQ: case GE: case GT: case LE:
4820 case LT: case GEU: case GTU: case LEU: case LTU:
4821 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4822 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4823 break;
4824
4825 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4826 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4827 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4828 case SQRT: case FFS: case POPCOUNT:
4829 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4830 break;
4831
4832 case UNSPEC:
4833 switch (XINT (x, 1))
4834 {
4835 case UNSPEC_LTOFF_DTPMOD:
4836 case UNSPEC_LTOFF_DTPREL:
4837 case UNSPEC_DTPREL:
4838 case UNSPEC_LTOFF_TPREL:
4839 case UNSPEC_TPREL:
4840 case UNSPEC_PRED_REL_MUTEX:
4841 case UNSPEC_PIC_CALL:
4842 case UNSPEC_MF:
4843 case UNSPEC_FETCHADD_ACQ:
4844 case UNSPEC_BSP_VALUE:
4845 case UNSPEC_FLUSHRS:
4846 case UNSPEC_BUNDLE_SELECTOR:
4847 break;
4848
4849 case UNSPEC_GR_SPILL:
4850 case UNSPEC_GR_RESTORE:
4851 {
4852 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4853 HOST_WIDE_INT bit = (offset >> 3) & 63;
4854
4855 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4856 new_flags.is_write = (XINT (x, 1) == 1);
4857 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4858 new_flags, pred);
4859 break;
4860 }
4861
4862 case UNSPEC_FR_SPILL:
4863 case UNSPEC_FR_RESTORE:
4864 case UNSPEC_GETF_EXP:
4865 case UNSPEC_SETF_EXP:
4866 case UNSPEC_ADDP4:
4867 case UNSPEC_FR_SQRT_RECIP_APPROX:
4868 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4869 break;
4870
4871 case UNSPEC_FR_RECIP_APPROX:
4872 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4873 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4874 break;
4875
4876 case UNSPEC_CMPXCHG_ACQ:
4877 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4878 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4879 break;
4880
4881 default:
4882 abort ();
4883 }
4884 break;
4885
4886 case UNSPEC_VOLATILE:
4887 switch (XINT (x, 1))
4888 {
4889 case UNSPECV_ALLOC:
4890 /* Alloc must always be the first instruction of a group.
4891 We force this by always returning true. */
4892 /* ??? We might get better scheduling if we explicitly check for
4893 input/local/output register dependencies, and modify the
4894 scheduler so that alloc is always reordered to the start of
4895 the current group. We could then eliminate all of the
4896 first_instruction code. */
4897 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4898
4899 new_flags.is_write = 1;
4900 rws_access_regno (REG_AR_CFM, new_flags, pred);
4901 return 1;
4902
4903 case UNSPECV_SET_BSP:
4904 need_barrier = 1;
4905 break;
4906
4907 case UNSPECV_BLOCKAGE:
4908 case UNSPECV_INSN_GROUP_BARRIER:
4909 case UNSPECV_BREAK:
4910 case UNSPECV_PSAC_ALL:
4911 case UNSPECV_PSAC_NORMAL:
4912 return 0;
4913
4914 default:
4915 abort ();
4916 }
4917 break;
4918
4919 case RETURN:
4920 new_flags.is_write = 0;
4921 need_barrier = rws_access_regno (REG_RP, flags, pred);
4922 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4923
4924 new_flags.is_write = 1;
4925 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4926 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4927 break;
4928
4929 default:
4930 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4931 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4932 switch (format_ptr[i])
4933 {
4934 case '0': /* unused field */
4935 case 'i': /* integer */
4936 case 'n': /* note */
4937 case 'w': /* wide integer */
4938 case 's': /* pointer to string */
4939 case 'S': /* optional pointer to string */
4940 break;
4941
4942 case 'e':
4943 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4944 need_barrier = 1;
4945 break;
4946
4947 case 'E':
4948 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4949 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4950 need_barrier = 1;
4951 break;
4952
4953 default:
4954 abort ();
4955 }
4956 break;
4957 }
4958 return need_barrier;
4959 }
4960
4961 /* Clear out the state for group_barrier_needed_p at the start of a
4962 sequence of insns. */
4963
4964 static void
4965 init_insn_group_barriers (void)
4966 {
4967 memset (rws_sum, 0, sizeof (rws_sum));
4968 first_instruction = 1;
4969 }
4970
4971 /* Given the current state, recorded by previous calls to this function,
4972 determine whether a group barrier (a stop bit) is necessary before INSN.
4973 Return nonzero if so. */
4974
4975 static int
4976 group_barrier_needed_p (rtx insn)
4977 {
4978 rtx pat;
4979 int need_barrier = 0;
4980 struct reg_flags flags;
4981
4982 memset (&flags, 0, sizeof (flags));
4983 switch (GET_CODE (insn))
4984 {
4985 case NOTE:
4986 break;
4987
4988 case BARRIER:
4989 /* A barrier doesn't imply an instruction group boundary. */
4990 break;
4991
4992 case CODE_LABEL:
4993 memset (rws_insn, 0, sizeof (rws_insn));
4994 return 1;
4995
4996 case CALL_INSN:
4997 flags.is_branch = 1;
4998 flags.is_sibcall = SIBLING_CALL_P (insn);
4999 memset (rws_insn, 0, sizeof (rws_insn));
5000
5001 /* Don't bundle a call following another call. */
5002 if ((pat = prev_active_insn (insn))
5003 && GET_CODE (pat) == CALL_INSN)
5004 {
5005 need_barrier = 1;
5006 break;
5007 }
5008
5009 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5010 break;
5011
5012 case JUMP_INSN:
5013 flags.is_branch = 1;
5014
5015 /* Don't bundle a jump following a call. */
5016 if ((pat = prev_active_insn (insn))
5017 && GET_CODE (pat) == CALL_INSN)
5018 {
5019 need_barrier = 1;
5020 break;
5021 }
5022 /* FALLTHRU */
5023
5024 case INSN:
5025 if (GET_CODE (PATTERN (insn)) == USE
5026 || GET_CODE (PATTERN (insn)) == CLOBBER)
5027 /* Don't care about USE and CLOBBER "insns"---those are used to
5028 indicate to the optimizer that it shouldn't get rid of
5029 certain operations. */
5030 break;
5031
5032 pat = PATTERN (insn);
5033
5034 /* Ug. Hack hacks hacked elsewhere. */
5035 switch (recog_memoized (insn))
5036 {
5037 /* We play dependency tricks with the epilogue in order
5038 to get proper schedules. Undo this for dv analysis. */
5039 case CODE_FOR_epilogue_deallocate_stack:
5040 case CODE_FOR_prologue_allocate_stack:
5041 pat = XVECEXP (pat, 0, 0);
5042 break;
5043
5044 /* The pattern we use for br.cloop confuses the code above.
5045 The second element of the vector is representative. */
5046 case CODE_FOR_doloop_end_internal:
5047 pat = XVECEXP (pat, 0, 1);
5048 break;
5049
5050 /* Doesn't generate code. */
5051 case CODE_FOR_pred_rel_mutex:
5052 case CODE_FOR_prologue_use:
5053 return 0;
5054
5055 default:
5056 break;
5057 }
5058
5059 memset (rws_insn, 0, sizeof (rws_insn));
5060 need_barrier = rtx_needs_barrier (pat, flags, 0);
5061
5062 /* Check to see if the previous instruction was a volatile
5063 asm. */
5064 if (! need_barrier)
5065 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5066 break;
5067
5068 default:
5069 abort ();
5070 }
5071
5072 if (first_instruction && INSN_P (insn)
5073 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5074 && GET_CODE (PATTERN (insn)) != USE
5075 && GET_CODE (PATTERN (insn)) != CLOBBER)
5076 {
5077 need_barrier = 0;
5078 first_instruction = 0;
5079 }
5080
5081 return need_barrier;
5082 }
5083
5084 /* Like group_barrier_needed_p, but do not clobber the current state. */
5085
5086 static int
5087 safe_group_barrier_needed_p (rtx insn)
5088 {
5089 struct reg_write_state rws_saved[NUM_REGS];
5090 int saved_first_instruction;
5091 int t;
5092
5093 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5094 saved_first_instruction = first_instruction;
5095
5096 t = group_barrier_needed_p (insn);
5097
5098 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5099 first_instruction = saved_first_instruction;
5100
5101 return t;
5102 }
5103
5104 /* Scan the current function and insert stop bits as necessary to
5105 eliminate dependencies. This function assumes that a final
5106 instruction scheduling pass has been run which has already
5107 inserted most of the necessary stop bits. This function only
5108 inserts new ones at basic block boundaries, since these are
5109 invisible to the scheduler. */
5110
5111 static void
5112 emit_insn_group_barriers (FILE *dump)
5113 {
5114 rtx insn;
5115 rtx last_label = 0;
5116 int insns_since_last_label = 0;
5117
5118 init_insn_group_barriers ();
5119
5120 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5121 {
5122 if (GET_CODE (insn) == CODE_LABEL)
5123 {
5124 if (insns_since_last_label)
5125 last_label = insn;
5126 insns_since_last_label = 0;
5127 }
5128 else if (GET_CODE (insn) == NOTE
5129 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5130 {
5131 if (insns_since_last_label)
5132 last_label = insn;
5133 insns_since_last_label = 0;
5134 }
5135 else if (GET_CODE (insn) == INSN
5136 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5137 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5138 {
5139 init_insn_group_barriers ();
5140 last_label = 0;
5141 }
5142 else if (INSN_P (insn))
5143 {
5144 insns_since_last_label = 1;
5145
5146 if (group_barrier_needed_p (insn))
5147 {
5148 if (last_label)
5149 {
5150 if (dump)
5151 fprintf (dump, "Emitting stop before label %d\n",
5152 INSN_UID (last_label));
5153 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5154 insn = last_label;
5155
5156 init_insn_group_barriers ();
5157 last_label = 0;
5158 }
5159 }
5160 }
5161 }
5162 }
5163
5164 /* Like emit_insn_group_barriers, but used when no final scheduling pass has
5165 been run. This function has to emit all necessary group barriers. */
5166
5167 static void
5168 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5169 {
5170 rtx insn;
5171
5172 init_insn_group_barriers ();
5173
5174 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5175 {
5176 if (GET_CODE (insn) == BARRIER)
5177 {
5178 rtx last = prev_active_insn (insn);
5179
5180 if (! last)
5181 continue;
5182 if (GET_CODE (last) == JUMP_INSN
5183 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5184 last = prev_active_insn (last);
5185 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5186 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5187
5188 init_insn_group_barriers ();
5189 }
5190 else if (INSN_P (insn))
5191 {
5192 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5193 init_insn_group_barriers ();
5194 else if (group_barrier_needed_p (insn))
5195 {
5196 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5197 init_insn_group_barriers ();
5198 group_barrier_needed_p (insn);
5199 }
5200 }
5201 }
5202 }
5203
5204 \f
5205 static int errata_find_address_regs (rtx *, void *);
5206 static void errata_emit_nops (rtx);
5207 static void fixup_errata (void);
5208
5209 /* This structure is used to track some details about the previous insn
5210 groups so we can determine if it may be necessary to insert NOPs to
5211 work around hardware errata. */
5212 static struct group
5213 {
5214 HARD_REG_SET p_reg_set;
5215 HARD_REG_SET gr_reg_conditionally_set;
5216 } last_group[2];
5217
5218 /* Index into the last_group array. */
5219 static int group_idx;
5220
5221 /* Called through for_each_rtx; determines if a hard register that was
5222 conditionally set in the previous group is used as an address register.
5223 It ensures that for_each_rtx returns 1 in that case. */
5224 static int
5225 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5226 {
5227 rtx x = *xp;
5228 if (GET_CODE (x) != MEM)
5229 return 0;
5230 x = XEXP (x, 0);
5231 if (GET_CODE (x) == POST_MODIFY)
5232 x = XEXP (x, 0);
5233 if (GET_CODE (x) == REG)
5234 {
5235 struct group *prev_group = last_group + (group_idx ^ 1);
5236 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5237 REGNO (x)))
5238 return 1;
5239 return -1;
5240 }
5241 return 0;
5242 }
5243
5244 /* Called for each insn; this function keeps track of the state in
5245 last_group and emits additional NOPs if necessary to work around
5246 an Itanium A/B step erratum. */
5247 static void
5248 errata_emit_nops (rtx insn)
5249 {
5250 struct group *this_group = last_group + group_idx;
5251 struct group *prev_group = last_group + (group_idx ^ 1);
5252 rtx pat = PATTERN (insn);
5253 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5254 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5255 enum attr_type type;
5256 rtx set = real_pat;
5257
5258 if (GET_CODE (real_pat) == USE
5259 || GET_CODE (real_pat) == CLOBBER
5260 || GET_CODE (real_pat) == ASM_INPUT
5261 || GET_CODE (real_pat) == ADDR_VEC
5262 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5263 || asm_noperands (PATTERN (insn)) >= 0)
5264 return;
5265
5266 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5267 parts of it. */
5268
5269 if (GET_CODE (set) == PARALLEL)
5270 {
5271 int i;
5272 set = XVECEXP (real_pat, 0, 0);
5273 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5274 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5275 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5276 {
5277 set = 0;
5278 break;
5279 }
5280 }
5281
5282 if (set && GET_CODE (set) != SET)
5283 set = 0;
5284
5285 type = get_attr_type (insn);
5286
5287 if (type == TYPE_F
5288 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5289 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5290
5291 if ((type == TYPE_M || type == TYPE_A) && cond && set
5292 && REG_P (SET_DEST (set))
5293 && GET_CODE (SET_SRC (set)) != PLUS
5294 && GET_CODE (SET_SRC (set)) != MINUS
5295 && (GET_CODE (SET_SRC (set)) != ASHIFT
5296 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5297 && (GET_CODE (SET_SRC (set)) != MEM
5298 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5299 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5300 {
5301 if (!COMPARISON_P (cond)
5302 || !REG_P (XEXP (cond, 0)))
5303 abort ();
5304
5305 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5306 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5307 }
5308 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5309 {
5310 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5311 emit_insn_before (gen_nop (), insn);
5312 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5313 group_idx = 0;
5314 memset (last_group, 0, sizeof last_group);
5315 }
5316 }
5317
5318 /* Emit extra nops if they are required to work around hardware errata. */
5319
5320 static void
5321 fixup_errata (void)
5322 {
5323 rtx insn;
5324
5325 if (! TARGET_B_STEP)
5326 return;
5327
5328 group_idx = 0;
5329 memset (last_group, 0, sizeof last_group);
5330
5331 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5332 {
5333 if (!INSN_P (insn))
5334 continue;
5335
5336 if (ia64_safe_type (insn) == TYPE_S)
5337 {
5338 group_idx ^= 1;
5339 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5340 }
5341 else
5342 errata_emit_nops (insn);
5343 }
5344 }
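
/* Illustrative sketch, not part of GCC: a standalone model of the
   two-entry "current vs. previous group" bookkeeping used by
   fixup_errata/errata_emit_nops above.  The index is flipped with
   XOR 1 at each group boundary and the entry that becomes current is
   cleared, so the other entry still describes the previous group.
   All names below are hypothetical; the block is compiled out.  */
#if 0
#include <stdio.h>
#include <string.h>

struct toy_group { unsigned long members; };

static struct toy_group toy_last_group[2];
static int toy_group_idx;

/* Called at a group boundary: the old current group becomes the
   previous one.  */
static void
toy_start_new_group (void)
{
  toy_group_idx ^= 1;
  memset (&toy_last_group[toy_group_idx], 0,
	  sizeof toy_last_group[toy_group_idx]);
}

int
main (void)
{
  toy_last_group[toy_group_idx].members = 0x5;
  toy_start_new_group ();
  /* The data of the previous group is still available.  */
  printf ("%#lx\n", toy_last_group[toy_group_idx ^ 1].members);	/* 0x5 */
  return 0;
}
#endif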
5345 \f
5346
5347 /* Instruction scheduling support. */
5348
5349 #define NR_BUNDLES 10
5350
5351 /* A list of names of all available bundles. */
5352
5353 static const char *bundle_name [NR_BUNDLES] =
5354 {
5355 ".mii",
5356 ".mmi",
5357 ".mfi",
5358 ".mmf",
5359 #if NR_BUNDLES == 10
5360 ".bbb",
5361 ".mbb",
5362 #endif
5363 ".mib",
5364 ".mmb",
5365 ".mfb",
5366 ".mlx"
5367 };
5368
5369 /* Nonzero if we should insert stop bits into the schedule. */
5370
5371 int ia64_final_schedule = 0;
5372
5373 /* Codes of the corresponding queried units: */
5374
5375 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5376 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5377
5378 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5379 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5380
5381 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5382
5383 /* The following variable value is an insn group barrier. */
5384
5385 static rtx dfa_stop_insn;
5386
5387 /* The following variable value is the last issued insn. */
5388
5389 static rtx last_scheduled_insn;
5390
5391 /* The following variable value is the size of the DFA state.  */
5392
5393 static size_t dfa_state_size;
5394
5395 /* The following variable value is a pointer to a DFA state used as a
5396    temporary variable.  */
5397
5398 static state_t temp_dfa_state = NULL;
5399
5400 /* The following variable value is the DFA state after issuing the last
5401    insn.  */
5402
5403 static state_t prev_cycle_state = NULL;
5404
5405 /* The following array element values are TRUE if the corresponding
5406    insn requires stop bits to be added before it.  */
5407
5408 static char *stops_p;
5409
5410 /* The following variable is used to set up the array mentioned above.  */
5411
5412 static int stop_before_p = 0;
5413
5414 /* The following variable value is the length of the arrays `clocks' and
5415    `add_cycles'.  */
5416
5417 static int clocks_length;
5418
5419 /* The following array element values are cycles on which the
5420 corresponding insn will be issued. The array is used only for
5421 Itanium1. */
5422
5423 static int *clocks;
5424
5425 /* The following array element values are the numbers of cycles that should
5426    be added to improve insn scheduling of MM-insns for Itanium1.  */
5427
5428 static int *add_cycles;
5429
5430 static rtx ia64_single_set (rtx);
5431 static void ia64_emit_insn_before (rtx, rtx);
5432
5433 /* Map a bundle number to its pseudo-op. */
5434
5435 const char *
5436 get_bundle_name (int b)
5437 {
5438 return bundle_name[b];
5439 }
5440
5441
5442 /* Return the maximum number of instructions a cpu can issue. */
5443
5444 static int
5445 ia64_issue_rate (void)
5446 {
5447 return 6;
5448 }
5449
5450 /* Helper function - like single_set, but look inside COND_EXEC. */
5451
5452 static rtx
5453 ia64_single_set (rtx insn)
5454 {
5455 rtx x = PATTERN (insn), ret;
5456 if (GET_CODE (x) == COND_EXEC)
5457 x = COND_EXEC_CODE (x);
5458 if (GET_CODE (x) == SET)
5459 return x;
5460
5461 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5462    Although they are not a classical single set, the second set is there just
5463    to protect it from moving past FP-relative stack accesses.  */
5464 switch (recog_memoized (insn))
5465 {
5466 case CODE_FOR_prologue_allocate_stack:
5467 case CODE_FOR_epilogue_deallocate_stack:
5468 ret = XVECEXP (x, 0, 0);
5469 break;
5470
5471 default:
5472 ret = single_set_2 (insn, x);
5473 break;
5474 }
5475
5476 return ret;
5477 }
5478
5479 /* Adjust the cost of a scheduling dependency. Return the new cost of
5480 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5481
5482 static int
5483 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5484 {
5485 enum attr_itanium_class dep_class;
5486 enum attr_itanium_class insn_class;
5487
5488 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5489 return cost;
5490
5491 insn_class = ia64_safe_itanium_class (insn);
5492 dep_class = ia64_safe_itanium_class (dep_insn);
5493 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5494 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5495 return 0;
5496
5497 return cost;
5498 }
5499
5500 /* Like emit_insn_before, but skip cycle_display notes.
5501 ??? When cycle display notes are implemented, update this. */
5502
5503 static void
5504 ia64_emit_insn_before (rtx insn, rtx before)
5505 {
5506 emit_insn_before (insn, before);
5507 }
5508
5509 /* The following function marks insns which produce addresses for load
5510    and store insns.  Such insns will be placed into M slots because this
5511    decreases latency time for Itanium1 (see function
5512    `ia64_produce_address_p' and the DFA descriptions).  */
5513
5514 static void
5515 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5516 {
5517 rtx insn, link, next, next_tail;
5518
5519 next_tail = NEXT_INSN (tail);
5520 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5521 if (INSN_P (insn))
5522 insn->call = 0;
5523 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5524 if (INSN_P (insn)
5525 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5526 {
5527 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5528 {
5529 next = XEXP (link, 0);
5530 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5531 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5532 && ia64_st_address_bypass_p (insn, next))
5533 break;
5534 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5535 || ia64_safe_itanium_class (next)
5536 == ITANIUM_CLASS_FLD)
5537 && ia64_ld_address_bypass_p (insn, next))
5538 break;
5539 }
5540 insn->call = link != 0;
5541 }
5542 }
5543
5544 /* We're beginning a new block. Initialize data structures as necessary. */
5545
5546 static void
5547 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
5548 int sched_verbose ATTRIBUTE_UNUSED,
5549 int max_ready ATTRIBUTE_UNUSED)
5550 {
5551 #ifdef ENABLE_CHECKING
5552 rtx insn;
5553
5554 if (reload_completed)
5555 for (insn = NEXT_INSN (current_sched_info->prev_head);
5556 insn != current_sched_info->next_tail;
5557 insn = NEXT_INSN (insn))
5558 if (SCHED_GROUP_P (insn))
5559 abort ();
5560 #endif
5561 last_scheduled_insn = NULL_RTX;
5562 init_insn_group_barriers ();
5563 }
5564
5565 /* We are about to begin issuing insns for this clock cycle.
5566 Override the default sort algorithm to better slot instructions. */
5567
5568 static int
5569 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
5570 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
5571 int reorder_type)
5572 {
5573 int n_asms;
5574 int n_ready = *pn_ready;
5575 rtx *e_ready = ready + n_ready;
5576 rtx *insnp;
5577
5578 if (sched_verbose)
5579 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5580
5581 if (reorder_type == 0)
5582 {
5583 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5584 n_asms = 0;
5585 for (insnp = ready; insnp < e_ready; insnp++)
5586 if (insnp < e_ready)
5587 {
5588 rtx insn = *insnp;
5589 enum attr_type t = ia64_safe_type (insn);
5590 if (t == TYPE_UNKNOWN)
5591 {
5592 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5593 || asm_noperands (PATTERN (insn)) >= 0)
5594 {
5595 rtx lowest = ready[n_asms];
5596 ready[n_asms] = insn;
5597 *insnp = lowest;
5598 n_asms++;
5599 }
5600 else
5601 {
5602 rtx highest = ready[n_ready - 1];
5603 ready[n_ready - 1] = insn;
5604 *insnp = highest;
5605 return 1;
5606 }
5607 }
5608 }
5609
5610 if (n_asms < n_ready)
5611 {
5612 /* Some normal insns to process. Skip the asms. */
5613 ready += n_asms;
5614 n_ready -= n_asms;
5615 }
5616 else if (n_ready > 0)
5617 return 1;
5618 }
5619
5620 if (ia64_final_schedule)
5621 {
5622 int deleted = 0;
5623 int nr_need_stop = 0;
5624
5625 for (insnp = ready; insnp < e_ready; insnp++)
5626 if (safe_group_barrier_needed_p (*insnp))
5627 nr_need_stop++;
5628
5629 if (reorder_type == 1 && n_ready == nr_need_stop)
5630 return 0;
5631 if (reorder_type == 0)
5632 return 1;
5633 insnp = e_ready;
5634 /* Move down everything that needs a stop bit, preserving
5635 relative order. */
5636 while (insnp-- > ready + deleted)
5637 while (insnp >= ready + deleted)
5638 {
5639 rtx insn = *insnp;
5640 if (! safe_group_barrier_needed_p (insn))
5641 break;
5642 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5643 *ready = insn;
5644 deleted++;
5645 }
5646 n_ready -= deleted;
5647 ready += deleted;
5648 }
5649
5650 return 1;
5651 }
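
/* Illustrative sketch, not part of GCC: the memmove trick used above to
   move one element of the ready array down to index 0 while the elements
   in front of it are shifted up by one slot, preserving their relative
   order.  All names below are hypothetical; the block is compiled out.  */
#if 0
#include <stdio.h>
#include <string.h>

static void
toy_move_to_front (int *a, int idx)
{
  int tmp = a[idx];

  memmove (a + 1, a, idx * sizeof (int));
  a[0] = tmp;
}

int
main (void)
{
  int a[5] = { 10, 20, 30, 40, 50 };
  int i;

  toy_move_to_front (a, 3);
  for (i = 0; i < 5; i++)
    printf ("%d ", a[i]);	/* Prints: 40 10 20 30 50  */
  printf ("\n");
  return 0;
}
#endif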
5652
5653 /* We are about to begin issuing insns for this clock cycle.  Override
5654 the default sort algorithm to better slot instructions. */
5655
5656 static int
5657 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
5658 int clock_var)
5659 {
5660 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5661 pn_ready, clock_var, 0);
5662 }
5663
5664 /* Like ia64_sched_reorder, but called after issuing each insn.
5665 Override the default sort algorithm to better slot instructions. */
5666
5667 static int
5668 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
5669 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
5670 int *pn_ready, int clock_var)
5671 {
5672 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5673 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5674 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5675 clock_var, 1);
5676 }
5677
5678 /* We are about to issue INSN. Return the number of insns left on the
5679 ready queue that can be issued this cycle. */
5680
5681 static int
5682 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
5683 int sched_verbose ATTRIBUTE_UNUSED,
5684 rtx insn ATTRIBUTE_UNUSED,
5685 int can_issue_more ATTRIBUTE_UNUSED)
5686 {
5687 last_scheduled_insn = insn;
5688 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5689 if (reload_completed)
5690 {
5691 if (group_barrier_needed_p (insn))
5692 abort ();
5693 if (GET_CODE (insn) == CALL_INSN)
5694 init_insn_group_barriers ();
5695 stops_p [INSN_UID (insn)] = stop_before_p;
5696 stop_before_p = 0;
5697 }
5698 return 1;
5699 }
5700
5701 /* We are choosing an insn from the ready queue.  Return nonzero if INSN
5702 can be chosen. */
5703
5704 static int
5705 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
5706 {
5707 if (insn == NULL_RTX || !INSN_P (insn))
5708 abort ();
5709 return (!reload_completed
5710 || !safe_group_barrier_needed_p (insn));
5711 }
5712
5713 /* The following variable value is a pseudo-insn used by the DFA insn
5714 scheduler to change the DFA state when the simulated clock is
5715 increased. */
5716
5717 static rtx dfa_pre_cycle_insn;
5718
5719 /* We are about to begin issuing INSN.  Return nonzero if we cannot
5720    issue it on the given cycle CLOCK; clear *SORT_P if the ready queue
5721    should not be sorted on the next clock start.  */
5722
5723 static int
5724 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
5725 int clock, int *sort_p)
5726 {
5727 int setup_clocks_p = FALSE;
5728
5729 if (insn == NULL_RTX || !INSN_P (insn))
5730 abort ();
5731 if ((reload_completed && safe_group_barrier_needed_p (insn))
5732 || (last_scheduled_insn
5733 && (GET_CODE (last_scheduled_insn) == CALL_INSN
5734 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5735 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
5736 {
5737 init_insn_group_barriers ();
5738 if (verbose && dump)
5739 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
5740 last_clock == clock ? " + cycle advance" : "");
5741 stop_before_p = 1;
5742 if (last_clock == clock)
5743 {
5744 state_transition (curr_state, dfa_stop_insn);
5745 if (TARGET_EARLY_STOP_BITS)
5746 *sort_p = (last_scheduled_insn == NULL_RTX
5747 || GET_CODE (last_scheduled_insn) != CALL_INSN);
5748 else
5749 *sort_p = 0;
5750 return 1;
5751 }
5752 else if (reload_completed)
5753 setup_clocks_p = TRUE;
5754 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5755 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
5756 state_reset (curr_state);
5757 else
5758 {
5759 memcpy (curr_state, prev_cycle_state, dfa_state_size);
5760 state_transition (curr_state, dfa_stop_insn);
5761 state_transition (curr_state, dfa_pre_cycle_insn);
5762 state_transition (curr_state, NULL);
5763 }
5764 }
5765 else if (reload_completed)
5766 setup_clocks_p = TRUE;
5767 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
5768 && GET_CODE (PATTERN (insn)) != ASM_INPUT
5769 && asm_noperands (PATTERN (insn)) < 0)
5770 {
5771 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
5772
5773 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
5774 {
5775 rtx link;
5776 int d = -1;
5777
5778 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
5779 if (REG_NOTE_KIND (link) == 0)
5780 {
5781 enum attr_itanium_class dep_class;
5782 rtx dep_insn = XEXP (link, 0);
5783
5784 dep_class = ia64_safe_itanium_class (dep_insn);
5785 if ((dep_class == ITANIUM_CLASS_MMMUL
5786 || dep_class == ITANIUM_CLASS_MMSHF)
5787 && last_clock - clocks [INSN_UID (dep_insn)] < 4
5788 && (d < 0
5789 || last_clock - clocks [INSN_UID (dep_insn)] < d))
5790 d = last_clock - clocks [INSN_UID (dep_insn)];
5791 }
5792 if (d >= 0)
5793 add_cycles [INSN_UID (insn)] = 3 - d;
5794 }
5795 }
5796 return 0;
5797 }
5798
5799 \f
5800
5801 /* The following page contains abstract data `bundle states' which are
5802 used for bundling insns (inserting nops and template generation). */
5803
5804 /* The following describes state of insn bundling. */
5805
5806 struct bundle_state
5807 {
5808 /* Unique bundle state number to identify them in the debugging
5809 output */
5810 int unique_num;
5811 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
5812   /* number of nops before and after the insn */
5813   short before_nops_num, after_nops_num;
5814   int insn_num; /* insn number (0 for the initial state, 1 for the 1st
5815                    insn) */
5816 int cost; /* cost of the state in cycles */
5817 int accumulated_insns_num; /* number of all previous insns including
5818 nops. L is considered as 2 insns */
5819 int branch_deviation; /* deviation of previous branches from 3rd slots */
5820 struct bundle_state *next; /* next state with the same insn_num */
5821 struct bundle_state *originator; /* originator (previous insn state) */
5822 /* All bundle states are in the following chain. */
5823 struct bundle_state *allocated_states_chain;
5824 /* The DFA State after issuing the insn and the nops. */
5825 state_t dfa_state;
5826 };
5827
5828 /* The following maps an insn number to the corresponding bundle state.  */
5829
5830 static struct bundle_state **index_to_bundle_states;
5831
5832 /* The unique number of the next bundle state.  */
5833
5834 static int bundle_states_num;
5835
5836 /* All allocated bundle states are in the following chain. */
5837
5838 static struct bundle_state *allocated_bundle_states_chain;
5839
5840 /* All allocated but not used bundle states are in the following
5841 chain. */
5842
5843 static struct bundle_state *free_bundle_state_chain;
5844
5845
5846 /* The following function returns a free bundle state. */
5847
5848 static struct bundle_state *
5849 get_free_bundle_state (void)
5850 {
5851 struct bundle_state *result;
5852
5853 if (free_bundle_state_chain != NULL)
5854 {
5855 result = free_bundle_state_chain;
5856 free_bundle_state_chain = result->next;
5857 }
5858 else
5859 {
5860 result = xmalloc (sizeof (struct bundle_state));
5861 result->dfa_state = xmalloc (dfa_state_size);
5862 result->allocated_states_chain = allocated_bundle_states_chain;
5863 allocated_bundle_states_chain = result;
5864 }
5865 result->unique_num = bundle_states_num++;
5866 return result;
5867
5868 }
5869
5870 /* The following function frees the given bundle state.  */
5871
5872 static void
5873 free_bundle_state (struct bundle_state *state)
5874 {
5875 state->next = free_bundle_state_chain;
5876 free_bundle_state_chain = state;
5877 }
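
/* Illustrative sketch, not part of GCC: the free-list reuse pattern
   implemented by get_free_bundle_state/free_bundle_state above --
   released objects are chained on a list and handed out again before
   any new allocation is made.  All names below are hypothetical; the
   block is compiled out.  */
#if 0
#include <stdlib.h>

struct toy_state { struct toy_state *next; int payload; };

static struct toy_state *toy_free_chain;

static struct toy_state *
toy_get_state (void)
{
  struct toy_state *s;

  if (toy_free_chain != NULL)
    {
      s = toy_free_chain;
      toy_free_chain = s->next;
    }
  else
    s = malloc (sizeof (struct toy_state));
  return s;
}

static void
toy_put_state (struct toy_state *s)
{
  s->next = toy_free_chain;
  toy_free_chain = s;
}

int
main (void)
{
  struct toy_state *a = toy_get_state ();

  toy_put_state (a);
  /* The next request reuses the object released above.  */
  return toy_get_state () == a ? 0 : 1;
}
#endif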
5878
5879 /* Start work with abstract data `bundle states'. */
5880
5881 static void
5882 initiate_bundle_states (void)
5883 {
5884 bundle_states_num = 0;
5885 free_bundle_state_chain = NULL;
5886 allocated_bundle_states_chain = NULL;
5887 }
5888
5889 /* Finish work with abstract data `bundle states'. */
5890
5891 static void
5892 finish_bundle_states (void)
5893 {
5894 struct bundle_state *curr_state, *next_state;
5895
5896 for (curr_state = allocated_bundle_states_chain;
5897 curr_state != NULL;
5898 curr_state = next_state)
5899 {
5900 next_state = curr_state->allocated_states_chain;
5901 free (curr_state->dfa_state);
5902 free (curr_state);
5903 }
5904 }
5905
5906 /* Hash table of the bundle states. The key is dfa_state and insn_num
5907 of the bundle states. */
5908
5909 static htab_t bundle_state_table;
5910
5911 /* The function returns the hash of BUNDLE_STATE.  */
5912
5913 static unsigned
5914 bundle_state_hash (const void *bundle_state)
5915 {
5916 const struct bundle_state *state = (struct bundle_state *) bundle_state;
5917 unsigned result, i;
5918
5919 for (result = i = 0; i < dfa_state_size; i++)
5920 result += (((unsigned char *) state->dfa_state) [i]
5921 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
5922 return result + state->insn_num;
5923 }
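
/* Illustrative sketch, not part of GCC: the byte-mixing scheme of
   bundle_state_hash above, applied to an arbitrary opaque buffer --
   every byte is shifted by a position-dependent amount and summed,
   and a scalar (here the insn number) is added so that equal buffers
   with different insn numbers still hash differently.  All names
   below are hypothetical; the block is compiled out.  */
#if 0
#include <stdio.h>
#include <limits.h>

static unsigned
toy_hash (const unsigned char *buf, size_t len, int insn_num)
{
  unsigned result;
  size_t i;

  for (result = 0, i = 0; i < len; i++)
    result += (unsigned) buf[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + insn_num;
}

int
main (void)
{
  unsigned char state[4] = { 1, 2, 3, 4 };

  printf ("%u %u\n", toy_hash (state, sizeof state, 1),
	  toy_hash (state, sizeof state, 2));
  return 0;
}
#endif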
5924
5925 /* The function returns nonzero if the bundle state keys are equal. */
5926
5927 static int
5928 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
5929 {
5930 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
5931 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
5932
5933 return (state1->insn_num == state2->insn_num
5934 && memcmp (state1->dfa_state, state2->dfa_state,
5935 dfa_state_size) == 0);
5936 }
5937
5938 /* The function inserts the BUNDLE_STATE into the hash table.  The
5939    function returns nonzero if the bundle state has been inserted into
5940    the table.  The table contains the best bundle state with the given key.  */
5941
5942 static int
5943 insert_bundle_state (struct bundle_state *bundle_state)
5944 {
5945 void **entry_ptr;
5946
5947 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
5948 if (*entry_ptr == NULL)
5949 {
5950 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
5951 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
5952 *entry_ptr = (void *) bundle_state;
5953 return TRUE;
5954 }
5955 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
5956 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
5957 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
5958 > bundle_state->accumulated_insns_num
5959 || (((struct bundle_state *)
5960 *entry_ptr)->accumulated_insns_num
5961 == bundle_state->accumulated_insns_num
5962 && ((struct bundle_state *)
5963 *entry_ptr)->branch_deviation
5964 > bundle_state->branch_deviation))))
5965
5966 {
5967 struct bundle_state temp;
5968
5969 temp = *(struct bundle_state *) *entry_ptr;
5970 *(struct bundle_state *) *entry_ptr = *bundle_state;
5971 ((struct bundle_state *) *entry_ptr)->next = temp.next;
5972 *bundle_state = temp;
5973 }
5974 return FALSE;
5975 }
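
/* Illustrative sketch, not part of GCC: the "keep only the best state
   per key" rule applied by insert_bundle_state above.  Candidates are
   compared lexicographically by cost, then by accumulated insns, then
   by branch deviation, and the hash-table slot keeps the winner.  All
   names below are hypothetical; the block is compiled out.  */
#if 0
#include <stdio.h>

struct toy_candidate { int cost, insns, deviation; };

static int
toy_better_p (const struct toy_candidate *a, const struct toy_candidate *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->insns != b->insns)
    return a->insns < b->insns;
  return a->deviation < b->deviation;
}

int
main (void)
{
  struct toy_candidate slot = { 2, 7, 1 };
  struct toy_candidate cand = { 2, 6, 3 };

  if (toy_better_p (&cand, &slot))
    slot = cand;		/* The cheaper/denser candidate wins.  */
  printf ("%d %d %d\n", slot.cost, slot.insns, slot.deviation);
  return 0;
}
#endif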
5976
5977 /* Start work with the hash table. */
5978
5979 static void
5980 initiate_bundle_state_table (void)
5981 {
5982 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
5983 (htab_del) 0);
5984 }
5985
5986 /* Finish work with the hash table. */
5987
5988 static void
5989 finish_bundle_state_table (void)
5990 {
5991 htab_delete (bundle_state_table);
5992 }
5993
5994 \f
5995
5996 /* The following variable is an insn `nop' used to check bundle states
5997    with different numbers of inserted nops.  */
5998
5999 static rtx ia64_nop;
6000
6001 /* The following function tries to issue NOPS_NUM nops for the current
6002    state without advancing the processor cycle.  If it fails, the
6003    function returns FALSE and frees the current state.  */
6004
6005 static int
6006 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6007 {
6008 int i;
6009
6010 for (i = 0; i < nops_num; i++)
6011 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6012 {
6013 free_bundle_state (curr_state);
6014 return FALSE;
6015 }
6016 return TRUE;
6017 }
6018
6019 /* The following function tries to issue INSN for the current
6020    state without advancing the processor cycle.  If it fails, the
6021    function returns FALSE and frees the current state.  */
6022
6023 static int
6024 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6025 {
6026 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6027 {
6028 free_bundle_state (curr_state);
6029 return FALSE;
6030 }
6031 return TRUE;
6032 }
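
/* Both helpers above follow the same speculative pattern: the DFA
   transition is attempted directly on the candidate state's copy of the
   automaton state, and on failure the whole candidate is simply
   discarded with free_bundle_state rather than rolled back -- the
   caller always starts from a fresh copy of the originator's DFA state
   (see issue_nops_and_insn below).  */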
6033
6034 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6035    starting with ORIGINATOR without advancing the processor cycle.  If
6036    TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6037    ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6038    If it is successful, the function creates a new bundle state and
6039    inserts it into the hash table and into `index_to_bundle_states'.  */
6040
6041 static void
6042 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6043 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6044 {
6045 struct bundle_state *curr_state;
6046
6047 curr_state = get_free_bundle_state ();
6048 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6049 curr_state->insn = insn;
6050 curr_state->insn_num = originator->insn_num + 1;
6051 curr_state->cost = originator->cost;
6052 curr_state->originator = originator;
6053 curr_state->before_nops_num = before_nops_num;
6054 curr_state->after_nops_num = 0;
6055 curr_state->accumulated_insns_num
6056 = originator->accumulated_insns_num + before_nops_num;
6057 curr_state->branch_deviation = originator->branch_deviation;
6058 if (insn == NULL_RTX)
6059 abort ();
6060 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6061 {
6062 if (GET_MODE (insn) == TImode)
6063 abort ();
6064 if (!try_issue_nops (curr_state, before_nops_num))
6065 return;
6066 if (!try_issue_insn (curr_state, insn))
6067 return;
6068 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6069 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6070 && curr_state->accumulated_insns_num % 3 != 0)
6071 {
6072 free_bundle_state (curr_state);
6073 return;
6074 }
6075 }
6076 else if (GET_MODE (insn) != TImode)
6077 {
6078 if (!try_issue_nops (curr_state, before_nops_num))
6079 return;
6080 if (!try_issue_insn (curr_state, insn))
6081 return;
6082 curr_state->accumulated_insns_num++;
6083 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6084 || asm_noperands (PATTERN (insn)) >= 0)
6085 abort ();
6086 if (ia64_safe_type (insn) == TYPE_L)
6087 curr_state->accumulated_insns_num++;
6088 }
6089 else
6090 {
6091 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6092 state_transition (curr_state->dfa_state, NULL);
6093 curr_state->cost++;
6094 if (!try_issue_nops (curr_state, before_nops_num))
6095 return;
6096 if (!try_issue_insn (curr_state, insn))
6097 return;
6098 curr_state->accumulated_insns_num++;
6099 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6100 || asm_noperands (PATTERN (insn)) >= 0)
6101 {
6102 /* Finish bundle containing asm insn. */
6103 curr_state->after_nops_num
6104 = 3 - curr_state->accumulated_insns_num % 3;
6105 curr_state->accumulated_insns_num
6106 += 3 - curr_state->accumulated_insns_num % 3;
6107 }
6108 else if (ia64_safe_type (insn) == TYPE_L)
6109 curr_state->accumulated_insns_num++;
6110 }
6111 if (ia64_safe_type (insn) == TYPE_B)
6112 curr_state->branch_deviation
6113 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6114 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6115 {
6116 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6117 {
6118 state_t dfa_state;
6119 struct bundle_state *curr_state1;
6120 struct bundle_state *allocated_states_chain;
6121
6122 curr_state1 = get_free_bundle_state ();
6123 dfa_state = curr_state1->dfa_state;
6124 allocated_states_chain = curr_state1->allocated_states_chain;
6125 *curr_state1 = *curr_state;
6126 curr_state1->dfa_state = dfa_state;
6127 curr_state1->allocated_states_chain = allocated_states_chain;
6128 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6129 dfa_state_size);
6130 curr_state = curr_state1;
6131 }
6132 if (!try_issue_nops (curr_state,
6133 3 - curr_state->accumulated_insns_num % 3))
6134 return;
6135 curr_state->after_nops_num
6136 = 3 - curr_state->accumulated_insns_num % 3;
6137 curr_state->accumulated_insns_num
6138 += 3 - curr_state->accumulated_insns_num % 3;
6139 }
6140 if (!insert_bundle_state (curr_state))
6141 free_bundle_state (curr_state);
6142 return;
6143 }
6144
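/* A small self-check, not part of GCC, of the "3 - n % 3" padding
   arithmetic used above: a bundle holds three slots, so this many nops
   round a partially filled bundle up to the next bundle boundary.
   Note that it yields 3 rather than 0 when n is already a multiple of
   3, which is why the bundle-end code in issue_nops_and_insn first
   checks "accumulated_insns_num % 3 != 0".  The block is compiled out.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int n;

  for (n = 1; n <= 5; n++)
    printf ("insns=%d pad=%d total=%d\n", n, 3 - n % 3, n + 3 - n % 3);
  return 0;
}
#endif
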
6145 /* The following function returns the position in the two-bundle window
6146    for the given STATE.  */
6147
6148 static int
6149 get_max_pos (state_t state)
6150 {
6151 if (cpu_unit_reservation_p (state, pos_6))
6152 return 6;
6153 else if (cpu_unit_reservation_p (state, pos_5))
6154 return 5;
6155 else if (cpu_unit_reservation_p (state, pos_4))
6156 return 4;
6157 else if (cpu_unit_reservation_p (state, pos_3))
6158 return 3;
6159 else if (cpu_unit_reservation_p (state, pos_2))
6160 return 2;
6161 else if (cpu_unit_reservation_p (state, pos_1))
6162 return 1;
6163 else
6164 return 0;
6165 }
6166
6167 /* The function returns the code of a possible template for the given
6168    position and state.  It should be called only with position values
6169    of 3 or 6.  */
6170
6171 static int
6172 get_template (state_t state, int pos)
6173 {
6174 switch (pos)
6175 {
6176 case 3:
6177 if (cpu_unit_reservation_p (state, _0mii_))
6178 return 0;
6179 else if (cpu_unit_reservation_p (state, _0mmi_))
6180 return 1;
6181 else if (cpu_unit_reservation_p (state, _0mfi_))
6182 return 2;
6183 else if (cpu_unit_reservation_p (state, _0mmf_))
6184 return 3;
6185 else if (cpu_unit_reservation_p (state, _0bbb_))
6186 return 4;
6187 else if (cpu_unit_reservation_p (state, _0mbb_))
6188 return 5;
6189 else if (cpu_unit_reservation_p (state, _0mib_))
6190 return 6;
6191 else if (cpu_unit_reservation_p (state, _0mmb_))
6192 return 7;
6193 else if (cpu_unit_reservation_p (state, _0mfb_))
6194 return 8;
6195 else if (cpu_unit_reservation_p (state, _0mlx_))
6196 return 9;
6197 else
6198 abort ();
6199 case 6:
6200 if (cpu_unit_reservation_p (state, _1mii_))
6201 return 0;
6202 else if (cpu_unit_reservation_p (state, _1mmi_))
6203 return 1;
6204 else if (cpu_unit_reservation_p (state, _1mfi_))
6205 return 2;
6206 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6207 return 3;
6208 else if (cpu_unit_reservation_p (state, _1bbb_))
6209 return 4;
6210 else if (cpu_unit_reservation_p (state, _1mbb_))
6211 return 5;
6212 else if (cpu_unit_reservation_p (state, _1mib_))
6213 return 6;
6214 else if (cpu_unit_reservation_p (state, _1mmb_))
6215 return 7;
6216 else if (cpu_unit_reservation_p (state, _1mfb_))
6217 return 8;
6218 else if (cpu_unit_reservation_p (state, _1mlx_))
6219 return 9;
6220 else
6221 abort ();
6222 default:
6223 abort ();
6224 }
6225 }
6226
6227 /* The following function returns the first insn important for insn
6228    bundling starting at INSN and before TAIL.  */
6229
6230 static rtx
6231 get_next_important_insn (rtx insn, rtx tail)
6232 {
6233 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6234 if (INSN_P (insn)
6235 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6236 && GET_CODE (PATTERN (insn)) != USE
6237 && GET_CODE (PATTERN (insn)) != CLOBBER)
6238 return insn;
6239 return NULL_RTX;
6240 }
6241
6242 /* The following function does insn bundling.  Bundling means
6243    inserting templates and nop insns to fit insn groups into permitted
6244    templates.  Instruction scheduling uses an NDFA (non-deterministic
6245    finite automaton) encoding information about the templates and the
6246    inserted nops.  The nondeterminism of the automaton permits following
6247    all possible insn sequences very quickly.
6248
6249    Unfortunately it is not possible to get information about inserted
6250    nop insns and used templates from the automaton states.  The
6251    automaton only says that we can issue an insn, possibly inserting
6252    some nops before it and using some template.  Therefore insn
6253    bundling in this function is implemented by using a DFA
6254    (deterministic finite automaton).  We follow all possible insn
6255    sequences by inserting 0-2 nops (that is what the NDFA describes for
6256    insn scheduling) before/after each insn being bundled.  We know the
6257    start of a simulated processor cycle from insn scheduling (an insn
6258    starting a new cycle has TImode).
6259
6260    A simple implementation of insn bundling would create an enormous
6261    number of possible insn sequences satisfying the information about
6262    new cycle ticks taken from the insn scheduling.  To make the
6263    algorithm practical we use dynamic programming.  Each decision (about
6264    inserting nops and implicitly about previous decisions) is described
6265    by the structure bundle_state (see above).  If we generate the same
6266    bundle state (the key is the automaton state after issuing the insns
6267    and nops for it), we reuse the already generated one.  As a
6268    consequence we reject some decisions which cannot improve the
6269    solution and reduce the memory needed by the algorithm.
6270
6271    When we reach the end of the EBB (extended basic block), we choose
6272    the best sequence and then, moving back through the EBB, insert
6273    templates for the best alternative.  The templates are found by
6274    querying the automaton state for each insn in the chosen bundle states.
6275
6276    So the algorithm makes two (forward and backward) passes through the
6277    EBB.  There is an additional forward pass through the EBB for the
6278    Itanium1 processor.  This pass inserts more nops to make the dependency
6279    between a producer insn and MMMUL/MMSHF insns at least 4 cycles long.  */
6280
6281 static void
6282 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6283 {
6284 struct bundle_state *curr_state, *next_state, *best_state;
6285 rtx insn, next_insn;
6286 int insn_num;
6287 int i, bundle_end_p, only_bundle_end_p, asm_p;
6288 int pos = 0, max_pos, template0, template1;
6289 rtx b;
6290 rtx nop;
6291 enum attr_type type;
6292
6293 insn_num = 0;
6294 /* Count insns in the EBB. */
6295 for (insn = NEXT_INSN (prev_head_insn);
6296 insn && insn != tail;
6297 insn = NEXT_INSN (insn))
6298 if (INSN_P (insn))
6299 insn_num++;
6300 if (insn_num == 0)
6301 return;
6302 bundling_p = 1;
6303 dfa_clean_insn_cache ();
6304 initiate_bundle_state_table ();
6305 index_to_bundle_states = xmalloc ((insn_num + 2)
6306 * sizeof (struct bundle_state *));
6307 /* First (forward) pass -- generation of bundle states. */
6308 curr_state = get_free_bundle_state ();
6309 curr_state->insn = NULL;
6310 curr_state->before_nops_num = 0;
6311 curr_state->after_nops_num = 0;
6312 curr_state->insn_num = 0;
6313 curr_state->cost = 0;
6314 curr_state->accumulated_insns_num = 0;
6315 curr_state->branch_deviation = 0;
6316 curr_state->next = NULL;
6317 curr_state->originator = NULL;
6318 state_reset (curr_state->dfa_state);
6319 index_to_bundle_states [0] = curr_state;
6320 insn_num = 0;
6321   /* Shift the cycle mark if it is put on an insn which could be ignored.  */
6322 for (insn = NEXT_INSN (prev_head_insn);
6323 insn != tail;
6324 insn = NEXT_INSN (insn))
6325 if (INSN_P (insn)
6326 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6327 || GET_CODE (PATTERN (insn)) == USE
6328 || GET_CODE (PATTERN (insn)) == CLOBBER)
6329 && GET_MODE (insn) == TImode)
6330 {
6331 PUT_MODE (insn, VOIDmode);
6332 for (next_insn = NEXT_INSN (insn);
6333 next_insn != tail;
6334 next_insn = NEXT_INSN (next_insn))
6335 if (INSN_P (next_insn)
6336 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6337 && GET_CODE (PATTERN (next_insn)) != USE
6338 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6339 {
6340 PUT_MODE (next_insn, TImode);
6341 break;
6342 }
6343 }
6344   /* Forward pass: generation of bundle states.  */
6345 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6346 insn != NULL_RTX;
6347 insn = next_insn)
6348 {
6349 if (!INSN_P (insn)
6350 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6351 || GET_CODE (PATTERN (insn)) == USE
6352 || GET_CODE (PATTERN (insn)) == CLOBBER)
6353 abort ();
6354 type = ia64_safe_type (insn);
6355 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6356 insn_num++;
6357 index_to_bundle_states [insn_num] = NULL;
6358 for (curr_state = index_to_bundle_states [insn_num - 1];
6359 curr_state != NULL;
6360 curr_state = next_state)
6361 {
6362 pos = curr_state->accumulated_insns_num % 3;
6363 next_state = curr_state->next;
6364           /* We must fill up the current bundle in order to start a
6365              subsequent asm insn in a new bundle.  An asm insn is always
6366              placed in a separate bundle.  */
6367 only_bundle_end_p
6368 = (next_insn != NULL_RTX
6369 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6370 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6371 /* We may fill up the current bundle if it is the cycle end
6372 without a group barrier. */
6373 bundle_end_p
6374 = (only_bundle_end_p || next_insn == NULL_RTX
6375 || (GET_MODE (next_insn) == TImode
6376 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6377 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6378 || type == TYPE_S
6379 /* We need to insert 2 nops for cases like M_MII. To
6380 guarantee issuing all insns on the same cycle for
6381 Itanium 1, we need to issue 2 nops after the first M
6382 insn (MnnMII where n is a nop insn). */
6383 || ((type == TYPE_M || type == TYPE_A)
6384 && ia64_tune == PROCESSOR_ITANIUM
6385 && !bundle_end_p && pos == 1))
6386 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6387 only_bundle_end_p);
6388 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6389 only_bundle_end_p);
6390 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6391 only_bundle_end_p);
6392 }
6393 if (index_to_bundle_states [insn_num] == NULL)
6394 abort ();
6395 for (curr_state = index_to_bundle_states [insn_num];
6396 curr_state != NULL;
6397 curr_state = curr_state->next)
6398 if (verbose >= 2 && dump)
6399 {
6400             /* This structure is taken from the generated code of the
6401                pipeline hazard recognizer (see file insn-attrtab.c).
6402                Please don't forget to change the structure if a new
6403                automaton is added to the .md file.  */
6404 struct DFA_chip
6405 {
6406 unsigned short one_automaton_state;
6407 unsigned short oneb_automaton_state;
6408 unsigned short two_automaton_state;
6409 unsigned short twob_automaton_state;
6410 };
6411
6412 fprintf
6413 (dump,
6414 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6415 curr_state->unique_num,
6416 (curr_state->originator == NULL
6417 ? -1 : curr_state->originator->unique_num),
6418 curr_state->cost,
6419 curr_state->before_nops_num, curr_state->after_nops_num,
6420 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6421 (ia64_tune == PROCESSOR_ITANIUM
6422 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6423 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6424 INSN_UID (insn));
6425 }
6426 }
6427 if (index_to_bundle_states [insn_num] == NULL)
6428 /* We should find a solution because the 2nd insn scheduling has
6429 found one. */
6430 abort ();
6431 /* Find a state corresponding to the best insn sequence. */
6432 best_state = NULL;
6433 for (curr_state = index_to_bundle_states [insn_num];
6434 curr_state != NULL;
6435 curr_state = curr_state->next)
6436     /* We are just looking at the states with a fully filled up last
6437        bundle.  First we prefer insn sequences with minimal cost,
6438        then with minimal inserted nops, and finally with branch insns
6439        placed in the 3rd slots.  */
6440 if (curr_state->accumulated_insns_num % 3 == 0
6441 && (best_state == NULL || best_state->cost > curr_state->cost
6442 || (best_state->cost == curr_state->cost
6443 && (curr_state->accumulated_insns_num
6444 < best_state->accumulated_insns_num
6445 || (curr_state->accumulated_insns_num
6446 == best_state->accumulated_insns_num
6447 && curr_state->branch_deviation
6448 < best_state->branch_deviation)))))
6449 best_state = curr_state;
6450 /* Second (backward) pass: adding nops and templates. */
6451 insn_num = best_state->before_nops_num;
6452 template0 = template1 = -1;
6453 for (curr_state = best_state;
6454 curr_state->originator != NULL;
6455 curr_state = curr_state->originator)
6456 {
6457 insn = curr_state->insn;
6458 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6459 || asm_noperands (PATTERN (insn)) >= 0);
6460 insn_num++;
6461 if (verbose >= 2 && dump)
6462 {
6463 struct DFA_chip
6464 {
6465 unsigned short one_automaton_state;
6466 unsigned short oneb_automaton_state;
6467 unsigned short two_automaton_state;
6468 unsigned short twob_automaton_state;
6469 };
6470
6471 fprintf
6472 (dump,
6473 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6474 curr_state->unique_num,
6475 (curr_state->originator == NULL
6476 ? -1 : curr_state->originator->unique_num),
6477 curr_state->cost,
6478 curr_state->before_nops_num, curr_state->after_nops_num,
6479 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6480 (ia64_tune == PROCESSOR_ITANIUM
6481 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6482 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6483 INSN_UID (insn));
6484 }
6485       /* Find the position in the current bundle window.  The window can
6486          contain at most two bundles.  A two-bundle window means that
6487          the processor will make two bundle rotations.  */
6488 max_pos = get_max_pos (curr_state->dfa_state);
6489 if (max_pos == 6
6490 /* The following (negative template number) means that the
6491 processor did one bundle rotation. */
6492 || (max_pos == 3 && template0 < 0))
6493 {
6494 /* We are at the end of the window -- find template(s) for
6495 its bundle(s). */
6496 pos = max_pos;
6497 if (max_pos == 3)
6498 template0 = get_template (curr_state->dfa_state, 3);
6499 else
6500 {
6501 template1 = get_template (curr_state->dfa_state, 3);
6502 template0 = get_template (curr_state->dfa_state, 6);
6503 }
6504 }
6505 if (max_pos > 3 && template1 < 0)
6506         /* This may happen when we have a stop inside a bundle.  */
6507 {
6508 if (pos > 3)
6509 abort ();
6510 template1 = get_template (curr_state->dfa_state, 3);
6511 pos += 3;
6512 }
6513 if (!asm_p)
6514 /* Emit nops after the current insn. */
6515 for (i = 0; i < curr_state->after_nops_num; i++)
6516 {
6517 nop = gen_nop ();
6518 emit_insn_after (nop, insn);
6519 pos--;
6520 if (pos < 0)
6521 abort ();
6522 if (pos % 3 == 0)
6523 {
6524 /* We are at the start of a bundle: emit the template
6525 (it should be defined). */
6526 if (template0 < 0)
6527 abort ();
6528 b = gen_bundle_selector (GEN_INT (template0));
6529 ia64_emit_insn_before (b, nop);
6530                 /* If we have a two-bundle window, we make one bundle
6531                    rotation.  Otherwise template0 will be undefined
6532                    (a negative value).  */
6533 template0 = template1;
6534 template1 = -1;
6535 }
6536 }
6537       /* Move the position backward in the window.  A group barrier has
6538          no slot.  An asm insn takes a whole bundle.  */
6539 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6540 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6541 && asm_noperands (PATTERN (insn)) < 0)
6542 pos--;
6543       /* A long insn takes 2 slots.  */
6544 if (ia64_safe_type (insn) == TYPE_L)
6545 pos--;
6546 if (pos < 0)
6547 abort ();
6548 if (pos % 3 == 0
6549 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6550 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6551 && asm_noperands (PATTERN (insn)) < 0)
6552 {
6553 /* The current insn is at the bundle start: emit the
6554 template. */
6555 if (template0 < 0)
6556 abort ();
6557 b = gen_bundle_selector (GEN_INT (template0));
6558 ia64_emit_insn_before (b, insn);
6559 b = PREV_INSN (insn);
6560 insn = b;
6561 /* See comment above in analogous place for emitting nops
6562 after the insn. */
6563 template0 = template1;
6564 template1 = -1;
6565 }
6566       /* Emit nops before the current insn.  */
6567 for (i = 0; i < curr_state->before_nops_num; i++)
6568 {
6569 nop = gen_nop ();
6570 ia64_emit_insn_before (nop, insn);
6571 nop = PREV_INSN (insn);
6572 insn = nop;
6573 pos--;
6574 if (pos < 0)
6575 abort ();
6576 if (pos % 3 == 0)
6577 {
6578 /* See comment above in analogous place for emitting nops
6579 after the insn. */
6580 if (template0 < 0)
6581 abort ();
6582 b = gen_bundle_selector (GEN_INT (template0));
6583 ia64_emit_insn_before (b, insn);
6584 b = PREV_INSN (insn);
6585 insn = b;
6586 template0 = template1;
6587 template1 = -1;
6588 }
6589 }
6590 }
6591 if (ia64_tune == PROCESSOR_ITANIUM)
6592     /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
6593        Itanium1 has a strange design: if the distance between an insn
6594        and a dependent MM-insn is less than 4 cycles then we have an
6595        additional 6-cycle stall.  So we make the distance equal to 4
6596        cycles if it is less.  */
6597 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6598 insn != NULL_RTX;
6599 insn = next_insn)
6600 {
6601 if (!INSN_P (insn)
6602 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6603 || GET_CODE (PATTERN (insn)) == USE
6604 || GET_CODE (PATTERN (insn)) == CLOBBER)
6605 abort ();
6606 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6607 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6608         /* We found an MM-insn which needs additional cycles.  */
6609 {
6610 rtx last;
6611 int i, j, n;
6612 int pred_stop_p;
6613
6614           /* Now we are searching for the template of the bundle in
6615              which the MM-insn is placed and for the position of the
6616              insn in the bundle (0, 1, 2).  We also check whether
6617              there is a stop before the insn.  */
6618 last = prev_active_insn (insn);
6619 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6620 if (pred_stop_p)
6621 last = prev_active_insn (last);
6622 n = 0;
6623 for (;; last = prev_active_insn (last))
6624 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6625 {
6626 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6627 if (template0 == 9)
6628                 /* The insn is in an MLX bundle.  Change the template
6629                    to MFI because we will add nops before the
6630                    insn.  This simplifies the subsequent code a lot.  */
6631 PATTERN (last)
6632 = gen_bundle_selector (const2_rtx); /* -> MFI */
6633 break;
6634 }
6635 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
6636 && (ia64_safe_itanium_class (last)
6637 != ITANIUM_CLASS_IGNORE))
6638 n++;
6639           /* Some correctness checks: the stop is not at the
6640              bundle start, there are no more than 3 insns in the bundle,
6641              and the MM-insn is not at the start of a bundle with
6642              template MLX.  */
6643 if ((pred_stop_p && n == 0) || n > 2
6644 || (template0 == 9 && n != 0))
6645 abort ();
6646           /* Fill the rest of the original bundle with nops.  */
6647 for (j = 3 - n; j > 0; j --)
6648 ia64_emit_insn_before (gen_nop (), insn);
6649           /* This takes into account that we will add N more nops
6650              before the insn later -- please see the code below.  */
6651 add_cycles [INSN_UID (insn)]--;
6652 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6653 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6654 insn);
6655 if (pred_stop_p)
6656 add_cycles [INSN_UID (insn)]--;
6657 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6658 {
6659 /* Insert "MII;" template. */
6660 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
6661 insn);
6662 ia64_emit_insn_before (gen_nop (), insn);
6663 ia64_emit_insn_before (gen_nop (), insn);
6664 if (i > 1)
6665 {
6666                   /* To decrease code size, we use the "MI;I;"
6667                      template.  */
6668 ia64_emit_insn_before
6669 (gen_insn_group_barrier (GEN_INT (3)), insn);
6670 i--;
6671 }
6672 ia64_emit_insn_before (gen_nop (), insn);
6673 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6674 insn);
6675 }
6676 /* Put the MM-insn in the same slot of a bundle with the
6677 same template as the original one. */
6678 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
6679 insn);
6680           /* To put the insn in the same slot, add the necessary number
6681              of nops.  */
6682 for (j = n; j > 0; j --)
6683 ia64_emit_insn_before (gen_nop (), insn);
6684           /* Insert the stop bit if the original bundle had one.  */
6685 if (pred_stop_p)
6686 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6687 insn);
6688 }
6689 }
6690 free (index_to_bundle_states);
6691 finish_bundle_state_table ();
6692 bundling_p = 0;
6693 dfa_clean_insn_cache ();
6694 }
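
/* Illustrative sketch, not part of GCC: a toy analogue of the dynamic
   programming performed by bundling () above.  Here a "state" is just
   (insn index, slot position within a 3-slot bundle) and its cost is
   the number of nops inserted so far; the real code keys states on the
   automaton state and insn number instead, but the principle --
   expand each state with 0, 1 or 2 nops and keep only the best state
   per key -- is the same.  All names below are hypothetical; the block
   is compiled out.  */
#if 0
#include <stdio.h>
#include <string.h>

#define TOY_NINSNS 5
#define TOY_INF 0x3f3f3f3f

int
main (void)
{
  /* best[i][p] = minimal number of nops so that, after issuing the
     first I insns, the next free slot is P.  */
  int best[TOY_NINSNS + 1][3];
  int i, p, nops;

  memset (best, 0x3f, sizeof best);
  best[0][0] = 0;

  for (i = 0; i < TOY_NINSNS; i++)
    for (p = 0; p < 3; p++)
      if (best[i][p] < TOY_INF)
	for (nops = 0; nops <= 2; nops++)
	  {
	    int q = (p + nops + 1) % 3;	/* Slot after the nops and the insn.  */
	    int cost = best[i][p] + nops;

	    if (cost < best[i + 1][q])
	      best[i + 1][q] = cost;	/* Keep only the best state.  */
	  }

  printf ("min nops to end on a bundle boundary: %d\n", best[TOY_NINSNS][0]);
  return 0;
}
#endif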
6695
6696 /* The following function is called at the end of scheduling BB or
6697 EBB. After reload, it inserts stop bits and does insn bundling. */
6698
6699 static void
6700 ia64_sched_finish (FILE *dump, int sched_verbose)
6701 {
6702 if (sched_verbose)
6703 fprintf (dump, "// Finishing schedule.\n");
6704 if (!reload_completed)
6705 return;
6706 if (reload_completed)
6707 {
6708 final_emit_insn_group_barriers (dump);
6709 bundling (dump, sched_verbose, current_sched_info->prev_head,
6710 current_sched_info->next_tail);
6711 if (sched_verbose && dump)
6712 fprintf (dump, "// finishing %d-%d\n",
6713 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
6714 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
6715
6716 return;
6717 }
6718 }
6719
6720 /* The following function inserts stop bits in scheduled BB or EBB. */
6721
6722 static void
6723 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6724 {
6725 rtx insn;
6726 int need_barrier_p = 0;
6727 rtx prev_insn = NULL_RTX;
6728
6729 init_insn_group_barriers ();
6730
6731 for (insn = NEXT_INSN (current_sched_info->prev_head);
6732 insn != current_sched_info->next_tail;
6733 insn = NEXT_INSN (insn))
6734 {
6735 if (GET_CODE (insn) == BARRIER)
6736 {
6737 rtx last = prev_active_insn (insn);
6738
6739 if (! last)
6740 continue;
6741 if (GET_CODE (last) == JUMP_INSN
6742 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6743 last = prev_active_insn (last);
6744 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6745 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6746
6747 init_insn_group_barriers ();
6748 need_barrier_p = 0;
6749 prev_insn = NULL_RTX;
6750 }
6751 else if (INSN_P (insn))
6752 {
6753 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6754 {
6755 init_insn_group_barriers ();
6756 need_barrier_p = 0;
6757 prev_insn = NULL_RTX;
6758 }
6759 else if (need_barrier_p || group_barrier_needed_p (insn))
6760 {
6761 if (TARGET_EARLY_STOP_BITS)
6762 {
6763 rtx last;
6764
6765 for (last = insn;
6766 last != current_sched_info->prev_head;
6767 last = PREV_INSN (last))
6768 if (INSN_P (last) && GET_MODE (last) == TImode
6769 && stops_p [INSN_UID (last)])
6770 break;
6771 if (last == current_sched_info->prev_head)
6772 last = insn;
6773 last = prev_active_insn (last);
6774 if (last
6775 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
6776 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6777 last);
6778 init_insn_group_barriers ();
6779 for (last = NEXT_INSN (last);
6780 last != insn;
6781 last = NEXT_INSN (last))
6782 if (INSN_P (last))
6783 group_barrier_needed_p (last);
6784 }
6785 else
6786 {
6787 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6788 insn);
6789 init_insn_group_barriers ();
6790 }
6791 group_barrier_needed_p (insn);
6792 prev_insn = NULL_RTX;
6793 }
6794 else if (recog_memoized (insn) >= 0)
6795 prev_insn = insn;
6796 need_barrier_p = (GET_CODE (insn) == CALL_INSN
6797 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6798 || asm_noperands (PATTERN (insn)) >= 0);
6799 }
6800 }
6801 }
6802
6803 \f
6804
6805 /* The following function returns the lookahead depth used by the
6806    multipass DFA insn scheduler.  */
6807
6808 static int
6809 ia64_first_cycle_multipass_dfa_lookahead (void)
6810 {
6811 return (reload_completed ? 6 : 4);
6812 }
6813
6814 /* The following function initializes the variable `dfa_pre_cycle_insn'.  */
6815
6816 static void
6817 ia64_init_dfa_pre_cycle_insn (void)
6818 {
6819 if (temp_dfa_state == NULL)
6820 {
6821 dfa_state_size = state_size ();
6822 temp_dfa_state = xmalloc (dfa_state_size);
6823 prev_cycle_state = xmalloc (dfa_state_size);
6824 }
6825 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
6826 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
6827 recog_memoized (dfa_pre_cycle_insn);
6828 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
6829 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
6830 recog_memoized (dfa_stop_insn);
6831 }
6832
6833 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
6834 used by the DFA insn scheduler. */
6835
6836 static rtx
6837 ia64_dfa_pre_cycle_insn (void)
6838 {
6839 return dfa_pre_cycle_insn;
6840 }
6841
6842 /* The following function returns TRUE if PRODUCER (of type ilog or
6843    ld) produces an address for CONSUMER (of type st or stf).  */
6844
6845 int
6846 ia64_st_address_bypass_p (rtx producer, rtx consumer)
6847 {
6848 rtx dest, reg, mem;
6849
6850 if (producer == NULL_RTX || consumer == NULL_RTX)
6851 abort ();
6852 dest = ia64_single_set (producer);
6853 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6854 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6855 abort ();
6856 if (GET_CODE (reg) == SUBREG)
6857 reg = SUBREG_REG (reg);
6858 dest = ia64_single_set (consumer);
6859 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
6860 || GET_CODE (mem) != MEM)
6861 abort ();
6862 return reg_mentioned_p (reg, mem);
6863 }
6864
6865 /* The following function returns TRUE if PRODUCER (of type ilog or
6866    ld) produces an address for CONSUMER (of type ld or fld).  */
6867
6868 int
6869 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
6870 {
6871 rtx dest, src, reg, mem;
6872
6873 if (producer == NULL_RTX || consumer == NULL_RTX)
6874 abort ();
6875 dest = ia64_single_set (producer);
6876 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6877 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6878 abort ();
6879 if (GET_CODE (reg) == SUBREG)
6880 reg = SUBREG_REG (reg);
6881 src = ia64_single_set (consumer);
6882 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
6883 abort ();
6884 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
6885 mem = XVECEXP (mem, 0, 0);
6886 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
6887 mem = XEXP (mem, 0);
6888
6889 /* Note that LO_SUM is used for GOT loads. */
6890 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
6891 abort ();
6892
6893 return reg_mentioned_p (reg, mem);
6894 }
6895
6896 /* The following function returns TRUE if INSN produces an address for a
6897    load/store insn.  We place such insns into an M slot because this
6898    decreases their latency time.  */
6899
6900 int
6901 ia64_produce_address_p (rtx insn)
6902 {
6903 return insn->call;
6904 }
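
/* The `call' bit of the insn is reused here as a scratch flag:
   ia64_dependencies_evaluation_hook above sets it on IALU insns that
   feed the address of a load or store, and the DFA descriptions query
   it through this predicate so that such insns are preferably placed
   into M slots.  */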
6905
6906 \f
6907 /* Emit pseudo-ops for the assembler to describe predicate relations.
6908 At present this assumes that we only consider predicate pairs to
6909 be mutex, and that the assembler can deduce proper values from
6910 straight-line code. */
6911
6912 static void
6913 emit_predicate_relation_info (void)
6914 {
6915 basic_block bb;
6916
6917 FOR_EACH_BB_REVERSE (bb)
6918 {
6919 int r;
6920 rtx head = BB_HEAD (bb);
6921
6922 /* We only need such notes at code labels. */
6923 if (GET_CODE (head) != CODE_LABEL)
6924 continue;
6925 if (GET_CODE (NEXT_INSN (head)) == NOTE
6926 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6927 head = NEXT_INSN (head);
6928
6929 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6930 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6931 {
6932 rtx p = gen_rtx_REG (BImode, r);
6933 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6934 if (head == BB_END (bb))
6935 BB_END (bb) = n;
6936 head = n;
6937 }
6938 }
6939
6940 /* Look for conditional calls that do not return, and protect predicate
6941 relations around them. Otherwise the assembler will assume the call
6942 returns, and complain about uses of call-clobbered predicates after
6943 the call. */
6944 FOR_EACH_BB_REVERSE (bb)
6945 {
6946 rtx insn = BB_HEAD (bb);
6947
6948 while (1)
6949 {
6950 if (GET_CODE (insn) == CALL_INSN
6951 && GET_CODE (PATTERN (insn)) == COND_EXEC
6952 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6953 {
6954 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6955 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6956 if (BB_HEAD (bb) == insn)
6957 BB_HEAD (bb) = b;
6958 if (BB_END (bb) == insn)
6959 BB_END (bb) = a;
6960 }
6961
6962 if (insn == BB_END (bb))
6963 break;
6964 insn = NEXT_INSN (insn);
6965 }
6966 }
6967 }
6968
6969 /* Perform machine dependent operations on the rtl chain INSNS. */
6970
6971 static void
6972 ia64_reorg (void)
6973 {
6974 /* We are freeing block_for_insn in the toplev to keep compatibility
6975 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6976 compute_bb_for_insn ();
6977
6978 /* If optimizing, we'll have split before scheduling. */
6979 if (optimize == 0)
6980 split_all_insns (0);
6981
6982 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6983 non-optimizing bootstrap. */
6984 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
6985
6986 if (ia64_flag_schedule_insns2)
6987 {
6988 timevar_push (TV_SCHED2);
6989 ia64_final_schedule = 1;
6990
6991 initiate_bundle_states ();
6992 ia64_nop = make_insn_raw (gen_nop ());
6993 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
6994 recog_memoized (ia64_nop);
6995 clocks_length = get_max_uid () + 1;
6996 stops_p = xcalloc (1, clocks_length);
6997 if (ia64_tune == PROCESSOR_ITANIUM)
6998 {
6999 clocks = xcalloc (clocks_length, sizeof (int));
7000 add_cycles = xcalloc (clocks_length, sizeof (int));
7001 }
7002 if (ia64_tune == PROCESSOR_ITANIUM2)
7003 {
7004 pos_1 = get_cpu_unit_code ("2_1");
7005 pos_2 = get_cpu_unit_code ("2_2");
7006 pos_3 = get_cpu_unit_code ("2_3");
7007 pos_4 = get_cpu_unit_code ("2_4");
7008 pos_5 = get_cpu_unit_code ("2_5");
7009 pos_6 = get_cpu_unit_code ("2_6");
7010 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7011 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7012 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7013 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7014 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7015 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7016 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7017 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7018 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7019 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7020 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7021 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7022 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7023 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7024 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7025 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7026 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7027 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7028 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7029 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7030 }
7031 else
7032 {
7033 pos_1 = get_cpu_unit_code ("1_1");
7034 pos_2 = get_cpu_unit_code ("1_2");
7035 pos_3 = get_cpu_unit_code ("1_3");
7036 pos_4 = get_cpu_unit_code ("1_4");
7037 pos_5 = get_cpu_unit_code ("1_5");
7038 pos_6 = get_cpu_unit_code ("1_6");
7039 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7040 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7041 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7042 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7043 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7044 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7045 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7046 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7047 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7048 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7049 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7050 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7051 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7052 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7053 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7054 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7055 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7056 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7057 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7058 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7059 }
7060 schedule_ebbs (dump_file);
7061 finish_bundle_states ();
7062 if (ia64_tune == PROCESSOR_ITANIUM)
7063 {
7064 free (add_cycles);
7065 free (clocks);
7066 }
7067 free (stops_p);
7068 emit_insn_group_barriers (dump_file);
7069
7070 ia64_final_schedule = 0;
7071 timevar_pop (TV_SCHED2);
7072 }
7073 else
7074 emit_all_insn_group_barriers (dump_file);
7075
7076 /* A call must not be the last instruction in a function, or the return
7077 address would point past the end of the function and unwinding would
7078 not work properly. Note that IA-64 differs from dwarf2 on this point. */
7079 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7080 {
7081 rtx insn;
7082 int saw_stop = 0;
7083
7084 insn = get_last_insn ();
7085 if (! INSN_P (insn))
7086 insn = prev_active_insn (insn);
7087 /* Skip over insns that expand to nothing. */
7088 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7089 {
7090 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7091 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7092 saw_stop = 1;
7093 insn = prev_active_insn (insn);
7094 }
7095 if (GET_CODE (insn) == CALL_INSN)
7096 {
7097 if (! saw_stop)
7098 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7099 emit_insn (gen_break_f ());
7100 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7101 }
7102 }
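 /* Illustrative sketch of the situation handled just above (the function
    and names below are invented for the example, not taken from any real
    source tree):

        extern void fatal_error (const char *) __attribute__ ((noreturn));

        void
        die (void)
        {
          fatal_error ("giving up");
        }

    Since fatal_error never returns, the br.call to it would otherwise be
    the last bundle of die; the saved return address would then point one
    bundle past the end of die, and the unwinder would look up the wrong
    region.  The break.f plus stop bit emitted above keeps the return
    address inside die.  */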
7103
7104 fixup_errata ();
7105 emit_predicate_relation_info ();
7106
7107 if (ia64_flag_var_tracking)
7108 {
7109 timevar_push (TV_VAR_TRACKING);
7110 variable_tracking_main ();
7111 timevar_pop (TV_VAR_TRACKING);
7112 }
7113 }
7114 \f
7115 /* Return true if REGNO is used by the epilogue. */
7116
7117 int
7118 ia64_epilogue_uses (int regno)
7119 {
7120 switch (regno)
7121 {
7122 case R_GR (1):
7123 /* With a call to a function in another module, we will write a new
7124 value to "gp". After returning from such a call, we need to make
7125 sure the function restores the original gp-value, even if the
7126 function itself does not use the gp anymore. */
7127 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7128
7129 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7130 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7131 /* For functions defined with the syscall_linkage attribute, all
7132 input registers are marked as live at all function exits. This
7133 prevents the register allocator from using the input registers,
7134 which in turn makes it possible to restart a system call after
7135 an interrupt without having to save/restore the input registers.
7136 This also prevents kernel data from leaking to application code. */
7137 return lookup_attribute ("syscall_linkage",
7138 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7139
7140 case R_BR (0):
7141 /* Conditional return patterns can't represent the use of `b0' as
7142 the return address, so we force the value live this way. */
7143 return 1;
7144
7145 case AR_PFS_REGNUM:
7146 /* Likewise for ar.pfs, which is used by br.ret. */
7147 return 1;
7148
7149 default:
7150 return 0;
7151 }
7152 }
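 /* As an illustration of the syscall_linkage case above (a hedged sketch;
    the function name and signature are invented):

        extern long sys_entry (long a0, long a1, long a2)
          __attribute__ ((syscall_linkage));

    For such a function, in0-in7 are treated as live at every exit, so the
    register allocator will not clobber the incoming argument registers and
    the system call can be restarted with its original arguments.  */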
7153
7154 /* Return true if REGNO is used by the frame unwinder. */
7155
7156 int
7157 ia64_eh_uses (int regno)
7158 {
7159 if (! reload_completed)
7160 return 0;
7161
7162 if (current_frame_info.reg_save_b0
7163 && regno == current_frame_info.reg_save_b0)
7164 return 1;
7165 if (current_frame_info.reg_save_pr
7166 && regno == current_frame_info.reg_save_pr)
7167 return 1;
7168 if (current_frame_info.reg_save_ar_pfs
7169 && regno == current_frame_info.reg_save_ar_pfs)
7170 return 1;
7171 if (current_frame_info.reg_save_ar_unat
7172 && regno == current_frame_info.reg_save_ar_unat)
7173 return 1;
7174 if (current_frame_info.reg_save_ar_lc
7175 && regno == current_frame_info.reg_save_ar_lc)
7176 return 1;
7177
7178 return 0;
7179 }
7180 \f
7181 /* Return true if EXP goes in small data/bss. */
7182
7183 /* ??? We could also support "own" long data here, generating movl/add/ld8
7184 instead of addl,ld8/ld8. That makes the code bigger, but should make the
7185 code faster because there is one less load. Such a section could also hold
7186 incomplete types, which cannot go in sdata/sbss. */
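 /* For reference, a rough sketch of the two access sequences mentioned in
    the ??? note (register names and the variable are illustrative only):

        small data (size <= ia64_section_threshold):
            addl  r2 = @gprel(var), gp
            ld4   r8 = [r2]                    -- one load

        ordinary data, addressed through the linkage table:
            addl  r2 = @ltoff(var), gp
            ld8   r2 = [r2]                    -- load the address
            ld4   r8 = [r2]                    -- load the value        */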
7187
7188 static bool
7189 ia64_in_small_data_p (tree exp)
7190 {
7191 if (TARGET_NO_SDATA)
7192 return false;
7193
7194 /* We want to merge strings, so we never consider them small data. */
7195 if (TREE_CODE (exp) == STRING_CST)
7196 return false;
7197
7198 /* Functions are never small data. */
7199 if (TREE_CODE (exp) == FUNCTION_DECL)
7200 return false;
7201
7202 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7203 {
7204 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7205 if (strcmp (section, ".sdata") == 0
7206 || strcmp (section, ".sbss") == 0)
7207 return true;
7208 }
7209 else
7210 {
7211 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7212
7213 /* If this is an incomplete type with size 0, then we can't put it
7214 in sdata because it might be too big when completed. */
7215 if (size > 0 && size <= ia64_section_threshold)
7216 return true;
7217 }
7218
7219 return false;
7220 }
7221 \f
7222 /* Output assembly directives for prologue regions. */
7223
7224 /* True if the current basic block is the last block of the function. */
7225
7226 static bool last_block;
7227
7228 /* True if we need a copy_state command at the start of the next block. */
7229
7230 static bool need_copy_state;
7231
7232 /* The function emits unwind directives for the start of an epilogue. */
7233
7234 static void
7235 process_epilogue (void)
7236 {
7237 /* If this isn't the last block of the function, then we need to label the
7238 current state, and copy it back in at the start of the next block. */
7239
7240 if (!last_block)
7241 {
7242 fprintf (asm_out_file, "\t.label_state 1\n");
7243 need_copy_state = true;
7244 }
7245
7246 fprintf (asm_out_file, "\t.restore sp\n");
7247 }
7248
7249 /* This function processes a SET pattern, looking for the specific forms
7250 that require emitting an assembly directive for unwinding. */
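 /* For example, a typical prologue (register numbers invented for the
    sketch) maps to unwind directives roughly as follows:

        alloc r35 = ar.pfs, 2, 3, 0, 0      ->  .save ar.pfs, r35
        mov   r34 = b0                      ->  .save rp, r34
        adds  r12 = -64, r12                ->  .fframe 64

    The exact directive depends on which save register or save slot the
    prologue code chose; see the cases below.  */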
7251
7252 static int
7253 process_set (FILE *asm_out_file, rtx pat)
7254 {
7255 rtx src = SET_SRC (pat);
7256 rtx dest = SET_DEST (pat);
7257 int src_regno, dest_regno;
7258
7259 /* Look for the ALLOC insn. */
7260 if (GET_CODE (src) == UNSPEC_VOLATILE
7261 && XINT (src, 1) == UNSPECV_ALLOC
7262 && GET_CODE (dest) == REG)
7263 {
7264 dest_regno = REGNO (dest);
7265
7266 /* If this isn't the final destination for ar.pfs, the alloc
7267 shouldn't have been marked frame related. */
7268 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7269 abort ();
7270
7271 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7272 ia64_dbx_register_number (dest_regno));
7273 return 1;
7274 }
7275
7276 /* Look for SP = .... */
7277 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7278 {
7279 if (GET_CODE (src) == PLUS)
7280 {
7281 rtx op0 = XEXP (src, 0);
7282 rtx op1 = XEXP (src, 1);
7283 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7284 {
7285 if (INTVAL (op1) < 0)
7286 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7287 -INTVAL (op1));
7288 else
7289 process_epilogue ();
7290 }
7291 else
7292 abort ();
7293 }
7294 else if (GET_CODE (src) == REG
7295 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7296 process_epilogue ();
7297 else
7298 abort ();
7299
7300 return 1;
7301 }
7302
7303 /* Look for a register move that needs an unwind directive. */
7304 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7305 {
7306 src_regno = REGNO (src);
7307 dest_regno = REGNO (dest);
7308
7309 switch (src_regno)
7310 {
7311 case BR_REG (0):
7312 /* Saving return address pointer. */
7313 if (dest_regno != current_frame_info.reg_save_b0)
7314 abort ();
7315 fprintf (asm_out_file, "\t.save rp, r%d\n",
7316 ia64_dbx_register_number (dest_regno));
7317 return 1;
7318
7319 case PR_REG (0):
7320 if (dest_regno != current_frame_info.reg_save_pr)
7321 abort ();
7322 fprintf (asm_out_file, "\t.save pr, r%d\n",
7323 ia64_dbx_register_number (dest_regno));
7324 return 1;
7325
7326 case AR_UNAT_REGNUM:
7327 if (dest_regno != current_frame_info.reg_save_ar_unat)
7328 abort ();
7329 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7330 ia64_dbx_register_number (dest_regno));
7331 return 1;
7332
7333 case AR_LC_REGNUM:
7334 if (dest_regno != current_frame_info.reg_save_ar_lc)
7335 abort ();
7336 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7337 ia64_dbx_register_number (dest_regno));
7338 return 1;
7339
7340 case STACK_POINTER_REGNUM:
7341 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7342 || ! frame_pointer_needed)
7343 abort ();
7344 fprintf (asm_out_file, "\t.vframe r%d\n",
7345 ia64_dbx_register_number (dest_regno));
7346 return 1;
7347
7348 default:
7349 /* Everything else should indicate being stored to memory. */
7350 abort ();
7351 }
7352 }
7353
7354 /* Look for a memory store that needs an unwind directive. */
7355 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7356 {
7357 long off;
7358 rtx base;
7359 const char *saveop;
7360
7361 if (GET_CODE (XEXP (dest, 0)) == REG)
7362 {
7363 base = XEXP (dest, 0);
7364 off = 0;
7365 }
7366 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7367 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7368 {
7369 base = XEXP (XEXP (dest, 0), 0);
7370 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7371 }
7372 else
7373 abort ();
7374
7375 if (base == hard_frame_pointer_rtx)
7376 {
7377 saveop = ".savepsp";
7378 off = - off;
7379 }
7380 else if (base == stack_pointer_rtx)
7381 saveop = ".savesp";
7382 else
7383 abort ();
7384
7385 src_regno = REGNO (src);
7386 switch (src_regno)
7387 {
7388 case BR_REG (0):
7389 if (current_frame_info.reg_save_b0 != 0)
7390 abort ();
7391 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7392 return 1;
7393
7394 case PR_REG (0):
7395 if (current_frame_info.reg_save_pr != 0)
7396 abort ();
7397 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7398 return 1;
7399
7400 case AR_LC_REGNUM:
7401 if (current_frame_info.reg_save_ar_lc != 0)
7402 abort ();
7403 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7404 return 1;
7405
7406 case AR_PFS_REGNUM:
7407 if (current_frame_info.reg_save_ar_pfs != 0)
7408 abort ();
7409 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7410 return 1;
7411
7412 case AR_UNAT_REGNUM:
7413 if (current_frame_info.reg_save_ar_unat != 0)
7414 abort ();
7415 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7416 return 1;
7417
7418 case GR_REG (4):
7419 case GR_REG (5):
7420 case GR_REG (6):
7421 case GR_REG (7):
7422 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7423 1 << (src_regno - GR_REG (4)));
7424 return 1;
7425
7426 case BR_REG (1):
7427 case BR_REG (2):
7428 case BR_REG (3):
7429 case BR_REG (4):
7430 case BR_REG (5):
7431 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7432 1 << (src_regno - BR_REG (1)));
7433 return 1;
7434
7435 case FR_REG (2):
7436 case FR_REG (3):
7437 case FR_REG (4):
7438 case FR_REG (5):
7439 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7440 1 << (src_regno - FR_REG (2)));
7441 return 1;
7442
7443 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7444 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7445 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7446 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7447 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7448 1 << (src_regno - FR_REG (12)));
7449 return 1;
7450
7451 default:
7452 return 0;
7453 }
7454 }
7455
7456 return 0;
7457 }
7458
7459
7460 /* This function looks at a single insn and emits any directives
7461 required to unwind this insn. */
7462 void
7463 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7464 {
7465 if (flag_unwind_tables
7466 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7467 {
7468 rtx pat;
7469
7470 if (GET_CODE (insn) == NOTE
7471 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7472 {
7473 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7474
7475 /* Restore unwind state from immediately before the epilogue. */
7476 if (need_copy_state)
7477 {
7478 fprintf (asm_out_file, "\t.body\n");
7479 fprintf (asm_out_file, "\t.copy_state 1\n");
7480 need_copy_state = false;
7481 }
7482 }
7483
7484 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7485 return;
7486
7487 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7488 if (pat)
7489 pat = XEXP (pat, 0);
7490 else
7491 pat = PATTERN (insn);
7492
7493 switch (GET_CODE (pat))
7494 {
7495 case SET:
7496 process_set (asm_out_file, pat);
7497 break;
7498
7499 case PARALLEL:
7500 {
7501 int par_index;
7502 int limit = XVECLEN (pat, 0);
7503 for (par_index = 0; par_index < limit; par_index++)
7504 {
7505 rtx x = XVECEXP (pat, 0, par_index);
7506 if (GET_CODE (x) == SET)
7507 process_set (asm_out_file, x);
7508 }
7509 break;
7510 }
7511
7512 default:
7513 abort ();
7514 }
7515 }
7516 }
7517
7518 \f
7519 void
7520 ia64_init_builtins (void)
7521 {
7522 tree psi_type_node = build_pointer_type (integer_type_node);
7523 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7524
7525 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7526 tree si_ftype_psi_si_si
7527 = build_function_type_list (integer_type_node,
7528 psi_type_node, integer_type_node,
7529 integer_type_node, NULL_TREE);
7530
7531 /* __sync_val_compare_and_swap_di */
7532 tree di_ftype_pdi_di_di
7533 = build_function_type_list (long_integer_type_node,
7534 pdi_type_node, long_integer_type_node,
7535 long_integer_type_node, NULL_TREE);
7536 /* __sync_bool_compare_and_swap_di */
7537 tree si_ftype_pdi_di_di
7538 = build_function_type_list (integer_type_node,
7539 pdi_type_node, long_integer_type_node,
7540 long_integer_type_node, NULL_TREE);
7541 /* __sync_synchronize */
7542 tree void_ftype_void
7543 = build_function_type (void_type_node, void_list_node);
7544
7545 /* __sync_lock_test_and_set_si */
7546 tree si_ftype_psi_si
7547 = build_function_type_list (integer_type_node,
7548 psi_type_node, integer_type_node, NULL_TREE);
7549
7550 /* __sync_lock_test_and_set_di */
7551 tree di_ftype_pdi_di
7552 = build_function_type_list (long_integer_type_node,
7553 pdi_type_node, long_integer_type_node,
7554 NULL_TREE);
7555
7556 /* __sync_lock_release_si */
7557 tree void_ftype_psi
7558 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7559
7560 /* __sync_lock_release_di */
7561 tree void_ftype_pdi
7562 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7563
7564 tree fpreg_type;
7565 tree float80_type;
7566
7567 /* The __fpreg type. */
7568 fpreg_type = make_node (REAL_TYPE);
7569 /* ??? The back end should know to load/save __fpreg variables using
7570 the ldf.fill and stf.spill instructions. */
7571 TYPE_PRECISION (fpreg_type) = 80;
7572 layout_type (fpreg_type);
7573 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
7574
7575 /* The __float80 type. */
7576 float80_type = make_node (REAL_TYPE);
7577 TYPE_PRECISION (float80_type) = 80;
7578 layout_type (float80_type);
7579 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
7580
7581 /* The __float128 type. */
7582 if (!TARGET_HPUX)
7583 {
7584 tree float128_type = make_node (REAL_TYPE);
7585 TYPE_PRECISION (float128_type) = 128;
7586 layout_type (float128_type);
7587 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
7588 }
7589 else
7590 /* Under HPUX, this is a synonym for "long double". */
7591 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
7592 "__float128");
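 /* Example declarations using the types registered above (illustrative
    only; not taken from any testsuite):

        __fpreg    f;   -- raw floating-point register format
        __float80  x;   -- IA-64 80-bit extended precision
        __float128 q;   -- quad precision (an alias of long double on HP-UX)
 */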
7593
7594 #define def_builtin(name, type, code) \
7595 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
7596 NULL, NULL_TREE)
7597
7598 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7599 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7600 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7601 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7602 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7603 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7604 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7605 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7606
7607 def_builtin ("__sync_synchronize", void_ftype_void,
7608 IA64_BUILTIN_SYNCHRONIZE);
7609
7610 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7611 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7612 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7613 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7614 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7615 IA64_BUILTIN_LOCK_RELEASE_SI);
7616 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7617 IA64_BUILTIN_LOCK_RELEASE_DI);
7618
7619 def_builtin ("__builtin_ia64_bsp",
7620 build_function_type (ptr_type_node, void_list_node),
7621 IA64_BUILTIN_BSP);
7622
7623 def_builtin ("__builtin_ia64_flushrs",
7624 build_function_type (void_type_node, void_list_node),
7625 IA64_BUILTIN_FLUSHRS);
7626
7627 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7628 IA64_BUILTIN_FETCH_AND_ADD_SI);
7629 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7630 IA64_BUILTIN_FETCH_AND_SUB_SI);
7631 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7632 IA64_BUILTIN_FETCH_AND_OR_SI);
7633 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7634 IA64_BUILTIN_FETCH_AND_AND_SI);
7635 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7636 IA64_BUILTIN_FETCH_AND_XOR_SI);
7637 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7638 IA64_BUILTIN_FETCH_AND_NAND_SI);
7639
7640 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7641 IA64_BUILTIN_ADD_AND_FETCH_SI);
7642 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7643 IA64_BUILTIN_SUB_AND_FETCH_SI);
7644 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7645 IA64_BUILTIN_OR_AND_FETCH_SI);
7646 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7647 IA64_BUILTIN_AND_AND_FETCH_SI);
7648 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7649 IA64_BUILTIN_XOR_AND_FETCH_SI);
7650 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7651 IA64_BUILTIN_NAND_AND_FETCH_SI);
7652
7653 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7654 IA64_BUILTIN_FETCH_AND_ADD_DI);
7655 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7656 IA64_BUILTIN_FETCH_AND_SUB_DI);
7657 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7658 IA64_BUILTIN_FETCH_AND_OR_DI);
7659 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7660 IA64_BUILTIN_FETCH_AND_AND_DI);
7661 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7662 IA64_BUILTIN_FETCH_AND_XOR_DI);
7663 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7664 IA64_BUILTIN_FETCH_AND_NAND_DI);
7665
7666 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7667 IA64_BUILTIN_ADD_AND_FETCH_DI);
7668 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7669 IA64_BUILTIN_SUB_AND_FETCH_DI);
7670 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7671 IA64_BUILTIN_OR_AND_FETCH_DI);
7672 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7673 IA64_BUILTIN_AND_AND_FETCH_DI);
7674 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7675 IA64_BUILTIN_XOR_AND_FETCH_DI);
7676 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7677 IA64_BUILTIN_NAND_AND_FETCH_DI);
7678
7679 #undef def_builtin
7680 }
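 /* Example use of the register-stack builtins defined above (a hedged
    sketch of the kind of code a garbage collector or unwinder might
    contain; scan_backing_store is a hypothetical helper):

        __builtin_ia64_flushrs ();             -- spill dirty stacked regs
        void *bsp = __builtin_ia64_bsp ();     -- top of the backing store
        scan_backing_store (bsp);
 */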
7681
7682 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7683
7684 mf
7685 tmp = [ptr];
7686 do {
7687 ret = tmp;
7688 ar.ccv = tmp;
7689 tmp <op>= value;
7690 cmpxchgsz.acq tmp = [ptr], tmp
7691 } while (tmp != ret)
7692 */
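 /* At the source level this corresponds to, e.g. (illustrative only;
    'refcount' is invented):

        static int refcount;
        int old = __sync_fetch_and_add_si (&refcount, 1);

    When the addend is one of the immediates accepted by fetchadd
    (+-1, 4, 8, 16), the special case below emits a single
    fetchadd4.acq/fetchadd8.acq instead of the cmpxchg loop above.  */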
7693
7694 static rtx
7695 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
7696 tree arglist, rtx target)
7697 {
7698 rtx ret, label, tmp, ccv, insn, mem, value;
7699 tree arg0, arg1;
7700
7701 arg0 = TREE_VALUE (arglist);
7702 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7703 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7704 #ifdef POINTERS_EXTEND_UNSIGNED
7705 if (GET_MODE(mem) != Pmode)
7706 mem = convert_memory_address (Pmode, mem);
7707 #endif
7708 value = expand_expr (arg1, NULL_RTX, mode, 0);
7709
7710 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7711 MEM_VOLATILE_P (mem) = 1;
7712
7713 if (target && register_operand (target, mode))
7714 ret = target;
7715 else
7716 ret = gen_reg_rtx (mode);
7717
7718 emit_insn (gen_mf ());
7719
7720 /* Special case for fetchadd instructions. */
7721 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7722 {
7723 if (mode == SImode)
7724 insn = gen_fetchadd_acq_si (ret, mem, value);
7725 else
7726 insn = gen_fetchadd_acq_di (ret, mem, value);
7727 emit_insn (insn);
7728 return ret;
7729 }
7730
7731 tmp = gen_reg_rtx (mode);
7732 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7733 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7734 emit_move_insn (tmp, mem);
7735
7736 label = gen_label_rtx ();
7737 emit_label (label);
7738 emit_move_insn (ret, tmp);
7739 convert_move (ccv, tmp, /*unsignedp=*/1);
7740
7741 /* Perform the specific operation. NAND arrives here as one_cmpl_optab;
7742 special-case it by complementing TMP and then falling through to AND. */
7743 if (binoptab == one_cmpl_optab)
7744 {
7745 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7746 binoptab = and_optab;
7747 }
7748 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7749
7750 if (mode == SImode)
7751 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7752 else
7753 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7754 emit_insn (insn);
7755
7756 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7757
7758 return ret;
7759 }
7760
7761 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7762
7763 mf
7764 tmp = [ptr];
7765 do {
7766 old = tmp;
7767 ar.ccv = tmp;
7768 ret = tmp <op> value;
7769 cmpxchgsz.acq tmp = [ptr], ret
7770 } while (tmp != old)
7771 */
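 /* E.g. (illustrative; 'counter' is invented):

        static int counter;
        int newval = __sync_add_and_fetch_si (&counter, 1);

    This differs from the fetch_and_op form above only in returning the
    value after the operation, which is why the loop retries until the
    cmpxchg result matches OLD rather than RET.  */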
7772
7773 static rtx
7774 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
7775 tree arglist, rtx target)
7776 {
7777 rtx old, label, tmp, ret, ccv, insn, mem, value;
7778 tree arg0, arg1;
7779
7780 arg0 = TREE_VALUE (arglist);
7781 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7782 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7783 #ifdef POINTERS_EXTEND_UNSIGNED
7784 if (GET_MODE(mem) != Pmode)
7785 mem = convert_memory_address (Pmode, mem);
7786 #endif
7787
7788 value = expand_expr (arg1, NULL_RTX, mode, 0);
7789
7790 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7791 MEM_VOLATILE_P (mem) = 1;
7792
7793 if (target && ! register_operand (target, mode))
7794 target = NULL_RTX;
7795
7796 emit_insn (gen_mf ());
7797 tmp = gen_reg_rtx (mode);
7798 old = gen_reg_rtx (mode);
7799 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7800 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7801
7802 emit_move_insn (tmp, mem);
7803
7804 label = gen_label_rtx ();
7805 emit_label (label);
7806 emit_move_insn (old, tmp);
7807 convert_move (ccv, tmp, /*unsignedp=*/1);
7808
7809 /* Perform the specific operation. NAND arrives here as one_cmpl_optab;
7810 special-case it by complementing TMP and then falling through to AND. */
7811 if (binoptab == one_cmpl_optab)
7812 {
7813 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7814 binoptab = and_optab;
7815 }
7816 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7817
7818 if (mode == SImode)
7819 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7820 else
7821 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7822 emit_insn (insn);
7823
7824 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7825
7826 return ret;
7827 }
7828
7829 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7830
7831 ar.ccv = oldval
7832 mf
7833 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7834 return ret
7835
7836 For bool_ it's the same except return ret == oldval.
7837 */
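 /* A typical caller-side pattern (a sketch; 'shared' is invented):

        static int shared;
        int old, new;
        do
          {
            old = shared;
            new = old * 2;
          }
        while (__sync_val_compare_and_swap_si (&shared, old, new) != old);

    The bool_ form would instead be tested directly as the loop condition.  */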
7838
7839 static rtx
7840 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
7841 int boolp, tree arglist, rtx target)
7842 {
7843 tree arg0, arg1, arg2;
7844 rtx mem, old, new, ccv, tmp, insn;
7845
7846 arg0 = TREE_VALUE (arglist);
7847 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7848 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7849 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7850 old = expand_expr (arg1, NULL_RTX, mode, 0);
7851 new = expand_expr (arg2, NULL_RTX, mode, 0);
7852
7853 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7854 MEM_VOLATILE_P (mem) = 1;
7855
7856 if (GET_MODE (old) != mode)
7857 old = convert_to_mode (mode, old, /*unsignedp=*/1);
7858 if (GET_MODE (new) != mode)
7859 new = convert_to_mode (mode, new, /*unsignedp=*/1);
7860
7861 if (! register_operand (old, mode))
7862 old = copy_to_mode_reg (mode, old);
7863 if (! register_operand (new, mode))
7864 new = copy_to_mode_reg (mode, new);
7865
7866 if (! boolp && target && register_operand (target, mode))
7867 tmp = target;
7868 else
7869 tmp = gen_reg_rtx (mode);
7870
7871 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7872 convert_move (ccv, old, /*unsignedp=*/1);
7873 emit_insn (gen_mf ());
7874 if (mode == SImode)
7875 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7876 else
7877 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7878 emit_insn (insn);
7879
7880 if (boolp)
7881 {
7882 if (! target)
7883 target = gen_reg_rtx (rmode);
7884 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7885 }
7886 else
7887 return tmp;
7888 }
7889
7890 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7891
7892 static rtx
7893 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
7894 rtx target)
7895 {
7896 tree arg0, arg1;
7897 rtx mem, new, ret, insn;
7898
7899 arg0 = TREE_VALUE (arglist);
7900 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7901 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7902 new = expand_expr (arg1, NULL_RTX, mode, 0);
7903
7904 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7905 MEM_VOLATILE_P (mem) = 1;
7906 if (! register_operand (new, mode))
7907 new = copy_to_mode_reg (mode, new);
7908
7909 if (target && register_operand (target, mode))
7910 ret = target;
7911 else
7912 ret = gen_reg_rtx (mode);
7913
7914 if (mode == SImode)
7915 insn = gen_xchgsi (ret, mem, new);
7916 else
7917 insn = gen_xchgdi (ret, mem, new);
7918 emit_insn (insn);
7919
7920 return ret;
7921 }
7922
7923 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7924
7925 static rtx
7926 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
7927 rtx target ATTRIBUTE_UNUSED)
7928 {
7929 tree arg0;
7930 rtx mem;
7931
7932 arg0 = TREE_VALUE (arglist);
7933 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7934
7935 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7936 MEM_VOLATILE_P (mem) = 1;
7937
7938 emit_move_insn (mem, const0_rtx);
7939
7940 return const0_rtx;
7941 }
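 /* Together, the two expanders above implement the usual test-and-set
    spinlock idiom (illustrative; 'lock' is invented):

        static int lock;

        while (__sync_lock_test_and_set_si (&lock, 1) != 0)
          ;                                  -- xchg4 returns the old value
        ...critical section...
        __sync_lock_release_si (&lock);      -- stores zero to release
 */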
7942
7943 rtx
7944 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
7945 enum machine_mode mode ATTRIBUTE_UNUSED,
7946 int ignore ATTRIBUTE_UNUSED)
7947 {
7948 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7949 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7950 tree arglist = TREE_OPERAND (exp, 1);
7951 enum machine_mode rmode = VOIDmode;
7952
7953 switch (fcode)
7954 {
7955 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7956 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7957 mode = SImode;
7958 rmode = SImode;
7959 break;
7960
7961 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7962 case IA64_BUILTIN_LOCK_RELEASE_SI:
7963 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7964 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7965 case IA64_BUILTIN_FETCH_AND_OR_SI:
7966 case IA64_BUILTIN_FETCH_AND_AND_SI:
7967 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7968 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7969 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7970 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7971 case IA64_BUILTIN_OR_AND_FETCH_SI:
7972 case IA64_BUILTIN_AND_AND_FETCH_SI:
7973 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7974 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7975 mode = SImode;
7976 break;
7977
7978 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7979 mode = DImode;
7980 rmode = SImode;
7981 break;
7982
7983 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7984 mode = DImode;
7985 rmode = DImode;
7986 break;
7987
7988 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7989 case IA64_BUILTIN_LOCK_RELEASE_DI:
7990 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7991 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7992 case IA64_BUILTIN_FETCH_AND_OR_DI:
7993 case IA64_BUILTIN_FETCH_AND_AND_DI:
7994 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7995 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7996 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7997 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7998 case IA64_BUILTIN_OR_AND_FETCH_DI:
7999 case IA64_BUILTIN_AND_AND_FETCH_DI:
8000 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8001 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8002 mode = DImode;
8003 break;
8004
8005 default:
8006 break;
8007 }
8008
8009 switch (fcode)
8010 {
8011 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8012 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8013 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8014 target);
8015
8016 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8017 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8018 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8019 target);
8020
8021 case IA64_BUILTIN_SYNCHRONIZE:
8022 emit_insn (gen_mf ());
8023 return const0_rtx;
8024
8025 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8026 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8027 return ia64_expand_lock_test_and_set (mode, arglist, target);
8028
8029 case IA64_BUILTIN_LOCK_RELEASE_SI:
8030 case IA64_BUILTIN_LOCK_RELEASE_DI:
8031 return ia64_expand_lock_release (mode, arglist, target);
8032
8033 case IA64_BUILTIN_BSP:
8034 if (! target || ! register_operand (target, DImode))
8035 target = gen_reg_rtx (DImode);
8036 emit_insn (gen_bsp_value (target));
8037 #ifdef POINTERS_EXTEND_UNSIGNED
8038 target = convert_memory_address (ptr_mode, target);
8039 #endif
8040 return target;
8041
8042 case IA64_BUILTIN_FLUSHRS:
8043 emit_insn (gen_flushrs ());
8044 return const0_rtx;
8045
8046 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8047 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8048 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8049
8050 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8051 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8052 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8053
8054 case IA64_BUILTIN_FETCH_AND_OR_SI:
8055 case IA64_BUILTIN_FETCH_AND_OR_DI:
8056 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8057
8058 case IA64_BUILTIN_FETCH_AND_AND_SI:
8059 case IA64_BUILTIN_FETCH_AND_AND_DI:
8060 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8061
8062 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8063 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8064 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8065
8066 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8067 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8068 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8069
8070 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8071 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8072 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8073
8074 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8075 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8076 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8077
8078 case IA64_BUILTIN_OR_AND_FETCH_SI:
8079 case IA64_BUILTIN_OR_AND_FETCH_DI:
8080 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8081
8082 case IA64_BUILTIN_AND_AND_FETCH_SI:
8083 case IA64_BUILTIN_AND_AND_FETCH_DI:
8084 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8085
8086 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8087 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8088 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8089
8090 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8091 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8092 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8093
8094 default:
8095 break;
8096 }
8097
8098 return NULL_RTX;
8099 }
8100
8101 /* On HP-UX IA64, aggregate parameters are passed in the most
8102 significant bits of the stack slot. */
8103
8104 enum direction
8105 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8106 {
8107 /* Exception to normal case for structures/unions/etc. */
8108
8109 if (type && AGGREGATE_TYPE_P (type)
8110 && int_size_in_bytes (type) < UNITS_PER_WORD)
8111 return upward;
8112
8113 /* Fall back to the default. */
8114 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8115 }
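 /* For instance (hypothetical type):

        struct tag { char c; };     -- 1 byte, smaller than UNITS_PER_WORD
        void f (struct tag x);

    X is padded upward, i.e. placed at the most significant end of its
    slot, instead of taking the default padding for its mode.  */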
8116
8117 /* Linked list of all external functions that are to be emitted by GCC.
8118 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8119 order to avoid putting out names that are never really used. */
8120
8121 struct extern_func_list GTY(())
8122 {
8123 struct extern_func_list *next;
8124 tree decl;
8125 };
8126
8127 static GTY(()) struct extern_func_list *extern_func_head;
8128
8129 static void
8130 ia64_hpux_add_extern_decl (tree decl)
8131 {
8132 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8133
8134 p->decl = decl;
8135 p->next = extern_func_head;
8136 extern_func_head = p;
8137 }
8138
8139 /* Print out the list of used global functions. */
8140
8141 static void
8142 ia64_hpux_file_end (void)
8143 {
8144 struct extern_func_list *p;
8145
8146 for (p = extern_func_head; p; p = p->next)
8147 {
8148 tree decl = p->decl;
8149 tree id = DECL_ASSEMBLER_NAME (decl);
8150
8151 if (!id)
8152 abort ();
8153
8154 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8155 {
8156 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8157
8158 TREE_ASM_WRITTEN (decl) = 1;
8159 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8160 fputs (TYPE_ASM_OP, asm_out_file);
8161 assemble_name (asm_out_file, name);
8162 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8163 }
8164 }
8165
8166 extern_func_head = 0;
8167 }
8168
8169 /* Set the SImode div/mod libfuncs, since init_integral_libfuncs only
8170 initializes modes of word_mode and larger. Rename the TFmode libfuncs
8171 using the HPUX conventions. __divtf3 is used for XFmode; we need to keep
8172 it for backward compatibility. */
8173
8174 static void
8175 ia64_init_libfuncs (void)
8176 {
8177 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8178 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8179 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8180 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8181
8182 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8183 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8184 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8185 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8186 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8187
8188 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8189 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8190 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8191 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8192 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8193 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8194
8195 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8196 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8197 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8198 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8199
8200 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8201 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8202 }
8203
8204 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8205
8206 static void
8207 ia64_hpux_init_libfuncs (void)
8208 {
8209 ia64_init_libfuncs ();
8210
8211 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8212 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8213 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8214
8215 /* ia64_expand_compare uses this. */
8216 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8217
8218 /* These should never be used. */
8219 set_optab_libfunc (eq_optab, TFmode, 0);
8220 set_optab_libfunc (ne_optab, TFmode, 0);
8221 set_optab_libfunc (gt_optab, TFmode, 0);
8222 set_optab_libfunc (ge_optab, TFmode, 0);
8223 set_optab_libfunc (lt_optab, TFmode, 0);
8224 set_optab_libfunc (le_optab, TFmode, 0);
8225 }
8226
8227 /* Rename the division and modulus functions in VMS. */
8228
8229 static void
8230 ia64_vms_init_libfuncs (void)
8231 {
8232 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8233 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8234 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8235 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8236 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8237 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8238 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8239 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8240 }
8241
8242 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8243 the HPUX conventions. */
8244
8245 static void
8246 ia64_sysv4_init_libfuncs (void)
8247 {
8248 ia64_init_libfuncs ();
8249
8250 /* These functions are not part of the HPUX TFmode interface. We
8251 use them instead of _U_Qfcmp, which doesn't work the way we
8252 expect. */
8253 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8254 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8255 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8256 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8257 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8258 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8259
8260 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8261 glibc doesn't have them. */
8262 }
8263 \f
8264 /* Switch to the section to which we should output X. The only thing
8265 special we do here is to honor small data. */
8266
8267 static void
8268 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8269 unsigned HOST_WIDE_INT align)
8270 {
8271 if (GET_MODE_SIZE (mode) > 0
8272 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8273 sdata_section ();
8274 else
8275 default_elf_select_rtx_section (mode, x, align);
8276 }
8277
8278 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8279 Pretend flag_pic is always set. */
8280
8281 static void
8282 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8283 {
8284 default_elf_select_section_1 (exp, reloc, align, true);
8285 }
8286
8287 static void
8288 ia64_rwreloc_unique_section (tree decl, int reloc)
8289 {
8290 default_unique_section_1 (decl, reloc, true);
8291 }
8292
8293 static void
8294 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8295 unsigned HOST_WIDE_INT align)
8296 {
8297 int save_pic = flag_pic;
8298 flag_pic = 1;
8299 ia64_select_rtx_section (mode, x, align);
8300 flag_pic = save_pic;
8301 }
8302
8303 static unsigned int
8304 ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
8305 {
8306 return default_section_type_flags_1 (decl, name, reloc, true);
8307 }
8308
8309 /* Return true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8310 structure type and the address of that return value should be passed
8311 in out0, rather than in r8. */
8312
8313 static bool
8314 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8315 {
8316 tree ret_type = TREE_TYPE (fntype);
8317
8318 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8319 as the structure return address parameter, if the return value
8320 type has a non-trivial copy constructor or destructor. It is not
8321 clear if this same convention should be used for other
8322 programming languages. Until G++ 3.4, we incorrectly used r8 for
8323 these return values. */
8324 return (abi_version_at_least (2)
8325 && ret_type
8326 && TYPE_MODE (ret_type) == BLKmode
8327 && TREE_ADDRESSABLE (ret_type)
8328 && strcmp (lang_hooks.name, "GNU C++") == 0);
8329 }
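 /* For example (hypothetical C++ type):

        struct S { S (const S &); ~S (); int i; };
        S make (void);

    S has a non-trivial copy constructor and destructor, so it is returned
    in memory and, under the Itanium C++ ABI, the address of the return
    slot is passed in out0; 'this' of a thunk then moves to the second
    parameter slot (see ia64_output_mi_thunk below).  */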
8330
8331 /* Output the assembler code for a thunk function. THUNK is the
8332 declaration for the thunk function itself, FUNCTION is the decl for
8333 the target function. DELTA is an immediate constant offset to be
8334 added to THIS. If VCALL_OFFSET is nonzero, the word at
8335 *(*this + vcall_offset) should be added to THIS. */
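 /* A sketch of when such a thunk arises (hypothetical C++):

        struct A { virtual void f (); int a; };
        struct B { virtual void g (); int b; };
        struct C : A, B { void g (); };

    The entry for g in B's vtable within C must adjust 'this' from the
    B subobject back to the full C object before tail-calling C::g;
    DELTA carries that offset, and VCALL_OFFSET is used for virtual
    bases, where the adjustment is read from the vtable.  */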
8336
8337 static void
8338 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8339 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8340 tree function)
8341 {
8342 rtx this, insn, funexp;
8343 unsigned int this_parmno;
8344 unsigned int this_regno;
8345
8346 reload_completed = 1;
8347 epilogue_completed = 1;
8348 no_new_pseudos = 1;
8349 reset_block_changes ();
8350
8351 /* Set things up as ia64_expand_prologue might. */
8352 last_scratch_gr_reg = 15;
8353
8354 memset (&current_frame_info, 0, sizeof (current_frame_info));
8355 current_frame_info.spill_cfa_off = -16;
8356 current_frame_info.n_input_regs = 1;
8357 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8358
8359 /* Mark the end of the (empty) prologue. */
8360 emit_note (NOTE_INSN_PROLOGUE_END);
8361
8362 /* Figure out whether "this" will be the first parameter (the
8363 typical case) or the second parameter (as happens when the
8364 virtual function returns certain class objects). */
8365 this_parmno
8366 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8367 ? 1 : 0);
8368 this_regno = IN_REG (this_parmno);
8369 if (!TARGET_REG_NAMES)
8370 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8371
8372 this = gen_rtx_REG (Pmode, this_regno);
8373 if (TARGET_ILP32)
8374 {
8375 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8376 REG_POINTER (tmp) = 1;
8377 if (delta && CONST_OK_FOR_I (delta))
8378 {
8379 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8380 delta = 0;
8381 }
8382 else
8383 emit_insn (gen_ptr_extend (this, tmp));
8384 }
8385
8386 /* Apply the constant offset, if required. */
8387 if (delta)
8388 {
8389 rtx delta_rtx = GEN_INT (delta);
8390
8391 if (!CONST_OK_FOR_I (delta))
8392 {
8393 rtx tmp = gen_rtx_REG (Pmode, 2);
8394 emit_move_insn (tmp, delta_rtx);
8395 delta_rtx = tmp;
8396 }
8397 emit_insn (gen_adddi3 (this, this, delta_rtx));
8398 }
8399
8400 /* Apply the offset from the vtable, if required. */
8401 if (vcall_offset)
8402 {
8403 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8404 rtx tmp = gen_rtx_REG (Pmode, 2);
8405
8406 if (TARGET_ILP32)
8407 {
8408 rtx t = gen_rtx_REG (ptr_mode, 2);
8409 REG_POINTER (t) = 1;
8410 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8411 if (CONST_OK_FOR_I (vcall_offset))
8412 {
8413 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8414 vcall_offset_rtx));
8415 vcall_offset = 0;
8416 }
8417 else
8418 emit_insn (gen_ptr_extend (tmp, t));
8419 }
8420 else
8421 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8422
8423 if (vcall_offset)
8424 {
8425 if (!CONST_OK_FOR_J (vcall_offset))
8426 {
8427 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8428 emit_move_insn (tmp2, vcall_offset_rtx);
8429 vcall_offset_rtx = tmp2;
8430 }
8431 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8432 }
8433
8434 if (TARGET_ILP32)
8435 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8436 gen_rtx_MEM (ptr_mode, tmp));
8437 else
8438 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8439
8440 emit_insn (gen_adddi3 (this, this, tmp));
8441 }
8442
8443 /* Generate a tail call to the target function. */
8444 if (! TREE_USED (function))
8445 {
8446 assemble_external (function);
8447 TREE_USED (function) = 1;
8448 }
8449 funexp = XEXP (DECL_RTL (function), 0);
8450 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8451 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8452 insn = get_last_insn ();
8453 SIBLING_CALL_P (insn) = 1;
8454
8455 /* Code generation for calls relies on splitting. */
8456 reload_completed = 1;
8457 epilogue_completed = 1;
8458 try_split (PATTERN (insn), insn, 0);
8459
8460 emit_barrier ();
8461
8462 /* Run just enough of rest_of_compilation to get the insns emitted.
8463 There's not really enough bulk here to make other passes such as
8464 instruction scheduling worthwhile. Note that use_thunk calls
8465 assemble_start_function and assemble_end_function. */
8466
8467 insn_locators_initialize ();
8468 emit_all_insn_group_barriers (NULL);
8469 insn = get_insns ();
8470 shorten_branches (insn);
8471 final_start_function (insn, file, 1);
8472 final (insn, file, 1, 0);
8473 final_end_function ();
8474
8475 reload_completed = 0;
8476 epilogue_completed = 0;
8477 no_new_pseudos = 0;
8478 }
8479
8480 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8481
8482 static rtx
8483 ia64_struct_value_rtx (tree fntype,
8484 int incoming ATTRIBUTE_UNUSED)
8485 {
8486 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8487 return NULL_RTX;
8488 return gen_rtx_REG (Pmode, GR_REG (8));
8489 }
8490
8491 static bool
8492 ia64_scalar_mode_supported_p (enum machine_mode mode)
8493 {
8494 switch (mode)
8495 {
8496 case QImode:
8497 case HImode:
8498 case SImode:
8499 case DImode:
8500 case TImode:
8501 return true;
8502
8503 case SFmode:
8504 case DFmode:
8505 case XFmode:
8506 return true;
8507
8508 case TFmode:
8509 return TARGET_HPUX;
8510
8511 default:
8512 return false;
8513 }
8514 }
8515
8516 #include "gt-ia64.h"