1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "recog.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "except.h"
39 #include "function.h"
40 #include "ggc.h"
41 #include "basic-block.h"
42 #include "toplev.h"
43 #include "sched-int.h"
44 #include "timevar.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "tm_p.h"
48
49 /* This is used for communication between ASM_OUTPUT_LABEL and
50 ASM_OUTPUT_LABELREF. */
51 int ia64_asm_output_label = 0;
52
53 /* Define the information needed to generate branch and scc insns. This is
54 stored from the compare operation. */
55 struct rtx_def * ia64_compare_op0;
56 struct rtx_def * ia64_compare_op1;
57
58 /* Register names for ia64_expand_prologue. */
59 static const char * const ia64_reg_numbers[96] =
60 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
61 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
62 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
63 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
64 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
65 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
66 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
67 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
68 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
69 "r104","r105","r106","r107","r108","r109","r110","r111",
70 "r112","r113","r114","r115","r116","r117","r118","r119",
71 "r120","r121","r122","r123","r124","r125","r126","r127"};
72
73 /* ??? These strings could be shared with REGISTER_NAMES. */
74 static const char * const ia64_input_reg_names[8] =
75 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
76
77 /* ??? These strings could be shared with REGISTER_NAMES. */
78 static const char * const ia64_local_reg_names[80] =
79 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
80 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
81 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
82 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
83 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
84 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
85 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
86 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
87 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
88 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
89
90 /* ??? These strings could be shared with REGISTER_NAMES. */
91 static const char * const ia64_output_reg_names[8] =
92 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
93
94 /* String used with the -mfixed-range= option. */
95 const char *ia64_fixed_range_string;
96
97 /* Determines whether we use adds, addl, or movl to generate our
98 TLS immediate offsets. */
99 int ia64_tls_size = 22;
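/* On IA-64, adds takes a 14-bit immediate, addl a 22-bit immediate, and
   movl a full 64-bit immediate, so the default of 22 selects addl.  */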
100
101 /* String used with the -mtls-size= option. */
102 const char *ia64_tls_size_string;
103
104 /* Determines whether we run our final scheduling pass or not. We always
105 avoid the normal second scheduling pass. */
106 static int ia64_flag_schedule_insns2;
107
108 /* Variables which are this size or smaller are put in the sdata/sbss
109 sections. */
110
111 unsigned int ia64_section_threshold;
112 \f
113 static rtx gen_tls_get_addr PARAMS ((void));
114 static rtx gen_thread_pointer PARAMS ((void));
115 static int find_gr_spill PARAMS ((int));
116 static int next_scratch_gr_reg PARAMS ((void));
117 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
118 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
119 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
120 static void finish_spill_pointers PARAMS ((void));
121 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
122 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
123 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
124 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
125 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
126 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
127
128 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
129 static void fix_range PARAMS ((const char *));
130 static struct machine_function * ia64_init_machine_status PARAMS ((void));
131 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
132 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
133 static void emit_predicate_relation_info PARAMS ((void));
134 static bool ia64_in_small_data_p PARAMS ((tree));
135 static void ia64_encode_section_info PARAMS ((tree, int));
136 static const char *ia64_strip_name_encoding PARAMS ((const char *));
137 static void process_epilogue PARAMS ((void));
138 static int process_set PARAMS ((FILE *, rtx));
139
140 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
141 tree, rtx));
142 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
143 tree, rtx));
144 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
145 tree, rtx));
146 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
147 tree, rtx));
148 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
149 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
150 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
151 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
152 static void ia64_output_function_end_prologue PARAMS ((FILE *));
153
154 static int ia64_issue_rate PARAMS ((void));
155 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
156 static void ia64_sched_init PARAMS ((FILE *, int, int));
157 static void ia64_sched_finish PARAMS ((FILE *, int));
158 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
159 int *, int, int));
160 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
161 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
162 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
163
164 static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
165 unsigned HOST_WIDE_INT));
166 static void ia64_aix_select_section PARAMS ((tree, int,
167 unsigned HOST_WIDE_INT))
168 ATTRIBUTE_UNUSED;
169 static void ia64_aix_unique_section PARAMS ((tree, int))
170 ATTRIBUTE_UNUSED;
171 static void ia64_aix_select_rtx_section PARAMS ((enum machine_mode, rtx,
172 unsigned HOST_WIDE_INT))
173 ATTRIBUTE_UNUSED;
174 \f
175 /* Table of valid machine attributes. */
176 static const struct attribute_spec ia64_attribute_table[] =
177 {
178 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
179 { "syscall_linkage", 0, 0, false, true, true, NULL },
180 { NULL, 0, 0, false, false, false, NULL }
181 };
182
183 /* Initialize the GCC target structure. */
184 #undef TARGET_ATTRIBUTE_TABLE
185 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
186
187 #undef TARGET_INIT_BUILTINS
188 #define TARGET_INIT_BUILTINS ia64_init_builtins
189
190 #undef TARGET_EXPAND_BUILTIN
191 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
192
193 #undef TARGET_ASM_BYTE_OP
194 #define TARGET_ASM_BYTE_OP "\tdata1\t"
195 #undef TARGET_ASM_ALIGNED_HI_OP
196 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
197 #undef TARGET_ASM_ALIGNED_SI_OP
198 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
199 #undef TARGET_ASM_ALIGNED_DI_OP
200 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
201 #undef TARGET_ASM_UNALIGNED_HI_OP
202 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
203 #undef TARGET_ASM_UNALIGNED_SI_OP
204 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
205 #undef TARGET_ASM_UNALIGNED_DI_OP
206 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
207 #undef TARGET_ASM_INTEGER
208 #define TARGET_ASM_INTEGER ia64_assemble_integer
209
210 #undef TARGET_ASM_FUNCTION_PROLOGUE
211 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
212 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
213 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
214 #undef TARGET_ASM_FUNCTION_EPILOGUE
215 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
216
217 #undef TARGET_IN_SMALL_DATA_P
218 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
219 #undef TARGET_ENCODE_SECTION_INFO
220 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
221 #undef TARGET_STRIP_NAME_ENCODING
222 #define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding
223
224 #undef TARGET_SCHED_ADJUST_COST
225 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
226 #undef TARGET_SCHED_ISSUE_RATE
227 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
228 #undef TARGET_SCHED_VARIABLE_ISSUE
229 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
230 #undef TARGET_SCHED_INIT
231 #define TARGET_SCHED_INIT ia64_sched_init
232 #undef TARGET_SCHED_FINISH
233 #define TARGET_SCHED_FINISH ia64_sched_finish
234 #undef TARGET_SCHED_REORDER
235 #define TARGET_SCHED_REORDER ia64_sched_reorder
236 #undef TARGET_SCHED_REORDER2
237 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
238
239 #ifdef HAVE_AS_TLS
240 #undef TARGET_HAVE_TLS
241 #define TARGET_HAVE_TLS true
242 #endif
243
244 struct gcc_target targetm = TARGET_INITIALIZER;
245 \f
246 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
247
248 int
249 call_operand (op, mode)
250 rtx op;
251 enum machine_mode mode;
252 {
253 if (mode != GET_MODE (op))
254 return 0;
255
256 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
257 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
258 }
259
260 /* Return 1 if OP refers to a symbol in the sdata section. */
261
262 int
263 sdata_symbolic_operand (op, mode)
264 rtx op;
265 enum machine_mode mode ATTRIBUTE_UNUSED;
266 {
267 switch (GET_CODE (op))
268 {
269 case CONST:
270 if (GET_CODE (XEXP (op, 0)) != PLUS
271 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
272 break;
273 op = XEXP (XEXP (op, 0), 0);
274 /* FALLTHRU */
275
276 case SYMBOL_REF:
277 if (CONSTANT_POOL_ADDRESS_P (op))
278 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
279 else
280 {
281 const char *str = XSTR (op, 0);
282 return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
283 }
284
285 default:
286 break;
287 }
288
289 return 0;
290 }
291
292 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
293
294 int
295 got_symbolic_operand (op, mode)
296 rtx op;
297 enum machine_mode mode ATTRIBUTE_UNUSED;
298 {
299 switch (GET_CODE (op))
300 {
301 case CONST:
302 op = XEXP (op, 0);
303 if (GET_CODE (op) != PLUS)
304 return 0;
305 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
306 return 0;
307 op = XEXP (op, 1);
308 if (GET_CODE (op) != CONST_INT)
309 return 0;
310
311 return 1;
312
313 /* Ok if we're not using GOT entries at all. */
314 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
315 return 1;
316
317 /* "Ok" while emitting rtl, since otherwise we won't be provided
318 with the entire offset during emission, which makes it very
319 hard to split the offset into high and low parts. */
320 if (rtx_equal_function_value_matters)
321 return 1;
322
323 /* Force the low 14 bits of the constant to zero so that we do not
324 use up so many GOT entries. */
325 return (INTVAL (op) & 0x3fff) == 0;
326
327 case SYMBOL_REF:
328 case LABEL_REF:
329 return 1;
330
331 default:
332 break;
333 }
334 return 0;
335 }
336
337 /* Return 1 if OP refers to a symbol. */
338
339 int
340 symbolic_operand (op, mode)
341 rtx op;
342 enum machine_mode mode ATTRIBUTE_UNUSED;
343 {
344 switch (GET_CODE (op))
345 {
346 case CONST:
347 case SYMBOL_REF:
348 case LABEL_REF:
349 return 1;
350
351 default:
352 break;
353 }
354 return 0;
355 }
356
357 /* Return tls_model if OP refers to a TLS symbol. */
358
359 int
360 tls_symbolic_operand (op, mode)
361 rtx op;
362 enum machine_mode mode ATTRIBUTE_UNUSED;
363 {
364 const char *str;
365
366 if (GET_CODE (op) != SYMBOL_REF)
367 return 0;
368 str = XSTR (op, 0);
369 if (str[0] != ENCODE_SECTION_INFO_CHAR)
370 return 0;
371 switch (str[1])
372 {
373 case 'G':
374 return TLS_MODEL_GLOBAL_DYNAMIC;
375 case 'L':
376 return TLS_MODEL_LOCAL_DYNAMIC;
377 case 'i':
378 return TLS_MODEL_INITIAL_EXEC;
379 case 'l':
380 return TLS_MODEL_LOCAL_EXEC;
381 }
382 return 0;
383 }
384
385
386 /* Return 1 if OP refers to a function. */
387
388 int
389 function_operand (op, mode)
390 rtx op;
391 enum machine_mode mode ATTRIBUTE_UNUSED;
392 {
393 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
394 return 1;
395 else
396 return 0;
397 }
398
399 /* Return 1 if OP is setjmp or a similar function. */
400
401 /* ??? This is an unsatisfying solution. Should rethink. */
402
403 int
404 setjmp_operand (op, mode)
405 rtx op;
406 enum machine_mode mode ATTRIBUTE_UNUSED;
407 {
408 const char *name;
409 int retval = 0;
410
411 if (GET_CODE (op) != SYMBOL_REF)
412 return 0;
413
414 name = XSTR (op, 0);
415
416 /* The following code is borrowed from special_function_p in calls.c. */
417
418 /* Disregard prefix _, __ or __x. */
419 if (name[0] == '_')
420 {
421 if (name[1] == '_' && name[2] == 'x')
422 name += 3;
423 else if (name[1] == '_')
424 name += 2;
425 else
426 name += 1;
427 }
428
429 if (name[0] == 's')
430 {
431 retval
432 = ((name[1] == 'e'
433 && (! strcmp (name, "setjmp")
434 || ! strcmp (name, "setjmp_syscall")))
435 || (name[1] == 'i'
436 && ! strcmp (name, "sigsetjmp"))
437 || (name[1] == 'a'
438 && ! strcmp (name, "savectx")));
439 }
440 else if ((name[0] == 'q' && name[1] == 's'
441 && ! strcmp (name, "qsetjmp"))
442 || (name[0] == 'v' && name[1] == 'f'
443 && ! strcmp (name, "vfork")))
444 retval = 1;
445
446 return retval;
447 }
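/* For example, a reference to "__sigsetjmp" has its "__" prefix stripped
   above, leaving "sigsetjmp", which matches the name[1] == 'i' case, so the
   call is treated as setjmp-like.  */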
448
449 /* Return 1 if OP is a general operand, but exclude symbolic operands when
450    generating PIC code.  */
451
452 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
453 from PREDICATE_CODES. */
454
455 int
456 move_operand (op, mode)
457 rtx op;
458 enum machine_mode mode;
459 {
460 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
461 return 0;
462
463 return general_operand (op, mode);
464 }
465
466 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
467
468 int
469 gr_register_operand (op, mode)
470 rtx op;
471 enum machine_mode mode;
472 {
473 if (! register_operand (op, mode))
474 return 0;
475 if (GET_CODE (op) == SUBREG)
476 op = SUBREG_REG (op);
477 if (GET_CODE (op) == REG)
478 {
479 unsigned int regno = REGNO (op);
480 if (regno < FIRST_PSEUDO_REGISTER)
481 return GENERAL_REGNO_P (regno);
482 }
483 return 1;
484 }
485
486 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
487
488 int
489 fr_register_operand (op, mode)
490 rtx op;
491 enum machine_mode mode;
492 {
493 if (! register_operand (op, mode))
494 return 0;
495 if (GET_CODE (op) == SUBREG)
496 op = SUBREG_REG (op);
497 if (GET_CODE (op) == REG)
498 {
499 unsigned int regno = REGNO (op);
500 if (regno < FIRST_PSEUDO_REGISTER)
501 return FR_REGNO_P (regno);
502 }
503 return 1;
504 }
505
506 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
507
508 int
509 grfr_register_operand (op, mode)
510 rtx op;
511 enum machine_mode mode;
512 {
513 if (! register_operand (op, mode))
514 return 0;
515 if (GET_CODE (op) == SUBREG)
516 op = SUBREG_REG (op);
517 if (GET_CODE (op) == REG)
518 {
519 unsigned int regno = REGNO (op);
520 if (regno < FIRST_PSEUDO_REGISTER)
521 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
522 }
523 return 1;
524 }
525
526 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
527
528 int
529 gr_nonimmediate_operand (op, mode)
530 rtx op;
531 enum machine_mode mode;
532 {
533 if (! nonimmediate_operand (op, mode))
534 return 0;
535 if (GET_CODE (op) == SUBREG)
536 op = SUBREG_REG (op);
537 if (GET_CODE (op) == REG)
538 {
539 unsigned int regno = REGNO (op);
540 if (regno < FIRST_PSEUDO_REGISTER)
541 return GENERAL_REGNO_P (regno);
542 }
543 return 1;
544 }
545
546 /* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */
547
548 int
549 fr_nonimmediate_operand (op, mode)
550 rtx op;
551 enum machine_mode mode;
552 {
553 if (! nonimmediate_operand (op, mode))
554 return 0;
555 if (GET_CODE (op) == SUBREG)
556 op = SUBREG_REG (op);
557 if (GET_CODE (op) == REG)
558 {
559 unsigned int regno = REGNO (op);
560 if (regno < FIRST_PSEUDO_REGISTER)
561 return FR_REGNO_P (regno);
562 }
563 return 1;
564 }
565
566 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
567
568 int
569 grfr_nonimmediate_operand (op, mode)
570 rtx op;
571 enum machine_mode mode;
572 {
573 if (! nonimmediate_operand (op, mode))
574 return 0;
575 if (GET_CODE (op) == SUBREG)
576 op = SUBREG_REG (op);
577 if (GET_CODE (op) == REG)
578 {
579 unsigned int regno = REGNO (op);
580 if (regno < FIRST_PSEUDO_REGISTER)
581 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
582 }
583 return 1;
584 }
585
586 /* Return 1 if OP is a GR register operand, or zero. */
587
588 int
589 gr_reg_or_0_operand (op, mode)
590 rtx op;
591 enum machine_mode mode;
592 {
593 return (op == const0_rtx || gr_register_operand (op, mode));
594 }
595
596 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
597
598 int
599 gr_reg_or_5bit_operand (op, mode)
600 rtx op;
601 enum machine_mode mode;
602 {
603 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
604 || GET_CODE (op) == CONSTANT_P_RTX
605 || gr_register_operand (op, mode));
606 }
607
608 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
609
610 int
611 gr_reg_or_6bit_operand (op, mode)
612 rtx op;
613 enum machine_mode mode;
614 {
615 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
616 || GET_CODE (op) == CONSTANT_P_RTX
617 || gr_register_operand (op, mode));
618 }
619
620 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
621
622 int
623 gr_reg_or_8bit_operand (op, mode)
624 rtx op;
625 enum machine_mode mode;
626 {
627 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
628 || GET_CODE (op) == CONSTANT_P_RTX
629 || gr_register_operand (op, mode));
630 }
631
632 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
633
634 int
635 grfr_reg_or_8bit_operand (op, mode)
636 rtx op;
637 enum machine_mode mode;
638 {
639 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
640 || GET_CODE (op) == CONSTANT_P_RTX
641 || grfr_register_operand (op, mode));
642 }
643
644 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
645 operand. */
646
647 int
648 gr_reg_or_8bit_adjusted_operand (op, mode)
649 rtx op;
650 enum machine_mode mode;
651 {
652 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
653 || GET_CODE (op) == CONSTANT_P_RTX
654 || gr_register_operand (op, mode));
655 }
656
657 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
658 immediate and an 8 bit adjusted immediate operand. This is necessary
659 because when we emit a compare, we don't know what the condition will be,
660 so we need the union of the immediates accepted by GT and LT. */
661
662 int
663 gr_reg_or_8bit_and_adjusted_operand (op, mode)
664 rtx op;
665 enum machine_mode mode;
666 {
667 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
668 && CONST_OK_FOR_L (INTVAL (op)))
669 || GET_CODE (op) == CONSTANT_P_RTX
670 || gr_register_operand (op, mode));
671 }
672
673 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
674
675 int
676 gr_reg_or_14bit_operand (op, mode)
677 rtx op;
678 enum machine_mode mode;
679 {
680 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
681 || GET_CODE (op) == CONSTANT_P_RTX
682 || gr_register_operand (op, mode));
683 }
684
685 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
686
687 int
688 gr_reg_or_22bit_operand (op, mode)
689 rtx op;
690 enum machine_mode mode;
691 {
692 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
693 || GET_CODE (op) == CONSTANT_P_RTX
694 || gr_register_operand (op, mode));
695 }
696
697 /* Return 1 if OP is a 6 bit immediate operand. */
698
699 int
700 shift_count_operand (op, mode)
701 rtx op;
702 enum machine_mode mode ATTRIBUTE_UNUSED;
703 {
704 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
705 || GET_CODE (op) == CONSTANT_P_RTX);
706 }
707
708 /* Return 1 if OP is a 5 bit immediate operand. */
709
710 int
711 shift_32bit_count_operand (op, mode)
712 rtx op;
713 enum machine_mode mode ATTRIBUTE_UNUSED;
714 {
715 return ((GET_CODE (op) == CONST_INT
716 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
717 || GET_CODE (op) == CONSTANT_P_RTX);
718 }
719
720 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
721
722 int
723 shladd_operand (op, mode)
724 rtx op;
725 enum machine_mode mode ATTRIBUTE_UNUSED;
726 {
727 return (GET_CODE (op) == CONST_INT
728 && (INTVAL (op) == 2 || INTVAL (op) == 4
729 || INTVAL (op) == 8 || INTVAL (op) == 16));
730 }
731
732 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
733
734 int
735 fetchadd_operand (op, mode)
736 rtx op;
737 enum machine_mode mode ATTRIBUTE_UNUSED;
738 {
739 return (GET_CODE (op) == CONST_INT
740 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
741 INTVAL (op) == -4 || INTVAL (op) == -1 ||
742 INTVAL (op) == 1 || INTVAL (op) == 4 ||
743 INTVAL (op) == 8 || INTVAL (op) == 16));
744 }
745
746 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
747
748 int
749 fr_reg_or_fp01_operand (op, mode)
750 rtx op;
751 enum machine_mode mode;
752 {
753 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
754 || fr_register_operand (op, mode));
755 }
756
757 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
758 POST_MODIFY with a REG as displacement. */
759
760 int
761 destination_operand (op, mode)
762 rtx op;
763 enum machine_mode mode;
764 {
765 if (! nonimmediate_operand (op, mode))
766 return 0;
767 if (GET_CODE (op) == MEM
768 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
769 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
770 return 0;
771 return 1;
772 }
773
774 /* Like memory_operand, but don't allow post-increments. */
775
776 int
777 not_postinc_memory_operand (op, mode)
778 rtx op;
779 enum machine_mode mode;
780 {
781 return (memory_operand (op, mode)
782 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
783 }
784
785 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
786 signed immediate operand. */
787
788 int
789 normal_comparison_operator (op, mode)
790 register rtx op;
791 enum machine_mode mode;
792 {
793 enum rtx_code code = GET_CODE (op);
794 return ((mode == VOIDmode || GET_MODE (op) == mode)
795 && (code == EQ || code == NE
796 || code == GT || code == LE || code == GTU || code == LEU));
797 }
798
799 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
800 signed immediate operand. */
801
802 int
803 adjusted_comparison_operator (op, mode)
804 register rtx op;
805 enum machine_mode mode;
806 {
807 enum rtx_code code = GET_CODE (op);
808 return ((mode == VOIDmode || GET_MODE (op) == mode)
809 && (code == LT || code == GE || code == LTU || code == GEU));
810 }
811
812 /* Return 1 if this is a signed inequality operator. */
813
814 int
815 signed_inequality_operator (op, mode)
816 register rtx op;
817 enum machine_mode mode;
818 {
819 enum rtx_code code = GET_CODE (op);
820 return ((mode == VOIDmode || GET_MODE (op) == mode)
821 && (code == GE || code == GT
822 || code == LE || code == LT));
823 }
824
825 /* Return 1 if this operator is valid for predication. */
826
827 int
828 predicate_operator (op, mode)
829 register rtx op;
830 enum machine_mode mode;
831 {
832 enum rtx_code code = GET_CODE (op);
833 return ((GET_MODE (op) == mode || mode == VOIDmode)
834 && (code == EQ || code == NE));
835 }
836
837 /* Return 1 if this operator can be used in a conditional operation. */
838
839 int
840 condop_operator (op, mode)
841 register rtx op;
842 enum machine_mode mode;
843 {
844 enum rtx_code code = GET_CODE (op);
845 return ((GET_MODE (op) == mode || mode == VOIDmode)
846 && (code == PLUS || code == MINUS || code == AND
847 || code == IOR || code == XOR));
848 }
849
850 /* Return 1 if this is the ar.lc register. */
851
852 int
853 ar_lc_reg_operand (op, mode)
854 register rtx op;
855 enum machine_mode mode;
856 {
857 return (GET_MODE (op) == DImode
858 && (mode == DImode || mode == VOIDmode)
859 && GET_CODE (op) == REG
860 && REGNO (op) == AR_LC_REGNUM);
861 }
862
863 /* Return 1 if this is the ar.ccv register. */
864
865 int
866 ar_ccv_reg_operand (op, mode)
867 register rtx op;
868 enum machine_mode mode;
869 {
870 return ((GET_MODE (op) == mode || mode == VOIDmode)
871 && GET_CODE (op) == REG
872 && REGNO (op) == AR_CCV_REGNUM);
873 }
874
875 /* Return 1 if this is the ar.pfs register. */
876
877 int
878 ar_pfs_reg_operand (op, mode)
879 register rtx op;
880 enum machine_mode mode;
881 {
882 return ((GET_MODE (op) == mode || mode == VOIDmode)
883 && GET_CODE (op) == REG
884 && REGNO (op) == AR_PFS_REGNUM);
885 }
886
887 /* Like general_operand, but don't allow (mem (addressof)). */
888
889 int
890 general_tfmode_operand (op, mode)
891 rtx op;
892 enum machine_mode mode;
893 {
894 if (! general_operand (op, mode))
895 return 0;
896 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
897 return 0;
898 return 1;
899 }
900
901 /* Similarly. */
902
903 int
904 destination_tfmode_operand (op, mode)
905 rtx op;
906 enum machine_mode mode;
907 {
908 if (! destination_operand (op, mode))
909 return 0;
910 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
911 return 0;
912 return 1;
913 }
914
915 /* Similarly. */
916
917 int
918 tfreg_or_fp01_operand (op, mode)
919 rtx op;
920 enum machine_mode mode;
921 {
922 if (GET_CODE (op) == SUBREG)
923 return 0;
924 return fr_reg_or_fp01_operand (op, mode);
925 }
926
927 /* Return 1 if OP is valid as a base register in a reg + offset address. */
928
929 int
930 basereg_operand (op, mode)
931 rtx op;
932 enum machine_mode mode;
933 {
934 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
935 checks from pa.c basereg_operand as well? Seems to be OK without them
936 in test runs. */
937
938 return (register_operand (op, mode) &&
939 REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
940 }
941 \f
942 /* Return 1 if the operands of a move are ok. */
943
944 int
945 ia64_move_ok (dst, src)
946 rtx dst, src;
947 {
948 /* If we're under init_recog_no_volatile, we'll not be able to use
949 memory_operand. So check the code directly and don't worry about
950 the validity of the underlying address, which should have been
951 checked elsewhere anyway. */
952 if (GET_CODE (dst) != MEM)
953 return 1;
954 if (GET_CODE (src) == MEM)
955 return 0;
956 if (register_operand (src, VOIDmode))
957 return 1;
958
959 /* Otherwise, this must be a constant, and one that is either 0 or 0.0 or 1.0.  */
960 if (INTEGRAL_MODE_P (GET_MODE (dst)))
961 return src == const0_rtx;
962 else
963 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
964 }
965
966 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
967 Return the length of the field, or <= 0 on failure. */
968
969 int
970 ia64_depz_field_mask (rop, rshift)
971 rtx rop, rshift;
972 {
973 unsigned HOST_WIDE_INT op = INTVAL (rop);
974 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
975
976 /* Get rid of the zero bits we're shifting in. */
977 op >>= shift;
978
979 /* We must now have a solid block of 1's at bit 0. */
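   /* For example, with ROP == 0x7f0 and RSHIFT == 4 the shift above leaves
      op == 0x7f, and exact_log2 (0x7f + 1) == 7, i.e. a seven bit field
      deposited starting at bit 4.  If the shifted value is not a solid block
      of 1's (say op == 0x75), then op + 1 is not a power of two and
      exact_log2 returns -1, indicating failure.  */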
980 return exact_log2 (op + 1);
981 }
982
983 /* Expand a symbolic constant load. */
984 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
985
986 void
987 ia64_expand_load_address (dest, src, scratch)
988 rtx dest, src, scratch;
989 {
990 rtx temp;
991
992 /* The destination could be a MEM during initial rtl generation,
993 which isn't a valid destination for the PIC load address patterns. */
994 if (! register_operand (dest, DImode))
995 temp = gen_reg_rtx (DImode);
996 else
997 temp = dest;
998
999 if (tls_symbolic_operand (src, Pmode))
1000 abort ();
1001
1002 if (TARGET_AUTO_PIC)
1003 emit_insn (gen_load_gprel64 (temp, src));
1004 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
1005 emit_insn (gen_load_fptr (temp, src));
1006 else if (sdata_symbolic_operand (src, DImode))
1007 emit_insn (gen_load_gprel (temp, src));
1008 else if (GET_CODE (src) == CONST
1009 && GET_CODE (XEXP (src, 0)) == PLUS
1010 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1011 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1012 {
1013 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1014 rtx sym = XEXP (XEXP (src, 0), 0);
1015 HOST_WIDE_INT ofs, hi, lo;
1016
1017 /* Split the offset into a sign extended 14-bit low part
1018 and a complementary high part. */
1019 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1020 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1021 hi = ofs - lo;
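       /* For example, ofs == 0x3000 gives lo == ((0x3000 & 0x3fff) ^ 0x2000)
          - 0x2000 == -0x1000 and hi == 0x3000 - lo == 0x4000, so hi has its
          low 14 bits clear and lo fits in a signed 14-bit immediate.  */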
1022
1023 if (! scratch)
1024 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
1025
1026 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
1027 scratch));
1028 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
1029 }
1030 else
1031 {
1032 rtx insn;
1033 if (! scratch)
1034 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1035
1036 insn = emit_insn (gen_load_symptr (temp, src, scratch));
1037 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
1038 }
1039
1040 if (temp != dest)
1041 emit_move_insn (dest, temp);
1042 }
1043
1044 static GTY(()) rtx gen_tls_tga;
1045 static rtx
1046 gen_tls_get_addr ()
1047 {
1048 if (!gen_tls_tga)
1049 {
1050 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1051 }
1052 return gen_tls_tga;
1053 }
1054
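/* In the IA-64 software conventions r13 is reserved as the thread pointer
   (tp), which is why gen_thread_pointer below builds its REG around hard
   register 13.  */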
1055 static GTY(()) rtx thread_pointer_rtx;
1056 static rtx
1057 gen_thread_pointer ()
1058 {
1059 if (!thread_pointer_rtx)
1060 {
1061 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1062       RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1063 }
1064   return thread_pointer_rtx;
1065 }
1066
1067 rtx
1068 ia64_expand_move (op0, op1)
1069 rtx op0, op1;
1070 {
1071 enum machine_mode mode = GET_MODE (op0);
1072
1073 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1074 op1 = force_reg (mode, op1);
1075
1076 if (mode == Pmode)
1077 {
1078 enum tls_model tls_kind;
1079 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1080 {
1081 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1082
1083 switch (tls_kind)
1084 {
1085 case TLS_MODEL_GLOBAL_DYNAMIC:
1086 start_sequence ();
1087
1088 tga_op1 = gen_reg_rtx (Pmode);
1089 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1090 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1091 RTX_UNCHANGING_P (tga_op1) = 1;
1092
1093 tga_op2 = gen_reg_rtx (Pmode);
1094 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1095 tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1096 RTX_UNCHANGING_P (tga_op2) = 1;
1097
1098 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1099 LCT_CONST, Pmode, 2, tga_op1,
1100 Pmode, tga_op2, Pmode);
1101
1102 insns = get_insns ();
1103 end_sequence ();
1104
1105 emit_libcall_block (insns, op0, tga_ret, op1);
1106 return NULL_RTX;
1107
1108 case TLS_MODEL_LOCAL_DYNAMIC:
1109             /* ??? This isn't the completely proper way to do local-dynamic.
1110 If the call to __tls_get_addr is used only by a single symbol,
1111 then we should (somehow) move the dtprel to the second arg
1112 to avoid the extra add. */
1113 start_sequence ();
1114
1115 tga_op1 = gen_reg_rtx (Pmode);
1116 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1117 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1118 RTX_UNCHANGING_P (tga_op1) = 1;
1119
1120 tga_op2 = const0_rtx;
1121
1122 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1123 LCT_CONST, Pmode, 2, tga_op1,
1124 Pmode, tga_op2, Pmode);
1125
1126 insns = get_insns ();
1127 end_sequence ();
1128
1129 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1130 UNSPEC_LD_BASE);
1131 tmp = gen_reg_rtx (Pmode);
1132 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1133
1134 if (register_operand (op0, Pmode))
1135 tga_ret = op0;
1136 else
1137 tga_ret = gen_reg_rtx (Pmode);
1138 if (TARGET_TLS64)
1139 {
1140 emit_insn (gen_load_dtprel (tga_ret, op1));
1141 emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
1142 }
1143 else
1144 emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
1145 if (tga_ret == op0)
1146 return NULL_RTX;
1147 op1 = tga_ret;
1148 break;
1149
1150 case TLS_MODEL_INITIAL_EXEC:
1151 tmp = gen_reg_rtx (Pmode);
1152 emit_insn (gen_load_ltoff_tprel (tmp, op1));
1153 tmp = gen_rtx_MEM (Pmode, tmp);
1154 RTX_UNCHANGING_P (tmp) = 1;
1155 tmp = force_reg (Pmode, tmp);
1156
1157 if (register_operand (op0, Pmode))
1158 op1 = op0;
1159 else
1160 op1 = gen_reg_rtx (Pmode);
1161 emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
1162 if (op1 == op0)
1163 return NULL_RTX;
1164 break;
1165
1166 case TLS_MODEL_LOCAL_EXEC:
1167 if (register_operand (op0, Pmode))
1168 tmp = op0;
1169 else
1170 tmp = gen_reg_rtx (Pmode);
1171 if (TARGET_TLS64)
1172 {
1173 emit_insn (gen_load_tprel (tmp, op1));
1174 emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
1175 }
1176 else
1177 emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
1178 if (tmp == op0)
1179 return NULL_RTX;
1180 op1 = tmp;
1181 break;
1182
1183 default:
1184 abort ();
1185 }
1186 }
1187 else if (!TARGET_NO_PIC && symbolic_operand (op1, DImode))
1188 {
1189 /* Before optimization starts, delay committing to any particular
1190 type of PIC address load. If this function gets deferred, we
1191 may acquire information that changes the value of the
1192 sdata_symbolic_operand predicate.
1193
1194 But don't delay for function pointers. Loading a function address
1195 actually loads the address of the descriptor not the function.
1196 If we represent these as SYMBOL_REFs, then they get cse'd with
1197 calls, and we end up with calls to the descriptor address instead
1198 of calls to the function address. Functions are not candidates
1199 for sdata anyways.
1200
1201 Don't delay for LABEL_REF because the splitter loses REG_LABEL
1202          notes.  Don't delay for pool addresses on general principles;
1203 they'll never become non-local behind our back. */
1204
1205 if (rtx_equal_function_value_matters
1206 && GET_CODE (op1) != LABEL_REF
1207 && ! (GET_CODE (op1) == SYMBOL_REF
1208 && (SYMBOL_REF_FLAG (op1)
1209 || CONSTANT_POOL_ADDRESS_P (op1)
1210 || STRING_POOL_ADDRESS_P (op1))))
1211 emit_insn (gen_movdi_symbolic (op0, op1));
1212 else
1213 ia64_expand_load_address (op0, op1, NULL_RTX);
1214 return NULL_RTX;
1215 }
1216 }
1217
1218 return op1;
1219 }
1220
1221 rtx
1222 ia64_gp_save_reg (setjmp_p)
1223 int setjmp_p;
1224 {
1225 rtx save = cfun->machine->ia64_gp_save;
1226
1227 if (save != NULL)
1228 {
1229 /* We can't save GP in a pseudo if we are calling setjmp, because
1230 pseudos won't be restored by longjmp. For now, we save it in r4. */
1231 /* ??? It would be more efficient to save this directly into a stack
1232 slot. Unfortunately, the stack slot address gets cse'd across
1233 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
1234 place. */
1235
1236 /* ??? Get the barf bag, Virginia. We've got to replace this thing
1237 in place, since this rtx is used in exception handling receivers.
1238 Moreover, we must get this rtx out of regno_reg_rtx or reload
1239 will do the wrong thing. */
1240 unsigned int old_regno = REGNO (save);
1241 if (setjmp_p && old_regno != GR_REG (4))
1242 {
1243 REGNO (save) = GR_REG (4);
1244 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
1245 }
1246 }
1247 else
1248 {
1249 if (setjmp_p)
1250 save = gen_rtx_REG (DImode, GR_REG (4));
1251 else if (! optimize)
1252 save = gen_rtx_REG (DImode, LOC_REG (0));
1253 else
1254 save = gen_reg_rtx (DImode);
1255 cfun->machine->ia64_gp_save = save;
1256 }
1257
1258 return save;
1259 }
1260
1261 /* Split a post-reload TImode reference into two DImode components. */
1262
1263 rtx
1264 ia64_split_timode (out, in, scratch)
1265 rtx out[2];
1266 rtx in, scratch;
1267 {
1268 switch (GET_CODE (in))
1269 {
1270 case REG:
1271 out[0] = gen_rtx_REG (DImode, REGNO (in));
1272 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1273 return NULL_RTX;
1274
1275 case MEM:
1276 {
1277 rtx base = XEXP (in, 0);
1278
1279 switch (GET_CODE (base))
1280 {
1281 case REG:
1282 out[0] = adjust_address (in, DImode, 0);
1283 break;
1284 case POST_MODIFY:
1285 base = XEXP (base, 0);
1286 out[0] = adjust_address (in, DImode, 0);
1287 break;
1288
1289 /* Since we're changing the mode, we need to change to POST_MODIFY
1290 as well to preserve the size of the increment. Either that or
1291 do the update in two steps, but we've already got this scratch
1292 register handy so let's use it. */
1293 case POST_INC:
1294 base = XEXP (base, 0);
1295 out[0]
1296 = change_address (in, DImode,
1297 gen_rtx_POST_MODIFY
1298 (Pmode, base, plus_constant (base, 16)));
1299 break;
1300 case POST_DEC:
1301 base = XEXP (base, 0);
1302 out[0]
1303 = change_address (in, DImode,
1304 gen_rtx_POST_MODIFY
1305 (Pmode, base, plus_constant (base, -16)));
1306 break;
1307 default:
1308 abort ();
1309 }
1310
1311 if (scratch == NULL_RTX)
1312 abort ();
1313 out[1] = change_address (in, DImode, scratch);
1314 return gen_adddi3 (scratch, base, GEN_INT (8));
1315 }
1316
1317 case CONST_INT:
1318 case CONST_DOUBLE:
1319 split_double (in, &out[0], &out[1]);
1320 return NULL_RTX;
1321
1322 default:
1323 abort ();
1324 }
1325 }
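/* For example, a post-reload (reg:TI r32) is split into out[0] = (reg:DI r32)
   and out[1] = (reg:DI r33), while a TImode MEM with a POST_INC address is
   rewritten so that the first word uses a POST_MODIFY of the base by 16 and
   the second word is addressed through SCRATCH, which the returned add sets
   to the base plus 8.  */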
1326
1327 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1328 through memory plus an extra GR scratch register. Except that you can
1329 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1330 SECONDARY_RELOAD_CLASS, but not both.
1331
1332 We got into problems in the first place by allowing a construct like
1333 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1334 This solution attempts to prevent this situation from occurring. When
1335 we see something like the above, we spill the inner register to memory. */
1336
1337 rtx
1338 spill_tfmode_operand (in, force)
1339 rtx in;
1340 int force;
1341 {
1342 if (GET_CODE (in) == SUBREG
1343 && GET_MODE (SUBREG_REG (in)) == TImode
1344 && GET_CODE (SUBREG_REG (in)) == REG)
1345 {
1346 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1347 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1348 }
1349 else if (force && GET_CODE (in) == REG)
1350 {
1351 rtx mem = gen_mem_addressof (in, NULL_TREE);
1352 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1353 }
1354 else if (GET_CODE (in) == MEM
1355 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1356 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1357 else
1358 return in;
1359 }
1360
1361 /* Emit comparison instruction if necessary, returning the expression
1362 that holds the compare result in the proper mode. */
1363
1364 rtx
1365 ia64_expand_compare (code, mode)
1366 enum rtx_code code;
1367 enum machine_mode mode;
1368 {
1369 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1370 rtx cmp;
1371
1372 /* If we have a BImode input, then we already have a compare result, and
1373 do not need to emit another comparison. */
1374 if (GET_MODE (op0) == BImode)
1375 {
1376 if ((code == NE || code == EQ) && op1 == const0_rtx)
1377 cmp = op0;
1378 else
1379 abort ();
1380 }
1381 else
1382 {
1383 cmp = gen_reg_rtx (BImode);
1384 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1385 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1386 code = NE;
1387 }
1388
1389 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1390 }
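/* For example, expanding a DImode greater-than comparison emits a set of a
   fresh BImode predicate register from (gt:BI op0 op1) and returns
   (ne:MODE cmp (const_int 0)), so the consuming branch or scc pattern only
   ever tests a predicate register against zero.  */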
1391
1392 /* Emit the appropriate sequence for a call. */
1393
1394 void
1395 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1396 rtx retval;
1397 rtx addr;
1398 rtx nextarg;
1399 int sibcall_p;
1400 {
1401 rtx insn, b0, pfs, gp_save, narg_rtx, dest;
1402 bool indirect_p;
1403 int narg;
1404
1405 addr = XEXP (addr, 0);
1406 b0 = gen_rtx_REG (DImode, R_BR (0));
1407 pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
1408
1409 if (! nextarg)
1410 narg = 0;
1411 else if (IN_REGNO_P (REGNO (nextarg)))
1412 narg = REGNO (nextarg) - IN_REG (0);
1413 else
1414 narg = REGNO (nextarg) - OUT_REG (0);
1415 narg_rtx = GEN_INT (narg);
1416
1417 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1418 {
1419 if (sibcall_p)
1420 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1421 else if (! retval)
1422 insn = gen_call_nopic (addr, narg_rtx, b0);
1423 else
1424 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1425 emit_call_insn (insn);
1426 return;
1427 }
1428
1429 indirect_p = ! symbolic_operand (addr, VOIDmode);
1430
1431 if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
1432 gp_save = NULL_RTX;
1433 else
1434 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1435
1436 if (gp_save)
1437 emit_move_insn (gp_save, pic_offset_table_rtx);
1438
1439 /* If this is an indirect call, then we have the address of a descriptor. */
1440 if (indirect_p)
1441 {
1442 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1443 emit_move_insn (pic_offset_table_rtx,
1444 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1445 }
1446 else
1447 dest = addr;
1448
1449 if (sibcall_p)
1450 insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
1451 else if (! retval)
1452 insn = gen_call_pic (dest, narg_rtx, b0);
1453 else
1454 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1455 emit_call_insn (insn);
1456
1457 if (gp_save)
1458 emit_move_insn (pic_offset_table_rtx, gp_save);
1459 }
1460 \f
1461 /* Begin the assembly file. */
1462
1463 void
1464 emit_safe_across_calls (f)
1465 FILE *f;
1466 {
1467 unsigned int rs, re;
1468 int out_state;
1469
1470 rs = 1;
1471 out_state = 0;
1472 while (1)
1473 {
1474 while (rs < 64 && call_used_regs[PR_REG (rs)])
1475 rs++;
1476 if (rs >= 64)
1477 break;
1478 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1479 continue;
1480 if (out_state == 0)
1481 {
1482 fputs ("\t.pred.safe_across_calls ", f);
1483 out_state = 1;
1484 }
1485 else
1486 fputc (',', f);
1487 if (re == rs + 1)
1488 fprintf (f, "p%u", rs);
1489 else
1490 fprintf (f, "p%u-p%u", rs, re - 1);
1491 rs = re + 1;
1492 }
1493 if (out_state)
1494 fputc ('\n', f);
1495 }
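/* With the default IA-64 register usage this typically emits a directive of
   the form ".pred.safe_across_calls p1-p5,p16-p63", telling the assembler
   which predicate registers it may assume are preserved across calls; the
   exact ranges depend on call_used_regs.  */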
1496
1497
1498 /* Structure to be filled in by ia64_compute_frame_size with register
1499 save masks and offsets for the current function. */
1500
1501 struct ia64_frame_info
1502 {
1503 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1504 the caller's scratch area. */
1505 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1506 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1507 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1508 HARD_REG_SET mask; /* mask of saved registers. */
1509 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1510 registers or long-term scratches. */
1511 int n_spilled; /* number of spilled registers. */
1512 int reg_fp; /* register for fp. */
1513 int reg_save_b0; /* save register for b0. */
1514 int reg_save_pr; /* save register for prs. */
1515 int reg_save_ar_pfs; /* save register for ar.pfs. */
1516 int reg_save_ar_unat; /* save register for ar.unat. */
1517 int reg_save_ar_lc; /* save register for ar.lc. */
1518 int n_input_regs; /* number of input registers used. */
1519 int n_local_regs; /* number of local registers used. */
1520 int n_output_regs; /* number of output registers used. */
1521 int n_rotate_regs; /* number of rotating registers used. */
1522
1523 char need_regstk; /* true if a .regstk directive needed. */
1524 char initialized; /* true if the data is finalized. */
1525 };
1526
1527 /* Current frame information calculated by ia64_compute_frame_size. */
1528 static struct ia64_frame_info current_frame_info;
1529
1530 /* Helper function for ia64_compute_frame_size: find an appropriate general
1531 register to spill some special register to. SPECIAL_SPILL_MASK contains
1532 bits in GR0 to GR31 that have already been allocated by this routine.
1533 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1534
1535 static int
1536 find_gr_spill (try_locals)
1537 int try_locals;
1538 {
1539 int regno;
1540
1541 /* If this is a leaf function, first try an otherwise unused
1542 call-clobbered register. */
1543 if (current_function_is_leaf)
1544 {
1545 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1546 if (! regs_ever_live[regno]
1547 && call_used_regs[regno]
1548 && ! fixed_regs[regno]
1549 && ! global_regs[regno]
1550 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1551 {
1552 current_frame_info.gr_used_mask |= 1 << regno;
1553 return regno;
1554 }
1555 }
1556
1557 if (try_locals)
1558 {
1559 regno = current_frame_info.n_local_regs;
1560 /* If there is a frame pointer, then we can't use loc79, because
1561 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1562 reg_name switching code in ia64_expand_prologue. */
1563 if (regno < (80 - frame_pointer_needed))
1564 {
1565 current_frame_info.n_local_regs = regno + 1;
1566 return LOC_REG (0) + regno;
1567 }
1568 }
1569
1570 /* Failed to find a general register to spill to. Must use stack. */
1571 return 0;
1572 }
1573
1574 /* In order to make for nice schedules, we try to allocate every temporary
1575 to a different register. We must of course stay away from call-saved,
1576 fixed, and global registers. We must also stay away from registers
1577 allocated in current_frame_info.gr_used_mask, since those include regs
1578 used all through the prologue.
1579
1580 Any register allocated here must be used immediately. The idea is to
1581 aid scheduling, not to solve data flow problems. */
1582
1583 static int last_scratch_gr_reg;
1584
1585 static int
1586 next_scratch_gr_reg ()
1587 {
1588 int i, regno;
1589
1590 for (i = 0; i < 32; ++i)
1591 {
1592 regno = (last_scratch_gr_reg + i + 1) & 31;
1593 if (call_used_regs[regno]
1594 && ! fixed_regs[regno]
1595 && ! global_regs[regno]
1596 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1597 {
1598 last_scratch_gr_reg = regno;
1599 return regno;
1600 }
1601 }
1602
1603 /* There must be _something_ available. */
1604 abort ();
1605 }
1606
1607 /* Helper function for ia64_compute_frame_size, called through
1608 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1609
1610 static void
1611 mark_reg_gr_used_mask (reg, data)
1612 rtx reg;
1613 void *data ATTRIBUTE_UNUSED;
1614 {
1615 unsigned int regno = REGNO (reg);
1616 if (regno < 32)
1617 {
1618 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1619 for (i = 0; i < n; ++i)
1620 current_frame_info.gr_used_mask |= 1 << (regno + i);
1621 }
1622 }
1623
1624 /* Compute the layout of the stack frame for the current function, filling
1625    in current_frame_info.  SIZE is the number of bytes of space needed for
1626    local variables.  */
1627
1628 static void
1629 ia64_compute_frame_size (size)
1630 HOST_WIDE_INT size;
1631 {
1632 HOST_WIDE_INT total_size;
1633 HOST_WIDE_INT spill_size = 0;
1634 HOST_WIDE_INT extra_spill_size = 0;
1635 HOST_WIDE_INT pretend_args_size;
1636 HARD_REG_SET mask;
1637 int n_spilled = 0;
1638 int spilled_gr_p = 0;
1639 int spilled_fr_p = 0;
1640 unsigned int regno;
1641 int i;
1642
1643 if (current_frame_info.initialized)
1644 return;
1645
1646 memset (&current_frame_info, 0, sizeof current_frame_info);
1647 CLEAR_HARD_REG_SET (mask);
1648
1649 /* Don't allocate scratches to the return register. */
1650 diddle_return_value (mark_reg_gr_used_mask, NULL);
1651
1652 /* Don't allocate scratches to the EH scratch registers. */
1653 if (cfun->machine->ia64_eh_epilogue_sp)
1654 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1655 if (cfun->machine->ia64_eh_epilogue_bsp)
1656 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1657
1658 /* Find the size of the register stack frame. We have only 80 local
1659 registers, because we reserve 8 for the inputs and 8 for the
1660 outputs. */
1661
1662 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1663 since we'll be adjusting that down later. */
1664 regno = LOC_REG (78) + ! frame_pointer_needed;
1665 for (; regno >= LOC_REG (0); regno--)
1666 if (regs_ever_live[regno])
1667 break;
1668 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1669
1670 /* For functions marked with the syscall_linkage attribute, we must mark
1671 all eight input registers as in use, so that locals aren't visible to
1672 the caller. */
1673
1674 if (cfun->machine->n_varargs > 0
1675 || lookup_attribute ("syscall_linkage",
1676 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1677 current_frame_info.n_input_regs = 8;
1678 else
1679 {
1680 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1681 if (regs_ever_live[regno])
1682 break;
1683 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1684 }
1685
1686 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1687 if (regs_ever_live[regno])
1688 break;
1689 i = regno - OUT_REG (0) + 1;
1690
1691 /* When -p profiling, we need one output register for the mcount argument.
1692      Likewise for -a profiling for the bb_init_func argument.  For -ax
1693 profiling, we need two output registers for the two bb_init_trace_func
1694 arguments. */
1695 if (current_function_profile)
1696 i = MAX (i, 1);
1697 current_frame_info.n_output_regs = i;
1698
1699 /* ??? No rotating register support yet. */
1700 current_frame_info.n_rotate_regs = 0;
1701
1702 /* Discover which registers need spilling, and how much room that
1703 will take. Begin with floating point and general registers,
1704 which will always wind up on the stack. */
1705
1706 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1707 if (regs_ever_live[regno] && ! call_used_regs[regno])
1708 {
1709 SET_HARD_REG_BIT (mask, regno);
1710 spill_size += 16;
1711 n_spilled += 1;
1712 spilled_fr_p = 1;
1713 }
1714
1715 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1716 if (regs_ever_live[regno] && ! call_used_regs[regno])
1717 {
1718 SET_HARD_REG_BIT (mask, regno);
1719 spill_size += 8;
1720 n_spilled += 1;
1721 spilled_gr_p = 1;
1722 }
1723
1724 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1725 if (regs_ever_live[regno] && ! call_used_regs[regno])
1726 {
1727 SET_HARD_REG_BIT (mask, regno);
1728 spill_size += 8;
1729 n_spilled += 1;
1730 }
1731
1732 /* Now come all special registers that might get saved in other
1733 general registers. */
1734
1735 if (frame_pointer_needed)
1736 {
1737 current_frame_info.reg_fp = find_gr_spill (1);
1738 /* If we did not get a register, then we take LOC79. This is guaranteed
1739 to be free, even if regs_ever_live is already set, because this is
1740 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1741 as we don't count loc79 above. */
1742 if (current_frame_info.reg_fp == 0)
1743 {
1744 current_frame_info.reg_fp = LOC_REG (79);
1745 current_frame_info.n_local_regs++;
1746 }
1747 }
1748
1749 if (! current_function_is_leaf)
1750 {
1751 /* Emit a save of BR0 if we call other functions. Do this even
1752 if this function doesn't return, as EH depends on this to be
1753 able to unwind the stack. */
1754 SET_HARD_REG_BIT (mask, BR_REG (0));
1755
1756 current_frame_info.reg_save_b0 = find_gr_spill (1);
1757 if (current_frame_info.reg_save_b0 == 0)
1758 {
1759 spill_size += 8;
1760 n_spilled += 1;
1761 }
1762
1763 /* Similarly for ar.pfs. */
1764 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1765 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1766 if (current_frame_info.reg_save_ar_pfs == 0)
1767 {
1768 extra_spill_size += 8;
1769 n_spilled += 1;
1770 }
1771 }
1772 else
1773 {
1774 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1775 {
1776 SET_HARD_REG_BIT (mask, BR_REG (0));
1777 spill_size += 8;
1778 n_spilled += 1;
1779 }
1780 }
1781
1782 /* Unwind descriptor hackery: things are most efficient if we allocate
1783 consecutive GR save registers for RP, PFS, FP in that order. However,
1784 it is absolutely critical that FP get the only hard register that's
1785 guaranteed to be free, so we allocated it first. If all three did
1786 happen to be allocated hard regs, and are consecutive, rearrange them
1787 into the preferred order now. */
1788 if (current_frame_info.reg_fp != 0
1789 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1790 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1791 {
1792 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1793 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1794 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1795 }
1796
1797 /* See if we need to store the predicate register block. */
1798 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1799 if (regs_ever_live[regno] && ! call_used_regs[regno])
1800 break;
1801 if (regno <= PR_REG (63))
1802 {
1803 SET_HARD_REG_BIT (mask, PR_REG (0));
1804 current_frame_info.reg_save_pr = find_gr_spill (1);
1805 if (current_frame_info.reg_save_pr == 0)
1806 {
1807 extra_spill_size += 8;
1808 n_spilled += 1;
1809 }
1810
1811 /* ??? Mark them all as used so that register renaming and such
1812 are free to use them. */
1813 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1814 regs_ever_live[regno] = 1;
1815 }
1816
1817 /* If we're forced to use st8.spill, we're forced to save and restore
1818 ar.unat as well. */
1819 if (spilled_gr_p || cfun->machine->n_varargs)
1820 {
1821 regs_ever_live[AR_UNAT_REGNUM] = 1;
1822 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1823 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1824 if (current_frame_info.reg_save_ar_unat == 0)
1825 {
1826 extra_spill_size += 8;
1827 n_spilled += 1;
1828 }
1829 }
1830
1831 if (regs_ever_live[AR_LC_REGNUM])
1832 {
1833 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1834 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1835 if (current_frame_info.reg_save_ar_lc == 0)
1836 {
1837 extra_spill_size += 8;
1838 n_spilled += 1;
1839 }
1840 }
1841
1842 /* If we have an odd number of words of pretend arguments written to
1843 the stack, then the FR save area will be unaligned. We round the
1844 size of this area up to keep things 16 byte aligned. */
1845 if (spilled_fr_p)
1846 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1847 else
1848 pretend_args_size = current_function_pretend_args_size;
1849
1850 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1851 + current_function_outgoing_args_size);
1852 total_size = IA64_STACK_ALIGN (total_size);
1853
1854 /* We always use the 16-byte scratch area provided by the caller, but
1855 if we are a leaf function, there's no one to which we need to provide
1856 a scratch area. */
1857 if (current_function_is_leaf)
1858 total_size = MAX (0, total_size - 16);
1859
1860 current_frame_info.total_size = total_size;
1861 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1862 current_frame_info.spill_size = spill_size;
1863 current_frame_info.extra_spill_size = extra_spill_size;
1864 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1865 current_frame_info.n_spilled = n_spilled;
1866 current_frame_info.initialized = reload_completed;
1867 }
1868
1869 /* Compute the initial difference between the specified pair of registers. */
1870
1871 HOST_WIDE_INT
1872 ia64_initial_elimination_offset (from, to)
1873 int from, to;
1874 {
1875 HOST_WIDE_INT offset;
1876
1877 ia64_compute_frame_size (get_frame_size ());
1878 switch (from)
1879 {
1880 case FRAME_POINTER_REGNUM:
1881 if (to == HARD_FRAME_POINTER_REGNUM)
1882 {
1883 if (current_function_is_leaf)
1884 offset = -current_frame_info.total_size;
1885 else
1886 offset = -(current_frame_info.total_size
1887 - current_function_outgoing_args_size - 16);
1888 }
1889 else if (to == STACK_POINTER_REGNUM)
1890 {
1891 if (current_function_is_leaf)
1892 offset = 0;
1893 else
1894 offset = 16 + current_function_outgoing_args_size;
1895 }
1896 else
1897 abort ();
1898 break;
1899
1900 case ARG_POINTER_REGNUM:
1901 /* Arguments start above the 16 byte save area, unless stdarg,
1902 in which case we store through the 16 byte save area. */
1903 if (to == HARD_FRAME_POINTER_REGNUM)
1904 offset = 16 - current_function_pretend_args_size;
1905 else if (to == STACK_POINTER_REGNUM)
1906 offset = (current_frame_info.total_size
1907 + 16 - current_function_pretend_args_size);
1908 else
1909 abort ();
1910 break;
1911
1912 case RETURN_ADDRESS_POINTER_REGNUM:
1913 offset = 0;
1914 break;
1915
1916 default:
1917 abort ();
1918 }
1919
1920 return offset;
1921 }
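/* Editorial illustration (not part of the compiled logic): for a hypothetical
   non-leaf function with total_size == 64, outgoing_args_size == 16 and no
   pretend args, the switch above yields
       FRAME_POINTER -> HARD_FRAME_POINTER:  -(64 - 16 - 16) = -32
       FRAME_POINTER -> STACK_POINTER:         16 + 16       =  32
       ARG_POINTER   -> HARD_FRAME_POINTER:    16 - 0        =  16
       ARG_POINTER   -> STACK_POINTER:         64 + 16 - 0   =  80
   Note that the ARG_POINTER and FRAME_POINTER offsets differ by the same
   amount (48) whether we eliminate to the hard FP or to SP, as the
   elimination machinery requires.  */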
1922
1923 /* If there are more than a trivial number of register spills, we use
1924 two interleaved iterators so that we can get two memory references
1925 per insn group.
1926
1927 In order to simplify things in the prologue and epilogue expanders,
1928 we use helper functions to fix up the memory references after the
1929 fact with the appropriate offsets to a POST_MODIFY memory mode.
1930 The following data structure tracks the state of the two iterators
1931 while insns are being emitted. */
1932
1933 struct spill_fill_data
1934 {
1935 rtx init_after; /* point at which to emit initializations */
1936 rtx init_reg[2]; /* initial base register */
1937 rtx iter_reg[2]; /* the iterator registers */
1938 rtx *prev_addr[2]; /* address of last memory use */
1939 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1940 HOST_WIDE_INT prev_off[2]; /* last offset */
1941 int n_iter; /* number of iterators in use */
1942 int next_iter; /* next iterator to use */
1943 unsigned int save_gr_used_mask;
1944 };
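/* Editorial note: setup_spill_pointers below allocates a second iterator
   only when there are more than two spills (n_iter = 1 + (n_spills > 2)),
   and spill_restore_mem alternates next_iter between the two.  When the
   spill slots are visited at successive 8-byte steps, each iterator
   therefore advances by 16 bytes, which spill_restore_mem expresses by
   patching that iterator's previous memory reference into a POST_MODIFY.
   E.g. for four hypothetical slots A0..A3, iterator 0 serves A0 and A2
   while iterator 1 serves A1 and A3, giving two independent address
   chains per insn group.  */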
1945
1946 static struct spill_fill_data spill_fill_data;
1947
1948 static void
1949 setup_spill_pointers (n_spills, init_reg, cfa_off)
1950 int n_spills;
1951 rtx init_reg;
1952 HOST_WIDE_INT cfa_off;
1953 {
1954 int i;
1955
1956 spill_fill_data.init_after = get_last_insn ();
1957 spill_fill_data.init_reg[0] = init_reg;
1958 spill_fill_data.init_reg[1] = init_reg;
1959 spill_fill_data.prev_addr[0] = NULL;
1960 spill_fill_data.prev_addr[1] = NULL;
1961 spill_fill_data.prev_insn[0] = NULL;
1962 spill_fill_data.prev_insn[1] = NULL;
1963 spill_fill_data.prev_off[0] = cfa_off;
1964 spill_fill_data.prev_off[1] = cfa_off;
1965 spill_fill_data.next_iter = 0;
1966 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1967
1968 spill_fill_data.n_iter = 1 + (n_spills > 2);
1969 for (i = 0; i < spill_fill_data.n_iter; ++i)
1970 {
1971 int regno = next_scratch_gr_reg ();
1972 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1973 current_frame_info.gr_used_mask |= 1 << regno;
1974 }
1975 }
1976
1977 static void
1978 finish_spill_pointers ()
1979 {
1980 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1981 }
1982
1983 static rtx
1984 spill_restore_mem (reg, cfa_off)
1985 rtx reg;
1986 HOST_WIDE_INT cfa_off;
1987 {
1988 int iter = spill_fill_data.next_iter;
1989 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1990 rtx disp_rtx = GEN_INT (disp);
1991 rtx mem;
1992
1993 if (spill_fill_data.prev_addr[iter])
1994 {
1995 if (CONST_OK_FOR_N (disp))
1996 {
1997 *spill_fill_data.prev_addr[iter]
1998 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1999 gen_rtx_PLUS (DImode,
2000 spill_fill_data.iter_reg[iter],
2001 disp_rtx));
2002 REG_NOTES (spill_fill_data.prev_insn[iter])
2003 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2004 REG_NOTES (spill_fill_data.prev_insn[iter]));
2005 }
2006 else
2007 {
2008 /* ??? Could use register post_modify for loads. */
2009 if (! CONST_OK_FOR_I (disp))
2010 {
2011 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2012 emit_move_insn (tmp, disp_rtx);
2013 disp_rtx = tmp;
2014 }
2015 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2016 spill_fill_data.iter_reg[iter], disp_rtx));
2017 }
2018 }
2019 /* Micro-optimization: if we've created a frame pointer, it's at
2020 CFA 0, which may allow the real iterator to be initialized lower,
2021 slightly increasing parallelism. Also, if there are few saves
2022 it may eliminate the iterator entirely. */
2023 else if (disp == 0
2024 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2025 && frame_pointer_needed)
2026 {
2027 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2028 set_mem_alias_set (mem, get_varargs_alias_set ());
2029 return mem;
2030 }
2031 else
2032 {
2033 rtx seq, insn;
2034
2035 if (disp == 0)
2036 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2037 spill_fill_data.init_reg[iter]);
2038 else
2039 {
2040 start_sequence ();
2041
2042 if (! CONST_OK_FOR_I (disp))
2043 {
2044 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2045 emit_move_insn (tmp, disp_rtx);
2046 disp_rtx = tmp;
2047 }
2048
2049 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2050 spill_fill_data.init_reg[iter],
2051 disp_rtx));
2052
2053 seq = gen_sequence ();
2054 end_sequence ();
2055 }
2056
2057 /* Careful for being the first insn in a sequence. */
2058 if (spill_fill_data.init_after)
2059 insn = emit_insn_after (seq, spill_fill_data.init_after);
2060 else
2061 {
2062 rtx first = get_insns ();
2063 if (first)
2064 insn = emit_insn_before (seq, first);
2065 else
2066 insn = emit_insn (seq);
2067 }
2068 spill_fill_data.init_after = insn;
2069
2070 /* If DISP is 0, we may or may not have a further adjustment
2071 afterward. If we do, then the load/store insn may be modified
2072 to be a post-modify. If we don't, then this copy may be
2073 eliminated by copyprop_hardreg_forward, which makes this
2074 insn garbage, which runs afoul of the sanity check in
2075 propagate_one_insn. So mark this insn as legal to delete. */
2076 if (disp == 0)
2077 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2078 REG_NOTES (insn));
2079 }
2080
2081 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2082
2083 /* ??? Not all of the spills are for varargs, but some of them are.
2084 The rest of the spills belong in an alias set of their own. But
2085 it doesn't actually hurt to include them here. */
2086 set_mem_alias_set (mem, get_varargs_alias_set ());
2087
2088 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2089 spill_fill_data.prev_off[iter] = cfa_off;
2090
2091 if (++iter >= spill_fill_data.n_iter)
2092 iter = 0;
2093 spill_fill_data.next_iter = iter;
2094
2095 return mem;
2096 }
2097
2098 static void
2099 do_spill (move_fn, reg, cfa_off, frame_reg)
2100 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2101 rtx reg, frame_reg;
2102 HOST_WIDE_INT cfa_off;
2103 {
2104 int iter = spill_fill_data.next_iter;
2105 rtx mem, insn;
2106
2107 mem = spill_restore_mem (reg, cfa_off);
2108 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2109 spill_fill_data.prev_insn[iter] = insn;
2110
2111 if (frame_reg)
2112 {
2113 rtx base;
2114 HOST_WIDE_INT off;
2115
2116 RTX_FRAME_RELATED_P (insn) = 1;
2117
2118 /* Don't even pretend that the unwind code can intuit its way
2119 through a pair of interleaved post_modify iterators. Just
2120 provide the correct answer. */
2121
2122 if (frame_pointer_needed)
2123 {
2124 base = hard_frame_pointer_rtx;
2125 off = - cfa_off;
2126 }
2127 else
2128 {
2129 base = stack_pointer_rtx;
2130 off = current_frame_info.total_size - cfa_off;
2131 }
2132
2133 REG_NOTES (insn)
2134 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2135 gen_rtx_SET (VOIDmode,
2136 gen_rtx_MEM (GET_MODE (reg),
2137 plus_constant (base, off)),
2138 frame_reg),
2139 REG_NOTES (insn));
2140 }
2141 }
2142
2143 static void
2144 do_restore (move_fn, reg, cfa_off)
2145 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2146 rtx reg;
2147 HOST_WIDE_INT cfa_off;
2148 {
2149 int iter = spill_fill_data.next_iter;
2150 rtx insn;
2151
2152 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2153 GEN_INT (cfa_off)));
2154 spill_fill_data.prev_insn[iter] = insn;
2155 }
2156
2157 /* Wrapper functions that discard the CONST_INT spill offset. These
2158 exist so that we can give gr_spill/gr_fill the offset they need and
2159 use a consistent function interface. */
2160
2161 static rtx
2162 gen_movdi_x (dest, src, offset)
2163 rtx dest, src;
2164 rtx offset ATTRIBUTE_UNUSED;
2165 {
2166 return gen_movdi (dest, src);
2167 }
2168
2169 static rtx
2170 gen_fr_spill_x (dest, src, offset)
2171 rtx dest, src;
2172 rtx offset ATTRIBUTE_UNUSED;
2173 {
2174 return gen_fr_spill (dest, src);
2175 }
2176
2177 static rtx
2178 gen_fr_restore_x (dest, src, offset)
2179 rtx dest, src;
2180 rtx offset ATTRIBUTE_UNUSED;
2181 {
2182 return gen_fr_restore (dest, src);
2183 }
2184
2185 /* Called after register allocation to add any instructions needed for the
2186 prologue. Using a prologue insn is favored compared to putting all of the
2187 instructions in output_function_prologue(), since it allows the scheduler
2188 to intermix instructions with the saves of the caller saved registers. In
2189 some cases, it might be necessary to emit a barrier instruction as the last
2190 insn to prevent such scheduling.
2191
2192 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2193 so that the debug info generation code can handle them properly.
2194
2195 The register save area is laid out like so:
2196 cfa+16
2197 [ varargs spill area ]
2198 [ fr register spill area ]
2199 [ br register spill area ]
2200 [ ar register spill area ]
2201 [ pr register spill area ]
2202 [ gr register spill area ] */
2203
2204 /* ??? Get inefficient code when the frame size is larger than can fit in an
2205 adds instruction. */
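/* Editorial note on the bookkeeping below: the prologue walks cfa_off
   downward, starting at spill_cfa_off + spill_size + extra_spill_size,
   subtracting 8 for each PR/AR/GR/BR slot and 16 for each FR slot (FR
   slots must stay 16-byte aligned), and must land exactly on
   spill_cfa_off when it is done; the two abort () checks in
   ia64_expand_prologue verify this accounting.  */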
2206
2207 void
2208 ia64_expand_prologue ()
2209 {
2210 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2211 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2212 rtx reg, alt_reg;
2213
2214 ia64_compute_frame_size (get_frame_size ());
2215 last_scratch_gr_reg = 15;
2216
2217 /* If there is no epilogue, then we don't need some prologue insns.
2218 We need to avoid emitting the dead prologue insns, because flow
2219 will complain about them. */
2220 if (optimize)
2221 {
2222 edge e;
2223
2224 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2225 if ((e->flags & EDGE_FAKE) == 0
2226 && (e->flags & EDGE_FALLTHRU) != 0)
2227 break;
2228 epilogue_p = (e != NULL);
2229 }
2230 else
2231 epilogue_p = 1;
2232
2233 /* Set the local, input, and output register names. We need to do this
2234 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2235 half. If we use in/loc/out register names, then we get assembler errors
2236 in crtn.S because there is no alloc insn or regstk directive in there. */
2237 if (! TARGET_REG_NAMES)
2238 {
2239 int inputs = current_frame_info.n_input_regs;
2240 int locals = current_frame_info.n_local_regs;
2241 int outputs = current_frame_info.n_output_regs;
2242
2243 for (i = 0; i < inputs; i++)
2244 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2245 for (i = 0; i < locals; i++)
2246 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2247 for (i = 0; i < outputs; i++)
2248 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2249 }
2250
2251 /* Set the frame pointer register name. The regnum is logically loc79,
2252 but of course we'll not have allocated that many locals. Rather than
2253 worrying about renumbering the existing rtxs, we adjust the name. */
2254 /* ??? This code means that we can never use one local register when
2255 there is a frame pointer. loc79 gets wasted in this case, as it is
2256 renamed to a register that will never be used. See also the try_locals
2257 code in find_gr_spill. */
2258 if (current_frame_info.reg_fp)
2259 {
2260 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2261 reg_names[HARD_FRAME_POINTER_REGNUM]
2262 = reg_names[current_frame_info.reg_fp];
2263 reg_names[current_frame_info.reg_fp] = tmp;
2264 }
2265
2266 /* Fix up the return address placeholder. */
2267 /* ??? We can fail if __builtin_return_address is used, and we didn't
2268 allocate a register in which to save b0. I can't think of a way to
2269 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2270 then be sure that I got the right one. Further, reload doesn't seem
2271 to care if an eliminable register isn't used, and "eliminates" it
2272 anyway. */
2273 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2274 && current_frame_info.reg_save_b0 != 0)
2275 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2276
2277 /* We don't need an alloc instruction if we've used no outputs or locals. */
2278 if (current_frame_info.n_local_regs == 0
2279 && current_frame_info.n_output_regs == 0
2280 && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
2281 {
2282 /* If there is no alloc, but there are input registers used, then we
2283 need a .regstk directive. */
2284 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2285 ar_pfs_save_reg = NULL_RTX;
2286 }
2287 else
2288 {
2289 current_frame_info.need_regstk = 0;
2290
2291 if (current_frame_info.reg_save_ar_pfs)
2292 regno = current_frame_info.reg_save_ar_pfs;
2293 else
2294 regno = next_scratch_gr_reg ();
2295 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2296
2297 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2298 GEN_INT (current_frame_info.n_input_regs),
2299 GEN_INT (current_frame_info.n_local_regs),
2300 GEN_INT (current_frame_info.n_output_regs),
2301 GEN_INT (current_frame_info.n_rotate_regs)));
2302 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2303 }
2304
2305 /* Set up frame pointer, stack pointer, and spill iterators. */
2306
2307 n_varargs = cfun->machine->n_varargs;
2308 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2309 stack_pointer_rtx, 0);
2310
2311 if (frame_pointer_needed)
2312 {
2313 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2314 RTX_FRAME_RELATED_P (insn) = 1;
2315 }
2316
2317 if (current_frame_info.total_size != 0)
2318 {
2319 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2320 rtx offset;
2321
2322 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2323 offset = frame_size_rtx;
2324 else
2325 {
2326 regno = next_scratch_gr_reg ();
2327 offset = gen_rtx_REG (DImode, regno);
2328 emit_move_insn (offset, frame_size_rtx);
2329 }
2330
2331 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2332 stack_pointer_rtx, offset));
2333
2334 if (! frame_pointer_needed)
2335 {
2336 RTX_FRAME_RELATED_P (insn) = 1;
2337 if (GET_CODE (offset) != CONST_INT)
2338 {
2339 REG_NOTES (insn)
2340 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2341 gen_rtx_SET (VOIDmode,
2342 stack_pointer_rtx,
2343 gen_rtx_PLUS (DImode,
2344 stack_pointer_rtx,
2345 frame_size_rtx)),
2346 REG_NOTES (insn));
2347 }
2348 }
2349
2350 /* ??? At this point we must generate a magic insn that appears to
2351 modify the stack pointer, the frame pointer, and all spill
2352 iterators. This would allow the most scheduling freedom. For
2353 now, just hard stop. */
2354 emit_insn (gen_blockage ());
2355 }
2356
2357 /* Must copy out ar.unat before doing any integer spills. */
2358 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2359 {
2360 if (current_frame_info.reg_save_ar_unat)
2361 ar_unat_save_reg
2362 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2363 else
2364 {
2365 alt_regno = next_scratch_gr_reg ();
2366 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2367 current_frame_info.gr_used_mask |= 1 << alt_regno;
2368 }
2369
2370 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2371 insn = emit_move_insn (ar_unat_save_reg, reg);
2372 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2373
2374 /* Even if we're not going to generate an epilogue, we still
2375 need to save the register so that EH works. */
2376 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2377 emit_insn (gen_prologue_use (ar_unat_save_reg));
2378 }
2379 else
2380 ar_unat_save_reg = NULL_RTX;
2381
2382 /* Spill all varargs registers. Do this before spilling any GR registers,
2383 since we want the UNAT bits for the GR registers to override the UNAT
2384 bits from varargs, which we don't care about. */
2385
2386 cfa_off = -16;
2387 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2388 {
2389 reg = gen_rtx_REG (DImode, regno);
2390 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2391 }
2392
2393 /* Locate the bottom of the register save area. */
2394 cfa_off = (current_frame_info.spill_cfa_off
2395 + current_frame_info.spill_size
2396 + current_frame_info.extra_spill_size);
2397
2398 /* Save the predicate register block either in a register or in memory. */
2399 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2400 {
2401 reg = gen_rtx_REG (DImode, PR_REG (0));
2402 if (current_frame_info.reg_save_pr != 0)
2403 {
2404 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2405 insn = emit_move_insn (alt_reg, reg);
2406
2407 /* ??? Denote pr spill/fill by a DImode move that modifies all
2408 64 hard registers. */
2409 RTX_FRAME_RELATED_P (insn) = 1;
2410 REG_NOTES (insn)
2411 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2412 gen_rtx_SET (VOIDmode, alt_reg, reg),
2413 REG_NOTES (insn));
2414
2415 /* Even if we're not going to generate an epilogue, we still
2416 need to save the register so that EH works. */
2417 if (! epilogue_p)
2418 emit_insn (gen_prologue_use (alt_reg));
2419 }
2420 else
2421 {
2422 alt_regno = next_scratch_gr_reg ();
2423 alt_reg = gen_rtx_REG (DImode, alt_regno);
2424 insn = emit_move_insn (alt_reg, reg);
2425 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2426 cfa_off -= 8;
2427 }
2428 }
2429
2430 /* Handle AR regs in numerical order. All of them get special handling. */
2431 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2432 && current_frame_info.reg_save_ar_unat == 0)
2433 {
2434 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2435 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2436 cfa_off -= 8;
2437 }
2438
2439 /* The alloc insn already copied ar.pfs into a general register. The
2440 only thing we have to do now is copy that register to a stack slot
2441 if we'd not allocated a local register for the job. */
2442 if (current_frame_info.reg_save_ar_pfs == 0
2443 && ! current_function_is_leaf)
2444 {
2445 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2446 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2447 cfa_off -= 8;
2448 }
2449
2450 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2451 {
2452 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2453 if (current_frame_info.reg_save_ar_lc != 0)
2454 {
2455 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2456 insn = emit_move_insn (alt_reg, reg);
2457 RTX_FRAME_RELATED_P (insn) = 1;
2458
2459 /* Even if we're not going to generate an epilogue, we still
2460 need to save the register so that EH works. */
2461 if (! epilogue_p)
2462 emit_insn (gen_prologue_use (alt_reg));
2463 }
2464 else
2465 {
2466 alt_regno = next_scratch_gr_reg ();
2467 alt_reg = gen_rtx_REG (DImode, alt_regno);
2468 emit_move_insn (alt_reg, reg);
2469 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2470 cfa_off -= 8;
2471 }
2472 }
2473
2474 /* We should now be at the base of the gr/br/fr spill area. */
2475 if (cfa_off != (current_frame_info.spill_cfa_off
2476 + current_frame_info.spill_size))
2477 abort ();
2478
2479 /* Spill all general registers. */
2480 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2481 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2482 {
2483 reg = gen_rtx_REG (DImode, regno);
2484 do_spill (gen_gr_spill, reg, cfa_off, reg);
2485 cfa_off -= 8;
2486 }
2487
2488 /* Handle BR0 specially -- it may be getting stored permanently in
2489 some GR register. */
2490 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2491 {
2492 reg = gen_rtx_REG (DImode, BR_REG (0));
2493 if (current_frame_info.reg_save_b0 != 0)
2494 {
2495 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2496 insn = emit_move_insn (alt_reg, reg);
2497 RTX_FRAME_RELATED_P (insn) = 1;
2498
2499 /* Even if we're not going to generate an epilogue, we still
2500 need to save the register so that EH works. */
2501 if (! epilogue_p)
2502 emit_insn (gen_prologue_use (alt_reg));
2503 }
2504 else
2505 {
2506 alt_regno = next_scratch_gr_reg ();
2507 alt_reg = gen_rtx_REG (DImode, alt_regno);
2508 emit_move_insn (alt_reg, reg);
2509 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2510 cfa_off -= 8;
2511 }
2512 }
2513
2514 /* Spill the rest of the BR registers. */
2515 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2516 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2517 {
2518 alt_regno = next_scratch_gr_reg ();
2519 alt_reg = gen_rtx_REG (DImode, alt_regno);
2520 reg = gen_rtx_REG (DImode, regno);
2521 emit_move_insn (alt_reg, reg);
2522 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2523 cfa_off -= 8;
2524 }
2525
2526 /* Align the frame and spill all FR registers. */
2527 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2528 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2529 {
2530 if (cfa_off & 15)
2531 abort ();
2532 reg = gen_rtx_REG (TFmode, regno);
2533 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2534 cfa_off -= 16;
2535 }
2536
2537 if (cfa_off != current_frame_info.spill_cfa_off)
2538 abort ();
2539
2540 finish_spill_pointers ();
2541 }
2542
2543 /* Called after register allocation to add any instructions needed for the
2544 epilogue. Using an epilogue insn is favored compared to putting all of the
2545 instructions in output_function_epilogue(), since it allows the scheduler
2546 to intermix instructions with the restores of the caller saved registers. In
2547 some cases, it might be necessary to emit a barrier instruction as the last
2548 insn to prevent such scheduling. */
2549
2550 void
2551 ia64_expand_epilogue (sibcall_p)
2552 int sibcall_p;
2553 {
2554 rtx insn, reg, alt_reg, ar_unat_save_reg;
2555 int regno, alt_regno, cfa_off;
2556
2557 ia64_compute_frame_size (get_frame_size ());
2558
2559 /* If there is a frame pointer, then we use it instead of the stack
2560 pointer, so that the stack pointer does not need to be valid when
2561 the epilogue starts. See EXIT_IGNORE_STACK. */
2562 if (frame_pointer_needed)
2563 setup_spill_pointers (current_frame_info.n_spilled,
2564 hard_frame_pointer_rtx, 0);
2565 else
2566 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2567 current_frame_info.total_size);
2568
2569 if (current_frame_info.total_size != 0)
2570 {
2571 /* ??? At this point we must generate a magic insn that appears to
2572 modify the spill iterators and the frame pointer. This would
2573 allow the most scheduling freedom. For now, just hard stop. */
2574 emit_insn (gen_blockage ());
2575 }
2576
2577 /* Locate the bottom of the register save area. */
2578 cfa_off = (current_frame_info.spill_cfa_off
2579 + current_frame_info.spill_size
2580 + current_frame_info.extra_spill_size);
2581
2582 /* Restore the predicate registers. */
2583 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2584 {
2585 if (current_frame_info.reg_save_pr != 0)
2586 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2587 else
2588 {
2589 alt_regno = next_scratch_gr_reg ();
2590 alt_reg = gen_rtx_REG (DImode, alt_regno);
2591 do_restore (gen_movdi_x, alt_reg, cfa_off);
2592 cfa_off -= 8;
2593 }
2594 reg = gen_rtx_REG (DImode, PR_REG (0));
2595 emit_move_insn (reg, alt_reg);
2596 }
2597
2598 /* Restore the application registers. */
2599
2600 /* Load the saved unat from the stack, but do not restore it until
2601 after the GRs have been restored. */
2602 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2603 {
2604 if (current_frame_info.reg_save_ar_unat != 0)
2605 ar_unat_save_reg
2606 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2607 else
2608 {
2609 alt_regno = next_scratch_gr_reg ();
2610 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2611 current_frame_info.gr_used_mask |= 1 << alt_regno;
2612 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2613 cfa_off -= 8;
2614 }
2615 }
2616 else
2617 ar_unat_save_reg = NULL_RTX;
2618
2619 if (current_frame_info.reg_save_ar_pfs != 0)
2620 {
2621 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2622 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2623 emit_move_insn (reg, alt_reg);
2624 }
2625 else if (! current_function_is_leaf)
2626 {
2627 alt_regno = next_scratch_gr_reg ();
2628 alt_reg = gen_rtx_REG (DImode, alt_regno);
2629 do_restore (gen_movdi_x, alt_reg, cfa_off);
2630 cfa_off -= 8;
2631 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2632 emit_move_insn (reg, alt_reg);
2633 }
2634
2635 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2636 {
2637 if (current_frame_info.reg_save_ar_lc != 0)
2638 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2639 else
2640 {
2641 alt_regno = next_scratch_gr_reg ();
2642 alt_reg = gen_rtx_REG (DImode, alt_regno);
2643 do_restore (gen_movdi_x, alt_reg, cfa_off);
2644 cfa_off -= 8;
2645 }
2646 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2647 emit_move_insn (reg, alt_reg);
2648 }
2649
2650 /* We should now be at the base of the gr/br/fr spill area. */
2651 if (cfa_off != (current_frame_info.spill_cfa_off
2652 + current_frame_info.spill_size))
2653 abort ();
2654
2655 /* Restore all general registers. */
2656 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2657 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2658 {
2659 reg = gen_rtx_REG (DImode, regno);
2660 do_restore (gen_gr_restore, reg, cfa_off);
2661 cfa_off -= 8;
2662 }
2663
2664 /* Restore the branch registers. Handle B0 specially, as it may
2665 have gotten stored in some GR register. */
2666 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2667 {
2668 if (current_frame_info.reg_save_b0 != 0)
2669 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2670 else
2671 {
2672 alt_regno = next_scratch_gr_reg ();
2673 alt_reg = gen_rtx_REG (DImode, alt_regno);
2674 do_restore (gen_movdi_x, alt_reg, cfa_off);
2675 cfa_off -= 8;
2676 }
2677 reg = gen_rtx_REG (DImode, BR_REG (0));
2678 emit_move_insn (reg, alt_reg);
2679 }
2680
2681 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2682 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2683 {
2684 alt_regno = next_scratch_gr_reg ();
2685 alt_reg = gen_rtx_REG (DImode, alt_regno);
2686 do_restore (gen_movdi_x, alt_reg, cfa_off);
2687 cfa_off -= 8;
2688 reg = gen_rtx_REG (DImode, regno);
2689 emit_move_insn (reg, alt_reg);
2690 }
2691
2692 /* Restore floating point registers. */
2693 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2694 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2695 {
2696 if (cfa_off & 15)
2697 abort ();
2698 reg = gen_rtx_REG (TFmode, regno);
2699 do_restore (gen_fr_restore_x, reg, cfa_off);
2700 cfa_off -= 16;
2701 }
2702
2703 /* Restore ar.unat for real. */
2704 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2705 {
2706 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2707 emit_move_insn (reg, ar_unat_save_reg);
2708 }
2709
2710 if (cfa_off != current_frame_info.spill_cfa_off)
2711 abort ();
2712
2713 finish_spill_pointers ();
2714
2715 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2716 {
2717 /* ??? At this point we must generate a magic insn that appears to
2718 modify the spill iterators, the stack pointer, and the frame
2719 pointer. This would allow the most scheduling freedom. For now,
2720 just hard stop. */
2721 emit_insn (gen_blockage ());
2722 }
2723
2724 if (cfun->machine->ia64_eh_epilogue_sp)
2725 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2726 else if (frame_pointer_needed)
2727 {
2728 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2729 RTX_FRAME_RELATED_P (insn) = 1;
2730 }
2731 else if (current_frame_info.total_size)
2732 {
2733 rtx offset, frame_size_rtx;
2734
2735 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2736 if (CONST_OK_FOR_I (current_frame_info.total_size))
2737 offset = frame_size_rtx;
2738 else
2739 {
2740 regno = next_scratch_gr_reg ();
2741 offset = gen_rtx_REG (DImode, regno);
2742 emit_move_insn (offset, frame_size_rtx);
2743 }
2744
2745 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2746 offset));
2747
2748 RTX_FRAME_RELATED_P (insn) = 1;
2749 if (GET_CODE (offset) != CONST_INT)
2750 {
2751 REG_NOTES (insn)
2752 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2753 gen_rtx_SET (VOIDmode,
2754 stack_pointer_rtx,
2755 gen_rtx_PLUS (DImode,
2756 stack_pointer_rtx,
2757 frame_size_rtx)),
2758 REG_NOTES (insn));
2759 }
2760 }
2761
2762 if (cfun->machine->ia64_eh_epilogue_bsp)
2763 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2764
2765 if (! sibcall_p)
2766 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2767 else
2768 {
2769 int fp = GR_REG (2);
2770 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2771 first available call-clobbered register. If there was a frame pointer
2772 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2773 so we have to make sure we're using the string "r2" when emitting
2774 the register name for the assembler. */
2775 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2776 fp = HARD_FRAME_POINTER_REGNUM;
2777
2778 /* We must emit an alloc to force the input registers to become output
2779 registers. Otherwise, if the callee tries to pass its parameters
2780 through to another call without an intervening alloc, then these
2781 values get lost. */
2782 /* ??? We don't need to preserve all input registers. We only need to
2783 preserve those input registers used as arguments to the sibling call.
2784 It is unclear how to compute that number here. */
2785 if (current_frame_info.n_input_regs != 0)
2786 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2787 GEN_INT (0), GEN_INT (0),
2788 GEN_INT (current_frame_info.n_input_regs),
2789 GEN_INT (0)));
2790 }
2791 }
2792
2793 /* Return 1 if br.ret can do all the work required to return from a
2794 function. */
2795
2796 int
2797 ia64_direct_return ()
2798 {
2799 if (reload_completed && ! frame_pointer_needed)
2800 {
2801 ia64_compute_frame_size (get_frame_size ());
2802
2803 return (current_frame_info.total_size == 0
2804 && current_frame_info.n_spilled == 0
2805 && current_frame_info.reg_save_b0 == 0
2806 && current_frame_info.reg_save_pr == 0
2807 && current_frame_info.reg_save_ar_pfs == 0
2808 && current_frame_info.reg_save_ar_unat == 0
2809 && current_frame_info.reg_save_ar_lc == 0);
2810 }
2811 return 0;
2812 }
2813
2814 int
2815 ia64_hard_regno_rename_ok (from, to)
2816 int from;
2817 int to;
2818 {
2819 /* Don't clobber any of the registers we reserved for the prologue. */
2820 if (to == current_frame_info.reg_fp
2821 || to == current_frame_info.reg_save_b0
2822 || to == current_frame_info.reg_save_pr
2823 || to == current_frame_info.reg_save_ar_pfs
2824 || to == current_frame_info.reg_save_ar_unat
2825 || to == current_frame_info.reg_save_ar_lc)
2826 return 0;
2827
2828 if (from == current_frame_info.reg_fp
2829 || from == current_frame_info.reg_save_b0
2830 || from == current_frame_info.reg_save_pr
2831 || from == current_frame_info.reg_save_ar_pfs
2832 || from == current_frame_info.reg_save_ar_unat
2833 || from == current_frame_info.reg_save_ar_lc)
2834 return 0;
2835
2836 /* Don't use output registers outside the register frame. */
2837 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2838 return 0;
2839
2840 /* Retain even/oddness on predicate register pairs. */
2841 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2842 return (from & 1) == (to & 1);
2843
2844 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2845 if (from == GR_REG (4) && current_function_calls_setjmp)
2846 return 0;
2847
2848 return 1;
2849 }
2850
2851 /* Target hook for assembling integer objects. Handle word-sized
2852 aligned objects and detect the cases when @fptr is needed. */
2853
2854 static bool
2855 ia64_assemble_integer (x, size, aligned_p)
2856 rtx x;
2857 unsigned int size;
2858 int aligned_p;
2859 {
2860 if (size == UNITS_PER_WORD && aligned_p
2861 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2862 && GET_CODE (x) == SYMBOL_REF
2863 && SYMBOL_REF_FLAG (x))
2864 {
2865 fputs ("\tdata8\t@fptr(", asm_out_file);
2866 output_addr_const (asm_out_file, x);
2867 fputs (")\n", asm_out_file);
2868 return true;
2869 }
2870 return default_assemble_integer (x, size, aligned_p);
2871 }
2872
2873 /* Emit the function prologue. */
2874
2875 static void
2876 ia64_output_function_prologue (file, size)
2877 FILE *file;
2878 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2879 {
2880 int mask, grsave, grsave_prev;
2881
2882 if (current_frame_info.need_regstk)
2883 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2884 current_frame_info.n_input_regs,
2885 current_frame_info.n_local_regs,
2886 current_frame_info.n_output_regs,
2887 current_frame_info.n_rotate_regs);
2888
2889 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2890 return;
2891
2892 /* Emit the .prologue directive. */
2893
2894 mask = 0;
2895 grsave = grsave_prev = 0;
2896 if (current_frame_info.reg_save_b0 != 0)
2897 {
2898 mask |= 8;
2899 grsave = grsave_prev = current_frame_info.reg_save_b0;
2900 }
2901 if (current_frame_info.reg_save_ar_pfs != 0
2902 && (grsave_prev == 0
2903 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2904 {
2905 mask |= 4;
2906 if (grsave_prev == 0)
2907 grsave = current_frame_info.reg_save_ar_pfs;
2908 grsave_prev = current_frame_info.reg_save_ar_pfs;
2909 }
2910 if (current_frame_info.reg_fp != 0
2911 && (grsave_prev == 0
2912 || current_frame_info.reg_fp == grsave_prev + 1))
2913 {
2914 mask |= 2;
2915 if (grsave_prev == 0)
2916 grsave = HARD_FRAME_POINTER_REGNUM;
2917 grsave_prev = current_frame_info.reg_fp;
2918 }
2919 if (current_frame_info.reg_save_pr != 0
2920 && (grsave_prev == 0
2921 || current_frame_info.reg_save_pr == grsave_prev + 1))
2922 {
2923 mask |= 1;
2924 if (grsave_prev == 0)
2925 grsave = current_frame_info.reg_save_pr;
2926 }
2927
2928 if (mask)
2929 fprintf (file, "\t.prologue %d, %d\n", mask,
2930 ia64_dbx_register_number (grsave));
2931 else
2932 fputs ("\t.prologue\n", file);
2933
2934 /* Emit a .spill directive, if necessary, to relocate the base of
2935 the register spill area. */
2936 if (current_frame_info.spill_cfa_off != -16)
2937 fprintf (file, "\t.spill %ld\n",
2938 (long) (current_frame_info.spill_cfa_off
2939 + current_frame_info.spill_size));
2940 }
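/* Editorial illustration: the mask passed to .prologue encodes which of
   rp (8), ar.pfs (4), the frame pointer (2) and the predicates (1) were
   saved in consecutive general registers, with GRSAVE naming the first
   register of that run.  For a hypothetical frame where b0, ar.pfs, the
   FP and the predicates were saved in four consecutive locals, the
   directive emitted above would be ".prologue 15, <grsave>".  */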
2941
2942 /* Emit the .body directive at the scheduled end of the prologue. */
2943
2944 static void
2945 ia64_output_function_end_prologue (file)
2946 FILE *file;
2947 {
2948 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2949 return;
2950
2951 fputs ("\t.body\n", file);
2952 }
2953
2954 /* Emit the function epilogue. */
2955
2956 static void
2957 ia64_output_function_epilogue (file, size)
2958 FILE *file ATTRIBUTE_UNUSED;
2959 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2960 {
2961 int i;
2962
2963 /* Reset from the function's potential modifications. */
2964 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2965
2966 if (current_frame_info.reg_fp)
2967 {
2968 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2969 reg_names[HARD_FRAME_POINTER_REGNUM]
2970 = reg_names[current_frame_info.reg_fp];
2971 reg_names[current_frame_info.reg_fp] = tmp;
2972 }
2973 if (! TARGET_REG_NAMES)
2974 {
2975 for (i = 0; i < current_frame_info.n_input_regs; i++)
2976 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2977 for (i = 0; i < current_frame_info.n_local_regs; i++)
2978 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2979 for (i = 0; i < current_frame_info.n_output_regs; i++)
2980 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2981 }
2982
2983 current_frame_info.initialized = 0;
2984 }
2985
2986 int
2987 ia64_dbx_register_number (regno)
2988 int regno;
2989 {
2990 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2991 from its home at loc79 to something inside the register frame. We
2992 must perform the same renumbering here for the debug info. */
2993 if (current_frame_info.reg_fp)
2994 {
2995 if (regno == HARD_FRAME_POINTER_REGNUM)
2996 regno = current_frame_info.reg_fp;
2997 else if (regno == current_frame_info.reg_fp)
2998 regno = HARD_FRAME_POINTER_REGNUM;
2999 }
3000
3001 if (IN_REGNO_P (regno))
3002 return 32 + regno - IN_REG (0);
3003 else if (LOC_REGNO_P (regno))
3004 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3005 else if (OUT_REGNO_P (regno))
3006 return (32 + current_frame_info.n_input_regs
3007 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3008 else
3009 return regno;
3010 }
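/* Editorial illustration: with, say, 2 input and 3 local registers, the
   mapping above gives in0 -> 32, in1 -> 33, loc0 -> 34, loc2 -> 36 and
   out0 -> 37, i.e. the debug numbers are packed densely starting at 32
   regardless of how in/loc/out map onto the physical stacked registers.
   The reg_fp swap at the top mirrors the renaming done in
   ia64_expand_prologue.  */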
3011
3012 void
3013 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3014 rtx addr, fnaddr, static_chain;
3015 {
3016 rtx addr_reg, eight = GEN_INT (8);
3017
3018 /* Load up our iterator. */
3019 addr_reg = gen_reg_rtx (Pmode);
3020 emit_move_insn (addr_reg, addr);
3021
3022 /* The first two words are the fake descriptor:
3023 __ia64_trampoline, ADDR+16. */
3024 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3025 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3026 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3027
3028 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3029 copy_to_reg (plus_constant (addr, 16)));
3030 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3031
3032 /* The third word is the target descriptor. */
3033 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3034 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3035
3036 /* The fourth word is the static chain. */
3037 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3038 }
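/* Editorial summary of the stores above: on exit the trampoline holds
       addr +  0:  __ia64_trampoline   (fake descriptor, first word)
       addr +  8:  addr + 16           (fake descriptor, second word)
       addr + 16:  fnaddr              (target function descriptor)
       addr + 24:  static_chain                                      */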
3039 \f
3040 /* Do any needed setup for a variadic function. CUM has not been updated
3041 for the last named argument which has type TYPE and mode MODE.
3042
3043 We generate the actual spill instructions during prologue generation. */
3044
3045 void
3046 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3047 CUMULATIVE_ARGS cum;
3048 int int_mode;
3049 tree type;
3050 int * pretend_size;
3051 int second_time ATTRIBUTE_UNUSED;
3052 {
3053 /* If this is a stdarg function, then skip the current argument. */
3054 if (! current_function_varargs)
3055 ia64_function_arg_advance (&cum, int_mode, type, 1);
3056
3057 if (cum.words < MAX_ARGUMENT_SLOTS)
3058 {
3059 int n = MAX_ARGUMENT_SLOTS - cum.words;
3060 *pretend_size = n * UNITS_PER_WORD;
3061 cfun->machine->n_varargs = n;
3062 }
3063 }
3064
3065 /* Check whether TYPE is a homogeneous floating point aggregate. If
3066 it is, return the mode of the floating point type that appears
3067 in all leaves. If it is not, return VOIDmode.
3068
3069 An aggregate is a homogeneous floating point aggregate if all
3070 fields/elements in it have the same floating point type (e.g.,
3071 SFmode). 128-bit quad-precision floats are excluded. */
3072
3073 static enum machine_mode
3074 hfa_element_mode (type, nested)
3075 tree type;
3076 int nested;
3077 {
3078 enum machine_mode element_mode = VOIDmode;
3079 enum machine_mode mode;
3080 enum tree_code code = TREE_CODE (type);
3081 int know_element_mode = 0;
3082 tree t;
3083
3084 switch (code)
3085 {
3086 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3087 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3088 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3089 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3090 case FUNCTION_TYPE:
3091 return VOIDmode;
3092
3093 /* Fortran complex types are supposed to be HFAs, so we need to handle
3094 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3095 types though. */
3096 case COMPLEX_TYPE:
3097 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
3098 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3099 * BITS_PER_UNIT, MODE_FLOAT, 0);
3100 else
3101 return VOIDmode;
3102
3103 case REAL_TYPE:
3104 /* ??? Should exclude 128-bit long double here. */
3105 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3106 mode if this is contained within an aggregate. */
3107 if (nested)
3108 return TYPE_MODE (type);
3109 else
3110 return VOIDmode;
3111
3112 case ARRAY_TYPE:
3113 return hfa_element_mode (TREE_TYPE (type), 1);
3114
3115 case RECORD_TYPE:
3116 case UNION_TYPE:
3117 case QUAL_UNION_TYPE:
3118 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3119 {
3120 if (TREE_CODE (t) != FIELD_DECL)
3121 continue;
3122
3123 mode = hfa_element_mode (TREE_TYPE (t), 1);
3124 if (know_element_mode)
3125 {
3126 if (mode != element_mode)
3127 return VOIDmode;
3128 }
3129 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3130 return VOIDmode;
3131 else
3132 {
3133 know_element_mode = 1;
3134 element_mode = mode;
3135 }
3136 }
3137 return element_mode;
3138
3139 default:
3140 /* If we reach here, we probably have some front-end specific type
3141 that the backend doesn't know about. This can happen via the
3142 aggregate_value_p call in init_function_start. All we can do is
3143 ignore unknown tree types. */
3144 return VOIDmode;
3145 }
3146
3147 return VOIDmode;
3148 }
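/* Editorial examples (hypothetical types, not taken from this file): a
   struct { float x, y, z; } or a float[4] yields SFmode; a struct whose
   leaves are all doubles (including a Fortran-style complex double)
   yields DFmode; a struct { float a; double b; } mixes element modes and
   therefore yields VOIDmode, i.e. it is not an HFA.  */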
3149
3150 /* Return rtx for register where argument is passed, or zero if it is passed
3151 on the stack. */
3152
3153 /* ??? 128-bit quad-precision floats are always passed in general
3154 registers. */
3155
3156 rtx
3157 ia64_function_arg (cum, mode, type, named, incoming)
3158 CUMULATIVE_ARGS *cum;
3159 enum machine_mode mode;
3160 tree type;
3161 int named;
3162 int incoming;
3163 {
3164 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3165 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3166 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3167 / UNITS_PER_WORD);
3168 int offset = 0;
3169 enum machine_mode hfa_mode = VOIDmode;
3170
3171 /* Integer and float arguments larger than 8 bytes start at the next even
3172 boundary. Aggregates larger than 8 bytes start at the next even boundary
3173 if the aggregate has 16 byte alignment. Net effect is that types with
3174 alignment greater than 8 start at the next even boundary. */
3175 /* ??? The ABI does not specify how to handle aggregates with alignment from
3176 9 to 15 bytes, or greater than 16. We handle them all as if they had
3177 16 byte alignment. Such aggregates can occur only if gcc extensions are
3178 used. */
3179 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3180 : (words > 1))
3181 && (cum->words & 1))
3182 offset = 1;
3183
3184 /* If all argument slots are used, then it must go on the stack. */
3185 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3186 return 0;
3187
3188 /* Check for and handle homogeneous FP aggregates. */
3189 if (type)
3190 hfa_mode = hfa_element_mode (type, 0);
3191
3192 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3193 and unprototyped hfas are passed specially. */
3194 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3195 {
3196 rtx loc[16];
3197 int i = 0;
3198 int fp_regs = cum->fp_regs;
3199 int int_regs = cum->words + offset;
3200 int hfa_size = GET_MODE_SIZE (hfa_mode);
3201 int byte_size;
3202 int args_byte_size;
3203
3204 /* If prototyped, pass it in FR regs then GR regs.
3205 If not prototyped, pass it in both FR and GR regs.
3206
3207 If this is an SFmode aggregate, then it is possible to run out of
3208 FR regs while GR regs are still left. In that case, we pass the
3209 remaining part in the GR regs. */
3210
3211 /* Fill the FP regs. We do this always. We stop if we reach the end
3212 of the argument, the last FP register, or the last argument slot. */
3213
3214 byte_size = ((mode == BLKmode)
3215 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3216 args_byte_size = int_regs * UNITS_PER_WORD;
3217 offset = 0;
3218 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3219 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3220 {
3221 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3222 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3223 + fp_regs)),
3224 GEN_INT (offset));
3225 offset += hfa_size;
3226 args_byte_size += hfa_size;
3227 fp_regs++;
3228 }
3229
3230 /* If no prototype, then the whole thing must go in GR regs. */
3231 if (! cum->prototype)
3232 offset = 0;
3233 /* If this is an SFmode aggregate, then we might have some left over
3234 that needs to go in GR regs. */
3235 else if (byte_size != offset)
3236 int_regs += offset / UNITS_PER_WORD;
3237
3238 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3239
3240 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3241 {
3242 enum machine_mode gr_mode = DImode;
3243
3244 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3245 then this goes in a GR reg left adjusted/little endian, right
3246 adjusted/big endian. */
3247 /* ??? Currently this is handled wrong, because 4-byte hunks are
3248 always right adjusted/little endian. */
3249 if (offset & 0x4)
3250 gr_mode = SImode;
3251 /* If we have an even 4 byte hunk because the aggregate is a
3252 multiple of 4 bytes in size, then this goes in a GR reg right
3253 adjusted/little endian. */
3254 else if (byte_size - offset == 4)
3255 gr_mode = SImode;
3256 /* Complex floats need to have float mode. */
3257 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3258 gr_mode = hfa_mode;
3259
3260 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3261 gen_rtx_REG (gr_mode, (basereg
3262 + int_regs)),
3263 GEN_INT (offset));
3264 offset += GET_MODE_SIZE (gr_mode);
3265 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3266 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3267 }
3268
3269 /* If we ended up using just one location, just return that one loc. */
3270 if (i == 1)
3271 return XEXP (loc[0], 0);
3272 else
3273 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3274 }
3275
3276 /* Integral and aggregates go in general registers. If we have run out of
3277 FR registers, then FP values must also go in general registers. This can
3278 happen when we have a SFmode HFA. */
3279 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3280 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3281 return gen_rtx_REG (mode, basereg + cum->words + offset);
3282
3283 /* If there is a prototype, then FP values go in a FR register when
3284 named, and in a GR register when unnamed. */
3285 else if (cum->prototype)
3286 {
3287 if (! named)
3288 return gen_rtx_REG (mode, basereg + cum->words + offset);
3289 else
3290 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3291 }
3292 /* If there is no prototype, then FP values go in both FR and GR
3293 registers. */
3294 else
3295 {
3296 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3297 gen_rtx_REG (mode, (FR_ARG_FIRST
3298 + cum->fp_regs)),
3299 const0_rtx);
3300 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3301 gen_rtx_REG (mode,
3302 (basereg + cum->words
3303 + offset)),
3304 const0_rtx);
3305
3306 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3307 }
3308 }
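/* Editorial illustration of the HFA path above (hypothetical call): a
   named, prototyped struct { float f[4]; } argument arriving with
   cum->words == 0 and cum->fp_regs == 0 is an SFmode HFA of 16 bytes,
   so the first loop emits four SFmode pieces in FR_ARG_FIRST + 0..3
   (f8-f11 under the IA-64 parameter passing convention) at byte offsets
   0, 4, 8 and 12, and the result is a PARALLEL of those four
   EXPR_LISTs.  */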
3309
3310 /* Return number of words, at the beginning of the argument, that must be
3311 put in registers. 0 if the argument is entirely in registers or entirely
3312 in memory. */
3313
3314 int
3315 ia64_function_arg_partial_nregs (cum, mode, type, named)
3316 CUMULATIVE_ARGS *cum;
3317 enum machine_mode mode;
3318 tree type;
3319 int named ATTRIBUTE_UNUSED;
3320 {
3321 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3322 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3323 / UNITS_PER_WORD);
3324 int offset = 0;
3325
3326 /* Arguments with alignment larger than 8 bytes start at the next even
3327 boundary. */
3328 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3329 : (words > 1))
3330 && (cum->words & 1))
3331 offset = 1;
3332
3333 /* If all argument slots are used, then it must go on the stack. */
3334 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3335 return 0;
3336
3337 /* It doesn't matter whether the argument goes in FR or GR regs. If
3338 it fits within the 8 argument slots, then it goes entirely in
3339 registers. If it extends past the last argument slot, then the rest
3340 goes on the stack. */
3341
3342 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3343 return 0;
3344
3345 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3346 }
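/* Editorial illustration: with cum->words == 6 and a 32-byte aggregate
   (4 words, alignment <= 8), offset stays 0 and 4 + 6 > 8, so the
   function returns 8 - 6 = 2: the first two words travel in argument
   registers and the remaining two words go on the stack.  */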
3347
3348 /* Update CUM to point after this argument. This is patterned after
3349 ia64_function_arg. */
3350
3351 void
3352 ia64_function_arg_advance (cum, mode, type, named)
3353 CUMULATIVE_ARGS *cum;
3354 enum machine_mode mode;
3355 tree type;
3356 int named;
3357 {
3358 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3359 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3360 / UNITS_PER_WORD);
3361 int offset = 0;
3362 enum machine_mode hfa_mode = VOIDmode;
3363
3364 /* If all arg slots are already full, then there is nothing to do. */
3365 if (cum->words >= MAX_ARGUMENT_SLOTS)
3366 return;
3367
3368 /* Arguments with alignment larger than 8 bytes start at the next even
3369 boundary. */
3370 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3371 : (words > 1))
3372 && (cum->words & 1))
3373 offset = 1;
3374
3375 cum->words += words + offset;
3376
3377 /* Check for and handle homogeneous FP aggregates. */
3378 if (type)
3379 hfa_mode = hfa_element_mode (type, 0);
3380
3381 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3382 and unprototyped hfas are passed specially. */
3383 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3384 {
3385 int fp_regs = cum->fp_regs;
3386 /* This is the original value of cum->words + offset. */
3387 int int_regs = cum->words - words;
3388 int hfa_size = GET_MODE_SIZE (hfa_mode);
3389 int byte_size;
3390 int args_byte_size;
3391
3392 /* If prototyped, pass it in FR regs then GR regs.
3393 If not prototyped, pass it in both FR and GR regs.
3394
3395 If this is an SFmode aggregate, then it is possible to run out of
3396 FR regs while GR regs are still left. In that case, we pass the
3397 remaining part in the GR regs. */
3398
3399 /* Fill the FP regs. We do this always. We stop if we reach the end
3400 of the argument, the last FP register, or the last argument slot. */
3401
3402 byte_size = ((mode == BLKmode)
3403 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3404 args_byte_size = int_regs * UNITS_PER_WORD;
3405 offset = 0;
3406 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3407 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3408 {
3409 offset += hfa_size;
3410 args_byte_size += hfa_size;
3411 fp_regs++;
3412 }
3413
3414 cum->fp_regs = fp_regs;
3415 }
3416
3417 /* Integral and aggregates go in general registers. If we have run out of
3418 FR registers, then FP values must also go in general registers. This can
3419 happen when we have a SFmode HFA. */
3420 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3421 cum->int_regs = cum->words;
3422
3423 /* If there is a prototype, then FP values go in a FR register when
3424 named, and in a GR register when unnamed. */
3425 else if (cum->prototype)
3426 {
3427 if (! named)
3428 cum->int_regs = cum->words;
3429 else
3430 /* ??? Complex types should not reach here. */
3431 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3432 }
3433 /* If there is no prototype, then FP values go in both FR and GR
3434 registers. */
3435 else
3436 {
3437 /* ??? Complex types should not reach here. */
3438 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3439 cum->int_regs = cum->words;
3440 }
3441 }
3442
3443 /* Variable sized types are passed by reference. */
3444 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3445
3446 int
3447 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3448 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3449 enum machine_mode mode ATTRIBUTE_UNUSED;
3450 tree type;
3451 int named ATTRIBUTE_UNUSED;
3452 {
3453 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3454 }
3455 \f
3456 /* Implement va_start. */
3457
3458 void
3459 ia64_va_start (stdarg_p, valist, nextarg)
3460 int stdarg_p;
3461 tree valist;
3462 rtx nextarg;
3463 {
3464 int arg_words;
3465 int ofs;
3466
3467 arg_words = current_function_args_info.words;
3468
3469 if (stdarg_p)
3470 ofs = 0;
3471 else
3472 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3473
3474 nextarg = plus_constant (nextarg, ofs);
3475 std_expand_builtin_va_start (1, valist, nextarg);
3476 }
3477
3478 /* Implement va_arg. */
3479
3480 rtx
3481 ia64_va_arg (valist, type)
3482 tree valist, type;
3483 {
3484 tree t;
3485
3486 /* Variable sized types are passed by reference. */
3487 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3488 {
3489 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3490 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3491 }
3492
3493 /* Arguments with alignment larger than 8 bytes start at the next even
3494 boundary. */
3495 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3496 {
3497 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3498 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3499 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3500 build_int_2 (-2 * UNITS_PER_WORD, -1));
3501 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3502 TREE_SIDE_EFFECTS (t) = 1;
3503 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3504 }
3505
3506 return std_expand_builtin_va_arg (valist, type);
3507 }
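/* Editorial illustration: the PLUS/BIT_AND pair above rounds valist up
   to the next 2 * UNITS_PER_WORD (16-byte) boundary; e.g. a valist of
   0x...28 becomes (0x28 + 15) & -16 = 0x30, matching the "next even
   boundary" rule used when such arguments were laid down.  */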
3508 \f
3509 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3510 in a register. */
3511
3512 int
3513 ia64_return_in_memory (valtype)
3514 tree valtype;
3515 {
3516 enum machine_mode mode;
3517 enum machine_mode hfa_mode;
3518 HOST_WIDE_INT byte_size;
3519
3520 mode = TYPE_MODE (valtype);
3521 byte_size = GET_MODE_SIZE (mode);
3522 if (mode == BLKmode)
3523 {
3524 byte_size = int_size_in_bytes (valtype);
3525 if (byte_size < 0)
3526 return 1;
3527 }
3528
3529 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3530
3531 hfa_mode = hfa_element_mode (valtype, 0);
3532 if (hfa_mode != VOIDmode)
3533 {
3534 int hfa_size = GET_MODE_SIZE (hfa_mode);
3535
3536 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3537 return 1;
3538 else
3539 return 0;
3540 }
3541 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3542 return 1;
3543 else
3544 return 0;
3545 }
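/* Editorial illustration: an HFA of eight doubles (64 bytes, 8 elements)
   is returned in the FP argument registers (return 0), while nine
   doubles exceed MAX_ARGUMENT_SLOTS and go to memory (return 1).  A
   non-HFA aggregate is returned in memory once it exceeds
   UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes; types whose size is not
   a compile-time constant (byte_size < 0) also go to memory.  */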
3546
3547 /* Return rtx for register that holds the function return value. */
3548
3549 rtx
3550 ia64_function_value (valtype, func)
3551 tree valtype;
3552 tree func ATTRIBUTE_UNUSED;
3553 {
3554 enum machine_mode mode;
3555 enum machine_mode hfa_mode;
3556
3557 mode = TYPE_MODE (valtype);
3558 hfa_mode = hfa_element_mode (valtype, 0);
3559
3560 if (hfa_mode != VOIDmode)
3561 {
3562 rtx loc[8];
3563 int i;
3564 int hfa_size;
3565 int byte_size;
3566 int offset;
3567
3568 hfa_size = GET_MODE_SIZE (hfa_mode);
3569 byte_size = ((mode == BLKmode)
3570 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3571 offset = 0;
3572 for (i = 0; offset < byte_size; i++)
3573 {
3574 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3575 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3576 GEN_INT (offset));
3577 offset += hfa_size;
3578 }
3579
3580 if (i == 1)
3581 return XEXP (loc[0], 0);
3582 else
3583 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3584 }
3585 else if (FLOAT_TYPE_P (valtype) &&
3586 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3587 return gen_rtx_REG (mode, FR_ARG_FIRST);
3588 else
3589 return gen_rtx_REG (mode, GR_RET_FIRST);
3590 }
3591
3592 /* Print a memory address as an operand to reference that memory location. */
3593
3594 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3595 also call this from ia64_print_operand for memory addresses. */
3596
3597 void
3598 ia64_print_operand_address (stream, address)
3599 FILE * stream ATTRIBUTE_UNUSED;
3600 rtx address ATTRIBUTE_UNUSED;
3601 {
3602 }
3603
3604 /* Print an operand to an assembler instruction.
3605 C Swap and print a comparison operator.
3606 D Print an FP comparison operator.
3607 E Print 32 - constant, for SImode shifts as extract.
3608 e Print 64 - constant, for DImode rotates.
3609 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3610 a floating point register emitted normally.
3611 I Invert a predicate register by adding 1.
3612 J Select the proper predicate register for a condition.
3613 j Select the inverse predicate register for a condition.
3614 O Append .acq for volatile load.
3615 P Postincrement of a MEM.
3616 Q Append .rel for volatile store.
3617 S Shift amount for shladd instruction.
3618 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3619 for Intel assembler.
3620 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3621 for Intel assembler.
3622 r Print register name, or constant 0 as r0. HP compatibility for
3623 Linux kernel. */
3624 void
3625 ia64_print_operand (file, x, code)
3626 FILE * file;
3627 rtx x;
3628 int code;
3629 {
3630 const char *str;
3631
3632 switch (code)
3633 {
3634 case 0:
3635 /* Handled below. */
3636 break;
3637
3638 case 'C':
3639 {
3640 enum rtx_code c = swap_condition (GET_CODE (x));
3641 fputs (GET_RTX_NAME (c), file);
3642 return;
3643 }
3644
3645 case 'D':
3646 switch (GET_CODE (x))
3647 {
3648 case NE:
3649 str = "neq";
3650 break;
3651 case UNORDERED:
3652 str = "unord";
3653 break;
3654 case ORDERED:
3655 str = "ord";
3656 break;
3657 default:
3658 str = GET_RTX_NAME (GET_CODE (x));
3659 break;
3660 }
3661 fputs (str, file);
3662 return;
3663
3664 case 'E':
3665 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3666 return;
3667
3668 case 'e':
3669 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3670 return;
3671
3672 case 'F':
3673 if (x == CONST0_RTX (GET_MODE (x)))
3674 str = reg_names [FR_REG (0)];
3675 else if (x == CONST1_RTX (GET_MODE (x)))
3676 str = reg_names [FR_REG (1)];
3677 else if (GET_CODE (x) == REG)
3678 str = reg_names [REGNO (x)];
3679 else
3680 abort ();
3681 fputs (str, file);
3682 return;
3683
3684 case 'I':
3685 fputs (reg_names [REGNO (x) + 1], file);
3686 return;
3687
3688 case 'J':
3689 case 'j':
3690 {
3691 unsigned int regno = REGNO (XEXP (x, 0));
3692 if (GET_CODE (x) == EQ)
3693 regno += 1;
3694 if (code == 'j')
3695 regno ^= 1;
3696 fputs (reg_names [regno], file);
3697 }
3698 return;
3699
3700 case 'O':
3701 if (MEM_VOLATILE_P (x))
3702 fputs(".acq", file);
3703 return;
3704
3705 case 'P':
3706 {
3707 HOST_WIDE_INT value;
3708
3709 switch (GET_CODE (XEXP (x, 0)))
3710 {
3711 default:
3712 return;
3713
3714 case POST_MODIFY:
3715 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3716 if (GET_CODE (x) == CONST_INT)
3717 value = INTVAL (x);
3718 else if (GET_CODE (x) == REG)
3719 {
3720 fprintf (file, ", %s", reg_names[REGNO (x)]);
3721 return;
3722 }
3723 else
3724 abort ();
3725 break;
3726
3727 case POST_INC:
3728 value = GET_MODE_SIZE (GET_MODE (x));
3729 break;
3730
3731 case POST_DEC:
3732 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3733 break;
3734 }
3735
3736 putc (',', file);
3737 putc (' ', file);
3738 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3739 return;
3740 }
3741
3742 case 'Q':
3743 if (MEM_VOLATILE_P (x))
3744 fputs(".rel", file);
3745 return;
3746
3747 case 'S':
3748 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3749 return;
3750
3751 case 'T':
3752 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3753 {
3754 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3755 return;
3756 }
3757 break;
3758
3759 case 'U':
3760 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3761 {
3762 const char *prefix = "0x";
3763 if (INTVAL (x) & 0x80000000)
3764 {
3765 fprintf (file, "0xffffffff");
3766 prefix = "";
3767 }
3768 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3769 return;
3770 }
3771 break;
3772
3773 case 'r':
3774 /* If this operand is the constant zero, write it as register zero.
3775 Any register, zero, or CONST_INT value is OK here. */
3776 if (GET_CODE (x) == REG)
3777 fputs (reg_names[REGNO (x)], file);
3778 else if (x == CONST0_RTX (GET_MODE (x)))
3779 fputs ("r0", file);
3780 else if (GET_CODE (x) == CONST_INT)
3781 output_addr_const (file, x);
3782 else
3783 output_operand_lossage ("invalid %%r value");
3784 return;
3785
3786 case '+':
3787 {
3788 const char *which;
3789
3790 /* For conditional branches, returns or calls, substitute
3791 sptk, dptk, dpnt, or spnt for %s. */
3792 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3793 if (x)
3794 {
3795 int pred_val = INTVAL (XEXP (x, 0));
3796
3797 /* Guess top and bottom 2% statically predicted. */
3798 if (pred_val < REG_BR_PROB_BASE / 50)
3799 which = ".spnt";
3800 else if (pred_val < REG_BR_PROB_BASE / 2)
3801 which = ".dpnt";
3802 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3803 which = ".dptk";
3804 else
3805 which = ".sptk";
3806 }
3807 else if (GET_CODE (current_output_insn) == CALL_INSN)
3808 which = ".sptk";
3809 else
3810 which = ".dptk";
3811
3812 fputs (which, file);
3813 return;
3814 }
3815
3816 case ',':
3817 x = current_insn_predicate;
3818 if (x)
3819 {
3820 unsigned int regno = REGNO (XEXP (x, 0));
3821 if (GET_CODE (x) == EQ)
3822 regno += 1;
3823 fprintf (file, "(%s) ", reg_names [regno]);
3824 }
3825 return;
3826
3827 default:
3828 output_operand_lossage ("ia64_print_operand: unknown code");
3829 return;
3830 }
3831
3832 switch (GET_CODE (x))
3833 {
3834 /* This happens for the spill/restore instructions. */
3835 case POST_INC:
3836 case POST_DEC:
3837 case POST_MODIFY:
3838 x = XEXP (x, 0);
3839 /* ... fall through ... */
3840
3841 case REG:
3842 fputs (reg_names [REGNO (x)], file);
3843 break;
3844
3845 case MEM:
3846 {
3847 rtx addr = XEXP (x, 0);
3848 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3849 addr = XEXP (addr, 0);
3850 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3851 break;
3852 }
3853
3854 default:
3855 output_addr_const (file, x);
3856 break;
3857 }
3858
3859 return;
3860 }
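
/* Illustrative examples of the codes above (not part of the original
   source):

     %C on (lt (reg r8) (reg r9))        prints "gt" (condition swapped)
     %D on (ne ...)                      prints "neq"
     %r on (const_int 0)                 prints "r0"
     %, on an insn predicated by p6      prints the "(p6) " prefix  */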
3861 \f
3862 /* Calculate the cost of moving data from a register in class FROM to
3863 one in class TO, using MODE. */
3864
3865 int
3866 ia64_register_move_cost (mode, from, to)
3867 enum machine_mode mode;
3868 enum reg_class from, to;
3869 {
3870 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3871 if (to == ADDL_REGS)
3872 to = GR_REGS;
3873 if (from == ADDL_REGS)
3874 from = GR_REGS;
3875
3876 /* All costs are symmetric, so reduce cases by putting the
3877 lower number class as the destination. */
3878 if (from < to)
3879 {
3880 enum reg_class tmp = to;
3881 to = from, from = tmp;
3882 }
3883
3884 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3885 so that we get secondary memory reloads. Between FR_REGS,
3886 we have to make this at least as expensive as MEMORY_MOVE_COST
3887 to avoid spectacularly poor register class preferencing. */
3888 if (mode == TFmode)
3889 {
3890 if (to != GR_REGS || from != GR_REGS)
3891 return MEMORY_MOVE_COST (mode, to, 0);
3892 else
3893 return 3;
3894 }
3895
3896 switch (to)
3897 {
3898 case PR_REGS:
3899 /* Moving between PR registers takes two insns. */
3900 if (from == PR_REGS)
3901 return 3;
3902 /* Moving between PR and anything but GR is impossible. */
3903 if (from != GR_REGS)
3904 return MEMORY_MOVE_COST (mode, to, 0);
3905 break;
3906
3907 case BR_REGS:
3908 /* Moving between BR and anything but GR is impossible. */
3909 if (from != GR_REGS && from != GR_AND_BR_REGS)
3910 return MEMORY_MOVE_COST (mode, to, 0);
3911 break;
3912
3913 case AR_I_REGS:
3914 case AR_M_REGS:
3915 /* Moving between AR and anything but GR is impossible. */
3916 if (from != GR_REGS)
3917 return MEMORY_MOVE_COST (mode, to, 0);
3918 break;
3919
3920 case GR_REGS:
3921 case FR_REGS:
3922 case GR_AND_FR_REGS:
3923 case GR_AND_BR_REGS:
3924 case ALL_REGS:
3925 break;
3926
3927 default:
3928 abort ();
3929 }
3930
3931 return 2;
3932 }
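
/* Rough picture of the costs computed above (illustrative only, not part
   of the original source), for DImode:

     GR <-> GR       2
     PR <-> PR       3 (two insns)
     PR/BR/AR <-> FR MEMORY_MOVE_COST (must go through a GR)

   and for TFmode anything other than GR <-> GR is priced as a memory
   move so that reload uses secondary memory.  */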
3933
3934 /* This function returns the register class required for a secondary
3935 register when copying between one of the registers in CLASS, and X,
3936 using MODE. A return value of NO_REGS means that no secondary register
3937 is required. */
3938
3939 enum reg_class
3940 ia64_secondary_reload_class (class, mode, x)
3941 enum reg_class class;
3942 enum machine_mode mode ATTRIBUTE_UNUSED;
3943 rtx x;
3944 {
3945 int regno = -1;
3946
3947 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3948 regno = true_regnum (x);
3949
3950 switch (class)
3951 {
3952 case BR_REGS:
3953 case AR_M_REGS:
3954 case AR_I_REGS:
3955 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3956 interaction. We end up with two pseudos with overlapping lifetimes
3957 both of which are equiv to the same constant, and both of which need
3958 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3959 changes depending on the path length, which means the qty_first_reg
3960 check in make_regs_eqv can give different answers at different times.
3961 At some point I'll probably need a reload_indi pattern to handle
3962 this.
3963
3964 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3965 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3966 non-general registers for good measure. */
3967 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3968 return GR_REGS;
3969
3970 /* This is needed if a pseudo used as a call_operand gets spilled to a
3971 stack slot. */
3972 if (GET_CODE (x) == MEM)
3973 return GR_REGS;
3974 break;
3975
3976 case FR_REGS:
3977 /* Need to go through general registers to get to other class regs. */
3978 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3979 return GR_REGS;
3980
3981 /* This can happen when a paradoxical subreg is an operand to the
3982 muldi3 pattern. */
3983 /* ??? This shouldn't be necessary after instruction scheduling is
3984 enabled, because paradoxical subregs are not accepted by
3985 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3986 stop the paradoxical subreg stupidity in the *_operand functions
3987 in recog.c. */
3988 if (GET_CODE (x) == MEM
3989 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3990 || GET_MODE (x) == QImode))
3991 return GR_REGS;
3992
3993 /* This can happen because of the ior/and/etc patterns that accept FP
3994 registers as operands. If the third operand is a constant, then it
3995 needs to be reloaded into a FP register. */
3996 if (GET_CODE (x) == CONST_INT)
3997 return GR_REGS;
3998
3999 /* This can happen because of register elimination in a muldi3 insn.
4000 E.g. `26107 * (unsigned long)&u'. */
4001 if (GET_CODE (x) == PLUS)
4002 return GR_REGS;
4003 break;
4004
4005 case PR_REGS:
4006 /* ??? This happens if we cse/gcse a BImode value across a call,
4007 and the function has a nonlocal goto. This is because global
4008 does not allocate call crossing pseudos to hard registers when
4009 current_function_has_nonlocal_goto is true. This is relatively
4010 common for C++ programs that use exceptions. To reproduce,
4011 return NO_REGS and compile libstdc++. */
4012 if (GET_CODE (x) == MEM)
4013 return GR_REGS;
4014
4015 /* This can happen when we take a BImode subreg of a DImode value,
4016 and that DImode value winds up in some non-GR register. */
4017 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4018 return GR_REGS;
4019 break;
4020
4021 case GR_REGS:
4022 /* Since we have no offsettable memory addresses, we need a temporary
4023 to hold the address of the second word. */
4024 if (mode == TImode)
4025 return GR_REGS;
4026 break;
4027
4028 default:
4029 break;
4030 }
4031
4032 return NO_REGS;
4033 }
4034
4035 \f
4036 /* Emit text to declare externally defined variables and functions, because
4037 the Intel assembler does not support undefined externals. */
4038
4039 void
4040 ia64_asm_output_external (file, decl, name)
4041 FILE *file;
4042 tree decl;
4043 const char *name;
4044 {
4045 int save_referenced;
4046
4047 /* GNU as does not need anything here. */
4048 if (TARGET_GNU_AS)
4049 return;
4050
4051 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4052 the linker when we do this, so we need to be careful not to do this for
4053 builtin functions which have no library equivalent. Unfortunately, we
4054 can't tell here whether or not a function will actually be called by
4055 expand_expr, so we pull in library functions even if we may not need
4056 them later. */
4057 if (! strcmp (name, "__builtin_next_arg")
4058 || ! strcmp (name, "alloca")
4059 || ! strcmp (name, "__builtin_constant_p")
4060 || ! strcmp (name, "__builtin_args_info"))
4061 return;
4062
4063 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4064 restore it. */
4065 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4066 if (TREE_CODE (decl) == FUNCTION_DECL)
4067 {
4068 fprintf (file, "%s", TYPE_ASM_OP);
4069 assemble_name (file, name);
4070 putc (',', file);
4071 fprintf (file, TYPE_OPERAND_FMT, "function");
4072 putc ('\n', file);
4073 }
4074 ASM_GLOBALIZE_LABEL (file, name);
4075 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4076 }
4077 \f
4078 /* Parse the -mfixed-range= option string. */
4079
4080 static void
4081 fix_range (const_str)
4082 const char *const_str;
4083 {
4084 int i, first, last;
4085 char *str, *dash, *comma;
4086
4087 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4088 REG2 are either register names or register numbers. The effect
4089 of this option is to mark the registers in the range from REG1 to
4090 REG2 as ``fixed'' so they won't be used by the compiler. This is
4091 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4092
4093 i = strlen (const_str);
4094 str = (char *) alloca (i + 1);
4095 memcpy (str, const_str, i + 1);
4096
4097 while (1)
4098 {
4099 dash = strchr (str, '-');
4100 if (!dash)
4101 {
4102 warning ("value of -mfixed-range must have form REG1-REG2");
4103 return;
4104 }
4105 *dash = '\0';
4106
4107 comma = strchr (dash + 1, ',');
4108 if (comma)
4109 *comma = '\0';
4110
4111 first = decode_reg_name (str);
4112 if (first < 0)
4113 {
4114 warning ("unknown register name: %s", str);
4115 return;
4116 }
4117
4118 last = decode_reg_name (dash + 1);
4119 if (last < 0)
4120 {
4121 warning ("unknown register name: %s", dash + 1);
4122 return;
4123 }
4124
4125 *dash = '-';
4126
4127 if (first > last)
4128 {
4129 warning ("%s-%s is an empty range", str, dash + 1);
4130 return;
4131 }
4132
4133 for (i = first; i <= last; ++i)
4134 fixed_regs[i] = call_used_regs[i] = 1;
4135
4136 if (!comma)
4137 break;
4138
4139 *comma = ',';
4140 str = comma + 1;
4141 }
4142 }
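
/* Usage sketch (not part of the original source): an option such as

     -mfixed-range=f32-f127

   marks f32 through f127 as fixed and call-used so the compiler will not
   allocate them; several ranges may be given separated by commas, e.g.
   the hypothetical -mfixed-range=f32-f127,f12-f15.  */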
4143
4144 static struct machine_function *
4145 ia64_init_machine_status ()
4146 {
4147 return ggc_alloc_cleared (sizeof (struct machine_function));
4148 }
4149
4150 /* Handle TARGET_OPTIONS switches. */
4151
4152 void
4153 ia64_override_options ()
4154 {
4155 if (TARGET_AUTO_PIC)
4156 target_flags |= MASK_CONST_GP;
4157
4158 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
4159 {
4160 warning ("cannot optimize division for both latency and throughput");
4161 target_flags &= ~MASK_INLINE_DIV_THR;
4162 }
4163
4164 if (ia64_fixed_range_string)
4165 fix_range (ia64_fixed_range_string);
4166
4167 if (ia64_tls_size_string)
4168 {
4169 char *end;
4170 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4171 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4172 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4173 else
4174 ia64_tls_size = tmp;
4175 }
4176
4177 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4178 flag_schedule_insns_after_reload = 0;
4179
4180 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4181
4182 init_machine_status = ia64_init_machine_status;
4183 }
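
/* For example (illustrative, not part of the original source),
   -mtls-size=22 selects 22-bit TLS offsets; anything other than 14, 22
   or 64 is rejected by the check above.  */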
4184 \f
4185 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4186 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4187 static enum attr_type ia64_safe_type PARAMS((rtx));
4188
4189 static enum attr_itanium_requires_unit0
4190 ia64_safe_itanium_requires_unit0 (insn)
4191 rtx insn;
4192 {
4193 if (recog_memoized (insn) >= 0)
4194 return get_attr_itanium_requires_unit0 (insn);
4195 else
4196 return ITANIUM_REQUIRES_UNIT0_NO;
4197 }
4198
4199 static enum attr_itanium_class
4200 ia64_safe_itanium_class (insn)
4201 rtx insn;
4202 {
4203 if (recog_memoized (insn) >= 0)
4204 return get_attr_itanium_class (insn);
4205 else
4206 return ITANIUM_CLASS_UNKNOWN;
4207 }
4208
4209 static enum attr_type
4210 ia64_safe_type (insn)
4211 rtx insn;
4212 {
4213 if (recog_memoized (insn) >= 0)
4214 return get_attr_type (insn);
4215 else
4216 return TYPE_UNKNOWN;
4217 }
4218 \f
4219 /* The following collection of routines emit instruction group stop bits as
4220 necessary to avoid dependencies. */
4221
4222 /* Need to track some additional registers as far as serialization is
4223 concerned so we can properly handle br.call and br.ret. We could
4224 make these registers visible to gcc, but since these registers are
4225 never explicitly used in gcc generated code, it seems wasteful to
4226 do so (plus it would make the call and return patterns needlessly
4227 complex). */
4228 #define REG_GP (GR_REG (1))
4229 #define REG_RP (BR_REG (0))
4230 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4231 /* This is used for volatile asms which may require a stop bit immediately
4232 before and after them. */
4233 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4234 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4235 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4236
4237 /* For each register, we keep track of how it has been written in the
4238 current instruction group.
4239
4240 If a register is written unconditionally (no qualifying predicate),
4241 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4242
4243 If a register is written if its qualifying predicate P is true, we
4244 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4245 may be written again by the complement of P (P^1) and when this happens,
4246 WRITE_COUNT gets set to 2.
4247
4248 The result of this is that whenever an insn attempts to write a register
4249 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4250
4251 If a predicate register is written by a floating-point insn, we set
4252 WRITTEN_BY_FP to true.
4253
4254 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4255 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
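
/* A worked example (illustrative only, not part of the original source):

     (p6) mov r8 = 1    ; WRITE_COUNT (r8) = 1, FIRST_PRED = p6
     (p7) mov r8 = 2    ; p7 is p6's complement -> no stop needed,
                        ; WRITE_COUNT (r8) becomes 2
          mov r8 = 3    ; unconditional write with WRITE_COUNT == 2
                        ; -> an insn group barrier (;;) is required

   This relies on the convention, noted below, that P and P+1 are treated
   as complementary predicates for even P.  */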
4256
4257 struct reg_write_state
4258 {
4259 unsigned int write_count : 2;
4260 unsigned int first_pred : 16;
4261 unsigned int written_by_fp : 1;
4262 unsigned int written_by_and : 1;
4263 unsigned int written_by_or : 1;
4264 };
4265
4266 /* Cumulative info for the current instruction group. */
4267 struct reg_write_state rws_sum[NUM_REGS];
4268 /* Info for the current instruction. This gets copied to rws_sum after a
4269 stop bit is emitted. */
4270 struct reg_write_state rws_insn[NUM_REGS];
4271
4272 /* Indicates whether this is the first instruction after a stop bit,
4273 in which case we don't need another stop bit. Without this, we hit
4274 the abort in ia64_variable_issue when scheduling an alloc. */
4275 static int first_instruction;
4276
4277 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4278 RTL for one instruction. */
4279 struct reg_flags
4280 {
4281 unsigned int is_write : 1; /* Is register being written? */
4282 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4283 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4284 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4285 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4286 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4287 };
4288
4289 static void rws_update PARAMS ((struct reg_write_state *, int,
4290 struct reg_flags, int));
4291 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4292 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4293 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4294 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4295 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4296 static void init_insn_group_barriers PARAMS ((void));
4297 static int group_barrier_needed_p PARAMS ((rtx));
4298 static int safe_group_barrier_needed_p PARAMS ((rtx));
4299
4300 /* Update *RWS for REGNO, which is being written by the current instruction,
4301 with predicate PRED, and associated register flags in FLAGS. */
4302
4303 static void
4304 rws_update (rws, regno, flags, pred)
4305 struct reg_write_state *rws;
4306 int regno;
4307 struct reg_flags flags;
4308 int pred;
4309 {
4310 if (pred)
4311 rws[regno].write_count++;
4312 else
4313 rws[regno].write_count = 2;
4314 rws[regno].written_by_fp |= flags.is_fp;
4315 /* ??? Not tracking and/or across differing predicates. */
4316 rws[regno].written_by_and = flags.is_and;
4317 rws[regno].written_by_or = flags.is_or;
4318 rws[regno].first_pred = pred;
4319 }
4320
4321 /* Handle an access to register REGNO of type FLAGS using predicate register
4322 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4323 a dependency with an earlier instruction in the same group. */
4324
4325 static int
4326 rws_access_regno (regno, flags, pred)
4327 int regno;
4328 struct reg_flags flags;
4329 int pred;
4330 {
4331 int need_barrier = 0;
4332
4333 if (regno >= NUM_REGS)
4334 abort ();
4335
4336 if (! PR_REGNO_P (regno))
4337 flags.is_and = flags.is_or = 0;
4338
4339 if (flags.is_write)
4340 {
4341 int write_count;
4342
4343 /* One insn writes same reg multiple times? */
4344 if (rws_insn[regno].write_count > 0)
4345 abort ();
4346
4347 /* Update info for current instruction. */
4348 rws_update (rws_insn, regno, flags, pred);
4349 write_count = rws_sum[regno].write_count;
4350
4351 switch (write_count)
4352 {
4353 case 0:
4354 /* The register has not been written yet. */
4355 rws_update (rws_sum, regno, flags, pred);
4356 break;
4357
4358 case 1:
4359 /* The register has been written via a predicate. If this is
4360 not a complementary predicate, then we need a barrier. */
4361 /* ??? This assumes that P and P+1 are always complementary
4362 predicates for P even. */
4363 if (flags.is_and && rws_sum[regno].written_by_and)
4364 ;
4365 else if (flags.is_or && rws_sum[regno].written_by_or)
4366 ;
4367 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4368 need_barrier = 1;
4369 rws_update (rws_sum, regno, flags, pred);
4370 break;
4371
4372 case 2:
4373 /* The register has been unconditionally written already. We
4374 need a barrier. */
4375 if (flags.is_and && rws_sum[regno].written_by_and)
4376 ;
4377 else if (flags.is_or && rws_sum[regno].written_by_or)
4378 ;
4379 else
4380 need_barrier = 1;
4381 rws_sum[regno].written_by_and = flags.is_and;
4382 rws_sum[regno].written_by_or = flags.is_or;
4383 break;
4384
4385 default:
4386 abort ();
4387 }
4388 }
4389 else
4390 {
4391 if (flags.is_branch)
4392 {
4393 /* Branches have several RAW exceptions that allow us to avoid
4394 barriers. */
4395
4396 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4397 /* RAW dependencies on branch regs are permissible as long
4398 as the writer is a non-branch instruction. Since we
4399 never generate code that uses a branch register written
4400 by a branch instruction, handling this case is
4401 easy. */
4402 return 0;
4403
4404 if (REGNO_REG_CLASS (regno) == PR_REGS
4405 && ! rws_sum[regno].written_by_fp)
4406 /* The predicates of a branch are available within the
4407 same insn group as long as the predicate was written by
4408 something other than a floating-point instruction. */
4409 return 0;
4410 }
4411
4412 if (flags.is_and && rws_sum[regno].written_by_and)
4413 return 0;
4414 if (flags.is_or && rws_sum[regno].written_by_or)
4415 return 0;
4416
4417 switch (rws_sum[regno].write_count)
4418 {
4419 case 0:
4420 /* The register has not been written yet. */
4421 break;
4422
4423 case 1:
4424 /* The register has been written via a predicate. If this is
4425 not a complementary predicate, then we need a barrier. */
4426 /* ??? This assumes that P and P+1 are always complementary
4427 predicates for P even. */
4428 if ((rws_sum[regno].first_pred ^ 1) != pred)
4429 need_barrier = 1;
4430 break;
4431
4432 case 2:
4433 /* The register has been unconditionally written already. We
4434 need a barrier. */
4435 need_barrier = 1;
4436 break;
4437
4438 default:
4439 abort ();
4440 }
4441 }
4442
4443 return need_barrier;
4444 }
4445
4446 static int
4447 rws_access_reg (reg, flags, pred)
4448 rtx reg;
4449 struct reg_flags flags;
4450 int pred;
4451 {
4452 int regno = REGNO (reg);
4453 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4454
4455 if (n == 1)
4456 return rws_access_regno (regno, flags, pred);
4457 else
4458 {
4459 int need_barrier = 0;
4460 while (--n >= 0)
4461 need_barrier |= rws_access_regno (regno + n, flags, pred);
4462 return need_barrier;
4463 }
4464 }
4465
4466 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4467 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4468
4469 static void
4470 update_set_flags (x, pflags, ppred, pcond)
4471 rtx x;
4472 struct reg_flags *pflags;
4473 int *ppred;
4474 rtx *pcond;
4475 {
4476 rtx src = SET_SRC (x);
4477
4478 *pcond = 0;
4479
4480 switch (GET_CODE (src))
4481 {
4482 case CALL:
4483 return;
4484
4485 case IF_THEN_ELSE:
4486 if (SET_DEST (x) == pc_rtx)
4487 /* X is a conditional branch. */
4488 return;
4489 else
4490 {
4491 int is_complemented = 0;
4492
4493 /* X is a conditional move. */
4494 rtx cond = XEXP (src, 0);
4495 if (GET_CODE (cond) == EQ)
4496 is_complemented = 1;
4497 cond = XEXP (cond, 0);
4498 if (GET_CODE (cond) != REG
4499 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4500 abort ();
4501 *pcond = cond;
4502 if (XEXP (src, 1) == SET_DEST (x)
4503 || XEXP (src, 2) == SET_DEST (x))
4504 {
4505 /* X is a conditional move that conditionally writes the
4506 destination. */
4507
4508 /* We need another complement in this case. */
4509 if (XEXP (src, 1) == SET_DEST (x))
4510 is_complemented = ! is_complemented;
4511
4512 *ppred = REGNO (cond);
4513 if (is_complemented)
4514 ++*ppred;
4515 }
4516
4517 /* ??? If this is a conditional write to the dest, then this
4518 instruction does not actually read one source. This probably
4519 doesn't matter, because that source is also the dest. */
4520 /* ??? Multiple writes to predicate registers are allowed
4521 if they are all AND type compares, or if they are all OR
4522 type compares. We do not generate such instructions
4523 currently. */
4524 }
4525 /* ... fall through ... */
4526
4527 default:
4528 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4529 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4530 /* Set pflags->is_fp to 1 so that we know we're dealing
4531 with a floating point comparison when processing the
4532 destination of the SET. */
4533 pflags->is_fp = 1;
4534
4535 /* Discover if this is a parallel comparison. We only handle
4536 and.orcm and or.andcm at present, since we must retain a
4537 strict inverse on the predicate pair. */
4538 else if (GET_CODE (src) == AND)
4539 pflags->is_and = 1;
4540 else if (GET_CODE (src) == IOR)
4541 pflags->is_or = 1;
4542
4543 break;
4544 }
4545 }
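
/* For illustration (not part of the original source): a conditional move

     (set (reg r8) (if_then_else (ne (reg p6) (const_int 0))
                                 (reg r9)
                                 (reg r8)))

   only really modifies r8 when p6 is true, so the code above sets *PCOND
   to the p6 rtx and *PPRED to p6's register number; had the comparison
   been EQ, or had the first arm been the destination, the complementary
   predicate p7 would have been chosen instead.  */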
4546
4547 /* Subroutine of rtx_needs_barrier; this function determines whether the
4548 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4549 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4550 for this insn. */
4551
4552 static int
4553 set_src_needs_barrier (x, flags, pred, cond)
4554 rtx x;
4555 struct reg_flags flags;
4556 int pred;
4557 rtx cond;
4558 {
4559 int need_barrier = 0;
4560 rtx dst;
4561 rtx src = SET_SRC (x);
4562
4563 if (GET_CODE (src) == CALL)
4564 /* We don't need to worry about the result registers that
4565 get written by subroutine call. */
4566 return rtx_needs_barrier (src, flags, pred);
4567 else if (SET_DEST (x) == pc_rtx)
4568 {
4569 /* X is a conditional branch. */
4570 /* ??? This seems redundant, as the caller sets this bit for
4571 all JUMP_INSNs. */
4572 flags.is_branch = 1;
4573 return rtx_needs_barrier (src, flags, pred);
4574 }
4575
4576 need_barrier = rtx_needs_barrier (src, flags, pred);
4577
4578 /* This instruction unconditionally uses a predicate register. */
4579 if (cond)
4580 need_barrier |= rws_access_reg (cond, flags, 0);
4581
4582 dst = SET_DEST (x);
4583 if (GET_CODE (dst) == ZERO_EXTRACT)
4584 {
4585 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4586 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4587 dst = XEXP (dst, 0);
4588 }
4589 return need_barrier;
4590 }
4591
4592 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4593 Return 1 if this access creates a dependency with an earlier instruction
4594 in the same group. */
4595
4596 static int
4597 rtx_needs_barrier (x, flags, pred)
4598 rtx x;
4599 struct reg_flags flags;
4600 int pred;
4601 {
4602 int i, j;
4603 int is_complemented = 0;
4604 int need_barrier = 0;
4605 const char *format_ptr;
4606 struct reg_flags new_flags;
4607 rtx cond = 0;
4608
4609 if (! x)
4610 return 0;
4611
4612 new_flags = flags;
4613
4614 switch (GET_CODE (x))
4615 {
4616 case SET:
4617 update_set_flags (x, &new_flags, &pred, &cond);
4618 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4619 if (GET_CODE (SET_SRC (x)) != CALL)
4620 {
4621 new_flags.is_write = 1;
4622 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4623 }
4624 break;
4625
4626 case CALL:
4627 new_flags.is_write = 0;
4628 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4629
4630 /* Avoid multiple register writes, in case this is a pattern with
4631 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4632 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4633 {
4634 new_flags.is_write = 1;
4635 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4636 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4637 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4638 }
4639 break;
4640
4641 case COND_EXEC:
4642 /* X is a predicated instruction. */
4643
4644 cond = COND_EXEC_TEST (x);
4645 if (pred)
4646 abort ();
4647 need_barrier = rtx_needs_barrier (cond, flags, 0);
4648
4649 if (GET_CODE (cond) == EQ)
4650 is_complemented = 1;
4651 cond = XEXP (cond, 0);
4652 if (GET_CODE (cond) != REG
4653 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4654 abort ();
4655 pred = REGNO (cond);
4656 if (is_complemented)
4657 ++pred;
4658
4659 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4660 return need_barrier;
4661
4662 case CLOBBER:
4663 case USE:
4664 /* Clobber & use are for earlier compiler-phases only. */
4665 break;
4666
4667 case ASM_OPERANDS:
4668 case ASM_INPUT:
4669 /* We always emit stop bits for traditional asms. We emit stop bits
4670 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4671 if (GET_CODE (x) != ASM_OPERANDS
4672 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4673 {
4674 /* Avoid writing the register multiple times if we have multiple
4675 asm outputs. This avoids an abort in rws_access_reg. */
4676 if (! rws_insn[REG_VOLATILE].write_count)
4677 {
4678 new_flags.is_write = 1;
4679 rws_access_regno (REG_VOLATILE, new_flags, pred);
4680 }
4681 return 1;
4682 }
4683
4684 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4685 We cannot just fall through here, since then we would be confused
4686 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4687 a traditional asm as it would in its normal usage. */
4688
4689 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4690 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4691 need_barrier = 1;
4692 break;
4693
4694 case PARALLEL:
4695 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4696 {
4697 rtx pat = XVECEXP (x, 0, i);
4698 if (GET_CODE (pat) == SET)
4699 {
4700 update_set_flags (pat, &new_flags, &pred, &cond);
4701 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4702 }
4703 else if (GET_CODE (pat) == USE
4704 || GET_CODE (pat) == CALL
4705 || GET_CODE (pat) == ASM_OPERANDS)
4706 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4707 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4708 abort ();
4709 }
4710 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4711 {
4712 rtx pat = XVECEXP (x, 0, i);
4713 if (GET_CODE (pat) == SET)
4714 {
4715 if (GET_CODE (SET_SRC (pat)) != CALL)
4716 {
4717 new_flags.is_write = 1;
4718 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4719 pred);
4720 }
4721 }
4722 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4723 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4724 }
4725 break;
4726
4727 case SUBREG:
4728 x = SUBREG_REG (x);
4729 /* FALLTHRU */
4730 case REG:
4731 if (REGNO (x) == AR_UNAT_REGNUM)
4732 {
4733 for (i = 0; i < 64; ++i)
4734 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4735 }
4736 else
4737 need_barrier = rws_access_reg (x, flags, pred);
4738 break;
4739
4740 case MEM:
4741 /* Find the regs used in memory address computation. */
4742 new_flags.is_write = 0;
4743 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4744 break;
4745
4746 case CONST_INT: case CONST_DOUBLE:
4747 case SYMBOL_REF: case LABEL_REF: case CONST:
4748 break;
4749
4750 /* Operators with side-effects. */
4751 case POST_INC: case POST_DEC:
4752 if (GET_CODE (XEXP (x, 0)) != REG)
4753 abort ();
4754
4755 new_flags.is_write = 0;
4756 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4757 new_flags.is_write = 1;
4758 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4759 break;
4760
4761 case POST_MODIFY:
4762 if (GET_CODE (XEXP (x, 0)) != REG)
4763 abort ();
4764
4765 new_flags.is_write = 0;
4766 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4767 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4768 new_flags.is_write = 1;
4769 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4770 break;
4771
4772 /* Handle common unary and binary ops for efficiency. */
4773 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4774 case MOD: case UDIV: case UMOD: case AND: case IOR:
4775 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4776 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4777 case NE: case EQ: case GE: case GT: case LE:
4778 case LT: case GEU: case GTU: case LEU: case LTU:
4779 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4780 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4781 break;
4782
4783 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4784 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4785 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4786 case SQRT: case FFS:
4787 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4788 break;
4789
4790 case UNSPEC:
4791 switch (XINT (x, 1))
4792 {
4793 case UNSPEC_LTOFF_DTPMOD:
4794 case UNSPEC_LTOFF_DTPREL:
4795 case UNSPEC_DTPREL:
4796 case UNSPEC_LTOFF_TPREL:
4797 case UNSPEC_TPREL:
4798 case UNSPEC_PRED_REL_MUTEX:
4799 case UNSPEC_PIC_CALL:
4800 case UNSPEC_MF:
4801 case UNSPEC_FETCHADD_ACQ:
4802 case UNSPEC_BSP_VALUE:
4803 case UNSPEC_FLUSHRS:
4804 case UNSPEC_BUNDLE_SELECTOR:
4805 break;
4806
4807 case UNSPEC_GR_SPILL:
4808 case UNSPEC_GR_RESTORE:
4809 {
4810 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4811 HOST_WIDE_INT bit = (offset >> 3) & 63;
4812
4813 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4814 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
4815 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4816 new_flags, pred);
4817 break;
4818 }
4819
4820 case UNSPEC_FR_SPILL:
4821 case UNSPEC_FR_RESTORE:
4822 case UNSPEC_POPCNT:
4823 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4824 break;
4825
4826 case UNSPEC_ADDP4:
4827 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4828 break;
4829
4830 case UNSPEC_FR_RECIP_APPROX:
4831 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4832 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4833 break;
4834
4835 case UNSPEC_CMPXCHG_ACQ:
4836 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4837 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4838 break;
4839
4840 default:
4841 abort ();
4842 }
4843 break;
4844
4845 case UNSPEC_VOLATILE:
4846 switch (XINT (x, 1))
4847 {
4848 case UNSPECV_ALLOC:
4849 /* Alloc must always be the first instruction of a group.
4850 We force this by always returning true. */
4851 /* ??? We might get better scheduling if we explicitly check for
4852 input/local/output register dependencies, and modify the
4853 scheduler so that alloc is always reordered to the start of
4854 the current group. We could then eliminate all of the
4855 first_instruction code. */
4856 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4857
4858 new_flags.is_write = 1;
4859 rws_access_regno (REG_AR_CFM, new_flags, pred);
4860 return 1;
4861
4862 case UNSPECV_SET_BSP:
4863 need_barrier = 1;
4864 break;
4865
4866 case UNSPECV_BLOCKAGE:
4867 case UNSPECV_INSN_GROUP_BARRIER:
4868 case UNSPECV_BREAK:
4869 case UNSPECV_PSAC_ALL:
4870 case UNSPECV_PSAC_NORMAL:
4871 return 0;
4872
4873 default:
4874 abort ();
4875 }
4876 break;
4877
4878 case RETURN:
4879 new_flags.is_write = 0;
4880 need_barrier = rws_access_regno (REG_RP, flags, pred);
4881 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4882
4883 new_flags.is_write = 1;
4884 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4885 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4886 break;
4887
4888 default:
4889 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4890 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4891 switch (format_ptr[i])
4892 {
4893 case '0': /* unused field */
4894 case 'i': /* integer */
4895 case 'n': /* note */
4896 case 'w': /* wide integer */
4897 case 's': /* pointer to string */
4898 case 'S': /* optional pointer to string */
4899 break;
4900
4901 case 'e':
4902 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4903 need_barrier = 1;
4904 break;
4905
4906 case 'E':
4907 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4908 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4909 need_barrier = 1;
4910 break;
4911
4912 default:
4913 abort ();
4914 }
4915 break;
4916 }
4917 return need_barrier;
4918 }
4919
4920 /* Clear out the state for group_barrier_needed_p at the start of a
4921 sequence of insns. */
4922
4923 static void
4924 init_insn_group_barriers ()
4925 {
4926 memset (rws_sum, 0, sizeof (rws_sum));
4927 first_instruction = 1;
4928 }
4929
4930 /* Given the current state, recorded by previous calls to this function,
4931 determine whether a group barrier (a stop bit) is necessary before INSN.
4932 Return nonzero if so. */
4933
4934 static int
4935 group_barrier_needed_p (insn)
4936 rtx insn;
4937 {
4938 rtx pat;
4939 int need_barrier = 0;
4940 struct reg_flags flags;
4941
4942 memset (&flags, 0, sizeof (flags));
4943 switch (GET_CODE (insn))
4944 {
4945 case NOTE:
4946 break;
4947
4948 case BARRIER:
4949 /* A barrier doesn't imply an instruction group boundary. */
4950 break;
4951
4952 case CODE_LABEL:
4953 memset (rws_insn, 0, sizeof (rws_insn));
4954 return 1;
4955
4956 case CALL_INSN:
4957 flags.is_branch = 1;
4958 flags.is_sibcall = SIBLING_CALL_P (insn);
4959 memset (rws_insn, 0, sizeof (rws_insn));
4960
4961 /* Don't bundle a call following another call. */
4962 if ((pat = prev_active_insn (insn))
4963 && GET_CODE (pat) == CALL_INSN)
4964 {
4965 need_barrier = 1;
4966 break;
4967 }
4968
4969 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4970 break;
4971
4972 case JUMP_INSN:
4973 flags.is_branch = 1;
4974
4975 /* Don't bundle a jump following a call. */
4976 if ((pat = prev_active_insn (insn))
4977 && GET_CODE (pat) == CALL_INSN)
4978 {
4979 need_barrier = 1;
4980 break;
4981 }
4982 /* FALLTHRU */
4983
4984 case INSN:
4985 if (GET_CODE (PATTERN (insn)) == USE
4986 || GET_CODE (PATTERN (insn)) == CLOBBER)
4987 /* Don't care about USE and CLOBBER "insns"---those are used to
4988 indicate to the optimizer that it shouldn't get rid of
4989 certain operations. */
4990 break;
4991
4992 pat = PATTERN (insn);
4993
4994 /* Ug. Hack hacks hacked elsewhere. */
4995 switch (recog_memoized (insn))
4996 {
4997 /* We play dependency tricks with the epilogue in order
4998 to get proper schedules. Undo this for dv analysis. */
4999 case CODE_FOR_epilogue_deallocate_stack:
5000 case CODE_FOR_prologue_allocate_stack:
5001 pat = XVECEXP (pat, 0, 0);
5002 break;
5003
5004 /* The pattern we use for br.cloop confuses the code above.
5005 The second element of the vector is representative. */
5006 case CODE_FOR_doloop_end_internal:
5007 pat = XVECEXP (pat, 0, 1);
5008 break;
5009
5010 /* Doesn't generate code. */
5011 case CODE_FOR_pred_rel_mutex:
5012 case CODE_FOR_prologue_use:
5013 return 0;
5014
5015 default:
5016 break;
5017 }
5018
5019 memset (rws_insn, 0, sizeof (rws_insn));
5020 need_barrier = rtx_needs_barrier (pat, flags, 0);
5021
5022 /* Check to see if the previous instruction was a volatile
5023 asm. */
5024 if (! need_barrier)
5025 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5026 break;
5027
5028 default:
5029 abort ();
5030 }
5031
5032 if (first_instruction)
5033 {
5034 need_barrier = 0;
5035 first_instruction = 0;
5036 }
5037
5038 return need_barrier;
5039 }
5040
5041 /* Like group_barrier_needed_p, but do not clobber the current state. */
5042
5043 static int
5044 safe_group_barrier_needed_p (insn)
5045 rtx insn;
5046 {
5047 struct reg_write_state rws_saved[NUM_REGS];
5048 int saved_first_instruction;
5049 int t;
5050
5051 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5052 saved_first_instruction = first_instruction;
5053
5054 t = group_barrier_needed_p (insn);
5055
5056 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5057 first_instruction = saved_first_instruction;
5058
5059 return t;
5060 }
5061
5062 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
5063 as necessary to eliminate dependencies. This function assumes that
5064 a final instruction scheduling pass has been run which has already
5065 inserted most of the necessary stop bits. This function only inserts
5066 new ones at basic block boundaries, since these are invisible to the
5067 scheduler. */
5068
5069 static void
5070 emit_insn_group_barriers (dump, insns)
5071 FILE *dump;
5072 rtx insns;
5073 {
5074 rtx insn;
5075 rtx last_label = 0;
5076 int insns_since_last_label = 0;
5077
5078 init_insn_group_barriers ();
5079
5080 for (insn = insns; insn; insn = NEXT_INSN (insn))
5081 {
5082 if (GET_CODE (insn) == CODE_LABEL)
5083 {
5084 if (insns_since_last_label)
5085 last_label = insn;
5086 insns_since_last_label = 0;
5087 }
5088 else if (GET_CODE (insn) == NOTE
5089 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5090 {
5091 if (insns_since_last_label)
5092 last_label = insn;
5093 insns_since_last_label = 0;
5094 }
5095 else if (GET_CODE (insn) == INSN
5096 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5097 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5098 {
5099 init_insn_group_barriers ();
5100 last_label = 0;
5101 }
5102 else if (INSN_P (insn))
5103 {
5104 insns_since_last_label = 1;
5105
5106 if (group_barrier_needed_p (insn))
5107 {
5108 if (last_label)
5109 {
5110 if (dump)
5111 fprintf (dump, "Emitting stop before label %d\n",
5112 INSN_UID (last_label));
5113 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5114 insn = last_label;
5115
5116 init_insn_group_barriers ();
5117 last_label = 0;
5118 }
5119 }
5120 }
5121 }
5122 }
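
/* Sketch of the effect (illustrative only, not part of the original
   source): if a register is written just before a label and read right
   after it, and the final schedule left no stop bit at that block
   boundary, this pass turns

       mov r14 = r2
     L17:
       ld8 r15 = [r14]

   into

       mov r14 = r2
       ;;
     L17:
       ld8 r15 = [r14]

   The stop is emitted before the label precisely because such block
   boundaries are invisible to the scheduler.  */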
5123
5124 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5125 This function has to emit all necessary group barriers. */
5126
5127 static void
5128 emit_all_insn_group_barriers (dump, insns)
5129 FILE *dump ATTRIBUTE_UNUSED;
5130 rtx insns;
5131 {
5132 rtx insn;
5133
5134 init_insn_group_barriers ();
5135
5136 for (insn = insns; insn; insn = NEXT_INSN (insn))
5137 {
5138 if (GET_CODE (insn) == BARRIER)
5139 {
5140 rtx last = prev_active_insn (insn);
5141
5142 if (! last)
5143 continue;
5144 if (GET_CODE (last) == JUMP_INSN
5145 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5146 last = prev_active_insn (last);
5147 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5148 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5149
5150 init_insn_group_barriers ();
5151 }
5152 else if (INSN_P (insn))
5153 {
5154 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5155 init_insn_group_barriers ();
5156 else if (group_barrier_needed_p (insn))
5157 {
5158 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5159 init_insn_group_barriers ();
5160 group_barrier_needed_p (insn);
5161 }
5162 }
5163 }
5164 }
5165 \f
5166 static int errata_find_address_regs PARAMS ((rtx *, void *));
5167 static void errata_emit_nops PARAMS ((rtx));
5168 static void fixup_errata PARAMS ((void));
5169
5170 /* This structure is used to track some details about the previous insn
5171 groups so we can determine if it may be necessary to insert NOPs to
5172 work around hardware errata. */
5173 static struct group
5174 {
5175 HARD_REG_SET p_reg_set;
5176 HARD_REG_SET gr_reg_conditionally_set;
5177 } last_group[2];
5178
5179 /* Index into the last_group array. */
5180 static int group_idx;
5181
5182 /* Called through for_each_rtx; determines if a hard register that was
5183 conditionally set in the previous group is used as an address register.
5184 It ensures that for_each_rtx returns 1 in that case. */
5185 static int
5186 errata_find_address_regs (xp, data)
5187 rtx *xp;
5188 void *data ATTRIBUTE_UNUSED;
5189 {
5190 rtx x = *xp;
5191 if (GET_CODE (x) != MEM)
5192 return 0;
5193 x = XEXP (x, 0);
5194 if (GET_CODE (x) == POST_MODIFY)
5195 x = XEXP (x, 0);
5196 if (GET_CODE (x) == REG)
5197 {
5198 struct group *prev_group = last_group + (group_idx ^ 1);
5199 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5200 REGNO (x)))
5201 return 1;
5202 return -1;
5203 }
5204 return 0;
5205 }
5206
5207 /* Called for each insn; this function keeps track of the state in
5208 last_group and emits additional NOPs if necessary to work around
5209 an Itanium A/B step erratum. */
5210 static void
5211 errata_emit_nops (insn)
5212 rtx insn;
5213 {
5214 struct group *this_group = last_group + group_idx;
5215 struct group *prev_group = last_group + (group_idx ^ 1);
5216 rtx pat = PATTERN (insn);
5217 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5218 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5219 enum attr_type type;
5220 rtx set = real_pat;
5221
5222 if (GET_CODE (real_pat) == USE
5223 || GET_CODE (real_pat) == CLOBBER
5224 || GET_CODE (real_pat) == ASM_INPUT
5225 || GET_CODE (real_pat) == ADDR_VEC
5226 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5227 || asm_noperands (PATTERN (insn)) >= 0)
5228 return;
5229
5230 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5231 parts of it. */
5232
5233 if (GET_CODE (set) == PARALLEL)
5234 {
5235 int i;
5236 set = XVECEXP (real_pat, 0, 0);
5237 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5238 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5239 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5240 {
5241 set = 0;
5242 break;
5243 }
5244 }
5245
5246 if (set && GET_CODE (set) != SET)
5247 set = 0;
5248
5249 type = get_attr_type (insn);
5250
5251 if (type == TYPE_F
5252 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5253 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5254
5255 if ((type == TYPE_M || type == TYPE_A) && cond && set
5256 && REG_P (SET_DEST (set))
5257 && GET_CODE (SET_SRC (set)) != PLUS
5258 && GET_CODE (SET_SRC (set)) != MINUS
5259 && (GET_CODE (SET_SRC (set)) != ASHIFT
5260 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5261 && (GET_CODE (SET_SRC (set)) != MEM
5262 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5263 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5264 {
5265 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5266 || ! REG_P (XEXP (cond, 0)))
5267 abort ();
5268
5269 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5270 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5271 }
5272 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5273 {
5274 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5275 emit_insn_before (gen_nop (), insn);
5276 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5277 group_idx = 0;
5278 memset (last_group, 0, sizeof last_group);
5279 }
5280 }
5281
5282 /* Emit extra nops if they are required to work around hardware errata. */
5283
5284 static void
5285 fixup_errata ()
5286 {
5287 rtx insn;
5288
5289 if (! TARGET_B_STEP)
5290 return;
5291
5292 group_idx = 0;
5293 memset (last_group, 0, sizeof last_group);
5294
5295 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5296 {
5297 if (!INSN_P (insn))
5298 continue;
5299
5300 if (ia64_safe_type (insn) == TYPE_S)
5301 {
5302 group_idx ^= 1;
5303 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5304 }
5305 else
5306 errata_emit_nops (insn);
5307 }
5308 }
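
/* Sketch of the resulting workaround (illustrative only, not part of the
   original source): when a general register that was conditionally set in
   the previous group is used as an address, errata_emit_nops inserts

       ;;
       nop 0
       ;;

   immediately before the offending memory insn, forcing an intervening
   instruction group.  */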
5309 \f
5310 /* Instruction scheduling support. */
5311 /* Describe one bundle. */
5312
5313 struct bundle
5314 {
5315 /* Zero if there's no possibility of a stop in this bundle other than
5316 at the end, otherwise the position of the optional stop bit. */
5317 int possible_stop;
5318 /* The types of the three slots. */
5319 enum attr_type t[3];
5320 /* The pseudo op to be emitted into the assembler output. */
5321 const char *name;
5322 };
5323
5324 #define NR_BUNDLES 10
5325
5326 /* A list of all available bundles. */
5327
5328 static const struct bundle bundle[NR_BUNDLES] =
5329 {
5330 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5331 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5332 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5333 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5334 #if NR_BUNDLES == 10
5335 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5336 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5337 #endif
5338 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5339 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5340 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5341 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5342 it matches an L type insn. Otherwise we'll try to generate L type
5343 nops. */
5344 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5345 };
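
/* For illustration (not part of the original source): the NAME field is
   the bundle selector pseudo-op emitted into the assembly output, e.g. an
   .mmi bundle using its optional mid-bundle stop might look like

       { .mmi
         ld8 r14 = [r32] ;;
         st8 [r33] = r14
         add r8 = r9, r10
       }

   with the stop's position recorded in POSSIBLE_STOP (1 for .mmi, 2 for
   .mii, per the table above).  */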
5346
5347 /* Describe a packet of instructions. Packets consist of two bundles that
5348 are visible to the hardware in one scheduling window. */
5349
5350 struct ia64_packet
5351 {
5352 const struct bundle *t1, *t2;
5353 /* Precomputed value of the first split issue in this packet if a cycle
5354 starts at its beginning. */
5355 int first_split;
5356 /* For convenience, the insn types are replicated here so we don't have
5357 to go through T1 and T2 all the time. */
5358 enum attr_type t[6];
5359 };
5360
5361 /* An array containing all possible packets. */
5362 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5363 static struct ia64_packet packets[NR_PACKETS];
5364
5365 /* Map attr_type to a string with the name. */
5366
5367 static const char *const type_names[] =
5368 {
5369 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5370 };
5371
5372 /* Nonzero if we should insert stop bits into the schedule. */
5373 int ia64_final_schedule = 0;
5374
5375 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5376 static rtx ia64_single_set PARAMS ((rtx));
5377 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5378 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5379 static void maybe_rotate PARAMS ((FILE *));
5380 static void finish_last_head PARAMS ((FILE *, int));
5381 static void rotate_one_bundle PARAMS ((FILE *));
5382 static void rotate_two_bundles PARAMS ((FILE *));
5383 static void nop_cycles_until PARAMS ((int, FILE *));
5384 static void cycle_end_fill_slots PARAMS ((FILE *));
5385 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5386 static int get_split PARAMS ((const struct ia64_packet *, int));
5387 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5388 const struct ia64_packet *, int));
5389 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5390 rtx *, enum attr_type *, int));
5391 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5392 static void dump_current_packet PARAMS ((FILE *));
5393 static void schedule_stop PARAMS ((FILE *));
5394 static rtx gen_nop_type PARAMS ((enum attr_type));
5395 static void ia64_emit_nops PARAMS ((void));
5396
5397 /* Map a bundle number to its pseudo-op. */
5398
5399 const char *
5400 get_bundle_name (b)
5401 int b;
5402 {
5403 return bundle[b].name;
5404 }
5405
5406 /* Compute the slot which will cause a split issue in packet P if the
5407 current cycle begins at slot BEGIN. */
5408
5409 static int
5410 itanium_split_issue (p, begin)
5411 const struct ia64_packet *p;
5412 int begin;
5413 {
5414 int type_count[TYPE_S];
5415 int i;
5416 int split = 6;
5417
5418 if (begin < 3)
5419 {
5420 /* Always split before and after MMF. */
5421 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5422 return 3;
5423 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5424 return 3;
5425 /* Always split after MBB and BBB. */
5426 if (p->t[1] == TYPE_B)
5427 return 3;
5428 /* Split after first bundle in MIB BBB combination. */
5429 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5430 return 3;
5431 }
5432
5433 memset (type_count, 0, sizeof type_count);
5434 for (i = begin; i < split; i++)
5435 {
5436 enum attr_type t0 = p->t[i];
5437 /* An MLX bundle reserves the same units as an MFI bundle. */
5438 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5439 : t0 == TYPE_X ? TYPE_I
5440 : t0);
5441
5442 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5443 2 integer per cycle. */
5444 int max = (t == TYPE_B ? 3 : 2);
5445 if (type_count[t] == max)
5446 return i;
5447
5448 type_count[t]++;
5449 }
5450 return split;
5451 }
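
/* Worked example (illustrative only, not part of the original source):
   for a packet made of two .mii bundles the slot types are M I I M I I;
   starting a cycle at slot 0, the third I-type slot (index 4) exceeds the
   two I units available per cycle, so the function returns 4 as the split
   point.  */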
5452
5453 /* Return the maximum number of instructions a cpu can issue. */
5454
5455 static int
5456 ia64_issue_rate ()
5457 {
5458 return 6;
5459 }
5460
5461 /* Helper function - like single_set, but look inside COND_EXEC. */
5462
5463 static rtx
5464 ia64_single_set (insn)
5465 rtx insn;
5466 {
5467 rtx x = PATTERN (insn), ret;
5468 if (GET_CODE (x) == COND_EXEC)
5469 x = COND_EXEC_CODE (x);
5470 if (GET_CODE (x) == SET)
5471 return x;
5472
5473 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5474 Although they are not classical single sets, the second set is there just
5475 to protect it from moving past FP-relative stack accesses. */
5476 switch (recog_memoized (insn))
5477 {
5478 case CODE_FOR_prologue_allocate_stack:
5479 case CODE_FOR_epilogue_deallocate_stack:
5480 ret = XVECEXP (x, 0, 0);
5481 break;
5482
5483 default:
5484 ret = single_set_2 (insn, x);
5485 break;
5486 }
5487
5488 return ret;
5489 }
5490
5491 /* Adjust the cost of a scheduling dependency. Return the new cost of
5492 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5493
5494 static int
5495 ia64_adjust_cost (insn, link, dep_insn, cost)
5496 rtx insn, link, dep_insn;
5497 int cost;
5498 {
5499 enum attr_type dep_type;
5500 enum attr_itanium_class dep_class;
5501 enum attr_itanium_class insn_class;
5502 rtx dep_set, set, src, addr;
5503
5504 if (GET_CODE (PATTERN (insn)) == CLOBBER
5505 || GET_CODE (PATTERN (insn)) == USE
5506 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5507 || GET_CODE (PATTERN (dep_insn)) == USE
5508 /* @@@ Not accurate for indirect calls. */
5509 || GET_CODE (insn) == CALL_INSN
5510 || ia64_safe_type (insn) == TYPE_S)
5511 return 0;
5512
5513 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5514 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5515 return 0;
5516
5517 dep_type = ia64_safe_type (dep_insn);
5518 dep_class = ia64_safe_itanium_class (dep_insn);
5519 insn_class = ia64_safe_itanium_class (insn);
5520
5521 /* Compares that feed a conditional branch can execute in the same
5522 cycle. */
5523 dep_set = ia64_single_set (dep_insn);
5524 set = ia64_single_set (insn);
5525
5526 if (dep_type != TYPE_F
5527 && dep_set
5528 && GET_CODE (SET_DEST (dep_set)) == REG
5529 && PR_REG (REGNO (SET_DEST (dep_set)))
5530 && GET_CODE (insn) == JUMP_INSN)
5531 return 0;
5532
5533 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5534 {
5535 /* ??? Can't find any information in the documentation about whether
5536 a sequence
5537 st [rx] = ra
5538 ld rb = [ry]
5539 splits issue. Assume it doesn't. */
5540 return 0;
5541 }
5542
5543 src = set ? SET_SRC (set) : 0;
5544 addr = 0;
5545 if (set)
5546 {
5547 if (GET_CODE (SET_DEST (set)) == MEM)
5548 addr = XEXP (SET_DEST (set), 0);
5549 else if (GET_CODE (SET_DEST (set)) == SUBREG
5550 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5551 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5552 else
5553 {
5554 addr = src;
5555 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5556 addr = XVECEXP (addr, 0, 0);
5557 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5558 addr = XEXP (addr, 0);
5559 if (GET_CODE (addr) == MEM)
5560 addr = XEXP (addr, 0);
5561 else
5562 addr = 0;
5563 }
5564 }
5565
5566 if (addr && GET_CODE (addr) == POST_MODIFY)
5567 addr = XEXP (addr, 0);
5568
5569 set = ia64_single_set (dep_insn);
5570
5571 if ((dep_class == ITANIUM_CLASS_IALU
5572 || dep_class == ITANIUM_CLASS_ILOG
5573 || dep_class == ITANIUM_CLASS_LD)
5574 && (insn_class == ITANIUM_CLASS_LD
5575 || insn_class == ITANIUM_CLASS_ST))
5576 {
5577 if (! addr || ! set)
5578 abort ();
5579 /* This isn't completely correct - an IALU that feeds an address has
5580 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5581 otherwise. Unfortunately there's no good way to describe this. */
5582 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5583 return cost + 1;
5584 }
5585
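/* Integer and load results feeding multimedia multiplies or shifts incur
   a three cycle latency.  */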
5586 if ((dep_class == ITANIUM_CLASS_IALU
5587 || dep_class == ITANIUM_CLASS_ILOG
5588 || dep_class == ITANIUM_CLASS_LD)
5589 && (insn_class == ITANIUM_CLASS_MMMUL
5590 || insn_class == ITANIUM_CLASS_MMSHF
5591 || insn_class == ITANIUM_CLASS_MMSHFI))
5592 return 3;
5593
5594 if (dep_class == ITANIUM_CLASS_FMAC
5595 && (insn_class == ITANIUM_CLASS_FMISC
5596 || insn_class == ITANIUM_CLASS_FCVTFX
5597 || insn_class == ITANIUM_CLASS_XMPY))
5598 return 7;
5599
5600 if ((dep_class == ITANIUM_CLASS_FMAC
5601 || dep_class == ITANIUM_CLASS_FMISC
5602 || dep_class == ITANIUM_CLASS_FCVTFX
5603 || dep_class == ITANIUM_CLASS_XMPY)
5604 && insn_class == ITANIUM_CLASS_STF)
5605 return 8;
5606
5607 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5608 but HP engineers say any non-MM operation. */
5609 if ((dep_class == ITANIUM_CLASS_MMMUL
5610 || dep_class == ITANIUM_CLASS_MMSHF
5611 || dep_class == ITANIUM_CLASS_MMSHFI)
5612 && insn_class != ITANIUM_CLASS_MMMUL
5613 && insn_class != ITANIUM_CLASS_MMSHF
5614 && insn_class != ITANIUM_CLASS_MMSHFI)
5615 return 4;
5616
5617 return cost;
5618 }
5619
5620 /* Describe the current state of the Itanium pipeline. */
5621 static struct
5622 {
5623 /* The first slot that is used in the current cycle. */
5624 int first_slot;
5625 /* The next slot to fill. */
5626 int cur;
5627 /* The packet we have selected for the current issue window. */
5628 const struct ia64_packet *packet;
5629 /* The position of the split issue that occurs due to issue width
5630 limitations (6 if there's no split issue). */
5631 int split;
5632 /* Record data about the insns scheduled so far in the same issue
5633 window. The elements up to but not including FIRST_SLOT belong
5634 to the previous cycle, the ones starting with FIRST_SLOT belong
5635 to the current cycle. */
5636 enum attr_type types[6];
5637 rtx insns[6];
5638 int stopbit[6];
5639 /* Nonzero if we decided to schedule a stop bit. */
5640 int last_was_stop;
5641 } sched_data;
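/* Slots 0..2 of the arrays above correspond to the first bundle of the
   selected packet, slots 3..5 to the second bundle.  */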
5642
5643 /* Temporary arrays; they have enough elements to hold all insns that
5644 can be ready at the same time while scheduling of the current block.
5645 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5646 static rtx *sched_ready;
5647 static enum attr_type *sched_types;
5648
5649 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5650 of packet P. */
5651
5652 static int
5653 insn_matches_slot (p, itype, slot, insn)
5654 const struct ia64_packet *p;
5655 enum attr_type itype;
5656 int slot;
5657 rtx insn;
5658 {
5659 enum attr_itanium_requires_unit0 u0;
5660 enum attr_type stype = p->t[slot];
5661
5662 if (insn)
5663 {
5664 u0 = ia64_safe_itanium_requires_unit0 (insn);
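/* An insn that must execute on unit 0 of its type cannot use this slot
   if an earlier slot in the current cycle already uses a unit of the
   same type, since that earlier slot is dispersed to unit 0.  */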
5665 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5666 {
5667 int i;
5668 for (i = sched_data.first_slot; i < slot; i++)
5669 if (p->t[i] == stype
5670 || (stype == TYPE_F && p->t[i] == TYPE_L)
5671 || (stype == TYPE_I && p->t[i] == TYPE_X))
5672 return 0;
5673 }
5674 if (GET_CODE (insn) == CALL_INSN)
5675 {
5676 /* Reject calls in multiway branch packets. We want to limit
5677 the number of multiway branches we generate (since the branch
5678 predictor is limited), and this seems to work fairly well.
5679 (If we didn't do this, we'd have to add another test here to
5680 force calls into the third slot of the bundle.) */
5681 if (slot < 3)
5682 {
5683 if (p->t[1] == TYPE_B)
5684 return 0;
5685 }
5686 else
5687 {
5688 if (p->t[4] == TYPE_B)
5689 return 0;
5690 }
5691 }
5692 }
5693
5694 if (itype == stype)
5695 return 1;
5696 if (itype == TYPE_A)
5697 return stype == TYPE_M || stype == TYPE_I;
5698 return 0;
5699 }
5700
5701 /* Like emit_insn_before, but skip cycle_display notes.
5702 ??? When cycle display notes are implemented, update this. */
5703
5704 static void
5705 ia64_emit_insn_before (insn, before)
5706 rtx insn, before;
5707 {
5708 emit_insn_before (insn, before);
5709 }
5710
5711 /* When rotating a bundle out of the issue window, insert a bundle selector
5712 insn in front of it. DUMP is the scheduling dump file or NULL. START
5713 is either 0 or 3, depending on whether we want to emit a bundle selector
5714 for the first bundle or the second bundle in the current issue window.
5715
5716 The selector insns are emitted this late because the selected packet can
5717 be changed until parts of it get rotated out. */
5718
5719 static void
5720 finish_last_head (dump, start)
5721 FILE *dump;
5722 int start;
5723 {
5724 const struct ia64_packet *p = sched_data.packet;
5725 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5726 int bundle_type = b - bundle;
5727 rtx insn;
5728 int i;
5729
5730 if (! ia64_final_schedule)
5731 return;
5732
5733 for (i = start; sched_data.insns[i] == 0; i++)
5734 if (i == start + 3)
5735 abort ();
5736 insn = sched_data.insns[i];
5737
5738 if (dump)
5739 fprintf (dump, "// Emitting template before %d: %s\n",
5740 INSN_UID (insn), b->name);
5741
5742 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5743 }
5744
5745 /* We can't schedule more insns this cycle. Fix up the scheduling state
5746 and advance FIRST_SLOT and CUR.
5747 We have to distribute the insns that are currently found between
5748 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5749 far, they are stored successively in the fields starting at FIRST_SLOT;
5750 now they must be moved to the correct slots.
5751 DUMP is the current scheduling dump file, or NULL. */
5752
5753 static void
5754 cycle_end_fill_slots (dump)
5755 FILE *dump;
5756 {
5757 const struct ia64_packet *packet = sched_data.packet;
5758 int slot, i;
5759 enum attr_type tmp_types[6];
5760 rtx tmp_insns[6];
5761
5762 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5763 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5764
5765 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5766 {
5767 enum attr_type t = tmp_types[i];
5768 if (t != ia64_safe_type (tmp_insns[i]))
5769 abort ();
5770 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5771 {
5772 if (slot > sched_data.split)
5773 abort ();
5774 if (dump)
5775 fprintf (dump, "// Packet needs %s, have %s\n",
5776 type_names[packet->t[slot]], type_names[t]);
5777 sched_data.types[slot] = packet->t[slot];
5778 sched_data.insns[slot] = 0;
5779 sched_data.stopbit[slot] = 0;
5780
5781 /* ??? TYPE_L instructions always fill up two slots, but we don't
5782 support TYPE_L nops. */
5783 if (packet->t[slot] == TYPE_L)
5784 abort ();
5785
5786 slot++;
5787 }
5788
5789 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5790 actual slot type later. */
5791 sched_data.types[slot] = packet->t[slot];
5792 sched_data.insns[slot] = tmp_insns[i];
5793 sched_data.stopbit[slot] = 0;
5794 slot++;
5795
5796 /* TYPE_L instructions always fill up two slots. */
5797 if (t == TYPE_L)
5798 {
5799 sched_data.types[slot] = packet->t[slot];
5800 sched_data.insns[slot] = 0;
5801 sched_data.stopbit[slot] = 0;
5802 slot++;
5803 }
5804 }
5805
5806 /* This isn't right - there's no need to pad out until the forced split;
5807 the CPU will automatically split if an insn isn't ready. */
5808 #if 0
5809 while (slot < sched_data.split)
5810 {
5811 sched_data.types[slot] = packet->t[slot];
5812 sched_data.insns[slot] = 0;
5813 sched_data.stopbit[slot] = 0;
5814 slot++;
5815 }
5816 #endif
5817
5818 sched_data.first_slot = sched_data.cur = slot;
5819 }
5820
5821 /* Bundle rotations, as described in the Itanium optimization manual.
5822 We can rotate either one or both bundles out of the issue window.
5823 DUMP is the current scheduling dump file, or NULL. */
5824
5825 static void
5826 rotate_one_bundle (dump)
5827 FILE *dump;
5828 {
5829 if (dump)
5830 fprintf (dump, "// Rotating one bundle.\n");
5831
5832 finish_last_head (dump, 0);
5833 if (sched_data.cur > 3)
5834 {
5835 sched_data.cur -= 3;
5836 sched_data.first_slot -= 3;
5837 memmove (sched_data.types,
5838 sched_data.types + 3,
5839 sched_data.cur * sizeof *sched_data.types);
5840 memmove (sched_data.stopbit,
5841 sched_data.stopbit + 3,
5842 sched_data.cur * sizeof *sched_data.stopbit);
5843 memmove (sched_data.insns,
5844 sched_data.insns + 3,
5845 sched_data.cur * sizeof *sched_data.insns);
5846 sched_data.packet
5847 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
5848 }
5849 else
5850 {
5851 sched_data.cur = 0;
5852 sched_data.first_slot = 0;
5853 }
5854 }
5855
5856 static void
5857 rotate_two_bundles (dump)
5858 FILE *dump;
5859 {
5860 if (dump)
5861 fprintf (dump, "// Rotating two bundles.\n");
5862
5863 if (sched_data.cur == 0)
5864 return;
5865
5866 finish_last_head (dump, 0);
5867 if (sched_data.cur > 3)
5868 finish_last_head (dump, 3);
5869 sched_data.cur = 0;
5870 sched_data.first_slot = 0;
5871 }
5872
5873 /* We're beginning a new block. Initialize data structures as necessary. */
5874
5875 static void
5876 ia64_sched_init (dump, sched_verbose, max_ready)
5877 FILE *dump ATTRIBUTE_UNUSED;
5878 int sched_verbose ATTRIBUTE_UNUSED;
5879 int max_ready;
5880 {
5881 static int initialized = 0;
5882
5883 if (! initialized)
5884 {
5885 int b1, b2, i;
5886
5887 initialized = 1;
5888
5889 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5890 {
5891 const struct bundle *t1 = bundle + b1;
5892 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5893 {
5894 const struct bundle *t2 = bundle + b2;
5895
5896 packets[i].t1 = t1;
5897 packets[i].t2 = t2;
5898 }
5899 }
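/* packets[b1 * NR_BUNDLES + b2] now pairs bundle B1 (first) with
   bundle B2 (second), covering every ordered pair of templates.  */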
5900 for (i = 0; i < NR_PACKETS; i++)
5901 {
5902 int j;
5903 for (j = 0; j < 3; j++)
5904 packets[i].t[j] = packets[i].t1->t[j];
5905 for (j = 0; j < 3; j++)
5906 packets[i].t[j + 3] = packets[i].t2->t[j];
5907 packets[i].first_split = itanium_split_issue (packets + i, 0);
5908 }
5909
5910 }
5911
5912 init_insn_group_barriers ();
5913
5914 memset (&sched_data, 0, sizeof sched_data);
5915 sched_types = (enum attr_type *) xmalloc (max_ready
5916 * sizeof (enum attr_type));
5917 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5918 }
5919
5920 /* See if the packet P can match the insns we have already scheduled. Return
5921 nonzero if so. In *PSLOT, we store the first slot that is available for
5922 more instructions if we choose this packet.
5923 SPLIT holds the last slot we can use; there is a split issue after it, so
5924 scheduling beyond it would cause us to use more than one cycle. */
5925
5926 static int
5927 packet_matches_p (p, split, pslot)
5928 const struct ia64_packet *p;
5929 int split;
5930 int *pslot;
5931 {
5932 int filled = sched_data.cur;
5933 int first = sched_data.first_slot;
5934 int i, slot;
5935
5936 /* First, check if the first of the two bundles must be a specific one (due
5937 to stop bits). */
5938 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5939 return 0;
5940 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5941 return 0;
5942
5943 for (i = 0; i < first; i++)
5944 if (! insn_matches_slot (p, sched_data.types[i], i,
5945 sched_data.insns[i]))
5946 return 0;
5947 for (i = slot = first; i < filled; i++)
5948 {
5949 while (slot < split)
5950 {
5951 if (insn_matches_slot (p, sched_data.types[i], slot,
5952 sched_data.insns[i]))
5953 break;
5954 slot++;
5955 }
5956 if (slot == split)
5957 return 0;
5958 slot++;
5959 }
5960
5961 if (pslot)
5962 *pslot = slot;
5963 return 1;
5964 }
5965
5966 /* A frontend for itanium_split_issue. For a packet P and a slot
5967 number FIRST that describes the start of the current clock cycle,
5968 return the slot number of the first split issue. This function
5969 uses the cached number found in P if possible. */
5970
5971 static int
5972 get_split (p, first)
5973 const struct ia64_packet *p;
5974 int first;
5975 {
5976 if (first == 0)
5977 return p->first_split;
5978 return itanium_split_issue (p, first);
5979 }
5980
5981 /* Given N_READY insns in the array READY, whose types are found in the
5982 corresponding array TYPES, return the insn that is best suited to be
5983 scheduled in slot SLOT of packet P. */
5984
5985 static int
5986 find_best_insn (ready, types, n_ready, p, slot)
5987 rtx *ready;
5988 enum attr_type *types;
5989 int n_ready;
5990 const struct ia64_packet *p;
5991 int slot;
5992 {
5993 int best = -1;
5994 int best_pri = 0;
5995 while (n_ready-- > 0)
5996 {
5997 rtx insn = ready[n_ready];
5998 if (! insn)
5999 continue;
6000 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
6001 break;
6002 /* If we have equally good insns, one of which has a stricter
6003 slot requirement, prefer the one with the stricter requirement. */
6004 if (best >= 0 && types[n_ready] == TYPE_A)
6005 continue;
6006 if (insn_matches_slot (p, types[n_ready], slot, insn))
6007 {
6008 best = n_ready;
6009 best_pri = INSN_PRIORITY (ready[best]);
6010
6011 /* If there's no way we could get a stricter requirement, stop
6012 looking now. */
6013 if (types[n_ready] != TYPE_A
6014 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
6015 break;
6016 break;
6017 }
6018 }
6019 return best;
6020 }
6021
6022 /* Select the best packet to use given the current scheduler state and the
6023 current ready list.
6024 READY is an array holding N_READY ready insns; TYPES is a corresponding
6025 array that holds their types. Store the best packet in *PPACKET and the
6026 number of insns that can be scheduled in the current cycle in *PBEST. */
6027
6028 static void
6029 find_best_packet (pbest, ppacket, ready, types, n_ready)
6030 int *pbest;
6031 const struct ia64_packet **ppacket;
6032 rtx *ready;
6033 enum attr_type *types;
6034 int n_ready;
6035 {
6036 int first = sched_data.first_slot;
6037 int best = 0;
6038 int lowest_end = 6;
6039 const struct ia64_packet *best_packet = NULL;
6040 int i;
6041
6042 for (i = 0; i < NR_PACKETS; i++)
6043 {
6044 const struct ia64_packet *p = packets + i;
6045 int slot;
6046 int split = get_split (p, first);
6047 int win = 0;
6048 int first_slot, last_slot;
6049 int b_nops = 0;
6050
6051 if (! packet_matches_p (p, split, &first_slot))
6052 continue;
6053
6054 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6055
6056 win = 0;
6057 last_slot = 6;
6058 for (slot = first_slot; slot < split; slot++)
6059 {
6060 int insn_nr;
6061
6062 /* Disallow a degenerate case where the first bundle doesn't
6063 contain anything but NOPs! */
6064 if (first_slot == 0 && win == 0 && slot == 3)
6065 {
6066 win = -1;
6067 break;
6068 }
6069
6070 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6071 if (insn_nr >= 0)
6072 {
6073 sched_ready[insn_nr] = 0;
6074 last_slot = slot;
6075 win++;
6076 }
6077 else if (p->t[slot] == TYPE_B)
6078 b_nops++;
6079 }
6080 /* We must disallow MBB/BBB packets if any of their B slots would be
6081 filled with nops. */
6082 if (last_slot < 3)
6083 {
6084 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6085 win = -1;
6086 }
6087 else
6088 {
6089 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6090 win = -1;
6091 }
6092
6093 if (win > best
6094 || (win == best && last_slot < lowest_end))
6095 {
6096 best = win;
6097 lowest_end = last_slot;
6098 best_packet = p;
6099 }
6100 }
6101 *pbest = best;
6102 *ppacket = best_packet;
6103 }
6104
6105 /* Reorder the ready list so that the insns that can be issued in this cycle
6106 are found in the correct order at the end of the list.
6107 DUMP is the scheduling dump file, or NULL. READY points to the start,
6108 E_READY to the end of the ready list. MAY_FAIL determines what should be
6109 done if no insns can be scheduled in this cycle: if it is zero, we abort,
6110 otherwise we return 0.
6111 Return 1 if any insns can be scheduled in this cycle. */
6112
6113 static int
6114 itanium_reorder (dump, ready, e_ready, may_fail)
6115 FILE *dump;
6116 rtx *ready;
6117 rtx *e_ready;
6118 int may_fail;
6119 {
6120 const struct ia64_packet *best_packet;
6121 int n_ready = e_ready - ready;
6122 int first = sched_data.first_slot;
6123 int i, best, best_split, filled;
6124
6125 for (i = 0; i < n_ready; i++)
6126 sched_types[i] = ia64_safe_type (ready[i]);
6127
6128 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6129
6130 if (best == 0)
6131 {
6132 if (may_fail)
6133 return 0;
6134 abort ();
6135 }
6136
6137 if (dump)
6138 {
6139 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6140 best_packet->t1->name,
6141 best_packet->t2 ? best_packet->t2->name : NULL, best);
6142 }
6143
6144 best_split = itanium_split_issue (best_packet, first);
6145 packet_matches_p (best_packet, best_split, &filled);
6146
6147 for (i = filled; i < best_split; i++)
6148 {
6149 int insn_nr;
6150
6151 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6152 if (insn_nr >= 0)
6153 {
6154 rtx insn = ready[insn_nr];
6155 memmove (ready + insn_nr, ready + insn_nr + 1,
6156 (n_ready - insn_nr - 1) * sizeof (rtx));
6157 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6158 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6159 ready[--n_ready] = insn;
6160 }
6161 }
6162
6163 sched_data.packet = best_packet;
6164 sched_data.split = best_split;
6165 return 1;
6166 }
6167
6168 /* Dump information about the current scheduling state to file DUMP. */
6169
6170 static void
6171 dump_current_packet (dump)
6172 FILE *dump;
6173 {
6174 int i;
6175 fprintf (dump, "// %d slots filled:", sched_data.cur);
6176 for (i = 0; i < sched_data.first_slot; i++)
6177 {
6178 rtx insn = sched_data.insns[i];
6179 fprintf (dump, " %s", type_names[sched_data.types[i]]);
6180 if (insn)
6181 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6182 if (sched_data.stopbit[i])
6183 fprintf (dump, " ;;");
6184 }
6185 fprintf (dump, " :::");
6186 for (i = sched_data.first_slot; i < sched_data.cur; i++)
6187 {
6188 rtx insn = sched_data.insns[i];
6189 enum attr_type t = ia64_safe_type (insn);
6190 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6191 }
6192 fprintf (dump, "\n");
6193 }
6194
6195 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
6196 NULL. */
6197
6198 static void
6199 schedule_stop (dump)
6200 FILE *dump;
6201 {
6202 const struct ia64_packet *best = sched_data.packet;
6203 int i;
6204 int best_stop = 6;
6205
6206 if (dump)
6207 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6208
6209 if (sched_data.cur == 0)
6210 {
6211 if (dump)
6212 fprintf (dump, "// At start of bundle, so nothing to do.\n");
6213
6214 rotate_two_bundles (NULL);
6215 return;
6216 }
6217
6218 for (i = -1; i < NR_PACKETS; i++)
6219 {
6220 /* This is a slight hack to give the current packet the first chance.
6221 This is done to avoid e.g. switching from MIB to MBB bundles. */
6222 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6223 int split = get_split (p, sched_data.first_slot);
6224 const struct bundle *compare;
6225 int next, stoppos;
6226
6227 if (! packet_matches_p (p, split, &next))
6228 continue;
6229
6230 compare = next > 3 ? p->t2 : p->t1;
6231
6232 stoppos = 3;
6233 if (compare->possible_stop)
6234 stoppos = compare->possible_stop;
6235 if (next > 3)
6236 stoppos += 3;
6237
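/* If the template's mid-bundle stop position is unusable (it falls
   before the slots already filled, or it is no better than the best
   stop found so far), fall back to a stop at the end of the bundle;
   templates without a mid-bundle stop have nothing left to try.  */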
6238 if (stoppos < next || stoppos >= best_stop)
6239 {
6240 if (compare->possible_stop == 0)
6241 continue;
6242 stoppos = (next > 3 ? 6 : 3);
6243 }
6244 if (stoppos < next || stoppos >= best_stop)
6245 continue;
6246
6247 if (dump)
6248 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6249 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6250 stoppos);
6251
6252 best_stop = stoppos;
6253 best = p;
6254 }
6255
6256 sched_data.packet = best;
6257 cycle_end_fill_slots (dump);
6258 while (sched_data.cur < best_stop)
6259 {
6260 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6261 sched_data.insns[sched_data.cur] = 0;
6262 sched_data.stopbit[sched_data.cur] = 0;
6263 sched_data.cur++;
6264 }
6265 sched_data.stopbit[sched_data.cur - 1] = 1;
6266 sched_data.first_slot = best_stop;
6267
6268 if (dump)
6269 dump_current_packet (dump);
6270 }
6271
6272 /* If necessary, perform one or two rotations on the scheduling state.
6273 This should only be called if we are starting a new cycle. */
6274
6275 static void
6276 maybe_rotate (dump)
6277 FILE *dump;
6278 {
6279 cycle_end_fill_slots (dump);
6280 if (sched_data.cur == 6)
6281 rotate_two_bundles (dump);
6282 else if (sched_data.cur >= 3)
6283 rotate_one_bundle (dump);
6284 sched_data.first_slot = sched_data.cur;
6285 }
6286
6287 /* The clock cycle when ia64_sched_reorder was last called. */
6288 static int prev_cycle;
6289
6290 /* The first slot used by the previous cycle; this is the saved
6291 value of sched_data.first_slot. */
6292 static int prev_first;
6293
6294 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6295 pad out the delay between MM (shifts, etc.) and integer operations. */
6296
6297 static void
6298 nop_cycles_until (clock_var, dump)
6299 int clock_var;
6300 FILE *dump;
6301 {
6302 int prev_clock = prev_cycle;
6303 int cycles_left = clock_var - prev_clock;
6304 bool did_stop = false;
6305
6306 /* Finish the previous cycle; pad it out with NOPs. */
6307 if (sched_data.cur == 3)
6308 {
6309 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6310 did_stop = true;
6311 maybe_rotate (dump);
6312 }
6313 else if (sched_data.cur > 0)
6314 {
6315 int need_stop = 0;
6316 int split = itanium_split_issue (sched_data.packet, prev_first);
6317
6318 if (sched_data.cur < 3 && split > 3)
6319 {
6320 split = 3;
6321 need_stop = 1;
6322 }
6323
6324 if (split > sched_data.cur)
6325 {
6326 int i;
6327 for (i = sched_data.cur; i < split; i++)
6328 {
6329 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6330 sched_data.types[i] = sched_data.packet->t[i];
6331 sched_data.insns[i] = t;
6332 sched_data.stopbit[i] = 0;
6333 }
6334 sched_data.cur = split;
6335 }
6336
6337 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6338 && cycles_left > 1)
6339 {
6340 int i;
6341 for (i = sched_data.cur; i < 6; i++)
6342 {
6343 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6344 sched_data.types[i] = sched_data.packet->t[i];
6345 sched_data.insns[i] = t;
6346 sched_data.stopbit[i] = 0;
6347 }
6348 sched_data.cur = 6;
6349 cycles_left--;
6350 need_stop = 1;
6351 }
6352
6353 if (need_stop || sched_data.cur == 6)
6354 {
6355 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6356 did_stop = true;
6357 }
6358 maybe_rotate (dump);
6359 }
6360
6361 cycles_left--;
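/* Burn any remaining idle cycles with whole bundles of nops.  Each
   iteration emits one M/I/I bundle of nops; the mid-bundle group
   barrier lets a single bundle account for two idle cycles.  */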
6362 while (cycles_left > 0)
6363 {
6364 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6365 sched_emit_insn (gen_nop_type (TYPE_M));
6366 sched_emit_insn (gen_nop_type (TYPE_I));
6367 if (cycles_left > 1)
6368 {
6369 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6370 cycles_left--;
6371 }
6372 sched_emit_insn (gen_nop_type (TYPE_I));
6373 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6374 did_stop = true;
6375 cycles_left--;
6376 }
6377
6378 if (did_stop)
6379 init_insn_group_barriers ();
6380 }
6381
6382 /* We are about to begin issuing insns for this clock cycle.
6383 Override the default sort algorithm to better slot instructions. */
6384
6385 static int
6386 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6387 reorder_type, clock_var)
6388 FILE *dump ATTRIBUTE_UNUSED;
6389 int sched_verbose ATTRIBUTE_UNUSED;
6390 rtx *ready;
6391 int *pn_ready;
6392 int reorder_type, clock_var;
6393 {
6394 int n_asms;
6395 int n_ready = *pn_ready;
6396 rtx *e_ready = ready + n_ready;
6397 rtx *insnp;
6398
6399 if (sched_verbose)
6400 {
6401 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6402 dump_current_packet (dump);
6403 }
6404
6405 /* Work around the pipeline flush that will occur if the results of
6406 an MM instruction are accessed before the result is ready. Intel
6407 documentation says this only happens with IALU, ISHF, ILOG, LD,
6408 and ST consumers, but experimental evidence shows that *any* non-MM
6409 type instruction will incur the flush. */
6410 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6411 {
6412 for (insnp = ready; insnp < e_ready; insnp++)
6413 {
6414 rtx insn = *insnp, link;
6415 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6416
6417 if (t == ITANIUM_CLASS_MMMUL
6418 || t == ITANIUM_CLASS_MMSHF
6419 || t == ITANIUM_CLASS_MMSHFI)
6420 continue;
6421
6422 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6423 if (REG_NOTE_KIND (link) == 0)
6424 {
6425 rtx other = XEXP (link, 0);
6426 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6427 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6428 {
6429 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6430 goto out;
6431 }
6432 }
6433 }
6434 }
6435 out:
6436
6437 prev_first = sched_data.first_slot;
6438 prev_cycle = clock_var;
6439
6440 if (reorder_type == 0)
6441 maybe_rotate (sched_verbose ? dump : NULL);
6442
6443 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6444 n_asms = 0;
6445 for (insnp = ready; insnp < e_ready; insnp++)
6446 if (insnp < e_ready)
6447 {
6448 rtx insn = *insnp;
6449 enum attr_type t = ia64_safe_type (insn);
6450 if (t == TYPE_UNKNOWN)
6451 {
6452 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6453 || asm_noperands (PATTERN (insn)) >= 0)
6454 {
6455 rtx lowest = ready[n_asms];
6456 ready[n_asms] = insn;
6457 *insnp = lowest;
6458 n_asms++;
6459 }
6460 else
6461 {
6462 rtx highest = ready[n_ready - 1];
6463 ready[n_ready - 1] = insn;
6464 *insnp = highest;
6465 if (ia64_final_schedule && group_barrier_needed_p (insn))
6466 {
6467 schedule_stop (sched_verbose ? dump : NULL);
6468 sched_data.last_was_stop = 1;
6469 maybe_rotate (sched_verbose ? dump : NULL);
6470 }
6471
6472 return 1;
6473 }
6474 }
6475 }
6476 if (n_asms < n_ready)
6477 {
6478 /* Some normal insns to process. Skip the asms. */
6479 ready += n_asms;
6480 n_ready -= n_asms;
6481 }
6482 else if (n_ready > 0)
6483 {
6484 /* Only asm insns left. */
6485 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6486 {
6487 schedule_stop (sched_verbose ? dump : NULL);
6488 sched_data.last_was_stop = 1;
6489 maybe_rotate (sched_verbose ? dump : NULL);
6490 }
6491 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6492 return 1;
6493 }
6494
6495 if (ia64_final_schedule)
6496 {
6497 int nr_need_stop = 0;
6498
6499 for (insnp = ready; insnp < e_ready; insnp++)
6500 if (safe_group_barrier_needed_p (*insnp))
6501 nr_need_stop++;
6502
6503 /* Schedule a stop bit if
6504 - all insns require a stop bit, or
6505 - we are starting a new cycle and _any_ insns require a stop bit.
6506 The reason for the latter is that if our schedule is accurate, then
6507 the additional stop won't decrease performance at this point (since
6508 there's a split issue at this point anyway), but it gives us more
6509 freedom when scheduling the currently ready insns. */
6510 if ((reorder_type == 0 && nr_need_stop)
6511 || (reorder_type == 1 && n_ready == nr_need_stop))
6512 {
6513 schedule_stop (sched_verbose ? dump : NULL);
6514 sched_data.last_was_stop = 1;
6515 maybe_rotate (sched_verbose ? dump : NULL);
6516 if (reorder_type == 1)
6517 return 0;
6518 }
6519 else
6520 {
6521 int deleted = 0;
6522 insnp = e_ready;
6523 /* Move down everything that needs a stop bit, preserving relative
6524 order. */
6525 while (insnp-- > ready + deleted)
6526 while (insnp >= ready + deleted)
6527 {
6528 rtx insn = *insnp;
6529 if (! safe_group_barrier_needed_p (insn))
6530 break;
6531 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6532 *ready = insn;
6533 deleted++;
6534 }
6535 n_ready -= deleted;
6536 ready += deleted;
6537 if (deleted != nr_need_stop)
6538 abort ();
6539 }
6540 }
6541
6542 return itanium_reorder (sched_verbose ? dump : NULL,
6543 ready, e_ready, reorder_type == 1);
6544 }
6545
6546 static int
6547 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6548 FILE *dump;
6549 int sched_verbose;
6550 rtx *ready;
6551 int *pn_ready;
6552 int clock_var;
6553 {
6554 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6555 pn_ready, 0, clock_var);
6556 }
6557
6558 /* Like ia64_sched_reorder, but called after issuing each insn.
6559 Override the default sort algorithm to better slot instructions. */
6560
6561 static int
6562 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6563 FILE *dump ATTRIBUTE_UNUSED;
6564 int sched_verbose ATTRIBUTE_UNUSED;
6565 rtx *ready;
6566 int *pn_ready;
6567 int clock_var;
6568 {
6569 if (sched_data.last_was_stop)
6570 return 0;
6571
6572 /* Detect one special case and try to optimize it.
6573 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6574 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6575 if (sched_data.first_slot == 1
6576 && sched_data.stopbit[0]
6577 && ((sched_data.cur == 4
6578 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6579 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6580 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6581 || (sched_data.cur == 3
6582 && (sched_data.types[1] == TYPE_M
6583 || sched_data.types[1] == TYPE_A)
6584 && (sched_data.types[2] != TYPE_M
6585 && sched_data.types[2] != TYPE_I
6586 && sched_data.types[2] != TYPE_A))))
6587
6588 {
6589 int i, best;
6590 rtx stop = sched_data.insns[1];
6591
6592 /* Search backward for the stop bit that must be there. */
6593 while (1)
6594 {
6595 int insn_code;
6596
6597 stop = PREV_INSN (stop);
6598 if (GET_CODE (stop) != INSN)
6599 abort ();
6600 insn_code = recog_memoized (stop);
6601
6602 /* Ignore .pred.rel.mutex.
6603
6604 ??? Update this to ignore cycle display notes too
6605 ??? once those are implemented */
6606 if (insn_code == CODE_FOR_pred_rel_mutex
6607 || insn_code == CODE_FOR_prologue_use)
6608 continue;
6609
6610 if (insn_code == CODE_FOR_insn_group_barrier)
6611 break;
6612 abort ();
6613 }
6614
6615 /* Adjust the stop bit's slot selector. */
6616 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6617 abort ();
6618 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6619
6620 sched_data.stopbit[0] = 0;
6621 sched_data.stopbit[2] = 1;
6622
6623 sched_data.types[5] = sched_data.types[3];
6624 sched_data.types[4] = sched_data.types[2];
6625 sched_data.types[3] = sched_data.types[1];
6626 sched_data.insns[5] = sched_data.insns[3];
6627 sched_data.insns[4] = sched_data.insns[2];
6628 sched_data.insns[3] = sched_data.insns[1];
6629 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6630 sched_data.cur += 2;
6631 sched_data.first_slot = 3;
6632 for (i = 0; i < NR_PACKETS; i++)
6633 {
6634 const struct ia64_packet *p = packets + i;
6635 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6636 {
6637 sched_data.packet = p;
6638 break;
6639 }
6640 }
6641 rotate_one_bundle (sched_verbose ? dump : NULL);
6642
6643 best = 6;
6644 for (i = 0; i < NR_PACKETS; i++)
6645 {
6646 const struct ia64_packet *p = packets + i;
6647 int split = get_split (p, sched_data.first_slot);
6648 int next;
6649
6650 /* Disallow multiway branches here. */
6651 if (p->t[1] == TYPE_B)
6652 continue;
6653
6654 if (packet_matches_p (p, split, &next) && next < best)
6655 {
6656 best = next;
6657 sched_data.packet = p;
6658 sched_data.split = split;
6659 }
6660 }
6661 if (best == 6)
6662 abort ();
6663 }
6664
6665 if (*pn_ready > 0)
6666 {
6667 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6668 ready, pn_ready, 1,
6669 clock_var);
6670 if (more)
6671 return more;
6672 /* Did we schedule a stop? If so, finish this cycle. */
6673 if (sched_data.cur == sched_data.first_slot)
6674 return 0;
6675 }
6676
6677 if (sched_verbose)
6678 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6679
6680 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6681 if (sched_verbose)
6682 dump_current_packet (dump);
6683 return 0;
6684 }
6685
6686 /* We are about to issue INSN. Return the number of insns left on the
6687 ready queue that can be issued this cycle. */
6688
6689 static int
6690 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6691 FILE *dump;
6692 int sched_verbose;
6693 rtx insn;
6694 int can_issue_more ATTRIBUTE_UNUSED;
6695 {
6696 enum attr_type t = ia64_safe_type (insn);
6697
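/* If the last reorder scheduled a stop bit, emit the corresponding
   group barrier now, in front of the insn about to issue, and reset
   the barrier-tracking state.  */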
6698 if (sched_data.last_was_stop)
6699 {
6700 int t = sched_data.first_slot;
6701 if (t == 0)
6702 t = 3;
6703 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6704 init_insn_group_barriers ();
6705 sched_data.last_was_stop = 0;
6706 }
6707
6708 if (t == TYPE_UNKNOWN)
6709 {
6710 if (sched_verbose)
6711 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6712 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6713 || asm_noperands (PATTERN (insn)) >= 0)
6714 {
6715 /* This must be some kind of asm. Clear the scheduling state. */
6716 rotate_two_bundles (sched_verbose ? dump : NULL);
6717 if (ia64_final_schedule)
6718 group_barrier_needed_p (insn);
6719 }
6720 return 1;
6721 }
6722
6723 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6724 important state info. Don't delete this test. */
6725 if (ia64_final_schedule
6726 && group_barrier_needed_p (insn))
6727 abort ();
6728
6729 sched_data.stopbit[sched_data.cur] = 0;
6730 sched_data.insns[sched_data.cur] = insn;
6731 sched_data.types[sched_data.cur] = t;
6732
6733 sched_data.cur++;
6734 if (sched_verbose)
6735 fprintf (dump, "// Scheduling insn %d of type %s\n",
6736 INSN_UID (insn), type_names[t]);
6737
6738 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6739 {
6740 schedule_stop (sched_verbose ? dump : NULL);
6741 sched_data.last_was_stop = 1;
6742 }
6743
6744 return 1;
6745 }
6746
6747 /* Free data allocated by ia64_sched_init. */
6748
6749 static void
6750 ia64_sched_finish (dump, sched_verbose)
6751 FILE *dump;
6752 int sched_verbose;
6753 {
6754 if (sched_verbose)
6755 fprintf (dump, "// Finishing schedule.\n");
6756 rotate_two_bundles (NULL);
6757 free (sched_types);
6758 free (sched_ready);
6759 }
6760 \f
6761 /* Emit pseudo-ops for the assembler to describe predicate relations.
6762 At present this assumes that we only consider predicate pairs to
6763 be mutex, and that the assembler can deduce proper values from
6764 straight-line code. */
6765
6766 static void
6767 emit_predicate_relation_info ()
6768 {
6769 basic_block bb;
6770
6771 FOR_EACH_BB_REVERSE (bb)
6772 {
6773 int r;
6774 rtx head = bb->head;
6775
6776 /* We only need such notes at code labels. */
6777 if (GET_CODE (head) != CODE_LABEL)
6778 continue;
6779 if (GET_CODE (NEXT_INSN (head)) == NOTE
6780 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6781 head = NEXT_INSN (head);
6782
6783 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6784 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6785 {
6786 rtx p = gen_rtx_REG (BImode, r);
6787 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6788 if (head == bb->end)
6789 bb->end = n;
6790 head = n;
6791 }
6792 }
6793
6794 /* Look for conditional calls that do not return, and protect predicate
6795 relations around them. Otherwise the assembler will assume the call
6796 returns, and complain about uses of call-clobbered predicates after
6797 the call. */
6798 FOR_EACH_BB_REVERSE (bb)
6799 {
6800 rtx insn = bb->head;
6801
6802 while (1)
6803 {
6804 if (GET_CODE (insn) == CALL_INSN
6805 && GET_CODE (PATTERN (insn)) == COND_EXEC
6806 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6807 {
6808 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6809 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6810 if (bb->head == insn)
6811 bb->head = b;
6812 if (bb->end == insn)
6813 bb->end = a;
6814 }
6815
6816 if (insn == bb->end)
6817 break;
6818 insn = NEXT_INSN (insn);
6819 }
6820 }
6821 }
6822
6823 /* Generate a NOP instruction of type T. We will never generate L type
6824 nops. */
6825
6826 static rtx
6827 gen_nop_type (t)
6828 enum attr_type t;
6829 {
6830 switch (t)
6831 {
6832 case TYPE_M:
6833 return gen_nop_m ();
6834 case TYPE_I:
6835 return gen_nop_i ();
6836 case TYPE_B:
6837 return gen_nop_b ();
6838 case TYPE_F:
6839 return gen_nop_f ();
6840 case TYPE_X:
6841 return gen_nop_x ();
6842 default:
6843 abort ();
6844 }
6845 }
6846
6847 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6848 here than while scheduling. */
6849
6850 static void
6851 ia64_emit_nops ()
6852 {
6853 rtx insn;
6854 const struct bundle *b = 0;
6855 int bundle_pos = 0;
6856
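/* Walk the insn stream, tracking the currently selected bundle template
   in B and the position within it in BUNDLE_POS, and emit nops wherever
   a slot of the template would otherwise be left unfilled.  */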
6857 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6858 {
6859 rtx pat;
6860 enum attr_type t;
6861 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6862 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6863 continue;
6864 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
6865 || GET_CODE (insn) == CODE_LABEL)
6866 {
6867 if (b)
6868 while (bundle_pos < 3)
6869 {
6870 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6871 bundle_pos++;
6872 }
6873 if (GET_CODE (insn) != CODE_LABEL)
6874 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6875 else
6876 b = 0;
6877 bundle_pos = 0;
6878 continue;
6879 }
6880 else if (GET_CODE (pat) == UNSPEC_VOLATILE
6881 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
6882 {
6883 int t = INTVAL (XVECEXP (pat, 0, 0));
6884 if (b)
6885 while (bundle_pos < t)
6886 {
6887 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6888 bundle_pos++;
6889 }
6890 continue;
6891 }
6892
6893 if (bundle_pos == 3)
6894 b = 0;
6895
6896 if (b && INSN_P (insn))
6897 {
6898 t = ia64_safe_type (insn);
6899 if (asm_noperands (PATTERN (insn)) >= 0
6900 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6901 {
6902 while (bundle_pos < 3)
6903 {
6904 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6905 bundle_pos++;
6906 }
6907 continue;
6908 }
6909
6910 if (t == TYPE_UNKNOWN)
6911 continue;
6912 while (bundle_pos < 3)
6913 {
6914 if (t == b->t[bundle_pos]
6915 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6916 || b->t[bundle_pos] == TYPE_I)))
6917 break;
6918
6919 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6920 bundle_pos++;
6921 }
6922 if (bundle_pos < 3)
6923 bundle_pos++;
6924 }
6925 }
6926 }
6927
6928 /* Perform machine dependent operations on the rtl chain INSNS. */
6929
6930 void
6931 ia64_reorg (insns)
6932 rtx insns;
6933 {
6934 /* We are freeing block_for_insn in the toplev to keep compatibility
6935 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6936 compute_bb_for_insn (get_max_uid ());
6937
6938 /* If optimizing, we'll have split before scheduling. */
6939 if (optimize == 0)
6940 split_all_insns (0);
6941
6942 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6943 non-optimizing bootstrap. */
6944 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
6945
6946 if (ia64_flag_schedule_insns2)
6947 {
6948 timevar_push (TV_SCHED2);
6949 ia64_final_schedule = 1;
6950 schedule_ebbs (rtl_dump_file);
6951 ia64_final_schedule = 0;
6952 timevar_pop (TV_SCHED2);
6953
6954 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6955 place as they were during scheduling. */
6956 emit_insn_group_barriers (rtl_dump_file, insns);
6957 ia64_emit_nops ();
6958 }
6959 else
6960 emit_all_insn_group_barriers (rtl_dump_file, insns);
6961
6962 /* A call must not be the last instruction in a function: otherwise the
6963 return address would fall outside the function, and unwinding would not
6964 work properly. Note that IA-64 differs from dwarf2 on this point. */
6965 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6966 {
6967 rtx insn;
6968 int saw_stop = 0;
6969
6970 insn = get_last_insn ();
6971 if (! INSN_P (insn))
6972 insn = prev_active_insn (insn);
6973 if (GET_CODE (insn) == INSN
6974 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6975 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6976 {
6977 saw_stop = 1;
6978 insn = prev_active_insn (insn);
6979 }
6980 if (GET_CODE (insn) == CALL_INSN)
6981 {
6982 if (! saw_stop)
6983 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6984 emit_insn (gen_break_f ());
6985 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6986 }
6987 }
6988
6989 fixup_errata ();
6990 emit_predicate_relation_info ();
6991 }
6992 \f
6993 /* Return true if REGNO is used by the epilogue. */
6994
6995 int
6996 ia64_epilogue_uses (regno)
6997 int regno;
6998 {
6999 switch (regno)
7000 {
7001 case R_GR (1):
7002 /* When a function makes a call through a function descriptor, we
7003 will write a (potentially) new value to "gp". After returning
7004 from such a call, we need to make sure the function restores the
7005 original gp-value, even if the function itself does not use the
7006 gp anymore. */
7007 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
7008
7009 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7010 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7011 /* For functions defined with the syscall_linkage attribute, all
7012 input registers are marked as live at all function exits. This
7013 prevents the register allocator from using the input registers,
7014 which in turn makes it possible to restart a system call after
7015 an interrupt without having to save/restore the input registers.
7016 This also prevents kernel data from leaking to application code. */
7017 return lookup_attribute ("syscall_linkage",
7018 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7019
7020 case R_BR (0):
7021 /* Conditional return patterns can't represent the use of `b0' as
7022 the return address, so we force the value live this way. */
7023 return 1;
7024
7025 case AR_PFS_REGNUM:
7026 /* Likewise for ar.pfs, which is used by br.ret. */
7027 return 1;
7028
7029 default:
7030 return 0;
7031 }
7032 }
7033
7034 /* Return true if REGNO is used by the frame unwinder. */
7035
7036 int
7037 ia64_eh_uses (regno)
7038 int regno;
7039 {
7040 if (! reload_completed)
7041 return 0;
7042
7043 if (current_frame_info.reg_save_b0
7044 && regno == current_frame_info.reg_save_b0)
7045 return 1;
7046 if (current_frame_info.reg_save_pr
7047 && regno == current_frame_info.reg_save_pr)
7048 return 1;
7049 if (current_frame_info.reg_save_ar_pfs
7050 && regno == current_frame_info.reg_save_ar_pfs)
7051 return 1;
7052 if (current_frame_info.reg_save_ar_unat
7053 && regno == current_frame_info.reg_save_ar_unat)
7054 return 1;
7055 if (current_frame_info.reg_save_ar_lc
7056 && regno == current_frame_info.reg_save_ar_lc)
7057 return 1;
7058
7059 return 0;
7060 }
7061 \f
7062 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7063
7064 We add @ to the name if this goes in small data/bss. We can only put
7065 a variable in small data/bss if it is defined in this module or a module
7066 that we are statically linked with. We can't check the second condition,
7067 but TREE_STATIC gives us the first one. */
7068
7069 /* ??? If we had IPA, we could check the second condition. We could support
7070 programmer-added section attributes if the variable is not defined in this
7071 module. */
7072
7073 /* ??? See the v850 port for a cleaner way to do this. */
7074
7075 /* ??? We could also support own long data here. Generating movl/add/ld8
7076 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7077 code faster because there is one less load. This also includes incomplete
7078 types which can't go in sdata/sbss. */
7079
7080 static bool
7081 ia64_in_small_data_p (exp)
7082 tree exp;
7083 {
7084 if (TARGET_NO_SDATA)
7085 return false;
7086
7087 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7088 {
7089 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7090 if (strcmp (section, ".sdata") == 0
7091 || strcmp (section, ".sbss") == 0)
7092 return true;
7093 }
7094 else
7095 {
7096 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7097
7098 /* If this is an incomplete type with size 0, then we can't put it
7099 in sdata because it might be too big when completed. */
7100 if (size > 0 && size <= ia64_section_threshold)
7101 return true;
7102 }
7103
7104 return false;
7105 }
7106
7107 static void
7108 ia64_encode_section_info (decl, first)
7109 tree decl;
7110 int first ATTRIBUTE_UNUSED;
7111 {
7112 const char *symbol_str;
7113 bool is_local;
7114 rtx symbol;
7115 char encoding = 0;
7116
7117 if (TREE_CODE (decl) == FUNCTION_DECL)
7118 {
7119 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
7120 return;
7121 }
7122
7123 /* Careful not to prod global register variables. */
7124 if (TREE_CODE (decl) != VAR_DECL
7125 || GET_CODE (DECL_RTL (decl)) != MEM
7126 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
7127 return;
7128
7129 symbol = XEXP (DECL_RTL (decl), 0);
7130 symbol_str = XSTR (symbol, 0);
7131
7132 is_local = (*targetm.binds_local_p) (decl);
7133
7134 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7135 {
7136 enum tls_model kind;
7137 if (!flag_pic)
7138 {
7139 if (is_local)
7140 kind = TLS_MODEL_LOCAL_EXEC;
7141 else
7142 kind = TLS_MODEL_INITIAL_EXEC;
7143 }
7144 else if (is_local)
7145 kind = TLS_MODEL_LOCAL_DYNAMIC;
7146 else
7147 kind = TLS_MODEL_GLOBAL_DYNAMIC;
7148 if (kind < flag_tls_default)
7149 kind = flag_tls_default;
7150
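/* Encode the chosen TLS model as a single letter: 'G' global dynamic,
   'L' local dynamic, 'i' initial exec, 'l' local exec.  TLS model
   values start at 1, so index 0 of the string is unused.  */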
7151 encoding = " GLil"[kind];
7152 }
7153 /* Determine if DECL will wind up in .sdata/.sbss. */
7154 else if (is_local && ia64_in_small_data_p (decl))
7155 encoding = 's';
7156
7157 /* Finally, encode this into the symbol string. */
7158 if (encoding)
7159 {
7160 char *newstr;
7161 size_t len;
7162
7163 if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7164 {
7165 if (encoding == symbol_str[1])
7166 return;
7167 /* ??? Sdata became thread or thread became not thread. Lose. */
7168 abort ();
7169 }
7170
7171 len = strlen (symbol_str);
7172 newstr = alloca (len + 3);
7173 newstr[0] = ENCODE_SECTION_INFO_CHAR;
7174 newstr[1] = encoding;
7175 memcpy (newstr + 2, symbol_str, len + 1);
7176
7177 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
7178 }
7179
7180 /* This decl is marked as being in small data/bss but it shouldn't be;
7181 one likely explanation for this is that the decl has been moved into
7182 a different section from the one it was in when encode_section_info
7183 was first called. Remove the encoding. */
7184 else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7185 XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
7186 }
7187
7188 static const char *
7189 ia64_strip_name_encoding (str)
7190 const char *str;
7191 {
7192 if (str[0] == ENCODE_SECTION_INFO_CHAR)
7193 str += 2;
7194 if (str[0] == '*')
7195 str++;
7196 return str;
7197 }
7198 \f
7199 /* Output assembly directives for prologue regions. */
7200
7201 /* True if the current basic block is the last block of the function. */
7202
7203 static bool last_block;
7204
7205 /* True if we need a copy_state command at the start of the next block. */
7206
7207 static bool need_copy_state;
7208
7209 /* The function emits unwind directives for the start of an epilogue. */
7210
7211 static void
7212 process_epilogue ()
7213 {
7214 /* If this isn't the last block of the function, then we need to label the
7215 current state, and copy it back in at the start of the next block. */
7216
7217 if (!last_block)
7218 {
7219 fprintf (asm_out_file, "\t.label_state 1\n");
7220 need_copy_state = true;
7221 }
7222
7223 fprintf (asm_out_file, "\t.restore sp\n");
7224 }
7225
7226 /* This function processes a SET pattern looking for specific patterns
7227 which result in emitting an assembly directive required for unwinding. */
7228
7229 static int
7230 process_set (asm_out_file, pat)
7231 FILE *asm_out_file;
7232 rtx pat;
7233 {
7234 rtx src = SET_SRC (pat);
7235 rtx dest = SET_DEST (pat);
7236 int src_regno, dest_regno;
7237
7238 /* Look for the ALLOC insn. */
7239 if (GET_CODE (src) == UNSPEC_VOLATILE
7240 && XINT (src, 1) == UNSPECV_ALLOC
7241 && GET_CODE (dest) == REG)
7242 {
7243 dest_regno = REGNO (dest);
7244
7245 /* If this isn't the final destination for ar.pfs, the alloc
7246 shouldn't have been marked frame related. */
7247 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7248 abort ();
7249
7250 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7251 ia64_dbx_register_number (dest_regno));
7252 return 1;
7253 }
7254
7255 /* Look for SP = .... */
7256 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7257 {
7258 if (GET_CODE (src) == PLUS)
7259 {
7260 rtx op0 = XEXP (src, 0);
7261 rtx op1 = XEXP (src, 1);
7262 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7263 {
7264 if (INTVAL (op1) < 0)
7265 {
7266 fputs ("\t.fframe ", asm_out_file);
7267 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7268 -INTVAL (op1));
7269 fputc ('\n', asm_out_file);
7270 }
7271 else
7272 process_epilogue ();
7273 }
7274 else
7275 abort ();
7276 }
7277 else if (GET_CODE (src) == REG
7278 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7279 process_epilogue ();
7280 else
7281 abort ();
7282
7283 return 1;
7284 }
7285
7286 /* Register move we need to look at. */
7287 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7288 {
7289 src_regno = REGNO (src);
7290 dest_regno = REGNO (dest);
7291
7292 switch (src_regno)
7293 {
7294 case BR_REG (0):
7295 /* Saving return address pointer. */
7296 if (dest_regno != current_frame_info.reg_save_b0)
7297 abort ();
7298 fprintf (asm_out_file, "\t.save rp, r%d\n",
7299 ia64_dbx_register_number (dest_regno));
7300 return 1;
7301
7302 case PR_REG (0):
7303 if (dest_regno != current_frame_info.reg_save_pr)
7304 abort ();
7305 fprintf (asm_out_file, "\t.save pr, r%d\n",
7306 ia64_dbx_register_number (dest_regno));
7307 return 1;
7308
7309 case AR_UNAT_REGNUM:
7310 if (dest_regno != current_frame_info.reg_save_ar_unat)
7311 abort ();
7312 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7313 ia64_dbx_register_number (dest_regno));
7314 return 1;
7315
7316 case AR_LC_REGNUM:
7317 if (dest_regno != current_frame_info.reg_save_ar_lc)
7318 abort ();
7319 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7320 ia64_dbx_register_number (dest_regno));
7321 return 1;
7322
7323 case STACK_POINTER_REGNUM:
7324 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7325 || ! frame_pointer_needed)
7326 abort ();
7327 fprintf (asm_out_file, "\t.vframe r%d\n",
7328 ia64_dbx_register_number (dest_regno));
7329 return 1;
7330
7331 default:
7332 /* Everything else should indicate being stored to memory. */
7333 abort ();
7334 }
7335 }
7336
7337 /* Memory store we need to look at. */
7338 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7339 {
7340 long off;
7341 rtx base;
7342 const char *saveop;
7343
7344 if (GET_CODE (XEXP (dest, 0)) == REG)
7345 {
7346 base = XEXP (dest, 0);
7347 off = 0;
7348 }
7349 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7350 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7351 {
7352 base = XEXP (XEXP (dest, 0), 0);
7353 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7354 }
7355 else
7356 abort ();
7357
7358 if (base == hard_frame_pointer_rtx)
7359 {
7360 saveop = ".savepsp";
7361 off = - off;
7362 }
7363 else if (base == stack_pointer_rtx)
7364 saveop = ".savesp";
7365 else
7366 abort ();
7367
7368 src_regno = REGNO (src);
7369 switch (src_regno)
7370 {
7371 case BR_REG (0):
7372 if (current_frame_info.reg_save_b0 != 0)
7373 abort ();
7374 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7375 return 1;
7376
7377 case PR_REG (0):
7378 if (current_frame_info.reg_save_pr != 0)
7379 abort ();
7380 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7381 return 1;
7382
7383 case AR_LC_REGNUM:
7384 if (current_frame_info.reg_save_ar_lc != 0)
7385 abort ();
7386 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7387 return 1;
7388
7389 case AR_PFS_REGNUM:
7390 if (current_frame_info.reg_save_ar_pfs != 0)
7391 abort ();
7392 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7393 return 1;
7394
7395 case AR_UNAT_REGNUM:
7396 if (current_frame_info.reg_save_ar_unat != 0)
7397 abort ();
7398 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7399 return 1;
7400
7401 case GR_REG (4):
7402 case GR_REG (5):
7403 case GR_REG (6):
7404 case GR_REG (7):
7405 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7406 1 << (src_regno - GR_REG (4)));
7407 return 1;
7408
7409 case BR_REG (1):
7410 case BR_REG (2):
7411 case BR_REG (3):
7412 case BR_REG (4):
7413 case BR_REG (5):
7414 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7415 1 << (src_regno - BR_REG (1)));
7416 return 1;
7417
7418 case FR_REG (2):
7419 case FR_REG (3):
7420 case FR_REG (4):
7421 case FR_REG (5):
7422 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7423 1 << (src_regno - FR_REG (2)));
7424 return 1;
7425
7426 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7427 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7428 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7429 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7430 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7431 1 << (src_regno - FR_REG (12)));
7432 return 1;
7433
7434 default:
7435 return 0;
7436 }
7437 }
7438
7439 return 0;
7440 }
7441
7442
7443 /* This function looks at a single insn and emits any directives
7444 required to unwind this insn. */
7445 void
7446 process_for_unwind_directive (asm_out_file, insn)
7447 FILE *asm_out_file;
7448 rtx insn;
7449 {
7450 if (flag_unwind_tables
7451 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7452 {
7453 rtx pat;
7454
7455 if (GET_CODE (insn) == NOTE
7456 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7457 {
7458 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7459
7460 /* Restore unwind state from immediately before the epilogue. */
7461 if (need_copy_state)
7462 {
7463 fprintf (asm_out_file, "\t.body\n");
7464 fprintf (asm_out_file, "\t.copy_state 1\n");
7465 need_copy_state = false;
7466 }
7467 }
7468
7469 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7470 return;
7471
7472 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7473 if (pat)
7474 pat = XEXP (pat, 0);
7475 else
7476 pat = PATTERN (insn);
7477
7478 switch (GET_CODE (pat))
7479 {
7480 case SET:
7481 process_set (asm_out_file, pat);
7482 break;
7483
7484 case PARALLEL:
7485 {
7486 int par_index;
7487 int limit = XVECLEN (pat, 0);
7488 for (par_index = 0; par_index < limit; par_index++)
7489 {
7490 rtx x = XVECEXP (pat, 0, par_index);
7491 if (GET_CODE (x) == SET)
7492 process_set (asm_out_file, x);
7493 }
7494 break;
7495 }
7496
7497 default:
7498 abort ();
7499 }
7500 }
7501 }
7502
7503 \f
7504 void
7505 ia64_init_builtins ()
7506 {
7507 tree psi_type_node = build_pointer_type (integer_type_node);
7508 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7509 tree endlink = void_list_node;
7510
7511 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7512 tree si_ftype_psi_si_si
7513 = build_function_type (integer_type_node,
7514 tree_cons (NULL_TREE, psi_type_node,
7515 tree_cons (NULL_TREE, integer_type_node,
7516 tree_cons (NULL_TREE,
7517 integer_type_node,
7518 endlink))));
7519
7520 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7521 tree di_ftype_pdi_di_di
7522 = build_function_type (long_integer_type_node,
7523 tree_cons (NULL_TREE, pdi_type_node,
7524 tree_cons (NULL_TREE,
7525 long_integer_type_node,
7526 tree_cons (NULL_TREE,
7527 long_integer_type_node,
7528 endlink))));
7529 /* __sync_synchronize */
7530 tree void_ftype_void
7531 = build_function_type (void_type_node, endlink);
7532
7533 /* __sync_lock_test_and_set_si */
7534 tree si_ftype_psi_si
7535 = build_function_type (integer_type_node,
7536 tree_cons (NULL_TREE, psi_type_node,
7537 tree_cons (NULL_TREE, integer_type_node, endlink)));
7538
7539 /* __sync_lock_test_and_set_di */
7540 tree di_ftype_pdi_di
7541 = build_function_type (long_integer_type_node,
7542 tree_cons (NULL_TREE, pdi_type_node,
7543 tree_cons (NULL_TREE, long_integer_type_node,
7544 endlink)));
7545
7546 /* __sync_lock_release_si */
7547 tree void_ftype_psi
7548 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7549 endlink));
7550
7551 /* __sync_lock_release_di */
7552 tree void_ftype_pdi
7553 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7554 endlink));
7555
7556 #define def_builtin(name, type, code) \
7557 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7558
7559 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7560 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7561 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7562 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7563 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7564 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7565 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7566 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7567
7568 def_builtin ("__sync_synchronize", void_ftype_void,
7569 IA64_BUILTIN_SYNCHRONIZE);
7570
7571 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7572 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7573 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7574 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7575 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7576 IA64_BUILTIN_LOCK_RELEASE_SI);
7577 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7578 IA64_BUILTIN_LOCK_RELEASE_DI);
7579
7580 def_builtin ("__builtin_ia64_bsp",
7581 build_function_type (ptr_type_node, endlink),
7582 IA64_BUILTIN_BSP);
7583
7584 def_builtin ("__builtin_ia64_flushrs",
7585 build_function_type (void_type_node, endlink),
7586 IA64_BUILTIN_FLUSHRS);
7587
7588 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7589 IA64_BUILTIN_FETCH_AND_ADD_SI);
7590 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7591 IA64_BUILTIN_FETCH_AND_SUB_SI);
7592 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7593 IA64_BUILTIN_FETCH_AND_OR_SI);
7594 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7595 IA64_BUILTIN_FETCH_AND_AND_SI);
7596 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7597 IA64_BUILTIN_FETCH_AND_XOR_SI);
7598 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7599 IA64_BUILTIN_FETCH_AND_NAND_SI);
7600
7601 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7602 IA64_BUILTIN_ADD_AND_FETCH_SI);
7603 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7604 IA64_BUILTIN_SUB_AND_FETCH_SI);
7605 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7606 IA64_BUILTIN_OR_AND_FETCH_SI);
7607 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7608 IA64_BUILTIN_AND_AND_FETCH_SI);
7609 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7610 IA64_BUILTIN_XOR_AND_FETCH_SI);
7611 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7612 IA64_BUILTIN_NAND_AND_FETCH_SI);
7613
7614 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7615 IA64_BUILTIN_FETCH_AND_ADD_DI);
7616 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7617 IA64_BUILTIN_FETCH_AND_SUB_DI);
7618 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7619 IA64_BUILTIN_FETCH_AND_OR_DI);
7620 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7621 IA64_BUILTIN_FETCH_AND_AND_DI);
7622 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7623 IA64_BUILTIN_FETCH_AND_XOR_DI);
7624 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7625 IA64_BUILTIN_FETCH_AND_NAND_DI);
7626
7627 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7628 IA64_BUILTIN_ADD_AND_FETCH_DI);
7629 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7630 IA64_BUILTIN_SUB_AND_FETCH_DI);
7631 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7632 IA64_BUILTIN_OR_AND_FETCH_DI);
7633 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7634 IA64_BUILTIN_AND_AND_FETCH_DI);
7635 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7636 IA64_BUILTIN_XOR_AND_FETCH_DI);
7637 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7638 IA64_BUILTIN_NAND_AND_FETCH_DI);
7639
7640 #undef def_builtin
7641 }
7642
7643 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7644
7645 mf
7646 tmp = [ptr];
7647 do {
7648 ret = tmp;
7649 ar.ccv = tmp;
7650 tmp <op>= value;
7651 cmpxchgsz.acq tmp = [ptr], tmp
7652 } while (tmp != ret)
7653 */
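
/* For illustration only (identifiers are made up): a call such as

old = __sync_fetch_and_add_si (&counter, 1);

takes the fetchadd fast path below and becomes roughly

mf
fetchadd4.acq old = [counter_ptr], 1

while an addend outside the fetchadd immediate range falls back to
the cmpxchg loop sketched above.  */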
7654
7655 static rtx
7656 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7657 optab binoptab;
7658 enum machine_mode mode;
7659 tree arglist;
7660 rtx target;
7661 {
7662 rtx ret, label, tmp, ccv, insn, mem, value;
7663 tree arg0, arg1;
7664
7665 arg0 = TREE_VALUE (arglist);
7666 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7667 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7668 value = expand_expr (arg1, NULL_RTX, mode, 0);
7669
7670 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7671 MEM_VOLATILE_P (mem) = 1;
7672
7673 if (target && register_operand (target, mode))
7674 ret = target;
7675 else
7676 ret = gen_reg_rtx (mode);
7677
7678 emit_insn (gen_mf ());
7679
7680 /* Special case for fetchadd instructions. */
7681 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7682 {
7683 if (mode == SImode)
7684 insn = gen_fetchadd_acq_si (ret, mem, value);
7685 else
7686 insn = gen_fetchadd_acq_di (ret, mem, value);
7687 emit_insn (insn);
7688 return ret;
7689 }
7690
7691 tmp = gen_reg_rtx (mode);
7692 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7693 emit_move_insn (tmp, mem);
7694
7695 label = gen_label_rtx ();
7696 emit_label (label);
7697 emit_move_insn (ret, tmp);
7698 emit_move_insn (ccv, tmp);
7699
7700 /* Perform the specific operation. NAND is signalled by passing
7701 one_cmpl_optab: complement TMP first, then AND it with VALUE. */
7702 if (binoptab == one_cmpl_optab)
7703 {
7704 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7705 binoptab = and_optab;
7706 }
7707 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7708
7709 if (mode == SImode)
7710 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7711 else
7712 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7713 emit_insn (insn);
7714
7715 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7716
7717 return ret;
7718 }
7719
7720 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7721
7722 mf
7723 tmp = [ptr];
7724 do {
7725 old = tmp;
7726 ar.ccv = tmp;
7727 ret = tmp + value;
7728 cmpxchgsz.acq tmp = [ptr], ret
7729 } while (tmp != old)
7730 */
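
/* For illustration only (identifiers are made up):
__sync_add_and_fetch_si (&x, v) goes through this expander and
returns the new value (old + v), whereas the fetch_and_op expander
above returns the old one.  For the NAND builtins (one_cmpl_optab)
the value stored and returned is (~old) & v, matching
ia64_expand_fetch_and_op.  */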
7731
7732 static rtx
7733 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7734 optab binoptab;
7735 enum machine_mode mode;
7736 tree arglist;
7737 rtx target;
7738 {
7739 rtx old, label, tmp, ret, ccv, insn, mem, value;
7740 tree arg0, arg1;
7741
7742 arg0 = TREE_VALUE (arglist);
7743 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7744 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7745 value = expand_expr (arg1, NULL_RTX, mode, 0);
7746
7747 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7748 MEM_VOLATILE_P (mem) = 1;
7749
7750 if (target && ! register_operand (target, mode))
7751 target = NULL_RTX;
7752
7753 emit_insn (gen_mf ());
7754 tmp = gen_reg_rtx (mode);
7755 old = gen_reg_rtx (mode);
7756 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7757
7758 emit_move_insn (tmp, mem);
7759
7760 label = gen_label_rtx ();
7761 emit_label (label);
7762 emit_move_insn (old, tmp);
7763 emit_move_insn (ccv, tmp);
7764
7765 /* Perform the specific operation. NAND is signalled by passing
7766 one_cmpl_optab: complement TMP first, then AND it with VALUE. */
7767 if (binoptab == one_cmpl_optab)
7768 {
7769 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7770 binoptab = and_optab;
7771 }
7772 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7773
7774 if (mode == SImode)
7775 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7776 else
7777 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7778 emit_insn (insn);
7779
7780 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7781
7782 return ret;
7783 }
7784
7785 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7786
7787 ar.ccv = oldval
7788 mf
7789 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7790 return ret
7791
7792 For bool_ it's the same except return ret == oldval.
7793 */
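
/* For illustration only (identifiers are made up): the bool_ form is
the one used in compare-and-swap loops, e.g.

do
old = *p;
while (! __sync_bool_compare_and_swap_si (p, old, old + 1));

whereas the val_ form hands back the value that was found at [ptr].  */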
7794
7795 static rtx
7796 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7797 enum machine_mode mode;
7798 int boolp;
7799 tree arglist;
7800 rtx target;
7801 {
7802 tree arg0, arg1, arg2;
7803 rtx mem, old, new, ccv, tmp, insn;
7804
7805 arg0 = TREE_VALUE (arglist);
7806 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7807 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7808 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7809 old = expand_expr (arg1, NULL_RTX, mode, 0);
7810 new = expand_expr (arg2, NULL_RTX, mode, 0);
7811
7812 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7813 MEM_VOLATILE_P (mem) = 1;
7814
7815 if (! register_operand (old, mode))
7816 old = copy_to_mode_reg (mode, old);
7817 if (! register_operand (new, mode))
7818 new = copy_to_mode_reg (mode, new);
7819
7820 if (! boolp && target && register_operand (target, mode))
7821 tmp = target;
7822 else
7823 tmp = gen_reg_rtx (mode);
7824
7825 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7826 emit_move_insn (ccv, old);
7827 emit_insn (gen_mf ());
7828 if (mode == SImode)
7829 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7830 else
7831 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7832 emit_insn (insn);
7833
7834 if (boolp)
7835 {
7836 if (! target)
7837 target = gen_reg_rtx (mode);
7838 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7839 }
7840 else
7841 return tmp;
7842 }
7843
7844 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
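
/* For illustration only (`lock' is a made-up int): combined with
lock_release below this gives the usual spinlock idiom:

while (__sync_lock_test_and_set_si (&lock, 1))
continue;
... critical section ...
__sync_lock_release_si (&lock);
*/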
7845
7846 static rtx
7847 ia64_expand_lock_test_and_set (mode, arglist, target)
7848 enum machine_mode mode;
7849 tree arglist;
7850 rtx target;
7851 {
7852 tree arg0, arg1;
7853 rtx mem, new, ret, insn;
7854
7855 arg0 = TREE_VALUE (arglist);
7856 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7857 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7858 new = expand_expr (arg1, NULL_RTX, mode, 0);
7859
7860 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7861 MEM_VOLATILE_P (mem) = 1;
7862 if (! register_operand (new, mode))
7863 new = copy_to_mode_reg (mode, new);
7864
7865 if (target && register_operand (target, mode))
7866 ret = target;
7867 else
7868 ret = gen_reg_rtx (mode);
7869
7870 if (mode == SImode)
7871 insn = gen_xchgsi (ret, mem, new);
7872 else
7873 insn = gen_xchgdi (ret, mem, new);
7874 emit_insn (insn);
7875
7876 return ret;
7877 }
7878
7879 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7880
7881 static rtx
7882 ia64_expand_lock_release (mode, arglist, target)
7883 enum machine_mode mode;
7884 tree arglist;
7885 rtx target ATTRIBUTE_UNUSED;
7886 {
7887 tree arg0;
7888 rtx mem;
7889
7890 arg0 = TREE_VALUE (arglist);
7891 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7892
7893 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7894 MEM_VOLATILE_P (mem) = 1;
7895
7896 emit_move_insn (mem, const0_rtx);
7897
7898 return const0_rtx;
7899 }
7900
7901 rtx
7902 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7903 tree exp;
7904 rtx target;
7905 rtx subtarget ATTRIBUTE_UNUSED;
7906 enum machine_mode mode ATTRIBUTE_UNUSED;
7907 int ignore ATTRIBUTE_UNUSED;
7908 {
7909 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7910 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7911 tree arglist = TREE_OPERAND (exp, 1);
7912
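/* The first switch merely selects the operand mode for the sync
builtins; the actual expansion happens in the second switch below. */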
7913 switch (fcode)
7914 {
7915 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7916 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7917 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7918 case IA64_BUILTIN_LOCK_RELEASE_SI:
7919 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7920 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7921 case IA64_BUILTIN_FETCH_AND_OR_SI:
7922 case IA64_BUILTIN_FETCH_AND_AND_SI:
7923 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7924 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7925 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7926 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7927 case IA64_BUILTIN_OR_AND_FETCH_SI:
7928 case IA64_BUILTIN_AND_AND_FETCH_SI:
7929 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7930 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7931 mode = SImode;
7932 break;
7933
7934 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7935 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7936 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7937 case IA64_BUILTIN_LOCK_RELEASE_DI:
7938 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7939 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7940 case IA64_BUILTIN_FETCH_AND_OR_DI:
7941 case IA64_BUILTIN_FETCH_AND_AND_DI:
7942 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7943 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7944 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7945 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7946 case IA64_BUILTIN_OR_AND_FETCH_DI:
7947 case IA64_BUILTIN_AND_AND_FETCH_DI:
7948 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7949 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7950 mode = DImode;
7951 break;
7952
7953 default:
7954 break;
7955 }
7956
7957 switch (fcode)
7958 {
7959 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7960 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7961 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7962
7963 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7964 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7965 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7966
7967 case IA64_BUILTIN_SYNCHRONIZE:
7968 emit_insn (gen_mf ());
7969 return const0_rtx;
7970
7971 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7972 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7973 return ia64_expand_lock_test_and_set (mode, arglist, target);
7974
7975 case IA64_BUILTIN_LOCK_RELEASE_SI:
7976 case IA64_BUILTIN_LOCK_RELEASE_DI:
7977 return ia64_expand_lock_release (mode, arglist, target);
7978
7979 case IA64_BUILTIN_BSP:
7980 if (! target || ! register_operand (target, DImode))
7981 target = gen_reg_rtx (DImode);
7982 emit_insn (gen_bsp_value (target));
7983 return target;
7984
7985 case IA64_BUILTIN_FLUSHRS:
7986 emit_insn (gen_flushrs ());
7987 return const0_rtx;
7988
7989 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7990 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7991 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7992
7993 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7994 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7995 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7996
7997 case IA64_BUILTIN_FETCH_AND_OR_SI:
7998 case IA64_BUILTIN_FETCH_AND_OR_DI:
7999 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8000
8001 case IA64_BUILTIN_FETCH_AND_AND_SI:
8002 case IA64_BUILTIN_FETCH_AND_AND_DI:
8003 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8004
8005 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8006 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8007 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8008
8009 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8010 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8011 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8012
8013 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8014 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8015 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8016
8017 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8018 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8019 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8020
8021 case IA64_BUILTIN_OR_AND_FETCH_SI:
8022 case IA64_BUILTIN_OR_AND_FETCH_DI:
8023 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8024
8025 case IA64_BUILTIN_AND_AND_FETCH_SI:
8026 case IA64_BUILTIN_AND_AND_FETCH_DI:
8027 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8028
8029 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8030 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8031 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8032
8033 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8034 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8035 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8036
8037 default:
8038 break;
8039 }
8040
8041 return NULL_RTX;
8042 }
8043
8044 /* On HP-UX IA64, aggregate parameters are passed in the most
8045 significant bits of the stack slot. */
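
/* For illustration only: on big-endian HP-UX with 8-byte slots, a
4-byte struct gets `upward' padding from the exception below, so its
bytes land in the most-significant (lowest-addressed) half of the
slot; other arguments fall through to the standard size test.  */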
8046
8047 enum direction
8048 ia64_hpux_function_arg_padding (mode, type)
8049 enum machine_mode mode;
8050 tree type;
8051 {
8052 /* Exception to normal case for structures/unions/etc. */
8053
8054 if (type && AGGREGATE_TYPE_P (type)
8055 && int_size_in_bytes (type) < UNITS_PER_WORD)
8056 return upward;
8057
8058 /* This is the standard FUNCTION_ARG_PADDING computation with
8059 BYTES_BIG_ENDIAN hardwired to true (HP-UX runs IA64 big-endian). */
8060
8061 return ((mode == BLKmode
8062 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8063 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8064 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8065 ? downward : upward);
8066 }
8067 \f
8068 /* Switch to the section to which we should output X. The only thing
8069 special we do here is to honor small data. */
8070
8071 static void
8072 ia64_select_rtx_section (mode, x, align)
8073 enum machine_mode mode;
8074 rtx x;
8075 unsigned HOST_WIDE_INT align;
8076 {
8077 if (GET_MODE_SIZE (mode) > 0
8078 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8079 sdata_section ();
8080 else
8081 default_elf_select_rtx_section (mode, x, align);
8082 }
8083
8084 /* It is illegal to have relocations in shared segments on AIX.
8085 Pretend flag_pic is always set. */
8086
8087 static void
8088 ia64_aix_select_section (exp, reloc, align)
8089 tree exp;
8090 int reloc;
8091 unsigned HOST_WIDE_INT align;
8092 {
8093 int save_pic = flag_pic;
8094 flag_pic = 1;
8095 default_elf_select_section (exp, reloc, align);
8096 flag_pic = save_pic;
8097 }
8098
8099 static void
8100 ia64_aix_unique_section (decl, reloc)
8101 tree decl;
8102 int reloc;
8103 {
8104 int save_pic = flag_pic;
8105 flag_pic = 1;
8106 default_unique_section (decl, reloc);
8107 flag_pic = save_pic;
8108 }
8109
8110 static void
8111 ia64_aix_select_rtx_section (mode, x, align)
8112 enum machine_mode mode;
8113 rtx x;
8114 unsigned HOST_WIDE_INT align;
8115 {
8116 int save_pic = flag_pic;
8117 flag_pic = 1;
8118 ia64_select_rtx_section (mode, x, align);
8119 flag_pic = save_pic;
8120 }
8121
8122 #include "gt-ia64.h"