/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
                                                 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but exclude symbolic operands
   when generating PIC code.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so the immediate must be acceptable to both the GT and LT forms.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator that accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, one that is either 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
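
/* Illustrative note (added; not part of the original sources): the only
   constants IA-64 stores directly come from hard-wired registers (r0 for
   integer zero, f0/f1 for 0.0/1.0), which is why the function accepts
   (set (mem:DI X) (const_int 0)) but rejects storing, say, the constant 5
   without first loading it into a register.  */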

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
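
/* Worked example (added for illustration): for rop = 0x1f8 and rshift = 3,
   op >>= shift leaves 0x3f, and exact_log2 (0x3f + 1) = 6, so this is a
   valid 6 bit dep.z field.  A mask like 0x1e8 fails: 0x1e8 >> 3 = 0x3d is
   not a solid block of 1's, so exact_log2 (0x3e) returns -1.  */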

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
           && GET_CODE (XEXP (src, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
           && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
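
      /* Worked example (added for illustration): with ofs = 0x12345,
         ofs & 0x3fff = 0x2345 and (0x2345 ^ 0x2000) - 0x2000 = -0x1cbb,
         so lo = -0x1cbb (a sign extended 14 bit value) and
         hi = 0x12345 - (-0x1cbb) = 0x14000, whose low 14 bits are zero;
         hi + lo recovers the original offset.  */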

      if (! scratch)
        scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
                                  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
        scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
         pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
         slot.  Unfortunately, the stack slot address gets cse'd across
         the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
         place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
         in place, since this rtx is used in exception handling receivers.
         Moreover, we must get this rtx out of regno_reg_rtx or reload
         will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
        {
          REGNO (save) = GR_REG (4);
          regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
        }
    }
  else
    {
      if (setjmp_p)
        save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
        save = gen_rtx_REG (DImode, LOC_REG (0));
      else
        save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = change_address (in, DImode, NULL_RTX);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode, NULL_RTX);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode,
                                     gen_rtx_POST_MODIFY (Pmode, base,
                                                          plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode,
                                     gen_rtx_POST_MODIFY (Pmode, base,
                                                          plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
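
/* Usage sketch (added for illustration): splitting (reg:TI r32) yields
   out[0] = (reg:DI r32), out[1] = (reg:DI r33) and no fixup insn, while
   splitting a MEM yields two DImode MEMs plus a returned fixup insn that
   computes base + 8 into SCRATCH, which the caller must emit before
   using out[1].  */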

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    {
      return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
    }
  else
    return in;
}
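
/* Illustrative example (added): a union containing both a long double
   and a 128 bit integer member can hand us (subreg:TF (reg:TI)); the
   code above rewrites it as a TFmode MEM of the spilled TImode register
   (via ADDRESSOF), so no direct GR->FR TFmode move is ever needed.  */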

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
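
/* Usage sketch (added for illustration): for a DImode "a < b", the code
   above emits

     (set (reg:BI p) (lt:BI (reg:DI a) (reg:DI b)))

   and returns (ne (reg:BI p) (const_int 0)) in MODE, so consumers
   (conditional branch, setcc, ...) always test a one bit predicate
   register rather than the original comparison.  */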

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
        emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
                      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
        insn = gen_sibcall_pic (dest, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_pic (dest, narg_rtx, b0);
      else
        insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
        emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
      else
        {
          emit_move_insn (gp_save, pic_offset_table_rtx);

          if (! retval)
            insn = gen_call_pic (addr, narg_rtx, b0);
          else
            insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
          emit_call_insn (insn);

          emit_move_insn (pic_offset_table_rtx, gp_save);
        }
    }
}

/* Begin the assembly file.  Emit the .pred.safe_across_calls directive
   naming the predicate registers that are preserved across calls.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", f);
          out_state = 1;
        }
      else
        fputc (',', f);
      if (re == rs + 1)
        fprintf (f, "p%u", rs);
      else
        fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
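
/* Example output (added for illustration): under the usual IA-64 ABI,
   p1-p5 and p16-p63 are call-preserved, so a typical file would begin
   with

        .pred.safe_across_calls p1-p5,p16-p63

   telling the assembler's dependency checker which predicates survive
   call sites.  */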

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */

  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  current_frame_info.gr_used_mask
   contains bits in GR0 to GR31 that have already been allocated by this
   routine.  TRY_LOCALS is true if we should attempt to locate a local
   regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}

/* Compute register-save masks, offsets, and layout information for the
   current function's frame, filling in current_frame_info.  SIZE is the
   number of bytes of space needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
        {
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
        }
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
          n_spilled += 1;
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocate it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          else
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
        }
      else if (to == STACK_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = 0;
          else
            offset = 16 + current_function_outgoing_args_size;
        }
      else
        abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
      else
        abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
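
/* Illustrative sketch (added): with more than two spills, n_iter is 2
   and successive spill_restore_mem calls alternate between iter_reg[0]
   and iter_reg[1].  Consecutive spills then use different address
   registers, so two st8.spill/stf.spill insns can issue in the same
   insn group instead of serializing on a single pointer.  */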

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
        *spill_fill_data.prev_addr[iter]
          = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                 gen_rtx_PLUS (DImode,
                                               spill_fill_data.iter_reg[iter],
                                               disp_rtx));
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = gen_sequence ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        spill_fill_data.init_after
          = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx first = get_insns ();
          if (first)
            spill_fill_data.init_after
              = emit_insn_before (seq, first);
          else
            spill_fill_data.init_after = emit_insn (seq);
        }
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  MEM_ALIAS_SET (mem) = get_varargs_alias_set ();

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      REG_NOTES (insn)
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                             gen_rtx_SET (VOIDmode,
                                          gen_rtx_MEM (GET_MODE (reg),
                                                       plus_constant (base, off)),
                                          frame_reg),
                             REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                         GEN_INT (cfa_off)));
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}
1814
1815/* Called after register allocation to add any instructions needed for the
1816 prologue. Using a prologue insn is favored compared to putting all of the
1817 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
1818 to intermix instructions with the saves of the caller saved registers. In
1819 some cases, it might be necessary to emit a barrier instruction as the last
1820 insn to prevent such scheduling.
1821
1822 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
1823 so that the debug info generation code can handle them properly.
1824
1825   The register save area is laid out like so:
1826 cfa+16
1827 [ varargs spill area ]
1828 [ fr register spill area ]
1829 [ br register spill area ]
1830 [ ar register spill area ]
1831 [ pr register spill area ]
1832 [ gr register spill area ] */
c65ebc55
JW
1833
1834/* ??? We get inefficient code when the frame size is too large to fit in an
1835   adds instruction.  */
1836
c65ebc55
JW
1837void
1838ia64_expand_prologue ()
1839{
97e242b0
RH
1840 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1841 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1842 rtx reg, alt_reg;
1843
1844 ia64_compute_frame_size (get_frame_size ());
1845 last_scratch_gr_reg = 15;
1846
1847 /* If there is no epilogue, then we don't need some prologue insns.
1848 We need to avoid emitting the dead prologue insns, because flow
1849 will complain about them. */
c65ebc55
JW
1850 if (optimize)
1851 {
97e242b0
RH
1852 edge e;
1853
c65ebc55
JW
1854 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1855 if ((e->flags & EDGE_FAKE) == 0
1856 && (e->flags & EDGE_FALLTHRU) != 0)
1857 break;
1858 epilogue_p = (e != NULL);
1859 }
1860 else
1861 epilogue_p = 1;
1862
97e242b0
RH
1863 /* Set the local, input, and output register names. We need to do this
1864 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1865 half. If we use in/loc/out register names, then we get assembler errors
1866 in crtn.S because there is no alloc insn or regstk directive in there. */
1867 if (! TARGET_REG_NAMES)
1868 {
1869 int inputs = current_frame_info.n_input_regs;
1870 int locals = current_frame_info.n_local_regs;
1871 int outputs = current_frame_info.n_output_regs;
1872
1873 for (i = 0; i < inputs; i++)
1874 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1875 for (i = 0; i < locals; i++)
1876 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1877 for (i = 0; i < outputs; i++)
1878 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1879 }
c65ebc55 1880
97e242b0
RH
1881 /* Set the frame pointer register name. The regnum is logically loc79,
1882 but of course we'll not have allocated that many locals. Rather than
1883 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
1884 /* ??? This code means that we can never use one local register when
1885 there is a frame pointer. loc79 gets wasted in this case, as it is
1886 renamed to a register that will never be used. See also the try_locals
1887 code in find_gr_spill. */
97e242b0
RH
1888 if (current_frame_info.reg_fp)
1889 {
1890 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1891 reg_names[HARD_FRAME_POINTER_REGNUM]
1892 = reg_names[current_frame_info.reg_fp];
1893 reg_names[current_frame_info.reg_fp] = tmp;
1894 }
c65ebc55 1895
97e242b0
RH
1896 /* Fix up the return address placeholder. */
1897 /* ??? We can fail if __builtin_return_address is used, and we didn't
1898 allocate a register in which to save b0. I can't think of a way to
1899 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1900 then be sure that I got the right one. Further, reload doesn't seem
1901 to care if an eliminable register isn't used, and "eliminates" it
1902 anyway. */
1903 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1904 && current_frame_info.reg_save_b0 != 0)
1905 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1906
1907 /* We don't need an alloc instruction if we've used no outputs or locals. */
1908 if (current_frame_info.n_local_regs == 0
2ed4af6f
RH
1909 && current_frame_info.n_output_regs == 0
1910 && current_frame_info.n_input_regs <= current_function_args_info.words)
97e242b0
RH
1911 {
1912 /* If there is no alloc, but there are input registers used, then we
1913 need a .regstk directive. */
1914 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1915 ar_pfs_save_reg = NULL_RTX;
1916 }
1917 else
1918 {
1919 current_frame_info.need_regstk = 0;
c65ebc55 1920
97e242b0
RH
1921 if (current_frame_info.reg_save_ar_pfs)
1922 regno = current_frame_info.reg_save_ar_pfs;
1923 else
1924 regno = next_scratch_gr_reg ();
1925 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1926
1927 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1928 GEN_INT (current_frame_info.n_input_regs),
1929 GEN_INT (current_frame_info.n_local_regs),
1930 GEN_INT (current_frame_info.n_output_regs),
1931 GEN_INT (current_frame_info.n_rotate_regs)));
1932 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1933 }
c65ebc55 1934
97e242b0 1935 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 1936
26a110f5 1937 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
1938 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1939 stack_pointer_rtx, 0);
c65ebc55 1940
97e242b0
RH
1941 if (frame_pointer_needed)
1942 {
1943 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1944 RTX_FRAME_RELATED_P (insn) = 1;
1945 }
c65ebc55 1946
97e242b0
RH
1947 if (current_frame_info.total_size != 0)
1948 {
1949 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1950 rtx offset;
c65ebc55 1951
97e242b0
RH
1952 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1953 offset = frame_size_rtx;
1954 else
1955 {
1956 regno = next_scratch_gr_reg ();
1957 offset = gen_rtx_REG (DImode, regno);
1958 emit_move_insn (offset, frame_size_rtx);
1959 }
c65ebc55 1960
97e242b0
RH
1961 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1962 stack_pointer_rtx, offset));
c65ebc55 1963
97e242b0
RH
1964 if (! frame_pointer_needed)
1965 {
1966 RTX_FRAME_RELATED_P (insn) = 1;
1967 if (GET_CODE (offset) != CONST_INT)
1968 {
1969 REG_NOTES (insn)
1970 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1971 gen_rtx_SET (VOIDmode,
1972 stack_pointer_rtx,
1973 gen_rtx_PLUS (DImode,
1974 stack_pointer_rtx,
1975 frame_size_rtx)),
1976 REG_NOTES (insn));
1977 }
1978 }
c65ebc55 1979
97e242b0
RH
1980 /* ??? At this point we must generate a magic insn that appears to
1981 modify the stack pointer, the frame pointer, and all spill
1982 iterators. This would allow the most scheduling freedom. For
1983 now, just hard stop. */
1984 emit_insn (gen_blockage ());
1985 }
c65ebc55 1986
97e242b0
RH
1987 /* Must copy out ar.unat before doing any integer spills. */
1988 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 1989 {
97e242b0
RH
1990 if (current_frame_info.reg_save_ar_unat)
1991 ar_unat_save_reg
1992 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
1993 else
c65ebc55 1994 {
97e242b0
RH
1995 alt_regno = next_scratch_gr_reg ();
1996 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
1997 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 1998 }
c65ebc55 1999
97e242b0
RH
2000 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2001 insn = emit_move_insn (ar_unat_save_reg, reg);
2002 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2003
2004 /* Even if we're not going to generate an epilogue, we still
2005 need to save the register so that EH works. */
2006 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2007 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
c65ebc55
JW
2008 }
2009 else
97e242b0
RH
2010 ar_unat_save_reg = NULL_RTX;
2011
2012 /* Spill all varargs registers. Do this before spilling any GR registers,
2013 since we want the UNAT bits for the GR registers to override the UNAT
2014 bits from varargs, which we don't care about. */
c65ebc55 2015
97e242b0
RH
2016 cfa_off = -16;
2017 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 2018 {
97e242b0 2019 reg = gen_rtx_REG (DImode, regno);
870f9ec0 2020 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 2021 }
c65ebc55 2022
97e242b0
RH
2023 /* Locate the bottom of the register save area. */
2024 cfa_off = (current_frame_info.spill_cfa_off
2025 + current_frame_info.spill_size
2026 + current_frame_info.extra_spill_size);
c65ebc55 2027
97e242b0
RH
2028 /* Save the predicate register block either in a register or in memory. */
2029 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2030 {
2031 reg = gen_rtx_REG (DImode, PR_REG (0));
2032 if (current_frame_info.reg_save_pr != 0)
1ff5b671 2033 {
97e242b0
RH
2034 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2035 insn = emit_move_insn (alt_reg, reg);
1ff5b671 2036
97e242b0
RH
2037 /* ??? Denote pr spill/fill by a DImode move that modifies all
2038 64 hard registers. */
1ff5b671 2039 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2040 REG_NOTES (insn)
2041 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2042 gen_rtx_SET (VOIDmode, alt_reg, reg),
2043 REG_NOTES (insn));
46327bc5 2044
97e242b0
RH
2045 /* Even if we're not going to generate an epilogue, we still
2046 need to save the register so that EH works. */
2047 if (! epilogue_p)
2048 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
1ff5b671
JW
2049 }
2050 else
97e242b0
RH
2051 {
2052 alt_regno = next_scratch_gr_reg ();
2053 alt_reg = gen_rtx_REG (DImode, alt_regno);
2054 insn = emit_move_insn (alt_reg, reg);
870f9ec0 2055 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2056 cfa_off -= 8;
2057 }
c65ebc55
JW
2058 }
2059
97e242b0
RH
2060 /* Handle AR regs in numerical order. All of them get special handling. */
2061 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2062 && current_frame_info.reg_save_ar_unat == 0)
c65ebc55 2063 {
97e242b0 2064 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 2065 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 2066 cfa_off -= 8;
c65ebc55 2067 }
97e242b0
RH
2068
2069 /* The alloc insn already copied ar.pfs into a general register. The
2070 only thing we have to do now is copy that register to a stack slot
2071 if we'd not allocated a local register for the job. */
2072 if (current_frame_info.reg_save_ar_pfs == 0
2073 && ! current_function_is_leaf)
c65ebc55 2074 {
97e242b0 2075 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 2076 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
2077 cfa_off -= 8;
2078 }
2079
2080 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2081 {
2082 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2083 if (current_frame_info.reg_save_ar_lc != 0)
2084 {
2085 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2086 insn = emit_move_insn (alt_reg, reg);
2087 RTX_FRAME_RELATED_P (insn) = 1;
2088
2089 /* Even if we're not going to generate an epilogue, we still
2090 need to save the register so that EH works. */
2091 if (! epilogue_p)
2092 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2093 }
c65ebc55
JW
2094 else
2095 {
97e242b0
RH
2096 alt_regno = next_scratch_gr_reg ();
2097 alt_reg = gen_rtx_REG (DImode, alt_regno);
2098 emit_move_insn (alt_reg, reg);
870f9ec0 2099 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2100 cfa_off -= 8;
2101 }
2102 }
2103
2104 /* We should now be at the base of the gr/br/fr spill area. */
2105 if (cfa_off != (current_frame_info.spill_cfa_off
2106 + current_frame_info.spill_size))
2107 abort ();
2108
2109 /* Spill all general registers. */
2110 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2111 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2112 {
2113 reg = gen_rtx_REG (DImode, regno);
2114 do_spill (gen_gr_spill, reg, cfa_off, reg);
2115 cfa_off -= 8;
2116 }
2117
2118 /* Handle BR0 specially -- it may be getting stored permanently in
2119 some GR register. */
2120 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2121 {
2122 reg = gen_rtx_REG (DImode, BR_REG (0));
2123 if (current_frame_info.reg_save_b0 != 0)
2124 {
2125 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2126 insn = emit_move_insn (alt_reg, reg);
c65ebc55 2127 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2128
2129 /* Even if we're not going to generate an epilogue, we still
2130 need to save the register so that EH works. */
2131 if (! epilogue_p)
2132 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
c65ebc55 2133 }
c65ebc55 2134 else
97e242b0
RH
2135 {
2136 alt_regno = next_scratch_gr_reg ();
2137 alt_reg = gen_rtx_REG (DImode, alt_regno);
2138 emit_move_insn (alt_reg, reg);
870f9ec0 2139 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2140 cfa_off -= 8;
2141 }
c65ebc55
JW
2142 }
2143
97e242b0
RH
2144 /* Spill the rest of the BR registers. */
2145 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2146 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2147 {
2148 alt_regno = next_scratch_gr_reg ();
2149 alt_reg = gen_rtx_REG (DImode, alt_regno);
2150 reg = gen_rtx_REG (DImode, regno);
2151 emit_move_insn (alt_reg, reg);
870f9ec0 2152 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2153 cfa_off -= 8;
2154 }
2155
2156 /* Align the frame and spill all FR registers. */
2157 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2158 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2159 {
2160 if (cfa_off & 15)
2161 abort ();
3f622353 2162 reg = gen_rtx_REG (TFmode, regno);
870f9ec0 2163 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
2164 cfa_off -= 16;
2165 }
2166
2167 if (cfa_off != current_frame_info.spill_cfa_off)
2168 abort ();
2169
2170 finish_spill_pointers ();
c65ebc55
JW
2171}
2172
2173/* Called after register allocation to add any instructions needed for the
2174   epilogue.  Using an epilogue insn is preferred to putting all of the
2175   instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
2176   to intermix instructions with the restores of the caller saved registers.  In
2177 some cases, it might be necessary to emit a barrier instruction as the last
2178 insn to prevent such scheduling. */
2179
2180void
2ed4af6f
RH
2181ia64_expand_epilogue (sibcall_p)
2182 int sibcall_p;
c65ebc55 2183{
97e242b0
RH
2184 rtx insn, reg, alt_reg, ar_unat_save_reg;
2185 int regno, alt_regno, cfa_off;
2186
2187 ia64_compute_frame_size (get_frame_size ());
2188
2189 /* If there is a frame pointer, then we use it instead of the stack
2190 pointer, so that the stack pointer does not need to be valid when
2191 the epilogue starts. See EXIT_IGNORE_STACK. */
2192 if (frame_pointer_needed)
2193 setup_spill_pointers (current_frame_info.n_spilled,
2194 hard_frame_pointer_rtx, 0);
2195 else
2196 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2197 current_frame_info.total_size);
2198
2199 if (current_frame_info.total_size != 0)
2200 {
2201 /* ??? At this point we must generate a magic insn that appears to
2202 modify the spill iterators and the frame pointer. This would
2203 allow the most scheduling freedom. For now, just hard stop. */
2204 emit_insn (gen_blockage ());
2205 }
2206
2207 /* Locate the bottom of the register save area. */
2208 cfa_off = (current_frame_info.spill_cfa_off
2209 + current_frame_info.spill_size
2210 + current_frame_info.extra_spill_size);
2211
2212 /* Restore the predicate registers. */
2213 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2214 {
2215 if (current_frame_info.reg_save_pr != 0)
2216 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2217 else
2218 {
2219 alt_regno = next_scratch_gr_reg ();
2220 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2221 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2222 cfa_off -= 8;
2223 }
2224 reg = gen_rtx_REG (DImode, PR_REG (0));
2225 emit_move_insn (reg, alt_reg);
2226 }
2227
2228 /* Restore the application registers. */
2229
2230 /* Load the saved unat from the stack, but do not restore it until
2231 after the GRs have been restored. */
2232 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2233 {
2234 if (current_frame_info.reg_save_ar_unat != 0)
2235 ar_unat_save_reg
2236 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2237 else
2238 {
2239 alt_regno = next_scratch_gr_reg ();
2240 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2241 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 2242 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
2243 cfa_off -= 8;
2244 }
2245 }
2246 else
2247 ar_unat_save_reg = NULL_RTX;
2248
2249 if (current_frame_info.reg_save_ar_pfs != 0)
2250 {
2251 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2252 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2253 emit_move_insn (reg, alt_reg);
2254 }
2255 else if (! current_function_is_leaf)
c65ebc55 2256 {
97e242b0
RH
2257 alt_regno = next_scratch_gr_reg ();
2258 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2259 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2260 cfa_off -= 8;
2261 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2262 emit_move_insn (reg, alt_reg);
2263 }
2264
2265 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2266 {
2267 if (current_frame_info.reg_save_ar_lc != 0)
2268 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2269 else
2270 {
2271 alt_regno = next_scratch_gr_reg ();
2272 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2273 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2274 cfa_off -= 8;
2275 }
2276 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2277 emit_move_insn (reg, alt_reg);
2278 }
2279
2280 /* We should now be at the base of the gr/br/fr spill area. */
2281 if (cfa_off != (current_frame_info.spill_cfa_off
2282 + current_frame_info.spill_size))
2283 abort ();
2284
2285 /* Restore all general registers. */
2286 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2287 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 2288 {
97e242b0
RH
2289 reg = gen_rtx_REG (DImode, regno);
2290 do_restore (gen_gr_restore, reg, cfa_off);
2291 cfa_off -= 8;
0c96007e 2292 }
97e242b0
RH
2293
2294 /* Restore the branch registers. Handle B0 specially, as it may
2295 have gotten stored in some GR register. */
2296 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2297 {
2298 if (current_frame_info.reg_save_b0 != 0)
2299 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2300 else
2301 {
2302 alt_regno = next_scratch_gr_reg ();
2303 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2304 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2305 cfa_off -= 8;
2306 }
2307 reg = gen_rtx_REG (DImode, BR_REG (0));
2308 emit_move_insn (reg, alt_reg);
2309 }
2310
2311 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2312 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 2313 {
97e242b0
RH
2314 alt_regno = next_scratch_gr_reg ();
2315 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2316 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2317 cfa_off -= 8;
2318 reg = gen_rtx_REG (DImode, regno);
2319 emit_move_insn (reg, alt_reg);
2320 }
c65ebc55 2321
97e242b0
RH
2322 /* Restore floating point registers. */
2323 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2324 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2325 {
2326 if (cfa_off & 15)
2327 abort ();
3f622353 2328 reg = gen_rtx_REG (TFmode, regno);
870f9ec0 2329 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 2330 cfa_off -= 16;
0c96007e 2331 }
97e242b0
RH
2332
2333 /* Restore ar.unat for real. */
2334 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2335 {
2336 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2337 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
2338 }
2339
97e242b0
RH
2340 if (cfa_off != current_frame_info.spill_cfa_off)
2341 abort ();
2342
2343 finish_spill_pointers ();
c65ebc55 2344
97e242b0
RH
2345 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2346 {
2347 /* ??? At this point we must generate a magic insn that appears to
2348 modify the spill iterators, the stack pointer, and the frame
2349 pointer. This would allow the most scheduling freedom. For now,
2350 just hard stop. */
2351 emit_insn (gen_blockage ());
2352 }
c65ebc55 2353
97e242b0
RH
2354 if (cfun->machine->ia64_eh_epilogue_sp)
2355 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2356 else if (frame_pointer_needed)
2357 {
2358 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2359 RTX_FRAME_RELATED_P (insn) = 1;
2360 }
2361 else if (current_frame_info.total_size)
0c96007e 2362 {
97e242b0
RH
2363 rtx offset, frame_size_rtx;
2364
2365 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2366 if (CONST_OK_FOR_I (current_frame_info.total_size))
2367 offset = frame_size_rtx;
2368 else
2369 {
2370 regno = next_scratch_gr_reg ();
2371 offset = gen_rtx_REG (DImode, regno);
2372 emit_move_insn (offset, frame_size_rtx);
2373 }
2374
2375 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2376 offset));
2377
2378 RTX_FRAME_RELATED_P (insn) = 1;
2379 if (GET_CODE (offset) != CONST_INT)
2380 {
2381 REG_NOTES (insn)
2382 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2383 gen_rtx_SET (VOIDmode,
2384 stack_pointer_rtx,
2385 gen_rtx_PLUS (DImode,
2386 stack_pointer_rtx,
2387 frame_size_rtx)),
2388 REG_NOTES (insn));
2389 }
0c96007e 2390 }
97e242b0
RH
2391
2392 if (cfun->machine->ia64_eh_epilogue_bsp)
2393 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2394
2ed4af6f
RH
2395 if (! sibcall_p)
2396 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
c65ebc55
JW
2397}
2398
97e242b0
RH
2399/* Return 1 if br.ret can do all the work required to return from a
2400 function. */
2401
2402int
2403ia64_direct_return ()
2404{
2405 if (reload_completed && ! frame_pointer_needed)
2406 {
2407 ia64_compute_frame_size (get_frame_size ());
2408
2409 return (current_frame_info.total_size == 0
2410 && current_frame_info.n_spilled == 0
2411 && current_frame_info.reg_save_b0 == 0
2412 && current_frame_info.reg_save_pr == 0
2413 && current_frame_info.reg_save_ar_pfs == 0
2414 && current_frame_info.reg_save_ar_unat == 0
2415 && current_frame_info.reg_save_ar_lc == 0);
2416 }
2417 return 0;
2418}
2419
10c9f189
RH
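/* Return 1 if it is OK for the register renaming pass to rename hard
   register FROM to hard register TO, 0 otherwise.  (Presumably reached
   via the HARD_REGNO_RENAME_OK target macro.)  */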
2420int
2421ia64_hard_regno_rename_ok (from, to)
2422 int from;
2423 int to;
2424{
2425 /* Don't clobber any of the registers we reserved for the prologue. */
2426 if (to == current_frame_info.reg_fp
2427 || to == current_frame_info.reg_save_b0
2428 || to == current_frame_info.reg_save_pr
2429 || to == current_frame_info.reg_save_ar_pfs
2430 || to == current_frame_info.reg_save_ar_unat
2431 || to == current_frame_info.reg_save_ar_lc)
2432 return 0;
2433
2130b7fb
BS
2434 if (from == current_frame_info.reg_fp
2435 || from == current_frame_info.reg_save_b0
2436 || from == current_frame_info.reg_save_pr
2437 || from == current_frame_info.reg_save_ar_pfs
2438 || from == current_frame_info.reg_save_ar_unat
2439 || from == current_frame_info.reg_save_ar_lc)
2440 return 0;
2441
10c9f189
RH
2442 /* Don't use output registers outside the register frame. */
2443 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2444 return 0;
2445
2446 /* Retain even/oddness on predicate register pairs. */
2447 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2448 return (from & 1) == (to & 1);
2449
8cb71435
BS
2450 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2451 if (from == GR_REG (4) && current_function_calls_setjmp)
2452 return 0;
2453
10c9f189
RH
2454 return 1;
2455}
2456
c65ebc55
JW
2457/* Emit the function prologue. */
2458
2459void
2460ia64_function_prologue (file, size)
2461 FILE *file;
fd7c34b0 2462 int size ATTRIBUTE_UNUSED;
c65ebc55 2463{
97e242b0
RH
2464 int mask, grsave, grsave_prev;
2465
2466 if (current_frame_info.need_regstk)
2467 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2468 current_frame_info.n_input_regs,
2469 current_frame_info.n_local_regs,
2470 current_frame_info.n_output_regs,
2471 current_frame_info.n_rotate_regs);
c65ebc55 2472
531073e7 2473 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
2474 return;
2475
97e242b0 2476 /* Emit the .prologue directive. */
809d4ef1 2477
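  /* A sketch of the directive, inferred from the mask computation below:
     ".prologue mask, grsave", where bit 3 of mask means b0 (rp) is saved
     in a GR, bit 2 means ar.pfs, bit 1 the frame pointer, and bit 0 the
     predicates; grsave is the first GR of the block, printed as a dbx
     register number.  E.g. saving b0 and ar.pfs in consecutive GRs gives
     mask 8|4 == 12.  */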
97e242b0
RH
2478 mask = 0;
2479 grsave = grsave_prev = 0;
2480 if (current_frame_info.reg_save_b0 != 0)
0c96007e 2481 {
97e242b0
RH
2482 mask |= 8;
2483 grsave = grsave_prev = current_frame_info.reg_save_b0;
2484 }
2485 if (current_frame_info.reg_save_ar_pfs != 0
2486 && (grsave_prev == 0
2487 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2488 {
2489 mask |= 4;
2490 if (grsave_prev == 0)
2491 grsave = current_frame_info.reg_save_ar_pfs;
2492 grsave_prev = current_frame_info.reg_save_ar_pfs;
0c96007e 2493 }
97e242b0
RH
2494 if (current_frame_info.reg_fp != 0
2495 && (grsave_prev == 0
2496 || current_frame_info.reg_fp == grsave_prev + 1))
2497 {
2498 mask |= 2;
2499 if (grsave_prev == 0)
2500 grsave = HARD_FRAME_POINTER_REGNUM;
2501 grsave_prev = current_frame_info.reg_fp;
2502 }
2503 if (current_frame_info.reg_save_pr != 0
2504 && (grsave_prev == 0
2505 || current_frame_info.reg_save_pr == grsave_prev + 1))
2506 {
2507 mask |= 1;
2508 if (grsave_prev == 0)
2509 grsave = current_frame_info.reg_save_pr;
2510 }
2511
2512 if (mask)
2513 fprintf (file, "\t.prologue %d, %d\n", mask,
2514 ia64_dbx_register_number (grsave));
2515 else
2516 fputs ("\t.prologue\n", file);
2517
2518 /* Emit a .spill directive, if necessary, to relocate the base of
2519 the register spill area. */
2520 if (current_frame_info.spill_cfa_off != -16)
2521 fprintf (file, "\t.spill %ld\n",
2522 (long) (current_frame_info.spill_cfa_off
2523 + current_frame_info.spill_size));
c65ebc55
JW
2524}
2525
0186257f
JW
2526/* Emit the .body directive at the scheduled end of the prologue. */
2527
2528void
2529ia64_output_end_prologue (file)
2530 FILE *file;
2531{
531073e7 2532 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
2533 return;
2534
2535 fputs ("\t.body\n", file);
2536}
2537
c65ebc55
JW
2538/* Emit the function epilogue. */
2539
2540void
2541ia64_function_epilogue (file, size)
fd7c34b0
RH
2542 FILE *file ATTRIBUTE_UNUSED;
2543 int size ATTRIBUTE_UNUSED;
c65ebc55 2544{
8a959ea5
RH
2545 int i;
2546
97e242b0
RH
2547 /* Reset from the function's potential modifications. */
2548 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
c65ebc55 2549
97e242b0
RH
2550 if (current_frame_info.reg_fp)
2551 {
2552 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2553 reg_names[HARD_FRAME_POINTER_REGNUM]
2554 = reg_names[current_frame_info.reg_fp];
2555 reg_names[current_frame_info.reg_fp] = tmp;
2556 }
2557 if (! TARGET_REG_NAMES)
2558 {
97e242b0
RH
2559 for (i = 0; i < current_frame_info.n_input_regs; i++)
2560 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2561 for (i = 0; i < current_frame_info.n_local_regs; i++)
2562 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2563 for (i = 0; i < current_frame_info.n_output_regs; i++)
2564 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2565 }
8a959ea5 2566
97e242b0
RH
2567 current_frame_info.initialized = 0;
2568}
c65ebc55
JW
2569
2570int
97e242b0
RH
2571ia64_dbx_register_number (regno)
2572 int regno;
c65ebc55 2573{
97e242b0
RH
2574 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2575 from its home at loc79 to something inside the register frame. We
2576 must perform the same renumbering here for the debug info. */
2577 if (current_frame_info.reg_fp)
2578 {
2579 if (regno == HARD_FRAME_POINTER_REGNUM)
2580 regno = current_frame_info.reg_fp;
2581 else if (regno == current_frame_info.reg_fp)
2582 regno = HARD_FRAME_POINTER_REGNUM;
2583 }
2584
2585 if (IN_REGNO_P (regno))
2586 return 32 + regno - IN_REG (0);
2587 else if (LOC_REGNO_P (regno))
2588 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2589 else if (OUT_REGNO_P (regno))
2590 return (32 + current_frame_info.n_input_regs
2591 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2592 else
2593 return regno;
c65ebc55
JW
2594}
2595
97e242b0
RH
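/* Emit RTL to initialize the trampoline at ADDR.  The stores below lay
   it out as four 8 byte words (a sketch of the result):

     addr+ 0: __ia64_trampoline    fake descriptor, entry point
     addr+ 8: addr+16              fake descriptor, "gp" slot
     addr+16: fnaddr               the target descriptor
     addr+24: static_chain  */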
2596void
2597ia64_initialize_trampoline (addr, fnaddr, static_chain)
2598 rtx addr, fnaddr, static_chain;
2599{
2600 rtx addr_reg, eight = GEN_INT (8);
2601
2602 /* Load up our iterator. */
2603 addr_reg = gen_reg_rtx (Pmode);
2604 emit_move_insn (addr_reg, addr);
2605
2606 /* The first two words are the fake descriptor:
2607 __ia64_trampoline, ADDR+16. */
2608 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2609 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2610 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2611
2612 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2613 copy_to_reg (plus_constant (addr, 16)));
2614 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2615
2616 /* The third word is the target descriptor. */
2617 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2618 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2619
2620 /* The fourth word is the static chain. */
2621 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2622}
c65ebc55
JW
2623\f
2624/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
2625 for the last named argument which has type TYPE and mode MODE.
2626
2627 We generate the actual spill instructions during prologue generation. */
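/* Illustrative numbers, assuming MAX_ARGUMENT_SLOTS == 8 and
   UNITS_PER_WORD == 8: for "int f (int a, int b, ...)", advancing past
   the named arguments leaves cum.words == 2, so n == 6, *pretend_size
   becomes 48, and the prologue spills the six remaining argument GRs.  */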
2628
c65ebc55
JW
2629void
2630ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2631 CUMULATIVE_ARGS cum;
26a110f5
RH
2632 int int_mode;
2633 tree type;
c65ebc55 2634 int * pretend_size;
97e242b0 2635 int second_time ATTRIBUTE_UNUSED;
c65ebc55 2636{
26a110f5
RH
2637 /* If this is a stdarg function, then skip the current argument. */
2638 if (! current_function_varargs)
2639 ia64_function_arg_advance (&cum, int_mode, type, 1);
c65ebc55
JW
2640
2641 if (cum.words < MAX_ARGUMENT_SLOTS)
26a110f5
RH
2642 {
2643 int n = MAX_ARGUMENT_SLOTS - cum.words;
2644 *pretend_size = n * UNITS_PER_WORD;
2645 cfun->machine->n_varargs = n;
2646 }
c65ebc55
JW
2647}
2648
2649/* Check whether TYPE is a homogeneous floating point aggregate. If
2650 it is, return the mode of the floating point type that appears
2651   in all leaves.  If it is not, return VOIDmode.
2652
2653   An aggregate is a homogeneous floating point aggregate if all
2654   fields/elements in it have the same floating point type (e.g.,
2655 SFmode). 128-bit quad-precision floats are excluded. */
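/* Hypothetical examples (not from this file) of what this returns:

     struct a { float x, y, z; };          SFmode  -- an HFA
     struct b { double d[4]; };            DFmode  -- an HFA
     _Complex double c;                    DFmode  -- the complex rule below
     struct d { float f; double g; };      VOIDmode -- mixed leaf types  */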
2656
2657static enum machine_mode
2658hfa_element_mode (type, nested)
2659 tree type;
2660 int nested;
2661{
2662 enum machine_mode element_mode = VOIDmode;
2663 enum machine_mode mode;
2664 enum tree_code code = TREE_CODE (type);
2665 int know_element_mode = 0;
2666 tree t;
2667
2668 switch (code)
2669 {
2670 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2671 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2672 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2673 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2674 case FUNCTION_TYPE:
2675 return VOIDmode;
2676
2677 /* Fortran complex types are supposed to be HFAs, so we need to handle
2678 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2679 types though. */
2680 case COMPLEX_TYPE:
2681 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2682 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2683 * BITS_PER_UNIT, MODE_FLOAT, 0);
2684 else
2685 return VOIDmode;
2686
2687 case REAL_TYPE:
2688 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2689 mode if this is contained within an aggregate. */
2690 if (nested)
2691 return TYPE_MODE (type);
2692 else
2693 return VOIDmode;
2694
2695 case ARRAY_TYPE:
2696 return TYPE_MODE (TREE_TYPE (type));
2697
2698 case RECORD_TYPE:
2699 case UNION_TYPE:
2700 case QUAL_UNION_TYPE:
2701 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2702 {
2703 if (TREE_CODE (t) != FIELD_DECL)
2704 continue;
2705
2706 mode = hfa_element_mode (TREE_TYPE (t), 1);
2707 if (know_element_mode)
2708 {
2709 if (mode != element_mode)
2710 return VOIDmode;
2711 }
2712 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2713 return VOIDmode;
2714 else
2715 {
2716 know_element_mode = 1;
2717 element_mode = mode;
2718 }
2719 }
2720 return element_mode;
2721
2722 default:
2723 /* If we reach here, we probably have some front-end specific type
2724 that the backend doesn't know about. This can happen via the
2725 aggregate_value_p call in init_function_start. All we can do is
2726 ignore unknown tree types. */
2727 return VOIDmode;
2728 }
2729
2730 return VOIDmode;
2731}
2732
2733/* Return rtx for register where argument is passed, or zero if it is passed
2734 on the stack. */
2735
2736/* ??? 128-bit quad-precision floats are always passed in general
2737 registers. */
2738
2739rtx
2740ia64_function_arg (cum, mode, type, named, incoming)
2741 CUMULATIVE_ARGS *cum;
2742 enum machine_mode mode;
2743 tree type;
2744 int named;
2745 int incoming;
2746{
2747 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2748 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2749 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2750 / UNITS_PER_WORD);
2751 int offset = 0;
2752 enum machine_mode hfa_mode = VOIDmode;
2753
f9f45ccb
JW
2754 /* Integer and float arguments larger than 8 bytes start at the next even
2755 boundary. Aggregates larger than 8 bytes start at the next even boundary
7d17b34d
JW
2756 if the aggregate has 16 byte alignment. Net effect is that types with
2757 alignment greater than 8 start at the next even boundary. */
f9f45ccb
JW
2758 /* ??? The ABI does not specify how to handle aggregates with alignment from
2759 9 to 15 bytes, or greater than 16. We handle them all as if they had
2760 16 byte alignment. Such aggregates can occur only if gcc extensions are
2761 used. */
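  /* Example of the net effect: a 16 byte aligned aggregate arriving when
     cum->words is odd (say slot 3) is bumped to the next even slot (4),
     while a plain DImode argument in the same position is not.  */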
7d17b34d
JW
2762 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2763 : (words > 1))
2764 && (cum->words & 1))
c65ebc55
JW
2765 offset = 1;
2766
2767 /* If all argument slots are used, then it must go on the stack. */
2768 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2769 return 0;
2770
2771 /* Check for and handle homogeneous FP aggregates. */
2772 if (type)
2773 hfa_mode = hfa_element_mode (type, 0);
2774
2775 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2776 and unprototyped hfas are passed specially. */
2777 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2778 {
2779 rtx loc[16];
2780 int i = 0;
2781 int fp_regs = cum->fp_regs;
2782 int int_regs = cum->words + offset;
2783 int hfa_size = GET_MODE_SIZE (hfa_mode);
2784 int byte_size;
2785 int args_byte_size;
2786
2787 /* If prototyped, pass it in FR regs then GR regs.
2788 If not prototyped, pass it in both FR and GR regs.
2789
2790 If this is an SFmode aggregate, then it is possible to run out of
2791 FR regs while GR regs are still left. In that case, we pass the
2792 remaining part in the GR regs. */
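      /* Sketch (hypothetical type): for "struct s { float f[4]; }" passed
	 named with a prototype, the loop below builds a PARALLEL of four
	 SFmode FR registers starting at FR_ARG_FIRST, at byte offsets 0,
	 4, 8 and 12.  Had the FR registers run out part way, the second
	 loop would place the remainder in GR argument slots.  */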
2793
2794 /* Fill the FP regs. We do this always. We stop if we reach the end
2795 of the argument, the last FP register, or the last argument slot. */
2796
2797 byte_size = ((mode == BLKmode)
2798 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2799 args_byte_size = int_regs * UNITS_PER_WORD;
2800 offset = 0;
2801 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2802 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2803 {
2804 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2805 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2806 + fp_regs)),
2807 GEN_INT (offset));
c65ebc55
JW
2808 offset += hfa_size;
2809 args_byte_size += hfa_size;
2810 fp_regs++;
2811 }
2812
2813 /* If no prototype, then the whole thing must go in GR regs. */
2814 if (! cum->prototype)
2815 offset = 0;
2816 /* If this is an SFmode aggregate, then we might have some left over
2817 that needs to go in GR regs. */
2818 else if (byte_size != offset)
2819 int_regs += offset / UNITS_PER_WORD;
2820
2821 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2822
2823 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2824 {
2825 enum machine_mode gr_mode = DImode;
2826
2827 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2828 then this goes in a GR reg left adjusted/little endian, right
2829 adjusted/big endian. */
2830 /* ??? Currently this is handled wrong, because 4-byte hunks are
2831 always right adjusted/little endian. */
2832 if (offset & 0x4)
2833 gr_mode = SImode;
2834 /* If we have an even 4 byte hunk because the aggregate is a
2835 multiple of 4 bytes in size, then this goes in a GR reg right
2836 adjusted/little endian. */
2837 else if (byte_size - offset == 4)
2838 gr_mode = SImode;
2839
2840 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2841 gen_rtx_REG (gr_mode, (basereg
2842 + int_regs)),
2843 GEN_INT (offset));
2844 offset += GET_MODE_SIZE (gr_mode);
2845 int_regs++;
2846 }
2847
2848 /* If we ended up using just one location, just return that one loc. */
2849 if (i == 1)
2850 return XEXP (loc[0], 0);
2851 else
2852 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2853 }
2854
2855 /* Integral and aggregates go in general registers. If we have run out of
2856 FR registers, then FP values must also go in general registers. This can
2857 happen when we have a SFmode HFA. */
2858 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2859 return gen_rtx_REG (mode, basereg + cum->words + offset);
2860
2861 /* If there is a prototype, then FP values go in a FR register when
2862   named, and in a GR register when unnamed.  */
2863 else if (cum->prototype)
2864 {
2865 if (! named)
2866 return gen_rtx_REG (mode, basereg + cum->words + offset);
2867 else
2868 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2869 }
2870 /* If there is no prototype, then FP values go in both FR and GR
2871 registers. */
2872 else
2873 {
2874 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2875 gen_rtx_REG (mode, (FR_ARG_FIRST
2876 + cum->fp_regs)),
2877 const0_rtx);
2878 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2879 gen_rtx_REG (mode,
2880 (basereg + cum->words
2881 + offset)),
2882 const0_rtx);
809d4ef1 2883
c65ebc55
JW
2884 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2885 }
2886}
2887
2888/* Return number of words, at the beginning of the argument, that must be
2889   put in registers.  0 if the argument is entirely in registers or entirely
2890 in memory. */
2891
2892int
2893ia64_function_arg_partial_nregs (cum, mode, type, named)
2894 CUMULATIVE_ARGS *cum;
2895 enum machine_mode mode;
2896 tree type;
fd7c34b0 2897 int named ATTRIBUTE_UNUSED;
c65ebc55
JW
2898{
2899 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2900 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2901 / UNITS_PER_WORD);
2902 int offset = 0;
2903
7d17b34d
JW
2904 /* Arguments with alignment larger than 8 bytes start at the next even
2905 boundary. */
2906 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2907 : (words > 1))
2908 && (cum->words & 1))
c65ebc55
JW
2909 offset = 1;
2910
2911 /* If all argument slots are used, then it must go on the stack. */
2912 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2913 return 0;
2914
2915 /* It doesn't matter whether the argument goes in FR or GR regs. If
2916 it fits within the 8 argument slots, then it goes entirely in
2917 registers. If it extends past the last argument slot, then the rest
2918 goes on the stack. */
2919
2920 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
2921 return 0;
2922
2923 return MAX_ARGUMENT_SLOTS - cum->words - offset;
2924}
2925
2926/* Update CUM to point after this argument. This is patterned after
2927 ia64_function_arg. */
2928
2929void
2930ia64_function_arg_advance (cum, mode, type, named)
2931 CUMULATIVE_ARGS *cum;
2932 enum machine_mode mode;
2933 tree type;
2934 int named;
2935{
2936 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2937 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2938 / UNITS_PER_WORD);
2939 int offset = 0;
2940 enum machine_mode hfa_mode = VOIDmode;
2941
2942 /* If all arg slots are already full, then there is nothing to do. */
2943 if (cum->words >= MAX_ARGUMENT_SLOTS)
2944 return;
2945
7d17b34d
JW
2946 /* Arguments with alignment larger than 8 bytes start at the next even
2947 boundary. */
2948 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2949 : (words > 1))
2950 && (cum->words & 1))
c65ebc55
JW
2951 offset = 1;
2952
2953 cum->words += words + offset;
2954
2955 /* Check for and handle homogeneous FP aggregates. */
2956 if (type)
2957 hfa_mode = hfa_element_mode (type, 0);
2958
2959 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2960 and unprototyped hfas are passed specially. */
2961 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2962 {
2963 int fp_regs = cum->fp_regs;
2964 /* This is the original value of cum->words + offset. */
2965 int int_regs = cum->words - words;
2966 int hfa_size = GET_MODE_SIZE (hfa_mode);
2967 int byte_size;
2968 int args_byte_size;
2969
2970 /* If prototyped, pass it in FR regs then GR regs.
2971 If not prototyped, pass it in both FR and GR regs.
2972
2973 If this is an SFmode aggregate, then it is possible to run out of
2974 FR regs while GR regs are still left. In that case, we pass the
2975 remaining part in the GR regs. */
2976
2977 /* Fill the FP regs. We do this always. We stop if we reach the end
2978 of the argument, the last FP register, or the last argument slot. */
2979
2980 byte_size = ((mode == BLKmode)
2981 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2982 args_byte_size = int_regs * UNITS_PER_WORD;
2983 offset = 0;
2984 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2985 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
2986 {
c65ebc55
JW
2987 offset += hfa_size;
2988 args_byte_size += hfa_size;
2989 fp_regs++;
2990 }
2991
2992 cum->fp_regs = fp_regs;
2993 }
2994
2995 /* Integral and aggregates go in general registers. If we have run out of
2996 FR registers, then FP values must also go in general registers. This can
2997 happen when we have a SFmode HFA. */
2998 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2999 return;
3000
3001 /* If there is a prototype, then FP values go in a FR register when
3002   named, and in a GR register when unnamed.  */
3003 else if (cum->prototype)
3004 {
3005 if (! named)
3006 return;
3007 else
3008 /* ??? Complex types should not reach here. */
3009 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3010 }
3011 /* If there is no prototype, then FP values go in both FR and GR
3012 registers. */
3013 else
3014 /* ??? Complex types should not reach here. */
3015 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3016
3017 return;
3018}
3019\f
3020/* Implement va_start. */
3021
3022void
3023ia64_va_start (stdarg_p, valist, nextarg)
3024 int stdarg_p;
3025 tree valist;
3026 rtx nextarg;
3027{
3028 int arg_words;
3029 int ofs;
3030
3031 arg_words = current_function_args_info.words;
3032
3033 if (stdarg_p)
3034 ofs = 0;
3035 else
3036 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3037
3038 nextarg = plus_constant (nextarg, ofs);
3039 std_expand_builtin_va_start (1, valist, nextarg);
3040}
3041
3042/* Implement va_arg. */
3043
3044rtx
3045ia64_va_arg (valist, type)
3046 tree valist, type;
3047{
c65ebc55
JW
3048 tree t;
3049
7d17b34d
JW
3050 /* Arguments with alignment larger than 8 bytes start at the next even
3051 boundary. */
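  /* With UNITS_PER_WORD == 8, the tree expression built below is in
     effect valist = (valist + 15) & -16, rounding the argument pointer
     up to a 16 byte boundary.  */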
3052 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
c65ebc55
JW
3053 {
3054 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3055 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
809d4ef1 3056 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
c65ebc55
JW
3057 build_int_2 (-2 * UNITS_PER_WORD, -1));
3058 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3059 TREE_SIDE_EFFECTS (t) = 1;
3060 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3061 }
3062
3063 return std_expand_builtin_va_arg (valist, type);
3064}
3065\f
3066/* Return 1 if the function's return value is returned in memory.  Return 0 if it is
3067 in a register. */
3068
3069int
3070ia64_return_in_memory (valtype)
3071 tree valtype;
3072{
3073 enum machine_mode mode;
3074 enum machine_mode hfa_mode;
3075 int byte_size;
3076
3077 mode = TYPE_MODE (valtype);
3078 byte_size = ((mode == BLKmode)
3079 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3080
3081  /* HFAs with up to 8 elements are returned in the FP argument registers.  */
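  /* E.g. "struct { double d[8]; }" (8 DFmode elements) is returned in FP
     registers, while "struct { float f[9]; }" (9 elements > 8 slots) is
     returned in memory.  (Hypothetical types for illustration.)  */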
3082
3083 hfa_mode = hfa_element_mode (valtype, 0);
3084 if (hfa_mode != VOIDmode)
3085 {
3086 int hfa_size = GET_MODE_SIZE (hfa_mode);
3087
c65ebc55
JW
3088 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3089 return 1;
3090 else
3091 return 0;
3092 }
3093
3094 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3095 return 1;
3096 else
3097 return 0;
3098}
3099
3100/* Return rtx for register that holds the function return value. */
3101
3102rtx
3103ia64_function_value (valtype, func)
3104 tree valtype;
fd7c34b0 3105 tree func ATTRIBUTE_UNUSED;
c65ebc55
JW
3106{
3107 enum machine_mode mode;
3108 enum machine_mode hfa_mode;
3109
3110 mode = TYPE_MODE (valtype);
3111 hfa_mode = hfa_element_mode (valtype, 0);
3112
3113 if (hfa_mode != VOIDmode)
3114 {
3115 rtx loc[8];
3116 int i;
3117 int hfa_size;
3118 int byte_size;
3119 int offset;
3120
3121 hfa_size = GET_MODE_SIZE (hfa_mode);
3122 byte_size = ((mode == BLKmode)
3123 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3124 offset = 0;
3125 for (i = 0; offset < byte_size; i++)
3126 {
3127 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3128 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3129 GEN_INT (offset));
c65ebc55
JW
3130 offset += hfa_size;
3131 }
3132
3133 if (i == 1)
3134 return XEXP (loc[0], 0);
3135 else
3136 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3137 }
3138 else if (FLOAT_TYPE_P (valtype))
3139 return gen_rtx_REG (mode, FR_ARG_FIRST);
3140 else
3141 return gen_rtx_REG (mode, GR_RET_FIRST);
3142}
3143
3144/* Print a memory address as an operand to reference that memory location. */
3145
3146/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3147 also call this from ia64_print_operand for memory addresses. */
3148
3149void
3150ia64_print_operand_address (stream, address)
fd7c34b0
RH
3151 FILE * stream ATTRIBUTE_UNUSED;
3152 rtx address ATTRIBUTE_UNUSED;
c65ebc55
JW
3153{
3154}
3155
3156/* Print an operand to an assembler instruction.
c65ebc55
JW
3157 C Swap and print a comparison operator.
3158 D Print an FP comparison operator.
3159 E Print 32 - constant, for SImode shifts as extract.
66db6b45 3160 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
3161 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3162 a floating point register emitted normally.
3163 I Invert a predicate register by adding 1.
e5bde68a 3164 J Select the proper predicate register for a condition.
6b6c1201 3165 j Select the inverse predicate register for a condition.
c65ebc55
JW
3166 O Append .acq for volatile load.
3167 P Postincrement of a MEM.
3168 Q Append .rel for volatile store.
3169 S Shift amount for shladd instruction.
3170 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3171 for Intel assembler.
3172 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3173 for Intel assembler.
3174 r Print register name, or constant 0 as r0. HP compatibility for
3175 Linux kernel. */
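/* Illustrative uses (operand spellings are assumptions, not quotes from
   ia64.md): with operand 0 = (mem:DI (post_inc (reg r14))), "%P0"
   appends ", 8" after the address, while the default MEM case below
   prints the address itself as "[r14]".  */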
3176void
3177ia64_print_operand (file, x, code)
3178 FILE * file;
3179 rtx x;
3180 int code;
3181{
e57b9d65
RH
3182 const char *str;
3183
c65ebc55
JW
3184 switch (code)
3185 {
c65ebc55
JW
3186 case 0:
3187 /* Handled below. */
3188 break;
809d4ef1 3189
c65ebc55
JW
3190 case 'C':
3191 {
3192 enum rtx_code c = swap_condition (GET_CODE (x));
3193 fputs (GET_RTX_NAME (c), file);
3194 return;
3195 }
3196
3197 case 'D':
e57b9d65
RH
3198 switch (GET_CODE (x))
3199 {
3200 case NE:
3201 str = "neq";
3202 break;
3203 case UNORDERED:
3204 str = "unord";
3205 break;
3206 case ORDERED:
3207 str = "ord";
3208 break;
3209 default:
3210 str = GET_RTX_NAME (GET_CODE (x));
3211 break;
3212 }
3213 fputs (str, file);
c65ebc55
JW
3214 return;
3215
3216 case 'E':
3217 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3218 return;
3219
66db6b45
RH
3220 case 'e':
3221 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3222 return;
3223
c65ebc55
JW
3224 case 'F':
3225 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 3226 str = reg_names [FR_REG (0)];
c65ebc55 3227 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 3228 str = reg_names [FR_REG (1)];
c65ebc55 3229 else if (GET_CODE (x) == REG)
e57b9d65 3230 str = reg_names [REGNO (x)];
c65ebc55
JW
3231 else
3232 abort ();
e57b9d65 3233 fputs (str, file);
c65ebc55
JW
3234 return;
3235
3236 case 'I':
3237 fputs (reg_names [REGNO (x) + 1], file);
3238 return;
3239
e5bde68a 3240 case 'J':
6b6c1201
RH
3241 case 'j':
3242 {
3243 unsigned int regno = REGNO (XEXP (x, 0));
3244 if (GET_CODE (x) == EQ)
3245 regno += 1;
3246 if (code == 'j')
3247 regno ^= 1;
3248 fputs (reg_names [regno], file);
3249 }
e5bde68a
RH
3250 return;
3251
c65ebc55
JW
3252 case 'O':
3253 if (MEM_VOLATILE_P (x))
3254 fputs(".acq", file);
3255 return;
3256
3257 case 'P':
3258 {
4b983fdc 3259 HOST_WIDE_INT value;
c65ebc55 3260
4b983fdc
RH
3261 switch (GET_CODE (XEXP (x, 0)))
3262 {
3263 default:
3264 return;
3265
3266 case POST_MODIFY:
3267 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3268 if (GET_CODE (x) == CONST_INT)
08012cda 3269 value = INTVAL (x);
4b983fdc
RH
3270 else if (GET_CODE (x) == REG)
3271 {
08012cda 3272 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
3273 return;
3274 }
3275 else
3276 abort ();
3277 break;
c65ebc55 3278
4b983fdc
RH
3279 case POST_INC:
3280 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 3281 break;
c65ebc55 3282
4b983fdc 3283 case POST_DEC:
08012cda 3284 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
3285 break;
3286 }
809d4ef1 3287
4b983fdc
RH
3288 putc (',', file);
3289 putc (' ', file);
3290 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
3291 return;
3292 }
3293
3294 case 'Q':
3295 if (MEM_VOLATILE_P (x))
3296 fputs(".rel", file);
3297 return;
3298
3299 case 'S':
809d4ef1 3300 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
3301 return;
3302
3303 case 'T':
3304 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3305 {
809d4ef1 3306 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3307 return;
3308 }
3309 break;
3310
3311 case 'U':
3312 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3313 {
3b572406 3314 const char *prefix = "0x";
c65ebc55
JW
3315 if (INTVAL (x) & 0x80000000)
3316 {
3317 fprintf (file, "0xffffffff");
3318 prefix = "";
3319 }
809d4ef1 3320 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3321 return;
3322 }
3323 break;
809d4ef1 3324
c65ebc55 3325 case 'r':
18a3c539
JW
3326 /* If this operand is the constant zero, write it as register zero.
3327 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
3328 if (GET_CODE (x) == REG)
3329 fputs (reg_names[REGNO (x)], file);
3330 else if (x == CONST0_RTX (GET_MODE (x)))
3331 fputs ("r0", file);
18a3c539
JW
3332 else if (GET_CODE (x) == CONST_INT)
3333 output_addr_const (file, x);
c65ebc55
JW
3334 else
3335 output_operand_lossage ("invalid %%r value");
3336 return;
3337
85548039
RH
3338 case '+':
3339 {
3340 const char *which;
3341
3342 /* For conditional branches, returns or calls, substitute
3343 sptk, dptk, dpnt, or spnt for %s. */
3344 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3345 if (x)
3346 {
3347 int pred_val = INTVAL (XEXP (x, 0));
3348
3349	    /* Guess the bottom 2% and top 2% statically predicted.  */
55d8cb78 3350 if (pred_val < REG_BR_PROB_BASE / 50)
85548039
RH
3351 which = ".spnt";
3352 else if (pred_val < REG_BR_PROB_BASE / 2)
3353 which = ".dpnt";
55d8cb78 3354 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
85548039
RH
3355 which = ".dptk";
3356 else
3357 which = ".sptk";
3358 }
3359 else if (GET_CODE (current_output_insn) == CALL_INSN)
3360 which = ".sptk";
3361 else
3362 which = ".dptk";
3363
3364 fputs (which, file);
3365 return;
3366 }
3367
6f8aa100
RH
3368 case ',':
3369 x = current_insn_predicate;
3370 if (x)
3371 {
3372 unsigned int regno = REGNO (XEXP (x, 0));
3373 if (GET_CODE (x) == EQ)
3374 regno += 1;
6f8aa100
RH
3375 fprintf (file, "(%s) ", reg_names [regno]);
3376 }
3377 return;
3378
c65ebc55
JW
3379 default:
3380 output_operand_lossage ("ia64_print_operand: unknown code");
3381 return;
3382 }
3383
3384 switch (GET_CODE (x))
3385 {
3386 /* This happens for the spill/restore instructions. */
3387 case POST_INC:
4b983fdc
RH
3388 case POST_DEC:
3389 case POST_MODIFY:
c65ebc55
JW
3390 x = XEXP (x, 0);
3391 /* ... fall through ... */
3392
3393 case REG:
3394 fputs (reg_names [REGNO (x)], file);
3395 break;
3396
3397 case MEM:
3398 {
3399 rtx addr = XEXP (x, 0);
4b983fdc 3400 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
c65ebc55
JW
3401 addr = XEXP (addr, 0);
3402 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3403 break;
3404 }
809d4ef1 3405
c65ebc55
JW
3406 default:
3407 output_addr_const (file, x);
3408 break;
3409 }
3410
3411 return;
3412}
c65ebc55 3413\f
5527bf14
RH
3414/* Calculate the cost of moving data from a register in class FROM to
3415 one in class TO. */
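/* E.g. under this scheme an ordinary GR<->GR move costs 2, a PR<->PR
   move 3, and any GR<->FR move 5 (see the TFmode comment below).  */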
3416
3417int
3418ia64_register_move_cost (from, to)
3419 enum reg_class from, to;
3420{
3421 int from_hard, to_hard;
3422 int from_gr, to_gr;
3f622353 3423 int from_fr, to_fr;
f2f90c63 3424 int from_pr, to_pr;
5527bf14
RH
3425
3426 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3427 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3428 from_gr = (from == GENERAL_REGS);
3429 to_gr = (to == GENERAL_REGS);
3f622353
RH
3430 from_fr = (from == FR_REGS);
3431 to_fr = (to == FR_REGS);
f2f90c63
RH
3432 from_pr = (from == PR_REGS);
3433 to_pr = (to == PR_REGS);
5527bf14
RH
3434
3435 if (from_hard && to_hard)
3436 return 8;
3437 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3438 return 6;
3439
f2f90c63
RH
3440 /* Moving between PR registers takes two insns. */
3441 else if (from_pr && to_pr)
3442 return 3;
3443 /* Moving between PR and anything but GR is impossible. */
3444 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3445 return 6;
3446
3f622353
RH
3447 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3448 secondary memory reloads for TFmode moves. Unfortunately, we don't
3449 have the mode here, so we can't check that. */
3450 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3451 to avoid spectacularly poor register class preferencing for TFmode. */
3452 else if (from_fr != to_fr)
3453 return 5;
3454
5527bf14
RH
3455 return 2;
3456}
c65ebc55
JW
3457
3458/* This function returns the register class required for a secondary
3459 register when copying between one of the registers in CLASS, and X,
3460 using MODE. A return value of NO_REGS means that no secondary register
3461 is required. */
3462
enum reg_class
ia64_secondary_reload_class (class, mode, x)
     enum reg_class class;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx x;
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (class)
    {
    case BR_REGS:
      /* ??? This is required because of a bad gcse/cse/global interaction.
	 We end up with two pseudos with overlapping lifetimes both of which
	 are equiv to the same constant, and both of which need to be in
	 BR_REGS.  This results in a BR_REGS to BR_REGS copy which doesn't
	 exist.  To reproduce, return NO_REGS here, and compile divdi3 in
	 libgcc2.c.  This seems to be a cse bug.  cse_basic_block_end changes
	 depending on the path length, which means the qty_first_reg check in
	 make_regs_eqv can give different answers at different times.  */
      /* ??? At some point I'll probably need a reload_indi pattern to handle
	 this.  */
      if (BR_REGNO_P (regno))
	return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
	 stack slot.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;
      break;

    case FR_REGS:
      /* This can happen when a paradoxical subreg is an operand to the
	 muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
	 in recog.c.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))
	return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
	return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
	return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 current_function_has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
	return GR_REGS;
      break;

    case GR_REGS:
      /* Since we have no offsettable memory addresses, we need a temporary
	 to hold the address of the second word.  */
      if (mode == TImode)
	return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}

\f
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (file, decl, name)
     FILE *file;
     tree decl;
     const char *name;
{
  int save_referenced;

  /* GNU as does not need anything here.  */
  if (TARGET_GNU_AS)
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
     restore it.  */
  save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      fprintf (file, "%s", TYPE_ASM_OP);
      assemble_name (file, name);
      putc (',', file);
      fprintf (file, TYPE_OPERAND_FMT, "function");
      putc ('\n', file);
    }
  ASM_GLOBALIZE_LABEL (file, name);
  TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
}
\f
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const_str)
     const char *const_str;
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning ("value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning ("unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning ("unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning ("%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
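
/* Example (illustrative): -mfixed-range=f32-f127,f2-f5 marks f32..f127 and
   f2..f5 as both fixed and call-used, so the register allocator never
   assigns them.  */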

/* Called to register all of our global variables with the garbage
   collector.  */

static void
ia64_add_gc_roots ()
{
  ggc_add_rtx_root (&ia64_compare_op0, 1);
  ggc_add_rtx_root (&ia64_compare_op1, 1);
}

static void
ia64_init_machine_status (p)
     struct function *p;
{
  p->machine =
    (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
}

static void
ia64_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;

  if (machine)
    {
      ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
      ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
      ggc_mark_rtx (machine->ia64_gp_save);
    }
}

static void
ia64_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}

/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options ()
{
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
    {
      warning ("cannot optimize division for both latency and throughput");
      target_flags &= ~MASK_INLINE_DIV_THR;
    }

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
  mark_machine_status = ia64_mark_machine_status;
  free_machine_status = ia64_free_machine_status;

  ia64_add_gc_roots ();
}
\f
static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS ((rtx));
static enum attr_itanium_class ia64_safe_itanium_class PARAMS ((rtx));
static enum attr_type ia64_safe_type PARAMS ((rtx));

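/* These wrappers return a neutral "unknown"/"no" value instead of querying
   the insn attributes when recog cannot recognize INSN (e.g. for asms), so
   callers can ask about the attributes of arbitrary insns safely.  */
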
static enum attr_itanium_requires_unit0
ia64_safe_itanium_requires_unit0 (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_requires_unit0 (insn);
  else
    return ITANIUM_REQUIRES_UNIT0_NO;
}

static enum attr_itanium_class
ia64_safe_itanium_class (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
\f
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_GP		(GR_REG (1))
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

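/* Illustrative example: after "(p6) mov r4 = r5", r4 has WRITE_COUNT 1 and
   FIRST_PRED p6; a following "(p7) mov r4 = r6" is allowed (p7 is p6's
   complement) and bumps WRITE_COUNT to 2, after which any further write to
   r4 in the same group forces a stop bit.  */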
struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update PARAMS ((struct reg_write_state *, int,
				struct reg_flags, int));
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
static void init_insn_group_barriers PARAMS ((void));
static int group_barrier_needed_p PARAMS ((rtx));
static int safe_group_barrier_needed_p PARAMS ((rtx));

/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (rws, regno, flags, pred)
     struct reg_write_state *rws;
     int regno;
     struct reg_flags flags;
     int pred;
{
  rws[regno].write_count += pred ? 1 : 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}

/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (regno, flags, pred)
     int regno;
     struct reg_flags flags;
     int pred;
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
	abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  rws_sum[regno].written_by_and = flags.is_and;
	  rws_sum[regno].written_by_or = flags.is_or;
	  break;

	default:
	  abort ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  abort ();
	}
    }

  return need_barrier;
}

static int
rws_access_reg (reg, flags, pred)
     rtx reg;
     struct reg_flags flags;
     int pred;
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}

/* Handle an access to rtx X of type FLAGS using predicate register PRED.
   Return 1 if this access creates a dependency with an earlier instruction
   in the same group.  */

static int
rtx_needs_barrier (x, flags, pred)
     rtx x;
     struct reg_flags flags;
     int pred;
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx src, dst;
  rtx cond = 0;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      src = SET_SRC (x);
      switch (GET_CODE (src))
	{
	case CALL:
	  /* We don't need to worry about the result registers that
	     get written by a subroutine call.  */
	  need_barrier = rtx_needs_barrier (src, flags, pred);
	  return need_barrier;

	case IF_THEN_ELSE:
	  if (SET_DEST (x) == pc_rtx)
	    {
	      /* X is a conditional branch.  */
	      /* ??? This seems redundant, as the caller sets this bit for
		 all JUMP_INSNs.  */
	      new_flags.is_branch = 1;
	      need_barrier = rtx_needs_barrier (src, new_flags, pred);
	      return need_barrier;
	    }
	  else
	    {
	      /* X is a conditional move.  */
	      cond = XEXP (src, 0);
	      if (GET_CODE (cond) == EQ)
		is_complemented = 1;
	      cond = XEXP (cond, 0);
	      if (GET_CODE (cond) != REG
		  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
		abort ();

	      if (XEXP (src, 1) == SET_DEST (x)
		  || XEXP (src, 2) == SET_DEST (x))
		{
		  /* X is a conditional move that conditionally writes the
		     destination.  */

		  /* We need another complement in this case.  */
		  if (XEXP (src, 1) == SET_DEST (x))
		    is_complemented = ! is_complemented;

		  pred = REGNO (cond);
		  if (is_complemented)
		    ++pred;
		}

	      /* ??? If this is a conditional write to the dest, then this
		 instruction does not actually read one source.  This probably
		 doesn't matter, because that source is also the dest.  */
	      /* ??? Multiple writes to predicate registers are allowed
		 if they are all AND type compares, or if they are all OR
		 type compares.  We do not generate such instructions
		 currently.  */
	    }
	  /* ... fall through ...  */

	default:
	  if (GET_RTX_CLASS (GET_CODE (src)) == '<'
	      && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
	    /* Set new_flags.is_fp to 1 so that we know we're dealing
	       with a floating point comparison when processing the
	       destination of the SET.  */
	    new_flags.is_fp = 1;

	  /* Discover if this is a parallel comparison.  We only handle
	     and.orcm and or.andcm at present, since we must retain a
	     strict inverse on the predicate pair.  */
	  else if (GET_CODE (src) == AND)
	    new_flags.is_and = flags.is_and = 1;
	  else if (GET_CODE (src) == IOR)
	    new_flags.is_or = flags.is_or = 1;

	  break;
	}
      need_barrier = rtx_needs_barrier (src, flags, pred);

      /* This instruction unconditionally uses a predicate register.  */
      if (cond)
	need_barrier |= rws_access_reg (cond, flags, 0);

      dst = SET_DEST (x);
      if (GET_CODE (dst) == ZERO_EXTRACT)
	{
	  need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
	  dst = XEXP (dst, 0);
	}
      new_flags.is_write = 1;
      need_barrier |= rtx_needs_barrier (dst, new_flags, pred);
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
	abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	abort ();
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids an abort in rws_access_reg.  */
	  if (! rws_insn[REG_VOLATILE].write_count)
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	  return 1;
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We can not just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
	 a traditional asm unlike its normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred))
	  need_barrier = 1;
      break;

    case SUBREG:
      x = SUBREG_REG (x);
      /* FALLTHRU */
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT: case CONST_DOUBLE:
    case SYMBOL_REF: case LABEL_REF: case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC: case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
    case MOD: case UDIV: case UMOD: case AND: case IOR:
    case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
    case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
    case NE: case EQ: case GE: case GT: case LE:
    case LT: case GEU: case GTU: case LEU: case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
    case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
    case SQRT: case FFS:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case 1: /* st8.spill */
	case 2: /* ld8.fill */
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == 1);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case 3: /* stf.spill */
	case 4: /* ldf.spill */
	case 8: /* popcnt */
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case 7: /* pred_rel_mutex */
	case 9: /* pic call */
	case 12: /* mf */
	case 19: /* fetchadd_acq */
	case 20: /* mov = ar.bsp */
	case 21: /* flushrs */
	case 22: /* bundle selector */
	case 23: /* cycle display */
	  break;

	case 5: /* recip_approx */
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case 13: /* cmpxchg_acq */
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  abort ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case 0: /* alloc */
	  /* Alloc must always be the first instruction.  Currently, we
	     only emit it at the function start, so we don't need to worry
	     about emitting a stop bit before it.  */
	  need_barrier = rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return need_barrier;

	case 1: /* blockage */
	case 2: /* insn group barrier */
	  return 0;

	case 5: /* set_bsp */
	  need_barrier = 1;
	  break;

	case 7: /* pred.rel.mutex */
	case 8: /* safe_across_calls all */
	case 9: /* safe_across_calls normal */
	  return 0;

	default:
	  abort ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0': /* unused field */
	  case 'i': /* integer */
	  case 'n': /* note */
	  case 'w': /* wide integer */
	  case 's': /* pointer to string */
	  case 'S': /* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    abort ();
	  }
      break;
    }
  return need_barrier;
}

/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers ()
{
  memset (rws_sum, 0, sizeof (rws_sum));
}
/* Given the current state, recorded by previous calls to this function,
   determine whether a group barrier (a stop bit) is necessary before INSN.
   Return nonzero if so.  */

static int
group_barrier_needed_p (insn)
     rtx insn;
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);

      break;

    default:
      abort ();
    }
  return need_barrier;
}
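
/* Illustrative example: within one instruction group,

	mov r4 = r5
	mov r4 = r6

   the second unconditional write to r4 makes group_barrier_needed_p
   return nonzero, and the caller inserts a stop bit (";;") between the
   two insns.  */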

/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (insn)
     rtx insn;
{
  struct reg_write_state rws_saved[NUM_REGS];
  int t;
  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  t = group_barrier_needed_p (insn);
  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  return t;
}

/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  This function assumes that
   a final instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only inserts
   new ones at basic block boundaries, since these are invisible to the
   scheduler.  */

static void
emit_insn_group_barriers (dump, insns)
     FILE *dump;
     rtx insns;
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == 2)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed_p (insn))
	    {
	      if (last_label)
		{
		  if (dump)
		    fprintf (dump, "Emitting stop before label %d\n",
			     INSN_UID (last_label));
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    last_label);
		  insn = last_label;
		}
	      init_insn_group_barriers ();
	      last_label = 0;
	    }
	}
    }
}

/* Like emit_insn_group_barriers, but run if no final scheduling pass was
   run.  This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (dump, insns)
     FILE *dump ATTRIBUTE_UNUSED;
     rtx insns;
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == INSN
	  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	  && XINT (PATTERN (insn), 1) == 2)
	init_insn_group_barriers ();
      else if (INSN_P (insn))
	{
	  if (group_barrier_needed_p (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      /* Re-process INSN so that its register writes are recorded
		 in the new, empty group.  */
	      group_barrier_needed_p (insn);
	    }
	}
    }
}
\f
static int errata_find_address_regs PARAMS ((rtx *, void *));
static void errata_emit_nops PARAMS ((rtx));
static void fixup_errata PARAMS ((void));

/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   work around hardware errata.  */
static struct group
{
  HARD_REG_SET p_reg_set;
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[2];

/* Index into the last_group array.  */
static int group_idx;

/* Called through for_each_rtx; determines if a hard register that was
   conditionally set in the previous group is used as an address register.
   It ensures that for_each_rtx returns 1 in that case.  */
static int
errata_find_address_regs (xp, data)
     rtx *xp;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *xp;
  if (GET_CODE (x) != MEM)
    return 0;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_MODIFY)
    x = XEXP (x, 0);
  if (GET_CODE (x) == REG)
    {
      struct group *prev_group = last_group + (group_idx ^ 1);
      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
			     REGNO (x)))
	return 1;
      return -1;
    }
  return 0;
}

/* Called for each insn; this function keeps track of the state in
   last_group and emits additional NOPs if necessary to work around
   an Itanium A/B step erratum.  */
static void
errata_emit_nops (insn)
     rtx insn;
{
  struct group *this_group = last_group + group_idx;
  struct group *prev_group = last_group + (group_idx ^ 1);
  rtx pat = PATTERN (insn);
  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
  enum attr_type type;
  rtx set = real_pat;

  if (GET_CODE (real_pat) == USE
      || GET_CODE (real_pat) == CLOBBER
      || GET_CODE (real_pat) == ASM_INPUT
      || GET_CODE (real_pat) == ADDR_VEC
      || GET_CODE (real_pat) == ADDR_DIFF_VEC
      || asm_noperands (PATTERN (insn)) >= 0)
    return;

  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     parts of it.  */

  if (GET_CODE (set) == PARALLEL)
    {
      int i;
      set = XVECEXP (real_pat, 0, 0);
      for (i = 1; i < XVECLEN (real_pat, 0); i++)
	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
	  {
	    set = 0;
	    break;
	  }
    }

  if (set && GET_CODE (set) != SET)
    set = 0;

  type = get_attr_type (insn);

  if (type == TYPE_F
      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));

  if ((type == TYPE_M || type == TYPE_A) && cond && set
      && REG_P (SET_DEST (set))
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != MINUS
      && (GET_CODE (SET_SRC (set)) != ASHIFT
	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
      && (GET_CODE (SET_SRC (set)) != MEM
	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
    {
      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
	  || ! REG_P (XEXP (cond, 0)))
	abort ();

      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set,
			  REGNO (SET_DEST (set)));
    }
  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
    {
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      emit_insn_before (gen_nop (), insn);
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      group_idx = 0;
      memset (last_group, 0, sizeof last_group);
    }
}

/* Emit extra nops if they are required to work around hardware errata.  */

static void
fixup_errata ()
{
  rtx insn;

  if (! TARGET_B_STEP)
    return;

  group_idx = 0;
  memset (last_group, 0, sizeof last_group);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!INSN_P (insn))
	continue;

      if (ia64_safe_type (insn) == TYPE_S)
	{
	  group_idx ^= 1;
	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
	}
      else
	errata_emit_nops (insn);
    }
}
\f
/* Instruction scheduling support.  */
/* Describe one bundle.  */

struct bundle
{
  /* Zero if there's no possibility of a stop in this bundle other than
     at the end, otherwise the position of the optional stop bit.  */
  int possible_stop;
  /* The types of the three slots.  */
  enum attr_type t[3];
  /* The pseudo op to be emitted into the assembler output.  */
  const char *name;
};

#define NR_BUNDLES 10

/* A list of all available bundles.  */

static const struct bundle bundle[NR_BUNDLES] =
{
  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
#if NR_BUNDLES == 10
  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
#endif
  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
     it matches an L type insn.  Otherwise we'll try to generate L type
     nops.  */
  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
};

/* Describe a packet of instructions.  Packets consist of two bundles that
   are visible to the hardware in one scheduling window.  */

struct ia64_packet
{
  const struct bundle *t1, *t2;
  /* Precomputed value of the first split issue in this packet if a cycle
     starts at its beginning.  */
  int first_split;
  /* For convenience, the insn types are replicated here so we don't have
     to go through T1 and T2 all the time.  */
  enum attr_type t[6];
};

/* An array containing all possible packets.  */
#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
static struct ia64_packet packets[NR_PACKETS];
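
/* Illustrative example: pairing .mii with .mfb yields a packet whose six
   slot types are M I I M F B; with NR_BUNDLES == 10 there are
   NR_PACKETS == 100 such combinations.  */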

/* Map attr_type to a string with the name.  */

static const char *type_names[] =
{
  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
};

/* Nonzero if we should insert stop bits into the schedule.  */
int ia64_final_schedule = 0;

static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
static rtx ia64_single_set PARAMS ((rtx));
static int insn_matches_slot PARAMS ((const struct ia64_packet *,
				      enum attr_type, int, rtx));
static void ia64_emit_insn_before PARAMS ((rtx, rtx));
#if 0
static rtx gen_nop_type PARAMS ((enum attr_type));
#endif
static void finish_last_head PARAMS ((FILE *, int));
static void rotate_one_bundle PARAMS ((FILE *));
static void rotate_two_bundles PARAMS ((FILE *));
static void cycle_end_fill_slots PARAMS ((FILE *));
static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
static int get_split PARAMS ((const struct ia64_packet *, int));
static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
				   const struct ia64_packet *, int));
static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
				      rtx *, enum attr_type *, int));
static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
static void dump_current_packet PARAMS ((FILE *));
static void schedule_stop PARAMS ((FILE *));
static rtx gen_nop_type PARAMS ((enum attr_type));
static void ia64_emit_nops PARAMS ((void));

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (b)
     int b;
{
  return bundle[b].name;
}

/* Compute the slot which will cause a split issue in packet P if the
   current cycle begins at slot BEGIN.  */

static int
itanium_split_issue (p, begin)
     const struct ia64_packet *p;
     int begin;
{
  int type_count[TYPE_S];
  int i;
  int split = 6;

  if (begin < 3)
    {
      /* Always split before and after MMF.  */
      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
	return 3;
      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
	return 3;
      /* Always split after MBB and BBB.  */
      if (p->t[1] == TYPE_B)
	return 3;
      /* Split after first bundle in MIB BBB combination.  */
      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
	return 3;
    }

  memset (type_count, 0, sizeof type_count);
  for (i = begin; i < split; i++)
    {
      enum attr_type t0 = p->t[i];
      /* An MLX bundle reserves the same units as an MFI bundle.  */
      enum attr_type t = (t0 == TYPE_L ? TYPE_F
			  : t0 == TYPE_X ? TYPE_I
			  : t0);
      int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
      if (type_count[t] == max)
	return i;
      type_count[t]++;
    }
  return split;
}
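
/* Worked example (illustrative): for an .mii + .mii packet starting at
   slot 0 the slot types are M I I M I I; only two I units exist, so the
   third I insn (slot 4) cannot issue in the same cycle and the function
   returns 4.  */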

/* Return the maximum number of instructions a cpu can issue.  */

int
ia64_issue_rate ()
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (insn)
     rtx insn;
{
  rtx x = PATTERN (insn);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;
  return single_set_2 (insn, x);
}

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

int
ia64_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type dep_type;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;
  rtx dep_set, set, src, addr;

  if (GET_CODE (PATTERN (insn)) == CLOBBER
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
      || GET_CODE (PATTERN (dep_insn)) == USE
      /* @@@ Not accurate for indirect calls.  */
      || GET_CODE (insn) == CALL_INSN
      || ia64_safe_type (insn) == TYPE_S)
    return 0;

  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return 0;

  dep_type = ia64_safe_type (dep_insn);
  dep_class = ia64_safe_itanium_class (dep_insn);
  insn_class = ia64_safe_itanium_class (insn);

  /* Compares that feed a conditional branch can execute in the same
     cycle.  */
  dep_set = ia64_single_set (dep_insn);
  set = ia64_single_set (insn);

  if (dep_type != TYPE_F
      && dep_set
      && GET_CODE (SET_DEST (dep_set)) == REG
      && PR_REG (REGNO (SET_DEST (dep_set)))
      && GET_CODE (insn) == JUMP_INSN)
    return 0;

  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
    {
      /* ??? Can't find any information in the documentation about whether
	 a sequence
	   st [rx] = ra
	   ld rb = [ry]
	 splits issue.  Assume it doesn't.  */
      return 0;
    }

  src = set ? SET_SRC (set) : 0;
  addr = 0;
  if (set && GET_CODE (SET_DEST (set)) == MEM)
    addr = XEXP (SET_DEST (set), 0);
  else if (set && GET_CODE (src) == MEM)
    addr = XEXP (src, 0);
  else if (set && GET_CODE (src) == ZERO_EXTEND
	   && GET_CODE (XEXP (src, 0)) == MEM)
    addr = XEXP (XEXP (src, 0), 0);
  else if (set && GET_CODE (src) == UNSPEC
	   && XVECLEN (XEXP (src, 0), 0) > 0
	   && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
    addr = XEXP (XVECEXP (src, 0, 0), 0);
  if (addr && GET_CODE (addr) == POST_MODIFY)
    addr = XEXP (addr, 0);

  set = ia64_single_set (dep_insn);

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_LD
	  || insn_class == ITANIUM_CLASS_ST))
    {
      if (! addr || ! set)
	abort ();
      /* This isn't completely correct - an IALU that feeds an address has
	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
	 otherwise.  Unfortunately there's no good way to describe this.  */
      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
	return cost + 1;
    }
  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_MMMUL
	  || insn_class == ITANIUM_CLASS_MMSHF
	  || insn_class == ITANIUM_CLASS_MMSHFI))
    return 3;
  if (dep_class == ITANIUM_CLASS_FMAC
      && (insn_class == ITANIUM_CLASS_FMISC
	  || insn_class == ITANIUM_CLASS_FCVTFX
	  || insn_class == ITANIUM_CLASS_XMPY))
    return 7;
  if ((dep_class == ITANIUM_CLASS_FMAC
       || dep_class == ITANIUM_CLASS_FMISC
       || dep_class == ITANIUM_CLASS_FCVTFX
       || dep_class == ITANIUM_CLASS_XMPY)
      && insn_class == ITANIUM_CLASS_STF)
    return 8;
  if ((dep_class == ITANIUM_CLASS_MMMUL
       || dep_class == ITANIUM_CLASS_MMSHF
       || dep_class == ITANIUM_CLASS_MMSHFI)
      && (insn_class == ITANIUM_CLASS_LD
	  || insn_class == ITANIUM_CLASS_ST
	  || insn_class == ITANIUM_CLASS_IALU
	  || insn_class == ITANIUM_CLASS_ILOG
	  || insn_class == ITANIUM_CLASS_ISHF))
    return 4;

  return cost;
}
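
/* Example (illustrative): for

	add r14 = r15, r16
	ld8 r17 = [r14]

   the add is ITANIUM_CLASS_IALU and feeds the load's address register,
   so the IALU -> LD/ST case above returns COST + 1.  */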

/* Describe the current state of the Itanium pipeline.  */
static struct
{
  /* The first slot that is used in the current cycle.  */
  int first_slot;
  /* The next slot to fill.  */
  int cur;
  /* The packet we have selected for the current issue window.  */
  const struct ia64_packet *packet;
  /* The position of the split issue that occurs due to issue width
     limitations (6 if there's no split issue).  */
  int split;
  /* Record data about the insns scheduled so far in the same issue
     window.  The elements up to but not including FIRST_SLOT belong
     to the previous cycle, the ones starting with FIRST_SLOT belong
     to the current cycle.  */
  enum attr_type types[6];
  rtx insns[6];
  int stopbit[6];
  /* Nonzero if we decided to schedule a stop bit.  */
  int last_was_stop;
} sched_data;

/* Temporary arrays; they have enough elements to hold all insns that
   can be ready at the same time while scheduling of the current block.
   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
static rtx *sched_ready;
static enum attr_type *sched_types;

/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
   of packet P.  */

static int
insn_matches_slot (p, itype, slot, insn)
     const struct ia64_packet *p;
     enum attr_type itype;
     int slot;
     rtx insn;
{
  enum attr_itanium_requires_unit0 u0;
  enum attr_type stype = p->t[slot];

  if (insn)
    {
      u0 = ia64_safe_itanium_requires_unit0 (insn);
      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
	{
	  int i;
	  for (i = sched_data.first_slot; i < slot; i++)
	    if (p->t[i] == stype)
	      return 0;
	}
      if (GET_CODE (insn) == CALL_INSN)
	{
	  /* Reject calls in multiway branch packets.  We want to limit
	     the number of multiway branches we generate (since the branch
	     predictor is limited), and this seems to work fairly well.
	     (If we didn't do this, we'd have to add another test here to
	     force calls into the third slot of the bundle.)  */
	  if (slot < 3)
	    {
	      if (p->t[1] == TYPE_B)
		return 0;
	    }
	  else
	    {
	      if (p->t[4] == TYPE_B)
		return 0;
	    }
	}
    }

  if (itype == stype)
    return 1;
  if (itype == TYPE_A)
    return stype == TYPE_M || stype == TYPE_I;
  return 0;
}
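
/* Example (illustrative): a TYPE_A (generic ALU) insn is accepted for
   either an M or an I slot, so it fits slot 0 or slot 1 of .mii, while
   a TYPE_F insn only fits the F slot of bundles such as .mfi or .mfb.  */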

/* Like emit_insn_before, but skip cycle_display insns.  This makes the
   assembly output a bit prettier.  */

static void
ia64_emit_insn_before (insn, before)
     rtx insn, before;
{
  rtx prev = PREV_INSN (before);
  if (prev && GET_CODE (prev) == INSN
      && GET_CODE (PATTERN (prev)) == UNSPEC
      && XINT (PATTERN (prev), 1) == 23)
    before = prev;
  emit_insn_before (insn, before);
}

#if 0
/* Generate a nop insn of the given type.  Note we never generate L type
   nops.  */

static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M:
      return gen_nop_m ();
    case TYPE_I:
      return gen_nop_i ();
    case TYPE_B:
      return gen_nop_b ();
    case TYPE_F:
      return gen_nop_f ();
    case TYPE_X:
      return gen_nop_x ();
    default:
      abort ();
    }
}
#endif

/* When rotating a bundle out of the issue window, insert a bundle selector
   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
   is either 0 or 3, depending on whether we want to emit a bundle selector
   for the first bundle or the second bundle in the current issue window.

   The selector insns are emitted this late because the selected packet can
   be changed until parts of it get rotated out.  */

static void
finish_last_head (dump, start)
     FILE *dump;
     int start;
{
  const struct ia64_packet *p = sched_data.packet;
  const struct bundle *b = start == 0 ? p->t1 : p->t2;
  int bundle_type = b - bundle;
  rtx insn;
  int i;

  if (! ia64_final_schedule)
    return;

  for (i = start; sched_data.insns[i] == 0; i++)
    if (i == start + 3)
      abort ();
  insn = sched_data.insns[i];

  if (dump)
    fprintf (dump, "// Emitting template before %d: %s\n",
	     INSN_UID (insn), b->name);

  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
}

/* We can't schedule more insns this cycle.  Fix up the scheduling state
   and advance FIRST_SLOT and CUR.
   We have to distribute the insns that are currently found between
   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
   far, they are stored successively in the fields starting at FIRST_SLOT;
   now they must be moved to the correct slots.
   DUMP is the current scheduling dump file, or NULL.  */

static void
cycle_end_fill_slots (dump)
     FILE *dump;
{
  const struct ia64_packet *packet = sched_data.packet;
  int slot, i;
  enum attr_type tmp_types[6];
  rtx tmp_insns[6];

  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));

  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
    {
      enum attr_type t = tmp_types[i];
      if (t != ia64_safe_type (tmp_insns[i]))
	abort ();
      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
	{
	  if (slot > sched_data.split)
	    abort ();
	  if (dump)
	    fprintf (dump, "// Packet needs %s, have %s\n",
		     type_names[packet->t[slot]], type_names[t]);
	  sched_data.types[slot] = packet->t[slot];
	  sched_data.insns[slot] = 0;
	  sched_data.stopbit[slot] = 0;
	  slot++;
	}
      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
	 actual slot type later.  */
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = tmp_insns[i];
      sched_data.stopbit[slot] = 0;
      slot++;
    }

  /* This isn't right - there's no need to pad out until the forced split;
     the CPU will automatically split if an insn isn't ready.  */
#if 0
  while (slot < sched_data.split)
    {
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = 0;
      sched_data.stopbit[slot] = 0;
      slot++;
    }
#endif

  sched_data.first_slot = sched_data.cur = slot;
}

/* Bundle rotations, as described in the Itanium optimization manual.
   We can rotate either one or both bundles out of the issue window.
   DUMP is the current scheduling dump file, or NULL.  */

static void
rotate_one_bundle (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating one bundle.\n");

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    {
      sched_data.cur -= 3;
      sched_data.first_slot -= 3;
      memmove (sched_data.types,
	       sched_data.types + 3,
	       sched_data.cur * sizeof *sched_data.types);
      memmove (sched_data.stopbit,
	       sched_data.stopbit + 3,
	       sched_data.cur * sizeof *sched_data.stopbit);
      memmove (sched_data.insns,
	       sched_data.insns + 3,
	       sched_data.cur * sizeof *sched_data.insns);
    }
  else
    {
      sched_data.cur = 0;
      sched_data.first_slot = 0;
    }
}

static void
rotate_two_bundles (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating two bundles.\n");

  if (sched_data.cur == 0)
    return;

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    finish_last_head (dump, 3);
  sched_data.cur = 0;
  sched_data.first_slot = 0;
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

void
ia64_sched_init (dump, sched_verbose, max_ready)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int max_ready;
{
  static int initialized = 0;

  if (! initialized)
    {
      int b1, b2, i;

      initialized = 1;

      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
	{
	  const struct bundle *t1 = bundle + b1;
	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
	    {
	      const struct bundle *t2 = bundle + b2;

	      packets[i].t1 = t1;
	      packets[i].t2 = t2;
	    }
	}
      for (i = 0; i < NR_PACKETS; i++)
	{
	  int j;
	  for (j = 0; j < 3; j++)
	    packets[i].t[j] = packets[i].t1->t[j];
	  for (j = 0; j < 3; j++)
	    packets[i].t[j + 3] = packets[i].t2->t[j];
	  packets[i].first_split = itanium_split_issue (packets + i, 0);
	}
    }

  init_insn_group_barriers ();

  memset (&sched_data, 0, sizeof sched_data);
  sched_types = (enum attr_type *) xmalloc (max_ready
					    * sizeof (enum attr_type));
  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
}
5304
5305/* See if the packet P can match the insns we have already scheduled. Return
5306 nonzero if so. In *PSLOT, we store the first slot that is available for
5307 more instructions if we choose this packet.
5308 SPLIT holds the last slot we can use, there's a split issue after it so
5309 scheduling beyond it would cause us to use more than one cycle. */
5310
5311static int
5312packet_matches_p (p, split, pslot)
5313 const struct ia64_packet *p;
5314 int split;
5315 int *pslot;
5316{
5317 int filled = sched_data.cur;
5318 int first = sched_data.first_slot;
5319 int i, slot;
5320
5321 /* First, check if the first of the two bundles must be a specific one (due
5322 to stop bits). */
5323 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5324 return 0;
5325 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5326 return 0;
5327
5328 for (i = 0; i < first; i++)
5329 if (! insn_matches_slot (p, sched_data.types[i], i,
5330 sched_data.insns[i]))
5331 return 0;
5332 for (i = slot = first; i < filled; i++)
5333 {
5334 while (slot < split)
5335 {
5336 if (insn_matches_slot (p, sched_data.types[i], slot,
5337 sched_data.insns[i]))
5338 break;
5339 slot++;
5340 }
5341 if (slot == split)
5342 return 0;
5343 slot++;
5344 }
5345
5346 if (pslot)
5347 *pslot = slot;
5348 return 1;
5349}
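
/* A worked example with assumed values for the above: suppose
   first == 1 (an M insn is fixed in slot 0, with no stop bit after
   it), filled == 3, and types[1..2] are { I, B }.  Matching a packet
   whose template is { M, I, B, M, I, I } with split == 6, slot 0
   accepts the fixed M insn, the I insn lands in slot 1 and the B
   insn in slot 2, so we return nonzero with *pslot == 3.  */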
5350
5351/* A frontend for itanium_split_issue. For a packet P and a slot
5352 number FIRST that describes the start of the current clock cycle,
5353 return the slot number of the first split issue. This function
5354 uses the cached number found in P if possible. */
5355
5356static int
5357get_split (p, first)
5358 const struct ia64_packet *p;
5359 int first;
5360{
5361 if (first == 0)
5362 return p->first_split;
5363 return itanium_split_issue (p, first);
5364}
5365
5366/* Given N_READY insns in the array READY, whose types are found in the
5367 corresponding array TYPES, return the insn that is best suited to be
5368 scheduled in slot SLOT of packet P. */
5369
5370static int
5371find_best_insn (ready, types, n_ready, p, slot)
5372 rtx *ready;
5373 enum attr_type *types;
5374 int n_ready;
5375 const struct ia64_packet *p;
5376 int slot;
5377{
5378 int best = -1;
5379 int best_pri = 0;
5380 while (n_ready-- > 0)
5381 {
5382 rtx insn = ready[n_ready];
5383 if (! insn)
5384 continue;
5385 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5386 break;
5387 /* If we have equally good insns, one of which has a stricter
5388 slot requirement, prefer the one with the stricter requirement. */
5389 if (best >= 0 && types[n_ready] == TYPE_A)
5390 continue;
5391 if (insn_matches_slot (p, types[n_ready], slot, insn))
5392 {
5393 best = n_ready;
5394 best_pri = INSN_PRIORITY (ready[best]);
5395
5396 /* If there's no way we could get a stricter requirement, stop
5397 looking now. */
5398 if (types[n_ready] != TYPE_A
5399 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5400 break;
5402 }
5403 }
5404 return best;
5405}
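
/* An illustration of the TYPE_A test above, with assumed priorities:
   if READY ends with an A-type insn followed (going backwards) by an
   M-type insn of equal priority, and SLOT is an M slot, the A insn is
   recorded first, but the scan then replaces it with the M insn: the
   M insn fits only M slots, while the A insn can still be placed in
   a later M or I slot.  */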
5406
5407/* Select the best packet to use given the current scheduler state and the
5408 current ready list.
5409 READY is an array holding N_READY ready insns; TYPES is a corresponding
5410 array that holds their types. Store the best packet in *PPACKET and the
5411 number of insns that can be scheduled in the current cycle in *PBEST. */
5412
5413static void
5414find_best_packet (pbest, ppacket, ready, types, n_ready)
5415 int *pbest;
5416 const struct ia64_packet **ppacket;
5417 rtx *ready;
5418 enum attr_type *types;
5419 int n_ready;
5420{
5421 int first = sched_data.first_slot;
5422 int best = 0;
5423 int lowest_end = 6;
0024a804 5424 const struct ia64_packet *best_packet = NULL;
2130b7fb
BS
5425 int i;
5426
5427 for (i = 0; i < NR_PACKETS; i++)
5428 {
5429 const struct ia64_packet *p = packets + i;
5430 int slot;
5431 int split = get_split (p, first);
5432 int win = 0;
5433 int first_slot, last_slot;
5434 int b_nops = 0;
5435
5436 if (! packet_matches_p (p, split, &first_slot))
5437 continue;
5438
5439 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5440
5441 win = 0;
5442 last_slot = 6;
5443 for (slot = first_slot; slot < split; slot++)
5444 {
5445 int insn_nr;
5446
5447 /* Disallow a degenerate case where the first bundle doesn't
5448 contain anything but NOPs! */
5449 if (first_slot == 0 && win == 0 && slot == 3)
6b6c1201 5450 {
2130b7fb
BS
5451 win = -1;
5452 break;
6b6c1201 5453 }
2130b7fb
BS
5454
5455 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5456 if (insn_nr >= 0)
6b6c1201 5457 {
2130b7fb
BS
5458 sched_ready[insn_nr] = 0;
5459 last_slot = slot;
5460 win++;
c65ebc55 5461 }
2130b7fb
BS
5462 else if (p->t[slot] == TYPE_B)
5463 b_nops++;
5464 }
5465 /* We must disallow MBB/BBB packets if any of their B slots would be
5466 filled with nops. */
5467 if (last_slot < 3)
5468 {
5469 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5470 win = -1;
5471 }
5472 else
5473 {
5474 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5475 win = -1;
5476 }
e57b9d65 5477
2130b7fb
BS
5478 if (win > best
5479 || (win == best && last_slot < lowest_end))
5480 {
5481 best = win;
5482 lowest_end = last_slot;
5483 best_packet = p;
5484 }
5485 }
5486 *pbest = best;
5487 *ppacket = best_packet;
5488}
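
/* The selection metric, sketched with assumed numbers: if an MII/MII
   packet places three insns ending at slot 2 while an MIB/MIB packet
   also places three insns but ends at slot 4, the tie on WIN is
   broken by LOWEST_END and the MII/MII packet is chosen, since
   finishing earlier leaves more of the issue window for this
   cycle.  */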
870f9ec0 5489
2130b7fb
BS
5490/* Reorder the ready list so that the insns that can be issued in this cycle
5491 are found in the correct order at the end of the list.
5492 DUMP is the scheduling dump file, or NULL. READY points to the start,
5493 E_READY to the end of the ready list. MAY_FAIL determines what should be
5494 done if no insns can be scheduled in this cycle: if it is zero, we abort;
5495 otherwise we return 0.
5496 Return 1 if any insns can be scheduled in this cycle. */
5497
5498static int
5499itanium_reorder (dump, ready, e_ready, may_fail)
5500 FILE *dump;
5501 rtx *ready;
5502 rtx *e_ready;
5503 int may_fail;
5504{
5505 const struct ia64_packet *best_packet;
5506 int n_ready = e_ready - ready;
5507 int first = sched_data.first_slot;
5508 int i, best, best_split, filled;
5509
5510 for (i = 0; i < n_ready; i++)
5511 sched_types[i] = ia64_safe_type (ready[i]);
5512
5513 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5514
5515 if (best == 0)
5516 {
5517 if (may_fail)
5518 return 0;
5519 abort ();
5520 }
5521
5522 if (dump)
5523 {
5524 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5525 best_packet->t1->name,
5526 best_packet->t2 ? best_packet->t2->name : "(none)", best);
5527 }
5528
5529 best_split = itanium_split_issue (best_packet, first);
5530 packet_matches_p (best_packet, best_split, &filled);
5531
5532 for (i = filled; i < best_split; i++)
5533 {
5534 int insn_nr;
5535
5536 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5537 if (insn_nr >= 0)
5538 {
5539 rtx insn = ready[insn_nr];
5540 memmove (ready + insn_nr, ready + insn_nr + 1,
5541 (n_ready - insn_nr - 1) * sizeof (rtx));
5542 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5543 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5544 ready[--n_ready] = insn;
5545 }
5546 }
5547
5548 sched_data.packet = best_packet;
5549 sched_data.split = best_split;
5550 return 1;
5551}
5552
5553/* Dump information about the current scheduling state to file DUMP. */
5554
5555static void
5556dump_current_packet (dump)
5557 FILE *dump;
5558{
5559 int i;
5560 fprintf (dump, "// %d slots filled:", sched_data.cur);
5561 for (i = 0; i < sched_data.first_slot; i++)
5562 {
5563 rtx insn = sched_data.insns[i];
5564 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5565 if (insn)
5566 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5567 if (sched_data.stopbit[i])
5568 fprintf (dump, " ;;");
5569 }
5570 fprintf (dump, " :::");
5571 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5572 {
5573 rtx insn = sched_data.insns[i];
5574 enum attr_type t = ia64_safe_type (insn);
5575 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5576 }
5577 fprintf (dump, "\n");
5578}
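
/* A hypothetical line produced by the above, for cur == 3 and
   first_slot == 2, where slot 0 holds an M insn followed by a stop
   bit, slot 1 is an empty I slot, and insn 42 was newly scheduled:

       // 3 slots filled: M/M ;; I ::: (42) M
*/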
5579
5580/* Schedule a stop bit. DUMP is the current scheduling dump file, or
5581 NULL. */
5582
5583static void
5584schedule_stop (dump)
5585 FILE *dump;
5586{
5587 const struct ia64_packet *best = sched_data.packet;
5588 int i;
5589 int best_stop = 6;
5590
5591 if (dump)
5592 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5593
5594 if (sched_data.cur == 0)
5595 {
5596 if (dump)
5597 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5598
5599 rotate_two_bundles (NULL);
5600 return;
5601 }
5602
5603 for (i = -1; i < NR_PACKETS; i++)
5604 {
5605 /* This is a slight hack to give the current packet the first chance.
5606 This is done to avoid e.g. switching from MIB to MBB bundles. */
5607 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5608 int split = get_split (p, sched_data.first_slot);
5609 const struct bundle *compare;
5610 int next, stoppos;
5611
5612 if (! packet_matches_p (p, split, &next))
5613 continue;
5614
5615 compare = next > 3 ? p->t2 : p->t1;
5616
5617 stoppos = 3;
5618 if (compare->possible_stop)
5619 stoppos = compare->possible_stop;
5620 if (next > 3)
5621 stoppos += 3;
5622
5623 if (stoppos < next || stoppos >= best_stop)
5624 {
5625 if (compare->possible_stop == 0)
5626 continue;
5627 stoppos = (next > 3 ? 6 : 3);
5628 }
5629 if (stoppos < next || stoppos >= best_stop)
5630 continue;
5631
5632 if (dump)
5633 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5634 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5635 stoppos);
5636
5637 best_stop = stoppos;
5638 best = p;
5639 }
870f9ec0 5640
2130b7fb
BS
5641 sched_data.packet = best;
5642 cycle_end_fill_slots (dump);
5643 while (sched_data.cur < best_stop)
5644 {
5645 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5646 sched_data.insns[sched_data.cur] = 0;
5647 sched_data.stopbit[sched_data.cur] = 0;
5648 sched_data.cur++;
5649 }
5650 sched_data.stopbit[sched_data.cur - 1] = 1;
5651 sched_data.first_slot = best_stop;
5652
5653 if (dump)
5654 dump_current_packet (dump);
5655}
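
/* An example of the search above, with assumed bundle data: if a
   single M insn occupies slot 0 (cur == 1) and a packet whose first
   bundle is M;;MI (possible_stop == 1) matches, then best_stop == 1:
   no filler slots are added, stopbit[0] is set, and first_slot
   becomes 1, so the next cycle resumes at slot 1 of the same bundle
   pair.  */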
5656
e4027dab
BS
5657/* If necessary, perform one or two rotations on the scheduling state.
5658 This should only be called if we are starting a new cycle. */
5659
5660static void
5661maybe_rotate (dump)
5662 FILE *dump;
5663{
5664 if (sched_data.cur == 6)
5665 rotate_two_bundles (dump);
5666 else if (sched_data.cur >= 3)
5667 rotate_one_bundle (dump);
5668 sched_data.first_slot = sched_data.cur;
5669}
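
/* The rotation decision above, summarized on hypothetical states:
   cur == 6 means both bundles are full, so both are rotated out;
   3 <= cur <= 5 means the first bundle is complete, so it alone is
   rotated out; cur < 3 means we are still inside the first bundle
   and nothing is rotated.  */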
5670
2130b7fb
BS
5671/* We are about to begin issuing insns for this clock cycle.
5672 Override the default sort algorithm to better slot instructions. */
5673
5674int
5675ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, reorder_type)
5676 FILE *dump ATTRIBUTE_UNUSED;
5677 int sched_verbose ATTRIBUTE_UNUSED;
5678 rtx *ready;
5679 int *pn_ready;
5680 int reorder_type;
5681{
5682 int n_ready = *pn_ready;
5683 rtx *e_ready = ready + n_ready;
5684 rtx *insnp;
5685 rtx highest;
5686
5687 if (sched_verbose)
5688 {
5689 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
5690 dump_current_packet (dump);
5691 }
5692
2d1b811d 5693 if (reorder_type == 0)
e4027dab 5694 maybe_rotate (sched_verbose ? dump : NULL);
2d1b811d 5695
2130b7fb
BS
5696 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5697 highest = ready[n_ready - 1];
5698 for (insnp = ready; insnp < e_ready; insnp++)
5699 if (insnp < e_ready)
5700 {
5701 rtx insn = *insnp;
5702 enum attr_type t = ia64_safe_type (insn);
5703 if (t == TYPE_UNKNOWN)
5704 {
5705 highest = ready[n_ready - 1];
5706 ready[n_ready - 1] = insn;
5707 *insnp = highest;
394411d5 5708 if (ia64_final_schedule && group_barrier_needed_p (insn))
2130b7fb
BS
5709 {
5710 schedule_stop (sched_verbose ? dump : NULL);
5711 sched_data.last_was_stop = 1;
e4027dab 5712 maybe_rotate (sched_verbose ? dump : NULL);
2130b7fb 5713 }
f4d578da
BS
5714 else if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5715 || asm_noperands (PATTERN (insn)) >= 0)
5716 {
5717 /* It must be an asm of some kind. */
5718 cycle_end_fill_slots (sched_verbose ? dump : NULL);
5719 }
2130b7fb
BS
5720 return 1;
5721 }
5722 }
f2f90c63 5723
2130b7fb
BS
5724 if (ia64_final_schedule)
5725 {
5726 int nr_need_stop = 0;
5727
5728 for (insnp = ready; insnp < e_ready; insnp++)
5729 if (safe_group_barrier_needed_p (*insnp))
5730 nr_need_stop++;
5731
5732 /* Schedule a stop bit if
5733 - all insns require a stop bit, or
5734 - we are starting a new cycle and _any_ insns require a stop bit.
5735 The reason for the latter is that if our schedule is accurate, then
5736 the additional stop won't decrease performance at this point (since
5737 there's a split issue here anyway), but it gives us more
5738 freedom when scheduling the currently ready insns. */
5739 if ((reorder_type == 0 && nr_need_stop)
5740 || (reorder_type == 1 && n_ready == nr_need_stop))
5741 {
5742 schedule_stop (sched_verbose ? dump : NULL);
5743 sched_data.last_was_stop = 1;
e4027dab 5744 maybe_rotate (sched_verbose ? dump : NULL);
2130b7fb
BS
5745 if (reorder_type == 1)
5746 return 0;
5747 }
5748 else
5749 {
5750 int deleted = 0;
5751 insnp = e_ready;
5752 /* Move down everything that needs a stop bit, preserving relative
5753 order. */
5754 while (insnp-- > ready + deleted)
5755 while (insnp >= ready + deleted)
5756 {
5757 rtx insn = *insnp;
5758 if (! safe_group_barrier_needed_p (insn))
870f9ec0 5759 break;
2130b7fb
BS
5760 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5761 *ready = insn;
5762 deleted++;
5763 }
5764 n_ready -= deleted;
5765 ready += deleted;
5766 if (deleted != nr_need_stop)
5767 abort ();
5768 }
5769 }
5527bf14 5770
2130b7fb
BS
5771 return itanium_reorder (sched_verbose ? dump : NULL,
5772 ready, e_ready, reorder_type == 1);
5773}
c65ebc55 5774
2130b7fb
BS
5775/* Like ia64_sched_reorder, but called after issuing each insn.
5776 Override the default sort algorithm to better slot instructions. */
5777
5778int
5779ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
5780 FILE *dump ATTRIBUTE_UNUSED;
5781 int sched_verbose ATTRIBUTE_UNUSED;
5782 rtx *ready;
5783 int *pn_ready;
5784 int clock_var ATTRIBUTE_UNUSED;
5785{
5786 if (sched_data.last_was_stop)
5787 return 0;
5788
5789 /* Detect one special case and try to optimize it.
5790 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
5791 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
5792 if (sched_data.first_slot == 1
5793 && sched_data.stopbit[0]
5794 && ((sched_data.cur == 4
5795 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
5796 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
5797 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
5798 || (sched_data.cur == 3
5799 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
5800 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
5801 && sched_data.types[2] != TYPE_A))))
5803 {
5804 int i, best;
5805 rtx stop = PREV_INSN (sched_data.insns[1]);
5806 rtx pat;
5807
5808 sched_data.stopbit[0] = 0;
5809 sched_data.stopbit[2] = 1;
5810 if (GET_CODE (stop) != INSN)
5811 abort ();
5812
5813 pat = PATTERN (stop);
5814 /* Ignore cycle displays. */
5815 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
5816 stop = PREV_INSN (stop);
5817 pat = PATTERN (stop);
5818 if (GET_CODE (pat) != UNSPEC_VOLATILE
5819 || XINT (pat, 1) != 2
5820 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
5821 abort ();
5822 XVECEXP (pat, 0, 0) = GEN_INT (3);
5823
5824 sched_data.types[5] = sched_data.types[3];
5825 sched_data.types[4] = sched_data.types[2];
5826 sched_data.types[3] = sched_data.types[1];
5827 sched_data.insns[5] = sched_data.insns[3];
5828 sched_data.insns[4] = sched_data.insns[2];
5829 sched_data.insns[3] = sched_data.insns[1];
5830 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
5831 sched_data.cur += 2;
5832 sched_data.first_slot = 3;
5833 for (i = 0; i < NR_PACKETS; i++)
5834 {
5835 const struct ia64_packet *p = packets + i;
5836 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
5837 {
5838 sched_data.packet = p;
5839 break;
c65ebc55 5840 }
2130b7fb
BS
5841 }
5842 rotate_one_bundle (sched_verbose ? dump : NULL);
c65ebc55 5843
2130b7fb
BS
5844 best = 6;
5845 for (i = 0; i < NR_PACKETS; i++)
5846 {
5847 const struct ia64_packet *p = packets + i;
5848 int split = get_split (p, sched_data.first_slot);
5849 int next;
c65ebc55 5850
2130b7fb
BS
5851 /* Disallow multiway branches here. */
5852 if (p->t[1] == TYPE_B)
5853 continue;
c65ebc55 5854
2130b7fb
BS
5855 if (packet_matches_p (p, split, &next) && next < best)
5856 {
5857 best = next;
5858 sched_data.packet = p;
5859 sched_data.split = split;
5860 }
c65ebc55 5861 }
2130b7fb
BS
5862 if (best == 6)
5863 abort ();
5864 }
5865
5866 if (*pn_ready > 0)
5867 {
5868 int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1);
5869 if (more)
5870 return more;
5871 /* Did we schedule a stop? If so, finish this cycle. */
5872 if (sched_data.cur == sched_data.first_slot)
5873 return 0;
c65ebc55 5874 }
2130b7fb
BS
5875
5876 if (sched_verbose)
5877 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
5878
5879 cycle_end_fill_slots (sched_verbose ? dump : NULL);
5880 if (sched_verbose)
5881 dump_current_packet (dump);
5882 return 0;
c65ebc55
JW
5883}
5884
2130b7fb
BS
5885/* We are about to issue INSN. Return the number of insns left on the
5886 ready queue that can be issued this cycle. */
5887
5888int
5889ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
5890 FILE *dump;
5891 int sched_verbose;
5892 rtx insn;
5893 int can_issue_more ATTRIBUTE_UNUSED;
5894{
5895 enum attr_type t = ia64_safe_type (insn);
5896
5897 if (sched_data.last_was_stop)
5898 {
5899 int t = sched_data.first_slot;
5900 if (t == 0)
5901 t = 3;
5902 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
5903 init_insn_group_barriers ();
5904 sched_data.last_was_stop = 0;
5905 }
5906
5907 if (t == TYPE_UNKNOWN)
5908 {
5909 if (sched_verbose)
5910 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
f4d578da
BS
5911 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5912 || asm_noperands (PATTERN (insn)) >= 0)
5913 {
5914 /* This must be some kind of asm. Clear the scheduling state. */
5915 rotate_two_bundles (sched_verbose ? dump : NULL);
0c1cf241
BS
5916 if (ia64_final_schedule)
5917 group_barrier_needed_p (insn);
f4d578da 5918 }
2130b7fb
BS
5919 return 1;
5920 }
5921
5922 /* This is _not_ just a sanity check. group_barrier_needed_p will update
5923 important state info. Don't delete this test. */
5924 if (ia64_final_schedule
5925 && group_barrier_needed_p (insn))
5926 abort ();
5927
5928 sched_data.stopbit[sched_data.cur] = 0;
5929 sched_data.insns[sched_data.cur] = insn;
5930 sched_data.types[sched_data.cur] = t;
5931
5932 sched_data.cur++;
5933 if (sched_verbose)
5934 fprintf (dump, "// Scheduling insn %d of type %s\n",
5935 INSN_UID (insn), type_names[t]);
5936
5937 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
5938 {
5939 schedule_stop (sched_verbose ? dump : NULL);
5940 sched_data.last_was_stop = 1;
5941 }
5942
5943 return 1;
5944}
5945
5946/* Free data allocated by ia64_sched_init. */
5947
5948void
5949ia64_sched_finish (dump, sched_verbose)
5950 FILE *dump;
5951 int sched_verbose;
5952{
5953 if (sched_verbose)
5954 fprintf (dump, "// Finishing schedule.\n");
5955 rotate_two_bundles (NULL);
5956 free (sched_types);
5957 free (sched_ready);
5958}
5959\f
3b572406
RH
5960/* Emit pseudo-ops for the assembler to describe predicate relations.
5961 At present this assumes that we only consider predicate pairs to
5962 be mutex, and that the assembler can deduce proper values from
5963 straight-line code. */
5964
5965static void
f2f90c63 5966emit_predicate_relation_info ()
3b572406
RH
5967{
5968 int i;
5969
3b572406
RH
5970 for (i = n_basic_blocks - 1; i >= 0; --i)
5971 {
5972 basic_block bb = BASIC_BLOCK (i);
5973 int r;
5974 rtx head = bb->head;
5975
5976 /* We only need such notes at code labels. */
5977 if (GET_CODE (head) != CODE_LABEL)
5978 continue;
5979 if (GET_CODE (NEXT_INSN (head)) == NOTE
5980 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
5981 head = NEXT_INSN (head);
5982
5983 for (r = PR_REG (0); r < PR_REG (64); r += 2)
5984 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
5985 {
f2f90c63 5986 rtx p = gen_rtx_REG (BImode, r);
054451ea 5987 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
3b572406
RH
5988 if (head == bb->end)
5989 bb->end = n;
5990 head = n;
5991 }
5992 }
ca3920ad
JW
5993
5994 /* Look for conditional calls that do not return, and protect predicate
5995 relations around them. Otherwise the assembler will assume the call
5996 returns, and complain about uses of call-clobbered predicates after
5997 the call. */
5998 for (i = n_basic_blocks - 1; i >= 0; --i)
5999 {
6000 basic_block bb = BASIC_BLOCK (i);
6001 rtx insn = bb->head;
6002
6003 while (1)
6004 {
6005 if (GET_CODE (insn) == CALL_INSN
6006 && GET_CODE (PATTERN (insn)) == COND_EXEC
6007 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6008 {
6009 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6010 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6011 if (bb->head == insn)
6012 bb->head = b;
6013 if (bb->end == insn)
6014 bb->end = a;
6015 }
6016
6017 if (insn == bb->end)
6018 break;
6019 insn = NEXT_INSN (insn);
6020 }
6021 }
3b572406
RH
6022}
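
/* For instance (a standard IA-64 idiom, not specific to this file):
   a compare such as

       cmp.eq p6, p7 = r32, r0

   sets p6 and p7 to complementary values, so the assembler can treat
   the pair as mutex along the straight-line code that follows.  The
   pseudo-ops emitted above record the same relation at block entries,
   where the assembler cannot deduce it by itself.  */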
6023
7a87c39c
BS
6024/* Generate a NOP instruction of type T. We will never generate L type
6025 nops. */
6026
6027static rtx
6028gen_nop_type (t)
6029 enum attr_type t;
6030{
6031 switch (t)
6032 {
6033 case TYPE_M:
6034 return gen_nop_m ();
6035 case TYPE_I:
6036 return gen_nop_i ();
6037 case TYPE_B:
6038 return gen_nop_b ();
6039 case TYPE_F:
6040 return gen_nop_f ();
6041 case TYPE_X:
6042 return gen_nop_x ();
6043 default:
6044 abort ();
6045 }
6046}
6047
6048/* After the last scheduling pass, fill in NOPs. It's easier to do this
6049 here than while scheduling. */
6050
6051static void
6052ia64_emit_nops ()
6053{
6054 rtx insn;
6055 const struct bundle *b = 0;
6056 int bundle_pos = 0;
6057
6058 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6059 {
6060 rtx pat;
6061 enum attr_type t;
6062 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6063 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6064 continue;
6065 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6066 || GET_CODE (insn) == CODE_LABEL)
6067 {
6068 if (b)
6069 while (bundle_pos < 3)
6070 {
6071 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6072 bundle_pos++;
6073 }
6074 if (GET_CODE (insn) != CODE_LABEL)
6075 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6076 else
6077 b = 0;
6078 bundle_pos = 0;
6079 continue;
6080 }
6081 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6082 {
6083 int t = INTVAL (XVECEXP (pat, 0, 0));
6084 if (b)
6085 while (bundle_pos < t)
6086 {
6087 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6088 bundle_pos++;
6089 }
6090 continue;
6091 }
6092
6093 if (bundle_pos == 3)
6094 b = 0;
6095
6096 if (b && INSN_P (insn))
6097 {
6098 t = ia64_safe_type (insn);
e4027dab
BS
6099 if (asm_noperands (PATTERN (insn)) >= 0
6100 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6101 {
6102 while (bundle_pos < 3)
6103 {
6104 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6105 bundle_pos++;
6106 }
6107 continue;
6108 }
6109
7a87c39c
BS
6110 if (t == TYPE_UNKNOWN)
6111 continue;
6112 while (bundle_pos < 3)
6113 {
6114 if (t == b->t[bundle_pos]
6115 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6116 || b->t[bundle_pos] == TYPE_I)))
6117 break;
6118
6119 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6120 bundle_pos++;
6121 }
6122 if (bundle_pos < 3)
6123 bundle_pos++;
6124 }
6125 }
6126}
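
/* A sketch of the fill, for an assumed bundle: if an MII bundle ended
   up holding only one M insn, the loops above emit I-type nops for
   the two unused slots, so the final bundle is roughly equivalent to

       { .mii
         (the scheduled M insn)
         nop.i 0
         nop.i 0
       }
*/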
6127
c65ebc55
JW
6128/* Perform machine dependent operations on the rtl chain INSNS. */
6129
6130void
6131ia64_reorg (insns)
6132 rtx insns;
6133{
9b7bf67d
RH
6134 /* If optimizing, we'll have split before scheduling. */
6135 if (optimize == 0)
6136 split_all_insns (0);
6137
f2f90c63
RH
6138 /* Make sure the CFG and global_live_at_start are correct
6139 for emit_predicate_relation_info. */
6140 find_basic_blocks (insns, max_reg_num (), NULL);
2130b7fb
BS
6141 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6142
f4d578da
BS
6143 if (optimize)
6144 {
6145 ia64_final_schedule = 1;
6146 schedule_ebbs (rtl_dump_file);
6147 ia64_final_schedule = 0;
2130b7fb 6148
f4d578da
BS
6149 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6150 place as they were during scheduling. */
6151 emit_insn_group_barriers (rtl_dump_file, insns);
7a87c39c 6152 ia64_emit_nops ();
f4d578da
BS
6153 }
6154 else
6155 emit_all_insn_group_barriers (rtl_dump_file, insns);
f2f90c63 6156
2130b7fb 6157 fixup_errata ();
f2f90c63 6158 emit_predicate_relation_info ();
c65ebc55
JW
6159}
6160\f
6161/* Return true if REGNO is used by the epilogue. */
6162
6163int
6164ia64_epilogue_uses (regno)
6165 int regno;
6166{
59da9a7d
JW
6167 /* When a function makes a call through a function descriptor, we
6168 will write a (potentially) new value to "gp". After returning
6169 from such a call, we need to make sure the function restores the
6170 original gp-value, even if the function itself does not use the
6171 gp anymore. */
6b6c1201
RH
6172 if (regno == R_GR (1)
6173 && TARGET_CONST_GP
6174 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
59da9a7d
JW
6175 return 1;
6176
c65ebc55
JW
6177 /* For functions defined with the syscall_linkage attribute, all input
6178 registers are marked as live at all function exits. This prevents the
6179 register allocator from using the input registers, which in turn makes it
6180 possible to restart a system call after an interrupt without having to
3f67ac08
DM
6181 save/restore the input registers. This also prevents kernel data from
6182 leaking to application code. */
c65ebc55
JW
6183
6184 if (IN_REGNO_P (regno)
c65ebc55
JW
6185 && lookup_attribute ("syscall_linkage",
6186 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6187 return 1;
6188
6b6c1201
RH
6189 /* Conditional return patterns can't represent the use of `b0' as
6190 the return address, so we force the value live this way. */
6191 if (regno == R_BR (0))
6192 return 1;
6193
97e242b0
RH
6194 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6195 return 1;
6196 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6197 return 1;
6198 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6199 && regno == AR_UNAT_REGNUM)
5527bf14
RH
6200 return 1;
6201
c65ebc55
JW
6202 return 0;
6203}
6204
6205/* Return true if IDENTIFIER is a valid attribute for TYPE. */
6206
6207int
6208ia64_valid_type_attribute (type, attributes, identifier, args)
6209 tree type;
6210 tree attributes ATTRIBUTE_UNUSED;
6211 tree identifier;
6212 tree args;
6213{
6214 /* We only support an attribute for function calls. */
6215
6216 if (TREE_CODE (type) != FUNCTION_TYPE
6217 && TREE_CODE (type) != METHOD_TYPE)
6218 return 0;
6219
6220 /* The "syscall_linkage" attribute says the callee is a system call entry
6221 point. This affects ia64_epilogue_uses. */
6222
6223 if (is_attribute_p ("syscall_linkage", identifier))
6224 return args == NULL_TREE;
6225
6226 return 0;
6227}
6228\f
6229/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6230
6231 We add @ to the name if this goes in small data/bss. We can only put
6232 a variable in small data/bss if it is defined in this module or a module
6233 that we are statically linked with. We can't check the second condition,
6234 but TREE_STATIC gives us the first one. */
6235
6236/* ??? If we had IPA, we could check the second condition. We could support
6237 programmer added section attributes if the variable is not defined in this
6238 module. */
6239
6240/* ??? See the v850 port for a cleaner way to do this. */
6241
6242/* ??? We could also support own long data here. Generating movl/add/ld8
6243 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6244 code faster because there is one less load. This also includes incomplete
6245 types which can't go in sdata/sbss. */
6246
6247/* ??? See select_section. We must put short own readonly variables in
6248 sdata/sbss instead of the more natural rodata, because we can't perform
6249 the DECL_READONLY_SECTION test here. */
6250
6251extern struct obstack * saveable_obstack;
6252
6253void
6254ia64_encode_section_info (decl)
6255 tree decl;
6256{
549f0725
RH
6257 const char *symbol_str;
6258
c65ebc55 6259 if (TREE_CODE (decl) == FUNCTION_DECL)
549f0725
RH
6260 {
6261 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6262 return;
6263 }
6264
6265 /* Careful not to prod global register variables. */
6266 if (TREE_CODE (decl) != VAR_DECL
3b572406
RH
6267 || GET_CODE (DECL_RTL (decl)) != MEM
6268 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
549f0725
RH
6269 return;
6270
6271 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6272
c65ebc55
JW
6273 /* We assume that -fpic is used only to create a shared library (dso).
6274 With -fpic, no global data can ever be sdata.
6275 Without -fpic, global common uninitialized data can never be sdata, since
6276 it can unify with a real definition in a dso. */
6277 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6278 to access them. The linker may then be able to do linker relaxation to
6279 optimize references to them. Currently sdata implies use of gprel. */
74fe26b2
JW
6280 /* We need the DECL_EXTERNAL check for C++. static class data members get
6281 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6282 statically allocated, but the space is allocated somewhere else. Such
6283 decls cannot be own data. */
549f0725 6284 if (! TARGET_NO_SDATA
74fe26b2 6285 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
549f0725
RH
6286 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6287 && ! (TREE_PUBLIC (decl)
6288 && (flag_pic
6289 || (DECL_COMMON (decl)
6290 && (DECL_INITIAL (decl) == 0
6291 || DECL_INITIAL (decl) == error_mark_node))))
6292 /* Either the variable must be declared without a section attribute,
6293 or the section must be sdata or sbss. */
6294 && (DECL_SECTION_NAME (decl) == 0
6295 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6296 ".sdata")
6297 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6298 ".sbss")))
c65ebc55 6299 {
97e242b0 6300 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
c65ebc55 6301
59da9a7d
JW
6302 /* If the variable has already been defined in the output file, then it
6303 is too late to put it in sdata if it wasn't put there in the first
6304 place. The test is here rather than above, because if it is already
6305 in sdata, then it can stay there. */
809d4ef1 6306
549f0725 6307 if (TREE_ASM_WRITTEN (decl))
59da9a7d
JW
6308 ;
6309
c65ebc55
JW
6310 /* If this is an incomplete type with size 0, then we can't put it in
6311 sdata because it might be too big when completed. */
97e242b0
RH
6312 else if (size > 0
6313 && size <= (HOST_WIDE_INT) ia64_section_threshold
549f0725 6314 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
c65ebc55 6315 {
97e242b0 6316 size_t len = strlen (symbol_str);
520a57c8 6317 char *newstr = alloca (len + 1);
0024a804 6318 const char *string;
549f0725 6319
c65ebc55 6320 *newstr = SDATA_NAME_FLAG_CHAR;
549f0725 6321 memcpy (newstr + 1, symbol_str, len + 1);
520a57c8 6322
0024a804
JW
6323 string = ggc_alloc_string (newstr, len + 1);
6324 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
c65ebc55 6325 }
809d4ef1 6326 }
32adf8e6
AH
6327 /* This decl is marked as being in small data/bss but it shouldn't
6328 be; one likely explanation for this is that the decl has been
6329 moved into a different section from the one it was in when
6330 ENCODE_SECTION_INFO was first called. Remove the '@'. */
549f0725 6331 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
32adf8e6 6332 {
1f8f4a0b 6333 XSTR (XEXP (DECL_RTL (decl), 0), 0)
a8a05998 6334 = ggc_strdup (symbol_str + 1);
c65ebc55
JW
6335 }
6336}
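
/* An encoding example, assuming the object fits below the sdata size
   threshold: a definition such as

       static int counter;

   has its symbol renamed from "counter" to "@counter"; the '@'
   (SDATA_NAME_FLAG_CHAR) later tells us the object may be addressed
   gp-relative with an addl/ld8 sequence rather than through a full
   address load.  */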
0c96007e 6337\f
ad0fc698
JW
6338/* Output assembly directives for prologue regions. */
6339
6340/* The current basic block number. */
6341
6342static int block_num;
6343
6344/* True if we need a copy_state command at the start of the next block. */
6345
6346static int need_copy_state;
6347
6348/* The function emits unwind directives for the start of an epilogue. */
6349
6350static void
6351process_epilogue ()
6352{
6353 /* If this isn't the last block of the function, then we need to label the
6354 current state, and copy it back in at the start of the next block. */
6355
6356 if (block_num != n_basic_blocks - 1)
6357 {
6358 fprintf (asm_out_file, "\t.label_state 1\n");
6359 need_copy_state = 1;
6360 }
6361
6362 fprintf (asm_out_file, "\t.restore sp\n");
6363}
0c96007e 6364
0c96007e
AM
6365/* This function processes a SET pattern looking for specific patterns
6366 which result in emitting an assembly directive required for unwinding. */
97e242b0 6367
0c96007e
AM
6368static int
6369process_set (asm_out_file, pat)
6370 FILE *asm_out_file;
6371 rtx pat;
6372{
6373 rtx src = SET_SRC (pat);
6374 rtx dest = SET_DEST (pat);
97e242b0 6375 int src_regno, dest_regno;
0c96007e 6376
97e242b0
RH
6377 /* Look for the ALLOC insn. */
6378 if (GET_CODE (src) == UNSPEC_VOLATILE
6379 && XINT (src, 1) == 0
6380 && GET_CODE (dest) == REG)
0c96007e 6381 {
97e242b0
RH
6382 dest_regno = REGNO (dest);
6383
6384 /* If this isn't the final destination for ar.pfs, the alloc
6385 shouldn't have been marked frame related. */
6386 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6387 abort ();
6388
809d4ef1 6389 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
97e242b0 6390 ia64_dbx_register_number (dest_regno));
0c96007e
AM
6391 return 1;
6392 }
6393
97e242b0 6394 /* Look for SP = .... */
0c96007e
AM
6395 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6396 {
6397 if (GET_CODE (src) == PLUS)
6398 {
6399 rtx op0 = XEXP (src, 0);
6400 rtx op1 = XEXP (src, 1);
6401 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6402 {
0186257f
JW
6403 if (INTVAL (op1) < 0)
6404 {
6405 fputs ("\t.fframe ", asm_out_file);
6406 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6407 -INTVAL (op1));
6408 fputc ('\n', asm_out_file);
0186257f
JW
6409 }
6410 else
ad0fc698 6411 process_epilogue ();
0c96007e 6412 }
0186257f
JW
6413 else
6414 abort ();
0c96007e 6415 }
97e242b0
RH
6416 else if (GET_CODE (src) == REG
6417 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
ad0fc698 6418 process_epilogue ();
0186257f
JW
6419 else
6420 abort ();
6421
6422 return 1;
0c96007e 6423 }
0c96007e
AM
6424
6425 /* Register move we need to look at. */
6426 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6427 {
97e242b0
RH
6428 src_regno = REGNO (src);
6429 dest_regno = REGNO (dest);
6430
6431 switch (src_regno)
6432 {
6433 case BR_REG (0):
0c96007e 6434 /* Saving return address pointer. */
97e242b0
RH
6435 if (dest_regno != current_frame_info.reg_save_b0)
6436 abort ();
6437 fprintf (asm_out_file, "\t.save rp, r%d\n",
6438 ia64_dbx_register_number (dest_regno));
6439 return 1;
6440
6441 case PR_REG (0):
6442 if (dest_regno != current_frame_info.reg_save_pr)
6443 abort ();
6444 fprintf (asm_out_file, "\t.save pr, r%d\n",
6445 ia64_dbx_register_number (dest_regno));
6446 return 1;
6447
6448 case AR_UNAT_REGNUM:
6449 if (dest_regno != current_frame_info.reg_save_ar_unat)
6450 abort ();
6451 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6452 ia64_dbx_register_number (dest_regno));
6453 return 1;
6454
6455 case AR_LC_REGNUM:
6456 if (dest_regno != current_frame_info.reg_save_ar_lc)
6457 abort ();
6458 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6459 ia64_dbx_register_number (dest_regno));
6460 return 1;
6461
6462 case STACK_POINTER_REGNUM:
6463 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6464 || ! frame_pointer_needed)
6465 abort ();
6466 fprintf (asm_out_file, "\t.vframe r%d\n",
6467 ia64_dbx_register_number (dest_regno));
6468 return 1;
6469
6470 default:
6471 /* Everything else should indicate being stored to memory. */
6472 abort ();
0c96007e
AM
6473 }
6474 }
97e242b0
RH
6475
6476 /* Memory store we need to look at. */
6477 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 6478 {
97e242b0
RH
6479 long off;
6480 rtx base;
6481 const char *saveop;
6482
6483 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 6484 {
97e242b0
RH
6485 base = XEXP (dest, 0);
6486 off = 0;
0c96007e 6487 }
97e242b0
RH
6488 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6489 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
0c96007e 6490 {
97e242b0
RH
6491 base = XEXP (XEXP (dest, 0), 0);
6492 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 6493 }
97e242b0
RH
6494 else
6495 abort ();
0c96007e 6496
97e242b0
RH
6497 if (base == hard_frame_pointer_rtx)
6498 {
6499 saveop = ".savepsp";
6500 off = - off;
6501 }
6502 else if (base == stack_pointer_rtx)
6503 saveop = ".savesp";
6504 else
6505 abort ();
6506
6507 src_regno = REGNO (src);
6508 switch (src_regno)
6509 {
6510 case BR_REG (0):
6511 if (current_frame_info.reg_save_b0 != 0)
6512 abort ();
6513 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6514 return 1;
6515
6516 case PR_REG (0):
6517 if (current_frame_info.reg_save_pr != 0)
6518 abort ();
6519 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6520 return 1;
6521
6522 case AR_LC_REGNUM:
6523 if (current_frame_info.reg_save_ar_lc != 0)
6524 abort ();
6525 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6526 return 1;
6527
6528 case AR_PFS_REGNUM:
6529 if (current_frame_info.reg_save_ar_pfs != 0)
6530 abort ();
6531 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6532 return 1;
6533
6534 case AR_UNAT_REGNUM:
6535 if (current_frame_info.reg_save_ar_unat != 0)
6536 abort ();
6537 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6538 return 1;
6539
6540 case GR_REG (4):
6541 case GR_REG (5):
6542 case GR_REG (6):
6543 case GR_REG (7):
6544 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6545 1 << (src_regno - GR_REG (4)));
97e242b0
RH
6546 return 1;
6547
6548 case BR_REG (1):
6549 case BR_REG (2):
6550 case BR_REG (3):
6551 case BR_REG (4):
6552 case BR_REG (5):
6553 fprintf (asm_out_file, "\t.save.b 0x%x\n",
6554 1 << (src_regno - BR_REG (1)));
0c96007e 6555 return 1;
97e242b0
RH
6556
6557 case FR_REG (2):
6558 case FR_REG (3):
6559 case FR_REG (4):
6560 case FR_REG (5):
6561 fprintf (asm_out_file, "\t.save.f 0x%x\n",
6562 1 << (src_regno - FR_REG (2)));
6563 return 1;
6564
6565 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6566 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6567 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6568 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6569 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
6570 1 << (src_regno - FR_REG (12)));
6571 return 1;
6572
6573 default:
6574 return 0;
0c96007e
AM
6575 }
6576 }
97e242b0 6577
0c96007e
AM
6578 return 0;
6579}
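
/* A mapping example with assumed register numbers (sp is r12 on
   ia64): the frame-related insn

       (set (reg:DI 12) (plus:DI (reg:DI 12) (const_int -160)))

   reaches the PLUS case above and emits

       .fframe 160

   while restoring sp from the hard frame pointer goes through
   process_epilogue and emits ".restore sp".  */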
6580
6581
6582/* This function looks at a single insn and emits any directives
6583 required to unwind this insn. */
6584void
6585process_for_unwind_directive (asm_out_file, insn)
6586 FILE *asm_out_file;
6587 rtx insn;
6588{
ad0fc698 6589 if (flag_unwind_tables
531073e7 6590 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
0c96007e 6591 {
97e242b0
RH
6592 rtx pat;
6593
ad0fc698
JW
6594 if (GET_CODE (insn) == NOTE
6595 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6596 {
6597 block_num = NOTE_BASIC_BLOCK (insn)->index;
6598
6599 /* Restore unwind state from immediately before the epilogue. */
6600 if (need_copy_state)
6601 {
6602 fprintf (asm_out_file, "\t.body\n");
6603 fprintf (asm_out_file, "\t.copy_state 1\n");
6604 need_copy_state = 0;
6605 }
6606 }
6607
6608 if (! RTX_FRAME_RELATED_P (insn))
6609 return;
6610
97e242b0
RH
6611 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
6612 if (pat)
6613 pat = XEXP (pat, 0);
6614 else
6615 pat = PATTERN (insn);
0c96007e
AM
6616
6617 switch (GET_CODE (pat))
6618 {
809d4ef1
RH
6619 case SET:
6620 process_set (asm_out_file, pat);
6621 break;
6622
6623 case PARALLEL:
6624 {
6625 int par_index;
6626 int limit = XVECLEN (pat, 0);
6627 for (par_index = 0; par_index < limit; par_index++)
6628 {
6629 rtx x = XVECEXP (pat, 0, par_index);
6630 if (GET_CODE (x) == SET)
6631 process_set (asm_out_file, x);
6632 }
6633 break;
6634 }
6635
6636 default:
6637 abort ();
0c96007e
AM
6638 }
6639 }
6640}
c65ebc55 6641
0551c32d 6642\f
c65ebc55
JW
6643void
6644ia64_init_builtins ()
6645{
c65ebc55
JW
6646 tree psi_type_node = build_pointer_type (integer_type_node);
6647 tree pdi_type_node = build_pointer_type (long_integer_type_node);
cbd5937a 6648 tree endlink = void_list_node;
c65ebc55 6649
c65ebc55
JW
6650 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6651 tree si_ftype_psi_si_si
6652 = build_function_type (integer_type_node,
6653 tree_cons (NULL_TREE, psi_type_node,
6654 tree_cons (NULL_TREE, integer_type_node,
3b572406
RH
6655 tree_cons (NULL_TREE,
6656 integer_type_node,
c65ebc55
JW
6657 endlink))));
6658
6659 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
6660 tree di_ftype_pdi_di_di
6661 = build_function_type (long_integer_type_node,
6662 tree_cons (NULL_TREE, pdi_type_node,
3b572406
RH
6663 tree_cons (NULL_TREE,
6664 long_integer_type_node,
6665 tree_cons (NULL_TREE,
0551c32d
RH
6666 long_integer_type_node,
6667 endlink))));
c65ebc55
JW
6668 /* __sync_synchronize */
6669 tree void_ftype_void
6670 = build_function_type (void_type_node, endlink);
6671
6672 /* __sync_lock_test_and_set_si */
6673 tree si_ftype_psi_si
6674 = build_function_type (integer_type_node,
6675 tree_cons (NULL_TREE, psi_type_node,
6676 tree_cons (NULL_TREE, integer_type_node, endlink)));
6677
6678 /* __sync_lock_test_and_set_di */
6679 tree di_ftype_pdi_di
809d4ef1 6680 = build_function_type (long_integer_type_node,
c65ebc55 6681 tree_cons (NULL_TREE, pdi_type_node,
3b572406
RH
6682 tree_cons (NULL_TREE, long_integer_type_node,
6683 endlink)));
c65ebc55
JW
6684
6685 /* __sync_lock_release_si */
6686 tree void_ftype_psi
3b572406
RH
6687 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
6688 endlink));
c65ebc55
JW
6689
6690 /* __sync_lock_release_di */
6691 tree void_ftype_pdi
3b572406
RH
6692 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
6693 endlink));
c65ebc55 6694
0551c32d
RH
6695#define def_builtin(name, type, code) \
6696 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
6697
3b572406
RH
6698 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
6699 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
3b572406
RH
6700 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
6701 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
3b572406
RH
6702 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
6703 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
3b572406
RH
6704 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
6705 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
c65ebc55 6706
3b572406
RH
6707 def_builtin ("__sync_synchronize", void_ftype_void,
6708 IA64_BUILTIN_SYNCHRONIZE);
c65ebc55 6709
3b572406
RH
6710 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
6711 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
3b572406
RH
6712 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
6713 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
3b572406
RH
6714 def_builtin ("__sync_lock_release_si", void_ftype_psi,
6715 IA64_BUILTIN_LOCK_RELEASE_SI);
3b572406
RH
6716 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
6717 IA64_BUILTIN_LOCK_RELEASE_DI);
c65ebc55 6718
3b572406
RH
6719 def_builtin ("__builtin_ia64_bsp",
6720 build_function_type (ptr_type_node, endlink),
6721 IA64_BUILTIN_BSP);
ce152ef8
AM
6722
6723 def_builtin ("__builtin_ia64_flushrs",
6724 build_function_type (void_type_node, endlink),
6725 IA64_BUILTIN_FLUSHRS);
6726
0551c32d
RH
6727 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
6728 IA64_BUILTIN_FETCH_AND_ADD_SI);
6729 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
6730 IA64_BUILTIN_FETCH_AND_SUB_SI);
6731 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
6732 IA64_BUILTIN_FETCH_AND_OR_SI);
6733 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
6734 IA64_BUILTIN_FETCH_AND_AND_SI);
6735 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
6736 IA64_BUILTIN_FETCH_AND_XOR_SI);
6737 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
6738 IA64_BUILTIN_FETCH_AND_NAND_SI);
6739
6740 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
6741 IA64_BUILTIN_ADD_AND_FETCH_SI);
6742 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
6743 IA64_BUILTIN_SUB_AND_FETCH_SI);
6744 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
6745 IA64_BUILTIN_OR_AND_FETCH_SI);
6746 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
6747 IA64_BUILTIN_AND_AND_FETCH_SI);
6748 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
6749 IA64_BUILTIN_XOR_AND_FETCH_SI);
6750 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
6751 IA64_BUILTIN_NAND_AND_FETCH_SI);
6752
6753 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
6754 IA64_BUILTIN_FETCH_AND_ADD_DI);
6755 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
6756 IA64_BUILTIN_FETCH_AND_SUB_DI);
6757 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
6758 IA64_BUILTIN_FETCH_AND_OR_DI);
6759 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
6760 IA64_BUILTIN_FETCH_AND_AND_DI);
6761 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
6762 IA64_BUILTIN_FETCH_AND_XOR_DI);
6763 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
6764 IA64_BUILTIN_FETCH_AND_NAND_DI);
6765
6766 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
6767 IA64_BUILTIN_ADD_AND_FETCH_DI);
6768 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
6769 IA64_BUILTIN_SUB_AND_FETCH_DI);
6770 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
6771 IA64_BUILTIN_OR_AND_FETCH_DI);
6772 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
6773 IA64_BUILTIN_AND_AND_FETCH_DI);
6774 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
6775 IA64_BUILTIN_XOR_AND_FETCH_DI);
6776 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
6777 IA64_BUILTIN_NAND_AND_FETCH_DI);
6778
6779#undef def_builtin
c65ebc55
JW
6780}
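
/* A usage sketch for the lock builtins defined above (user-level
   code, hypothetical variable): spin until the exchange returns 0,
   then release with a plain store.

       static int lock;

       while (__sync_lock_test_and_set_si (&lock, 1))
         ;
       ... critical section ...
       __sync_lock_release_si (&lock);

   These expand, via ia64_expand_builtin below, into the xchgsz and
   stsz.rel sequences described in the comments on their
   expanders.  */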
6781
6782/* Expand fetch_and_op intrinsics. The basic code sequence is:
6783
6784 mf
0551c32d 6785 tmp = [ptr];
c65ebc55 6786 do {
0551c32d 6787 ret = tmp;
c65ebc55
JW
6788 ar.ccv = tmp;
6789 tmp <op>= value;
6790 cmpxchgsz.acq tmp = [ptr], tmp
0551c32d 6791 } while (tmp != ret)
c65ebc55 6792*/
0551c32d
RH
6793
6794static rtx
6795ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
6796 optab binoptab;
c65ebc55 6797 enum machine_mode mode;
0551c32d
RH
6798 tree arglist;
6799 rtx target;
c65ebc55 6800{
0551c32d
RH
6801 rtx ret, label, tmp, ccv, insn, mem, value;
6802 tree arg0, arg1;
97e242b0 6803
0551c32d
RH
6804 arg0 = TREE_VALUE (arglist);
6805 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6806 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6807 value = expand_expr (arg1, NULL_RTX, mode, 0);
c65ebc55 6808
0551c32d
RH
6809 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6810 MEM_VOLATILE_P (mem) = 1;
c65ebc55 6811
0551c32d
RH
6812 if (target && register_operand (target, mode))
6813 ret = target;
6814 else
6815 ret = gen_reg_rtx (mode);
c65ebc55 6816
0551c32d
RH
6817 emit_insn (gen_mf ());
6818
6819 /* Special case for fetchadd instructions. */
6820 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
c65ebc55 6821 {
c65ebc55 6822 if (mode == SImode)
0551c32d 6823 insn = gen_fetchadd_acq_si (ret, mem, value);
c65ebc55 6824 else
0551c32d
RH
6825 insn = gen_fetchadd_acq_di (ret, mem, value);
6826 emit_insn (insn);
6827 return ret;
c65ebc55
JW
6828 }
6829
0551c32d
RH
6830 tmp = gen_reg_rtx (mode);
6831 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6832 emit_move_insn (tmp, mem);
6833
6834 label = gen_label_rtx ();
6835 emit_label (label);
6836 emit_move_insn (ret, tmp);
6837 emit_move_insn (ccv, tmp);
6838
6839 /* Perform the specific operation. Special case NAND by noticing
6840 one_cmpl_optab instead. */
6841 if (binoptab == one_cmpl_optab)
6842 {
6843 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
6844 binoptab = and_optab;
6845 }
6846 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
809d4ef1
RH
6847
6848 if (mode == SImode)
0551c32d 6849 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
c65ebc55 6850 else
0551c32d
RH
6851 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
6852 emit_insn (insn);
6853
6854 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
c65ebc55 6855
0551c32d 6856 return ret;
c65ebc55
JW
6857}
6858
6859/* Expand op_and_fetch intrinsics. The basic code sequence is:
6860
6861 mf
0551c32d 6862 tmp = [ptr];
c65ebc55 6863 do {
0551c32d 6864 old = tmp;
c65ebc55 6865 ar.ccv = tmp;
0551c32d
RH
6866 ret = tmp + value;
6867 cmpxchgsz.acq tmp = [ptr], ret
6868 } while (tmp != old)
c65ebc55 6869*/
0551c32d
RH
6870
6871static rtx
6872ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
6873 optab binoptab;
c65ebc55 6874 enum machine_mode mode;
0551c32d
RH
6875 tree arglist;
6876 rtx target;
c65ebc55 6877{
0551c32d
RH
6878 rtx old, label, tmp, ret, ccv, insn, mem, value;
6879 tree arg0, arg1;
6880
6881 arg0 = TREE_VALUE (arglist);
6882 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6883 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6884 value = expand_expr (arg1, NULL_RTX, mode, 0);
c65ebc55 6885
0551c32d
RH
6886 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6887 MEM_VOLATILE_P (mem) = 1;
6888
6889 if (target && ! register_operand (target, mode))
6890 target = NULL_RTX;
6891
6892 emit_insn (gen_mf ());
6893 tmp = gen_reg_rtx (mode);
6894 old = gen_reg_rtx (mode);
97e242b0
RH
6895 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6896
0551c32d 6897 emit_move_insn (tmp, mem);
c65ebc55 6898
0551c32d
RH
6899 label = gen_label_rtx ();
6900 emit_label (label);
6901 emit_move_insn (old, tmp);
6902 emit_move_insn (ccv, tmp);
c65ebc55 6903
0551c32d
RH
6904 /* Perform the specific operation. Special case NAND by noticing
6905 one_cmpl_optab instead. */
6906 if (binoptab == one_cmpl_optab)
6907 {
6908 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
6909 binoptab = and_optab;
6910 }
6911 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
809d4ef1
RH
6912
6913 if (mode == SImode)
0551c32d 6914 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
c65ebc55 6915 else
0551c32d
RH
6916 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
6917 emit_insn (insn);
6918
6919 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
c65ebc55 6920
0551c32d 6921 return ret;
c65ebc55
JW
6922}
6923
6924/* Expand val_ and bool_compare_and_swap. For val_ we want:
6925
6926 ar.ccv = oldval
6927 mf
6928 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
6929 return ret
6930
6931 For bool_ it's the same except return ret == oldval.
6932*/
0551c32d 6933
c65ebc55 6934static rtx
0551c32d
RH
6935ia64_expand_compare_and_swap (mode, boolp, arglist, target)
6936 enum machine_mode mode;
6937 int boolp;
c65ebc55
JW
6938 tree arglist;
6939 rtx target;
c65ebc55
JW
6940{
6941 tree arg0, arg1, arg2;
0551c32d 6942 rtx mem, old, new, ccv, tmp, insn;
809d4ef1 6943
c65ebc55
JW
6944 arg0 = TREE_VALUE (arglist);
6945 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6946 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
0551c32d
RH
6947 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6948 old = expand_expr (arg1, NULL_RTX, mode, 0);
6949 new = expand_expr (arg2, NULL_RTX, mode, 0);
6950
6951 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6952 MEM_VOLATILE_P (mem) = 1;
6953
6954 if (! register_operand (old, mode))
6955 old = copy_to_mode_reg (mode, old);
6956 if (! register_operand (new, mode))
6957 new = copy_to_mode_reg (mode, new);
6958
6959 if (! boolp && target && register_operand (target, mode))
6960 tmp = target;
6961 else
6962 tmp = gen_reg_rtx (mode);
6963
6964 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6965 emit_move_insn (ccv, old);
6966 emit_insn (gen_mf ());
6967 if (mode == SImode)
6968 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
6969 else
6970 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
6971 emit_insn (insn);
6972
6973 if (boolp)
c65ebc55 6974 {
0551c32d
RH
6975 if (! target)
6976 target = gen_reg_rtx (mode);
6977 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
c65ebc55 6978 }
0551c32d
RH
6979 else
6980 return tmp;
c65ebc55
JW
6981}
6982
0551c32d
RH
6983/* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
6984
c65ebc55 6985static rtx
0551c32d
RH
6986ia64_expand_lock_test_and_set (mode, arglist, target)
6987 enum machine_mode mode;
c65ebc55
JW
6988 tree arglist;
6989 rtx target;
6990{
0551c32d
RH
6991 tree arg0, arg1;
6992 rtx mem, new, ret, insn;
6993
6994 arg0 = TREE_VALUE (arglist);
6995 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6996 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6997 new = expand_expr (arg1, NULL_RTX, mode, 0);
6998
6999 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7000 MEM_VOLATILE_P (mem) = 1;
7001 if (! register_operand (new, mode))
7002 new = copy_to_mode_reg (mode, new);
7003
7004 if (target && register_operand (target, mode))
7005 ret = target;
7006 else
7007 ret = gen_reg_rtx (mode);
7008
7009 if (mode == SImode)
7010 insn = gen_xchgsi (ret, mem, new);
7011 else
7012 insn = gen_xchgdi (ret, mem, new);
7013 emit_insn (insn);
7014
7015 return ret;
7016}
7017
7018/* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7019
7020static rtx
7021ia64_expand_lock_release (mode, arglist, target)
7022 enum machine_mode mode;
7023 tree arglist;
7024 rtx target ATTRIBUTE_UNUSED;
7025{
7026 tree arg0;
7027 rtx mem;
7028
7029 arg0 = TREE_VALUE (arglist);
7030 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7031
7032 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7033 MEM_VOLATILE_P (mem) = 1;
7034
7035 emit_move_insn (mem, const0_rtx);
7036
7037 return const0_rtx;
c65ebc55
JW
7038}
7039
7040rtx
7041ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7042 tree exp;
7043 rtx target;
fd7c34b0
RH
7044 rtx subtarget ATTRIBUTE_UNUSED;
7045 enum machine_mode mode ATTRIBUTE_UNUSED;
7046 int ignore ATTRIBUTE_UNUSED;
c65ebc55 7047{
c65ebc55 7048 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
97e242b0 7049 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
0551c32d 7050 tree arglist = TREE_OPERAND (exp, 1);
c65ebc55
JW
7051
7052 switch (fcode)
7053 {
7054 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
c65ebc55 7055 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
0551c32d
RH
7056 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7057 case IA64_BUILTIN_LOCK_RELEASE_SI:
7058 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7059 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7060 case IA64_BUILTIN_FETCH_AND_OR_SI:
7061 case IA64_BUILTIN_FETCH_AND_AND_SI:
7062 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7063 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7064 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7065 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7066 case IA64_BUILTIN_OR_AND_FETCH_SI:
7067 case IA64_BUILTIN_AND_AND_FETCH_SI:
7068 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7069 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7070 mode = SImode;
7071 break;
809d4ef1 7072
c65ebc55 7073 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
0551c32d
RH
7074 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7075 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7076 case IA64_BUILTIN_LOCK_RELEASE_DI:
7077 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7078 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7079 case IA64_BUILTIN_FETCH_AND_OR_DI:
7080 case IA64_BUILTIN_FETCH_AND_AND_DI:
7081 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7082 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7083 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7084 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7085 case IA64_BUILTIN_OR_AND_FETCH_DI:
7086 case IA64_BUILTIN_AND_AND_FETCH_DI:
7087 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7088 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7089 mode = DImode;
7090 break;
809d4ef1 7091
0551c32d
RH
7092 default:
7093 break;
7094 }
7095
7096 switch (fcode)
7097 {
7098 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7099 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7100 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7101
7102 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
c65ebc55 7103 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
0551c32d 7104 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
809d4ef1 7105
c65ebc55 7106 case IA64_BUILTIN_SYNCHRONIZE:
0551c32d 7107 emit_insn (gen_mf ());
3b572406 7108 return const0_rtx;
c65ebc55
JW
7109
7110 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
c65ebc55 7111 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
0551c32d 7112 return ia64_expand_lock_test_and_set (mode, arglist, target);
c65ebc55
JW
7113
7114 case IA64_BUILTIN_LOCK_RELEASE_SI:
c65ebc55 7115 case IA64_BUILTIN_LOCK_RELEASE_DI:
0551c32d 7116 return ia64_expand_lock_release (mode, arglist, target);
c65ebc55 7117
ce152ef8 7118 case IA64_BUILTIN_BSP:
0551c32d
RH
7119 if (! target || ! register_operand (target, DImode))
7120 target = gen_reg_rtx (DImode);
7121 emit_insn (gen_bsp_value (target));
7122 return target;
ce152ef8
AM
7123
7124 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
7125 emit_insn (gen_flushrs ());
7126 return const0_rtx;
ce152ef8 7127
0551c32d
RH
7128 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7129 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7130 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7131
7132 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7133 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7134 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7135
7136 case IA64_BUILTIN_FETCH_AND_OR_SI:
7137 case IA64_BUILTIN_FETCH_AND_OR_DI:
7138 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7139
7140 case IA64_BUILTIN_FETCH_AND_AND_SI:
7141 case IA64_BUILTIN_FETCH_AND_AND_DI:
7142 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7143
7144 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7145 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7146 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7147
7148 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7149 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7150 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7151
7152 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7153 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7154 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7155
7156 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7157 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7158 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7159
7160 case IA64_BUILTIN_OR_AND_FETCH_SI:
7161 case IA64_BUILTIN_OR_AND_FETCH_DI:
7162 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7163
7164 case IA64_BUILTIN_AND_AND_FETCH_SI:
7165 case IA64_BUILTIN_AND_AND_FETCH_DI:
7166 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7167
7168 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7169 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7170 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7171
7172 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7173 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7174 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7175
c65ebc55
JW
7176 default:
7177 break;
7178 }
7179
0551c32d 7180 return NULL_RTX;
c65ebc55 7181}