/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register number where ar.pfs was saved in the prologue, or zero
   if it was not saved.  */

int ia64_arpfs_regno;

/* Register number where rp was saved in the prologue, or zero if it was
   not saved.  */

int ia64_rp_regno;

/* Register number where frame pointer was saved in the prologue, or zero
   if it was not saved.  */

int ia64_fp_regno;

/* Number of input and local registers used.  This is needed for the .regstk
   directive, and also for debugging info.  */

int ia64_input_regs;
int ia64_local_regs;

/* If true, then we must emit a .regstk directive.  */

int ia64_need_regstk;

/* Register names for ia64_expand_prologue.  */
char *ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
char *ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
char *ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
char *ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

int ia64_section_threshold;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
      return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    case CONST:
      return (GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
              && XSTR (XEXP (XEXP (op, 0), 0), 0)[0] == SDATA_NAME_FLAG_CHAR);
    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }
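
  /* For example, both "_setjmp" and "__setjmp" are reduced to "setjmp"
     here, so the tests below see the stripped name.  */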

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand, or the constant zero.  */

int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 6 bit immediate operand.  */

int
reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit immediate operand.  */

int
reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need a constant acceptable to both the GT and LT immediate forms.  */

int
reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}
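
/* Illustration (not in the original source): CONST_OK_FOR_K covers the
   plain 8-bit immediates used by the EQ/NE/GT/LE style compares and
   CONST_OK_FOR_L the adjusted immediates used by the LT/GE style compares,
   so only constants valid in both forms are accepted above.  */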

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}
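
/* Illustrative note (not in the original source): shladd computes
   r1 = (r2 << count) + r3 with a shift count of 1 to 4, so the
   multipliers it can express are exactly 2, 4, 8, and 16.  */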

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8
              || INTVAL (op) == -4 || INTVAL (op) == -1
              || INTVAL (op) == 1 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}
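
/* These eight values are the only increments that the ia64 fetchadd
   instruction can encode in its immediate field.  */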

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if this is a comparison operator that accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}
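
/* Illustrative note (not in the original source): an adjusted compare such
   as x < C can presumably be rewritten as x <= C-1, which is why these
   codes take the adjusted immediate range (CONST_OK_FOR_L) rather than the
   plain 8-bit range (CONST_OK_FOR_K) used by normal_comparison_operator.  */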

/* Return 1 if OP is a call returning an HFA.  It is known to be a PARALLEL
   and the first section has already been tested.  */

int
call_multiple_values_operation (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int count = XVECLEN (op, 0) - 2;
  int i;
  int dest_regno;

  /* Perform a quick check so we don't blow up below.  */
  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, 0)) != SET
      || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
      || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL)
    return 0;

  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));

  for (i = 1; i < count; i++)
    {
      rtx elt = XVECEXP (op, 0, i + 2);

      if (GET_CODE (elt) != SET
          || GET_CODE (SET_SRC (elt)) != CALL
          || GET_CODE (SET_DEST (elt)) != REG
          || REGNO (SET_DEST (elt)) != dest_regno + i)
        return 0;
    }

  return 1;
}
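
/* Sketch (not in the original source) of the PARALLEL shape accepted
   above:

     (parallel [(set (reg r) (call ...))
                ...
                (set (reg r+1) (call ...))
                (set (reg r+2) (call ...))
                ...])

   i.e. each additional SET must be a call whose destination register
   follows consecutively from the destination of the first SET.  */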


/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  long total_size;       /* # bytes that the entire frame takes up.  */
  long var_size;         /* # bytes that variables take up.  */
  long args_size;        /* # bytes that outgoing arguments take up.  */
  long pretend_size;     /* # bytes that stdarg arguments take up.  */
  long pretend_pad_size; /* # bytes padding to align stdarg args.  */
  long extra_size;       /* # bytes of extra gunk.  */
  long gr_size;          /* # bytes needed to store general regs.  */
  long fr_size;          /* # bytes needed to store FP regs.  */
  long fr_pad_size;      /* # bytes needed to align FP save area.  */
  long pr_size;          /* # bytes needed to store predicate regs.  */
  long br_size;          /* # bytes needed to store branch regs.  */
  HARD_REG_SET mask;     /* mask of saved registers.  */
  int initialized;       /* != 0 if frame size already calculated.  */
};

/* Current frame information calculated by compute_frame_size.  */
struct ia64_frame_info current_frame_info;

/* Helper function for INITIAL_ELIMINATION_OFFSET.  Return the offset from the
   frame pointer where b0 is saved.  */

int
ia64_rap_fp_offset ()
{
  return - current_frame_info.br_size;
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

unsigned int
ia64_compute_frame_size (size)
     int size;
{
  int total_size;
  int extra_size;
  int gr_size = 0;
  int fr_size = 0;
  int fr_pad_size = 0;
  int pr_size = 0;
  int br_size = 0;
  int pretend_pad_size = 0;
  int tmp;
  int regno;
  HARD_REG_SET mask;

  /* Reload used to round the frame size to STACK_BOUNDARY.  Now we do it
     here.  */
  size = IA64_STACK_ALIGN (size);

  CLEAR_HARD_REG_SET (mask);

  /* Calculate space needed for general registers.  */
  /* We never need to save any of the stacked registers, which are regs
     32 to 127.  */
  for (regno = GR_REG (0); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        gr_size += 8;
      }

  /* Allocate space to save/restore the unat from.  */
  if (gr_size != 0
      || current_function_varargs || current_function_stdarg)
    gr_size += 8;

  /* Calculate space needed for FP registers.  */
  for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        fr_size += 16;
      }

  /* Calculate space needed for predicate registers.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        pr_size = 8;
      }

  /* Calculate space needed for branch registers.  */
  for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        br_size += 8;
      }

  /* The FR save area needs to be 16-byte aligned.  */
  if (fr_size)
    {
      tmp = (size + fr_size + br_size);
      fr_pad_size = IA64_STACK_ALIGN (tmp) - tmp;
    }
  else
    fr_pad_size = 0;

  /* If we have an odd number of words of pretend arguments written to the
     stack, then the FR save area will be unaligned.  We pad below this area
     to keep things 16 byte aligned.  This needs to be kept distinct, to
     avoid confusing it with padding added below the GR save area, which does
     not affect the FR area alignment.  */
  pretend_pad_size = current_function_pretend_args_size % 16;

  /* The 16 bytes is for the scratch area.  */
  tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size
         + current_function_outgoing_args_size + 16);
  tmp += (current_function_pretend_args_size
          ? current_function_pretend_args_size - 16
          : 0) + pretend_pad_size;
  total_size = IA64_STACK_ALIGN (tmp);
  extra_size = total_size - tmp + 16;

  /* If this is a leaf routine (BR_REG (0) is not live), and if there is no
     stack space needed for register saves, then don't allocate the 16 byte
     scratch area.  */
  if (total_size == 16 && ! regs_ever_live[BR_REG (0)])
    {
      total_size = 0;
      extra_size = 0;
    }

  current_frame_info.total_size = total_size;
  current_frame_info.var_size = size;
  current_frame_info.args_size = current_function_outgoing_args_size;
  current_frame_info.pretend_size
    = (current_function_pretend_args_size
       ? current_function_pretend_args_size - 16
       : 0);
  current_frame_info.pretend_pad_size = pretend_pad_size;
  current_frame_info.extra_size = extra_size;
  current_frame_info.gr_size = gr_size;
  current_frame_info.fr_size = fr_size;
  current_frame_info.fr_pad_size = fr_pad_size;
  current_frame_info.pr_size = pr_size;
  current_frame_info.br_size = br_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.initialized = reload_completed;

  return total_size;
}

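/* Emit the insns needed to save (if SAVE_P is nonzero) or restore (if
   SAVE_P is zero) the registers recorded in current_frame_info.mask, plus
   ar.unat when any general registers are spilled or the function is
   varargs/stdarg.  GR 2 is used as the pointer into the save area and
   GR 3 as a temporary.  */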
void
save_restore_insns (save_p)
     int save_p;
{
  rtx insn;

  if (current_frame_info.gr_size + current_frame_info.fr_size
      + current_frame_info.br_size + current_frame_info.pr_size)
    {
      rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2));
      rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
      rtx tmp2_reg = gen_rtx_REG (DImode, GR_REG (3));
      int offset = (current_frame_info.total_size
                    - (current_frame_info.gr_size + current_frame_info.fr_size
                       + current_frame_info.fr_pad_size
                       + current_frame_info.br_size
                       + current_frame_info.pr_size
                       + current_frame_info.var_size
                       + current_frame_info.pretend_size
                       + current_frame_info.pretend_pad_size));
      rtx offset_rtx;
      int regno;

      /* If there is a frame pointer, then we use it instead of the stack
         pointer, so that the stack pointer does not need to be valid when
         the epilogue starts.  See EXIT_IGNORE_STACK.  */
      if (frame_pointer_needed)
        offset = offset - current_frame_info.total_size;

      if (CONST_OK_FOR_I (offset))
        offset_rtx = GEN_INT (offset);
      else
        {
          offset_rtx = tmp_reg;
          insn = emit_insn (gen_movdi (tmp_reg, GEN_INT (offset)));
          if (save_p)
            RTX_FRAME_RELATED_P (insn) = 1;
        }
      insn = emit_insn (gen_adddi3 (tmp_reg,
                                    (frame_pointer_needed ? frame_pointer_rtx
                                     : stack_pointer_rtx),
                                    offset_rtx));
      if (save_p)
        RTX_FRAME_RELATED_P (insn) = 1;

      /* If one is used, we save/restore all of them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
          {
            rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
            if (save_p)
              {
                insn = emit_insn (gen_pr_spill (tmp2_reg));
                RTX_FRAME_RELATED_P (insn) = 1;
                insn = emit_insn (gen_movdi (mem, tmp2_reg));
                RTX_FRAME_RELATED_P (insn) = 1;
              }
            else
              {
                insn = emit_insn (gen_movdi (tmp2_reg, mem));
                insn = emit_insn (gen_pr_restore (tmp2_reg));
              }
            break;
          }

      /* Must save/restore ar.unat if any GR is spilled/restored.  */
      if (current_frame_info.gr_size != 0
          || current_function_varargs || current_function_stdarg)
        {
          rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
          if (save_p)
            {
              insn = emit_insn (gen_unat_spill (tmp2_reg));
              if (save_p)
                RTX_FRAME_RELATED_P (insn) = 1;
              insn = emit_insn (gen_movdi (mem, tmp2_reg));
              if (save_p)
                RTX_FRAME_RELATED_P (insn) = 1;
            }
          else
            {
              insn = emit_insn (gen_movdi (tmp2_reg, mem));
              if (save_p)
                RTX_FRAME_RELATED_P (insn) = 1;
              /* The restore happens after the last ld8.fill instruction.  */
            }
        }

      for (regno = GR_REG (0); regno <= GR_REG (127); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
          {
            rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
            if (save_p)
              insn = emit_insn (gen_gr_spill (mem,
                                              gen_rtx_REG (DImode, regno)));
            else
              insn = emit_insn (gen_gr_restore (gen_rtx_REG (DImode, regno),
                                                mem));
            if (save_p)
              RTX_FRAME_RELATED_P (insn) = 1;
          }

      /* Now restore the unat register if necessary.  */
      if ((current_frame_info.gr_size != 0
           || current_function_varargs || current_function_stdarg)
          && ! save_p)
        emit_insn (gen_unat_restore (tmp2_reg));

      for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
          {
            rtx mem = gen_rtx_MEM (XFmode, tmp_post_inc);
            if (save_p)
              insn = emit_insn (gen_fr_spill (mem,
                                              gen_rtx_REG (XFmode, regno)));
            else
              insn = emit_insn (gen_fr_restore (gen_rtx_REG (XFmode, regno),
                                                mem));
            if (save_p)
              RTX_FRAME_RELATED_P (insn) = 1;
          }

      for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
        if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
          {
            rtx src, dest;

            if (save_p)
              {
                src = gen_rtx_REG (DImode, regno);
                dest = gen_rtx_MEM (DImode, tmp_post_inc);
              }
            else
              {
                src = gen_rtx_MEM (DImode, tmp_post_inc);
                dest = gen_rtx_REG (DImode, regno);
              }

            insn = emit_insn (gen_movdi (tmp2_reg, src));
            if (save_p)
              RTX_FRAME_RELATED_P (insn) = 1;
            insn = emit_insn (gen_movdi (dest, tmp2_reg));
            if (save_p)
              RTX_FRAME_RELATED_P (insn) = 1;
          }
    }
}


/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

/* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the
   low 32 regs.  */

/* ??? Should not reserve a local register for rp/ar.pfs.  Should
   instead check to see if any local registers are unused, and if so,
   allocate them to rp/ar.pfs in that order.  Not sure what to do about
   fp, we may still need to reserve a local register for it.  */

void
ia64_expand_prologue ()
{
  rtx insn, offset;
  int i, locals, inputs, outputs, rotates;
  int frame_size = ia64_compute_frame_size (get_frame_size ());
  int leaf_function;
  int epilogue_p;
  edge e;

  /* ??? This seems like a leaf_function_p bug.  It calls get_insns which
     returns the first insn of the current sequence, not the first insn
     of the function.  We work around this by pushing to the topmost
     sequence first.  */
  push_topmost_sequence ();
  leaf_function = leaf_function_p ();
  pop_topmost_sequence ();

  /* If there is no epilogue, then we don't need some prologue insns.  We
     need to avoid emitting the dead prologue insns, because flow will
     complain about them.  */
  if (optimize)
    {
      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
          break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Find the highest local register used.  */
  /* We have only 80 local registers, because we reserve 8 for the inputs
     and 8 for the outputs.  */

  for (i = LOC_REG (79); i >= LOC_REG (0); i--)
    if (regs_ever_live[i])
      break;
  locals = i - LOC_REG (0) + 1;

  /* Likewise for inputs.  */

  for (i = IN_REG (7); i >= IN_REG (0); i--)
    if (regs_ever_live[i])
      break;
  inputs = i - IN_REG (0) + 1;

#if 0
  /* If the function was declared with syscall_linkage, then we may need to
     preserve all declared input registers, even if they weren't used.
     Currently, syscall_linkage does not have this effect.  */

  if (lookup_attribute ("syscall_linkage",
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    inputs = MAX (inputs, current_function_args_info.words);
#endif

  /* Likewise for outputs.  */

  for (i = OUT_REG (7); i >= OUT_REG (0); i--)
    if (regs_ever_live[i])
      break;
  outputs = i - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    outputs = MAX (outputs, 1);
  else if (profile_block_flag == 2)
    outputs = MAX (outputs, 2);

  /* No rotating register support as yet.  */

  rotates = 0;

  /* Allocate two extra locals for saving/restoring rp and ar.pfs.  Also
     allocate one local for use as the frame pointer if frame_pointer_needed
     is true.  */
  /* ??? If this is a leaf function, then we aren't using one of these local
     registers for the RP anymore.  */
  locals += 2 + frame_pointer_needed;

  /* Save these values in global registers for debugging info.  */
  ia64_input_regs = inputs;
  ia64_local_regs = locals;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.
     We give in/loc/out names to unused registers, to make invalid uses of
     them easy to spot.  */
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < 8; i++)
        {
          if (i < inputs)
            reg_names[IN_REG (i)] = ia64_reg_numbers[i];
          else
            reg_names[IN_REG (i)] = ia64_input_reg_names[i];
        }
      for (i = 0; i < 80; i++)
        {
          if (i < locals)
            reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
          else
            reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
        }
      for (i = 0; i < 8; i++)
        {
          if (i < outputs)
            reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
          else
            reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
        }
    }

  /* Set the frame pointer register name now that it is known, and the
     local register names are known.  */
  if (frame_pointer_needed)
    {
      reg_names[FRAME_POINTER_REGNUM]
        = reg_names[LOC_REG (locals - 3)];
      ia64_fp_regno = LOC_REG (inputs + locals - 3);
    }
  else
    ia64_fp_regno = 0;

  /* We don't need an alloc instruction if this is a leaf function, and the
     locals and outputs are both zero sized.  Since we have already allocated
     two locals for rp and ar.pfs, we check for two locals.  */
  /* Leaf functions can use output registers as call-clobbered temporaries.  */
  if (locals == 2 && outputs == 0 && leaf_function)
    {
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      if (TARGET_REG_NAMES)
        ia64_need_regstk = 1;
      else
        ia64_need_regstk = 0;

      ia64_arpfs_regno = 0;
      ia64_rp_regno = 0;
    }
  else
    {
      ia64_need_regstk = 0;
      ia64_arpfs_regno = LOC_REG (locals - 1);

      insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno),
                                   GEN_INT (inputs), GEN_INT (locals),
                                   GEN_INT (outputs), GEN_INT (rotates)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Emit a save of BR_REG (0) if we call other functions.
         Do this even if this function doesn't return, as EH
         depends on this to be able to unwind the stack.  */
      if (! leaf_function)
        {
          rtx ia64_rp_reg;

          ia64_rp_regno = LOC_REG (locals - 2);
          reg_names[RETURN_ADDRESS_REGNUM] = reg_names[ia64_rp_regno];

          ia64_rp_reg = gen_rtx_REG (DImode, ia64_rp_regno);
          insn = emit_move_insn (ia64_rp_reg, gen_rtx_REG (DImode,
                                                           BR_REG (0)));
          RTX_FRAME_RELATED_P (insn) = 1;
          if (! epilogue_p)
            {
              /* If we don't have an epilogue, then the return value
                 doesn't appear to be needed and the above store will
                 appear dead and will elicit a warning from flow.  */
              emit_insn (gen_rtx_USE (VOIDmode, ia64_rp_reg));
            }
        }
      else
        ia64_rp_regno = 0;
    }

  /* Set up frame pointer and stack pointer.  */
  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_movdi (hard_frame_pointer_rtx,
                                   stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (frame_size != 0)
    {
      if (CONST_OK_FOR_I (-frame_size))
        offset = GEN_INT (-frame_size);
      else
        {
          offset = gen_rtx_REG (DImode, GR_REG (2));
          insn = emit_insn (gen_movdi (offset, GEN_INT (-frame_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      /* If there is a frame pointer, then we need to make the stack pointer
         decrement depend on the frame pointer, so that the stack pointer
         update won't be moved past fp-relative stores to the frame.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,
                                                       stack_pointer_rtx,
                                                       offset,
                                                       hard_frame_pointer_rtx));
      else
        insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
                                      offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Save registers to frame.  */
  save_restore_insns (1);
}

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.  */

void
ia64_expand_epilogue ()
{
  /* Restore registers from frame.  */
  save_restore_insns (0);

  /* ??? The gen_epilogue_deallocate_stack call below does not work.  This
     is mainly because there is no fp+offset addressing mode, so most loads
     from the frame do not actually use the frame pointer; they use a pseudo
     computed from the frame pointer.  The same problem exists with the
     stack pointer when there is no frame pointer.  I think this can be
     fixed only by making the dependency analysis code in sched smarter, so
     that it recognizes references to the frame, and makes succeeding stack
     pointer updates anti-dependent on them.  */
  emit_insn (gen_blockage ());

  if (cfun->machine->ia64_eh_epilogue_sp == NULL_RTX)
    {
      if (frame_pointer_needed)
        {
          /* If there is a frame pointer, then we need to make the stack
             pointer restore depend on the frame pointer, so that the stack
             pointer restore won't be moved up past fp-relative loads from
             the frame.  */
          emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx));
        }
      else
        {
          int frame_size = current_frame_info.total_size;
          rtx offset;

          if (frame_size != 0)
            {
              if (CONST_OK_FOR_I (frame_size))
                offset = GEN_INT (frame_size);
              else
                {
                  offset = gen_rtx_REG (DImode, GR_REG (2));
                  emit_insn (gen_movdi (offset, GEN_INT (frame_size)));
                }
              emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
                                     offset));
            }
        }
    }
  /* Return via eh_epilogue, so we already have our new stack pointer.  */
  else
    emit_insn (gen_movdi (stack_pointer_rtx,
                          cfun->machine->ia64_eh_epilogue_sp));

  if (ia64_arpfs_regno)
    emit_insn (gen_pfs_restore (gen_rtx_REG (DImode, ia64_arpfs_regno)));

  if (ia64_rp_regno)
    emit_move_insn (gen_rtx_REG (DImode, BR_REG (0)),
                    gen_rtx_REG (DImode, ia64_rp_regno));

  if (cfun->machine->ia64_eh_epilogue_bsp != NULL_RTX)
    {
      /* We have to restore the bsp.  */
      emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
    }
  emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
}

/* Emit the function prologue.  */

void
ia64_function_prologue (file, size)
     FILE *file;
     int size;
{
  rtx insn;

  if (ia64_need_regstk)
    fprintf (file, "\t.regstk %d, 0, 0, 0\n", ia64_input_regs);

  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  /* Emit the .prologue directive.  In order to do this, we need to find
     where the stack pointer is moved to a GR, if it is, and mark it.  */

  for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
    {
      if (RTX_FRAME_RELATED_P (insn) && GET_CODE (insn) == INSN)
        {
          rtx pat = PATTERN (insn);
          if (GET_CODE (pat) == SET)
            {
              rtx dest = SET_DEST (pat);
              rtx src = SET_SRC (pat);
              if (GET_CODE (src) == REG && REGNO (src) == STACK_POINTER_REGNUM
                  && GET_CODE (dest) == REG)
                {
                  int reg = REGNO (dest);
                  if (REGNO (dest) == FRAME_POINTER_REGNUM)
                    reg = ia64_fp_regno;
                  fprintf (file, "\t.prologue 0x2, %d\n", reg);
                  break;
                }
            }
        }
    }

  if (insn == NULL_RTX)
    fprintf (file, "\t.prologue\n");
}

/* Emit the function epilogue.  */

void
ia64_function_epilogue (file, size)
     FILE *file;
     int size;
{
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  return (reload_completed && ! frame_pointer_needed
          && ia64_compute_frame_size (get_frame_size ()));
}


/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode;
     tree type;
     int * pretend_size;
     int second_time;
{
  /* If this is a stdarg function, then don't save the current argument.  */
  int offset = ! current_function_varargs;

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      if (! second_time)
        {
          int i;
          int first_reg = GR_ARG_FIRST + cum.words + offset;
          rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (16));
          rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
          rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
          rtx insn;

          /* We must emit st8.spill insns instead of st8 because we might
             be saving non-argument registers, and non-argument registers
             might not contain valid values.  */
          emit_move_insn (tmp_reg, virtual_incoming_args_rtx);
          for (i = first_reg; i < GR_ARG_FIRST + 8; i++)
            {
              insn = emit_insn (gen_gr_spill (mem, gen_rtx_REG (DImode, i)));
              REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, tmp_reg, 0);
            }
        }
      *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset)
                       * UNITS_PER_WORD);
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

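/* Illustration (not in the original source): under these rules
   struct { float x; float y[3]; } is an HFA of SFmode, while
   struct { float x; double y; } is not an HFA, because its leaves have
   different floating point modes.  */
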
static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:     case INTEGER_TYPE:   case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:  case CHAR_TYPE:      case POINTER_TYPE:
    case OFFSET_TYPE:   case REFERENCE_TYPE: case METHOD_TYPE:
    case FILE_TYPE:     case SET_TYPE:       case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
         types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
        return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
                              * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
        return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
         mode if this is contained within an aggregate.  */
      if (nested)
        return TYPE_MODE (type);
      else
        return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
        {
          if (TREE_CODE (t) != FIELD_DECL)
            continue;

          mode = hfa_element_mode (TREE_TYPE (t), 1);
          if (know_element_mode)
            {
              if (mode != element_mode)
                return VOIDmode;
            }
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
            return VOIDmode;
          else
            {
              know_element_mode = 1;
              element_mode = mode;
            }
        }
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
         that the backend doesn't know about.  This can happen via the
         aggregate_value_p call in init_function_start.  All we can do is
         ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}

/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */

/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Arguments larger than 8 bytes start at the next even boundary.  */
  if (words > 1 && (cum->words & 1))
    offset = 1;
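  /* E.g., a 16 byte struct arriving when cum->words is odd skips one slot
     so that it starts on an even slot, i.e. on a 16 byte boundary.  */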

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
                                                              + fp_regs)),
                                      GEN_INT (offset));
          /* ??? Padding for XFmode type?  */
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
        offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
         that needs to go in GR regs.  */
      else if (byte_size != offset)
        int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
        {
          enum machine_mode gr_mode = DImode;

          /* If we have an odd 4 byte hunk because we ran out of FR regs,
             then this goes in a GR reg left adjusted/little endian, right
             adjusted/big endian.  */
          /* ??? Currently this is handled wrong, because 4-byte hunks are
             always right adjusted/little endian.  */
          if (offset & 0x4)
            gr_mode = SImode;
          /* If we have an even 4 byte hunk because the aggregate is a
             multiple of 4 bytes in size, then this goes in a GR reg right
             adjusted/little endian.  */
          else if (byte_size - offset == 4)
            gr_mode = SImode;

          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (gr_mode, (basereg
                                                             + int_regs)),
                                      GEN_INT (offset));
          offset += GET_MODE_SIZE (gr_mode);
          int_regs++;
        }

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
        return XEXP (loc[0], 0);
      else
        return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
                                                          + cum->fp_regs)),
                                      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode,
                                                   (basereg + cum->words
                                                    + offset)),
                                      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}

/* Return the number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments larger than 8 bytes start at the next even boundary.  */
  if (words > 1 && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments larger than 8 bytes start at the next even boundary.  */
  if (words > 1 && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
        {
          /* ??? Padding for XFmode type?  */
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        return;
      else
        /* ??? Complex types should not reach here.  */
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    /* ??? Complex types should not reach here.  */
    cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  return;
}

/* Implement va_start.  */

void
ia64_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  int arg_words;
  int ofs;

  arg_words = current_function_args_info.words;

  if (stdarg_p)
    ofs = 0;
  else
    ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);

  nextarg = plus_constant (nextarg, ofs);
  std_expand_builtin_va_start (1, valist, nextarg);
}

/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  HOST_WIDE_INT size;
  tree t;

  /* Arguments larger than 8 bytes are 16 byte aligned.  */
  size = int_size_in_bytes (type);
  if (size > UNITS_PER_WORD)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
                 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
                 build_int_2 (-2 * UNITS_PER_WORD, -1));
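      /* I.e., valist = (valist + 15) & -16; with UNITS_PER_WORD == 8 this
         rounds the argument address up to a 16 byte boundary.  */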
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}

/* Return 1 if the function return value is returned in memory.  Return 0 if
   it is in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  int byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = ((mode == BLKmode)
               ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

  /* HFAs with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      /* ??? Padding for XFmode type?  */
      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
        return 1;
      else
        return 0;
    }

  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}

/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
                                      GEN_INT (offset));
          /* ??? Padding for XFmode type?  */
          offset += hfa_size;
        }

      if (i == 1)
        return XEXP (loc[0], 0);
      else
        return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream;
     rtx address;
{
}

/* Print an operand to an assembler instruction.
   B    Work arounds for hardware bugs.
   C    Swap and print a comparison operator.
   D    Print an FP comparison operator.
   E    Print 32 - constant, for SImode shifts as extract.
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
        a floating point register emitted normally.
   I    Invert a predicate register by adding 1.
   O    Append .acq for volatile load.
   P    Postincrement of a MEM.
   Q    Append .rel for volatile store.
   S    Shift amount for shladd instruction.
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
        for Intel assembler.
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
        for Intel assembler.
   r    Print register name, or constant 0 as r0.  HP compatibility for
        Linux kernel.  */

void
ia64_print_operand (file, x, code)
     FILE * file;
     rtx x;
     int code;
{
  switch (code)
    {
      /* XXX Add other codes here.  */

    case 0:
      /* Handled below.  */
      break;

    case 'B':
      if (TARGET_A_STEP)
        fputs (" ;; nop 0 ;; nop 0 ;;", file);
      return;

    case 'C':
      {
        enum rtx_code c = swap_condition (GET_CODE (x));
        fputs (GET_RTX_NAME (c), file);
        return;
      }

    case 'D':
      fputs (GET_CODE (x) == NE ? "neq" : GET_RTX_NAME (GET_CODE (x)), file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
        fputs (reg_names [FR_REG (0)], file);
      else if (x == CONST1_RTX (GET_MODE (x)))
        fputs (reg_names [FR_REG (1)], file);
      else if (GET_CODE (x) == REG)
        fputs (reg_names [REGNO (x)], file);
      else
        abort ();
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
        fputs (".acq", file);
      return;

    case 'P':
      {
        int value;

        if (GET_CODE (XEXP (x, 0)) != POST_INC
            && GET_CODE (XEXP (x, 0)) != POST_DEC)
          return;

        fputs (", ", file);

        value = GET_MODE_SIZE (GET_MODE (x));

        /* ??? This is for ldf.fill and stf.spill which use XFmode, but which
           actually need 16 byte increments.  Perhaps we can change them
           to use TFmode instead.  Or don't use POST_DEC/POST_INC for them.
           Currently, there are no other uses of XFmode, so hacking it here
           is no problem.  */
        if (value == 12)
          value = 16;

        if (GET_CODE (XEXP (x, 0)) == POST_DEC)
          value = -value;

        fprintf (file, "%d", value);
        return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
        fputs (".rel", file);
      return;

    case 'S':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
1788 fprintf (file, "0x%x", INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          char *prefix = "0x";
          if (INTVAL (x) & 0x80000000)
            {
              fprintf (file, "0xffffffff");
              prefix = "";
            }
1802 fprintf (file, "%s%x", prefix, INTVAL (x) & 0xffffffff);
1803 return;
1804 }
1805 break;
1806
1807 case 'r':
1808 /* If this operand is the constant zero, write it as zero. */
1809 if (GET_CODE (x) == REG)
1810 fputs (reg_names[REGNO (x)], file);
1811 else if (x == CONST0_RTX (GET_MODE (x)))
1812 fputs ("r0", file);
1813 else
1814 output_operand_lossage ("invalid %%r value");
1815 return;
1816
1817 default:
1818 output_operand_lossage ("ia64_print_operand: unknown code");
1819 return;
1820 }
1821
1822 switch (GET_CODE (x))
1823 {
1824 /* This happens for the spill/restore instructions. */
1825 case POST_INC:
1826 x = XEXP (x, 0);
1827 /* ... fall through ... */
1828
1829 case REG:
1830 fputs (reg_names [REGNO (x)], file);
1831 break;
1832
1833 case MEM:
1834 {
1835 rtx addr = XEXP (x, 0);
1836 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1837 addr = XEXP (addr, 0);
1838 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
1839 break;
1840 }
1841
1842 default:
1843 output_addr_const (file, x);
1844 break;
1845 }
1846
1847 return;
1848 }
1849
1850 \f
1851
1852 /* This function returns the register class required for a secondary
1853 register when copying between one of the registers in CLASS, and X,
1854 using MODE. A return value of NO_REGS means that no secondary register
1855 is required. */
1856
1857 enum reg_class
1858 ia64_secondary_reload_class (class, mode, x)
1859 enum reg_class class;
1860 enum machine_mode mode ATTRIBUTE_UNUSED;
1861 rtx x;
1862 {
1863 int regno = -1;
1864
1865 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
1866 regno = true_regnum (x);
1867
1868 /* ??? This is required because of a bad gcse/cse/global interaction.
1869 We end up with two pseudos with overlapping lifetimes, both of which are
1870 equiv to the same constant and both of which need to be in BR_REGS. This
1871 results in a BR_REGS to BR_REGS copy which doesn't exist. To reproduce,
1872 return NO_REGS here, and compile divdi3 in libgcc2.c. This seems to be
1873 a cse bug. cse_basic_block_end changes depending on the path length,
1874 which means the qty_first_reg check in make_regs_eqv can give different
1875 answers at different times. */
1876 /* ??? At some point I'll probably need a reload_indi pattern to handle
1877 this. */
1878 if (class == BR_REGS && BR_REGNO_P (regno))
1879 return GR_REGS;
1880
1881 /* This is needed if a pseudo used as a call_operand gets spilled to a
1882 stack slot. */
1883 if (class == BR_REGS && GET_CODE (x) == MEM)
1884 return GR_REGS;
1885
1886 /* This can happen when a paradoxical subreg is an operand to the muldi3
1887 pattern. */
1888 /* ??? This shouldn't be necessary after instruction scheduling is enabled,
1889 because paradoxical subregs are not accepted by register_operand when
1890 INSN_SCHEDULING is defined. Or alternatively, stop the paradoxical subreg
1891 stupidity in the *_operand functions in recog.c. */
1892 if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS)
1893 && GET_CODE (x) == MEM
1894 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
1895 || GET_MODE (x) == QImode))
1896 return GR_REGS;
1897
1898 /* This can happen because of the ior/and/etc patterns that accept FP
1899 registers as operands. If the third operand is a constant, then it
1900 needs to be reloaded into a FP register. */
1901 if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS)
1902 && GET_CODE (x) == CONST_INT)
1903 return GR_REGS;
1904
1905 /* Moving an integer from an FP register to memory requires a general register
1906 as an intermediary. This is not necessary if we are moving a DImode
1907 subreg of a DFmode value from an FP register to memory, since stfd will
1908 do the right thing in this case. */
1909 if (class == FR_INT_REGS && GET_CODE (x) == MEM && GET_MODE (x) == DImode)
1910 return GR_REGS;
1911
1912 /* ??? This happens if we cse/gcse a CCmode value across a call, and the
1913 function has a nonlocal goto. This is because global does not allocate
1914 call crossing pseudos to hard registers when current_function_has_
1915 nonlocal_goto is true. This is relatively common for C++ programs that
1916 use exceptions. To reproduce, return NO_REGS and compile libstdc++. */
1917 if (class == PR_REGS && GET_CODE (x) == MEM)
1918 return GR_REGS;
1919
1920 return NO_REGS;
1921 }
1922
1923 \f
1924 /* Emit text to declare externally defined variables and functions, because
1925 the Intel assembler does not support undefined externals. */
1926
1927 void
1928 ia64_asm_output_external (file, decl, name)
1929 FILE *file;
1930 tree decl;
1931 char *name;
1932 {
1933 int save_referenced;
1934
1935 /* GNU as does not need anything here. */
1936 if (TARGET_GNU_AS)
1937 return;
1938
1939 /* ??? The Intel assembler creates a reference that needs to be satisfied by
1940 the linker when we do this, so we need to be careful not to do this for
1941 builtin functions which have no library equivalent. Unfortunately, we
1942 can't tell here whether or not a function will actually be called by
1943 expand_expr, so we pull in library functions even if we may not need
1944 them later. */
1945 if (! strcmp (name, "__builtin_next_arg")
1946 || ! strcmp (name, "alloca")
1947 || ! strcmp (name, "__builtin_constant_p")
1948 || ! strcmp (name, "__builtin_args_info"))
1949 return;
1950
1951 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
1952 restore it. */
1953 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
1954 if (TREE_CODE (decl) == FUNCTION_DECL)
1955 {
1956 fprintf (file, "\t%s\t ", TYPE_ASM_OP);
1957 assemble_name (file, name);
1958 putc (',', file);
1959 fprintf (file, TYPE_OPERAND_FMT, "function");
1960 putc ('\n', file);
1961 }
1962 ASM_GLOBALIZE_LABEL (file, name);
1963 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
1964 }
1965 \f
1966 /* Parse the -mfixed-range= option string. */
1967
1968 static void
1969 fix_range (str)
1970 char *str;
1971 {
1972 int i, first, last;
1973 char *dash, *comma;
1974
1975 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
1976 REG2 are either register names or register numbers. The effect
1977 of this option is to mark the registers in the range from REG1 to
1978 REG2 as ``fixed'' so they won't be used by the compiler. This is
1979 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
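/* For example (a hypothetical option string, not from the original
   sources), -mfixed-range=f16-f31,f32-f127 would mark both ranges of
   floating-point registers as fixed and call-used.  */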
1980
1981 while (1)
1982 {
1983 dash = strchr (str, '-');
1984 if (!dash)
1985 {
1986 warning ("value of -mfixed-range must have form REG1-REG2");
1987 return;
1988 }
1989 *dash = '\0';
1990
1991 comma = strchr (dash + 1, ',');
1992 if (comma)
1993 *comma = '\0';
1994
1995 first = decode_reg_name (str);
1996 if (first < 0)
1997 {
1998 warning ("unknown register name: %s", str);
1999 return;
2000 }
2001
2002 last = decode_reg_name (dash + 1);
2003 if (last < 0)
2004 {
2005 warning ("unknown register name: %s", dash + 1);
2006 return;
2007 }
2008
2009 *dash = '-';
2010
2011 if (first > last)
2012 {
2013 warning ("%s-%s is an empty range", str, dash + 1);
2014 return;
2015 }
2016
2017 for (i = first; i <= last; ++i)
2018 fixed_regs[i] = call_used_regs[i] = 1;
2019
2020 if (!comma)
2021 break;
2022
2023 *comma = ',';
2024 str = comma + 1;
2025 }
2026 }
2027
2028 /* Called to register all of our global variables with the garbage
2029 collector. */
2030
2031 static void
2032 ia64_add_gc_roots ()
2033 {
2034 ggc_add_rtx_root (&ia64_compare_op0, 1);
2035 ggc_add_rtx_root (&ia64_compare_op1, 1);
2036 }
2037
2038 static void
2039 ia64_init_machine_status (p)
2040 struct function *p;
2041 {
2042 p->machine =
2043 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
2044 }
2045
2046 static void
2047 ia64_mark_machine_status (p)
2048 struct function *p;
2049 {
2050 ggc_mark_rtx (p->machine->ia64_eh_epilogue_sp);
2051 ggc_mark_rtx (p->machine->ia64_eh_epilogue_bsp);
2052 }
2053
2054
2055 /* Handle TARGET_OPTIONS switches. */
2056
2057 void
2058 ia64_override_options ()
2059 {
2060 if (ia64_fixed_range_string)
2061 fix_range (ia64_fixed_range_string);
2062
2063 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
2064
2065 init_machine_status = ia64_init_machine_status;
2066 mark_machine_status = ia64_mark_machine_status;
2067
2068 ia64_add_gc_roots ();
2069 }
2070 \f
2071 /* The following collection of routines emit instruction group stop bits as
2072 necessary to avoid dependencies. */
2073
2074 /* Need to track some additional registers as far as serialization is
2075 concerned so we can properly handle br.call and br.ret. We could
2076 make these registers visible to gcc, but since these registers are
2077 never explicitly used in gcc generated code, it seems wasteful to
2078 do so (plus it would make the call and return patterns needlessly
2079 complex). */
2080 #define REG_GP (GR_REG (1))
2081 #define REG_RP (BR_REG (0))
2082 #define REG_AR_PFS (FIRST_PSEUDO_REGISTER)
2083 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
2084 /* ??? This will eventually need to be a hard register. */
2085 #define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2)
2086 /* This is used for volatile asms which may require a stop bit immediately
2087 before and after them. */
2088 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3)
2089 #define NUM_REGS (FIRST_PSEUDO_REGISTER + 4)
2090
2091 /* For each register, we keep track of how many times it has been
2092 written in the current instruction group. If a register is written
2093 unconditionally (no qualifying predicate), WRITE_COUNT is set to 2
2094 and FIRST_PRED is ignored. If a register is written if its
2095 qualifying predicate P is true, we set WRITE_COUNT to 1 and
2096 FIRST_PRED to P. Later on, the same register may be written again
2097 by the complement of P (P+1 if P is even, P-1 otherwise) and when
2098 this happens, WRITE_COUNT gets set to 2. The result of this is
2099 that whenever an insn attempts to write a register whose
2100 WRITE_COUNT is two, we need to issue an insn group barrier first. */
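/* A worked example (an illustration, not from the original sources):
   after the complementary pair

	(p6) mov r8 = r2
	(p7) mov r8 = r3

   r8 ends up with WRITE_COUNT == 2, just as if it had been written
   unconditionally, so one more write to r8 in the same instruction
   group forces a stop bit first.  */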
2101 struct reg_write_state
2102 {
2103 char write_count;
2104 char written_by_fp; /* Was register written by a floating-point insn? */
2105 short first_pred; /* 0 means ``no predicate'' */
2106 };
2107
2108 /* Cumulative info for the current instruction group. */
2109 struct reg_write_state rws_sum[NUM_REGS];
2110 /* Info for the current instruction. This gets copied to rws_sum after a
2111 stop bit is emitted. */
2112 struct reg_write_state rws_insn[NUM_REGS];
2113
2114 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
2115 RTL for one instruction. */
2116 struct reg_flags
2117 {
2118 unsigned int is_write : 1; /* Is register being written? */
2119 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
2120 unsigned int is_branch : 1; /* Is register used as part of a branch? */
2121 };
2122
2123 /* Update *RWS for REGNO, which is being written by the current instruction,
2124 with predicate PRED, and associated register flags in FLAGS. */
2125
2126 static void
2127 rws_update (rws, regno, flags, pred)
2128 struct reg_write_state *rws;
2129 int regno;
2130 struct reg_flags flags;
2131 int pred;
2132 {
2133 rws[regno].write_count += pred ? 1 : 2;
2134 rws[regno].written_by_fp |= flags.is_fp;
2135 rws[regno].first_pred = pred;
2136 }
2137
2138 /* Handle an access to register REGNO of type FLAGS using predicate register
2139 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
2140 a dependency with an earlier instruction in the same group. */
2141
2142 static int
2143 rws_access_reg (regno, flags, pred)
2144 int regno;
2145 struct reg_flags flags;
2146 int pred;
2147 {
2148 int need_barrier = 0;
2149 int is_predicate_reg;
2150
2151 if (regno >= NUM_REGS)
2152 abort ();
2153
2154 if (flags.is_write)
2155 {
2156 /* Does one insn write the same reg multiple times? */
2157 if (rws_insn[regno].write_count > 0)
2158 abort ();
2159
2160 /* Update info for current instruction. */
2161 rws_update (rws_insn, regno, flags, pred);
2162
2163 /* ??? This is necessary because predicate regs require two hard
2164 registers. However, this should be using HARD_REGNO_NREGS so that
2165 it works for all multi-reg hard registers, instead of only for
2166 predicate registers. */
2167 is_predicate_reg = REGNO_REG_CLASS (regno) == PR_REGS;
2168 if (is_predicate_reg)
2169 rws_update (rws_insn, regno + 1, flags, pred);
2170
2171 switch (rws_sum[regno].write_count)
2172 {
2173 case 0:
2174 /* The register has not been written yet. */
2175 rws_update (rws_sum, regno, flags, pred);
2176 if (is_predicate_reg)
2177 rws_update (rws_sum, regno + 1, flags, pred);
2178 break;
2179
2180 case 1:
2181 /* The register has been written via a predicate. If this is
2182 not a complementary predicate, then we need a barrier. */
2183 /* ??? This assumes that P and P+1 are always complementary
2184 predicates for P even. */
2185 if ((rws_sum[regno].first_pred ^ 1) != pred)
2186 need_barrier = 1;
2187 rws_update (rws_sum, regno, flags, pred);
2188 if (is_predicate_reg)
2189 rws_update (rws_sum, regno + 1, flags, pred);
2190 break;
2191
2192 case 2:
2193 /* The register has been unconditionally written already. We
2194 need a barrier. */
2195 need_barrier = 1;
2196 break;
2197
2198 default:
2199 abort ();
2200 }
2201 }
2202 else
2203 {
2204 if (flags.is_branch)
2205 {
2206 /* Branches have several RAW exceptions that allow us to avoid
2207 barriers. */
2208
2209 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == REG_AR_PFS)
2210 /* RAW dependencies on branch regs are permissible as long
2211 as the writer is a non-branch instruction. Since we
2212 never generate code that uses a branch register written
2213 by a branch instruction, handling this case is
2214 easy. */
2215 /* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop,
2216 br.wexit, br.wtop. This is true currently. */
2217 return 0;
2218
2219 if (REGNO_REG_CLASS (regno) == PR_REGS
2220 && ! rws_sum[regno].written_by_fp)
2221 /* The predicates of a branch are available within the
2222 same insn group as long as the predicate was written by
2223 something other than a floating-point instruction. */
2224 return 0;
2225 }
2226
2227 switch (rws_sum[regno].write_count)
2228 {
2229 case 0:
2230 /* The register has not been written yet. */
2231 break;
2232
2233 case 1:
2234 /* The register has been written via a predicate. If this is
2235 not a complementary predicate, then we need a barrier. */
2236 /* ??? This assumes that P and P+1 are always complementary
2237 predicates for P even. */
2238 if ((rws_sum[regno].first_pred ^ 1) != pred)
2239 need_barrier = 1;
2240 break;
2241
2242 case 2:
2243 /* The register has been unconditionally written already. We
2244 need a barrier. */
2245 need_barrier = 1;
2246 break;
2247
2248 default:
2249 abort ();
2250 }
2251 }
2252
2253 return need_barrier;
2254 }
2255
2256 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
2257 Return 1 if this access creates a dependency with an earlier instruction
2258 in the same group. */
2259
2260 static int
2261 rtx_needs_barrier (x, flags, pred)
2262 rtx x;
2263 struct reg_flags flags;
2264 int pred;
2265 {
2266 int i, j;
2267 int is_complemented = 0;
2268 int need_barrier = 0;
2269 const char *format_ptr;
2270 struct reg_flags new_flags;
2271 rtx src, dst;
2272 rtx cond = 0;
2273
2274 if (! x)
2275 return 0;
2276
2277 new_flags = flags;
2278
2279 switch (GET_CODE (x))
2280 {
2281 case SET:
2282 src = SET_SRC (x);
2283 switch (GET_CODE (src))
2284 {
2285 case CALL:
2286 /* We don't need to worry about the result registers that
2287 get written by subroutine call. */
2288 need_barrier = rtx_needs_barrier (src, flags, pred);
2289 return need_barrier;
2290
2291 case IF_THEN_ELSE:
2292 if (SET_DEST (x) == pc_rtx)
2293 {
2294 /* X is a conditional branch. */
2295 /* ??? This seems redundant, as the caller sets this bit for
2296 all JUMP_INSNs. */
2297 new_flags.is_branch = 1;
2298 need_barrier = rtx_needs_barrier (src, new_flags, pred);
2299 return need_barrier;
2300 }
2301 else
2302 {
2303 /* X is a conditional move. */
2304 cond = XEXP (src, 0);
2305 if (GET_CODE (cond) == EQ)
2306 is_complemented = 1;
2307 cond = XEXP (cond, 0);
2308 if (GET_CODE (cond) != REG
2309 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
2310 abort ();
2311
2312 if (XEXP (src, 1) == SET_DEST (x)
2313 || XEXP (src, 2) == SET_DEST (x))
2314 {
2315 /* X is a conditional move that conditionally writes the
2316 destination. */
2317
2318 /* We need another complement in this case. */
2319 if (XEXP (src, 1) == SET_DEST (x))
2320 is_complemented = ! is_complemented;
2321
2322 pred = REGNO (cond);
2323 if (is_complemented)
2324 ++pred;
2325 }
2326
2327 /* ??? If this is a conditional write to the dest, then this
2328 instruction does not actually read one source. This probably
2329 doesn't matter, because that source is also the dest. */
2330 /* ??? Multiple writes to predicate registers are allowed
2331 if they are all AND type compares, or if they are all OR
2332 type compares. We do not generate such instructions
2333 currently. */
2334 }
2335 /* ... fall through ... */
2336
2337 default:
2338 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
2339 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
2340 /* Set new_flags.is_fp to 1 so that we know we're dealing
2341 with a floating point comparison when processing the
2342 destination of the SET. */
2343 new_flags.is_fp = 1;
2344 break;
2345 }
2346 need_barrier = rtx_needs_barrier (src, flags, pred);
2347 /* This instruction unconditionally uses a predicate register. */
2348 if (cond)
2349 need_barrier |= rws_access_reg (REGNO (cond), flags, 0);
2350
2351 dst = SET_DEST (x);
2352 if (GET_CODE (dst) == ZERO_EXTRACT)
2353 {
2354 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
2355 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
2356 dst = XEXP (dst, 0);
2357 }
2358 new_flags.is_write = 1;
2359 need_barrier |= rtx_needs_barrier (dst, new_flags, pred);
2360 break;
2361
2362 case CALL:
2363 new_flags.is_write = 0;
2364 /* ??? Why is this here? It seems unnecessary. */
2365 need_barrier |= rws_access_reg (REG_GP, new_flags, pred);
2366 need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
2367
2368 /* Avoid multiple register writes, in case this is a pattern with
2369 multiple CALL rtx. This avoids an abort in rws_access_reg. */
2370 /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM,
2371 and that we don't have predicated calls/returns. */
2372 if (! rws_insn[REG_AR_CFM].write_count)
2373 {
2374 new_flags.is_write = 1;
2375 need_barrier |= rws_access_reg (REG_RP, new_flags, pred);
2376 need_barrier |= rws_access_reg (REG_AR_PFS, new_flags, pred);
2377 need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
2378 }
2379 break;
2380
2381 case CLOBBER:
2382 #if 0
2383 case USE:
2384 /* We must handle USE here in case it occurs within a PARALLEL.
2385 For instance, the mov ar.pfs= instruction has a USE which requires
2386 a barrier between it and an immediately preceding alloc. */
2387 #endif
2388 /* Clobber & use are for earlier compiler-phases only. */
2389 break;
2390
2391 case ASM_OPERANDS:
2392 case ASM_INPUT:
2393 /* We always emit stop bits for traditional asms. We emit stop bits
2394 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
2395 if (GET_CODE (x) != ASM_OPERANDS
2396 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
2397 {
2398 /* Avoid writing the register multiple times if we have multiple
2399 asm outputs. This avoids an abort in rws_access_reg. */
2400 if (! rws_insn[REG_VOLATILE].write_count)
2401 {
2402 new_flags.is_write = 1;
2403 rws_access_reg (REG_VOLATILE, new_flags, pred);
2404 }
2405 return 1;
2406 }
2407
2408 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
2409 We cannot just fall through here, since then we would be confused
2410 by the ASM_INPUT rtxs inside the ASM_OPERANDS, which, unlike
2411 standalone ASM_INPUTs, do not indicate traditional asms. */
2412
2413 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
2414 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
2415 need_barrier = 1;
2416 break;
2417
2418 case PARALLEL:
2419 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
2420 if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred))
2421 need_barrier = 1;
2422 break;
2423
2424 case SUBREG:
2425 x = SUBREG_REG (x);
2426 /* FALLTHRU */
2427 case REG:
2428 need_barrier = rws_access_reg (REGNO (x), flags, pred);
2429 break;
2430
2431 case MEM:
2432 /* Find the regs used in memory address computation. */
2433 new_flags.is_write = 0;
2434 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
2435 break;
2436
2437 case CONST_INT: case CONST_DOUBLE:
2438 case SYMBOL_REF: case LABEL_REF: case CONST:
2439 break;
2440
2441 /* Operators with side-effects. */
2442 case POST_INC: case POST_DEC:
2443 if (GET_CODE (XEXP (x, 0)) != REG)
2444 abort ();
2445
2446 new_flags.is_write = 0;
2447 need_barrier = rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred);
2448 new_flags.is_write = 1;
2449 need_barrier |= rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred);
2450 break;
2451
2452 /* Handle common unary and binary ops for efficiency. */
2453 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
2454 case MOD: case UDIV: case UMOD: case AND: case IOR:
2455 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
2456 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
2457 case NE: case EQ: case GE: case GT: case LE:
2458 case LT: case GEU: case GTU: case LEU: case LTU:
2459 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
2460 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
2461 break;
2462
2463 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
2464 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
2465 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
2466 case SQRT: case FFS:
2467 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
2468 break;
2469
2470 case UNSPEC:
2471 switch (XINT (x, 1))
2472 {
2473 /* ??? For the st8.spill/ld8.fill instructions, we can ignore unat
2474 dependencies as long as we don't have both a spill and fill in
2475 the same instruction group. We need to check for that. */
2476 case 1: /* st8.spill */
2477 case 2: /* ld8.fill */
2478 case 3: /* stf.spill */
2479 case 4: /* ldf.spill */
2480 case 8: /* popcnt */
2481 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
2482 break;
2483
2484 case 5: /* mov =pr */
2485 /* This reads all predicate registers. */
2486 for (i = PR_REG (1); i < PR_REG (64); i++)
2487 need_barrier |= rws_access_reg (i, flags, pred);
2488 break;
2489
2490 case 6:
2491 case 7:
2492 abort ();
2493
2494 /* ??? Should track unat reads and writes. */
2495 case 9: /* mov =ar.unat */
2496 case 10: /* mov ar.unat= */
2497 break;
2498 case 11: /* mov ar.ccv= */
2499 break;
2500 case 12: /* mf */
2501 break;
2502 case 13: /* cmpxchg_acq */
2503 break;
2504 case 14: /* val_compare_and_swap */
2505 break;
2506 case 15: /* lock_release */
2507 break;
2508 case 16: /* lock_test_and_set */
2509 break;
2510 case 17: /* _and_fetch */
2511 break;
2512 case 18: /* fetch_and_ */
2513 break;
2514 case 19: /* fetchadd_acq */
2515 break;
2516 case 20: /* mov = ar.bsp */
2517 break;
2518
2519 default:
2520 abort ();
2521 }
2522 break;
2523
2524 case UNSPEC_VOLATILE:
2525 switch (XINT (x, 1))
2526 {
2527 case 0: /* alloc */
2528 /* Alloc must always be the first instruction. Currently, we
2529 only emit it at the function start, so we don't need to worry
2530 about emitting a stop bit before it. */
2531 need_barrier = rws_access_reg (REG_AR_PFS, flags, pred);
2532
2533 new_flags.is_write = 1;
2534 need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
2535 return need_barrier;
2536
2537 case 1: /* blockage */
2538 case 2: /* insn group barrier */
2539 return 0;
2540
2541 case 3: /* flush_cache */
2542 return rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
2543
2544 case 4: /* mov ar.pfs= */
2545 new_flags.is_write = 1;
2546 need_barrier = rws_access_reg (REG_AR_PFS, new_flags, pred);
2547 break;
2548
2549 case 6: /* mov pr= */
2550 /* This writes all predicate registers. */
2551 new_flags.is_write = 1;
2552 /* We need to skip by two, because rws_access_reg always writes
2553 to two predicate registers at a time. */
2554 /* ??? Strictly speaking, we shouldn't be counting writes to pr0. */
2555 for (i = PR_REG (0); i < PR_REG (64); i += 2)
2556 need_barrier |= rws_access_reg (i, new_flags, pred);
2557 break;
2558
2559 case 5: /* set_bsp */
2560 need_barrier = 1;
2561 break;
2562
2563 default:
2564 abort ();
2565 }
2566 break;
2567
2568 case RETURN:
2569 new_flags.is_write = 0;
2570 need_barrier = rws_access_reg (REG_RP, flags, pred);
2571 need_barrier |= rws_access_reg (REG_AR_PFS, flags, pred);
2572
2573 new_flags.is_write = 1;
2574 need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
2575 need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
2576 break;
2577
2578 default:
2579 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
2580 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
2581 switch (format_ptr[i])
2582 {
2583 case '0': /* unused field */
2584 case 'i': /* integer */
2585 case 'n': /* note */
2586 case 'w': /* wide integer */
2587 case 's': /* pointer to string */
2588 case 'S': /* optional pointer to string */
2589 break;
2590
2591 case 'e':
2592 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
2593 need_barrier = 1;
2594 break;
2595
2596 case 'E':
2597 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
2598 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
2599 need_barrier = 1;
2600 break;
2601
2602 default:
2603 abort ();
2604 }
2605 }
2606 return need_barrier;
2607 }
2608
2609 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
2610 as necessary to eliminate dependencies. */
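/* In the assembly output a stop bit appears as ";;"; the
   insn_group_barrier pattern emitted below should expand to exactly
   such a stop bit.  */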
2611
2612 static void
2613 emit_insn_group_barriers (insns)
2614 rtx insns;
2615 {
2616 int need_barrier = 0;
2617 int exception_nesting = 0;
2618 struct reg_flags flags;
2619 rtx insn, prev_insn;
2620
2621 memset (rws_sum, 0, sizeof (rws_sum));
2622
2623 prev_insn = 0;
2624 for (insn = insns; insn; insn = NEXT_INSN (insn))
2625 {
2626 memset (&flags, 0, sizeof (flags));
2627 switch (GET_CODE (insn))
2628 {
2629 case NOTE:
2630 switch (NOTE_LINE_NUMBER (insn))
2631 {
2632 case NOTE_INSN_EH_REGION_BEG:
2633 exception_nesting++;
2634 break;
2635
2636 case NOTE_INSN_EH_REGION_END:
2637 exception_nesting--;
2638 break;
2639
2640 case NOTE_INSN_EPILOGUE_BEG:
2641 break;
2642
2643 default:
2644 break;
2645 }
2646 break;
2647
2648 case JUMP_INSN:
2649 case CALL_INSN:
2650 flags.is_branch = 1; /* FALLTHRU */
2651 case INSN:
2652 if (GET_CODE (PATTERN (insn)) == USE)
2653 /* Don't care about USE "insns"---those are used to
2654 indicate to the optimizer that it shouldn't get rid of
2655 certain operations. */
2656 break;
2657 else
2658 {
2659 memset (rws_insn, 0, sizeof (rws_insn));
2660 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
2661
2662 /* Check to see if the previous instruction was a volatile
2663 asm. */
2664 if (! need_barrier)
2665 need_barrier = rws_access_reg (REG_VOLATILE, flags, 0);
2666
2667 if (need_barrier)
2668 {
2669 /* PREV_INSN null can happen if the very first insn is a
2670 volatile asm. */
2671 if (prev_insn)
2672 emit_insn_after (gen_insn_group_barrier (), prev_insn);
2673 memcpy (rws_sum, rws_insn, sizeof (rws_sum));
2674 }
2675 need_barrier = 0;
2676 prev_insn = insn;
2677 }
2678 break;
2679
2680 case BARRIER:
2681 /* A barrier doesn't imply an instruction group boundary. */
2682 break;
2683
2684 case CODE_LABEL:
2685 /* Leave prev_insn alone so the barrier gets generated in front
2686 of the label, if one is needed. */
2687 break;
2688
2689 default:
2690 abort ();
2691 }
2692 }
2693 }
2694
2695 /* Perform machine dependent operations on the rtl chain INSNS. */
2696
2697 void
2698 ia64_reorg (insns)
2699 rtx insns;
2700 {
2701 emit_insn_group_barriers (insns);
2702 }
2703 \f
2704 /* Return true if REGNO is used by the epilogue. */
2705
2706 int
2707 ia64_epilogue_uses (regno)
2708 int regno;
2709 {
2710 /* For functions defined with the syscall_linkage attribute, all input
2711 registers are marked as live at all function exits. This prevents the
2712 register allocator from using the input registers, which in turn makes it
2713 possible to restart a system call after an interrupt without having to
2714 save/restore the input registers. */
2715
2716 if (IN_REGNO_P (regno)
2717 && (regno < IN_REG (current_function_args_info.words))
2718 && lookup_attribute ("syscall_linkage",
2719 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2720 return 1;
2721
2722 return 0;
2723 }
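/* For reference, a hypothetical declaration that triggers this
   handling (not from the original sources):

	extern long sys_foo (long a, long b)
	  __attribute__ ((syscall_linkage));  */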
2724
2725 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
2726
2727 int
2728 ia64_valid_type_attribute (type, attributes, identifier, args)
2729 tree type;
2730 tree attributes ATTRIBUTE_UNUSED;
2731 tree identifier;
2732 tree args;
2733 {
2734 /* We only support an attribute for function calls. */
2735
2736 if (TREE_CODE (type) != FUNCTION_TYPE
2737 && TREE_CODE (type) != METHOD_TYPE)
2738 return 0;
2739
2740 /* The "syscall_linkage" attribute says the callee is a system call entry
2741 point. This affects ia64_epilogue_uses. */
2742
2743 if (is_attribute_p ("syscall_linkage", identifier))
2744 return args == NULL_TREE;
2745
2746 return 0;
2747 }
2748 \f
2749 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
2750
2751 We add @ to the name if this goes in small data/bss. We can only put
2752 a variable in small data/bss if it is defined in this module or a module
2753 that we are statically linked with. We can't check the second condition,
2754 but TREE_STATIC gives us the first one. */
2755
2756 /* ??? If we had IPA, we could check the second condition. We could support
2757 programmer-added section attributes if the variable is not defined in this
2758 module. */
2759
2760 /* ??? See the v850 port for a cleaner way to do this. */
2761
2762 /* ??? We could also support our own long data here, generating movl/add/ld8
2763 instead of addl,ld8/ld8. This makes the code bigger, but should make the
2764 code faster because there is one less load. This also includes incomplete
2765 types which can't go in sdata/sbss. */
2766
2767 /* ??? See select_section. We must put short own readonly variables in
2768 sdata/sbss instead of the more natural rodata, because we can't perform
2769 the DECL_READONLY_SECTION test here. */
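/* Concretely (an illustration, not from the original sources): a small
   "int counter;" defined in this module has its assembler name
   rewritten to "@counter" below, and that leading SDATA_NAME_FLAG_CHAR
   is what later identifies it as a short-data symbol.  */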
2770
2771 extern struct obstack * saveable_obstack;
2772
2773 void
2774 ia64_encode_section_info (decl)
2775 tree decl;
2776 {
2777 if (TREE_CODE (decl) == FUNCTION_DECL)
2778 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
2779 /* We assume that -fpic is used only to create a shared library (dso).
2780 With -fpic, no global data can ever be sdata.
2781 Without -fpic, global common uninitialized data can never be sdata, since
2782 it can unify with a real definition in a dso. */
2783 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
2784 to access them. The linker may then be able to do linker relaxation to
2785 optimize references to them. Currently sdata implies use of gprel. */
2786 else if (! TARGET_NO_SDATA
2787 && TREE_CODE (decl) == VAR_DECL
2788 && TREE_STATIC (decl)
2789 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
2790 && ! (TREE_PUBLIC (decl)
2791 && (flag_pic
2792 || (DECL_COMMON (decl)
2793 && (DECL_INITIAL (decl) == 0
2794 || DECL_INITIAL (decl) == error_mark_node))))
2795 /* Either the variable must be declared without a section attribute,
2796 or the section must be sdata or sbss. */
2797 && (DECL_SECTION_NAME (decl) == 0
2798 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
2799 ".sdata")
2800 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
2801 ".sbss")))
2802 {
2803 int size = int_size_in_bytes (TREE_TYPE (decl));
2804 char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2805 int reloc;
2806
2807 /* ??? We should redeclare CTOR_LIST, DTOR_END so that we don't have to
2808 special case them here. Currently we put them in ctor/dtors sections
2809 behind the compiler's back. We should use section attributes
2810 instead. */
2811 if (! strcmp (str, "__CTOR_LIST__")
2812 || ! strcmp (str, "__DTOR_END__"))
2813 ;
2814
2815 /* If this is an incomplete type with size 0, then we can't put it in
2816 sdata because it might be too big when completed. */
2817 else if (size > 0 && size <= ia64_section_threshold
2818 && str[0] != SDATA_NAME_FLAG_CHAR)
2819 {
2820 int len = strlen (str);
2821 char *newstr = obstack_alloc (saveable_obstack, len + 2);
2822
2823 strcpy (newstr + 1, str);
2824 *newstr = SDATA_NAME_FLAG_CHAR;
2825 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
2826 }
2827 }
2828 /* This decl is marked as being in small data/bss but it shouldn't
2829 be; one likely explanation for this is that the decl has been
2830 moved into a different section from the one it was in when
2831 ENCODE_SECTION_INFO was first called. Remove the '@'. */
2832 else if (TREE_CODE (decl) == VAR_DECL
2833 && (XSTR (XEXP (DECL_RTL (decl), 0), 0)[0]
2834 == SDATA_NAME_FLAG_CHAR))
2835 {
2836 char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2837 int len = strlen (str);
2838 char *newstr = obstack_alloc (saveable_obstack, len);
2839
2840 strcpy (newstr, str + 1);
2841 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
2842 }
2843 }
2844 \f
2845 /* Output assembly directives for prologue regions. */
2846
2847 static int spill_offset;		/* Offset of the next spill slot from the frame reg. */
2848 static int sp_offset;		/* Offset of the next spill slot from sp. */
2849 static int spill_offset_emitted = 1;	/* Nonzero once the .spill directive has been emitted. */
2850 static rtx tmp_reg = NULL_RTX;	/* Temp reg holding a special reg awaiting its store. */
2851 static int tmp_saved = -1;		/* What tmp_reg holds: unspec code, BR regno, or -1. */
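/* For illustration (not from the original sources): a prologue copy
   such as "mov r35 = b0" is recognized by process_set below and turns
   into a ".save rp, r<n>" unwind directive; the exact register number
   depends on the frame layout.  */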
2852
2853
2854 /* This function processes a SET pattern looking for specific patterns
2855 which result in emitting an assembly directive required for unwinding. */
2856 static int
2857 process_set (asm_out_file, pat)
2858 FILE *asm_out_file;
2859 rtx pat;
2860 {
2861 rtx src = SET_SRC (pat);
2862 rtx dest = SET_DEST (pat);
2863 static rtx frame_reg = NULL_RTX;
2864 static int frame_size = 0;
2865
2866 /* Look for the ALLOC insn. reg = alloc .... */
2867 if (GET_CODE (src) == UNSPEC_VOLATILE && XINT (src, 1) == 0
2868 && GET_CODE (dest) == REG && GR_REGNO_P (REGNO (dest)))
2869 {
2870 /* Assume this is a stack allocate insn. */
2871 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
2872 REGNO (dest) + ia64_input_regs);
2873 return 1;
2874 }
2875
2876 /* Look for SP = .... */
2877 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
2878 {
2879 if (GET_CODE (src) == PLUS)
2880 {
2881 rtx op0 = XEXP (src, 0);
2882 rtx op1 = XEXP (src, 1);
2883 if (op0 == dest && GET_CODE (op1) == CONST_INT)
2884 {
2885 fprintf (asm_out_file, "\t.fframe %d\n", -INTVAL (op1));
2886 frame_size = INTVAL (op1);
2887 return 1;
2888 }
2889 else if (op0 == dest && GET_CODE (op1) == REG)
2891 {
2892 fprintf (asm_out_file, "\t.vframe r%d\n", REGNO (op1));
2893 frame_size = 0;
2894 return 1;
2895 }
2896 }
2897 }
2898 /* Look for a frame offset. */
2899 if (GET_CODE (dest) == REG)
2900 {
2901 if (GET_CODE (src) == PLUS)
2902 {
2903 rtx op0 = XEXP (src, 0);
2904 rtx op1 = XEXP (src, 1);
2905 if (GET_CODE (op0) == REG && REGNO (op0) == FRAME_POINTER_REGNUM
2906 && GET_CODE (op1) == CONST_INT)
2907 {
2908 sp_offset = -frame_size + INTVAL (op1);
2909 spill_offset = INTVAL (op1);
2910 spill_offset_emitted = 0;
2911 frame_reg = dest;
2912 /* We delay issuing the spill offset since we might
2913 be saving non-spill things off this register,
2914 thus adjusting its offset before a spill is seen. */
2915 return 1;
2916 }
2917 }
2918 }
2919
2920 /* Register move we need to look at. */
2921 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
2922 {
2923 int regno = REGNO (src);
2924 if (BR_REGNO_P (regno))
2925 {
2926 /* Saving return address pointer. */
2927 if (regno == BR_REG (0))
2928 {
2929 fprintf (asm_out_file, "\t.save rp, r%d\n",
2930 REGNO (dest) + ia64_input_regs);
2931 return 1;
2932 }
2933 /* If it's br1 to br5, we copy it to a temp reg, then save the
2934 temp reg to memory next. */
2935 if (regno >= BR_REG (1) && regno <= BR_REG (5))
2936 {
2937 tmp_reg = dest;
2938 tmp_saved = regno;
2939 return 1;
2940 }
2941 }
2942 }
2943 /* Search for special reg moves. */
2944 if (GET_CODE (dest) == REG && GET_CODE (src) == UNSPEC)
2945 {
2946 int unspec_code = XINT (src, 1);
2947 /* Copied to a temp register, save it until we see the temp
2948 register stored. */
2949 if (unspec_code == 5 || unspec_code == 9)
2950 {
2951 tmp_reg = dest;
2952 tmp_saved = unspec_code;
2953 return 1;
2954 }
2955 }
2956 if (GET_CODE (dest) == MEM && GET_CODE (XEXP (dest, 0)) == POST_INC
2957 && GET_CODE (XEXP (XEXP (dest, 0), 0)) == REG)
2958 {
2959 int spill_unspec = 0;
2960 /* We adjust the spill_offset early, so we don't miss it later. */
2961 spill_offset += 8;
2962 sp_offset += 8;
2963 if (GET_CODE (src) == UNSPEC)
2964 {
2965 spill_unspec = XINT (src, 1);
2966 /* 1 and 3 are unspecs for the GR and FR spills. */
2967 if (spill_unspec != 1 && spill_unspec != 3)
2968 spill_unspec = 0;
2969 }
2970 /* ST8 or st8.spill insn. */
2971 if ((GET_CODE (src) == REG) || spill_unspec != 0)
2972 {
2973 int regno;
2974 if (spill_unspec != 0)
2975 {
2976 regno = REGNO (XVECEXP (src, 0, 0));
2977 if (!spill_offset_emitted)
2978 {
2979 fprintf (asm_out_file, "\t.spill %d\n",
2980 /* (frame_size + 16 - spill_offset ) / 4); */
2981 (-(spill_offset - 8) + 16) / 4);
2982 spill_offset_emitted = 1;
2983 }
2984 }
2985 else
2986 regno = REGNO (src);
2987
2988 if (GR_REGNO_P (regno))
2989 {
2990 if (regno >= GR_REG (4) && regno <= GR_REG (7))
2991 fprintf (asm_out_file, "\t.save.g 0x%x\n",
2992 1 << (regno - GR_REG (4)));
2993 else if (tmp_reg != NULL_RTX && regno == REGNO (tmp_reg))
2994 {
2995 /* We saved a special reg to a temp reg, and now we're
2996 dumping it to memory. */
2997 tmp_reg = NULL_RTX;
2998 /* Unspec code 9 is a read of ar.unat. */
2999 if (tmp_saved == 9)
3000 fprintf (asm_out_file, "\t.savesp ar.unat, %d\n",
3001 (sp_offset - 8) / 4);
3002 else if (tmp_saved == 5)
3003 fprintf (asm_out_file, "\t.savesp pr, %d\n",
3004 (sp_offset - 8) / 4);
3005 else if (tmp_saved >= BR_REG (1) && tmp_saved <= BR_REG (5))
3006 {
3007 /* BR regs are saved this way too. */
3008 fprintf (asm_out_file, "\t.save.b 0x%x\n",
3009 1 << (tmp_saved - BR_REG (1)));
3010 }
3011 }
3012 else
3013 return 0;
3014 }
3015 if (FR_REGNO_P (regno))
3016 {
3017 if (regno >= FR_REG (2) && regno <= FR_REG (5))
3018 fprintf (asm_out_file, "\t.save.f 0x%x\n",
3019 1 << (regno - FR_REG (2)));
3020 else if (regno >= FR_REG (16) && regno <= FR_REG (31))
3022 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
3023 1 << (regno - FR_REG (12)));
3024 else
3025 return 0;
3026 }
3027 return 1;
3028 }
3029 }
3030 return 0;
3031 }
3032
3033
3034 /* This function looks at a single insn and emits any directives
3035 required to unwind this insn. */
3036 void
3037 process_for_unwind_directive (asm_out_file, insn)
3038 FILE *asm_out_file;
3039 rtx insn;
3040 {
3041 if ((flag_unwind_tables
3042 || (flag_exceptions && !exceptions_via_longjmp))
3043 && RTX_FRAME_RELATED_P (insn))
3044 {
3045 rtx pat;
3046 pat = PATTERN (insn);
3047
3048 switch (GET_CODE (pat))
3049 {
3050 case SET:
3051 {
3052 process_set (asm_out_file, pat);
3053 break;
3054 }
3055 case PARALLEL:
3056 {
3057 int par_index;
3058 int limit = XVECLEN (pat, 0);
3059 for (par_index = 0; par_index < limit; par_index++)
3060 {
3061 rtx x = XVECEXP (pat, 0, par_index);
3062 if (GET_CODE (x) == SET)
3063 process_set (asm_out_file, x);
3064 }
3065 break;
3066 }
3067 }
3068 }
3069 }
3070
3071 #define def_builtin(name, type, code) \
3072 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
3073
3074 struct builtin_description
3075 {
3076 enum insn_code icode;
3077 const char *name;
3078 enum ia64_builtins code;
3079 enum rtx_code comparison;
3080 unsigned int flag;
3081 };
3082
3083 /* All 32-bit intrinsics that take 2 arguments. */
3084 static struct builtin_description bdesc_2argsi[] =
3085 {
3086 { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si", IA64_BUILTIN_FETCH_AND_ADD_SI, 0, 0 },
3087 { CODE_FOR_fetch_and_sub_si, "__sync_fetch_and_sub_si", IA64_BUILTIN_FETCH_AND_SUB_SI, 0, 0 },
3088 { CODE_FOR_fetch_and_or_si, "__sync_fetch_and_or_si", IA64_BUILTIN_FETCH_AND_OR_SI, 0, 0 },
3089 { CODE_FOR_fetch_and_and_si, "__sync_fetch_and_and_si", IA64_BUILTIN_FETCH_AND_AND_SI, 0, 0 },
3090 { CODE_FOR_fetch_and_xor_si, "__sync_fetch_and_xor_si", IA64_BUILTIN_FETCH_AND_XOR_SI, 0, 0 },
3091 { CODE_FOR_fetch_and_nand_si, "__sync_fetch_and_nand_si", IA64_BUILTIN_FETCH_AND_NAND_SI, 0, 0 },
3092 { CODE_FOR_add_and_fetch_si, "__sync_add_and_fetch_si", IA64_BUILTIN_ADD_AND_FETCH_SI, 0, 0 },
3093 { CODE_FOR_sub_and_fetch_si, "__sync_sub_and_fetch_si", IA64_BUILTIN_SUB_AND_FETCH_SI, 0, 0 },
3094 { CODE_FOR_or_and_fetch_si, "__sync_or_and_fetch_si", IA64_BUILTIN_OR_AND_FETCH_SI, 0, 0 },
3095 { CODE_FOR_and_and_fetch_si, "__sync_and_and_fetch_si", IA64_BUILTIN_AND_AND_FETCH_SI, 0, 0 },
3096 { CODE_FOR_xor_and_fetch_si, "__sync_xor_and_fetch_si", IA64_BUILTIN_XOR_AND_FETCH_SI, 0, 0 },
3097 { CODE_FOR_nand_and_fetch_si, "__sync_nand_and_fetch_si", IA64_BUILTIN_NAND_AND_FETCH_SI, 0, 0 }
3098 };
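/* As an illustration (hypothetical caller, not from the original
   sources), a call such as

	int old = __sync_fetch_and_add_si (&counter, 1);

   atomically adds 1 to counter and returns its previous value, using
   the cmpxchgsz.acq loop built by ia64_expand_fetch_and_op below.  */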
3099
3100 /* All 64-bit intrinsics that take 2 arguments. */
3101 static struct builtin_description bdesc_2argdi[] =
3102 {
3103 { CODE_FOR_fetch_and_add_di, "__sync_fetch_and_add_di", IA64_BUILTIN_FETCH_AND_ADD_DI, 0, 0 },
3104 { CODE_FOR_fetch_and_sub_di, "__sync_fetch_and_sub_di", IA64_BUILTIN_FETCH_AND_SUB_DI, 0, 0 },
3105 { CODE_FOR_fetch_and_or_di, "__sync_fetch_and_or_di", IA64_BUILTIN_FETCH_AND_OR_DI, 0, 0 },
3106 { CODE_FOR_fetch_and_and_di, "__sync_fetch_and_and_di", IA64_BUILTIN_FETCH_AND_AND_DI, 0, 0 },
3107 { CODE_FOR_fetch_and_xor_di, "__sync_fetch_and_xor_di", IA64_BUILTIN_FETCH_AND_XOR_DI, 0, 0 },
3108 { CODE_FOR_fetch_and_nand_di, "__sync_fetch_and_nand_di", IA64_BUILTIN_FETCH_AND_NAND_DI, 0, 0 },
3109 { CODE_FOR_add_and_fetch_di, "__sync_add_and_fetch_di", IA64_BUILTIN_ADD_AND_FETCH_DI, 0, 0 },
3110 { CODE_FOR_sub_and_fetch_di, "__sync_sub_and_fetch_di", IA64_BUILTIN_SUB_AND_FETCH_DI, 0, 0 },
3111 { CODE_FOR_or_and_fetch_di, "__sync_or_and_fetch_di", IA64_BUILTIN_OR_AND_FETCH_DI, 0, 0 },
3112 { CODE_FOR_and_and_fetch_di, "__sync_and_and_fetch_di", IA64_BUILTIN_AND_AND_FETCH_DI, 0, 0 },
3113 { CODE_FOR_xor_and_fetch_di, "__sync_xor_and_fetch_di", IA64_BUILTIN_XOR_AND_FETCH_DI, 0, 0 },
3114 { CODE_FOR_nand_and_fetch_di, "__sync_nand_and_fetch_di", IA64_BUILTIN_NAND_AND_FETCH_DI, 0, 0 }
3115 };
3116
3117 void
3118 ia64_init_builtins ()
3119 {
3120 int i;
3121 struct builtin_description *d;
3122
3123 tree psi_type_node = build_pointer_type (integer_type_node);
3124 tree pdi_type_node = build_pointer_type (long_integer_type_node);
3125 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
3126
3127
3128 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
3129 tree si_ftype_psi_si_si
3130 = build_function_type (integer_type_node,
3131 tree_cons (NULL_TREE, psi_type_node,
3132 tree_cons (NULL_TREE, integer_type_node,
3133 tree_cons (NULL_TREE, integer_type_node,
3134 endlink))));
3135
3136 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
3137 tree di_ftype_pdi_di_di
3138 = build_function_type (long_integer_type_node,
3139 tree_cons (NULL_TREE, pdi_type_node,
3140 tree_cons (NULL_TREE, long_integer_type_node,
3141 tree_cons (NULL_TREE, long_integer_type_node,
3142 endlink))));
3143 /* __sync_synchronize */
3144 tree void_ftype_void
3145 = build_function_type (void_type_node, endlink);
3146
3147 /* __sync_lock_test_and_set_si */
3148 tree si_ftype_psi_si
3149 = build_function_type (integer_type_node,
3150 tree_cons (NULL_TREE, psi_type_node,
3151 tree_cons (NULL_TREE, integer_type_node, endlink)));
3152
3153 /* __sync_lock_test_and_set_di */
3154 tree di_ftype_pdi_di
3155 = build_function_type (long_integer_type_node,
3156 tree_cons (NULL_TREE, pdi_type_node,
3157 tree_cons (NULL_TREE, long_integer_type_node, endlink)));
3158
3159 /* __sync_lock_release_si */
3160 tree void_ftype_psi
3161 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node, endlink));
3162
3163 /* __sync_lock_release_di */
3164 tree void_ftype_pdi
3165 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node, endlink));
3166
3167 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
3168
3169 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
3170
3171 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
3172
3173 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
3174
3175 def_builtin ("__sync_synchronize", void_ftype_void, IA64_BUILTIN_SYNCHRONIZE);
3176
3177 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
3178
3179 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di, IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
3180
3181 def_builtin ("__sync_lock_release_si", void_ftype_psi, IA64_BUILTIN_LOCK_RELEASE_SI);
3182
3183 def_builtin ("__sync_lock_release_di", void_ftype_pdi, IA64_BUILTIN_LOCK_RELEASE_DI);
3184
3185 /* Add all builtins that are operations on two args. */
3186 for (i = 0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++)
3187 def_builtin (d->name, si_ftype_psi_si, d->code);
3188 for (i = 0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++)
3189 def_builtin (d->name, di_ftype_pdi_di, d->code);
3190 }
3191
3192 /* Expand fetch_and_op intrinsics. The basic code sequence is:
3193
3194 mf
3195 ldsz return = [ptr];
3196 tmp = return;
3197 do {
3198 oldval = tmp;
3199 ar.ccv = tmp;
3200 tmp <op>= value;
3201 cmpxchgsz.acq tmp = [ptr], tmp
3203 } while (tmp != oldval)
3204 */
3205 void
3206 ia64_expand_fetch_and_op (code, mode, operands)
3207 enum fetchop_code code;
3208 enum machine_mode mode;
3209 rtx operands[];
3210 {
3211 rtx oldval, newlabel;
3212 rtx tmp_reg = gen_rtx_REG (mode, GR_REG (0));
3213 rtx mfreg = gen_rtx_MEM (BLKmode, tmp_reg);
3214 RTX_UNCHANGING_P (mfreg) = 1;
3215 emit_insn (gen_mf (mfreg));
3216 tmp_reg = gen_reg_rtx (mode);
3217 oldval = gen_reg_rtx (mode);
3218
3219 if (mode == SImode)
3220 {
3221 emit_insn (gen_movsi (operands[0], operands[1]));
3222 emit_insn (gen_movsi (tmp_reg, operands[0]));
3223 }
3224 else
3225 {
3226 emit_insn (gen_movdi (operands[0], operands[1]));
3227 emit_insn (gen_movdi (tmp_reg, operands[0]));
3228 }
3229
3230 newlabel = gen_label_rtx ();
3231 emit_label (newlabel);
3232 if (mode == SImode)
3233 {
3234 emit_insn (gen_movsi (oldval, tmp_reg));
3235 emit_insn (gen_ccv_restore_si (tmp_reg));
3236 }
3237 else
3238 {
3239 emit_insn (gen_movdi (oldval, tmp_reg));
3240 emit_insn (gen_ccv_restore_di (tmp_reg));
3241 }
3242
3243 /* Perform the specific operation. */
3244 switch (code)
3245 {
3246 case IA64_ADD_OP:
3247 {
3248 rtx reg;
3249 if (GET_CODE (operands[2]) == CONST_INT)
3250 reg = gen_reg_rtx (mode);
3251 else
3252 reg = operands[2];
3253 if (mode == SImode)
3254 {
3255 if (reg != operands[2])
3256 emit_insn (gen_movsi (reg, operands[2]));
3257 emit_insn (gen_addsi3 (tmp_reg, tmp_reg, reg));
3258 }
3259 else
3260 {
3261 if (reg != operands[2])
3262 emit_insn (gen_movdi (reg, operands[2]));
3263 emit_insn (gen_adddi3 (tmp_reg, tmp_reg, reg));
3264 }
3265 break;
3266 }
3267
3268 case IA64_SUB_OP:
3269 if (mode == SImode)
3270 emit_insn (gen_subsi3 (tmp_reg, tmp_reg, operands[2]));
3271 else
3272 emit_insn (gen_subdi3 (tmp_reg, tmp_reg, operands[2]));
3273 break;
3274
3275 case IA64_OR_OP:
3276 emit_insn (gen_iordi3 (tmp_reg, tmp_reg, operands[2]));
3277 break;
3278
3279 case IA64_AND_OP:
3280 emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2]));
3281 break;
3282
3283 case IA64_XOR_OP:
3284 emit_insn (gen_xordi3 (tmp_reg, tmp_reg, operands[2]));
3285 break;
3286
3287 case IA64_NAND_OP:
3288 emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2]));
3289 if (mode == SImode)
3290 emit_insn (gen_one_cmplsi2 (tmp_reg, tmp_reg));
3291 else
3292 emit_insn (gen_one_cmpldi2 (tmp_reg, tmp_reg));
3293 break;
3294
3295 default:
3296 break;
3297 }
3298
3299 if (mode == SImode)
3300 emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], tmp_reg));
3301 else
3302 emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], tmp_reg));
3303
3304 emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel);
3305 }
3306
3307 /* Expand op_and_fetch intrinsics. The basic code sequence is:
3308
3309 mf
3310 ldsz tmp = [ptr];
3311 do {
3312 oldval = tmp;
3313 ar.ccv = tmp;
3314 return = tmp + value;
3315 cmpxchgsz.acq tmp = [ptr], return
3316 } while (tmp != oldval)
3317 */
3318 void
3319 ia64_expand_op_and_fetch (code, mode, operands)
3320 enum fetchop_code code;
3321 enum machine_mode mode;
3322 rtx operands[];
3323 {
3324 rtx oldval, newlabel;
3325 rtx tmp_reg, tmp2_reg = gen_rtx_REG (mode, GR_REG (0));
3326 rtx mfreg = gen_rtx_MEM (BLKmode, tmp2_reg);
3327 RTX_UNCHANGING_P (mfreg) = 1;
3328
3329 emit_insn (gen_mf (mfreg));
3330 tmp_reg = gen_reg_rtx (mode);
3331 if (mode == SImode)
3332 emit_insn (gen_movsi (tmp_reg, operands[1]));
3333 else
3334 emit_insn (gen_movdi (tmp_reg, operands[1]));
3335
3336 newlabel = gen_label_rtx ();
3337 emit_label (newlabel);
3338 oldval = gen_reg_rtx (mode);
3339 if (mode == SImode)
3340 {
3341 emit_insn (gen_movsi (oldval, tmp_reg));
3342 emit_insn (gen_ccv_restore_si (tmp_reg));
3343 }
3344 else
3345 {
3346 emit_insn (gen_movdi (oldval, tmp_reg));
3347 emit_insn (gen_ccv_restore_di (tmp_reg));
3348 }
3349
3350 /* Perform the specific operation. */
3351 switch (code)
3352 {
3353 case IA64_ADD_OP:
3354 if (mode == SImode)
3355 emit_insn (gen_addsi3 (operands[0], tmp_reg, operands[2]));
3356 else
3357 emit_insn (gen_adddi3 (operands[0], tmp_reg, operands[2]));
3358 break;
3359
3360 case IA64_SUB_OP:
3361 if (mode == SImode)
3362 emit_insn (gen_subsi3 (operands[0], tmp_reg, operands[2]));
3363 else
3364 emit_insn (gen_subdi3 (operands[0], tmp_reg, operands[2]));
3365 break;
3366
3367 case IA64_OR_OP:
3368 emit_insn (gen_iordi3 (operands[0], tmp_reg, operands[2]));
3369 break;
3370
3371 case IA64_AND_OP:
3372 emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2]));
3373 break;
3374
3375 case IA64_XOR_OP:
3376 emit_insn (gen_xordi3 (operands[0], tmp_reg, operands[2]));
3377 break;
3378
3379 case IA64_NAND_OP:
3380 emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2]));
3381 if (mode == SImode)
3382 emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
3383 else
3384 emit_insn (gen_one_cmpldi2 (operands[0], operands[0]));
3385 break;
3386
3387 default:
3388 break;
3389 }
3390
3391 if (mode == SImode)
3392 emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], operands[0]));
3393 else
3394 emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], operands[0]));
3395
3396 emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel);
3397 }
3398
3399 /* Expand val_ and bool_compare_and_swap. For val_ we want:
3400
3401 ar.ccv = oldval
3402 mf
3403 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
3404 return ret
3405
3406 For bool_ it's the same except return ret == oldval.
3407 */
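/* Illustration (hypothetical caller, not from the original sources):

	int was = __sync_val_compare_and_swap_si (&lock, 0, 1);

   stores 1 into lock only if lock was 0 and returns the old value;
   the bool_ variant instead returns (old == 0).  */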
3408 static rtx
3409 ia64_expand_compare_and_swap (icode, arglist, target, boolcode)
3410 enum insn_code icode;
3411 tree arglist;
3412 rtx target;
3413 int boolcode;
3414 {
3415 tree arg0, arg1, arg2;
3416 rtx op0, op1, op2, pat;
3417 enum machine_mode tmode, mode0, mode1, mode2;
3418
3419 arg0 = TREE_VALUE (arglist);
3420 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
3421 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
3422 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
3423 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
3424 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
3425 tmode = insn_data[icode].operand[0].mode;
3426 mode0 = insn_data[icode].operand[1].mode;
3427 mode1 = insn_data[icode].operand[2].mode;
3428 mode2 = insn_data[icode].operand[3].mode;
3429
3430 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
3431 RTX_UNCHANGING_P (op0) = 1;
3432 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
3433 op1 = copy_to_mode_reg (mode1, op1);
3434 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
3435 op2 = copy_to_mode_reg (mode2, op2);
3436 if (target == 0
3437 || GET_MODE (target) != tmode
3438 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
3439 target = gen_reg_rtx (tmode);
3440
3441 pat = GEN_FCN (icode) (target, op0, op1, op2);
3442 if (! pat)
3443 return 0;
3444 emit_insn (pat);
3445 if (boolcode)
3446 {
3447 if (tmode == SImode)
3448 {
3449 emit_insn (gen_cmpsi (target, op1));
3450 emit_insn (gen_seq (gen_lowpart (DImode, target)));
3451 }
3452 else
3453 {
3454 emit_insn (gen_cmpdi (target, op1));
3455 emit_insn (gen_seq (target));
3456 }
3457 }
3458 return target;
3459 }
3460
3461 /* Expand all intrinsics that take 2 arguments. */
3462 static rtx
3463 ia64_expand_binop_builtin (icode, arglist, target)
3464 enum insn_code icode;
3465 tree arglist;
3466 rtx target;
3467 {
3468 rtx pat;
3469 tree arg0 = TREE_VALUE (arglist);
3470 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
3471 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
3472 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
3473 enum machine_mode tmode = insn_data[icode].operand[0].mode;
3474 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
3475 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
3476
3477 if (! target
3478 || GET_MODE (target) != tmode
3479 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
3480 target = gen_reg_rtx (tmode);
3481
3482 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
3483 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
3484 op1 = copy_to_mode_reg (mode1, op1);
3485
3486 pat = GEN_FCN (icode) (target, op0, op1);
3487 if (! pat)
3488 return 0;
3489 emit_insn (pat);
3490 return target;
3491 }
3492
3493 rtx
3494 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
3495 tree exp;
3496 rtx target;
3497 rtx subtarget ATTRIBUTE_UNUSED;
3498 enum machine_mode mode ATTRIBUTE_UNUSED;
3499 int ignore ATTRIBUTE_UNUSED;
3500 {
3501 rtx op0, op1, pat;
3502 rtx tmp_reg;
3504 tree arg0, arg1;
3505 tree arglist = TREE_OPERAND (exp, 1);
3506 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
3507 int fcode = DECL_FUNCTION_CODE (fndecl);
3508 enum machine_mode tmode, mode0, mode1;
3509 enum insn_code icode;
3511 int i;
3512 struct builtin_description *d;
3513
3514 switch (fcode)
3515 {
3516 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
3517 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 1);
3518 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
3519 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 0);
3520 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
3521 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 1);
3522 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
3523 return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 0);
3524 case IA64_BUILTIN_SYNCHRONIZE:
3525 /* Pass a volatile memory operand. */
3526 tmp_reg = gen_rtx_REG (DImode, GR_REG (0));
3527 target = gen_rtx_MEM (BLKmode, tmp_reg);
3528 emit_insn (gen_mf (target));
3529 return 0;
3530
3531 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
3532 icode = CODE_FOR_lock_test_and_set_si;
3533 arg0 = TREE_VALUE (arglist);
3534 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
3535 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
3536 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
3537 tmode = insn_data[icode].operand[0].mode;
3538 mode0 = insn_data[icode].operand[1].mode;
3539 mode1 = insn_data[icode].operand[2].mode;
3540 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
3541 RTX_UNCHANGING_P (op0) = 1;
3542 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
3543 op1 = copy_to_mode_reg (mode1, op1);
3544 if (target == 0
3545 || GET_MODE (target) != tmode
3546 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
3547 target = gen_reg_rtx (tmode);
3548 pat = GEN_FCN (icode) (target, op0, op1);
3549 if (! pat)
3550 return 0;
3551 emit_insn (pat);
3552 return target;
3553
3554 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
3555 icode = CODE_FOR_lock_test_and_set_di;
3556 arg0 = TREE_VALUE (arglist);
3557 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
3558 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
3559 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
3560 tmode = insn_data[icode].operand[0].mode;
3561 mode0 = insn_data[icode].operand[1].mode;
3562 mode1 = insn_data[icode].operand[2].mode;
3563 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
3564 RTX_UNCHANGING_P (op0) = 1;
3565 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
3566 op1 = copy_to_mode_reg (mode1, op1);
3567 if (target == 0
3568 || GET_MODE (target) != tmode
3569 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
3570 target = gen_reg_rtx (tmode);
3571 pat = GEN_FCN (icode) (target, op0, op1);
3572 if (! pat)
3573 return 0;
3574 emit_insn (pat);
3575 return target;
3576
3577 case IA64_BUILTIN_LOCK_RELEASE_SI:
3578 arg0 = TREE_VALUE (arglist);
3579 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
3580 op0 = gen_rtx_MEM (SImode, copy_to_mode_reg (Pmode, op0));
3581 MEM_VOLATILE_P (op0) = 1;
3582 emit_insn (gen_movsi (op0, GEN_INT (0)));
3583 return 0;
3584
3585 case IA64_BUILTIN_LOCK_RELEASE_DI:
3586 arg0 = TREE_VALUE (arglist);
3587 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
3588 op0 = gen_rtx_MEM (DImode, copy_to_mode_reg (Pmode, op0));
3589 MEM_VOLATILE_P (op0) = 1;
3590 emit_insn (gen_movdi (op0, GEN_INT (0)));
3591 return 0;
3592
3593 default:
3594 break;
3595 }
3596
3597 /* Expand all 32-bit intrinsics that take 2 arguments. */
3598 for (i = 0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++)
3599 if (d->code == fcode)
3600 return ia64_expand_binop_builtin (d->icode, arglist, target);
3601
3602 /* Expand all 64-bit intrinsics that take 2 arguments. */
3603 for (i = 0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++)
3604 if (d->code == fcode)
3605 return ia64_expand_binop_builtin (d->icode, arglist, target);
3606
3608 return 0;
3609 }