]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/pa/pa.c
5c01a8b0b4f362b6fe13fc982dc5b30c4106bd58
[thirdparty/gcc.git] / gcc / config / pa / pa.c
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "regs.h"
27 #include "hard-reg-set.h"
28 #include "real.h"
29 #include "insn-config.h"
30 #include "conditions.h"
31 #include "insn-attr.h"
32 #include "flags.h"
33 #include "tree.h"
34 #include "output.h"
35 #include "except.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "libfuncs.h"
39 #include "reload.h"
40 #include "c-tree.h"
41 #include "integrate.h"
42 #include "function.h"
43 #include "obstack.h"
44 #include "toplev.h"
45 #include "ggc.h"
46 #include "recog.h"
47 #include "predict.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51
52 static int hppa_use_dfa_pipeline_interface PARAMS ((void));
53
54 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
55 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
56
/* Scheduler hook: request the DFA-based pipeline description rather
   than the old function-unit model.  */
static int
hppa_use_dfa_pipeline_interface ()
{
  /* The PA pipeline descriptions are DFA-based; always opt in.  */
  return 1;
}
62
63 /* Return nonzero if there is a bypass for the output of
64 OUT_INSN and the fp store IN_INSN. */
65 int
66 hppa_fpstore_bypass_p (out_insn, in_insn)
67 rtx out_insn, in_insn;
68 {
69 enum machine_mode store_mode;
70 enum machine_mode other_mode;
71 rtx set;
72
73 if (recog_memoized (in_insn) < 0
74 || get_attr_type (in_insn) != TYPE_FPSTORE
75 || recog_memoized (out_insn) < 0)
76 return 0;
77
78 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
79
80 set = single_set (out_insn);
81 if (!set)
82 return 0;
83
84 other_mode = GET_MODE (SET_SRC (set));
85
86 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
87 }
88
89
90 #ifndef DO_FRAME_NOTES
91 #ifdef INCOMING_RETURN_ADDR_RTX
92 #define DO_FRAME_NOTES 1
93 #else
94 #define DO_FRAME_NOTES 0
95 #endif
96 #endif
97
98 static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
99 static void pa_combine_instructions PARAMS ((rtx));
100 static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
101 static int forward_branch_p PARAMS ((rtx));
102 static int shadd_constant_p PARAMS ((int));
103 static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
104 static int compute_movstrsi_length PARAMS ((rtx));
105 static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
106 static void remove_useless_addtr_insns PARAMS ((rtx, int));
107 static void store_reg PARAMS ((int, int, int));
108 static void store_reg_modify PARAMS ((int, int, int));
109 static void load_reg PARAMS ((int, int, int));
110 static void set_reg_plus_d PARAMS ((int, int, int, int));
111 static void pa_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
112 static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
113 static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
114 static int pa_adjust_priority PARAMS ((rtx, int));
115 static int pa_issue_rate PARAMS ((void));
116 static void pa_select_section PARAMS ((tree, int, unsigned HOST_WIDE_INT))
117 ATTRIBUTE_UNUSED;
118 static void pa_encode_section_info PARAMS ((tree, int));
119 static const char *pa_strip_name_encoding PARAMS ((const char *));
120 static void pa_globalize_label PARAMS ((FILE *, const char *))
121 ATTRIBUTE_UNUSED;
122 static void pa_asm_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
123 HOST_WIDE_INT, tree));
124 static void copy_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
125 static int length_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
126 static struct deferred_plabel *get_plabel PARAMS ((const char *))
127 ATTRIBUTE_UNUSED;
128
129 /* Save the operands last given to a compare for use when we
130 generate a scc or bcc insn. */
131 rtx hppa_compare_op0, hppa_compare_op1;
132 enum cmp_type hppa_branch_type;
133
134 /* Which cpu we are scheduling for. */
135 enum processor_type pa_cpu;
136
137 /* String to hold which cpu we are scheduling for. */
138 const char *pa_cpu_string;
139
140 /* Which architecture we are generating code for. */
141 enum architecture_type pa_arch;
142
143 /* String to hold which architecture we are generating code for. */
144 const char *pa_arch_string;
145
146 /* Counts for the number of callee-saved general and floating point
147 registers which were saved by the current function's prologue. */
148 static int gr_saved, fr_saved;
149
150 static rtx find_addr_reg PARAMS ((rtx));
151
152 /* Keep track of the number of bytes we have output in the CODE subspaces
153 during this compilation so we'll know when to emit inline long-calls. */
154 unsigned long total_code_bytes;
155
156 /* Variables to handle plabels that we discover are necessary at assembly
157 output time. They are output after the current function. */
158 struct deferred_plabel GTY(())
159 {
160 rtx internal_label;
161 const char *name;
162 };
163 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
164 deferred_plabels;
165 static size_t n_deferred_plabels = 0;
166 \f
167 /* Initialize the GCC target structure. */
168
169 #undef TARGET_ASM_ALIGNED_HI_OP
170 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
171 #undef TARGET_ASM_ALIGNED_SI_OP
172 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
173 #undef TARGET_ASM_ALIGNED_DI_OP
174 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
175 #undef TARGET_ASM_UNALIGNED_HI_OP
176 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
177 #undef TARGET_ASM_UNALIGNED_SI_OP
178 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
179 #undef TARGET_ASM_UNALIGNED_DI_OP
180 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
181 #undef TARGET_ASM_INTEGER
182 #define TARGET_ASM_INTEGER pa_assemble_integer
183
184 #undef TARGET_ASM_FUNCTION_PROLOGUE
185 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
186 #undef TARGET_ASM_FUNCTION_EPILOGUE
187 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
188
189 #undef TARGET_SCHED_ADJUST_COST
190 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
191 #undef TARGET_SCHED_ADJUST_PRIORITY
192 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
193 #undef TARGET_SCHED_ISSUE_RATE
194 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
195
196 #undef TARGET_ENCODE_SECTION_INFO
197 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
198 #undef TARGET_STRIP_NAME_ENCODING
199 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
200
201 #undef TARGET_ASM_OUTPUT_MI_THUNK
202 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
203 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
204 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
205
206 struct gcc_target targetm = TARGET_INITIALIZER;
207 \f
208 void
209 override_options ()
210 {
211 if (pa_cpu_string == NULL)
212 pa_cpu_string = TARGET_SCHED_DEFAULT;
213
214 if (! strcmp (pa_cpu_string, "8000"))
215 {
216 pa_cpu_string = "8000";
217 pa_cpu = PROCESSOR_8000;
218 }
219 else if (! strcmp (pa_cpu_string, "7100"))
220 {
221 pa_cpu_string = "7100";
222 pa_cpu = PROCESSOR_7100;
223 }
224 else if (! strcmp (pa_cpu_string, "700"))
225 {
226 pa_cpu_string = "700";
227 pa_cpu = PROCESSOR_700;
228 }
229 else if (! strcmp (pa_cpu_string, "7100LC"))
230 {
231 pa_cpu_string = "7100LC";
232 pa_cpu = PROCESSOR_7100LC;
233 }
234 else if (! strcmp (pa_cpu_string, "7200"))
235 {
236 pa_cpu_string = "7200";
237 pa_cpu = PROCESSOR_7200;
238 }
239 else if (! strcmp (pa_cpu_string, "7300"))
240 {
241 pa_cpu_string = "7300";
242 pa_cpu = PROCESSOR_7300;
243 }
244 else
245 {
246 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
247 }
248
249 /* Set the instruction set architecture. */
250 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
251 {
252 pa_arch_string = "1.0";
253 pa_arch = ARCHITECTURE_10;
254 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
255 }
256 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
257 {
258 pa_arch_string = "1.1";
259 pa_arch = ARCHITECTURE_11;
260 target_flags &= ~MASK_PA_20;
261 target_flags |= MASK_PA_11;
262 }
263 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
264 {
265 pa_arch_string = "2.0";
266 pa_arch = ARCHITECTURE_20;
267 target_flags |= MASK_PA_11 | MASK_PA_20;
268 }
269 else if (pa_arch_string)
270 {
271 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
272 }
273
274 /* Unconditional branches in the delay slot are not compatible with dwarf2
275 call frame information. There is no benefit in using this optimization
276 on PA8000 and later processors. */
277 if (pa_cpu >= PROCESSOR_8000
278 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
279 || flag_unwind_tables)
280 target_flags &= ~MASK_JUMP_IN_DELAY;
281
282 if (flag_pic && TARGET_PORTABLE_RUNTIME)
283 {
284 warning ("PIC code generation is not supported in the portable runtime model\n");
285 }
286
287 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
288 {
289 warning ("PIC code generation is not compatible with fast indirect calls\n");
290 }
291
292 if (! TARGET_GAS && write_symbols != NO_DEBUG)
293 {
294 warning ("-g is only supported when using GAS on this processor,");
295 warning ("-g option disabled");
296 write_symbols = NO_DEBUG;
297 }
298
299 /* We only support the "big PIC" model now. And we always generate PIC
300 code when in 64bit mode. */
301 if (flag_pic == 1 || TARGET_64BIT)
302 flag_pic = 2;
303
304 /* We can't guarantee that .dword is available for 32-bit targets. */
305 if (UNITS_PER_WORD == 4)
306 targetm.asm_out.aligned_op.di = NULL;
307
308 /* The unaligned ops are only available when using GAS. */
309 if (!TARGET_GAS)
310 {
311 targetm.asm_out.unaligned_op.hi = NULL;
312 targetm.asm_out.unaligned_op.si = NULL;
313 targetm.asm_out.unaligned_op.di = NULL;
314 }
315 }
316
317 /* Return nonzero only if OP is a register of mode MODE,
318 or CONST0_RTX. */
319 int
320 reg_or_0_operand (op, mode)
321 rtx op;
322 enum machine_mode mode;
323 {
324 return (op == CONST0_RTX (mode) || register_operand (op, mode));
325 }
326
327 /* Return nonzero if OP is suitable for use in a call to a named
328 function.
329
330 For 2.5 try to eliminate either call_operand_address or
331 function_label_operand, they perform very similar functions. */
332 int
333 call_operand_address (op, mode)
334 rtx op;
335 enum machine_mode mode ATTRIBUTE_UNUSED;
336 {
337 return (GET_MODE (op) == word_mode
338 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
339 }
340
341 /* Return 1 if X contains a symbolic expression. We know these
342 expressions will have one of a few well defined forms, so
343 we need only check those forms. */
344 int
345 symbolic_expression_p (x)
346 register rtx x;
347 {
348
349 /* Strip off any HIGH. */
350 if (GET_CODE (x) == HIGH)
351 x = XEXP (x, 0);
352
353 return (symbolic_operand (x, VOIDmode));
354 }
355
356 int
357 symbolic_operand (op, mode)
358 register rtx op;
359 enum machine_mode mode ATTRIBUTE_UNUSED;
360 {
361 switch (GET_CODE (op))
362 {
363 case SYMBOL_REF:
364 case LABEL_REF:
365 return 1;
366 case CONST:
367 op = XEXP (op, 0);
368 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
369 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
370 && GET_CODE (XEXP (op, 1)) == CONST_INT);
371 default:
372 return 0;
373 }
374 }
375
376 /* Return truth value of statement that OP is a symbolic memory
377 operand of mode MODE. */
378
379 int
380 symbolic_memory_operand (op, mode)
381 rtx op;
382 enum machine_mode mode ATTRIBUTE_UNUSED;
383 {
384 if (GET_CODE (op) == SUBREG)
385 op = SUBREG_REG (op);
386 if (GET_CODE (op) != MEM)
387 return 0;
388 op = XEXP (op, 0);
389 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
390 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
391 }
392
393 /* Return 1 if the operand is either a register or a memory operand that is
394 not symbolic. */
395
396 int
397 reg_or_nonsymb_mem_operand (op, mode)
398 register rtx op;
399 enum machine_mode mode;
400 {
401 if (register_operand (op, mode))
402 return 1;
403
404 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
405 return 1;
406
407 return 0;
408 }
409
410 /* Return 1 if the operand is either a register, zero, or a memory operand
411 that is not symbolic. */
412
413 int
414 reg_or_0_or_nonsymb_mem_operand (op, mode)
415 register rtx op;
416 enum machine_mode mode;
417 {
418 if (register_operand (op, mode))
419 return 1;
420
421 if (op == CONST0_RTX (mode))
422 return 1;
423
424 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
425 return 1;
426
427 return 0;
428 }
429
430 /* Return 1 if the operand is a register operand or a non-symbolic memory
431 operand after reload. This predicate is used for branch patterns that
432 internally handle register reloading. We need to accept non-symbolic
433 memory operands after reload to ensure that the pattern is still valid
434 if reload didn't find a hard register for the operand. */
435
436 int
437 reg_before_reload_operand (op, mode)
438 register rtx op;
439 enum machine_mode mode;
440 {
441 /* Don't accept a SUBREG since it will need a reload. */
442 if (GET_CODE (op) == SUBREG)
443 return 0;
444
445 if (register_operand (op, mode))
446 return 1;
447
448 if (reload_completed
449 && memory_operand (op, mode)
450 && ! symbolic_memory_operand (op, mode))
451 return 1;
452
453 return 0;
454 }
455
456 /* Accept any constant that can be moved in one instruction into a
457 general register. */
458 int
459 cint_ok_for_move (intval)
460 HOST_WIDE_INT intval;
461 {
462 /* OK if ldo, ldil, or zdepi, can be used. */
463 return (CONST_OK_FOR_LETTER_P (intval, 'J')
464 || CONST_OK_FOR_LETTER_P (intval, 'N')
465 || CONST_OK_FOR_LETTER_P (intval, 'K'));
466 }
467
468 /* Accept anything that can be moved in one instruction into a general
469 register. */
470 int
471 move_operand (op, mode)
472 rtx op;
473 enum machine_mode mode;
474 {
475 if (register_operand (op, mode))
476 return 1;
477
478 if (GET_CODE (op) == CONSTANT_P_RTX)
479 return 1;
480
481 if (GET_CODE (op) == CONST_INT)
482 return cint_ok_for_move (INTVAL (op));
483
484 if (GET_CODE (op) == SUBREG)
485 op = SUBREG_REG (op);
486 if (GET_CODE (op) != MEM)
487 return 0;
488
489 op = XEXP (op, 0);
490
491 /* We consider a LO_SUM DLT reference a move_operand now since it has
492 been merged into the normal movsi/movdi patterns. */
493 if (GET_CODE (op) == LO_SUM
494 && GET_CODE (XEXP (op, 0)) == REG
495 && REG_OK_FOR_BASE_P (XEXP (op, 0))
496 && GET_CODE (XEXP (op, 1)) == UNSPEC
497 && GET_MODE (op) == Pmode)
498 return 1;
499
500 /* Since move_operand is only used for source operands, we can always
501 allow scaled indexing! */
502 if (! TARGET_DISABLE_INDEXING
503 && GET_CODE (op) == PLUS
504 && ((GET_CODE (XEXP (op, 0)) == MULT
505 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
506 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
507 && INTVAL (XEXP (XEXP (op, 0), 1))
508 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
509 && GET_CODE (XEXP (op, 1)) == REG)
510 || (GET_CODE (XEXP (op, 1)) == MULT
511 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
512 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
513 && INTVAL (XEXP (XEXP (op, 1), 1))
514 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
515 && GET_CODE (XEXP (op, 0)) == REG)))
516 return 1;
517
518 return memory_address_p (mode, op);
519 }
520
521 /* Accept REG and any CONST_INT that can be moved in one instruction into a
522 general register. */
523 int
524 reg_or_cint_move_operand (op, mode)
525 rtx op;
526 enum machine_mode mode;
527 {
528 if (register_operand (op, mode))
529 return 1;
530
531 if (GET_CODE (op) == CONST_INT)
532 return cint_ok_for_move (INTVAL (op));
533
534 return 0;
535 }
536
537 int
538 pic_label_operand (op, mode)
539 rtx op;
540 enum machine_mode mode ATTRIBUTE_UNUSED;
541 {
542 if (!flag_pic)
543 return 0;
544
545 switch (GET_CODE (op))
546 {
547 case LABEL_REF:
548 return 1;
549 case CONST:
550 op = XEXP (op, 0);
551 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
552 && GET_CODE (XEXP (op, 1)) == CONST_INT);
553 default:
554 return 0;
555 }
556 }
557
558 int
559 fp_reg_operand (op, mode)
560 rtx op;
561 enum machine_mode mode ATTRIBUTE_UNUSED;
562 {
563 return reg_renumber && FP_REG_P (op);
564 }
565
566 \f
567
568 /* Return truth value of whether OP can be used as an operand in a
569 three operand arithmetic insn that accepts registers of mode MODE
570 or 14-bit signed integers. */
571 int
572 arith_operand (op, mode)
573 rtx op;
574 enum machine_mode mode;
575 {
576 return (register_operand (op, mode)
577 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
578 }
579
580 /* Return truth value of whether OP can be used as an operand in a
581 three operand arithmetic insn that accepts registers of mode MODE
582 or 11-bit signed integers. */
583 int
584 arith11_operand (op, mode)
585 rtx op;
586 enum machine_mode mode;
587 {
588 return (register_operand (op, mode)
589 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
590 }
591
592 /* Return truth value of whether OP can be used as an operand in a
593 adddi3 insn. */
594 int
595 adddi3_operand (op, mode)
596 rtx op;
597 enum machine_mode mode;
598 {
599 return (register_operand (op, mode)
600 || (GET_CODE (op) == CONST_INT
601 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
602 }
603
604 /* A constant integer suitable for use in a PRE_MODIFY memory
605 reference. */
606 int
607 pre_cint_operand (op, mode)
608 rtx op;
609 enum machine_mode mode ATTRIBUTE_UNUSED;
610 {
611 return (GET_CODE (op) == CONST_INT
612 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
613 }
614
615 /* A constant integer suitable for use in a POST_MODIFY memory
616 reference. */
617 int
618 post_cint_operand (op, mode)
619 rtx op;
620 enum machine_mode mode ATTRIBUTE_UNUSED;
621 {
622 return (GET_CODE (op) == CONST_INT
623 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
624 }
625
626 int
627 arith_double_operand (op, mode)
628 rtx op;
629 enum machine_mode mode;
630 {
631 return (register_operand (op, mode)
632 || (GET_CODE (op) == CONST_DOUBLE
633 && GET_MODE (op) == mode
634 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
635 && ((CONST_DOUBLE_HIGH (op) >= 0)
636 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
637 }
638
639 /* Return truth value of whether OP is an integer which fits the
640 range constraining immediate operands in three-address insns, or
641 is an integer register. */
642
643 int
644 ireg_or_int5_operand (op, mode)
645 rtx op;
646 enum machine_mode mode ATTRIBUTE_UNUSED;
647 {
648 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
649 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
650 }
651
652 /* Return nonzero if OP is an integer register, else return zero. */
653 int
654 ireg_operand (op, mode)
655 rtx op;
656 enum machine_mode mode ATTRIBUTE_UNUSED;
657 {
658 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
659 }
660
661 /* Return truth value of whether OP is an integer which fits the
662 range constraining immediate operands in three-address insns. */
663
664 int
665 int5_operand (op, mode)
666 rtx op;
667 enum machine_mode mode ATTRIBUTE_UNUSED;
668 {
669 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
670 }
671
672 int
673 uint5_operand (op, mode)
674 rtx op;
675 enum machine_mode mode ATTRIBUTE_UNUSED;
676 {
677 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
678 }
679
680 int
681 int11_operand (op, mode)
682 rtx op;
683 enum machine_mode mode ATTRIBUTE_UNUSED;
684 {
685 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
686 }
687
688 int
689 uint32_operand (op, mode)
690 rtx op;
691 enum machine_mode mode ATTRIBUTE_UNUSED;
692 {
693 #if HOST_BITS_PER_WIDE_INT > 32
694 /* All allowed constants will fit a CONST_INT. */
695 return (GET_CODE (op) == CONST_INT
696 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
697 #else
698 return (GET_CODE (op) == CONST_INT
699 || (GET_CODE (op) == CONST_DOUBLE
700 && CONST_DOUBLE_HIGH (op) == 0));
701 #endif
702 }
703
704 int
705 arith5_operand (op, mode)
706 rtx op;
707 enum machine_mode mode;
708 {
709 return register_operand (op, mode) || int5_operand (op, mode);
710 }
711
712 /* True iff zdepi can be used to generate this CONST_INT.
713 zdepi first sign extends a 5 bit signed number to a given field
714 length, then places this field anywhere in a zero. */
715 int
716 zdepi_cint_p (x)
717 unsigned HOST_WIDE_INT x;
718 {
719 unsigned HOST_WIDE_INT lsb_mask, t;
720
721 /* This might not be obvious, but it's at least fast.
722 This function is critical; we don't have the time loops would take. */
723 lsb_mask = x & -x;
724 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
725 /* Return true iff t is a power of two. */
726 return ((t & (t - 1)) == 0);
727 }
728
729 /* True iff depi or extru can be used to compute (reg & mask).
730 Accept bit pattern like these:
731 0....01....1
732 1....10....0
733 1..10..01..1 */
734 int
735 and_mask_p (mask)
736 unsigned HOST_WIDE_INT mask;
737 {
738 mask = ~mask;
739 mask += mask & -mask;
740 return (mask & (mask - 1)) == 0;
741 }
742
743 /* True iff depi or extru can be used to compute (reg & OP). */
744 int
745 and_operand (op, mode)
746 rtx op;
747 enum machine_mode mode;
748 {
749 return (register_operand (op, mode)
750 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
751 }
752
753 /* True iff depi can be used to compute (reg | MASK). */
754 int
755 ior_mask_p (mask)
756 unsigned HOST_WIDE_INT mask;
757 {
758 mask += mask & -mask;
759 return (mask & (mask - 1)) == 0;
760 }
761
762 /* True iff depi can be used to compute (reg | OP). */
763 int
764 ior_operand (op, mode)
765 rtx op;
766 enum machine_mode mode ATTRIBUTE_UNUSED;
767 {
768 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
769 }
770
771 int
772 lhs_lshift_operand (op, mode)
773 rtx op;
774 enum machine_mode mode;
775 {
776 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
777 }
778
779 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
780 Such values can be the left hand side x in (x << r), using the zvdepi
781 instruction. */
782 int
783 lhs_lshift_cint_operand (op, mode)
784 rtx op;
785 enum machine_mode mode ATTRIBUTE_UNUSED;
786 {
787 unsigned HOST_WIDE_INT x;
788 if (GET_CODE (op) != CONST_INT)
789 return 0;
790 x = INTVAL (op) >> 4;
791 return (x & (x + 1)) == 0;
792 }
793
794 int
795 arith32_operand (op, mode)
796 rtx op;
797 enum machine_mode mode;
798 {
799 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
800 }
801
802 int
803 pc_or_label_operand (op, mode)
804 rtx op;
805 enum machine_mode mode ATTRIBUTE_UNUSED;
806 {
807 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
808 }
809 \f
810 /* Legitimize PIC addresses. If the address is already
811 position-independent, we return ORIG. Newly generated
812 position-independent addresses go to REG. If we need more
813 than one register, we lose. */
814
815 rtx
816 legitimize_pic_address (orig, mode, reg)
817 rtx orig, reg;
818 enum machine_mode mode;
819 {
820 rtx pic_ref = orig;
821
822 /* Labels need special handling. */
823 if (pic_label_operand (orig, mode))
824 {
825 /* We do not want to go through the movXX expanders here since that
826 would create recursion.
827
828 Nor do we really want to call a generator for a named pattern
829 since that requires multiple patterns if we want to support
830 multiple word sizes.
831
832 So instead we just emit the raw set, which avoids the movXX
833 expanders completely. */
834 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
835 current_function_uses_pic_offset_table = 1;
836 return reg;
837 }
838 if (GET_CODE (orig) == SYMBOL_REF)
839 {
840 if (reg == 0)
841 abort ();
842
843 emit_move_insn (reg,
844 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
845 gen_rtx_HIGH (word_mode, orig)));
846 pic_ref
847 = gen_rtx_MEM (Pmode,
848 gen_rtx_LO_SUM (Pmode, reg,
849 gen_rtx_UNSPEC (Pmode,
850 gen_rtvec (1, orig),
851 0)));
852
853 current_function_uses_pic_offset_table = 1;
854 RTX_UNCHANGING_P (pic_ref) = 1;
855 emit_move_insn (reg, pic_ref);
856 return reg;
857 }
858 else if (GET_CODE (orig) == CONST)
859 {
860 rtx base;
861
862 if (GET_CODE (XEXP (orig, 0)) == PLUS
863 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
864 return orig;
865
866 if (reg == 0)
867 abort ();
868
869 if (GET_CODE (XEXP (orig, 0)) == PLUS)
870 {
871 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
872 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
873 base == reg ? 0 : reg);
874 }
875 else abort ();
876 if (GET_CODE (orig) == CONST_INT)
877 {
878 if (INT_14_BITS (orig))
879 return plus_constant (base, INTVAL (orig));
880 orig = force_reg (Pmode, orig);
881 }
882 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
883 /* Likewise, should we set special REG_NOTEs here? */
884 }
885 return pic_ref;
886 }
887
888 /* Try machine-dependent ways of modifying an illegitimate address
889 to be legitimate. If we find one, return the new, valid address.
890 This macro is used in only one place: `memory_address' in explow.c.
891
892 OLDX is the address as it was before break_out_memory_refs was called.
893 In some cases it is useful to look at this to decide what needs to be done.
894
895 MODE and WIN are passed so that this macro can use
896 GO_IF_LEGITIMATE_ADDRESS.
897
898 It is always safe for this macro to do nothing. It exists to recognize
899 opportunities to optimize the output.
900
901 For the PA, transform:
902
903 memory(X + <large int>)
904
905 into:
906
907 if (<large int> & mask) >= 16
908 Y = (<large int> & ~mask) + mask + 1 Round up.
909 else
910 Y = (<large int> & ~mask) Round down.
911 Z = X + Y
912 memory (Z + (<large int> - Y));
913
914 This is for CSE to find several similar references, and only use one Z.
915
916 X can either be a SYMBOL_REF or REG, but because combine can not
917 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
918 D will not fit in 14 bits.
919
920 MODE_FLOAT references allow displacements which fit in 5 bits, so use
921 0x1f as the mask.
922
923 MODE_INT references allow displacements which fit in 14 bits, so use
924 0x3fff as the mask.
925
926 This relies on the fact that most mode MODE_FLOAT references will use FP
927 registers and most mode MODE_INT references will use integer registers.
928 (In the rare case of an FP register used in an integer MODE, we depend
929 on secondary reloads to clean things up.)
930
931
932 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
933 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
934 addressing modes to be used).
935
936 Put X and Z into registers. Then put the entire expression into
937 a register. */
938
939 rtx
940 hppa_legitimize_address (x, oldx, mode)
941 rtx x, oldx ATTRIBUTE_UNUSED;
942 enum machine_mode mode;
943 {
944 rtx orig = x;
945
946 if (flag_pic)
947 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
948
949 /* Strip off CONST. */
950 if (GET_CODE (x) == CONST)
951 x = XEXP (x, 0);
952
953 /* Special case. Get the SYMBOL_REF into a register and use indexing.
954 That should always be safe. */
955 if (GET_CODE (x) == PLUS
956 && GET_CODE (XEXP (x, 0)) == REG
957 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
958 {
959 rtx reg = force_reg (Pmode, XEXP (x, 1));
960 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
961 }
962
963 /* Note we must reject symbols which represent function addresses
964 since the assembler/linker can't handle arithmetic on plabels. */
965 if (GET_CODE (x) == PLUS
966 && GET_CODE (XEXP (x, 1)) == CONST_INT
967 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
968 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
969 || GET_CODE (XEXP (x, 0)) == REG))
970 {
971 rtx int_part, ptr_reg;
972 int newoffset;
973 int offset = INTVAL (XEXP (x, 1));
974 int mask;
975
976 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
977 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
978
979 /* Choose which way to round the offset. Round up if we
980 are >= halfway to the next boundary. */
981 if ((offset & mask) >= ((mask + 1) / 2))
982 newoffset = (offset & ~ mask) + mask + 1;
983 else
984 newoffset = (offset & ~ mask);
985
986 /* If the newoffset will not fit in 14 bits (ldo), then
987 handling this would take 4 or 5 instructions (2 to load
988 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
989 add the new offset and the SYMBOL_REF.) Combine can
990 not handle 4->2 or 5->2 combinations, so do not create
991 them. */
992 if (! VAL_14_BITS_P (newoffset)
993 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
994 {
995 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
996 rtx tmp_reg
997 = force_reg (Pmode,
998 gen_rtx_HIGH (Pmode, const_part));
999 ptr_reg
1000 = force_reg (Pmode,
1001 gen_rtx_LO_SUM (Pmode,
1002 tmp_reg, const_part));
1003 }
1004 else
1005 {
1006 if (! VAL_14_BITS_P (newoffset))
1007 int_part = force_reg (Pmode, GEN_INT (newoffset));
1008 else
1009 int_part = GEN_INT (newoffset);
1010
1011 ptr_reg = force_reg (Pmode,
1012 gen_rtx_PLUS (Pmode,
1013 force_reg (Pmode, XEXP (x, 0)),
1014 int_part));
1015 }
1016 return plus_constant (ptr_reg, offset - newoffset);
1017 }
1018
1019 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1020
1021 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1022 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1023 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1024 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
1025 || GET_CODE (XEXP (x, 1)) == SUBREG)
1026 && GET_CODE (XEXP (x, 1)) != CONST)
1027 {
1028 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1029 rtx reg1, reg2;
1030
1031 reg1 = XEXP (x, 1);
1032 if (GET_CODE (reg1) != REG)
1033 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1034
1035 reg2 = XEXP (XEXP (x, 0), 0);
1036 if (GET_CODE (reg2) != REG)
1037 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1038
1039 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1040 gen_rtx_MULT (Pmode,
1041 reg2,
1042 GEN_INT (val)),
1043 reg1));
1044 }
1045
1046 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1047
1048 Only do so for floating point modes since this is more speculative
1049 and we lose if it's an integer store. */
1050 if (GET_CODE (x) == PLUS
1051 && GET_CODE (XEXP (x, 0)) == PLUS
1052 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1053 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1054 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1055 && (mode == SFmode || mode == DFmode))
1056 {
1057
1058 /* First, try and figure out what to use as a base register. */
1059 rtx reg1, reg2, base, idx, orig_base;
1060
1061 reg1 = XEXP (XEXP (x, 0), 1);
1062 reg2 = XEXP (x, 1);
1063 base = NULL_RTX;
1064 idx = NULL_RTX;
1065
1066 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1067 then emit_move_sequence will turn on REG_POINTER so we'll know
1068 it's a base register below. */
1069 if (GET_CODE (reg1) != REG)
1070 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1071
1072 if (GET_CODE (reg2) != REG)
1073 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1074
1075 /* Figure out what the base and index are. */
1076
1077 if (GET_CODE (reg1) == REG
1078 && REG_POINTER (reg1))
1079 {
1080 base = reg1;
1081 orig_base = XEXP (XEXP (x, 0), 1);
1082 idx = gen_rtx_PLUS (Pmode,
1083 gen_rtx_MULT (Pmode,
1084 XEXP (XEXP (XEXP (x, 0), 0), 0),
1085 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1086 XEXP (x, 1));
1087 }
1088 else if (GET_CODE (reg2) == REG
1089 && REG_POINTER (reg2))
1090 {
1091 base = reg2;
1092 orig_base = XEXP (x, 1);
1093 idx = XEXP (x, 0);
1094 }
1095
1096 if (base == 0)
1097 return orig;
1098
1099 /* If the index adds a large constant, try to scale the
1100 constant so that it can be loaded with only one insn. */
1101 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1102 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1103 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1104 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1105 {
1106 /* Divide the CONST_INT by the scale factor, then add it to A. */
1107 int val = INTVAL (XEXP (idx, 1));
1108
1109 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1110 reg1 = XEXP (XEXP (idx, 0), 0);
1111 if (GET_CODE (reg1) != REG)
1112 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1113
1114 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1115
1116 /* We can now generate a simple scaled indexed address. */
1117 return
1118 force_reg
1119 (Pmode, gen_rtx_PLUS (Pmode,
1120 gen_rtx_MULT (Pmode, reg1,
1121 XEXP (XEXP (idx, 0), 1)),
1122 base));
1123 }
1124
1125 /* If B + C is still a valid base register, then add them. */
1126 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1127 && INTVAL (XEXP (idx, 1)) <= 4096
1128 && INTVAL (XEXP (idx, 1)) >= -4096)
1129 {
1130 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1131 rtx reg1, reg2;
1132
1133 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1134
1135 reg2 = XEXP (XEXP (idx, 0), 0);
1136 if (GET_CODE (reg2) != CONST_INT)
1137 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1138
1139 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1140 gen_rtx_MULT (Pmode,
1141 reg2,
1142 GEN_INT (val)),
1143 reg1));
1144 }
1145
1146 /* Get the index into a register, then add the base + index and
1147 return a register holding the result. */
1148
1149 /* First get A into a register. */
1150 reg1 = XEXP (XEXP (idx, 0), 0);
1151 if (GET_CODE (reg1) != REG)
1152 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1153
1154 /* And get B into a register. */
1155 reg2 = XEXP (idx, 1);
1156 if (GET_CODE (reg2) != REG)
1157 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1158
1159 reg1 = force_reg (Pmode,
1160 gen_rtx_PLUS (Pmode,
1161 gen_rtx_MULT (Pmode, reg1,
1162 XEXP (XEXP (idx, 0), 1)),
1163 reg2));
1164
1165 /* Add the result to our base register and return. */
1166 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1167
1168 }
1169
1170 /* Uh-oh. We might have an address for x[n-100000]. This needs
1171 special handling to avoid creating an indexed memory address
1172 with x-100000 as the base.
1173
1174 If the constant part is small enough, then it's still safe because
1175 there is a guard page at the beginning and end of the data segment.
1176
1177 Scaled references are common enough that we want to try and rearrange the
1178 terms so that we can use indexing for these addresses too. Only
1179      do the optimization for floating point modes.  */
1180
1181 if (GET_CODE (x) == PLUS
1182 && symbolic_expression_p (XEXP (x, 1)))
1183 {
1184 /* Ugly. We modify things here so that the address offset specified
1185 by the index expression is computed first, then added to x to form
1186 the entire address. */
1187
1188 rtx regx1, regx2, regy1, regy2, y;
1189
1190 /* Strip off any CONST. */
1191 y = XEXP (x, 1);
1192 if (GET_CODE (y) == CONST)
1193 y = XEXP (y, 0);
1194
1195 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1196 {
1197 /* See if this looks like
1198 (plus (mult (reg) (shadd_const))
1199 (const (plus (symbol_ref) (const_int))))
1200
1201 Where const_int is small. In that case the const
1202 expression is a valid pointer for indexing.
1203
1204 If const_int is big, but can be divided evenly by shadd_const
1205 and added to (reg). This allows more scaled indexed addresses. */
1206 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1207 && GET_CODE (XEXP (x, 0)) == MULT
1208 && GET_CODE (XEXP (y, 1)) == CONST_INT
1209 && INTVAL (XEXP (y, 1)) >= -4096
1210 && INTVAL (XEXP (y, 1)) <= 4095
1211 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1212 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1213 {
1214 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1215 rtx reg1, reg2;
1216
1217 reg1 = XEXP (x, 1);
1218 if (GET_CODE (reg1) != REG)
1219 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1220
1221 reg2 = XEXP (XEXP (x, 0), 0);
1222 if (GET_CODE (reg2) != REG)
1223 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1224
1225 return force_reg (Pmode,
1226 gen_rtx_PLUS (Pmode,
1227 gen_rtx_MULT (Pmode,
1228 reg2,
1229 GEN_INT (val)),
1230 reg1));
1231 }
1232 else if ((mode == DFmode || mode == SFmode)
1233 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1234 && GET_CODE (XEXP (x, 0)) == MULT
1235 && GET_CODE (XEXP (y, 1)) == CONST_INT
1236 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1238 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1239 {
1240 regx1
1241 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1242 / INTVAL (XEXP (XEXP (x, 0), 1))));
1243 regx2 = XEXP (XEXP (x, 0), 0);
1244 if (GET_CODE (regx2) != REG)
1245 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1246 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1247 regx2, regx1));
1248 return
1249 force_reg (Pmode,
1250 gen_rtx_PLUS (Pmode,
1251 gen_rtx_MULT (Pmode, regx2,
1252 XEXP (XEXP (x, 0), 1)),
1253 force_reg (Pmode, XEXP (y, 0))));
1254 }
1255 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1256 && INTVAL (XEXP (y, 1)) >= -4096
1257 && INTVAL (XEXP (y, 1)) <= 4095)
1258 {
1259 /* This is safe because of the guard page at the
1260 beginning and end of the data space. Just
1261 return the original address. */
1262 return orig;
1263 }
1264 else
1265 {
1266 /* Doesn't look like one we can optimize. */
1267 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1268 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1269 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1270 regx1 = force_reg (Pmode,
1271 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1272 regx1, regy2));
1273 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1274 }
1275 }
1276 }
1277
1278 return orig;
1279 }
1280
1281 /* For the HPPA, REG and REG+CONST is cost 0
1282 and addresses involving symbolic constants are cost 2.
1283
1284 PIC addresses are very expensive.
1285
1286 It is no coincidence that this has the same structure
1287 as GO_IF_LEGITIMATE_ADDRESS. */
1288 int
1289 hppa_address_cost (X)
1290 rtx X;
1291 {
1292 if (GET_CODE (X) == PLUS)
1293 return 1;
1294 else if (GET_CODE (X) == LO_SUM)
1295 return 1;
1296 else if (GET_CODE (X) == HIGH)
1297 return 2;
1298 return 4;
1299 }
1300
1301 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1302 new rtx with the correct mode. */
1303 static inline rtx
1304 force_mode (mode, orig)
1305 enum machine_mode mode;
1306 rtx orig;
1307 {
1308 if (mode == GET_MODE (orig))
1309 return orig;
1310
1311 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1312 abort ();
1313
1314 return gen_rtx_REG (mode, REGNO (orig));
1315 }
1316
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* During reload, replace a pseudo that did not get a hard register
     by its equivalent memory location, so the secondary-reload cases
     below see the actual MEM.  */
  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  /* Same pseudo-to-memory replacement for the source operand.  */
  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  /* Substitute any pending reload replacements into MEM addresses now,
     so the address-legitimacy tests below operate on the final form.  */
  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* Load the displacement, then rebuild the address with the
	     displacement held in the scratch register.  */
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0), 0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* The mirror case: storing an FP register to an out-of-range
     REG+D address.  */
  else if (fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
	   && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
	   && CONSTANT_P (operand1)
	   && scratch_reg)
    {
      rtx xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      /* NOTE(review): other SETs in this function pass VOIDmode as the
	 first argument to gen_rtx_SET; this one passes MODE -- confirm
	 whether that is intentional.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
	   && scratch_reg)
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
						    scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  /* Before reload, store DFmode zero through an intermediate
	     register rather than directly to memory.  */
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  /* Force the source into a register so a plain store remains
	     for the caller to emit (we fall through and return 0).  */
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  if (GET_CODE (const_part) != CONST_INT)
		    abort ();

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}


	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }

	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  operands[1] = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
							mode, temp);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (operand0) = 1;
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (temp) = 1;
	      /* NOTE(review): this SET also passes MODE rather than the
		 VOIDmode used elsewhere in this function -- confirm.  */
	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      /* Emit the HIGH part first, then the SET that consumes it.  */
	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);

	    }
	  return 1;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || ! cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx extend = NULL_RTX;
	  rtx temp;

	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT val = INTVAL (operand1);
	      HOST_WIDE_INT nval;

	      /* Extract the low order 32 bits of the value and sign extend.
		 If the new value is the same as the original value, we can
		 use the original value as-is.  If the new value is
		 different, we use it and insert the most-significant 32-bits
		 of the original value into the final result.  */
	      nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
	      if (val != nval)
		{
#if HOST_BITS_PER_WIDE_INT > 32
		  extend = GEN_INT (val >> 32);
#endif
		  operand1 = GEN_INT (nval);
		}
	    }

	  if (reload_in_progress || reload_completed)
	    temp = operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  /* We don't directly split DImode constants on 32-bit targets
	     because PLUS uses an 11-bit immediate and the insn sequence
	     generated is not as efficient as the one using HIGH/LO_SUM.  */
	  if (GET_CODE (operand1) == CONST_INT
	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
	    {
	      /* Directly break constant into high and low parts.  This
		 provides better optimization opportunities because various
		 passes recognize constants split with PLUS but not LO_SUM.
		 We use a 14-bit signed low part except when the addition
		 of 0x4000 to the high part might change the sign of the
		 high part.  */
	      HOST_WIDE_INT value = INTVAL (operand1);
	      HOST_WIDE_INT low = value & 0x3fff;
	      HOST_WIDE_INT high = value & ~ 0x3fff;

	      if (low >= 0x2000)
		{
		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
		    high += 0x2000;
		  else
		    high += 0x4000;
		}

	      low = value - high;

	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	    }
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, temp,
				      gen_rtx_HIGH (mode, operand1)));
	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	    }

	  emit_move_insn (operands[0], operands[1]);

	  /* Insert the saved most-significant 32 bits on 64-bit targets.  */
	  if (extend != NULL_RTX)
	    emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
				 extend));

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
1818
1819 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1820 it will need a link/runtime reloc). */
1821
1822 int
1823 reloc_needed (exp)
1824 tree exp;
1825 {
1826 int reloc = 0;
1827
1828 switch (TREE_CODE (exp))
1829 {
1830 case ADDR_EXPR:
1831 return 1;
1832
1833 case PLUS_EXPR:
1834 case MINUS_EXPR:
1835 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1836 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1837 break;
1838
1839 case NOP_EXPR:
1840 case CONVERT_EXPR:
1841 case NON_LVALUE_EXPR:
1842 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1843 break;
1844
1845 case CONSTRUCTOR:
1846 {
1847 register tree link;
1848 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1849 if (TREE_VALUE (link) != 0)
1850 reloc |= reloc_needed (TREE_VALUE (link));
1851 }
1852 break;
1853
1854 case ERROR_MARK:
1855 break;
1856
1857 default:
1858 break;
1859 }
1860 return reloc;
1861 }
1862
1863 /* Does operand (which is a symbolic_operand) live in text space?
1864 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
1865 will be true. */
1866
1867 int
1868 read_only_operand (operand, mode)
1869 rtx operand;
1870 enum machine_mode mode ATTRIBUTE_UNUSED;
1871 {
1872 if (GET_CODE (operand) == CONST)
1873 operand = XEXP (XEXP (operand, 0), 0);
1874 if (flag_pic)
1875 {
1876 if (GET_CODE (operand) == SYMBOL_REF)
1877 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1878 }
1879 else
1880 {
1881 if (GET_CODE (operand) == SYMBOL_REF)
1882 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1883 }
1884 return 1;
1885 }
1886
1887 \f
1888 /* Return the best assembler insn template
1889 for moving operands[1] into operands[0] as a fullword. */
1890 const char *
1891 singlemove_string (operands)
1892 rtx *operands;
1893 {
1894 HOST_WIDE_INT intval;
1895
1896 if (GET_CODE (operands[0]) == MEM)
1897 return "stw %r1,%0";
1898 if (GET_CODE (operands[1]) == MEM)
1899 return "ldw %1,%0";
1900 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1901 {
1902 long i;
1903 REAL_VALUE_TYPE d;
1904
1905 if (GET_MODE (operands[1]) != SFmode)
1906 abort ();
1907
1908 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1909 bit pattern. */
1910 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1911 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1912
1913 operands[1] = GEN_INT (i);
1914 /* Fall through to CONST_INT case. */
1915 }
1916 if (GET_CODE (operands[1]) == CONST_INT)
1917 {
1918 intval = INTVAL (operands[1]);
1919
1920 if (VAL_14_BITS_P (intval))
1921 return "ldi %1,%0";
1922 else if ((intval & 0x7ff) == 0)
1923 return "ldil L'%1,%0";
1924 else if (zdepi_cint_p (intval))
1925 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
1926 else
1927 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1928 }
1929 return "copy %1,%0";
1930 }
1931 \f
1932
1933 /* Compute position (in OP[1]) and width (in OP[2])
1934 useful for copying IMM to a register using the zdepi
1935 instructions. Store the immediate value to insert in OP[0]. */
1936 static void
1937 compute_zdepwi_operands (imm, op)
1938 unsigned HOST_WIDE_INT imm;
1939 unsigned *op;
1940 {
1941 int lsb, len;
1942
1943 /* Find the least significant set bit in IMM. */
1944 for (lsb = 0; lsb < 32; lsb++)
1945 {
1946 if ((imm & 1) != 0)
1947 break;
1948 imm >>= 1;
1949 }
1950
1951 /* Choose variants based on *sign* of the 5-bit field. */
1952 if ((imm & 0x10) == 0)
1953 len = (lsb <= 28) ? 4 : 32 - lsb;
1954 else
1955 {
1956 /* Find the width of the bitstring in IMM. */
1957 for (len = 5; len < 32; len++)
1958 {
1959 if ((imm & (1 << len)) == 0)
1960 break;
1961 }
1962
1963 /* Sign extend IMM as a 5-bit value. */
1964 imm = (imm & 0xf) - 0x10;
1965 }
1966
1967 op[0] = imm;
1968 op[1] = 31 - lsb;
1969 op[2] = len;
1970 }
1971
1972 /* Compute position (in OP[1]) and width (in OP[2])
1973 useful for copying IMM to a register using the depdi,z
1974 instructions. Store the immediate value to insert in OP[0]. */
1975 void
1976 compute_zdepdi_operands (imm, op)
1977 unsigned HOST_WIDE_INT imm;
1978 unsigned *op;
1979 {
1980 HOST_WIDE_INT lsb, len;
1981
1982 /* Find the least significant set bit in IMM. */
1983 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
1984 {
1985 if ((imm & 1) != 0)
1986 break;
1987 imm >>= 1;
1988 }
1989
1990 /* Choose variants based on *sign* of the 5-bit field. */
1991 if ((imm & 0x10) == 0)
1992 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
1993 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
1994 else
1995 {
1996 /* Find the width of the bitstring in IMM. */
1997 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
1998 {
1999 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2000 break;
2001 }
2002
2003 /* Sign extend IMM as a 5-bit value. */
2004 imm = (imm & 0xf) - 0x10;
2005 }
2006
2007 op[0] = imm;
2008 op[1] = 63 - lsb;
2009 op[2] = len;
2010 }
2011
2012 /* Output assembler code to perform a doubleword move insn
2013 with operands OPERANDS. */
2014
2015 const char *
2016 output_move_double (operands)
2017 rtx *operands;
2018 {
2019 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2020 rtx latehalf[2];
2021 rtx addreg0 = 0, addreg1 = 0;
2022
2023 /* First classify both operands. */
2024
2025 if (REG_P (operands[0]))
2026 optype0 = REGOP;
2027 else if (offsettable_memref_p (operands[0]))
2028 optype0 = OFFSOP;
2029 else if (GET_CODE (operands[0]) == MEM)
2030 optype0 = MEMOP;
2031 else
2032 optype0 = RNDOP;
2033
2034 if (REG_P (operands[1]))
2035 optype1 = REGOP;
2036 else if (CONSTANT_P (operands[1]))
2037 optype1 = CNSTOP;
2038 else if (offsettable_memref_p (operands[1]))
2039 optype1 = OFFSOP;
2040 else if (GET_CODE (operands[1]) == MEM)
2041 optype1 = MEMOP;
2042 else
2043 optype1 = RNDOP;
2044
2045 /* Check for the cases that the operand constraints are not
2046 supposed to allow to happen. Abort if we get one,
2047 because generating code for these cases is painful. */
2048
2049 if (optype0 != REGOP && optype1 != REGOP)
2050 abort ();
2051
2052 /* Handle auto decrementing and incrementing loads and stores
2053 specifically, since the structure of the function doesn't work
2054 for them without major modification. Do it better when we learn
2055 this port about the general inc/dec addressing of PA.
2056 (This was written by tege. Chide him if it doesn't work.) */
2057
2058 if (optype0 == MEMOP)
2059 {
2060 /* We have to output the address syntax ourselves, since print_operand
2061 doesn't deal with the addresses we want to use. Fix this later. */
2062
2063 rtx addr = XEXP (operands[0], 0);
2064 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2065 {
2066 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2067
2068 operands[0] = XEXP (addr, 0);
2069 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2070 abort ();
2071
2072 if (!reg_overlap_mentioned_p (high_reg, addr))
2073 {
2074 /* No overlap between high target register and address
2075 register. (We do this in a non-obvious way to
2076 save a register file writeback) */
2077 if (GET_CODE (addr) == POST_INC)
2078 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2079 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2080 }
2081 else
2082 abort ();
2083 }
2084 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2085 {
2086 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2087
2088 operands[0] = XEXP (addr, 0);
2089 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2090 abort ();
2091
2092 if (!reg_overlap_mentioned_p (high_reg, addr))
2093 {
2094 /* No overlap between high target register and address
2095 register. (We do this in a non-obvious way to
2096 save a register file writeback) */
2097 if (GET_CODE (addr) == PRE_INC)
2098 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2099 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2100 }
2101 else
2102 abort ();
2103 }
2104 }
2105 if (optype1 == MEMOP)
2106 {
2107 /* We have to output the address syntax ourselves, since print_operand
2108 doesn't deal with the addresses we want to use. Fix this later. */
2109
2110 rtx addr = XEXP (operands[1], 0);
2111 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2112 {
2113 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2114
2115 operands[1] = XEXP (addr, 0);
2116 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2117 abort ();
2118
2119 if (!reg_overlap_mentioned_p (high_reg, addr))
2120 {
2121 /* No overlap between high target register and address
2122 register. (We do this in a non-obvious way to
2123 save a register file writeback) */
2124 if (GET_CODE (addr) == POST_INC)
2125 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2126 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2127 }
2128 else
2129 {
2130 /* This is an undefined situation. We should load into the
2131 address register *and* update that register. Probably
2132 we don't need to handle this at all. */
2133 if (GET_CODE (addr) == POST_INC)
2134 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2135 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2136 }
2137 }
2138 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2139 {
2140 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2141
2142 operands[1] = XEXP (addr, 0);
2143 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2144 abort ();
2145
2146 if (!reg_overlap_mentioned_p (high_reg, addr))
2147 {
2148 /* No overlap between high target register and address
2149 register. (We do this in a non-obvious way to
2150 save a register file writeback) */
2151 if (GET_CODE (addr) == PRE_INC)
2152 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2153 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2154 }
2155 else
2156 {
2157 /* This is an undefined situation. We should load into the
2158 address register *and* update that register. Probably
2159 we don't need to handle this at all. */
2160 if (GET_CODE (addr) == PRE_INC)
2161 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2162 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2163 }
2164 }
2165 else if (GET_CODE (addr) == PLUS
2166 && GET_CODE (XEXP (addr, 0)) == MULT)
2167 {
2168 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2169
2170 if (!reg_overlap_mentioned_p (high_reg, addr))
2171 {
2172 rtx xoperands[3];
2173
2174 xoperands[0] = high_reg;
2175 xoperands[1] = XEXP (addr, 1);
2176 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2177 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2178 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2179 xoperands);
2180 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2181 }
2182 else
2183 {
2184 rtx xoperands[3];
2185
2186 xoperands[0] = high_reg;
2187 xoperands[1] = XEXP (addr, 1);
2188 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2189 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2190 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2191 xoperands);
2192 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2193 }
2194 }
2195 }
2196
2197 /* If an operand is an unoffsettable memory ref, find a register
2198 we can increment temporarily to make it refer to the second word. */
2199
2200 if (optype0 == MEMOP)
2201 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2202
2203 if (optype1 == MEMOP)
2204 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2205
2206 /* Ok, we can do one word at a time.
2207 Normally we do the low-numbered word first.
2208
2209 In either case, set up in LATEHALF the operands to use
2210 for the high-numbered word and in some cases alter the
2211 operands in OPERANDS to be suitable for the low-numbered word. */
2212
2213 if (optype0 == REGOP)
2214 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2215 else if (optype0 == OFFSOP)
2216 latehalf[0] = adjust_address (operands[0], SImode, 4);
2217 else
2218 latehalf[0] = operands[0];
2219
2220 if (optype1 == REGOP)
2221 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2222 else if (optype1 == OFFSOP)
2223 latehalf[1] = adjust_address (operands[1], SImode, 4);
2224 else if (optype1 == CNSTOP)
2225 split_double (operands[1], &operands[1], &latehalf[1]);
2226 else
2227 latehalf[1] = operands[1];
2228
2229 /* If the first move would clobber the source of the second one,
2230 do them in the other order.
2231
2232 This can happen in two cases:
2233
2234 mem -> register where the first half of the destination register
2235 is the same register used in the memory's address. Reload
2236 can create such insns.
2237
2238 mem in this case will be either register indirect or register
2239 indirect plus a valid offset.
2240
2241 register -> register move where REGNO(dst) == REGNO(src + 1)
2242 someone (Tim/Tege?) claimed this can happen for parameter loads.
2243
2244 Handle mem -> register case first. */
2245 if (optype0 == REGOP
2246 && (optype1 == MEMOP || optype1 == OFFSOP)
2247 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2248 operands[1], 0))
2249 {
2250 /* Do the late half first. */
2251 if (addreg1)
2252 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2253 output_asm_insn (singlemove_string (latehalf), latehalf);
2254
2255 /* Then clobber. */
2256 if (addreg1)
2257 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2258 return singlemove_string (operands);
2259 }
2260
2261 /* Now handle register -> register case. */
2262 if (optype0 == REGOP && optype1 == REGOP
2263 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2264 {
2265 output_asm_insn (singlemove_string (latehalf), latehalf);
2266 return singlemove_string (operands);
2267 }
2268
2269 /* Normal case: do the two words, low-numbered first. */
2270
2271 output_asm_insn (singlemove_string (operands), operands);
2272
2273 /* Make any unoffsettable addresses point at high-numbered word. */
2274 if (addreg0)
2275 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2276 if (addreg1)
2277 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2278
2279 /* Do that word. */
2280 output_asm_insn (singlemove_string (latehalf), latehalf);
2281
2282 /* Undo the adds we just did. */
2283 if (addreg0)
2284 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2285 if (addreg1)
2286 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2287
2288 return "";
2289 }
2290 \f
2291 const char *
2292 output_fp_move_double (operands)
2293 rtx *operands;
2294 {
2295 if (FP_REG_P (operands[0]))
2296 {
2297 if (FP_REG_P (operands[1])
2298 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2299 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2300 else
2301 output_asm_insn ("fldd%F1 %1,%0", operands);
2302 }
2303 else if (FP_REG_P (operands[1]))
2304 {
2305 output_asm_insn ("fstd%F0 %1,%0", operands);
2306 }
2307 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2308 {
2309 if (GET_CODE (operands[0]) == REG)
2310 {
2311 rtx xoperands[2];
2312 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2313 xoperands[0] = operands[0];
2314 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2315 }
2316 /* This is a pain. You have to be prepared to deal with an
2317 arbitrary address here including pre/post increment/decrement.
2318
2319 so avoid this in the MD. */
2320 else
2321 abort ();
2322 }
2323 else abort ();
2324 return "";
2325 }
2326 \f
2327 /* Return a REG that occurs in ADDR with coefficient 1.
2328 ADDR can be effectively incremented by incrementing REG. */
2329
2330 static rtx
2331 find_addr_reg (addr)
2332 rtx addr;
2333 {
2334 while (GET_CODE (addr) == PLUS)
2335 {
2336 if (GET_CODE (XEXP (addr, 0)) == REG)
2337 addr = XEXP (addr, 0);
2338 else if (GET_CODE (XEXP (addr, 1)) == REG)
2339 addr = XEXP (addr, 1);
2340 else if (CONSTANT_P (XEXP (addr, 0)))
2341 addr = XEXP (addr, 1);
2342 else if (CONSTANT_P (XEXP (addr, 1)))
2343 addr = XEXP (addr, 0);
2344 else
2345 abort ();
2346 }
2347 if (GET_CODE (addr) == REG)
2348 return addr;
2349 abort ();
2350 }
2351
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.

   Returns an empty template; all code is emitted via output_asm_insn.
   Each copy loop moves two units per iteration: the addib decrements
   the counter and branches back 12 bytes (three insns) to the first
   load, with the final store sitting in its delay slot.  */

const char *
output_block_move (operands, size_is_constant)
     rtx *operands;
     int size_is_constant ATTRIBUTE_UNUSED;
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than four bytes at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > 4)
    align = 4;

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 4:
      /* Pre-adjust the loop counter.  Subtract 8 because the loop
	 body copies 8 bytes and the addib tests >= 0.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two word loads, two word stores, post-modify
	 addressing throughout.  */
      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
      output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 8 != 0)
	{
	  /* %4 is reused as the byte count for the final stby.  */
	  operands[4] = GEN_INT (n_bytes % 4);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("ldw 0(%1),%6", operands);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	  if (n_bytes % 4 != 0)
	    /* Store the trailing 1-3 bytes with store-bytes, ending
	       at byte offset %4.  */
	    output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	}
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: halfword loads and stores, 4 bytes/iteration.  */
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

      /* Handle the residual (up to 3 bytes).  */
      if (n_bytes % 4 != 0)
	{
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("ldb 0(%1),%6", operands);
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("stb %6,0(%0)", operands);
	}
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: byte loads and stores, 2 bytes/iteration.  */
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
      output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

      /* Handle the residual (at most one byte).  */
      if (n_bytes % 2 != 0)
	{
	  output_asm_insn ("ldb 0(%1),%3", operands);
	  output_asm_insn ("stb %3,0(%0)", operands);
	}
      return "";

    default:
      abort ();
    }
}
2457
2458 /* Count the number of insns necessary to handle this block move.
2459
2460 Basic structure is the same as emit_block_move, except that we
2461 count insns rather than emit them. */
2462
2463 static int
2464 compute_movstrsi_length (insn)
2465 rtx insn;
2466 {
2467 rtx pat = PATTERN (insn);
2468 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2469 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2470 unsigned int n_insns = 0;
2471
2472 /* We can't move more than four bytes at a time because the PA
2473 has no longer integer move insns. (Could use fp mem ops?) */
2474 if (align > 4)
2475 align = 4;
2476
2477 /* The basic copying loop. */
2478 n_insns = 6;
2479
2480 /* Residuals. */
2481 if (n_bytes % (2 * align) != 0)
2482 {
2483 if ((n_bytes % (2 * align)) >= align)
2484 n_insns += 2;
2485
2486 if ((n_bytes % align) != 0)
2487 n_insns += 2;
2488 }
2489
2490 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2491 return n_insns * 4;
2492 }
2493 \f
2494
2495 const char *
2496 output_and (operands)
2497 rtx *operands;
2498 {
2499 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2500 {
2501 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2502 int ls0, ls1, ms0, p, len;
2503
2504 for (ls0 = 0; ls0 < 32; ls0++)
2505 if ((mask & (1 << ls0)) == 0)
2506 break;
2507
2508 for (ls1 = ls0; ls1 < 32; ls1++)
2509 if ((mask & (1 << ls1)) != 0)
2510 break;
2511
2512 for (ms0 = ls1; ms0 < 32; ms0++)
2513 if ((mask & (1 << ms0)) == 0)
2514 break;
2515
2516 if (ms0 != 32)
2517 abort ();
2518
2519 if (ls1 == 32)
2520 {
2521 len = ls0;
2522
2523 if (len == 0)
2524 abort ();
2525
2526 operands[2] = GEN_INT (len);
2527 return "{extru|extrw,u} %1,31,%2,%0";
2528 }
2529 else
2530 {
2531 /* We could use this `depi' for the case above as well, but `depi'
2532 requires one more register file access than an `extru'. */
2533
2534 p = 31 - ls0;
2535 len = ls1 - ls0;
2536
2537 operands[2] = GEN_INT (p);
2538 operands[3] = GEN_INT (len);
2539 return "{depi|depwi} 0,%2,%3,%0";
2540 }
2541 }
2542 else
2543 return "and %1,%2,%0";
2544 }
2545
2546 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2547 storing the result in operands[0]. */
2548 const char *
2549 output_64bit_and (operands)
2550 rtx *operands;
2551 {
2552 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2553 {
2554 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2555 int ls0, ls1, ms0, p, len;
2556
2557 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2558 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2559 break;
2560
2561 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2562 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2563 break;
2564
2565 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2566 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2567 break;
2568
2569 if (ms0 != HOST_BITS_PER_WIDE_INT)
2570 abort ();
2571
2572 if (ls1 == HOST_BITS_PER_WIDE_INT)
2573 {
2574 len = ls0;
2575
2576 if (len == 0)
2577 abort ();
2578
2579 operands[2] = GEN_INT (len);
2580 return "extrd,u %1,63,%2,%0";
2581 }
2582 else
2583 {
2584 /* We could use this `depi' for the case above as well, but `depi'
2585 requires one more register file access than an `extru'. */
2586
2587 p = 63 - ls0;
2588 len = ls1 - ls0;
2589
2590 operands[2] = GEN_INT (p);
2591 operands[3] = GEN_INT (len);
2592 return "depdi 0,%2,%3,%0";
2593 }
2594 }
2595 else
2596 return "and %1,%2,%0";
2597 }
2598
2599 const char *
2600 output_ior (operands)
2601 rtx *operands;
2602 {
2603 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2604 int bs0, bs1, p, len;
2605
2606 if (INTVAL (operands[2]) == 0)
2607 return "copy %1,%0";
2608
2609 for (bs0 = 0; bs0 < 32; bs0++)
2610 if ((mask & (1 << bs0)) != 0)
2611 break;
2612
2613 for (bs1 = bs0; bs1 < 32; bs1++)
2614 if ((mask & (1 << bs1)) == 0)
2615 break;
2616
2617 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2618 abort ();
2619
2620 p = 31 - bs0;
2621 len = bs1 - bs0;
2622
2623 operands[2] = GEN_INT (p);
2624 operands[3] = GEN_INT (len);
2625 return "{depi|depwi} -1,%2,%3,%0";
2626 }
2627
/* Return a string to perform a bitwise inclusive-or of operands[1]
   with operands[2], storing the result in operands[0].  A nonzero
   constant operands[2] must consist of a single contiguous run of
   one bits (we abort otherwise).  */
const char *
output_64bit_ior (operands)
     rtx *operands;
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  /* IOR with zero is just a register copy.  */
  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  /* Find the first set bit.  */
  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  /* Find the first clear bit above it.  */
  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  /* There must be no set bits above the run.  */
  if (bs1 != HOST_BITS_PER_WIDE_INT
      && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
    abort ();

  /* Deposit -1s over the run of ones.  */
  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
2659 \f
2660 /* Target hook for assembling integer objects. This code handles
2661 aligned SI and DI integers specially, since function references must
2662 be preceded by P%. */
2663
2664 static bool
2665 pa_assemble_integer (x, size, aligned_p)
2666 rtx x;
2667 unsigned int size;
2668 int aligned_p;
2669 {
2670 if (size == UNITS_PER_WORD && aligned_p
2671 && function_label_operand (x, VOIDmode))
2672 {
2673 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2674 output_addr_const (asm_out_file, x);
2675 fputc ('\n', asm_out_file);
2676 return true;
2677 }
2678 return default_assemble_integer (x, size, aligned_p);
2679 }
2680 \f
/* Output an ascii string as a .STRING assembler directive.

   FILE is the assembler output stream; P points at SIZE bytes of data
   (not necessarily NUL terminated).  Printable characters other than
   `"' and `\' are emitted verbatim; those two are backslash-escaped
   and everything else becomes a two-digit \xNN hex escape.

   The HP assembler can only take strings of 256 characters at one
   time.  This is a limitation on input line length, *not* the
   length of the string.  Sigh.  Even worse, it seems that the
   restriction is in number of input characters (see \xnn &
   \whatever).  So we have to do this very carefully.  */
void
output_ascii (file, p, size)
     FILE *file;
     const char *p;
     int size;
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io;
      /* Number of input characters in this group (up to four).  */
      int limit = size - i < 4 ? size - i : 4;

      for (io = 0; io < limit; io++)
	{
	  unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Emit a lower-case two-digit hex escape.  */
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      partial_output[co++] = "0123456789abcdef"[c / 16];
	      partial_output[co++] = "0123456789abcdef"[c % 16];
	    }
	}

      /* Start a fresh .STRING directive before this group would push
	 the assembler input line past its limit.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }
  fputs ("\"\n", file);
}
2739
/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (insns, check_notes)
     rtx insns;
     int check_notes;
{
  rtx insn;
  /* Toggled at the end of every call but never read in this function;
     presumably kept for debugging purposes -- TODO confirm.  */
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.
	 Note this overwrites the INSNS argument with the current
	 insn chain.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP (hard register 0), then we've
	     found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction -- a conditional jump on
	     (ne CCFP 0) -- bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}


      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = insns; insn; insn = next_insn (insn))
	{
	  rtx tmp, next;

	  /* Ignore anything that isn't an INSN.  */
	  if (GET_CODE (insn) != INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (GET_CODE (next) == JUMP_INSN
		  || GET_CODE (next) == CALL_INSN
		  || GET_CODE (next) == CODE_LABEL)
		break;

	      /* As does another fcmp insn.  */
	      if (GET_CODE (next) == INSN
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT_INSN a branch?  */
	  if (next
	      && GET_CODE (next) == JUMP_INSN)
	    {
	      rtx pattern = PATTERN (next);

	      /* If it a reversed fp conditional branch (eg uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  The (eq (pc)) first arm of the
		 IF_THEN_ELSE marks the reversed form.  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch by swapping the two arms of the
		     IF_THEN_ELSE; clear INSN_CODE so the insn is
		     re-recognized.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    (reverse_condition_maybe_unordered
			     (GET_CODE (XEXP (tmp, 1)))));
		}
	    }
	}
    }

  pass = !pass;

}
2890 \f
2891 /* You may have trouble believing this, but this is the 32 bit HP-PA
2892 stack layout. Wow.
2893
2894 Offset Contents
2895
2896 Variable arguments (optional; any number may be allocated)
2897
2898 SP-(4*(N+9)) arg word N
2899 : :
2900 SP-56 arg word 5
2901 SP-52 arg word 4
2902
2903 Fixed arguments (must be allocated; may remain unused)
2904
2905 SP-48 arg word 3
2906 SP-44 arg word 2
2907 SP-40 arg word 1
2908 SP-36 arg word 0
2909
2910 Frame Marker
2911
2912 SP-32 External Data Pointer (DP)
2913 SP-28 External sr4
2914 SP-24 External/stub RP (RP')
2915 SP-20 Current RP
2916 SP-16 Static Link
2917 SP-12 Clean up
2918 SP-8 Calling Stub RP (RP'')
2919 SP-4 Previous SP
2920
2921 Top of Frame
2922
2923 SP-0 Stack Pointer (points to next available address)
2924
2925 */
2926
2927 /* This function saves registers as follows. Registers marked with ' are
2928 this function's registers (as opposed to the previous function's).
2929 If a frame_pointer isn't needed, r4 is saved as a general register;
2930 the space for the frame pointer is still allocated, though, to keep
2931 things simple.
2932
2933
2934 Top of Frame
2935
2936 SP (FP') Previous FP
2937 SP + 4 Alignment filler (sigh)
2938 SP + 8 Space for locals reserved here.
2939 .
2940 .
2941 .
2942 SP + n All call saved register used.
2943 .
2944 .
2945 .
2946 SP + o All call saved fp registers used.
2947 .
2948 .
2949 .
2950 SP + p (SP') points to next available address.
2951
2952 */
2953
/* Frame layout state shared by the prologue/epilogue routines below.
   NOTE(review): the original comment credited output_function_prologue()
   with setting these; the assignments are outside this excerpt --
   confirm where they actually occur.  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static int actual_fsize;
/* LOCAL_FSIZE is the local-variable portion of the frame; SAVE_FREGS
   is nonzero when floating point registers must be saved (compare the
   FREGS_LIVE out-parameter of compute_frame_size below).  */
static int local_fsize, save_fregs;
2959
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.  */

static void
store_reg (reg, disp, base)
     int reg, disp, base;
{
  rtx insn, dest, src, basereg;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      /* The displacement fits directly in a load/store offset.  */
      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else
    {
      /* Build the high part of the address in %r1, then store via
	 a LO_SUM of %r1 and the displacement.  */
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);
      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
	{
	  /* The emitted address goes through %r1, so attach a note
	     giving the canonical BASE+DELTA form for the unwind info.  */
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
		gen_rtx_SET (VOIDmode,
			     gen_rtx_MEM (word_mode,
					  gen_rtx_PLUS (word_mode, basereg,
							delta)),
			     src),
		REG_NOTES (insn));
	}
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
3003
/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k.  */

static void
store_reg_modify (base, reg, mod)
     int base, reg, mod;
{
  rtx insn, basereg, srcreg, delta;

  /* The post-modify pattern can only encode a 14-bit displacement.  */
  if (! VAL_14_BITS_P (mod))
    abort ();

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  /* gen_post_store produces a PARALLEL of the store and the base
     register update.  */
  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
	 in a parallel with more than one element.  Don't set
	 RTX_FRAME_RELATED_P in the first set if reg is temporary
	 register 1. The effect of this operation is recorded in
	 the initial copy.  */
      if (reg != 1)
	{
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	}
      else
	{
	  /* The first element of a PARALLEL is always processed if it is
	     a SET.  Thus, we need an expression list for this case.  */
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
		gen_rtx_SET (VOIDmode, basereg,
			     gen_rtx_PLUS (word_mode, basereg, delta)),
		REG_NOTES (insn));
	}
    }
}
3047
3048 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3049 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3050 whether to add a frame note or not.
3051
3052 In the DISP > 8k case, we leave the high part of the address in %r1.
3053 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3054
3055 static void
3056 set_reg_plus_d (reg, base, disp, note)
3057 int reg, base, disp, note;
3058 {
3059 rtx insn;
3060
3061 if (VAL_14_BITS_P (disp))
3062 {
3063 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3064 plus_constant (gen_rtx_REG (Pmode, base), disp));
3065 }
3066 else
3067 {
3068 rtx basereg = gen_rtx_REG (Pmode, base);
3069 rtx delta = GEN_INT (disp);
3070
3071 emit_move_insn (gen_rtx_REG (Pmode, 1),
3072 gen_rtx_PLUS (Pmode, basereg,
3073 gen_rtx_HIGH (Pmode, delta)));
3074 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3075 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3076 delta));
3077 }
3078
3079 if (DO_FRAME_NOTES && note)
3080 RTX_FRAME_RELATED_P (insn) = 1;
3081 }
3082
3083 int
3084 compute_frame_size (size, fregs_live)
3085 int size;
3086 int *fregs_live;
3087 {
3088 int i, fsize;
3089
3090 /* Space for frame pointer + filler. If any frame is allocated
3091 we need to add this in because of STARTING_FRAME_OFFSET.
3092
3093 Similar code also appears in hppa_expand_prologue. Change both
3094 of them at the same time. */
3095 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
3096
3097 /* If the current function calls __builtin_eh_return, then we need
3098 to allocate stack space for registers that will hold data for
3099 the exception handler. */
3100 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3101 {
3102 unsigned int i;
3103
3104 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3105 continue;
3106 fsize += i * UNITS_PER_WORD;
3107 }
3108
3109 /* Account for space used by the callee general register saves. */
3110 for (i = 18; i >= 3; i--)
3111 if (regs_ever_live[i])
3112 fsize += UNITS_PER_WORD;
3113
3114 /* Round the stack. */
3115 fsize = (fsize + 7) & ~7;
3116
3117 /* Account for space used by the callee floating point register saves. */
3118 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3119 if (regs_ever_live[i]
3120 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3121 {
3122 if (fregs_live)
3123 *fregs_live = 1;
3124
3125 /* We always save both halves of the FP register, so always
3126 increment the frame size by 8 bytes. */
3127 fsize += 8;
3128 }
3129
3130 /* The various ABIs include space for the outgoing parameters in the
3131 size of the current function's stack frame. */
3132 fsize += current_function_outgoing_args_size;
3133
3134 /* Allocate space for the fixed frame marker. This space must be
3135 allocated for any function that makes calls or otherwise allocates
3136 stack space. */
3137 if (!current_function_is_leaf || fsize)
3138 fsize += TARGET_64BIT ? 16 : 32;
3139
3140 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
3141 }
3142
/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */

/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.

   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */

static void
pa_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  The frame size (actual_fsize) and the register
     save counts (gr_saved, fr_saved) were computed there.  */
  fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
  if (regs_ever_live[2])
    fputs (",CALLS,SAVE_RP", file);
  else
    fputs (",NO_CALLS", file);

  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  /* If we're using GAS and SOM, and not using the portable runtime model,
     or function sections, then we don't need to accumulate the total number
     of code bytes.  */
  if ((TARGET_GAS && TARGET_SOM && ! TARGET_PORTABLE_RUNTIME)
      || flag_function_sections)
    total_code_bytes = 0;
  else if (INSN_ADDRESSES_SET_P ())
    {
      /* Accumulate this function's length into the running total,
	 saturating to -1 (all ones) on unsigned wraparound.  */
      unsigned long old_total = total_code_bytes;

      total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_nonnote_insn ()));
      total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;

      /* Be prepared to handle overflows.  */
      if (old_total > total_code_bytes)
	total_code_bytes = -1;
    }
  else
    /* No insn addresses available; mark the total as unknown.  */
    total_code_bytes = -1;

  remove_useless_addtr_insns (get_insns (), 0);
}
3221
/* Expand the function prologue as RTL.  Saves the return pointer,
   allocates the local frame (optionally setting up a frame pointer),
   saves the callee-saved general and floating point registers, and
   saves the EH return data registers and PIC register when needed.
   Sets the file-scope bookkeeping variables local_fsize, actual_fsize,
   save_fregs, gr_saved and fr_saved as side effects; the .CALLINFO
   directive emitted later by pa_output_function_prologue reads them.  */

void
hppa_expand_prologue ()
{
  int size = get_frame_size ();
  int merge_sp_adjust_with_store = 0;
  int i, offset;
  rtx insn, tmpreg;

  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;

  /* Allocate space for frame pointer + filler.  If any frame is allocated
     we need to add this in because of STARTING_FRAME_OFFSET.

     Similar code also appears in compute_frame_size.  Change both
     of them at the same time.  */
  local_fsize = size + (size || frame_pointer_needed
			? STARTING_FRAME_OFFSET : 0);

  actual_fsize = compute_frame_size (size, &save_fregs);

  /* Compute a few things we will use often.  %r1 serves as a scratch
     register throughout the prologue.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp - 20 or sp - 16
     depending on which ABI is in use.  */
  if (regs_ever_live[2] || current_function_calls_eh_return)
    store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize != 0)
    {
      if (frame_pointer_needed)
	{
	  /* Copy the old frame pointer temporarily into %r1.  Set up the
	     new stack pointer, then store away the saved old frame pointer
	     into the stack at sp and at the same time update the stack
	     pointer by actual_fsize bytes.  Two versions, first
	     handles small (<8k) frames.  The second handles large (>=8k)
	     frames.  */
	  insn = emit_move_insn (tmpreg, frame_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    {
	      /* We need to record the frame pointer save here since the
	         new frame pointer is set in the following insn.  */
	      RTX_FRAME_RELATED_P (insn) = 1;
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
		    gen_rtx_SET (VOIDmode,
				 gen_rtx_MEM (word_mode, stack_pointer_rtx),
				 frame_pointer_rtx),
		    REG_NOTES (insn));
	    }

	  insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  if (VAL_14_BITS_P (actual_fsize))
	    /* Small frame: one store-with-modify does both the save of
	       the old frame pointer (now in %r1) and the sp bump.  */
	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
	  else
	    {
	      /* It is incorrect to store the saved frame pointer at *sp,
		 then increment sp (writes beyond the current stack boundary).

		 So instead use stwm to store at *sp and post-increment the
		 stack pointer as an atomic operation.  Then increment sp to
		 finish allocating the new frame.  */
	      int adjust1 = 8192 - 64;
	      int adjust2 = actual_fsize - adjust1;

	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			      adjust2, 1);
	    }

	  /* Prevent register spills from being scheduled before the
	     stack pointer is raised.  Necessary as we will be storing
	     registers using the frame pointer as a base register, and
	     we happen to set fp before raising sp.  */
	  emit_insn (gen_blockage ());
	}
      /* no frame pointer needed.  */
      else
	{
	  /* In some cases we can perform the first callee register save
	     and allocating the stack frame at the same time.   If so, just
	     make a note of it and defer allocating the frame until saving
	     the callee registers.  */
	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
	    merge_sp_adjust_with_store = 1;
	  /* Can not optimize.  Adjust the stack frame by actual_fsize
	     bytes.  */
	  else
	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			    actual_fsize, 1);
	}
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* Saving the EH return data registers in the frame is the simplest
	 way to get the frame unwind information emitted.  We put them
	 just before the general registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      store_reg (regno, offset, FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      /* Callee-saved GRs %r4..%r18, addressed off the frame pointer.  */
      for (i = 18; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    store_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      /* Saves are addressed off sp, which already points past the new
	 frame, hence the negative-going base offset.  */
      offset = local_fsize - actual_fsize;

      /* Saving the EH return data registers in the frame is the simplest
         way to get the frame unwind information emitted.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* If merge_sp_adjust_with_store is nonzero, then we can
		 optimize the first save.  */
	      if (merge_sp_adjust_with_store)
		{
		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
		  merge_sp_adjust_with_store = 0;
		}
	      else
		store_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
      	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
		merge_sp_adjust_with_store = 0;
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			actual_fsize, 1);
    }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?   For now we'll just omit the save.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
	 save area.  %r1 (tmpreg) is post-incremented by the FP stores.  */
      if (frame_pointer_needed)
	{
	  set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
	  base = frame_pointer_rtx;
	}
      else
	{
	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
	  base = stack_pointer_rtx;
	}

      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	{
	  if (regs_ever_live[i]
	      || (! TARGET_64BIT && regs_ever_live[i + 1]))
	    {
	      rtx addr, insn, reg;
	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	      reg = gen_rtx_REG (DFmode, i);
	      insn = emit_move_insn (addr, reg);
	      if (DO_FRAME_NOTES)
		{
		  RTX_FRAME_RELATED_P (insn) = 1;
		  if (TARGET_64BIT)
		    {
		      rtx mem = gen_rtx_MEM (DFmode,
					     plus_constant (base, offset));
		      REG_NOTES (insn)
			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					     gen_rtx_SET (VOIDmode, mem, reg),
					     REG_NOTES (insn));
		    }
		  else
		    {
		      /* On 32-bit targets the unwind info describes the
			 save as two SFmode halves of the DF register.  */
		      rtx meml = gen_rtx_MEM (SFmode,
					      plus_constant (base, offset));
		      rtx memr = gen_rtx_MEM (SFmode,
					      plus_constant (base, offset + 4));
		      rtx regl = gen_rtx_REG (SFmode, i);
		      rtx regr = gen_rtx_REG (SFmode, i + 1);
		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
		      rtvec vec;

		      RTX_FRAME_RELATED_P (setl) = 1;
		      RTX_FRAME_RELATED_P (setr) = 1;
		      vec = gen_rtvec (2, setl, setr);
		      REG_NOTES (insn)
			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					     gen_rtx_SEQUENCE (VOIDmode, vec),
					     REG_NOTES (insn));
		    }
		}
	      offset += GET_MODE_SIZE (DFmode);
	      fr_saved++;
	    }
	}
    }

  /* FIXME: expand_call and expand_millicode_call need to be fixed to
     prevent insns with frame notes being scheduled in the delay slot
     of calls.  This causes problems because the dwarf2 output code
     processes the insn list serially.  For now, limit the migration
     of prologue insns with a blockage.  */
  if (DO_FRAME_NOTES)
    emit_insn (gen_blockage ());
}
3499
3500 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3501 Handle case where DISP > 8k by using the add_high_const patterns. */
3502
3503 static void
3504 load_reg (reg, disp, base)
3505 int reg, disp, base;
3506 {
3507 rtx src, dest, basereg;
3508
3509 dest = gen_rtx_REG (word_mode, reg);
3510 basereg = gen_rtx_REG (Pmode, base);
3511 if (VAL_14_BITS_P (disp))
3512 {
3513 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3514 emit_move_insn (dest, src);
3515 }
3516 else
3517 {
3518 rtx delta = GEN_INT (disp);
3519 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3520 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3521 emit_move_insn (tmpreg, high);
3522 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3523 emit_move_insn (dest, src);
3524 }
3525 }
3526
3527 /* This function generates the assembly code for function exit.
3528 Args are as for output_function_prologue ().
3529
3530 The function epilogue should not depend on the current stack
3531 pointer! It should use the frame pointer only. This is mandatory
3532 because of alloca; we also take advantage of it to omit stack
3533 adjustments before returning. */
3534
3535 static void
3536 pa_output_function_epilogue (file, size)
3537 FILE *file;
3538 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3539 {
3540 rtx insn = get_last_insn ();
3541
3542 /* hppa_expand_epilogue does the dirty work now. We just need
3543 to output the assembler directives which denote the end
3544 of a function.
3545
3546 To make debuggers happy, emit a nop if the epilogue was completely
3547 eliminated due to a volatile call as the last insn in the
3548 current function. That way the return address (in %r2) will
3549 always point to a valid instruction in the current function. */
3550
3551 /* Get the last real insn. */
3552 if (GET_CODE (insn) == NOTE)
3553 insn = prev_real_insn (insn);
3554
3555 /* If it is a sequence, then look inside. */
3556 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3557 insn = XVECEXP (PATTERN (insn), 0, 0);
3558
3559 /* If insn is a CALL_INSN, then it must be a call to a volatile
3560 function (otherwise there would be epilogue insns). */
3561 if (insn && GET_CODE (insn) == CALL_INSN)
3562 fputs ("\tnop\n", file);
3563
3564 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3565 }
3566
/* Expand the function epilogue as RTL: restore the return pointer,
   the EH data registers, the callee-saved general and floating point
   registers, then deallocate the frame (and restore the frame pointer
   when one was used).  Mirrors the layout chosen by
   hppa_expand_prologue, using the same local_fsize/actual_fsize/
   save_fregs bookkeeping.  */

void
hppa_expand_epilogue ()
{
  rtx tmpreg;
  int offset, i;
  int merge_sp_adjust_with_load = 0;
  int ret_off = 0;

  /* We will use this often.  %r1 is the scratch register.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (regs_ever_live [2] || current_function_calls_eh_return)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, FRAME_POINTER_REGNUM);
	  /* ret_off == 0 records that RP has been restored.  */
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      /* %r3 is restored later, together with the frame pointer reset.  */
      for (i = 18; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    load_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      /* Restores are addressed off sp, which still points past the
	 frame, hence the negative-going base offset.  */
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  %r1 is post-incremented
	 by each FP load, mirroring the prologue's store loop.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (regs_ever_live[i]
	    || (! TARGET_64BIT && regs_ever_live[i + 1]))
	  {
	    rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
      /* Restores %r3 and drops sp in one pre-modify load.  */
      emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && current_function_calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      /* Apply the EH stack adjustment after everything else; the
	 blockage keeps it from being scheduled earlier.  */
      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
3742
3743 rtx
3744 hppa_pic_save_rtx ()
3745 {
3746 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
3747 }
3748
/* Emit RTL for a call to the _mcount profiling routine.  LABEL_NO is
   the number of the current function's FUNC_BEGIN_PROLOG label, whose
   address is passed to _mcount.  When profile counters are enabled,
   the address of the per-function counter label is passed in %r24.  */

void
hppa_profile_hook (label_no)
     int label_no;
{
  rtx begin_label_rtx, call_insn;
  char begin_label_name[16];

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
			       label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));

  /* On the 64-bit port, set up the argument pointer for the call.  */
  if (TARGET_64BIT)
    emit_move_insn (arg_pointer_rtx,
		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
				  GEN_INT (64)));

  /* Pass the return pointer (%r2) to _mcount in %r26.  */
  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

#ifndef NO_PROFILE_COUNTERS
  {
    rtx count_label_rtx, addr, r24;
    char count_label_name[16];

    /* Load the address of this function's profile counter into %r24.  */
    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));

    addr = force_reg (Pmode, count_label_rtx);
    r24 = gen_rtx_REG (Pmode, 24);
    emit_move_insn (r24, addr);

    /* %r25 is set from within the output pattern.  */
    call_insn =
      emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
					 GEN_INT (TARGET_64BIT ? 24 : 12),
					 begin_label_rtx));

    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
  }
#else
  /* %r25 is set from within the output pattern.  */
  call_insn =
    emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
				       GEN_INT (TARGET_64BIT ? 16 : 8),
				       begin_label_rtx));
#endif

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  REG_NOTES (call_insn)
    = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));

  if (flag_pic)
    {
      /* The call uses the PIC register (and the argument pointer on
	 the 64-bit port); record that, then reload the PIC register
	 from its saved entry value after the call.  */
      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
      if (TARGET_64BIT)
	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);

      emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
    }
}
3809
/* Fetch the return address for the frame COUNT steps up from
   the current frame, after the prologue.  FRAMEADDR is the
   frame pointer of the COUNT frame.

   We want to ignore any export stub remnants here.  To handle this,
   we examine the code at the return address, and if it is an export
   stub, we return a memory rtx for the stub return address stored
   at frame-24.

   The value returned is used in two different ways:

	1. To find a function's caller.

	2. To change the return address for a function.

   This function handles most instances of case 1; however, it will
   fail if there are two levels of stubs to execute on the return
   path.  The only way I believe that can happen is if the return value
   needs a parameter relocation, which never happens for C code.

   This function handles most instances of case 2; however, it will
   fail if we did not originally have stub code on the return path
   but will need stub code on the new return path.  This can happen if
   the caller & callee are both in the main program, but the new
   return location is in a shared library.  */

rtx
return_addr_rtx (count, frameaddr)
     int count;
     rtx frameaddr;
{
  rtx label;
  rtx rp;
  rtx saved_rp;
  rtx ins;

  /* Only the immediate caller's return address is supported.  */
  if (count != 0)
    return NULL_RTX;

  rp = get_hard_reg_initial_val (Pmode, 2);

  /* No export stubs to worry about in these configurations; the raw
     return pointer is the answer.  */
  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
    return rp;

  saved_rp = gen_reg_rtx (Pmode);
  emit_move_insn (saved_rp, rp);

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  /* Check the instruction stream at the normal return address for the
     export stub:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
	0x00011820 | stub+16:  mtsp r1,sr0
	0xe0400002 | stub+20:  be,n 0(sr0,rp)

     If it is an export stub, then our return address is really in
     -24[frameaddr].  Any mismatch branches to LABEL, keeping the
     value already copied into saved_rp.  */

  emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
		 NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
		 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
		 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
		 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);

  /* If there is no export stub then just use the value saved from
     the return pointer register.  */

  emit_jump_insn (gen_bne (label));

  /* Here we know that our return address points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address of the export stub.  That return
     address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp,
		  gen_rtx_MEM (Pmode,
			       memory_address (Pmode,
					       plus_constant (frameaddr,
							      -24))));

  emit_label (label);
  return saved_rp;
}
3909
3910 /* This is only valid once reload has completed because it depends on
3911 knowing exactly how much (if any) frame there is and...
3912
3913 It's only valid if there is no frame marker to de-allocate and...
3914
3915 It's only valid if %r2 hasn't been saved into the caller's frame
3916 (we're not profiling and %r2 isn't live anywhere). */
3917 int
3918 hppa_can_use_return_insn_p ()
3919 {
3920 return (reload_completed
3921 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3922 && ! regs_ever_live[2]
3923 && ! frame_pointer_needed);
3924 }
3925
3926 void
3927 emit_bcond_fp (code, operand0)
3928 enum rtx_code code;
3929 rtx operand0;
3930 {
3931 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3932 gen_rtx_IF_THEN_ELSE (VOIDmode,
3933 gen_rtx_fmt_ee (code,
3934 VOIDmode,
3935 gen_rtx_REG (CCFPmode, 0),
3936 const0_rtx),
3937 gen_rtx_LABEL_REF (VOIDmode, operand0),
3938 pc_rtx)));
3939
3940 }
3941
3942 rtx
3943 gen_cmp_fp (code, operand0, operand1)
3944 enum rtx_code code;
3945 rtx operand0, operand1;
3946 {
3947 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3948 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
3949 }
3950
3951 /* Adjust the cost of a scheduling dependency. Return the new cost of
3952 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3953
3954 static int
3955 pa_adjust_cost (insn, link, dep_insn, cost)
3956 rtx insn;
3957 rtx link;
3958 rtx dep_insn;
3959 int cost;
3960 {
3961 enum attr_type attr_type;
3962
3963 /* Don't adjust costs for a pa8000 chip, also do not adjust any
3964 true dependencies as they are described with bypasses now. */
3965 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
3966 return cost;
3967
3968 if (! recog_memoized (insn))
3969 return 0;
3970
3971 attr_type = get_attr_type (insn);
3972
3973 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3974 {
3975 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3976 cycles later. */
3977
3978 if (attr_type == TYPE_FPLOAD)
3979 {
3980 rtx pat = PATTERN (insn);
3981 rtx dep_pat = PATTERN (dep_insn);
3982 if (GET_CODE (pat) == PARALLEL)
3983 {
3984 /* This happens for the fldXs,mb patterns. */
3985 pat = XVECEXP (pat, 0, 0);
3986 }
3987 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3988 /* If this happens, we have to extend this to schedule
3989 optimally. Return 0 for now. */
3990 return 0;
3991
3992 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3993 {
3994 if (! recog_memoized (dep_insn))
3995 return 0;
3996 switch (get_attr_type (dep_insn))
3997 {
3998 case TYPE_FPALU:
3999 case TYPE_FPMULSGL:
4000 case TYPE_FPMULDBL:
4001 case TYPE_FPDIVSGL:
4002 case TYPE_FPDIVDBL:
4003 case TYPE_FPSQRTSGL:
4004 case TYPE_FPSQRTDBL:
4005 /* A fpload can't be issued until one cycle before a
4006 preceding arithmetic operation has finished if
4007 the target of the fpload is any of the sources
4008 (or destination) of the arithmetic operation. */
4009 return insn_default_latency (dep_insn) - 1;
4010
4011 default:
4012 return 0;
4013 }
4014 }
4015 }
4016 else if (attr_type == TYPE_FPALU)
4017 {
4018 rtx pat = PATTERN (insn);
4019 rtx dep_pat = PATTERN (dep_insn);
4020 if (GET_CODE (pat) == PARALLEL)
4021 {
4022 /* This happens for the fldXs,mb patterns. */
4023 pat = XVECEXP (pat, 0, 0);
4024 }
4025 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4026 /* If this happens, we have to extend this to schedule
4027 optimally. Return 0 for now. */
4028 return 0;
4029
4030 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4031 {
4032 if (! recog_memoized (dep_insn))
4033 return 0;
4034 switch (get_attr_type (dep_insn))
4035 {
4036 case TYPE_FPDIVSGL:
4037 case TYPE_FPDIVDBL:
4038 case TYPE_FPSQRTSGL:
4039 case TYPE_FPSQRTDBL:
4040 /* An ALU flop can't be issued until two cycles before a
4041 preceding divide or sqrt operation has finished if
4042 the target of the ALU flop is any of the sources
4043 (or destination) of the divide or sqrt operation. */
4044 return insn_default_latency (dep_insn) - 2;
4045
4046 default:
4047 return 0;
4048 }
4049 }
4050 }
4051
4052 /* For other anti dependencies, the cost is 0. */
4053 return 0;
4054 }
4055 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4056 {
4057 /* Output dependency; DEP_INSN writes a register that INSN writes some
4058 cycles later. */
4059 if (attr_type == TYPE_FPLOAD)
4060 {
4061 rtx pat = PATTERN (insn);
4062 rtx dep_pat = PATTERN (dep_insn);
4063 if (GET_CODE (pat) == PARALLEL)
4064 {
4065 /* This happens for the fldXs,mb patterns. */
4066 pat = XVECEXP (pat, 0, 0);
4067 }
4068 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4069 /* If this happens, we have to extend this to schedule
4070 optimally. Return 0 for now. */
4071 return 0;
4072
4073 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4074 {
4075 if (! recog_memoized (dep_insn))
4076 return 0;
4077 switch (get_attr_type (dep_insn))
4078 {
4079 case TYPE_FPALU:
4080 case TYPE_FPMULSGL:
4081 case TYPE_FPMULDBL:
4082 case TYPE_FPDIVSGL:
4083 case TYPE_FPDIVDBL:
4084 case TYPE_FPSQRTSGL:
4085 case TYPE_FPSQRTDBL:
4086 /* A fpload can't be issued until one cycle before a
4087 preceding arithmetic operation has finished if
4088 the target of the fpload is the destination of the
4089 arithmetic operation.
4090
4091 Exception: For PA7100LC, PA7200 and PA7300, the cost
4092 is 3 cycles, unless they bundle together. We also
4093 pay the penalty if the second insn is a fpload. */
4094 return insn_default_latency (dep_insn) - 1;
4095
4096 default:
4097 return 0;
4098 }
4099 }
4100 }
4101 else if (attr_type == TYPE_FPALU)
4102 {
4103 rtx pat = PATTERN (insn);
4104 rtx dep_pat = PATTERN (dep_insn);
4105 if (GET_CODE (pat) == PARALLEL)
4106 {
4107 /* This happens for the fldXs,mb patterns. */
4108 pat = XVECEXP (pat, 0, 0);
4109 }
4110 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4111 /* If this happens, we have to extend this to schedule
4112 optimally. Return 0 for now. */
4113 return 0;
4114
4115 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4116 {
4117 if (! recog_memoized (dep_insn))
4118 return 0;
4119 switch (get_attr_type (dep_insn))
4120 {
4121 case TYPE_FPDIVSGL:
4122 case TYPE_FPDIVDBL:
4123 case TYPE_FPSQRTSGL:
4124 case TYPE_FPSQRTDBL:
4125 /* An ALU flop can't be issued until two cycles before a
4126 preceding divide or sqrt operation has finished if
4127 the target of the ALU flop is also the target of
4128 the divide or sqrt operation. */
4129 return insn_default_latency (dep_insn) - 2;
4130
4131 default:
4132 return 0;
4133 }
4134 }
4135 }
4136
4137 /* For other output dependencies, the cost is 0. */
4138 return 0;
4139 }
4140 else
4141 abort ();
4142 }
4143
4144 /* Adjust scheduling priorities. We use this to try and keep addil
4145 and the next use of %r1 close together. */
4146 static int
4147 pa_adjust_priority (insn, priority)
4148 rtx insn;
4149 int priority;
4150 {
4151 rtx set = single_set (insn);
4152 rtx src, dest;
4153 if (set)
4154 {
4155 src = SET_SRC (set);
4156 dest = SET_DEST (set);
4157 if (GET_CODE (src) == LO_SUM
4158 && symbolic_operand (XEXP (src, 1), VOIDmode)
4159 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4160 priority >>= 3;
4161
4162 else if (GET_CODE (src) == MEM
4163 && GET_CODE (XEXP (src, 0)) == LO_SUM
4164 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4165 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4166 priority >>= 1;
4167
4168 else if (GET_CODE (dest) == MEM
4169 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4170 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4171 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4172 priority >>= 3;
4173 }
4174 return priority;
4175 }
4176
4177 /* The 700 can only issue a single insn at a time.
4178 The 7XXX processors can issue two insns at a time.
4179 The 8000 can issue 4 insns at a time. */
4180 static int
4181 pa_issue_rate ()
4182 {
4183 switch (pa_cpu)
4184 {
4185 case PROCESSOR_700: return 1;
4186 case PROCESSOR_7100: return 2;
4187 case PROCESSOR_7100LC: return 2;
4188 case PROCESSOR_7200: return 2;
4189 case PROCESSOR_7300: return 2;
4190 case PROCESSOR_8000: return 4;
4191
4192 default:
4193 abort ();
4194 }
4195 }
4196
4197
4198
/* Return any length adjustment needed by INSN which already has its length
   computed as LENGTH.   Return zero if no adjustment is necessary.

   For the PA: function calls, millicode calls, and backwards short
   conditional branches with unfilled delay slots need an adjustment by +1
   (to account for the NOP which will be inserted into the instruction stream).

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.

   NOTE(review): the returned adjustment appears to be in bytes, with 4
   bytes per instruction word -- confirm against the length attributes
   in pa.md.  */
int
pa_adjust_insn_length (insn, length)
    rtx insn;
    int length;
{
  rtx pat = PATTERN (insn);

  /* Call insns which are *not* indirect and have unfilled delay slots.  */
  if (GET_CODE (insn) == CALL_INSN)
    {
      /* Direct call: the callee address is a SYMBOL_REF inside the
	 CALL (or inside the SET wrapping the CALL for value-returning
	 calls).  Indirect calls fall through to the zero return.  */
      if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
	  && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
	return 4;
      else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
	       && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
		  == SYMBOL_REF)
	return 4;
      else
	return 0;
    }
  /* Jumps inside switch tables which have unfilled delay slots
     also need adjustment.  */
  else if (GET_CODE (insn) == JUMP_INSN
	   && simplejump_p (insn)
	   && GET_MODE (insn) == SImode)
    return 4;
  /* Millicode insn with an unfilled delay slot.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) != SEQUENCE
	   && GET_CODE (pat) != USE
	   && GET_CODE (pat) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI)
    return 4;
  /* Block move pattern: a PARALLEL whose first element sets one BLKmode
     MEM from another.  Its full length is computed elsewhere; subtract
     the 4 bytes already accounted for in the base length.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    return compute_movstrsi_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && ! forward_branch_p (insn))
	return 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	return 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	return 4;
      else
	return 0;
    }
  return 0;
}
4278
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output a nullification completer if there's nothing for the */
      /* delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
	fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x) + 1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%r0", file);
	  return;
	}
      else
	break;
    case 'f':
      /* A register or zero (floating point).  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%fr0", file);
	  return;
	}
      else
	break;
    case 'A':
      {
	/* Output a global address followed by the base register in
	   parentheses.  NOTE(review): X is expected to be a MEM whose
	   address is (PLUS reg (UNSPEC-like vector)) -- confirm against
	   the patterns that use %A.  */
	rtx xoperands[2];

	xoperands[0] = XEXP (XEXP (x, 0), 0);
	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
	output_global_address (file, xoperands[1], 0);
	fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
	return;
      }

    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  abort ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  abort ();
	}
      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired mode.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("!>", file);  break;
	case GE:
	  fputs ("!>=", file);  break;
	case LT:
	  fputs ("!<", file);  break;
	case LE:
	  fputs ("!<=", file);  break;
	case LTGT:
	  fputs ("!<>", file);  break;
	case UNLE:
	  fputs (">", file);  break;
	case UNLT:
	  fputs (">=", file);  break;
	case UNGE:
	  fputs ("<", file);  break;
	case UNGT:
	  fputs ("<=", file);  break;
	case UNEQ:
	  fputs ("<>", file);  break;
	case UNORDERED:
	  fputs ("<=>", file);  break;
	case ORDERED:
	  fputs ("!<=>", file);  break;
	default:
	  abort ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  abort ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  abort ();
	}
      return;
    case 'k':
      /* Print the ones-complement of a constant.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
	  return;
	}
      abort ();
    case 'Q':
      /* Print 64 minus the low 6 bits of a constant.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
	  return;
	}
      abort ();
    case 'L':
      /* Print 32 minus the low 5 bits of a constant.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
	  return;
	}
      abort ();
    case 'O':
      /* Print the base-2 logarithm of an exact power of two.  */
      if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
	{
	  fprintf (file, "%d", exact_log2 (INTVAL (x)));
	  return;
	}
      abort ();
    case 'p':
      /* Print 63 minus the low 6 bits of a constant.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
	  return;
	}
      abort ();
    case 'P':
      /* Print 31 minus the low 5 bits of a constant.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
	  return;
	}
      abort ();
    case 'I':
      /* Print an "i" suffix when the operand is an immediate.  */
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      /* Print the addressing-mode completer for a memory operand
	 ('F' additionally emits the "s" for the older dialect).  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,mb", file);
	  else
	    fputs (",mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,ma", file);
	  else
	    fputs (",ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x,s", file);
	      else
		fputs (",s", file);
	    }
	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      /* Global address, unrounded.  */
      output_global_address (file, x, 0);
      return;
    case 'H':
      /* Global address, rounded for an LR field selector.  */
      output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      {
	/* Emit the three operand fields of a 32-bit zdepwi.  */
	unsigned op[3];
	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'z':
      {
	/* Emit the three operand fields of a 64-bit zdepdi.  */
	unsigned op[3];
	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'c':
      /* We can get here from a .vtable_inherit due to our
	 CONSTANT_ADDRESS_P rejecting perfectly good constant
	 addresses.  */
      break;
    default:
      abort ();
    }
  /* No (or fall-through) code letter: print the operand itself.  */
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      /* Narrow FP values occupy half of a 64-bit FP register; pick the
	 right half-register suffix.  */
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
	{
	  fputs ("R", file);
	  return;
	}
      if (FP_REG_P (x)
	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
	  && (REGNO (x) & 1) == 0)
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = NULL_RTX;
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	default:
	  /* Scaled-index addresses print as "index(base)"; anything
	     else is handled by the generic address printer.  */
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
	  else
	    output_address (XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
4648
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.
   ROUND_CONSTANT nonzero means round the constant offset as required
   by an LR field selector (see comment below).  */

void
output_global_address (file, x, round_constant)
     FILE *file;
     rtx x;
     int round_constant;
{

  /* Imagine (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    assemble_name (file, XSTR (x, 0));
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      /* Non-PIC writable symbols are addressed relative to $global$.  */
      assemble_name (file, XSTR (x, 0));
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      /* The CONST wraps a PLUS or MINUS of a symbol and an integer;
	 either operand may be the symbol.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0), 0));
      else abort ();

      if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0), 1));
      else abort ();

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      /* Pick the separator that reproduces the original expression.  */
      if (GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	}
      else if (GET_CODE (XEXP (x, 0)) == MINUS
	       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
	sep = "-";
      else abort ();

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
4728
/* Return the deferred-plabel entry for FNAME, creating (and
   registering) a new one if none exists yet.  */

static struct deferred_plabel *
get_plabel (fname)
     const char *fname;
{
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, deferred_plabels[i].name) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      const char *real_name;

      /* Grow (or create) the GC-managed array by one entry.  */
      if (deferred_plabels == 0)
	deferred_plabels = (struct deferred_plabel *)
	  ggc_alloc (sizeof (struct deferred_plabel));
      else
	deferred_plabels = (struct deferred_plabel *)
	  ggc_realloc (deferred_plabels,
		       ((n_deferred_plabels + 1)
			* sizeof (struct deferred_plabel)));

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].name = ggc_strdup (fname);

      /* Gross.  We have just implicitly taken the address of this function,
	 mark it as such.  */
      real_name = (*targetm.strip_name_encoding) (fname);
      TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
    }

  return &deferred_plabels[i];
}
4769
4770 void
4771 output_deferred_plabels (file)
4772 FILE *file;
4773 {
4774 size_t i;
4775 /* If we have deferred plabels, then we need to switch into the data
4776 section and align it to a 4 byte boundary before we output the
4777 deferred plabels. */
4778 if (n_deferred_plabels)
4779 {
4780 data_section ();
4781 ASM_OUTPUT_ALIGN (file, TARGET_64BIT ? 3 : 2);
4782 }
4783
4784 /* Now output the deferred plabels. */
4785 for (i = 0; i < n_deferred_plabels; i++)
4786 {
4787 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4788 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4789 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
4790 }
4791 }
4792
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli		PARAMS ((enum millicodes));
/* Nonzero once the .IMPORT for the corresponding millicode routine
   has been emitted.  */
static char imported[(int) end1000];
/* Routine names, indexed by enum millicodes; each is exactly 4 chars.  */
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Template directive; the "...." placeholder is overwritten with the
   routine name by import_milli.  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the "...." placeholder within IMPORT_STRING.  */
#define MILLI_START 10
4802
4803 static void
4804 import_milli (code)
4805 enum millicodes code;
4806 {
4807 char str[sizeof (import_string)];
4808
4809 if (!imported[(int) code])
4810 {
4811 imported[(int) code] = 1;
4812 strcpy (str, import_string);
4813 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4814 output_asm_insn (str, 0);
4815 }
4816 }
4817
/* The register constraints have put the operands and return value in
   the proper registers.  */

const char *
output_mul_insn (unsignedp, insn)
     int unsignedp ATTRIBUTE_UNUSED;
     rtx insn;
{
  /* Make sure the $$mulI millicode routine has been imported, then
     emit the call to it.  The same routine handles both signed and
     unsigned multiply, hence UNSIGNEDP is unused.  */
  import_milli (mulI);
  return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
}
4829
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor (0..15); nonzero means $$divI_<n>/$$divU_<n> exists.  */
static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
				 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
4841
4842 int
4843 div_operand (op, mode)
4844 rtx op;
4845 enum machine_mode mode;
4846 {
4847 return (mode == SImode
4848 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4849 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4850 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4851 }
4852
/* If OPERANDS[2] is a constant with a magic division millicode, emit
   the RTL for a millicode division and return 1; otherwise emit
   nothing and return 0.  UNSIGNEDP selects UDIV vs DIV.  */

int
emit_hpdiv_const (operands, unsignedp)
     rtx *operands;
     int unsignedp;
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && magic_milli[INTVAL (operands[2])])
    {
      /* The millicode return pointer lives in %r2 (64-bit) or %r31.  */
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);

      /* Dividend goes in %r26; quotient comes back in %r29.  The
	 PARALLEL records the registers the millicode call clobbers.  */
      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
      emit
	(gen_rtx
	 (PARALLEL, VOIDmode,
	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
						     SImode,
						     gen_rtx_REG (SImode, 26),
						     operands[2])),
		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
		     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}
4884
4885 const char *
4886 output_div_insn (operands, unsignedp, insn)
4887 rtx *operands;
4888 int unsignedp;
4889 rtx insn;
4890 {
4891 int divisor;
4892
4893 /* If the divisor is a constant, try to use one of the special
4894 opcodes .*/
4895 if (GET_CODE (operands[0]) == CONST_INT)
4896 {
4897 static char buf[100];
4898 divisor = INTVAL (operands[0]);
4899 if (!div_milli[divisor][unsignedp])
4900 {
4901 div_milli[divisor][unsignedp] = 1;
4902 if (unsignedp)
4903 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4904 else
4905 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4906 }
4907 if (unsignedp)
4908 {
4909 sprintf (buf, "$$divU_");
4910 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4911 return output_millicode_call (insn,
4912 gen_rtx_SYMBOL_REF (SImode, buf));
4913 }
4914 else
4915 {
4916 sprintf (buf, "$$divI_");
4917 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4918 return output_millicode_call (insn,
4919 gen_rtx_SYMBOL_REF (SImode, buf));
4920 }
4921 }
4922 /* Divisor isn't a special constant. */
4923 else
4924 {
4925 if (unsignedp)
4926 {
4927 import_milli (divU);
4928 return output_millicode_call (insn,
4929 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4930 }
4931 else
4932 {
4933 import_milli (divI);
4934 return output_millicode_call (insn,
4935 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
4936 }
4937 }
4938 }
4939
4940 /* Output a $$rem millicode to do mod. */
4941
4942 const char *
4943 output_mod_insn (unsignedp, insn)
4944 int unsignedp;
4945 rtx insn;
4946 {
4947 if (unsignedp)
4948 {
4949 import_milli (remU);
4950 return output_millicode_call (insn,
4951 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4952 }
4953 else
4954 {
4955 import_milli (remI);
4956 return output_millicode_call (insn,
4957 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
4958 }
4959 }
4960
/* Emit the .CALL argument-relocation descriptor for CALL_INSN,
   describing which argument words are in general (GR) vs floating
   (FR/FU) registers.  */

void
output_arg_descriptor (call_insn)
     rtx call_insn;
{
  const char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  if (GET_CODE (call_insn) != CALL_INSN)
    abort ();
  /* Walk the USEs attached to the call to find which argument
     registers are live at the call.  */
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      /* General argument registers are %r23..%r26; %r26 is the first
	 argument word, hence the 26 - regno index.  */
      if (regno >= 23 && regno <= 26)
	{
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      /* FP argument registers are %fr32..%fr39 (two regnos per FP
	 register); doubles occupy an FR/FU pair.  */
      else if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  /* Emit the descriptor, comma-separating only the present words.  */
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
5035 \f
/* Return the class of any secondary reload register that is needed to
   move IN into a register in class CLASS using mode MODE.

   Profiling has showed this routine and its descendants account for
   a significant amount of compile time (~7%).  So it has been
   optimized to reduce redundant computations and eliminate useless
   function calls.

   It might be worthwhile to try and make this a leaf function too.  */

enum reg_class
secondary_reload_class (class, mode, in)
     enum reg_class class;
     enum machine_mode mode;
     rtx in;
{
  int regno, is_symbolic;

  /* Trying to load a constant into a FP register during PIC code
     generation will require %r1 as a scratch register.  */
  if (flag_pic
      && GET_MODE_CLASS (mode) == MODE_INT
      && FP_REG_CLASS_P (class)
      && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
    return R1_REGS;

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside secondary_reload_class.  */

  /* Resolve IN to a hard register number when possible; -1 means
     "not a register".  */
  if (GET_CODE (in) == REG)
    {
      regno = REGNO (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	regno = true_regnum (in);
    }
  else if (GET_CODE (in) == SUBREG)
    regno = true_regnum (in);
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (in) == MEM
      && GET_CODE (XEXP (in, 0)) == MEM)
    return NO_REGS;

  /* Handle out of range displacement for integer mode loads/stores of
     FP registers.  */
  if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
       && GET_MODE_CLASS (mode) == MODE_INT
       && FP_REG_CLASS_P (class))
      || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
    return GENERAL_REGS;

  /* A SAR<->FP register copy requires a secondary register (GPR) as
     well as secondary memory.  */
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
	  || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
    return GENERAL_REGS;

  if (GET_CODE (in) == HIGH)
    in = XEXP (in, 0);

  /* Profiling has showed GCC spends about 2.6% of its compilation
     time in symbolic_operand from calls inside secondary_reload_class.

     We use an inline copy and only compute its return value once to avoid
     useless work.  */
  switch (GET_CODE (in))
    {
      rtx tmp;

    case SYMBOL_REF:
    case LABEL_REF:
      is_symbolic = 1;
      break;
    case CONST:
      tmp = XEXP (in, 0);
      is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
		     && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
      break;

    default:
      is_symbolic = 0;
      break;
    }

  /* Read-only symbols need no scratch outside of PIC.  */
  if (!flag_pic
      && is_symbolic
      && read_only_operand (in, VOIDmode))
    return NO_REGS;

  /* Other symbolic loads need %r1 as a scratch register.  */
  if (class != R1_REGS && is_symbolic)
    return R1_REGS;

  return NO_REGS;
}
5136
5137 enum direction
5138 function_arg_padding (mode, type)
5139 enum machine_mode mode;
5140 tree type;
5141 {
5142 if (mode == BLKmode
5143 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5144 {
5145 /* Return none if justification is not required. */
5146 if (type
5147 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5148 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5149 return none;
5150
5151 /* The directions set here are ignored when a BLKmode argument larger
5152 than a word is placed in a register. Different code is used for
5153 the stack and registers. This makes it difficult to have a
5154 consistent data representation for both the stack and registers.
5155 For both runtimes, the justification and padding for arguments on
5156 the stack and in registers should be identical. */
5157 if (TARGET_64BIT)
5158 /* The 64-bit runtime specifies left justification for aggregates. */
5159 return upward;
5160 else
5161 /* The 32-bit runtime architecture specifies right justification.
5162 When the argument is passed on the stack, the argument is padded
5163 with garbage on the left. The HP compiler pads with zeros. */
5164 return downward;
5165 }
5166
5167 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5168 return downward;
5169 else
5170 return none;
5171 }
5172
5173 \f
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

struct rtx_def *
hppa_builtin_saveregs ()
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  /* ARGADJ is one word when the function is stdarg (its last declared
     argument is named), zero for old-style varargs.  */
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		       != void_type_node)))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (current_function_arg_offset_rtx, argadj);
  else
    offset = current_function_arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  */
      if (argadj)
	offset = plus_constant (current_function_arg_offset_rtx, -argadj);
      else
	offset = current_function_arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
	 from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
	emit_move_insn (gen_rtx_MEM (word_mode,
				     plus_constant (arg_pointer_rtx, off)),
			gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
	 normally this is not a serious concern.  However, when we are doing
	 varargs/stdargs we want to make the arg pointer point to the start
	 of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
		      plus_constant (arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
					virtual_incoming_args_rtx,
					offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* Store general registers on the stack.  The 32-bit runtime saves
     the four argument registers (starting at %r23) in the 16 bytes
     below the internal arg pointer.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (current_function_internal_arg_pointer,
				     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
				    current_function_internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
5246
/* Implement va_start: save the argument registers, then let the
   generic code initialize VALIST with the resulting pointer.  The
   incoming NEXTARG is ignored and replaced by the saveregs result.  */
void
hppa_va_start (valist, nextarg)
     tree valist;
     rtx nextarg;
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
5255
/* Implement va_arg: build and expand a tree expression that fetches
   the next argument of type TYPE from the va_list VALIST, advancing
   VALIST as a side effect.  Returns an rtx for the argument (or, in
   the by-reference cases, for the dereferenced saved pointer).  */
rtx
hppa_va_arg (valist, type)
     tree valist, type;
{
  HOST_WIDE_INT size = int_size_in_bytes (type);
  HOST_WIDE_INT ofs;
  tree t, ptr, pptr;

  if (TARGET_64BIT)
    {
      /* Every argument in PA64 is supposed to be passed by value
	 (including large structs).  However, as a GCC extension, we
	 pass zero and variable sized arguments by reference.  Empty
	 structures are a GCC extension not supported by the HP
	 compilers.  Thus, passing them by reference isn't likely
	 to conflict with the ABI.  For variable sized arguments,
	 GCC doesn't have the infrastructure to allocate these to
	 registers.  */

      /* Arguments with a size greater than 8 must be aligned 0 MOD 16.  */

      if (size > UNITS_PER_WORD)
	{
	  /* Round VALIST up:  add (2*UNITS_PER_WORD - 1), then mask
	     off the low bits with -2*UNITS_PER_WORD.  */
	  t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		     build_int_2 (2 * UNITS_PER_WORD - 1, 0));
	  t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		     build_int_2 (-2 * UNITS_PER_WORD, -1));
	  t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  /* Emit the alignment update immediately; only its side
	     effect on VALIST matters.  */
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      if (size > 0)
	return std_expand_builtin_va_arg (valist, type);
      else
	{
	  /* Zero/variable sized: VALIST holds a pointer to the object;
	     fetch the pointer and dereference it.  */
	  ptr = build_pointer_type (type);

	  /* Args grow upward.  */
	  t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
		     build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
	  TREE_SIDE_EFFECTS (t) = 1;

	  pptr = build_pointer_type (ptr);
	  t = build1 (NOP_EXPR, pptr, t);
	  TREE_SIDE_EFFECTS (t) = 1;

	  t = build1 (INDIRECT_REF, ptr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	}
    }
  else /* !TARGET_64BIT */
    {
      ptr = build_pointer_type (type);

      /* "Large" and variable sized types are passed by reference.  */
      if (size > 8 || size <= 0)
	{
	  /* Args grow downward.  */
	  t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
		     build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
	  TREE_SIDE_EFFECTS (t) = 1;

	  pptr = build_pointer_type (ptr);
	  t = build1 (NOP_EXPR, pptr, t);
	  TREE_SIDE_EFFECTS (t) = 1;

	  t = build1 (INDIRECT_REF, ptr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	}
      else
	{
	  /* Small by-value argument: step VALIST down by SIZE, then
	     align down to 8 (if size > 4) or 4 bytes.  */
	  t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		     build_int_2 (-size, -1));

	  /* Copied from va-pa.h, but we probably don't need to align to
	     word size, since we generate and preserve that invariant.  */
	  t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
		     build_int_2 ((size > 4 ? -8 : -4), -1));

	  t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
	  TREE_SIDE_EFFECTS (t) = 1;

	  /* Sub-word arguments sit at the high end of their slot;
	     adjust by the padding amount.  */
	  ofs = (8 - size) % 4;
	  if (ofs)
	    {
	      t = build (PLUS_EXPR, TREE_TYPE (valist), t,
			 build_int_2 (ofs, 0));
	      TREE_SIDE_EFFECTS (t) = 1;
	    }

	  t = build1 (NOP_EXPR, ptr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	}
    }

  /* Calculate!  */
  return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
}
5355
5356
5357
/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.

   OPERANDS[0] is the branch target, OPERANDS[1] the register operand
   of the compare (its mode selects the 32 vs 64-bit form), OPERANDS[2]
   the other compare operand (may be an immediate -- hence the %I2
   modifier), OPERANDS[3] the condition.  NOTE(review): %S3/%B3 appear
   to print the condition and its inverse -- confirm against this
   port's print_operand.  LENGTH is the branch length in bytes (4, 8,
   20 or 28).  */

const char *
output_cbranch (operands, nullify, length, negated, insn)
     rtx *operands;
     int nullify, length, negated;
     rtx insn;
{
  /* Template buffer handed back to the caller; static so it survives
     the return.  */
  static char buf[100];
  int useskip = 0;

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  This can happen when not optimizing.

     In such cases it is safe to emit nothing.  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    return "";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	/* The `*' completer selects the 64-bit compare.  */
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  strcat (buf, ",n %2,%r1,%0");
	else
	  strcat (buf, " %2,%r1,%0");
	break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    /* Inverted compare-and-branch around an unconditional
	       branch to the real target.  */
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				   - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    /* Generic long form: comclr to skip an unconditional
	       branch to the target.  */
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      case 20:
	/* Very long branch.  Right now we only handle these when not
	   optimizing.  See "jump" pattern in pa.md for details.  */
	if (optimize)
	  abort ();

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (negated)
	  strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
	else
	  strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
	if (GET_MODE (operands[1]) == DImode)
	  {
	    if (negated)
	      strcpy (buf,
		      "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
	    else
	      strcpy (buf,
		      "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
	  }
	output_asm_insn (buf, operands);

	/* Output an insn to save %r1.  */
	output_asm_insn ("stw %%r1,-16(%%r30)", operands);

	/* Now output a very long branch to the original target.  */
	output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);

	/* Now restore the value of %r1 in the delay slot.  We're not
	   optimizing so we know nothing else can be in the delay slot.  */
	return "ldw -16(%%r30),%%r1";

      case 28:
	/* Very long branch when generating PIC code.  Right now we only
	   handle these when not optimizing.  See "jump" pattern in pa.md
	   for details.  */
	if (optimize)
	  abort ();

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (negated)
	  strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
	else
	  strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
	if (GET_MODE (operands[1]) == DImode)
	  {
	    if (negated)
	      strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
	    else
	      strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
	  }
	output_asm_insn (buf, operands);

	/* Output an insn to save %r1.  */
	output_asm_insn ("stw %%r1,-16(%%r30)", operands);

	/* Now output a very long PIC branch to the original target.  */
	{
	  rtx xoperands[5];

	  xoperands[0] = operands[0];
	  xoperands[1] = operands[1];
	  xoperands[2] = operands[2];
	  xoperands[3] = operands[3];

	  /* Materialize the target address pc-relatively in %r1.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
	  if (TARGET_SOM || !TARGET_GAS)
	    {
	      /* The SOM/HP assembler path needs an explicit local label
		 for the pc-relative difference.  */
	      xoperands[4] = gen_label_rtx ();
	      output_asm_insn ("addil L'%l0-%l4,%%r1", xoperands);
	      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[4]));
	      output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
	      output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1",
			       xoperands);
	    }
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}

	/* Now restore the value of %r1 in the delay slot.  We're not
	   optimizing so we know nothing else can be in the delay slot.  */
	return "ldw -16(%%r30),%%r1";

      default:
	abort ();
    }
  return buf;
}
5571
5572 /* This routine handles all the branch-on-bit conditional branch sequences we
5573 might need to generate. It handles nullification of delay slots,
5574 varying length branches, negated branches and all combinations of the
5575 above. it returns the appropriate output template to emit the branch. */
5576
5577 const char *
5578 output_bb (operands, nullify, length, negated, insn, which)
5579 rtx *operands ATTRIBUTE_UNUSED;
5580 int nullify, length, negated;
5581 rtx insn;
5582 int which;
5583 {
5584 static char buf[100];
5585 int useskip = 0;
5586
5587 /* A conditional branch to the following instruction (eg the delay slot) is
5588 asking for a disaster. I do not think this can happen as this pattern
5589 is only used when optimizing; jump optimization should eliminate the
5590 jump. But be prepared just in case. */
5591
5592 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5593 return "";
5594
5595 /* If this is a long branch with its delay slot unfilled, set `nullify'
5596 as it can nullify the delay slot and save a nop. */
5597 if (length == 8 && dbr_sequence_length () == 0)
5598 nullify = 1;
5599
5600 /* If this is a short forward conditional branch which did not get
5601 its delay slot filled, the delay slot can still be nullified. */
5602 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5603 nullify = forward_branch_p (insn);
5604
5605 /* A forward branch over a single nullified insn can be done with a
5606 extrs instruction. This avoids a single cycle penalty due to
5607 mis-predicted branch if we fall through (branch not taken). */
5608
5609 if (length == 4
5610 && next_real_insn (insn) != 0
5611 && get_attr_length (next_real_insn (insn)) == 4
5612 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5613 && nullify)
5614 useskip = 1;
5615
5616 switch (length)
5617 {
5618
5619 /* All short conditional branches except backwards with an unfilled
5620 delay slot. */
5621 case 4:
5622 if (useskip)
5623 strcpy (buf, "{extrs,|extrw,s,}");
5624 else
5625 strcpy (buf, "bb,");
5626 if (useskip && GET_MODE (operands[0]) == DImode)
5627 strcpy (buf, "extrd,s,*");
5628 else if (GET_MODE (operands[0]) == DImode)
5629 strcpy (buf, "bb,*");
5630 if ((which == 0 && negated)
5631 || (which == 1 && ! negated))
5632 strcat (buf, ">=");
5633 else
5634 strcat (buf, "<");
5635 if (useskip)
5636 strcat (buf, " %0,%1,1,%%r0");
5637 else if (nullify && negated)
5638 strcat (buf, ",n %0,%1,%3");
5639 else if (nullify && ! negated)
5640 strcat (buf, ",n %0,%1,%2");
5641 else if (! nullify && negated)
5642 strcat (buf, "%0,%1,%3");
5643 else if (! nullify && ! negated)
5644 strcat (buf, " %0,%1,%2");
5645 break;
5646
5647 /* All long conditionals. Note an short backward branch with an
5648 unfilled delay slot is treated just like a long backward branch
5649 with an unfilled delay slot. */
5650 case 8:
5651 /* Handle weird backwards branch with a filled delay slot
5652 with is nullified. */
5653 if (dbr_sequence_length () != 0
5654 && ! forward_branch_p (insn)
5655 && nullify)
5656 {
5657 strcpy (buf, "bb,");
5658 if (GET_MODE (operands[0]) == DImode)
5659 strcat (buf, "*");
5660 if ((which == 0 && negated)
5661 || (which == 1 && ! negated))
5662 strcat (buf, "<");
5663 else
5664 strcat (buf, ">=");
5665 if (negated)
5666 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5667 else
5668 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5669 }
5670 /* Handle short backwards branch with an unfilled delay slot.
5671 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5672 taken and untaken branches. */
5673 else if (dbr_sequence_length () == 0
5674 && ! forward_branch_p (insn)
5675 && INSN_ADDRESSES_SET_P ()
5676 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5677 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5678 {
5679 strcpy (buf, "bb,");
5680 if (GET_MODE (operands[0]) == DImode)
5681 strcat (buf, "*");
5682 if ((which == 0 && negated)
5683 || (which == 1 && ! negated))
5684 strcat (buf, ">=");
5685 else
5686 strcat (buf, "<");
5687 if (negated)
5688 strcat (buf, " %0,%1,%3%#");
5689 else
5690 strcat (buf, " %0,%1,%2%#");
5691 }
5692 else
5693 {
5694 strcpy (buf, "{extrs,|extrw,s,}");
5695 if (GET_MODE (operands[0]) == DImode)
5696 strcpy (buf, "extrd,s,*");
5697 if ((which == 0 && negated)
5698 || (which == 1 && ! negated))
5699 strcat (buf, "<");
5700 else
5701 strcat (buf, ">=");
5702 if (nullify && negated)
5703 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5704 else if (nullify && ! negated)
5705 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5706 else if (negated)
5707 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5708 else
5709 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5710 }
5711 break;
5712
5713 default:
5714 abort ();
5715 }
5716 return buf;
5717 }
5718
5719 /* This routine handles all the branch-on-variable-bit conditional branch
5720 sequences we might need to generate. It handles nullification of delay
5721 slots, varying length branches, negated branches and all combinations
5722 of the above. it returns the appropriate output template to emit the
5723 branch. */
5724
5725 const char *
5726 output_bvb (operands, nullify, length, negated, insn, which)
5727 rtx *operands ATTRIBUTE_UNUSED;
5728 int nullify, length, negated;
5729 rtx insn;
5730 int which;
5731 {
5732 static char buf[100];
5733 int useskip = 0;
5734
5735 /* A conditional branch to the following instruction (eg the delay slot) is
5736 asking for a disaster. I do not think this can happen as this pattern
5737 is only used when optimizing; jump optimization should eliminate the
5738 jump. But be prepared just in case. */
5739
5740 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5741 return "";
5742
5743 /* If this is a long branch with its delay slot unfilled, set `nullify'
5744 as it can nullify the delay slot and save a nop. */
5745 if (length == 8 && dbr_sequence_length () == 0)
5746 nullify = 1;
5747
5748 /* If this is a short forward conditional branch which did not get
5749 its delay slot filled, the delay slot can still be nullified. */
5750 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5751 nullify = forward_branch_p (insn);
5752
5753 /* A forward branch over a single nullified insn can be done with a
5754 extrs instruction. This avoids a single cycle penalty due to
5755 mis-predicted branch if we fall through (branch not taken). */
5756
5757 if (length == 4
5758 && next_real_insn (insn) != 0
5759 && get_attr_length (next_real_insn (insn)) == 4
5760 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5761 && nullify)
5762 useskip = 1;
5763
5764 switch (length)
5765 {
5766
5767 /* All short conditional branches except backwards with an unfilled
5768 delay slot. */
5769 case 4:
5770 if (useskip)
5771 strcpy (buf, "{vextrs,|extrw,s,}");
5772 else
5773 strcpy (buf, "{bvb,|bb,}");
5774 if (useskip && GET_MODE (operands[0]) == DImode)
5775 strcpy (buf, "extrd,s,*}");
5776 else if (GET_MODE (operands[0]) == DImode)
5777 strcpy (buf, "bb,*");
5778 if ((which == 0 && negated)
5779 || (which == 1 && ! negated))
5780 strcat (buf, ">=");
5781 else
5782 strcat (buf, "<");
5783 if (useskip)
5784 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5785 else if (nullify && negated)
5786 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5787 else if (nullify && ! negated)
5788 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
5789 else if (! nullify && negated)
5790 strcat (buf, "{%0,%3|%0,%%sar,%3}");
5791 else if (! nullify && ! negated)
5792 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5793 break;
5794
5795 /* All long conditionals. Note an short backward branch with an
5796 unfilled delay slot is treated just like a long backward branch
5797 with an unfilled delay slot. */
5798 case 8:
5799 /* Handle weird backwards branch with a filled delay slot
5800 with is nullified. */
5801 if (dbr_sequence_length () != 0
5802 && ! forward_branch_p (insn)
5803 && nullify)
5804 {
5805 strcpy (buf, "{bvb,|bb,}");
5806 if (GET_MODE (operands[0]) == DImode)
5807 strcat (buf, "*");
5808 if ((which == 0 && negated)
5809 || (which == 1 && ! negated))
5810 strcat (buf, "<");
5811 else
5812 strcat (buf, ">=");
5813 if (negated)
5814 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5815 else
5816 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5817 }
5818 /* Handle short backwards branch with an unfilled delay slot.
5819 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5820 taken and untaken branches. */
5821 else if (dbr_sequence_length () == 0
5822 && ! forward_branch_p (insn)
5823 && INSN_ADDRESSES_SET_P ()
5824 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5825 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5826 {
5827 strcpy (buf, "{bvb,|bb,}");
5828 if (GET_MODE (operands[0]) == DImode)
5829 strcat (buf, "*");
5830 if ((which == 0 && negated)
5831 || (which == 1 && ! negated))
5832 strcat (buf, ">=");
5833 else
5834 strcat (buf, "<");
5835 if (negated)
5836 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5837 else
5838 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
5839 }
5840 else
5841 {
5842 strcpy (buf, "{vextrs,|extrw,s,}");
5843 if (GET_MODE (operands[0]) == DImode)
5844 strcpy (buf, "extrd,s,*");
5845 if ((which == 0 && negated)
5846 || (which == 1 && ! negated))
5847 strcat (buf, "<");
5848 else
5849 strcat (buf, ">=");
5850 if (nullify && negated)
5851 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5852 else if (nullify && ! negated)
5853 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5854 else if (negated)
5855 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5856 else
5857 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5858 }
5859 break;
5860
5861 default:
5862 abort ();
5863 }
5864 return buf;
5865 }
5866
/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.

   OPERANDS[0] is the loop counter (a general register, FP register,
   or memory location depending on WHICH_ALTERNATIVE 0/1/2),
   OPERANDS[1] the increment, OPERANDS[2] the comparison code,
   OPERANDS[3] the branch target, and OPERANDS[4] a scratch general
   register used by the reload alternatives.  */
const char *
output_dbra (operands, insn, which_alternative)
     rtx *operands;
     rtx insn;
     int which_alternative;
{

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      /* The branch is a no-op; just perform the decrement/update.  */
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  /* FP counter: bounce through the stack slot at -16(%r30) to
	     update it with integer arithmetic.  */
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  /* Memory counter: load, adjust, store back.  */
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "addib,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "addib,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";
	}
      else
	abort ();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (get_attr_length (insn) == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (get_attr_length (insn) == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
    }
}
5972
/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.

   OPERANDS[1] is the source value, OPERANDS[0] the destination (a
   general register, FP register, memory location, or SAR depending on
   WHICH_ALTERNATIVE 0/1/2/3), OPERANDS[2] the comparison code and
   OPERANDS[3] the branch target.  If REVERSE_COMPARISON is nonzero,
   the code in OPERANDS[2] is reversed in place (note: this mutates
   the operand).  */
const char *
output_movb (operands, insn, which_alternative, reverse_comparison)
     rtx *operands;
     rtx insn;
     int which_alternative;
     int reverse_comparison;
{

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      /* The branch is a no-op; just perform the move.  */
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  /* FP destination: bounce through the stack slot at -16(%r30).  */
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "movb,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "movb,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";
	}
      else
	abort ();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (get_attr_length (insn) == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (get_attr_length (insn) == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (get_attr_length (insn) == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
    }
}
6085
6086 /* Copy any FP arguments in INSN into integer registers. */
6087 static void
6088 copy_fp_args (insn)
6089 rtx insn;
6090 {
6091 rtx link;
6092 rtx xoperands[2];
6093
6094 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6095 {
6096 int arg_mode, regno;
6097 rtx use = XEXP (link, 0);
6098
6099 if (! (GET_CODE (use) == USE
6100 && GET_CODE (XEXP (use, 0)) == REG
6101 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6102 continue;
6103
6104 arg_mode = GET_MODE (XEXP (use, 0));
6105 regno = REGNO (XEXP (use, 0));
6106
6107 /* Is it a floating point register? */
6108 if (regno >= 32 && regno <= 39)
6109 {
6110 /* Copy the FP register into an integer register via memory. */
6111 if (arg_mode == SFmode)
6112 {
6113 xoperands[0] = XEXP (use, 0);
6114 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6115 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6116 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6117 }
6118 else
6119 {
6120 xoperands[0] = XEXP (use, 0);
6121 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6122 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6123 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6124 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6125 }
6126 }
6127 }
6128 }
6129
6130 /* Compute length of the FP argument copy sequence for INSN. */
6131 static int
6132 length_fp_args (insn)
6133 rtx insn;
6134 {
6135 int length = 0;
6136 rtx link;
6137
6138 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6139 {
6140 int arg_mode, regno;
6141 rtx use = XEXP (link, 0);
6142
6143 if (! (GET_CODE (use) == USE
6144 && GET_CODE (XEXP (use, 0)) == REG
6145 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6146 continue;
6147
6148 arg_mode = GET_MODE (XEXP (use, 0));
6149 regno = REGNO (XEXP (use, 0));
6150
6151 /* Is it a floating point register? */
6152 if (regno >= 32 && regno <= 39)
6153 {
6154 if (arg_mode == SFmode)
6155 length += 8;
6156 else
6157 length += 12;
6158 }
6159 }
6160
6161 return length;
6162 }
6163
6164 /* We include the delay slot in the returned length as it is better to
6165 over estimate the length than to under estimate it. */
6166
6167 int
6168 attr_length_millicode_call (insn, length)
6169 rtx insn;
6170 int length;
6171 {
6172 unsigned long distance = total_code_bytes + INSN_ADDRESSES (INSN_UID (insn));
6173
6174 if (distance < total_code_bytes)
6175 distance = -1;
6176
6177 if (TARGET_64BIT)
6178 {
6179 if (!TARGET_LONG_CALLS && distance < 7600000)
6180 return length + 8;
6181
6182 return length + 20;
6183 }
6184 else if (TARGET_PORTABLE_RUNTIME)
6185 return length + 24;
6186 else
6187 {
6188 if (!TARGET_LONG_CALLS && distance < 240000)
6189 return length + 8;
6190
6191 if (TARGET_LONG_ABS_CALL && !flag_pic)
6192 return length + 12;
6193
6194 return length + 24;
6195 }
6196 }
6197
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.

   Emits the assembly for a call to a millicode routine (a small
   out-of-line support routine) and, if reorg placed an unconditional
   jump in the call's delay slot, folds that jump into the call by
   adjusting the return pointer.  Always returns "" -- all output is
   done through output_asm_insn / fprintf.  */

const char *
output_millicode_call (insn, call_dest)
     rtx insn;
     rtx call_dest;
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  int distance;
  rtx seq_insn;
  rtx xoperands[3];

  xoperands[0] = call_dest;
  /* %2 is the millicode return pointer: %r2 in 64-bit mode, %r31
     otherwise.  */
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because
     it has an attribute type of multi, it never has a non-zero
     sequence length.  The length of the $$sh_func_adrs is the same
     as certain out of reach PIC calls to other routines.  */
  if (!TARGET_LONG_CALLS
      && ((seq_length == 0
	   && (attr_length == 12
	       || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
	  || (seq_length != 0 && attr_length == 8)))
    {
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  output_asm_insn ("b,l .+8,%%r1", xoperands);

	  if (TARGET_GAS)
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      /* HP assembler: use an internal label as the reference
		 point for the pc-relative address computation.  */
	      xoperands[1] = gen_label_rtx ();
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }

	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  /* Long absolute non-PIC call through %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  /* Long PIC call: take the current pc into %r1, then form the
	     return point in %r31 by adding the byte offset from the bl
	     to the insn following the bv's delay slot (16 bytes).  */
	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);

	  if (TARGET_SOM || !TARGET_GAS)
	    {
	      /* The HP assembler can generate relocations for the
		 difference of two symbols.  GAS can do this for a
		 millicode symbol but not an arbitrary external
		 symbol when generating SOM output.  */
	      xoperands[1] = gen_label_rtx ();
	      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
			       xoperands);
	    }

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  /* No instruction was placed in the delay slot; fill it with a nop.  */
  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  /* See if the return address can be adjusted.  Use the containing
     sequence insn's address.  */
  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
  distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
	      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

  if (VAL_14_BITS_P (distance))
    {
      /* Implement the delay-slot jump by offsetting the return
	 pointer (%2) so the millicode routine returns straight to
	 the jump's target.  */
      xoperands[1] = gen_label_rtx ();
      output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (xoperands[1]));
    }
  else
    /* ??? This branch may not reach its target.  */
    output_asm_insn ("nop\n\tb,n %0", xoperands);

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;

  return "";
}
6346
/* We include the delay slot in the returned length as it is better to
   over estimate the length than to under estimate it.

   Return the worst-case length, in bytes, of the call INSN.  SIBCALL
   is nonzero for sibling (tail) calls, which use different branch
   sequences.  */

int
attr_length_call (insn, sibcall)
     rtx insn;
     int sibcall;
{
  /* Estimated distance to the branch target: the insn's address plus
     total_code_bytes (presumably the code emitted so far for earlier
     functions -- TODO confirm).  */
  unsigned long distance = total_code_bytes + INSN_ADDRESSES (INSN_UID (insn));

  /* If the unsigned addition wrapped around, saturate to the maximum
     unsigned value so that the call is treated as out of range.  */
  if (distance < total_code_bytes)
    distance = -1;

  if (TARGET_64BIT)
    {
      /* Short pc-relative call; sibcalls have a tighter range.  */
      if (!TARGET_LONG_CALLS
	  && ((!sibcall && distance < 7600000) || distance < 240000))
	return 8;

      return (sibcall ? 28 : 24);
    }
  else
    {
      /* Short pc-relative call.  */
      if (!TARGET_LONG_CALLS
	  && ((TARGET_PA_20 && !sibcall && distance < 7600000)
	      || distance < 240000))
	return 8;

      /* Long absolute (non-PIC) call: ldil/be sequence.  */
      if (TARGET_LONG_ABS_CALL && !flag_pic)
	return 12;

      /* Long pc-relative call using a symbol-difference or
	 $PIC_pcrel$0 relocation.  */
      if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
	  || (TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL))
	{
	  if (TARGET_PA_20)
	    return 20;

	  return 28;
	}
      else
	{
	  /* Inline plabel-based indirect call sequence.  */
	  int length = 0;

	  /* On SOM, FP arguments must be copied to general registers
	     around the indirect call; account for those insns.  */
	  if (TARGET_SOM)
	    length += length_fp_args (insn);

	  if (flag_pic)
	    length += 4;

	  if (TARGET_PA_20)
	    return (length + 32);

	  /* Pre-2.0 non-sibcalls need extra insns to set up the
	     return pointer.  */
	  if (!sibcall)
	    length += 8;

	  return (length + 40);
	}
    }
}
6406
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.

   SIBCALL is nonzero for a sibling (tail) call, which saves no return
   pointer and must never have a branch in its delay slot.  Always
   returns ""; the assembly is emitted directly.  */

const char *
output_call (insn, call_dest, sibcall)
     rtx insn;
     rtx call_dest;
     int sibcall;
{
  int delay_insn_deleted = 0;	/* Delay insn already output before call.  */
  int delay_slot_filled = 0;	/* We put an insn in the branch's slot.  */
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[2];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the $CODE$ subspace.  */
  if (!TARGET_LONG_CALLS
      && ((seq_length == 0 && attr_length == 12)
	  || (seq_length != 0 && attr_length == 8)))
    {
      /* Return pointer goes in %r2, or %r0 (discarded) for sibcalls.  */
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));

	  xoperands[0] = p->internal_label;
	  xoperands[1] = gen_label_rtx ();

	  /* If this isn't a sibcall, we put the load of %r27 into the
	     delay slot.  We can't do this in a sibcall as we don't
	     have a second call-clobbered scratch register available.  */
	  if (seq_length != 0
	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	      && !sibcall)
	    {
	      /* Output the delay insn ahead of the call sequence.  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, 0);

	      /* Now delete the delay insn.  */
	      PUT_CODE (NEXT_INSN (insn), NOTE);
	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	      delay_insn_deleted = 1;
	    }

	  /* Load the function descriptor address from the plabel.  */
	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);

	  if (sibcall)
	    {
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
	      output_asm_insn ("bve (%%r1)", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	      /* The %r27 (gp) load rides in the branch's delay slot.  */
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      delay_slot_filled = 1;
	    }
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
	     they don't allow an instruction in the delay slot.  */
	  if (!(TARGET_LONG_ABS_CALL && !flag_pic)
	      && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
	      && !(TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL))
	    indirect_call = 1;

	  if (seq_length != 0
	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	      && !sibcall
	      && (!TARGET_PA_20 || indirect_call))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before argument
		 relocating.  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);

	      /* Now delete the delay insn.  */
	      PUT_CODE (NEXT_INSN (insn), NOTE);
	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	      delay_insn_deleted = 1;
	    }

	  if (TARGET_LONG_ABS_CALL && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  /* Move the return pointer from %r31 (where ble/be,l
		     leave it) into %r2 in the delay slot.  */
		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  delay_slot_filled = 1;
		}
	    }
	  else
	    {
	      if (TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
		{
		  /* The HP assembler and linker can handle relocations
		     for the difference of two symbols.  GAS and the HP
		     linker can't do this when one of the symbols is
		     external.  */
		  xoperands[1] = gen_label_rtx ();
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
		  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
					     CODE_LABEL_NUMBER (xoperands[1]));
		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
		}
	      else if (TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL)
		{
		  /* GAS currently can't generate the relocations that
		     are needed for the SOM linker under HP-UX using this
		     sequence.  The GNU linker doesn't generate the stubs
		     that are needed for external calls on TARGET_ELF32
		     with this sequence.  For now, we have to use a
		     longer plabel sequence when using GAS.  */
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
				   xoperands);
		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
				   xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));

		  xoperands[0] = p->internal_label;
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
		    }
		  else
		    {
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
				       xoperands);
		    }

		  /* If bit 30 of the plabel is set, this is a shared
		     library plabel: mask off the low bits and load the
		     real target address and new gp from the descriptor.
		     Otherwise skip the next three insns.  */
		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      /* Compute the return pointer into %r2 by hand
			 (current pc + 16).  */
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  delay_slot_filled = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  /* Set %sr0 from the target's space id so the
		     inter-space branch lands in the right space.  */
		  output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				   xoperands);

		  if (sibcall)
		    output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		  else
		    {
		      output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      delay_slot_filled = 1;
		    }
		}
	    }
	}
    }

  /* Fill an unused delay slot with a nop.  */
  if (seq_length == 0 || (delay_insn_deleted && !delay_slot_filled))
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0
      || delay_insn_deleted
      || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* A sibcall should never have a branch in the delay slot.  */
  if (sibcall)
    abort ();

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  if (!delay_slot_filled)
    {
      /* See if the return address can be adjusted.  Use the containing
	 sequence insn's address.  */
      rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance))
	{
	  /* Fold the jump into the call by offsetting the return
	     pointer %r2 to the jump's target.  */
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				     CODE_LABEL_NUMBER (xoperands[1]));
	}
      else
	/* ??? This branch may not reach its target.  */
	output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    /* ??? This branch may not reach its target.  */
    output_asm_insn ("b,n %0", xoperands);

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;

  return "";
}
6688
6689 /* In HPUX 8.0's shared library scheme, special relocations are needed
6690 for function labels if they might be passed to a function
6691 in a shared library (because shared libraries don't live in code
6692 space), and special magic is needed to construct their address. */
6693
6694 void
6695 hppa_encode_label (sym)
6696 rtx sym;
6697 {
6698 const char *str = XSTR (sym, 0);
6699 int len = strlen (str) + 1;
6700 char *newstr, *p;
6701
6702 p = newstr = alloca (len + 1);
6703 *p++ = '@';
6704 strcpy (p, str);
6705
6706 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
6707 }
6708
6709 static void
6710 pa_encode_section_info (decl, first)
6711 tree decl;
6712 int first;
6713 {
6714 if (first && TEXT_SPACE_P (decl))
6715 {
6716 rtx rtl;
6717 if (TREE_CODE (decl) == FUNCTION_DECL
6718 || TREE_CODE (decl) == VAR_DECL)
6719 rtl = DECL_RTL (decl);
6720 else
6721 rtl = TREE_CST_RTL (decl);
6722 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
6723 if (TREE_CODE (decl) == FUNCTION_DECL)
6724 hppa_encode_label (XEXP (DECL_RTL (decl), 0));
6725 }
6726 }
6727
/* This is sort of inverse to pa_encode_section_info: skip a leading
   '@' (function-label marker) and then a leading '*' (assembler-name
   marker), each at most once and in that order, returning a pointer
   into the same string.  */

static const char *
pa_strip_name_encoding (str)
     const char *str;
{
  if (str[0] == '@')
    str++;
  if (str[0] == '*')
    str++;
  return str;
}
6738
6739 int
6740 function_label_operand (op, mode)
6741 rtx op;
6742 enum machine_mode mode ATTRIBUTE_UNUSED;
6743 {
6744 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
6745 }
6746
6747 /* Returns 1 if OP is a function label involved in a simple addition
6748 with a constant. Used to keep certain patterns from matching
6749 during instruction combination. */
6750 int
6751 is_function_label_plus_const (op)
6752 rtx op;
6753 {
6754 /* Strip off any CONST. */
6755 if (GET_CODE (op) == CONST)
6756 op = XEXP (op, 0);
6757
6758 return (GET_CODE (op) == PLUS
6759 && function_label_operand (XEXP (op, 0), Pmode)
6760 && GET_CODE (XEXP (op, 1)) == CONST_INT);
6761 }
6762
/* Output assembly code for a thunk to FUNCTION.

   The emitted thunk adds DELTA to the first argument (%r26, the `this'
   pointer) and then transfers control to FUNCTION.  VCALL_OFFSET is
   unused on this target.  In the 32-bit PIC case the call goes through
   a plabel emitted into the data section, with the usual shared-library
   plabel decoding (see output_call).  */

static void
pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
     FILE *file;
     tree thunk_fndecl;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED;
     tree function;
{
  const char *target_name = XSTR (XEXP (DECL_RTL (function), 0), 0);
  /* Counter used to generate a unique LTHN label per thunk.  */
  static unsigned int current_thunk_number;
  char label[16];
  const char *lab;
  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
  lab = (*targetm.strip_name_encoding) (label);
  target_name = (*targetm.strip_name_encoding) (target_name);
  /* FIXME: total_code_bytes is not handled correctly in files with
     mi thunks.  */
  pa_output_function_prologue (file, 0);
  if (VAL_14_BITS_P (delta))
    {
      /* DELTA fits in 14 bits, so a single ldo in the branch's delay
	 slot can perform the adjustment.  */
      if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
	{
	  /* Load the plabel for the target through the LTHN label,
	     decode it if it is a shared-library plabel (bit 30 set),
	     then branch with the ldo adjustment in the delay slot.  */
	  fprintf (file, "\taddil LT'%s,%%r19\n", lab);
	  fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
	  fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
	  fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
	  fprintf (file, "\tdepi 0,31,2,%%r22\n");
	  fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
	  fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
	  fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n\tmtsp %%r1,%%sr0\n");
	  fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
	  fprintf (file, "(%%r26),%%r26\n");
	}
      else
	{
	  /* Direct branch; adjust %r26 in the delay slot.  */
	  fprintf (file, "\tb %s\n\tldo ", target_name);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
	  fprintf (file, "(%%r26),%%r26\n");
	}
    }
  else
    {
      /* DELTA needs an addil/ldo pair to be added to %r26.  */
      if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
	{
	  fprintf (file, "\taddil L'");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
	  fprintf (file, ",%%r26\n\tldo R'");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
	  fprintf (file, "(%%r1),%%r26\n");
	  /* Same plabel load/decode/branch sequence as above, but the
	     adjustment was done up front so the branch nullifies its
	     delay slot.  */
	  fprintf (file, "\taddil LT'%s,%%r19\n", lab);
	  fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
	  fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
	  fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
	  fprintf (file, "\tdepi 0,31,2,%%r22\n");
	  fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
	  fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
	  fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n\tmtsp %%r1,%%sr0\n");
	  fprintf (file, "\tbe,n 0(%%sr0,%%r22)\n");
	}
      else
	{
	  fprintf (file, "\taddil L'");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
	  fprintf (file, ",%%r26\n\tb %s\n\tldo R'", target_name);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
	  fprintf (file, "(%%r1),%%r26\n");
	}
    }

  fprintf (file, "\t.EXIT\n\t.PROCEND\n");
  if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
    {
      /* Emit the word-sized plabel referenced above into the data
	 section, then switch back to the thunk's section.  */
      data_section ();
      fprintf (file, "\t.align 4\n");
      ASM_OUTPUT_INTERNAL_LABEL (file, "LTHN", current_thunk_number);
      fprintf (file, "\t.word P'%s\n", target_name);
      function_section (thunk_fndecl);
    }
  current_thunk_number++;
}
6846
6847 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6848 use in fmpyadd instructions. */
6849 int
6850 fmpyaddoperands (operands)
6851 rtx *operands;
6852 {
6853 enum machine_mode mode = GET_MODE (operands[0]);
6854
6855 /* Must be a floating point mode. */
6856 if (mode != SFmode && mode != DFmode)
6857 return 0;
6858
6859 /* All modes must be the same. */
6860 if (! (mode == GET_MODE (operands[1])
6861 && mode == GET_MODE (operands[2])
6862 && mode == GET_MODE (operands[3])
6863 && mode == GET_MODE (operands[4])
6864 && mode == GET_MODE (operands[5])))
6865 return 0;
6866
6867 /* All operands must be registers. */
6868 if (! (GET_CODE (operands[1]) == REG
6869 && GET_CODE (operands[2]) == REG
6870 && GET_CODE (operands[3]) == REG
6871 && GET_CODE (operands[4]) == REG
6872 && GET_CODE (operands[5]) == REG))
6873 return 0;
6874
6875 /* Only 2 real operands to the addition. One of the input operands must
6876 be the same as the output operand. */
6877 if (! rtx_equal_p (operands[3], operands[4])
6878 && ! rtx_equal_p (operands[3], operands[5]))
6879 return 0;
6880
6881 /* Inout operand of add can not conflict with any operands from multiply. */
6882 if (rtx_equal_p (operands[3], operands[0])
6883 || rtx_equal_p (operands[3], operands[1])
6884 || rtx_equal_p (operands[3], operands[2]))
6885 return 0;
6886
6887 /* multiply can not feed into addition operands. */
6888 if (rtx_equal_p (operands[4], operands[0])
6889 || rtx_equal_p (operands[5], operands[0]))
6890 return 0;
6891
6892 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6893 if (mode == SFmode
6894 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6895 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6896 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6897 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6898 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6899 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6900 return 0;
6901
6902 /* Passed. Operands are suitable for fmpyadd. */
6903 return 1;
6904 }
6905
6906 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6907 use in fmpysub instructions. */
6908 int
6909 fmpysuboperands (operands)
6910 rtx *operands;
6911 {
6912 enum machine_mode mode = GET_MODE (operands[0]);
6913
6914 /* Must be a floating point mode. */
6915 if (mode != SFmode && mode != DFmode)
6916 return 0;
6917
6918 /* All modes must be the same. */
6919 if (! (mode == GET_MODE (operands[1])
6920 && mode == GET_MODE (operands[2])
6921 && mode == GET_MODE (operands[3])
6922 && mode == GET_MODE (operands[4])
6923 && mode == GET_MODE (operands[5])))
6924 return 0;
6925
6926 /* All operands must be registers. */
6927 if (! (GET_CODE (operands[1]) == REG
6928 && GET_CODE (operands[2]) == REG
6929 && GET_CODE (operands[3]) == REG
6930 && GET_CODE (operands[4]) == REG
6931 && GET_CODE (operands[5]) == REG))
6932 return 0;
6933
6934 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
6935 operation, so operands[4] must be the same as operand[3]. */
6936 if (! rtx_equal_p (operands[3], operands[4]))
6937 return 0;
6938
6939 /* multiply can not feed into subtraction. */
6940 if (rtx_equal_p (operands[5], operands[0]))
6941 return 0;
6942
6943 /* Inout operand of sub can not conflict with any operands from multiply. */
6944 if (rtx_equal_p (operands[3], operands[0])
6945 || rtx_equal_p (operands[3], operands[1])
6946 || rtx_equal_p (operands[3], operands[2]))
6947 return 0;
6948
6949 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6950 if (mode == SFmode
6951 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6952 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6953 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6954 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6955 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6956 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6957 return 0;
6958
6959 /* Passed. Operands are suitable for fmpysub. */
6960 return 1;
6961 }
6962
6963 int
6964 plus_xor_ior_operator (op, mode)
6965 rtx op;
6966 enum machine_mode mode ATTRIBUTE_UNUSED;
6967 {
6968 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
6969 || GET_CODE (op) == IOR);
6970 }
6971
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
static int
shadd_constant_p (val)
     int val;
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;
    default:
      return 0;
    }
}
6983
6984 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
6985 the valid constant for shadd instructions. */
6986 int
6987 shadd_operand (op, mode)
6988 rtx op;
6989 enum machine_mode mode ATTRIBUTE_UNUSED;
6990 {
6991 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
6992 }
6993
6994 /* Return 1 if OP is valid as a base register in a reg + reg address. */
6995
6996 int
6997 basereg_operand (op, mode)
6998 rtx op;
6999 enum machine_mode mode;
7000 {
7001 /* cse will create some unscaled indexed addresses, however; it
7002 generally isn't a win on the PA, so avoid creating unscaled
7003 indexed addresses until after cse is finished. */
7004 if (!cse_not_expected)
7005 return 0;
7006
7007 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
7008 we don't have to worry about the braindamaged implicit space
7009 register selection from the basereg. */
7010 if (TARGET_NO_SPACE_REGS)
7011 return (GET_CODE (op) == REG);
7012
7013 /* While it's always safe to index off the frame pointer, it's not
7014 always profitable, particularly when the frame pointer is being
7015 eliminated. */
7016 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
7017 return 1;
7018
7019 return (GET_CODE (op) == REG
7020 && REG_POINTER (op)
7021 && register_operand (op, mode));
7022 }
7023
7024 /* Return 1 if this operand is anything other than a hard register. */
7025
7026 int
7027 non_hard_reg_operand (op, mode)
7028 rtx op;
7029 enum machine_mode mode ATTRIBUTE_UNUSED;
7030 {
7031 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
7032 }
7033
7034 /* Return 1 if INSN branches forward. Should be using insn_addresses
7035 to avoid walking through all the insns... */
7036 static int
7037 forward_branch_p (insn)
7038 rtx insn;
7039 {
7040 rtx label = JUMP_LABEL (insn);
7041
7042 while (insn)
7043 {
7044 if (insn == label)
7045 break;
7046 else
7047 insn = NEXT_INSN (insn);
7048 }
7049
7050 return (insn == label);
7051 }
7052
7053 /* Return 1 if OP is an equality comparison, else return 0. */
7054 int
7055 eq_neq_comparison_operator (op, mode)
7056 rtx op;
7057 enum machine_mode mode ATTRIBUTE_UNUSED;
7058 {
7059 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
7060 }
7061
7062 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
7063 int
7064 movb_comparison_operator (op, mode)
7065 rtx op;
7066 enum machine_mode mode ATTRIBUTE_UNUSED;
7067 {
7068 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
7069 || GET_CODE (op) == LT || GET_CODE (op) == GE);
7070 }
7071
/* Return 1 if INSN is in the delay slot of a call instruction.  */
int
jump_in_call_delay (insn)
     rtx insn;
{

  /* Only jumps can occupy a call's delay slot here.  */
  if (GET_CODE (insn) != JUMP_INSN)
    return 0;

  /* Step back two insns and scan forward to the next active insn;
     if INSN is in a filled delay slot, that active insn is the
     SEQUENCE containing the call and INSN (assumes PREV_INSN links
     skip past the SEQUENCE's contents -- NOTE(review): confirm).  */
  if (PREV_INSN (insn)
      && PREV_INSN (PREV_INSN (insn))
      && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
    {
      rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));

      /* Element 1 of a SEQUENCE is the delay-slot insn; it must be
	 INSN itself for INSN to be in the delay slot.  */
      return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);

    }
  else
    return 0;
}
7094
7095 /* Output an unconditional move and branch insn. */
7096
7097 const char *
7098 output_parallel_movb (operands, length)
7099 rtx *operands;
7100 int length;
7101 {
7102 /* These are the cases in which we win. */
7103 if (length == 4)
7104 return "mov%I1b,tr %1,%0,%2";
7105
7106 /* None of these cases wins, but they don't lose either. */
7107 if (dbr_sequence_length () == 0)
7108 {
7109 /* Nothing in the delay slot, fake it by putting the combined
7110 insn (the copy or add) in the delay slot of a bl. */
7111 if (GET_CODE (operands[1]) == CONST_INT)
7112 return "b %2\n\tldi %1,%0";
7113 else
7114 return "b %2\n\tcopy %1,%0";
7115 }
7116 else
7117 {
7118 /* Something in the delay slot, but we've got a long branch. */
7119 if (GET_CODE (operands[1]) == CONST_INT)
7120 return "ldi %1,%0\n\tb %2";
7121 else
7122 return "copy %1,%0\n\tb %2";
7123 }
7124 }
7125
7126 /* Output an unconditional add and branch insn. */
7127
7128 const char *
7129 output_parallel_addb (operands, length)
7130 rtx *operands;
7131 int length;
7132 {
7133 /* To make life easy we want operand0 to be the shared input/output
7134 operand and operand1 to be the readonly operand. */
7135 if (operands[0] == operands[1])
7136 operands[1] = operands[2];
7137
7138 /* These are the cases in which we win. */
7139 if (length == 4)
7140 return "add%I1b,tr %1,%0,%3";
7141
7142 /* None of these cases win, but they don't lose either. */
7143 if (dbr_sequence_length () == 0)
7144 {
7145 /* Nothing in the delay slot, fake it by putting the combined
7146 insn (the copy or add) in the delay slot of a bl. */
7147 return "b %3\n\tadd%I1 %1,%0,%0";
7148 }
7149 else
7150 {
7151 /* Something in the delay slot, but we've got a long branch. */
7152 return "add%I1 %1,%0,%0\n\tb %3";
7153 }
7154 }
7155
/* Return nonzero if INSN (a jump insn) immediately follows a call
   to a named function.  This is used to avoid filling the delay slot
   of the jump since it can usually be eliminated by modifying RP in
   the delay slot of the call.  */

int
following_call (insn)
     rtx insn;
{
  /* This optimization is only relevant when jumps may be placed in
     call delay slots.  */
  if (! TARGET_JUMP_IN_DELAY)
    return 0;

  /* Find the previous real insn, skipping NOTEs.  */
  insn = PREV_INSN (insn);
  while (insn && GET_CODE (insn) == NOTE)
    insn = PREV_INSN (insn);

  /* Check for CALL_INSNs and millicode calls.  A millicode call shows
     up as an ordinary INSN with attribute type TYPE_MILLI rather than
     a CALL_INSN; dyncalls are excluded because their target is not a
     named function.  */
  if (insn
      && ((GET_CODE (insn) == CALL_INSN
	   && get_attr_type (insn) != TYPE_DYNCALL)
	  || (GET_CODE (insn) == INSN
	      && GET_CODE (PATTERN (insn)) != SEQUENCE
	      && GET_CODE (PATTERN (insn)) != USE
	      && GET_CODE (PATTERN (insn)) != CLOBBER
	      && get_attr_type (insn) == TYPE_MILLI)))
    return 1;

  return 0;
}
7186
7187 /* We use this hook to perform a PA specific optimization which is difficult
7188 to do in earlier passes.
7189
7190 We want the delay slots of branches within jump tables to be filled.
7191 None of the compiler passes at the moment even has the notion that a
7192 PA jump table doesn't contain addresses, but instead contains actual
7193 instructions!
7194
7195 Because we actually jump into the table, the addresses of each entry
7196 must stay constant in relation to the beginning of the table (which
7197 itself must stay constant relative to the instruction to jump into
7198 it). I don't believe we can guarantee earlier passes of the compiler
7199 will adhere to those rules.
7200
7201 So, late in the compilation process we find all the jump tables, and
7202 expand them into real code -- eg each entry in the jump table vector
7203 will get an appropriate label followed by a jump to the final target.
7204
7205 Reorg and the final jump pass can then optimize these branches and
7206 fill their delay slots. We end up with smaller, more efficient code.
7207
7208 The jump instructions within the table are special; we must be able
7209 to identify them during assembly output (if the jumps don't get filled
7210 we need to emit a nop rather than nullifying the delay slot)). We
7211 identify jumps in switch tables by marking the SET with DImode.
7212
7213 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
7214 insns. This serves two purposes, first it prevents jump.c from
7215 noticing that the last N entries in the table jump to the instruction
7216 immediately after the table and deleting the jumps. Second, those
7217 insns mark where we should emit .begin_brtab and .end_brtab directives
7218 when using GAS (allows for better link time optimizations). */
7219
7220 void
7221 pa_reorg (insns)
7222 rtx insns;
7223 {
7224 rtx insn;
7225
7226 remove_useless_addtr_insns (insns, 1);
7227
7228 if (pa_cpu < PROCESSOR_8000)
7229 pa_combine_instructions (get_insns ());
7230
7231
7232 /* This is fairly cheap, so always run it if optimizing. */
7233 if (optimize > 0 && !TARGET_BIG_SWITCH)
7234 {
7235 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
7236 insns = get_insns ();
7237 for (insn = insns; insn; insn = NEXT_INSN (insn))
7238 {
7239 rtx pattern, tmp, location;
7240 unsigned int length, i;
7241
7242 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
7243 if (GET_CODE (insn) != JUMP_INSN
7244 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7245 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7246 continue;
7247
7248 /* Emit marker for the beginning of the branch table. */
7249 emit_insn_before (gen_begin_brtab (), insn);
7250
7251 pattern = PATTERN (insn);
7252 location = PREV_INSN (insn);
7253 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
7254
7255 for (i = 0; i < length; i++)
7256 {
7257 /* Emit a label before each jump to keep jump.c from
7258 removing this code. */
7259 tmp = gen_label_rtx ();
7260 LABEL_NUSES (tmp) = 1;
7261 emit_label_after (tmp, location);
7262 location = NEXT_INSN (location);
7263
7264 if (GET_CODE (pattern) == ADDR_VEC)
7265 {
7266 /* Emit the jump itself. */
7267 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
7268 tmp = emit_jump_insn_after (tmp, location);
7269 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
7270 /* It is easy to rely on the branch table markers
7271 during assembly output to trigger the correct code
7272 for a switch table jump with an unfilled delay slot,
7273
7274 However, that requires state and assumes that we look
7275 at insns in order.
7276
7277 We can't make such assumptions when computing the length
7278 of instructions. Ugh. We could walk the insn chain to
7279 determine if this instruction is in a branch table, but
7280 that can get rather expensive, particularly during the
7281 branch shortening phase of the compiler.
7282
7283 So instead we mark this jump as being special. This is
7284 far from ideal and knows that no code after this will
7285 muck around with the mode of the JUMP_INSN itself. */
7286 PUT_MODE (tmp, SImode);
7287 LABEL_NUSES (JUMP_LABEL (tmp))++;
7288 location = NEXT_INSN (location);
7289 }
7290 else
7291 {
7292 /* Emit the jump itself. */
7293 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
7294 tmp = emit_jump_insn_after (tmp, location);
7295 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
7296 /* It is easy to rely on the branch table markers
7297 during assembly output to trigger the correct code
7298 for a switch table jump with an unfilled delay slot,
7299
7300 However, that requires state and assumes that we look
7301 at insns in order.
7302
7303 We can't make such assumptions when computing the length
7304 of instructions. Ugh. We could walk the insn chain to
7305 determine if this instruction is in a branch table, but
7306 that can get rather expensive, particularly during the
7307 branch shortening phase of the compiler.
7308
7309 So instead we mark this jump as being special. This is
7310 far from ideal and knows that no code after this will
7311 muck around with the mode of the JUMP_INSN itself. */
7312 PUT_MODE (tmp, SImode);
7313 LABEL_NUSES (JUMP_LABEL (tmp))++;
7314 location = NEXT_INSN (location);
7315 }
7316
7317 /* Emit a BARRIER after the jump. */
7318 emit_barrier_after (location);
7319 location = NEXT_INSN (location);
7320 }
7321
7322 /* Emit marker for the end of the branch table. */
7323 emit_insn_before (gen_end_brtab (), location);
7324 location = NEXT_INSN (location);
7325 emit_barrier_after (location);
7326
7327 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
7328 delete_insn (insn);
7329 }
7330 }
7331 else
7332 {
7333 /* Sill need an end_brtab insn. */
7334 insns = get_insns ();
7335 for (insn = insns; insn; insn = NEXT_INSN (insn))
7336 {
7337 /* Find an ADDR_VEC insn. */
7338 if (GET_CODE (insn) != JUMP_INSN
7339 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7340 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7341 continue;
7342
7343 /* Now generate markers for the beginning and end of the
7344 branch table. */
7345 emit_insn_before (gen_begin_brtab (), insn);
7346 emit_insn_after (gen_end_brtab (), insn);
7347 }
7348 }
7349 }
7350
7351 /* The PA has a number of odd instructions which can perform multiple
7352 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
7353 it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
7355 two outputs would take two slots in the reorder buffers.
7356
7357 This routine finds instructions which can be combined and combines
7358 them. We only support some of the potential combinations, and we
7359 only try common ways to find suitable instructions.
7360
7361 * addb can add two registers or a register and a small integer
7362 and jump to a nearby (+-8k) location. Normally the jump to the
7363 nearby location is conditional on the result of the add, but by
7364 using the "true" condition we can make the jump unconditional.
7365 Thus addb can perform two independent operations in one insn.
7366
7367 * movb is similar to addb in that it can perform a reg->reg
7368 or small immediate->reg copy and jump to a nearby (+-8k location).
7369
7370 * fmpyadd and fmpysub can perform a FP multiply and either an
7371 FP add or FP sub if the operands of the multiply and add/sub are
7372 independent (there are other minor restrictions). Note both
7373 the fmpy and fadd/fsub can in theory move to better spots according
7374 to data dependencies, but for now we require the fmpy stay at a
7375 fixed location.
7376
7377 * Many of the memory operations can perform pre & post updates
7378 of index registers. GCC's pre/post increment/decrement addressing
7379 is far too simple to take advantage of all the possibilities. This
7380 pass may not be suitable since those insns may not be independent.
7381
7382 * comclr can compare two ints or an int and a register, nullify
7383 the following instruction and zero some other register. This
7384 is more difficult to use as it's harder to find an insn which
7385 will generate a comclr than finding something like an unconditional
7386 branch. (conditional moves & long branches create comclr insns).
7387
7388 * Most arithmetic operations can conditionally skip the next
7389 instruction. They can be viewed as "perform this operation
7390 and conditionally jump to this nearby location" (where nearby
   is a few insns away).  These are difficult to use due to the
7392 branch length restrictions. */
7393
static void
pa_combine_instructions (insns)
     rtx insns ATTRIBUTE_UNUSED;
{
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.

     NEW is a single scratch PARALLEL insn, reused for every candidate
     pairing; pa_can_combine_p splices the two patterns into it and
     asks the recognizer whether the combination is valid.  */
  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
           && GET_CODE (anchor) != JUMP_INSN
           && GET_CODE (anchor) != CALL_INSN)
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER
          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
        continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && ! forward_branch_p (anchor)))
        {
          rtx floater;

          /* First scan backwards from the anchor for a suitable
             floating insn.  */
          for (floater = PREV_INSN (anchor);
               floater;
               floater = PREV_INSN (floater))
            {
              /* Notes and special USE/CLOBBER insns never conflict;
                 skip over them.  */
              if (GET_CODE (floater) == NOTE
                  || (GET_CODE (floater) == INSN
                      && (GET_CODE (PATTERN (floater)) == USE
                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
                continue;

              /* Anything except a regular INSN will stop our search.  */
              if (GET_CODE (floater) != INSN
                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                {
                  floater = NULL_RTX;
                  break;
                }

              /* See if FLOATER is suitable for combination with the
                 anchor.  */
              floater_attr = get_attr_pa_combine_type (floater);
              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                      && floater_attr == PA_COMBINE_TYPE_FMPY))
                {
                  /* If ANCHOR and FLOATER can be combined, then we're
                     done with this pass.  */
                  if (pa_can_combine_p (new, anchor, floater, 0,
                                        SET_DEST (PATTERN (floater)),
                                        XEXP (SET_SRC (PATTERN (floater)), 0),
                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
                    break;
                }

              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
                {
                  /* An ADDMOVE floater is either an add (PLUS source)
                     or a plain copy; the operands passed to
                     pa_can_combine_p differ accordingly.  */
                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                  else
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater))))
                        break;
                    }
                }
            }

          /* If we didn't find anything on the backwards scan try forwards.  */
          if (!floater
              && (anchor_attr == PA_COMBINE_TYPE_FMPY
                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
            {
              for (floater = anchor; floater; floater = NEXT_INSN (floater))
                {
                  if (GET_CODE (floater) == NOTE
                      || (GET_CODE (floater) == INSN
                          && (GET_CODE (PATTERN (floater)) == USE
                              || GET_CODE (PATTERN (floater)) == CLOBBER)))

                    continue;

                  /* Anything except a regular INSN will stop our search.  */
                  if (GET_CODE (floater) != INSN
                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                    {
                      floater = NULL_RTX;
                      break;
                    }

                  /* See if FLOATER is suitable for combination with the
                     anchor.  Note REVERSED is 1 here since the floater
                     follows the anchor.  */
                  floater_attr = get_attr_pa_combine_type (floater);
                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                          && floater_attr == PA_COMBINE_TYPE_FMPY))
                    {
                      /* If ANCHOR and FLOATER can be combined, then we're
                         done with this pass.  */
                      if (pa_can_combine_p (new, anchor, floater, 1,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  0),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  1)))
                        break;
                    }
                }
            }

          /* FLOATER will be nonzero if we found a suitable floating
             insn for combination with ANCHOR.  */
          if (floater
              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
            {
              /* Emit the new instruction and delete the old anchor.  */
              emit_insn_before (gen_rtx_PARALLEL
                                (VOIDmode,
                                 gen_rtvec (2, PATTERN (anchor),
                                            PATTERN (floater))),
                                anchor);

              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);

              continue;
            }
          else if (floater
                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
            {
              rtx temp;
              /* Emit the new_jump instruction and delete the old anchor.  */
              temp
                = emit_jump_insn_before (gen_rtx_PARALLEL
                                         (VOIDmode,
                                          gen_rtvec (2, PATTERN (anchor),
                                                     PATTERN (floater))),
                                         anchor);

              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);
              continue;
            }
        }
    }
}
7590
7591 static int
7592 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
7593 rtx new, anchor, floater;
7594 int reversed;
7595 rtx dest, src1, src2;
7596 {
7597 int insn_code_number;
7598 rtx start, end;
7599
7600 /* Create a PARALLEL with the patterns of ANCHOR and
7601 FLOATER, try to recognize it, then test constraints
7602 for the resulting pattern.
7603
7604 If the pattern doesn't match or the constraints
7605 aren't met keep searching for a suitable floater
7606 insn. */
7607 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
7608 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
7609 INSN_CODE (new) = -1;
7610 insn_code_number = recog_memoized (new);
7611 if (insn_code_number < 0
7612 || (extract_insn (new), ! constrain_operands (1)))
7613 return 0;
7614
7615 if (reversed)
7616 {
7617 start = anchor;
7618 end = floater;
7619 }
7620 else
7621 {
7622 start = floater;
7623 end = anchor;
7624 }
7625
7626 /* There's up to three operands to consider. One
7627 output and two inputs.
7628
7629 The output must not be used between FLOATER & ANCHOR
7630 exclusive. The inputs must not be set between
7631 FLOATER and ANCHOR exclusive. */
7632
7633 if (reg_used_between_p (dest, start, end))
7634 return 0;
7635
7636 if (reg_set_between_p (src1, start, end))
7637 return 0;
7638
7639 if (reg_set_between_p (src2, start, end))
7640 return 0;
7641
7642 /* If we get here, then everything is good. */
7643 return 1;
7644 }
7645
7646 /* Return nonzero if references for INSN are delayed.
7647
7648 Millicode insns are actually function calls with some special
7649 constraints on arguments and register usage.
7650
7651 Millicode calls always expect their arguments in the integer argument
7652 registers, and always return their result in %r29 (ret1). They
7653 are expected to clobber their arguments, %r1, %r29, and the return
7654 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
7655
7656 This function tells reorg that the references to arguments and
7657 millicode calls do not appear to happen until after the millicode call.
7658 This allows reorg to put insns which set the argument registers into the
7659 delay slot of the millicode call -- thus they act more like traditional
7660 CALL_INSNs.
7661
7662 Note we can not consider side effects of the insn to be delayed because
7663 the branch and link insn will clobber the return pointer. If we happened
7664 to use the return pointer in the delay slot of the call, then we lose.
7665
7666 get_attr_type will try to recognize the given insn, so make sure to
7667 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
7668 in particular. */
7669 int
7670 insn_refs_are_delayed (insn)
7671 rtx insn;
7672 {
7673 return ((GET_CODE (insn) == INSN
7674 && GET_CODE (PATTERN (insn)) != SEQUENCE
7675 && GET_CODE (PATTERN (insn)) != USE
7676 && GET_CODE (PATTERN (insn)) != CLOBBER
7677 && get_attr_type (insn) == TYPE_MILLI));
7678 }
7679
7680 /* On the HP-PA the value is found in register(s) 28(-29), unless
7681 the mode is SF or DF. Then the value is returned in fr4 (32).
7682
7683 This must perform the same promotions as PROMOTE_MODE, else
7684 PROMOTE_FUNCTION_RETURN will not work correctly.
7685
7686 Small structures must be returned in a PARALLEL on PA64 in order
7687 to match the HP Compiler ABI. */
7688
7689 rtx
7690 function_value (valtype, func)
7691 tree valtype;
7692 tree func ATTRIBUTE_UNUSED;
7693 {
7694 enum machine_mode valmode;
7695
7696 /* Aggregates with a size less than or equal to 128 bits are returned
7697 in GR 28(-29). They are left justified. The pad bits are undefined.
7698 Larger aggregates are returned in memory. */
7699 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
7700 {
7701 rtx loc[2];
7702 int i, offset = 0;
7703 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
7704
7705 for (i = 0; i < ub; i++)
7706 {
7707 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
7708 gen_rtx_REG (DImode, 28 + i),
7709 GEN_INT (offset));
7710 offset += 8;
7711 }
7712
7713 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
7714 }
7715
7716 if ((INTEGRAL_TYPE_P (valtype)
7717 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
7718 || POINTER_TYPE_P (valtype))
7719 valmode = word_mode;
7720 else
7721 valmode = TYPE_MODE (valtype);
7722
7723 if (TREE_CODE (valtype) == REAL_TYPE
7724 && TYPE_MODE (valtype) != TFmode
7725 && !TARGET_SOFT_FLOAT)
7726 return gen_rtx_REG (valmode, 32);
7727
7728 return gen_rtx_REG (valmode, 28);
7729 }
7730
7731 /* Return the location of a parameter that is passed in a register or NULL
7732 if the parameter has any component that is passed in memory.
7733
   This is new code and will be pushed into the net sources after
7735 further testing.
7736
7737 ??? We might want to restructure this so that it looks more like other
7738 ports. */
rtx
function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
     int incoming;
{
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  /* VOIDmode marks the end of the argument list.  */
  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  FUNCTION_ARG_PARTIAL_NREGS will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (! TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      /* Multi-word args start on an even (doubleword-aligned) slot.  */
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && AGGREGATE_TYPE_P (type)))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one, with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          /* Clamp to the registers actually remaining; any excess is
             passed on the stack (FUNCTION_ARG_PARTIAL_NREGS reports
             the split).  */
          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.

             This is magic.  Normally, using a PARALLEL results in left
             justified data on a big-endian target.  However, using a
             single double-word register provides the required right
             justification for 5 to 8 byte structures.  This has nothing
             to do with the direction of padding specified for the argument.
             It has to do with how the data is widened and shifted into
             and from the register.

             Aside from adding load_multiple and store_multiple patterns,
             this is the only way that I have found to obtain right
             justification of BLKmode data when it has a size greater
             than one word.  Splitting the operation into two SImode loads
             or returning a DImode REG results in left justified data.  */
          if (mode == BLKmode)
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && ! TARGET_SOFT_FLOAT
       /* The parameter must be some kind of float, else we can just
          pass it in integer registers.  */
       && FLOAT_MODE_P (mode)
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to outgoing args only.  */
       && ! incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32 bit ABI and the HP assembler since there is no
         way to specify argument locations in static functions.  */
      || (! TARGET_64BIT
          && ! TARGET_GAS
          && ! incoming
          && cum->indirect
          && FLOAT_MODE_P (mode)))
    {
      retval
        = gen_rtx_PARALLEL
          (mode,
           gen_rtvec (2,
                      gen_rtx_EXPR_LIST (VOIDmode,
                                         gen_rtx_REG (mode, fpr_reg_base),
                                         const0_rtx),
                      gen_rtx_EXPR_LIST (VOIDmode,
                                         gen_rtx_REG (mode, gpr_reg_base),
                                         const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a floating point parameter, then
             it belongs in GPRs.  */
          || !FLOAT_MODE_P (mode))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
7937
7938
7939 /* If this arg would be passed totally in registers or totally on the stack,
7940 then this routine should return zero. It is currently called only for
7941 the 64-bit target. */
7942 int
7943 function_arg_partial_nregs (cum, mode, type, named)
7944 CUMULATIVE_ARGS *cum;
7945 enum machine_mode mode;
7946 tree type;
7947 int named ATTRIBUTE_UNUSED;
7948 {
7949 unsigned int max_arg_words = 8;
7950 unsigned int offset = 0;
7951
7952 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7953 offset = 1;
7954
7955 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
7956 /* Arg fits fully into registers. */
7957 return 0;
7958 else if (cum->words + offset >= max_arg_words)
7959 /* Arg fully on the stack. */
7960 return 0;
7961 else
7962 /* Arg is split. */
7963 return max_arg_words - cum->words - offset;
7964 }
7965
7966
7967 /* Return 1 if this is a comparison operator. This allows the use of
7968 MATCH_OPERATOR to recognize all the branch insns. */
7969
7970 int
7971 cmpib_comparison_operator (op, mode)
7972 register rtx op;
7973 enum machine_mode mode;
7974 {
7975 return ((mode == VOIDmode || GET_MODE (op) == mode)
7976 && (GET_CODE (op) == EQ
7977 || GET_CODE (op) == NE
7978 || GET_CODE (op) == GT
7979 || GET_CODE (op) == GTU
7980 || GET_CODE (op) == GE
7981 || GET_CODE (op) == LT
7982 || GET_CODE (op) == LE
7983 || GET_CODE (op) == LEU));
7984 }
7985
7986 /* On hpux10, the linker will give an error if we have a reference
7987 in the read-only data section to a symbol defined in a shared
7988 library. Therefore, expressions that might require a reloc can
7989 not be placed in the read-only data section. */
7990
7991 static void
7992 pa_select_section (exp, reloc, align)
7993 tree exp;
7994 int reloc;
7995 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED;
7996 {
7997 if (TREE_CODE (exp) == VAR_DECL
7998 && TREE_READONLY (exp)
7999 && !TREE_THIS_VOLATILE (exp)
8000 && DECL_INITIAL (exp)
8001 && (DECL_INITIAL (exp) == error_mark_node
8002 || TREE_CONSTANT (DECL_INITIAL (exp)))
8003 && !reloc)
8004 readonly_data_section ();
8005 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
8006 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
8007 && !reloc)
8008 readonly_data_section ();
8009 else
8010 data_section ();
8011 }
8012
static void
pa_globalize_label (stream, name)
     FILE *stream;
     const char *name;
{
  /* We only handle DATA objects here; functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
8027 #include "gt-pa.h"