/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
#include "opts.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
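
/* For example (illustrative): the bypass applies when OUT_INSN computes a
   DFmode value that IN_INSN then stores as DFmode; more generally, the two
   modes need only agree in GET_MODE_SIZE, not be identical.  */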


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (enum machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#if HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, enum machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, enum machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, enum machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        enum machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
                                                   enum machine_mode, int *,
                                                   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static enum machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
static bool pa_legitimate_constant_p (enum machine_mode, rtx);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
\f
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#if HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
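
  /* Illustrative usage: -mfixed-range=%fr12-%fr15 marks %fr12 through
     %fr15 as fixed (register numbers also work, since the names go
     through decode_reg_name).  If every register from FP_REG_FIRST to
     FP_REG_LAST ends up fixed, the check at the end of this function
     sets MASK_DISABLE_FPREGS as well.  */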

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  VEC(cl_deferred_option,heap) *vec
    = (VEC(cl_deferred_option,heap) *) pa_deferred_options;

  FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
    {
      switch (opt->opt_index)
        {
        case OPT_mfixed_range_:
          fix_range (opt->arg);
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
          && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX_11
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
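
/* Illustrative use of the builtins registered above: on an HP-UX target
   where HPUX_LONG_DOUBLE_LIBRARY holds, user code such as

     __float128 r = __builtin_copysignq (x, y);

   resolves through pa_expand_builtin (below) to a call to the library
   routine "_U_Qfcopysign" named in the registration.  */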

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   enum machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || ldil_cint_p (ival)
          || zdepi_cint_p (ival));
}
\f
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
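
/* For illustration: 0x12345000 passes (low 11 bits zero, bit 31 clear),
   while 0x12345678 fails (nonzero low bits) and, on a 64-bit host,
   0x80000000 fails because the value changes sign when truncated to
   32 bits.  */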

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
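
/* Worked example (illustrative): for x = 0x1e0 (0b111100000, the 5-bit
   value 15 shifted left by 5), lsb_mask = 0x20 and
   t = (0x1e + 0x20) & ~0x1f = 0x20, a power of two, so we return true.
   For x = 0x21 (0b100001), t = (0x2 + 0x1) & ~0 = 3, not a power of
   two, so we return false -- 0b100001 spans six significant bits and is
   not a pattern zdepi can produce.  */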

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
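
/* The trick above: complementing MASK turns an acceptable pattern into a
   single contiguous run of ones, and adding the run's lowest set bit
   (mask & -mask) collapses that run into one carry bit.  E.g. (treating
   values as 8-bit for illustration) mask = 0b11100111: ~mask = 0b00011000,
   and adding 0b00001000 gives 0b00100000, a power of two, so the mask is
   accepted.  */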

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
\f
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
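
/* In outline, the four cases above compute the address as follows (a
   sketch of what the named gen_* expanders emit, not new behavior):
   global-dynamic calls __tls_get_addr on a tgd-relocated argument;
   local-dynamic calls __tls_get_addr once for a per-module base (reused
   via the UNSPEC_TLSLDBASE libcall block) and adds a tld offset;
   initial-exec adds an offset loaded from the DLT/GOT to the thread
   pointer; and local-exec adds a link-time constant offset to the
   thread pointer.  */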

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1    Round up.
        else
          Y = (<large int> & ~mask)               Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
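
/* A worked instance (illustrative): for an SImode reference to X + 0x5432
   the mask is 0x3fff, and 0x5432 & 0x3fff = 0x1432 is below the 0x2000
   halfway point, so we round down to Y = 0x4000.  We emit Z = X + 0x4000
   and reference memory (Z + 0x1432), whose displacement fits in 14 bits;
   nearby references such as X + 0x5440 then share the same Z via CSE.  */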

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but divides evenly by shadd_const, fold
             const_int/shadd_const into (reg) before scaling.  This allows
             more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
                bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return pa_tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
                                 XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                      ? SFmode : DFmode),
                                     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                       ? SFmode : DFmode),
                                      XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                          ? SFmode : DFmode),
                                         XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand0, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
                              replace_equiv_address (operand0, scratch_reg),
                              operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg,
                          replace_equiv_address (operand1, scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
         the scratch register has the same mode as the destination at this
         point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT
              && cint_ok_for_move (INTVAL (operand1)))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Various sets are created during RTL generation which don't
             have the REG_POINTER flag correctly set.  After the CSE pass,
             instruction recognition can fail if we don't consistently
             set this flag when performing register copies.  This should
             also improve the opportunities for creating insns that use
             unscaled indexing.  */
          if (REG_P (operand0) && REG_P (operand1))
            {
              if (REG_POINTER (operand1)
                  && !REG_POINTER (operand0)
                  && !HARD_REGISTER_P (operand0))
                copy_reg_pointer (operand0, operand1);
            }

          /* When MEMs are broken out, the REG_POINTER flag doesn't
             get set.  In some cases, we can set the REG_POINTER flag
             from the declaration for the MEM.  */
          if (REG_P (operand0)
              && GET_CODE (operand1) == MEM
              && !REG_POINTER (operand0))
            {
              tree decl = MEM_EXPR (operand1);

              /* Set the register pointer flag and register alignment
                 if the declaration for this memory reference is a
                 pointer type.  */
              if (decl)
                {
                  tree type;

                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
                     tree operand 1.  */
                  if (TREE_CODE (decl) == COMPONENT_REF)
                    decl = TREE_OPERAND (decl, 1);

                  type = TREE_TYPE (decl);
                  type = strip_array_types (type);

                  if (POINTER_TYPE_P (type))
                    {
                      int align;
1830 type = TREE_TYPE (type);
1831 		      /* Using TYPE_ALIGN_OK is rather conservative as
1832 			 only the Ada front end actually sets it.  */
1833 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1834 : BITS_PER_UNIT);
1835 mark_reg_pointer (operand0, align);
1836 }
1837 }
1838 }
1839
1840 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1841 return 1;
1842 }
1843 }
1844 else if (GET_CODE (operand0) == MEM)
1845 {
1846 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1847 && !(reload_in_progress || reload_completed))
1848 {
1849 rtx temp = gen_reg_rtx (DFmode);
1850
1851 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1852 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1853 return 1;
1854 }
1855 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1856 {
1857 /* Run this case quickly. */
1858 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1859 return 1;
1860 }
1861 if (! (reload_in_progress || reload_completed))
1862 {
1863 operands[0] = validize_mem (operand0);
1864 operands[1] = operand1 = force_reg (mode, operand1);
1865 }
1866 }
1867
1868 /* Simplify the source if we need to.
1869 Note we do have to handle function labels here, even though we do
1870       not consider them legitimate constants.  Loop optimizations can
1871       call the emit_move_xxx routines with one as a source.  */
1872 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1873 || function_label_operand (operand1, VOIDmode)
1874 || (GET_CODE (operand1) == HIGH
1875 && symbolic_operand (XEXP (operand1, 0), mode)))
1876 {
1877 int ishighonly = 0;
1878
1879 if (GET_CODE (operand1) == HIGH)
1880 {
1881 ishighonly = 1;
1882 operand1 = XEXP (operand1, 0);
1883 }
1884 if (symbolic_operand (operand1, mode))
1885 {
1886 /* Argh. The assembler and linker can't handle arithmetic
1887 involving plabels.
1888
1889 So we force the plabel into memory, load operand0 from
1890 the memory location, then add in the constant part. */
1891 if ((GET_CODE (operand1) == CONST
1892 && GET_CODE (XEXP (operand1, 0)) == PLUS
1893 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1894 VOIDmode))
1895 || function_label_operand (operand1, VOIDmode))
1896 {
1897 rtx temp, const_part;
1898
1899 /* Figure out what (if any) scratch register to use. */
1900 if (reload_in_progress || reload_completed)
1901 {
1902 scratch_reg = scratch_reg ? scratch_reg : operand0;
1903 /* SCRATCH_REG will hold an address and maybe the actual
1904 data. We want it in WORD_MODE regardless of what mode it
1905 was originally given to us. */
1906 scratch_reg = force_mode (word_mode, scratch_reg);
1907 }
1908 else if (flag_pic)
1909 scratch_reg = gen_reg_rtx (Pmode);
1910
1911 if (GET_CODE (operand1) == CONST)
1912 {
1913 /* Save away the constant part of the expression. */
1914 const_part = XEXP (XEXP (operand1, 0), 1);
1915 gcc_assert (GET_CODE (const_part) == CONST_INT);
1916
1917 /* Force the function label into memory. */
1918 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1919 }
1920 else
1921 {
1922 /* No constant part. */
1923 const_part = NULL_RTX;
1924
1925 /* Force the function label into memory. */
1926 temp = force_const_mem (mode, operand1);
1927 }
1928
1929
1930 /* Get the address of the memory location. PIC-ify it if
1931 necessary. */
1932 temp = XEXP (temp, 0);
1933 if (flag_pic)
1934 temp = legitimize_pic_address (temp, mode, scratch_reg);
1935
1936 /* Put the address of the memory location into our destination
1937 register. */
1938 operands[1] = temp;
1939 emit_move_sequence (operands, mode, scratch_reg);
1940
1941 /* Now load from the memory location into our destination
1942 register. */
1943 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1944 emit_move_sequence (operands, mode, scratch_reg);
1945
1946 /* And add back in the constant part. */
1947 if (const_part != NULL_RTX)
1948 expand_inc (operand0, const_part);
1949
1950 return 1;
1951 }
1952
1953 if (flag_pic)
1954 {
1955 rtx temp;
1956
1957 if (reload_in_progress || reload_completed)
1958 {
1959 temp = scratch_reg ? scratch_reg : operand0;
1960 /* TEMP will hold an address and maybe the actual
1961 data. We want it in WORD_MODE regardless of what mode it
1962 was originally given to us. */
1963 temp = force_mode (word_mode, temp);
1964 }
1965 else
1966 temp = gen_reg_rtx (Pmode);
1967
1968 /* (const (plus (symbol) (const_int))) must be forced to
1969 memory during/after reload if the const_int will not fit
1970 in 14 bits. */
1971 if (GET_CODE (operand1) == CONST
1972 && GET_CODE (XEXP (operand1, 0)) == PLUS
1973 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1974 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1975 && (reload_completed || reload_in_progress)
1976 && flag_pic)
1977 {
1978 rtx const_mem = force_const_mem (mode, operand1);
1979 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1980 mode, temp);
1981 operands[1] = replace_equiv_address (const_mem, operands[1]);
1982 emit_move_sequence (operands, mode, temp);
1983 }
1984 else
1985 {
1986 operands[1] = legitimize_pic_address (operand1, mode, temp);
1987 if (REG_P (operand0) && REG_P (operands[1]))
1988 copy_reg_pointer (operand0, operands[1]);
1989 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1990 }
1991 }
1992 /* On the HPPA, references to data space are supposed to use dp,
1993 register 27, but showing it in the RTL inhibits various cse
1994 and loop optimizations. */
1995 else
1996 {
1997 rtx temp, set;
1998
1999 if (reload_in_progress || reload_completed)
2000 {
2001 temp = scratch_reg ? scratch_reg : operand0;
2002 /* TEMP will hold an address and maybe the actual
2003 data. We want it in WORD_MODE regardless of what mode it
2004 was originally given to us. */
2005 temp = force_mode (word_mode, temp);
2006 }
2007 else
2008 temp = gen_reg_rtx (mode);
2009
2010 /* Loading a SYMBOL_REF into a register makes that register
2011 safe to be used as the base in an indexed address.
2012
2013 Don't mark hard registers though. That loses. */
2014 if (GET_CODE (operand0) == REG
2015 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2016 mark_reg_pointer (operand0, BITS_PER_UNIT);
2017 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2018 mark_reg_pointer (temp, BITS_PER_UNIT);
2019
2020 if (ishighonly)
2021 set = gen_rtx_SET (mode, operand0, temp);
2022 else
2023 set = gen_rtx_SET (VOIDmode,
2024 operand0,
2025 gen_rtx_LO_SUM (mode, temp, operand1));
2026
2027 emit_insn (gen_rtx_SET (VOIDmode,
2028 temp,
2029 gen_rtx_HIGH (mode, operand1)));
2030 emit_insn (set);
2031
2032 }
2033 return 1;
2034 }
2035 else if (pa_tls_referenced_p (operand1))
2036 {
2037 rtx tmp = operand1;
2038 rtx addend = NULL;
2039
2040 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2041 {
2042 addend = XEXP (XEXP (tmp, 0), 1);
2043 tmp = XEXP (XEXP (tmp, 0), 0);
2044 }
2045
2046 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2047 tmp = legitimize_tls_address (tmp);
2048 if (addend)
2049 {
2050 tmp = gen_rtx_PLUS (mode, tmp, addend);
2051 tmp = force_operand (tmp, operands[0]);
2052 }
2053 operands[1] = tmp;
2054 }
2055 else if (GET_CODE (operand1) != CONST_INT
2056 || !cint_ok_for_move (INTVAL (operand1)))
2057 {
2058 rtx insn, temp;
2059 rtx op1 = operand1;
2060 HOST_WIDE_INT value = 0;
2061 HOST_WIDE_INT insv = 0;
2062 int insert = 0;
2063
2064 if (GET_CODE (operand1) == CONST_INT)
2065 value = INTVAL (operand1);
2066
2067 if (TARGET_64BIT
2068 && GET_CODE (operand1) == CONST_INT
2069 && HOST_BITS_PER_WIDE_INT > 32
2070 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2071 {
2072 HOST_WIDE_INT nval;
2073
2074 /* Extract the low order 32 bits of the value and sign extend.
2075 If the new value is the same as the original value, we can
2076 		use the original value as-is.  If the new value is
2077 different, we use it and insert the most-significant 32-bits
2078 of the original value into the final result. */
2079 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2080 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2081 if (value != nval)
2082 {
2083 #if HOST_BITS_PER_WIDE_INT > 32
2084 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2085 #endif
2086 insert = 1;
2087 value = nval;
2088 operand1 = GEN_INT (nval);
2089 }
2090 }
2091
2092 if (reload_in_progress || reload_completed)
2093 temp = scratch_reg ? scratch_reg : operand0;
2094 else
2095 temp = gen_reg_rtx (mode);
2096
2097 /* We don't directly split DImode constants on 32-bit targets
2098 because PLUS uses an 11-bit immediate and the insn sequence
2099 generated is not as efficient as the one using HIGH/LO_SUM. */
2100 if (GET_CODE (operand1) == CONST_INT
2101 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2102 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2103 && !insert)
2104 {
2105 /* Directly break constant into high and low parts. This
2106 provides better optimization opportunities because various
2107 passes recognize constants split with PLUS but not LO_SUM.
2108 We use a 14-bit signed low part except when the addition
2109 of 0x4000 to the high part might change the sign of the
2110 high part. */
2111 HOST_WIDE_INT low = value & 0x3fff;
2112 HOST_WIDE_INT high = value & ~ 0x3fff;
2113
2114 if (low >= 0x2000)
2115 {
2116 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2117 high += 0x2000;
2118 else
2119 high += 0x4000;
2120 }
2121
2122 low = value - high;
2123
2124 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2125 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2126 }
2127 else
2128 {
2129 emit_insn (gen_rtx_SET (VOIDmode, temp,
2130 gen_rtx_HIGH (mode, operand1)));
2131 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2132 }
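	  /* Worked example of the PLUS split above (illustrative only):
	     for value = 0x12345, low = value & 0x3fff = 0x2345 >= 0x2000,
	     so high = (value & ~0x3fff) + 0x4000 = 0x14000 and
	     low = value - high = -7355, which fits in 14 signed bits.
	     The resulting sequence is roughly

		 ldil L'0x14000,%reg
		 ldo -7355(%reg),%reg

	     yielding 0x14000 - 7355 = 0x12345.  */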
2133
2134 insn = emit_move_insn (operands[0], operands[1]);
2135
2136 /* Now insert the most significant 32 bits of the value
2137 into the register. When we don't have a second register
2138 available, it could take up to nine instructions to load
2139 a 64-bit integer constant. Prior to reload, we force
2140 constants that would take more than three instructions
2141 to load to the constant pool. During and after reload,
2142 we have to handle all possible values. */
2143 if (insert)
2144 {
2145 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2146 register and the value to be inserted is outside the
2147 range that can be loaded with three depdi instructions. */
2148 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2149 {
2150 operand1 = GEN_INT (insv);
2151
2152 emit_insn (gen_rtx_SET (VOIDmode, temp,
2153 gen_rtx_HIGH (mode, operand1)));
2154 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2155 emit_insn (gen_insv (operand0, GEN_INT (32),
2156 const0_rtx, temp));
2157 }
2158 else
2159 {
2160 int len = 5, pos = 27;
2161
2162 /* Insert the bits using the depdi instruction. */
2163 while (pos >= 0)
2164 {
2165 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2166 HOST_WIDE_INT sign = v5 < 0;
2167
2168 /* Left extend the insertion. */
2169 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2170 while (pos > 0 && (insv & 1) == sign)
2171 {
2172 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2173 len += 1;
2174 pos -= 1;
2175 }
2176
2177 emit_insn (gen_insv (operand0, GEN_INT (len),
2178 GEN_INT (pos), GEN_INT (v5)));
2179
2180 len = pos > 0 && pos < 5 ? pos : 5;
2181 pos -= len;
2182 }
2183 }
2184 }
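	  /* Illustrative trace of the depdi loop above: if the upper 32
	     bits to be inserted are 0x123 (binary 1 0010 0011), the loop
	     peels off sign-extended 5-bit fields from the bottom: the
	     first depdi deposits 3 (00011), and the remaining bits
	     1001 = 9 are deposited by a second depdi whose field is
	     widened over the leading sign bits.  Reassembled,
	     (9 << 5) | 3 = 0x123.  */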
2185
2186 set_unique_reg_note (insn, REG_EQUAL, op1);
2187
2188 return 1;
2189 }
2190 }
2191 /* Now have insn-emit do whatever it normally does. */
2192 return 0;
2193 }
2194
2195 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2196 it will need a link/runtime reloc). */
2197
2198 int
2199 reloc_needed (tree exp)
2200 {
2201 int reloc = 0;
2202
2203 switch (TREE_CODE (exp))
2204 {
2205 case ADDR_EXPR:
2206 return 1;
2207
2208 case POINTER_PLUS_EXPR:
2209 case PLUS_EXPR:
2210 case MINUS_EXPR:
2211 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2212 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2213 break;
2214
2215 CASE_CONVERT:
2216 case NON_LVALUE_EXPR:
2217 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2218 break;
2219
2220 case CONSTRUCTOR:
2221 {
2222 tree value;
2223 unsigned HOST_WIDE_INT ix;
2224
2225 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2226 if (value)
2227 reloc |= reloc_needed (value);
2228 }
2229 break;
2230
2231 case ERROR_MARK:
2232 break;
2233
2234 default:
2235 break;
2236 }
2237 return reloc;
2238 }
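/* For example, a static initializer of the form `&some_global + 4' is
   represented as a PLUS_EXPR over an ADDR_EXPR, so reloc_needed returns
   nonzero for it, while a plain integer constant yields zero.  */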
2239
2240 \f
2241 /* Return the best assembler insn template
2242 for moving operands[1] into operands[0] as a fullword. */
2243 const char *
2244 singlemove_string (rtx *operands)
2245 {
2246 HOST_WIDE_INT intval;
2247
2248 if (GET_CODE (operands[0]) == MEM)
2249 return "stw %r1,%0";
2250 if (GET_CODE (operands[1]) == MEM)
2251 return "ldw %1,%0";
2252 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2253 {
2254 long i;
2255 REAL_VALUE_TYPE d;
2256
2257 gcc_assert (GET_MODE (operands[1]) == SFmode);
2258
2259 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2260 bit pattern. */
2261 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2262 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2263
2264 operands[1] = GEN_INT (i);
2265 /* Fall through to CONST_INT case. */
2266 }
2267 if (GET_CODE (operands[1]) == CONST_INT)
2268 {
2269 intval = INTVAL (operands[1]);
2270
2271 if (VAL_14_BITS_P (intval))
2272 return "ldi %1,%0";
2273 else if ((intval & 0x7ff) == 0)
2274 return "ldil L'%1,%0";
2275 else if (zdepi_cint_p (intval))
2276 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2277 else
2278 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2279 }
2280 return "copy %1,%0";
2281 }
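/* Illustrative examples of the template selection above:

     42         -> "ldi 42,%0"                (fits in 14 signed bits)
     0x55550000 -> "ldil L'...,%0"            (low 11 bits zero)
     0x3ffe0    -> "{zdepi ...|depwi,z ...}"  (13 ones shifted left by 5)
     0x12345    -> ldil L' then ldo R'        (general two-insn case)  */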
2282 \f
2283
2284 /* Compute position (in OP[1]) and width (in OP[2])
2285 useful for copying IMM to a register using the zdepi
2286 instructions. Store the immediate value to insert in OP[0]. */
2287 static void
2288 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2289 {
2290 int lsb, len;
2291
2292 /* Find the least significant set bit in IMM. */
2293 for (lsb = 0; lsb < 32; lsb++)
2294 {
2295 if ((imm & 1) != 0)
2296 break;
2297 imm >>= 1;
2298 }
2299
2300 /* Choose variants based on *sign* of the 5-bit field. */
2301 if ((imm & 0x10) == 0)
2302 len = (lsb <= 28) ? 4 : 32 - lsb;
2303 else
2304 {
2305 /* Find the width of the bitstring in IMM. */
2306 for (len = 5; len < 32 - lsb; len++)
2307 {
2308 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2309 break;
2310 }
2311
2312 /* Sign extend IMM as a 5-bit value. */
2313 imm = (imm & 0xf) - 0x10;
2314 }
2315
2316 op[0] = imm;
2317 op[1] = 31 - lsb;
2318 op[2] = len;
2319 }
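/* Worked example (illustrative): for IMM = 0x3e0 (five ones in bits 5-9),
   the first loop finds lsb = 5; the 5-bit field is all ones, so the sign
   branch is taken, len stays 5 and imm sign extends to -1.  The result is
   op = {-1, 26, 5}: depositing -1 as a 5-bit field at position
   31 - 5 = 26 reconstructs 0x3e0.  */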
2320
2321 /* Compute position (in OP[1]) and width (in OP[2])
2322 useful for copying IMM to a register using the depdi,z
2323 instructions. Store the immediate value to insert in OP[0]. */
2324 void
2325 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2326 {
2327 int lsb, len, maxlen;
2328
2329 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2330
2331 /* Find the least significant set bit in IMM. */
2332 for (lsb = 0; lsb < maxlen; lsb++)
2333 {
2334 if ((imm & 1) != 0)
2335 break;
2336 imm >>= 1;
2337 }
2338
2339 /* Choose variants based on *sign* of the 5-bit field. */
2340 if ((imm & 0x10) == 0)
2341 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2342 else
2343 {
2344 /* Find the width of the bitstring in IMM. */
2345 for (len = 5; len < maxlen - lsb; len++)
2346 {
2347 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2348 break;
2349 }
2350
2351 /* Extend length if host is narrow and IMM is negative. */
2352 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2353 len += 32;
2354
2355 /* Sign extend IMM as a 5-bit value. */
2356 imm = (imm & 0xf) - 0x10;
2357 }
2358
2359 op[0] = imm;
2360 op[1] = 63 - lsb;
2361 op[2] = len;
2362 }
2363
2364 /* Output assembler code to perform a doubleword move insn
2365 with operands OPERANDS. */
2366
2367 const char *
2368 output_move_double (rtx *operands)
2369 {
2370 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2371 rtx latehalf[2];
2372 rtx addreg0 = 0, addreg1 = 0;
2373
2374 /* First classify both operands. */
2375
2376 if (REG_P (operands[0]))
2377 optype0 = REGOP;
2378 else if (offsettable_memref_p (operands[0]))
2379 optype0 = OFFSOP;
2380 else if (GET_CODE (operands[0]) == MEM)
2381 optype0 = MEMOP;
2382 else
2383 optype0 = RNDOP;
2384
2385 if (REG_P (operands[1]))
2386 optype1 = REGOP;
2387 else if (CONSTANT_P (operands[1]))
2388 optype1 = CNSTOP;
2389 else if (offsettable_memref_p (operands[1]))
2390 optype1 = OFFSOP;
2391 else if (GET_CODE (operands[1]) == MEM)
2392 optype1 = MEMOP;
2393 else
2394 optype1 = RNDOP;
2395
2396   /* Check for cases that the operand constraints are not
2397      supposed to allow.  */
2398 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2399
2400 /* Handle copies between general and floating registers. */
2401
2402 if (optype0 == REGOP && optype1 == REGOP
2403 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2404 {
2405 if (FP_REG_P (operands[0]))
2406 {
2407 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2408 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2409 return "{fldds|fldd} -16(%%sp),%0";
2410 }
2411 else
2412 {
2413 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2414 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2415 return "{ldws|ldw} -12(%%sp),%R0";
2416 }
2417 }
2418
2419 /* Handle auto decrementing and incrementing loads and stores
2420 specifically, since the structure of the function doesn't work
2421      for them without major modification.  Do it better when we teach
2422      this port about the general inc/dec addressing of the PA.
2423 (This was written by tege. Chide him if it doesn't work.) */
2424
2425 if (optype0 == MEMOP)
2426 {
2427 /* We have to output the address syntax ourselves, since print_operand
2428 doesn't deal with the addresses we want to use. Fix this later. */
2429
2430 rtx addr = XEXP (operands[0], 0);
2431 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2432 {
2433 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2434
2435 operands[0] = XEXP (addr, 0);
2436 gcc_assert (GET_CODE (operands[1]) == REG
2437 && GET_CODE (operands[0]) == REG);
2438
2439 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2440
2441 /* No overlap between high target register and address
2442 register. (We do this in a non-obvious way to
2443 save a register file writeback) */
2444 if (GET_CODE (addr) == POST_INC)
2445 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2446 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2447 }
2448 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2449 {
2450 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2451
2452 operands[0] = XEXP (addr, 0);
2453 gcc_assert (GET_CODE (operands[1]) == REG
2454 && GET_CODE (operands[0]) == REG);
2455
2456 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2457 /* No overlap between high target register and address
2458 register. (We do this in a non-obvious way to save a
2459 register file writeback) */
2460 if (GET_CODE (addr) == PRE_INC)
2461 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2462 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2463 }
2464 }
2465 if (optype1 == MEMOP)
2466 {
2467 /* We have to output the address syntax ourselves, since print_operand
2468 doesn't deal with the addresses we want to use. Fix this later. */
2469
2470 rtx addr = XEXP (operands[1], 0);
2471 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2472 {
2473 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2474
2475 operands[1] = XEXP (addr, 0);
2476 gcc_assert (GET_CODE (operands[0]) == REG
2477 && GET_CODE (operands[1]) == REG);
2478
2479 if (!reg_overlap_mentioned_p (high_reg, addr))
2480 {
2481 /* No overlap between high target register and address
2482 register. (We do this in a non-obvious way to
2483 save a register file writeback) */
2484 if (GET_CODE (addr) == POST_INC)
2485 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2486 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2487 }
2488 else
2489 {
2490 /* This is an undefined situation. We should load into the
2491 address register *and* update that register. Probably
2492 we don't need to handle this at all. */
2493 if (GET_CODE (addr) == POST_INC)
2494 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2495 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2496 }
2497 }
2498 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2499 {
2500 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2501
2502 operands[1] = XEXP (addr, 0);
2503 gcc_assert (GET_CODE (operands[0]) == REG
2504 && GET_CODE (operands[1]) == REG);
2505
2506 if (!reg_overlap_mentioned_p (high_reg, addr))
2507 {
2508 /* No overlap between high target register and address
2509 register. (We do this in a non-obvious way to
2510 save a register file writeback) */
2511 if (GET_CODE (addr) == PRE_INC)
2512 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2513 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2514 }
2515 else
2516 {
2517 /* This is an undefined situation. We should load into the
2518 address register *and* update that register. Probably
2519 we don't need to handle this at all. */
2520 if (GET_CODE (addr) == PRE_INC)
2521 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2522 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2523 }
2524 }
2525 else if (GET_CODE (addr) == PLUS
2526 && GET_CODE (XEXP (addr, 0)) == MULT)
2527 {
2528 rtx xoperands[4];
2529 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2530
2531 if (!reg_overlap_mentioned_p (high_reg, addr))
2532 {
2533 xoperands[0] = high_reg;
2534 xoperands[1] = XEXP (addr, 1);
2535 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2536 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2537 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2538 xoperands);
2539 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2540 }
2541 else
2542 {
2543 xoperands[0] = high_reg;
2544 xoperands[1] = XEXP (addr, 1);
2545 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2546 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2547 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2548 xoperands);
2549 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2550 }
2551 }
2552 }
2553
2554 /* If an operand is an unoffsettable memory ref, find a register
2555 we can increment temporarily to make it refer to the second word. */
2556
2557 if (optype0 == MEMOP)
2558 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2559
2560 if (optype1 == MEMOP)
2561 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2562
2563   /* Ok, we can do one word at a time.
2564      Normally we do the low-numbered word first, though the cases
2565      below sometimes require the high-numbered word to be done first.
2566
2567      In either case, set up in LATEHALF the operands to use
2568      for the high-numbered word and in some cases alter the
2569      operands in OPERANDS to be suitable for the low-numbered word.  */
2569
2570 if (optype0 == REGOP)
2571 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2572 else if (optype0 == OFFSOP)
2573 latehalf[0] = adjust_address (operands[0], SImode, 4);
2574 else
2575 latehalf[0] = operands[0];
2576
2577 if (optype1 == REGOP)
2578 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2579 else if (optype1 == OFFSOP)
2580 latehalf[1] = adjust_address (operands[1], SImode, 4);
2581 else if (optype1 == CNSTOP)
2582 split_double (operands[1], &operands[1], &latehalf[1]);
2583 else
2584 latehalf[1] = operands[1];
2585
2586 /* If the first move would clobber the source of the second one,
2587 do them in the other order.
2588
2589 This can happen in two cases:
2590
2591 mem -> register where the first half of the destination register
2592 is the same register used in the memory's address. Reload
2593 can create such insns.
2594
2595 mem in this case will be either register indirect or register
2596 indirect plus a valid offset.
2597
2598 register -> register move where REGNO(dst) == REGNO(src + 1)
2599 someone (Tim/Tege?) claimed this can happen for parameter loads.
2600
2601 Handle mem -> register case first. */
2602 if (optype0 == REGOP
2603 && (optype1 == MEMOP || optype1 == OFFSOP)
2604 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2605 operands[1], 0))
2606 {
2607 /* Do the late half first. */
2608 if (addreg1)
2609 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2610 output_asm_insn (singlemove_string (latehalf), latehalf);
2611
2612 /* Then clobber. */
2613 if (addreg1)
2614 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2615 return singlemove_string (operands);
2616 }
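  /* Example of the hazard above (illustrative): a doubleword load of
     r4:r5 from 8(%r4).  Emitting the first word first would give

	 ldw 8(%r4),%r4
	 ldw 12(%r4),%r5

     where the second load uses r4 after it has been clobbered.  Emitting
     the late half first avoids the clobber:

	 ldw 12(%r4),%r5
	 ldw 8(%r4),%r4  */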
2617
2618 /* Now handle register -> register case. */
2619 if (optype0 == REGOP && optype1 == REGOP
2620 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2621 {
2622 output_asm_insn (singlemove_string (latehalf), latehalf);
2623 return singlemove_string (operands);
2624 }
2625
2626 /* Normal case: do the two words, low-numbered first. */
2627
2628 output_asm_insn (singlemove_string (operands), operands);
2629
2630 /* Make any unoffsettable addresses point at high-numbered word. */
2631 if (addreg0)
2632 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2633 if (addreg1)
2634 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2635
2636 /* Do that word. */
2637 output_asm_insn (singlemove_string (latehalf), latehalf);
2638
2639 /* Undo the adds we just did. */
2640 if (addreg0)
2641 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2642 if (addreg1)
2643 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2644
2645 return "";
2646 }
2647 \f
2648 const char *
2649 output_fp_move_double (rtx *operands)
2650 {
2651 if (FP_REG_P (operands[0]))
2652 {
2653 if (FP_REG_P (operands[1])
2654 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2655 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2656 else
2657 output_asm_insn ("fldd%F1 %1,%0", operands);
2658 }
2659 else if (FP_REG_P (operands[1]))
2660 {
2661 output_asm_insn ("fstd%F0 %1,%0", operands);
2662 }
2663 else
2664 {
2665 rtx xoperands[2];
2666
2667 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2668
2669 /* This is a pain. You have to be prepared to deal with an
2670 arbitrary address here including pre/post increment/decrement.
2671
2672 	 So avoid this in the MD.  */
2673 gcc_assert (GET_CODE (operands[0]) == REG);
2674
2675 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2676 xoperands[0] = operands[0];
2677 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2678 }
2679 return "";
2680 }
2681 \f
2682 /* Return a REG that occurs in ADDR with coefficient 1.
2683 ADDR can be effectively incremented by incrementing REG. */
2684
2685 static rtx
2686 find_addr_reg (rtx addr)
2687 {
2688 while (GET_CODE (addr) == PLUS)
2689 {
2690 if (GET_CODE (XEXP (addr, 0)) == REG)
2691 addr = XEXP (addr, 0);
2692 else if (GET_CODE (XEXP (addr, 1)) == REG)
2693 addr = XEXP (addr, 1);
2694 else if (CONSTANT_P (XEXP (addr, 0)))
2695 addr = XEXP (addr, 1);
2696 else if (CONSTANT_P (XEXP (addr, 1)))
2697 addr = XEXP (addr, 0);
2698 else
2699 gcc_unreachable ();
2700 }
2701 gcc_assert (GET_CODE (addr) == REG);
2702 return addr;
2703 }
2704
2705 /* Emit code to perform a block move.
2706
2707 OPERANDS[0] is the destination pointer as a REG, clobbered.
2708 OPERANDS[1] is the source pointer as a REG, clobbered.
2709 OPERANDS[2] is a register for temporary storage.
2710 OPERANDS[3] is a register for temporary storage.
2711 OPERANDS[4] is the size as a CONST_INT
2712 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2713 OPERANDS[6] is another temporary register. */
2714
2715 const char *
2716 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2717 {
2718 int align = INTVAL (operands[5]);
2719 unsigned long n_bytes = INTVAL (operands[4]);
2720
2721   /* We can't move more than a word at a time because the PA
2722      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2723 if (align > (TARGET_64BIT ? 8 : 4))
2724 align = (TARGET_64BIT ? 8 : 4);
2725
2726 /* Note that we know each loop below will execute at least twice
2727 (else we would have open-coded the copy). */
2728 switch (align)
2729 {
2730 case 8:
2731 /* Pre-adjust the loop counter. */
2732 operands[4] = GEN_INT (n_bytes - 16);
2733 output_asm_insn ("ldi %4,%2", operands);
2734
2735 /* Copying loop. */
2736 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2737 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2738 output_asm_insn ("std,ma %3,8(%0)", operands);
2739 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2740 output_asm_insn ("std,ma %6,8(%0)", operands);
2741
2742       /* Handle the residual.  There could be up to 15 bytes of
2743 	 residual to copy!  */
2744 if (n_bytes % 16 != 0)
2745 {
2746 operands[4] = GEN_INT (n_bytes % 8);
2747 if (n_bytes % 16 >= 8)
2748 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2749 if (n_bytes % 8 != 0)
2750 output_asm_insn ("ldd 0(%1),%6", operands);
2751 if (n_bytes % 16 >= 8)
2752 output_asm_insn ("std,ma %3,8(%0)", operands);
2753 if (n_bytes % 8 != 0)
2754 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2755 }
2756 return "";
2757
2758 case 4:
2759 /* Pre-adjust the loop counter. */
2760 operands[4] = GEN_INT (n_bytes - 8);
2761 output_asm_insn ("ldi %4,%2", operands);
2762
2763 /* Copying loop. */
2764 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2765 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2766 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2767 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2768 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2769
2770 /* Handle the residual. There could be up to 7 bytes of
2771 residual to copy! */
2772 if (n_bytes % 8 != 0)
2773 {
2774 operands[4] = GEN_INT (n_bytes % 4);
2775 if (n_bytes % 8 >= 4)
2776 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2777 if (n_bytes % 4 != 0)
2778 output_asm_insn ("ldw 0(%1),%6", operands);
2779 if (n_bytes % 8 >= 4)
2780 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2781 if (n_bytes % 4 != 0)
2782 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2783 }
2784 return "";
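      /* Illustrative trace for align 4 and n_bytes = 23: the counter is
	 preloaded with 15, the loop runs twice (counter 15 -> 7 -> -1)
	 copying 16 bytes, the word residual copies 4 more, and the final
	 ldw/stby pair stores the remaining 3 bytes: 16 + 4 + 3 = 23.  */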
2785
2786 case 2:
2787 /* Pre-adjust the loop counter. */
2788 operands[4] = GEN_INT (n_bytes - 4);
2789 output_asm_insn ("ldi %4,%2", operands);
2790
2791 /* Copying loop. */
2792 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2793 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2794 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2795 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2796 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2797
2798 /* Handle the residual. */
2799 if (n_bytes % 4 != 0)
2800 {
2801 if (n_bytes % 4 >= 2)
2802 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2803 if (n_bytes % 2 != 0)
2804 output_asm_insn ("ldb 0(%1),%6", operands);
2805 if (n_bytes % 4 >= 2)
2806 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2807 if (n_bytes % 2 != 0)
2808 output_asm_insn ("stb %6,0(%0)", operands);
2809 }
2810 return "";
2811
2812 case 1:
2813 /* Pre-adjust the loop counter. */
2814 operands[4] = GEN_INT (n_bytes - 2);
2815 output_asm_insn ("ldi %4,%2", operands);
2816
2817 /* Copying loop. */
2818 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2819 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2820 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2821 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2822 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2823
2824 /* Handle the residual. */
2825 if (n_bytes % 2 != 0)
2826 {
2827 output_asm_insn ("ldb 0(%1),%3", operands);
2828 output_asm_insn ("stb %3,0(%0)", operands);
2829 }
2830 return "";
2831
2832 default:
2833 gcc_unreachable ();
2834 }
2835 }
2836
2837 /* Count the number of insns necessary to handle this block move.
2838
2839    Basic structure is the same as output_block_move, except that we
2840    count insns rather than emit them.  */
2841
2842 static int
2843 compute_movmem_length (rtx insn)
2844 {
2845 rtx pat = PATTERN (insn);
2846 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2847 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2848 unsigned int n_insns = 0;
2849
2850   /* We can't move more than a word at a time because the PA
2851      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2852 if (align > (TARGET_64BIT ? 8 : 4))
2853 align = (TARGET_64BIT ? 8 : 4);
2854
2855 /* The basic copying loop. */
2856 n_insns = 6;
2857
2858 /* Residuals. */
2859 if (n_bytes % (2 * align) != 0)
2860 {
2861 if ((n_bytes % (2 * align)) >= align)
2862 n_insns += 2;
2863
2864 if ((n_bytes % align) != 0)
2865 n_insns += 2;
2866 }
2867
2868 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2869 return n_insns * 4;
2870 }
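/* For the align 4, n_bytes = 23 trace in output_block_move above, this
   gives 6 insns for the ldi plus copying loop, 2 for the word residual
   and 2 for the byte residual: 10 insns, or a length of 40 bytes.  */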
2871
2872 /* Emit code to perform a block clear.
2873
2874 OPERANDS[0] is the destination pointer as a REG, clobbered.
2875 OPERANDS[1] is a register for temporary storage.
2876 OPERANDS[2] is the size as a CONST_INT
2877 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2878
2879 const char *
2880 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2881 {
2882 int align = INTVAL (operands[3]);
2883 unsigned long n_bytes = INTVAL (operands[2]);
2884
2885   /* We can't clear more than a word at a time because the PA
2886      has no integer move insns longer than a word.  */
2887 if (align > (TARGET_64BIT ? 8 : 4))
2888 align = (TARGET_64BIT ? 8 : 4);
2889
2890 /* Note that we know each loop below will execute at least twice
2891      (else we would have open-coded the clear).  */
2892 switch (align)
2893 {
2894 case 8:
2895 /* Pre-adjust the loop counter. */
2896 operands[2] = GEN_INT (n_bytes - 16);
2897 output_asm_insn ("ldi %2,%1", operands);
2898
2899 /* Loop. */
2900 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2901 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2902 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2903
2904       /* Handle the residual.  There could be up to 15 bytes of
2905 	 residual to clear!  */
2906 if (n_bytes % 16 != 0)
2907 {
2908 operands[2] = GEN_INT (n_bytes % 8);
2909 if (n_bytes % 16 >= 8)
2910 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2911 if (n_bytes % 8 != 0)
2912 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2913 }
2914 return "";
2915
2916 case 4:
2917 /* Pre-adjust the loop counter. */
2918 operands[2] = GEN_INT (n_bytes - 8);
2919 output_asm_insn ("ldi %2,%1", operands);
2920
2921 /* Loop. */
2922 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2923 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2924 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2925
2926       /* Handle the residual.  There could be up to 7 bytes of
2927 	 residual to clear!  */
2928 if (n_bytes % 8 != 0)
2929 {
2930 operands[2] = GEN_INT (n_bytes % 4);
2931 if (n_bytes % 8 >= 4)
2932 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2933 if (n_bytes % 4 != 0)
2934 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2935 }
2936 return "";
2937
2938 case 2:
2939 /* Pre-adjust the loop counter. */
2940 operands[2] = GEN_INT (n_bytes - 4);
2941 output_asm_insn ("ldi %2,%1", operands);
2942
2943 /* Loop. */
2944 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2945 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2946 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2947
2948 /* Handle the residual. */
2949 if (n_bytes % 4 != 0)
2950 {
2951 if (n_bytes % 4 >= 2)
2952 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2953 if (n_bytes % 2 != 0)
2954 output_asm_insn ("stb %%r0,0(%0)", operands);
2955 }
2956 return "";
2957
2958 case 1:
2959 /* Pre-adjust the loop counter. */
2960 operands[2] = GEN_INT (n_bytes - 2);
2961 output_asm_insn ("ldi %2,%1", operands);
2962
2963 /* Loop. */
2964 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2965 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2966 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2967
2968 /* Handle the residual. */
2969 if (n_bytes % 2 != 0)
2970 output_asm_insn ("stb %%r0,0(%0)", operands);
2971
2972 return "";
2973
2974 default:
2975 gcc_unreachable ();
2976 }
2977 }
2978
2979 /* Count the number of insns necessary to handle this block clear.
2980
2981    Basic structure is the same as output_block_clear, except that we
2982    count insns rather than emit them.  */
2983
2984 static int
2985 compute_clrmem_length (rtx insn)
2986 {
2987 rtx pat = PATTERN (insn);
2988 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2989 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2990 unsigned int n_insns = 0;
2991
2992   /* We can't clear more than a word at a time because the PA
2993      has no integer move insns longer than a word.  */
2994 if (align > (TARGET_64BIT ? 8 : 4))
2995 align = (TARGET_64BIT ? 8 : 4);
2996
2997 /* The basic loop. */
2998 n_insns = 4;
2999
3000 /* Residuals. */
3001 if (n_bytes % (2 * align) != 0)
3002 {
3003 if ((n_bytes % (2 * align)) >= align)
3004 n_insns++;
3005
3006 if ((n_bytes % align) != 0)
3007 n_insns++;
3008 }
3009
3010 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3011 return n_insns * 4;
3012 }
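/* For example, with align 4 and n_bytes = 23 this gives 4 insns for the
   ldi plus clearing loop, 1 for the word residual and 1 for the byte
   residual: 6 insns, or a length of 24 bytes.  */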
3013 \f
3014
3015 const char *
3016 output_and (rtx *operands)
3017 {
3018 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3019 {
3020 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3021 int ls0, ls1, ms0, p, len;
3022
3023 for (ls0 = 0; ls0 < 32; ls0++)
3024 if ((mask & (1 << ls0)) == 0)
3025 break;
3026
3027 for (ls1 = ls0; ls1 < 32; ls1++)
3028 if ((mask & (1 << ls1)) != 0)
3029 break;
3030
3031 for (ms0 = ls1; ms0 < 32; ms0++)
3032 if ((mask & (1 << ms0)) == 0)
3033 break;
3034
3035 gcc_assert (ms0 == 32);
3036
3037 if (ls1 == 32)
3038 {
3039 len = ls0;
3040
3041 gcc_assert (len);
3042
3043 operands[2] = GEN_INT (len);
3044 return "{extru|extrw,u} %1,31,%2,%0";
3045 }
3046 else
3047 {
3048 /* We could use this `depi' for the case above as well, but `depi'
3049 requires one more register file access than an `extru'. */
3050
3051 p = 31 - ls0;
3052 len = ls1 - ls0;
3053
3054 operands[2] = GEN_INT (p);
3055 operands[3] = GEN_INT (len);
3056 return "{depi|depwi} 0,%2,%3,%0";
3057 }
3058 }
3059 else
3060 return "and %1,%2,%0";
3061 }
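/* Two illustrative cases: mask 0x7f has ls0 = 7 and no later set bit, so
   output_and returns "extru %1,31,7,%0" (keep the low 7 bits); mask
   0xffff00ff has ls0 = 8 and ls1 = 16, giving "depwi 0,23,8,%0", which
   clears bits 8-15 in place.  */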
3062
3063 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3064 storing the result in operands[0]. */
3065 const char *
3066 output_64bit_and (rtx *operands)
3067 {
3068 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3069 {
3070 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3071 int ls0, ls1, ms0, p, len;
3072
3073 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3074 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3075 break;
3076
3077 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3078 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3079 break;
3080
3081 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3082 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3083 break;
3084
3085 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3086
3087 if (ls1 == HOST_BITS_PER_WIDE_INT)
3088 {
3089 len = ls0;
3090
3091 gcc_assert (len);
3092
3093 operands[2] = GEN_INT (len);
3094 return "extrd,u %1,63,%2,%0";
3095 }
3096 else
3097 {
3098 	  /* We could use `depdi' for the case above as well, but `depdi'
3099 	     requires one more register file access than an `extrd,u'.  */
3100
3101 p = 63 - ls0;
3102 len = ls1 - ls0;
3103
3104 operands[2] = GEN_INT (p);
3105 operands[3] = GEN_INT (len);
3106 return "depdi 0,%2,%3,%0";
3107 }
3108 }
3109 else
3110 return "and %1,%2,%0";
3111 }
3112
3113 const char *
3114 output_ior (rtx *operands)
3115 {
3116 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3117 int bs0, bs1, p, len;
3118
3119 if (INTVAL (operands[2]) == 0)
3120 return "copy %1,%0";
3121
3122 for (bs0 = 0; bs0 < 32; bs0++)
3123 if ((mask & (1 << bs0)) != 0)
3124 break;
3125
3126 for (bs1 = bs0; bs1 < 32; bs1++)
3127 if ((mask & (1 << bs1)) == 0)
3128 break;
3129
3130 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3131
3132 p = 31 - bs0;
3133 len = bs1 - bs0;
3134
3135 operands[2] = GEN_INT (p);
3136 operands[3] = GEN_INT (len);
3137 return "{depi|depwi} -1,%2,%3,%0";
3138 }
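/* For example, mask 0x00ff0000 gives bs0 = 16 and bs1 = 24, so output_ior
   returns "depwi -1,15,8,%0", setting bits 16-23 in place.  */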
3139
3140 /* Return a string to perform a bitwise inclusive-or of operands[1] with
3141    operands[2], storing the result in operands[0].  */
3142 const char *
3143 output_64bit_ior (rtx *operands)
3144 {
3145 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3146 int bs0, bs1, p, len;
3147
3148 if (INTVAL (operands[2]) == 0)
3149 return "copy %1,%0";
3150
3151 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3152 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3153 break;
3154
3155 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3156 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3157 break;
3158
3159 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3160 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3161
3162 p = 63 - bs0;
3163 len = bs1 - bs0;
3164
3165 operands[2] = GEN_INT (p);
3166 operands[3] = GEN_INT (len);
3167 return "depdi -1,%2,%3,%0";
3168 }
3169 \f
3170 /* Target hook for assembling integer objects. This code handles
3171 aligned SI and DI integers specially since function references
3172 must be preceded by P%. */
3173
3174 static bool
3175 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3176 {
3177 if (size == UNITS_PER_WORD
3178 && aligned_p
3179 && function_label_operand (x, VOIDmode))
3180 {
3181 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3182 output_addr_const (asm_out_file, x);
3183 fputc ('\n', asm_out_file);
3184 return true;
3185 }
3186 return default_assemble_integer (x, size, aligned_p);
3187 }
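/* For instance, emitting the address of function `foo' as an aligned word
   produces "\t.word\tP%foo"; the P% prefix makes the assembler generate a
   plabel for the function reference.  Everything else falls through to
   default_assemble_integer.  */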
3188 \f
3189 /* Output an ascii string. */
3190 void
3191 output_ascii (FILE *file, const char *p, int size)
3192 {
3193 int i;
3194 int chars_output;
3195 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3196
3197 /* The HP assembler can only take strings of 256 characters at one
3198 time. This is a limitation on input line length, *not* the
3199 length of the string. Sigh. Even worse, it seems that the
3200 restriction is in number of input characters (see \xnn &
3201 \whatever). So we have to do this very carefully. */
3202
3203 fputs ("\t.STRING \"", file);
3204
3205 chars_output = 0;
3206 for (i = 0; i < size; i += 4)
3207 {
3208 int co = 0;
3209 int io = 0;
3210 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3211 {
3212 register unsigned int c = (unsigned char) p[i + io];
3213
3214 if (c == '\"' || c == '\\')
3215 partial_output[co++] = '\\';
3216 if (c >= ' ' && c < 0177)
3217 partial_output[co++] = c;
3218 else
3219 {
3220 unsigned int hexd;
3221 partial_output[co++] = '\\';
3222 partial_output[co++] = 'x';
3223 hexd = c / 16 - 0 + '0';
3224 if (hexd > '9')
3225 hexd -= '9' - 'a' + 1;
3226 partial_output[co++] = hexd;
3227 hexd = c % 16 - 0 + '0';
3228 if (hexd > '9')
3229 hexd -= '9' - 'a' + 1;
3230 partial_output[co++] = hexd;
3231 }
3232 }
3233 if (chars_output + co > 243)
3234 {
3235 fputs ("\"\n\t.STRING \"", file);
3236 chars_output = 0;
3237 }
3238 fwrite (partial_output, 1, (size_t) co, file);
3239 chars_output += co;
3240 co = 0;
3241 }
3242 fputs ("\"\n", file);
3243 }
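/* For example, the four input bytes 'H', 'i', '"', 0x01 are emitted as

       .STRING "Hi\"\x01"

   with quotes and backslashes escaped and nonprinting bytes written in
   hex; a fresh .STRING directive is started whenever an output chunk
   would exceed the assembler's input line limit.  */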
3244
3245 /* Try to rewrite floating point comparisons & branches to avoid
3246 useless add,tr insns.
3247
3248 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3249 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3250 first attempt to remove useless add,tr insns. It is zero
3251 for the second pass as reorg sometimes leaves bogus REG_DEAD
3252 notes lying around.
3253
3254 When CHECK_NOTES is zero we can only eliminate add,tr insns
3255 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3256 instructions. */
3257 static void
3258 remove_useless_addtr_insns (int check_notes)
3259 {
3260 rtx insn;
3261 static int pass = 0;
3262
3263 /* This is fairly cheap, so always run it when optimizing. */
3264 if (optimize > 0)
3265 {
3266 int fcmp_count = 0;
3267 int fbranch_count = 0;
3268
3269 /* Walk all the insns in this function looking for fcmp & fbranch
3270 instructions. Keep track of how many of each we find. */
3271 for (insn = get_insns (); insn; insn = next_insn (insn))
3272 {
3273 rtx tmp;
3274
3275 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3276 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3277 continue;
3278
3279 tmp = PATTERN (insn);
3280
3281 /* It must be a set. */
3282 if (GET_CODE (tmp) != SET)
3283 continue;
3284
3285 /* If the destination is CCFP, then we've found an fcmp insn. */
3286 tmp = SET_DEST (tmp);
3287 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3288 {
3289 fcmp_count++;
3290 continue;
3291 }
3292
3293 tmp = PATTERN (insn);
3294 /* If this is an fbranch instruction, bump the fbranch counter. */
3295 if (GET_CODE (tmp) == SET
3296 && SET_DEST (tmp) == pc_rtx
3297 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3298 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3299 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3300 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3301 {
3302 fbranch_count++;
3303 continue;
3304 }
3305 }
3306
3307
3308 /* Find all floating point compare + branch insns. If possible,
3309 reverse the comparison & the branch to avoid add,tr insns. */
3310 for (insn = get_insns (); insn; insn = next_insn (insn))
3311 {
3312 rtx tmp, next;
3313
3314 /* Ignore anything that isn't an INSN. */
3315 if (GET_CODE (insn) != INSN)
3316 continue;
3317
3318 tmp = PATTERN (insn);
3319
3320 /* It must be a set. */
3321 if (GET_CODE (tmp) != SET)
3322 continue;
3323
3324 /* The destination must be CCFP, which is register zero. */
3325 tmp = SET_DEST (tmp);
3326 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3327 continue;
3328
3329 /* INSN should be a set of CCFP.
3330
3331 See if the result of this insn is used in a reversed FP
3332 conditional branch. If so, reverse our condition and
3333 the branch. Doing so avoids useless add,tr insns. */
3334 next = next_insn (insn);
3335 while (next)
3336 {
3337 /* Jumps, calls and labels stop our search. */
3338 if (GET_CODE (next) == JUMP_INSN
3339 || GET_CODE (next) == CALL_INSN
3340 || GET_CODE (next) == CODE_LABEL)
3341 break;
3342
3343 /* As does another fcmp insn. */
3344 if (GET_CODE (next) == INSN
3345 && GET_CODE (PATTERN (next)) == SET
3346 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3347 && REGNO (SET_DEST (PATTERN (next))) == 0)
3348 break;
3349
3350 next = next_insn (next);
3351 }
3352
3353 /* Is NEXT_INSN a branch? */
3354 if (next
3355 && GET_CODE (next) == JUMP_INSN)
3356 {
3357 rtx pattern = PATTERN (next);
3358
3359 	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3360 and CCFP dies, then reverse our conditional and the branch
3361 to avoid the add,tr. */
3362 if (GET_CODE (pattern) == SET
3363 && SET_DEST (pattern) == pc_rtx
3364 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3365 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3366 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3367 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3368 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3369 && (fcmp_count == fbranch_count
3370 || (check_notes
3371 && find_regno_note (next, REG_DEAD, 0))))
3372 {
3373 /* Reverse the branch. */
3374 tmp = XEXP (SET_SRC (pattern), 1);
3375 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3376 XEXP (SET_SRC (pattern), 2) = tmp;
3377 INSN_CODE (next) = -1;
3378
3379 /* Reverse our condition. */
3380 tmp = PATTERN (insn);
3381 PUT_CODE (XEXP (tmp, 1),
3382 (reverse_condition_maybe_unordered
3383 (GET_CODE (XEXP (tmp, 1)))));
3384 }
3385 }
3386 }
3387 }
3388
3389 pass = !pass;
3390
3391 }
3392 \f
3393 /* You may have trouble believing this, but this is the 32 bit HP-PA
3394 stack layout. Wow.
3395
3396 Offset Contents
3397
3398 Variable arguments (optional; any number may be allocated)
3399
3400 SP-(4*(N+9)) arg word N
3401 : :
3402 SP-56 arg word 5
3403 SP-52 arg word 4
3404
3405 Fixed arguments (must be allocated; may remain unused)
3406
3407 SP-48 arg word 3
3408 SP-44 arg word 2
3409 SP-40 arg word 1
3410 SP-36 arg word 0
3411
3412 Frame Marker
3413
3414 SP-32 External Data Pointer (DP)
3415 SP-28 External sr4
3416 SP-24 External/stub RP (RP')
3417 SP-20 Current RP
3418 SP-16 Static Link
3419 SP-12 Clean up
3420 SP-8 Calling Stub RP (RP'')
3421 SP-4 Previous SP
3422
3423 Top of Frame
3424
3425 SP-0 Stack Pointer (points to next available address)
3426
3427 */
3428
3429 /* This function saves registers as follows. Registers marked with ' are
3430 this function's registers (as opposed to the previous function's).
3431 If a frame_pointer isn't needed, r4 is saved as a general register;
3432 the space for the frame pointer is still allocated, though, to keep
3433 things simple.
3434
3435
3436 Top of Frame
3437
3438 SP (FP') Previous FP
3439 SP + 4 Alignment filler (sigh)
3440 SP + 8 Space for locals reserved here.
3441 .
3442 .
3443 .
3444 	SP + n     All call saved registers used.
3445 .
3446 .
3447 .
3448 SP + o All call saved fp registers used.
3449 .
3450 .
3451 .
3452 SP + p (SP') points to next available address.
3453
3454 */
3455
3456 /* Global variables set by output_function_prologue(). */
3457 /* Size of frame. Need to know this to emit return insns from
3458 leaf procedures. */
3459 static HOST_WIDE_INT actual_fsize, local_fsize;
3460 static int save_fregs;
3461
3462 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3463 Handle case where DISP > 8k by using the add_high_const patterns.
3464
3465 Note in DISP > 8k case, we will leave the high part of the address
3466    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.  */
3467
3468 static void
3469 store_reg (int reg, HOST_WIDE_INT disp, int base)
3470 {
3471 rtx insn, dest, src, basereg;
3472
3473 src = gen_rtx_REG (word_mode, reg);
3474 basereg = gen_rtx_REG (Pmode, base);
3475 if (VAL_14_BITS_P (disp))
3476 {
3477 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3478 insn = emit_move_insn (dest, src);
3479 }
3480 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3481 {
3482 rtx delta = GEN_INT (disp);
3483 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3484
3485 emit_move_insn (tmpreg, delta);
3486 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3487 if (DO_FRAME_NOTES)
3488 {
3489 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3490 gen_rtx_SET (VOIDmode, tmpreg,
3491 gen_rtx_PLUS (Pmode, basereg, delta)));
3492 RTX_FRAME_RELATED_P (insn) = 1;
3493 }
3494 dest = gen_rtx_MEM (word_mode, tmpreg);
3495 insn = emit_move_insn (dest, src);
3496 }
3497 else
3498 {
3499 rtx delta = GEN_INT (disp);
3500 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3501 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3502
3503 emit_move_insn (tmpreg, high);
3504 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3505 insn = emit_move_insn (dest, src);
3506 if (DO_FRAME_NOTES)
3507 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3508 gen_rtx_SET (VOIDmode,
3509 gen_rtx_MEM (word_mode,
3510 gen_rtx_PLUS (word_mode,
3511 basereg,
3512 delta)),
3513 src));
3514 }
3515
3516 if (DO_FRAME_NOTES)
3517 RTX_FRAME_RELATED_P (insn) = 1;
3518 }
3519
3520 /* Emit RTL to store REG at the memory location specified by BASE and then
3521 add MOD to BASE. MOD must be <= 8k. */
3522
3523 static void
3524 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3525 {
3526 rtx insn, basereg, srcreg, delta;
3527
3528 gcc_assert (VAL_14_BITS_P (mod));
3529
3530 basereg = gen_rtx_REG (Pmode, base);
3531 srcreg = gen_rtx_REG (word_mode, reg);
3532 delta = GEN_INT (mod);
3533
3534 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3535 if (DO_FRAME_NOTES)
3536 {
3537 RTX_FRAME_RELATED_P (insn) = 1;
3538
3539 /* RTX_FRAME_RELATED_P must be set on each frame related set
3540 in a parallel with more than one element. */
3541 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3542 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3543 }
3544 }
3545
3546 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3547 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3548 whether to add a frame note or not.
3549
3550 In the DISP > 8k case, we leave the high part of the address in %r1.
3551 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3552
3553 static void
3554 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3555 {
3556 rtx insn;
3557
3558 if (VAL_14_BITS_P (disp))
3559 {
3560 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3561 plus_constant (gen_rtx_REG (Pmode, base), disp));
3562 }
3563 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3564 {
3565 rtx basereg = gen_rtx_REG (Pmode, base);
3566 rtx delta = GEN_INT (disp);
3567 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3568
3569 emit_move_insn (tmpreg, delta);
3570 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3571 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3572 if (DO_FRAME_NOTES)
3573 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3574 gen_rtx_SET (VOIDmode, tmpreg,
3575 gen_rtx_PLUS (Pmode, basereg, delta)));
3576 }
3577 else
3578 {
3579 rtx basereg = gen_rtx_REG (Pmode, base);
3580 rtx delta = GEN_INT (disp);
3581 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3582
3583 emit_move_insn (tmpreg,
3584 gen_rtx_PLUS (Pmode, basereg,
3585 gen_rtx_HIGH (Pmode, delta)));
3586 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3587 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3588 }
3589
3590 if (DO_FRAME_NOTES && note)
3591 RTX_FRAME_RELATED_P (insn) = 1;
3592 }
3593
3594 HOST_WIDE_INT
3595 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3596 {
3597 int freg_saved = 0;
3598 int i, j;
3599
3600 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3601 be consistent with the rounding and size calculation done here.
3602 Change them at the same time. */
3603
3604 /* We do our own stack alignment. First, round the size of the
3605 stack locals up to a word boundary. */
3606 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3607
3608 /* Space for previous frame pointer + filler. If any frame is
3609 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3610 waste some space here for the sake of HP compatibility. The
3611 first slot is only used when the frame pointer is needed. */
3612 if (size || frame_pointer_needed)
3613 size += STARTING_FRAME_OFFSET;
3614
3615 /* If the current function calls __builtin_eh_return, then we need
3616 to allocate stack space for registers that will hold data for
3617 the exception handler. */
3618 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3619 {
3620 unsigned int i;
3621
3622 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3623 continue;
3624 size += i * UNITS_PER_WORD;
3625 }
3626
3627 /* Account for space used by the callee general register saves. */
3628 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3629 if (df_regs_ever_live_p (i))
3630 size += UNITS_PER_WORD;
3631
3632 /* Account for space used by the callee floating point register saves. */
3633 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3634 if (df_regs_ever_live_p (i)
3635 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3636 {
3637 freg_saved = 1;
3638
3639 /* We always save both halves of the FP register, so always
3640 increment the frame size by 8 bytes. */
3641 size += 8;
3642 }
3643
3644 /* If any of the floating registers are saved, account for the
3645 alignment needed for the floating point register save block. */
3646 if (freg_saved)
3647 {
3648 size = (size + 7) & ~7;
3649 if (fregs_live)
3650 *fregs_live = 1;
3651 }
3652
3653 /* The various ABIs include space for the outgoing parameters in the
3654 size of the current function's stack frame. We don't need to align
3655 for the outgoing arguments as their alignment is set by the final
3656 rounding for the frame as a whole. */
3657 size += crtl->outgoing_args_size;
3658
3659 /* Allocate space for the fixed frame marker. This space must be
3660 allocated for any function that makes calls or allocates
3661 stack space. */
3662 if (!current_function_is_leaf || size)
3663 size += TARGET_64BIT ? 48 : 32;
3664
3665 /* Finally, round to the preferred stack boundary. */
3666 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3667 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3668 }
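/* A worked example, assuming a 32-bit target where UNITS_PER_WORD is 4,
   STARTING_FRAME_OFFSET is 8, the frame marker is 32 bytes and
   PREFERRED_STACK_BOUNDARY is 512 bits (64 bytes, matching the stack
   alignment the 32-bit runtime requires): a non-leaf function with
   20 bytes of locals, two callee GR saves, no FP saves and 16 bytes
   of outgoing arguments needs 20 + 8 + 2*4 + 16 + 32 = 84 bytes,
   which the final rounding grows to 128.  All of these parameters
   are target macros; the figures are illustrative only.  */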
3669
3670 /* Generate the assembly code for function entry. FILE is a stdio
3671 stream to output the code to. SIZE is an int: how many units of
3672 temporary storage to allocate.
3673
3674 Refer to the array `regs_ever_live' to determine which registers to
3675 save; `regs_ever_live[I]' is nonzero if register number I is ever
3676 used in the function. This function is responsible for knowing
3677 which registers should not be saved even if used. */
3678
3679 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3680 of memory. If any fpu reg is used in the function, we allocate
3681 such a block here, at the bottom of the frame, just in case it's needed.
3682
3683 If this function is a leaf procedure, then we may choose not
3684 to do a "save" insn. The decision about whether or not
3685 to do this is made in regclass.c. */
3686
3687 static void
3688 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3689 {
3690 /* The function's label and associated .PROC must never be
3691 separated and must be output *after* any profiling declarations
3692 to avoid changing spaces/subspaces within a procedure. */
3693 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3694 fputs ("\t.PROC\n", file);
3695
3696 /* hppa_expand_prologue does the dirty work now. We just need
3697 to output the assembler directives which denote the start
3698 of a function. */
3699 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3700 if (current_function_is_leaf)
3701 fputs (",NO_CALLS", file);
3702 else
3703 fputs (",CALLS", file);
3704 if (rp_saved)
3705 fputs (",SAVE_RP", file);
3706
3707 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3708 at the beginning of the frame and that it is used as the frame
3709 pointer for the frame. We do this because our current frame
3710 layout doesn't conform to that specified in the HP runtime
3711 documentation and we need a way to indicate to programs such as
3712 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3713 isn't used by HP compilers but is supported by the assembler.
3714 However, SAVE_SP is supposed to indicate that the previous stack
3715 pointer has been saved in the frame marker. */
3716 if (frame_pointer_needed)
3717 fputs (",SAVE_SP", file);
3718
3719 /* Pass on information about the number of callee register saves
3720 performed in the prologue.
3721
3722 The compiler is supposed to pass the highest register number
3723 saved, the assembler then has to adjust that number before
3724 entering it into the unwind descriptor (to account for any
3725 caller saved registers with lower register numbers than the
3726 first callee saved register). */
3727 if (gr_saved)
3728 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3729
3730 if (fr_saved)
3731 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3732
3733 fputs ("\n\t.ENTRY\n", file);
3734
3735 remove_useless_addtr_insns (0);
3736 }
3737
3738 void
3739 hppa_expand_prologue (void)
3740 {
3741 int merge_sp_adjust_with_store = 0;
3742 HOST_WIDE_INT size = get_frame_size ();
3743 HOST_WIDE_INT offset;
3744 int i;
3745 rtx insn, tmpreg;
3746
3747 gr_saved = 0;
3748 fr_saved = 0;
3749 save_fregs = 0;
3750
3751 /* Compute total size for frame pointer, filler, locals and rounding to
3752 the next word boundary. Similar code appears in compute_frame_size
3753 and must be changed in tandem with this code. */
3754 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3755 if (local_fsize || frame_pointer_needed)
3756 local_fsize += STARTING_FRAME_OFFSET;
3757
3758 actual_fsize = compute_frame_size (size, &save_fregs);
3759 if (flag_stack_usage_info)
3760 current_function_static_stack_size = actual_fsize;
3761
3762 /* Compute a few things we will use often. */
3763 tmpreg = gen_rtx_REG (word_mode, 1);
3764
3765 /* Save RP first. The calling conventions manual states RP will
3766 always be stored into the caller's frame at sp - 20 or sp - 16
3767 depending on which ABI is in use. */
3768 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3769 {
3770 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3771 rp_saved = true;
3772 }
3773 else
3774 rp_saved = false;
3775
3776 /* Allocate the local frame and set up the frame pointer if needed. */
3777 if (actual_fsize != 0)
3778 {
3779 if (frame_pointer_needed)
3780 {
3781 /* Copy the old frame pointer temporarily into %r1. Set up the
3782 new stack pointer, then store away the saved old frame pointer
3783 into the stack at sp and at the same time update the stack
3784 pointer by actual_fsize bytes. There are two versions: the
3785 first handles small (< 8k) frames; the second handles large
3786 (>= 8k) frames. */
3787 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3788 if (DO_FRAME_NOTES)
3789 RTX_FRAME_RELATED_P (insn) = 1;
3790
3791 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3792 if (DO_FRAME_NOTES)
3793 RTX_FRAME_RELATED_P (insn) = 1;
3794
3795 if (VAL_14_BITS_P (actual_fsize))
3796 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3797 else
3798 {
3799 /* It is incorrect to store the saved frame pointer at *sp,
3800 then increment sp (writes beyond the current stack boundary).
3801
3802 So instead use stwm to store at *sp and post-increment the
3803 stack pointer as an atomic operation. Then increment sp to
3804 finish allocating the new frame. */
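/* Illustratively (a sketch, 32-bit mnemonics): for a 10000-byte
   frame, adjust1 = 8128 and adjust2 = 1872, giving roughly

       stwm %r1,8128(%sp)      ; save old fp and bump sp atomically
       ldo 1872(%sp),%sp       ; finish allocating the frame  */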
3805 HOST_WIDE_INT adjust1 = 8192 - 64;
3806 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3807
3808 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3809 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3810 adjust2, 1);
3811 }
3812
3813 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3814 we need to store the previous stack pointer (frame pointer)
3815 into the frame marker on targets that use the HP unwind
3816 library. This allows the HP unwind library to be used to
3817 unwind GCC frames. However, we are not fully compatible
3818 with the HP library because our frame layout differs from
3819 that specified in the HP runtime specification.
3820
3821 We don't want a frame note on this instruction as the frame
3822 marker moves during dynamic stack allocation.
3823
3824 This instruction also serves as a blockage to prevent
3825 register spills from being scheduled before the stack
3826 pointer is raised. This is necessary as we store
3827 registers using the frame pointer as a base register,
3828 and the frame pointer is set before sp is raised. */
3829 if (TARGET_HPUX_UNWIND_LIBRARY)
3830 {
3831 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3832 GEN_INT (TARGET_64BIT ? -8 : -4));
3833
3834 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3835 hard_frame_pointer_rtx);
3836 }
3837 else
3838 emit_insn (gen_blockage ());
3839 }
3840 /* no frame pointer needed. */
3841 else
3842 {
3843 /* In some cases we can perform the first callee register save
3844 and allocate the stack frame at the same time. If so, just
3845 make a note of it and defer allocating the frame until saving
3846 the callee registers. */
3847 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3848 merge_sp_adjust_with_store = 1;
3849 /* Cannot optimize. Adjust the stack frame by actual_fsize
3850 bytes. */
3851 else
3852 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3853 actual_fsize, 1);
3854 }
3855 }
3856
3857 /* Normal register save.
3858
3859 Do not save the frame pointer in the frame_pointer_needed case. It
3860 was done earlier. */
3861 if (frame_pointer_needed)
3862 {
3863 offset = local_fsize;
3864
3865 /* Saving the EH return data registers in the frame is the simplest
3866 way to get the frame unwind information emitted. We put them
3867 just before the general registers. */
3868 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3869 {
3870 unsigned int i, regno;
3871
3872 for (i = 0; ; ++i)
3873 {
3874 regno = EH_RETURN_DATA_REGNO (i);
3875 if (regno == INVALID_REGNUM)
3876 break;
3877
3878 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3879 offset += UNITS_PER_WORD;
3880 }
3881 }
3882
3883 for (i = 18; i >= 4; i--)
3884 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3885 {
3886 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3887 offset += UNITS_PER_WORD;
3888 gr_saved++;
3889 }
3890 /* Account for %r3 which is saved in a special place. */
3891 gr_saved++;
3892 }
3893 /* No frame pointer needed. */
3894 else
3895 {
3896 offset = local_fsize - actual_fsize;
3897
3898 /* Saving the EH return data registers in the frame is the simplest
3899 way to get the frame unwind information emitted. */
3900 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3901 {
3902 unsigned int i, regno;
3903
3904 for (i = 0; ; ++i)
3905 {
3906 regno = EH_RETURN_DATA_REGNO (i);
3907 if (regno == INVALID_REGNUM)
3908 break;
3909
3910 /* If merge_sp_adjust_with_store is nonzero, then we can
3911 optimize the first save. */
3912 if (merge_sp_adjust_with_store)
3913 {
3914 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3915 merge_sp_adjust_with_store = 0;
3916 }
3917 else
3918 store_reg (regno, offset, STACK_POINTER_REGNUM);
3919 offset += UNITS_PER_WORD;
3920 }
3921 }
3922
3923 for (i = 18; i >= 3; i--)
3924 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3925 {
3926 /* If merge_sp_adjust_with_store is nonzero, then we can
3927 optimize the first GR save. */
3928 if (merge_sp_adjust_with_store)
3929 {
3930 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3931 merge_sp_adjust_with_store = 0;
3932 }
3933 else
3934 store_reg (i, offset, STACK_POINTER_REGNUM);
3935 offset += UNITS_PER_WORD;
3936 gr_saved++;
3937 }
3938
3939 /* If we wanted to merge the SP adjustment with a GR save, but we never
3940 did any GR saves, then just emit the adjustment here. */
3941 if (merge_sp_adjust_with_store)
3942 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3943 actual_fsize, 1);
3944 }
3945
3946 /* The hppa calling conventions say that %r19, the pic offset
3947 register, is saved at sp - 32 (in this function's frame)
3948 when generating PIC code. FIXME: What is the correct thing
3949 to do for functions which make no calls and allocate no
3950 frame? Do we need to allocate a frame, or can we just omit
3951 the save? For now we'll just omit the save.
3952
3953 We don't want a note on this insn as the frame marker can
3954 move if there is a dynamic stack allocation. */
3955 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3956 {
3957 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3958
3959 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3960
3961 }
3962
3963 /* Align pointer properly (doubleword boundary). */
3964 offset = (offset + 7) & ~7;
3965
3966 /* Floating point register store. */
3967 if (save_fregs)
3968 {
3969 rtx base;
3970
3971 /* First get the frame or stack pointer to the start of the FP register
3972 save area. */
3973 if (frame_pointer_needed)
3974 {
3975 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
3976 base = hard_frame_pointer_rtx;
3977 }
3978 else
3979 {
3980 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3981 base = stack_pointer_rtx;
3982 }
3983
3984 /* Now actually save the FP registers. */
3985 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3986 {
3987 if (df_regs_ever_live_p (i)
3988 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3989 {
3990 rtx addr, insn, reg;
3991 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3992 reg = gen_rtx_REG (DFmode, i);
3993 insn = emit_move_insn (addr, reg);
3994 if (DO_FRAME_NOTES)
3995 {
3996 RTX_FRAME_RELATED_P (insn) = 1;
3997 if (TARGET_64BIT)
3998 {
3999 rtx mem = gen_rtx_MEM (DFmode,
4000 plus_constant (base, offset));
4001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4002 gen_rtx_SET (VOIDmode, mem, reg));
4003 }
4004 else
4005 {
4006 rtx meml = gen_rtx_MEM (SFmode,
4007 plus_constant (base, offset));
4008 rtx memr = gen_rtx_MEM (SFmode,
4009 plus_constant (base, offset + 4));
4010 rtx regl = gen_rtx_REG (SFmode, i);
4011 rtx regr = gen_rtx_REG (SFmode, i + 1);
4012 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4013 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4014 rtvec vec;
4015
4016 RTX_FRAME_RELATED_P (setl) = 1;
4017 RTX_FRAME_RELATED_P (setr) = 1;
4018 vec = gen_rtvec (2, setl, setr);
4019 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4020 gen_rtx_SEQUENCE (VOIDmode, vec));
4021 }
4022 }
4023 offset += GET_MODE_SIZE (DFmode);
4024 fr_saved++;
4025 }
4026 }
4027 }
4028 }
4029
4030 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4031 Handle case where DISP > 8k by using the add_high_const patterns. */
4032
4033 static void
4034 load_reg (int reg, HOST_WIDE_INT disp, int base)
4035 {
4036 rtx dest = gen_rtx_REG (word_mode, reg);
4037 rtx basereg = gen_rtx_REG (Pmode, base);
4038 rtx src;
4039
4040 if (VAL_14_BITS_P (disp))
4041 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4042 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4043 {
4044 rtx delta = GEN_INT (disp);
4045 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4046
4047 emit_move_insn (tmpreg, delta);
4048 if (TARGET_DISABLE_INDEXING)
4049 {
4050 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4051 src = gen_rtx_MEM (word_mode, tmpreg);
4052 }
4053 else
4054 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4055 }
4056 else
4057 {
4058 rtx delta = GEN_INT (disp);
4059 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4060 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4061
4062 emit_move_insn (tmpreg, high);
4063 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4064 }
4065
4066 emit_move_insn (dest, src);
4067 }
4068
4069 /* Update the total code bytes output to the text section. */
4070
4071 static void
4072 update_total_code_bytes (unsigned int nbytes)
4073 {
4074 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4075 && !IN_NAMED_SECTION_P (cfun->decl))
4076 {
4077 unsigned int old_total = total_code_bytes;
4078
4079 total_code_bytes += nbytes;
4080
4081 /* Be prepared to handle overflows. */
4082 if (old_total > total_code_bytes)
4083 total_code_bytes = UINT_MAX;
4084 }
4085 }
4086
4087 /* This function generates the assembly code for function exit.
4088 Args are as for output_function_prologue ().
4089
4090 The function epilogue should not depend on the current stack
4091 pointer! It should use the frame pointer only. This is mandatory
4092 because of alloca; we also take advantage of it to omit stack
4093 adjustments before returning. */
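/* For example, when the function calls alloca, %sp may sit an
   arbitrary distance below the saved registers, so only
   frame-pointer-relative addressing is reliable here.  */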
4094
4095 static void
4096 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4097 {
4098 rtx insn = get_last_insn ();
4099
4100 last_address = 0;
4101
4102 /* hppa_expand_epilogue does the dirty work now. We just need
4103 to output the assembler directives which denote the end
4104 of a function.
4105
4106 To make debuggers happy, emit a nop if the epilogue was completely
4107 eliminated due to a volatile call as the last insn in the
4108 current function. That way the return address (in %r2) will
4109 always point to a valid instruction in the current function. */
4110
4111 /* Get the last real insn. */
4112 if (GET_CODE (insn) == NOTE)
4113 insn = prev_real_insn (insn);
4114
4115 /* If it is a sequence, then look inside. */
4116 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4117 insn = XVECEXP (PATTERN (insn), 0, 0);
4118
4119 /* If insn is a CALL_INSN, then it must be a call to a volatile
4120 function (otherwise there would be epilogue insns). */
4121 if (insn && GET_CODE (insn) == CALL_INSN)
4122 {
4123 fputs ("\tnop\n", file);
4124 last_address += 4;
4125 }
4126
4127 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4128
4129 if (TARGET_SOM && TARGET_GAS)
4130 {
4131 /* We're done with this subspace except possibly for some additional
4132 debug information. Forget that we are in this subspace to ensure
4133 that the next function is output in its own subspace. */
4134 in_section = NULL;
4135 cfun->machine->in_nsubspa = 2;
4136 }
4137
4138 if (INSN_ADDRESSES_SET_P ())
4139 {
4140 insn = get_last_nonnote_insn ();
4141 last_address += INSN_ADDRESSES (INSN_UID (insn));
4142 if (INSN_P (insn))
4143 last_address += insn_default_length (insn);
4144 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4145 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4146 }
4147 else
4148 last_address = UINT_MAX;
4149
4150 /* Finally, update the total number of code bytes output so far. */
4151 update_total_code_bytes (last_address);
4152 }
4153
4154 void
4155 hppa_expand_epilogue (void)
4156 {
4157 rtx tmpreg;
4158 HOST_WIDE_INT offset;
4159 HOST_WIDE_INT ret_off = 0;
4160 int i;
4161 int merge_sp_adjust_with_load = 0;
4162
4163 /* We will use this often. */
4164 tmpreg = gen_rtx_REG (word_mode, 1);
4165
4166 /* Try to restore RP early to avoid load/use interlocks when
4167 RP gets used in the return (bv) instruction. This appears to still
4168 be necessary even when we schedule the prologue and epilogue. */
4169 if (rp_saved)
4170 {
4171 ret_off = TARGET_64BIT ? -16 : -20;
4172 if (frame_pointer_needed)
4173 {
4174 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4175 ret_off = 0;
4176 }
4177 else
4178 {
4179 /* No frame pointer, and stack is smaller than 8k. */
4180 if (VAL_14_BITS_P (ret_off - actual_fsize))
4181 {
4182 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4183 ret_off = 0;
4184 }
4185 }
4186 }
4187
4188 /* General register restores. */
4189 if (frame_pointer_needed)
4190 {
4191 offset = local_fsize;
4192
4193 /* If the current function calls __builtin_eh_return, then we need
4194 to restore the saved EH data registers. */
4195 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4196 {
4197 unsigned int i, regno;
4198
4199 for (i = 0; ; ++i)
4200 {
4201 regno = EH_RETURN_DATA_REGNO (i);
4202 if (regno == INVALID_REGNUM)
4203 break;
4204
4205 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4206 offset += UNITS_PER_WORD;
4207 }
4208 }
4209
4210 for (i = 18; i >= 4; i--)
4211 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4212 {
4213 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4214 offset += UNITS_PER_WORD;
4215 }
4216 }
4217 else
4218 {
4219 offset = local_fsize - actual_fsize;
4220
4221 /* If the current function calls __builtin_eh_return, then we need
4222 to restore the saved EH data registers. */
4223 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4224 {
4225 unsigned int i, regno;
4226
4227 for (i = 0; ; ++i)
4228 {
4229 regno = EH_RETURN_DATA_REGNO (i);
4230 if (regno == INVALID_REGNUM)
4231 break;
4232
4233 /* Only for the first load.
4234 merge_sp_adjust_with_load holds the register load
4235 with which we will merge the sp adjustment. */
4236 if (merge_sp_adjust_with_load == 0
4237 && local_fsize == 0
4238 && VAL_14_BITS_P (-actual_fsize))
4239 merge_sp_adjust_with_load = regno;
4240 else
4241 load_reg (regno, offset, STACK_POINTER_REGNUM);
4242 offset += UNITS_PER_WORD;
4243 }
4244 }
4245
4246 for (i = 18; i >= 3; i--)
4247 {
4248 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4249 {
4250 /* Only for the first load.
4251 merge_sp_adjust_with_load holds the register load
4252 with which we will merge the sp adjustment. */
4253 if (merge_sp_adjust_with_load == 0
4254 && local_fsize == 0
4255 && VAL_14_BITS_P (-actual_fsize))
4256 merge_sp_adjust_with_load = i;
4257 else
4258 load_reg (i, offset, STACK_POINTER_REGNUM);
4259 offset += UNITS_PER_WORD;
4260 }
4261 }
4262 }
4263
4264 /* Align pointer properly (doubleword boundary). */
4265 offset = (offset + 7) & ~7;
4266
4267 /* FP register restores. */
4268 if (save_fregs)
4269 {
4270 /* Adjust the register to index off of. */
4271 if (frame_pointer_needed)
4272 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4273 else
4274 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4275
4276 /* Actually do the restores now. */
4277 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4278 if (df_regs_ever_live_p (i)
4279 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4280 {
4281 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4282 rtx dest = gen_rtx_REG (DFmode, i);
4283 emit_move_insn (dest, src);
4284 }
4285 }
4286
4287 /* Emit a blockage insn here to keep these insns from being moved to
4288 an earlier spot in the epilogue, or into the main instruction stream.
4289
4290 This is necessary as we must not cut the stack back before all the
4291 restores are finished. */
4292 emit_insn (gen_blockage ());
4293
4294 /* Reset stack pointer (and possibly frame pointer). The stack
4295 pointer is initially set to fp + 64 to avoid a race condition. */
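/* Illustratively (a sketch, 32-bit mnemonics):

       ldo 64(%r3),%sp         ; sp = fp + 64
       ldw,mb -64(%sp),%r3     ; sp -= 64, then reload the old fp

   The pre-modify load cuts the stack back and restores the saved
   frame pointer in a single instruction.  */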
4296 if (frame_pointer_needed)
4297 {
4298 rtx delta = GEN_INT (-64);
4299
4300 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4301 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4302 stack_pointer_rtx, delta));
4303 }
4304 /* If we were deferring a callee register restore, do it now. */
4305 else if (merge_sp_adjust_with_load)
4306 {
4307 rtx delta = GEN_INT (-actual_fsize);
4308 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4309
4310 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4311 }
4312 else if (actual_fsize != 0)
4313 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4314 - actual_fsize, 0);
4315
4316 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4317 frame greater than 8k), do so now. */
4318 if (ret_off != 0)
4319 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4320
4321 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4322 {
4323 rtx sa = EH_RETURN_STACKADJ_RTX;
4324
4325 emit_insn (gen_blockage ());
4326 emit_insn (TARGET_64BIT
4327 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4328 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4329 }
4330 }
4331
4332 rtx
4333 hppa_pic_save_rtx (void)
4334 {
4335 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4336 }
4337
4338 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4339 #define NO_DEFERRED_PROFILE_COUNTERS 0
4340 #endif
4341
4342
4343 /* Vector of funcdef numbers. */
4344 static VEC(int,heap) *funcdef_nos;
4345
4346 /* Output deferred profile counters. */
4347 static void
4348 output_deferred_profile_counters (void)
4349 {
4350 unsigned int i;
4351 int align, n;
4352
4353 if (VEC_empty (int, funcdef_nos))
4354 return;
4355
4356 switch_to_section (data_section);
4357 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4358 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4359
4360 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4361 {
4362 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4363 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4364 }
4365
4366 VEC_free (int, heap, funcdef_nos);
4367 }
4368
4369 void
4370 hppa_profile_hook (int label_no)
4371 {
4372 /* We use SImode for the address of the function in both 32 and
4373 64-bit code to avoid having to provide DImode versions of the
4374 lcla2 and load_offset_label_address insn patterns. */
4375 rtx reg = gen_reg_rtx (SImode);
4376 rtx label_rtx = gen_label_rtx ();
4377 rtx begin_label_rtx, call_insn;
4378 char begin_label_name[16];
4379
4380 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4381 label_no);
4382 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4383
4384 if (TARGET_64BIT)
4385 emit_move_insn (arg_pointer_rtx,
4386 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4387 GEN_INT (64)));
4388
4389 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4390
4391 /* The address of the function is loaded into %r25 with an instruction-
4392 relative sequence that avoids the use of relocations. The sequence
4393 is split so that the load_offset_label_address instruction can
4394 occupy the delay slot of the call to _mcount. */
4395 if (TARGET_PA_20)
4396 emit_insn (gen_lcla2 (reg, label_rtx));
4397 else
4398 emit_insn (gen_lcla1 (reg, label_rtx));
4399
4400 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4401 reg, begin_label_rtx, label_rtx));
4402
4403 #if !NO_DEFERRED_PROFILE_COUNTERS
4404 {
4405 rtx count_label_rtx, addr, r24;
4406 char count_label_name[16];
4407
4408 VEC_safe_push (int, heap, funcdef_nos, label_no);
4409 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4410 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4411
4412 addr = force_reg (Pmode, count_label_rtx);
4413 r24 = gen_rtx_REG (Pmode, 24);
4414 emit_move_insn (r24, addr);
4415
4416 call_insn =
4417 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4418 gen_rtx_SYMBOL_REF (Pmode,
4419 "_mcount")),
4420 GEN_INT (TARGET_64BIT ? 24 : 12)));
4421
4422 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4423 }
4424 #else
4425
4426 call_insn =
4427 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4428 gen_rtx_SYMBOL_REF (Pmode,
4429 "_mcount")),
4430 GEN_INT (TARGET_64BIT ? 16 : 8)));
4431
4432 #endif
4433
4434 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4435 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4436
4437 /* Indicate the _mcount call cannot throw, nor will it execute a
4438 non-local goto. */
4439 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4440 }
4441
4442 /* Fetch the return address for the frame COUNT steps up from
4443 the current frame, after the prologue. FRAMEADDR is the
4444 frame pointer of the COUNT frame.
4445
4446 We want to ignore any export stub remnants here. To handle this,
4447 we examine the code at the return address, and if it is an export
4448 stub, we return a memory rtx for the stub return address stored
4449 at frame-24.
4450
4451 The value returned is used in two different ways:
4452
4453 1. To find a function's caller.
4454
4455 2. To change the return address for a function.
4456
4457 This function handles most instances of case 1; however, it will
4458 fail if there are two levels of stubs to execute on the return
4459 path. The only way I believe that can happen is if the return value
4460 needs a parameter relocation, which never happens for C code.
4461
4462 This function handles most instances of case 2; however, it will
4463 fail if we did not originally have stub code on the return path
4464 but will need stub code on the new return path. This can happen if
4465 the caller & callee are both in the main program, but the new
4466 return location is in a shared library. */
4467
4468 rtx
4469 return_addr_rtx (int count, rtx frameaddr)
4470 {
4471 rtx label;
4472 rtx rp;
4473 rtx saved_rp;
4474 rtx ins;
4475
4476 /* Instruction stream at the normal return address for the export stub:
4477
4478 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4479 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4480 0x00011820 | stub+16: mtsp r1,sr0
4481 0xe0400002 | stub+20: be,n 0(sr0,rp)
4482
4483 0xe0400002 must be specified as -532676606 so that it won't be
4484 rejected as an invalid immediate operand on 64-bit hosts. */
4485
4486 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4487 int i;
4488
4489 if (count != 0)
4490 return NULL_RTX;
4491
4492 rp = get_hard_reg_initial_val (Pmode, 2);
4493
4494 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4495 return rp;
4496
4497 /* If there is no export stub then just use the value saved from
4498 the return pointer register. */
4499
4500 saved_rp = gen_reg_rtx (Pmode);
4501 emit_move_insn (saved_rp, rp);
4502
4503 /* Get pointer to the instruction stream. We have to mask out the
4504 privilege level from the two low order bits of the return address
4505 pointer here so that ins will point to the start of the first
4506 instruction that would have been executed if we returned. */
4507 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4508 label = gen_label_rtx ();
4509
4510 /* Check the instruction stream at the normal return address for the
4511 export stub. If it is an export stub, then our return address is
4512 really in -24[frameaddr]. */
4513
4514 for (i = 0; i < 4; i++)
4515 {
4516 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4517 rtx op1 = GEN_INT (insns[i]);
4518 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4519 }
4520
4521 /* Here we know that our return address points to an export
4522 stub. We don't want to return the address of the export stub,
4523 but rather the return address of the export stub. That return
4524 address is stored at -24[frameaddr]. */
4525
4526 emit_move_insn (saved_rp,
4527 gen_rtx_MEM (Pmode,
4528 memory_address (Pmode,
4529 plus_constant (frameaddr,
4530 -24))));
4531
4532 emit_label (label);
4533
4534 return saved_rp;
4535 }
4536
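/* Emit a branch on the result of a floating-point comparison.
   OPERANDS[0] holds the comparison code, OPERANDS[1] and OPERANDS[2]
   the operands, and OPERANDS[3] the branch target.  The expansion
   sets the FP status (CCFP register 0) to the comparison and then
   branches if the result is nonzero; on 32-bit targets this typically
   assembles to an fcmp/ftest/branch sequence (mnemonics are a sketch;
   the exact forms depend on target flags).  */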
4537 void
4538 emit_bcond_fp (rtx operands[])
4539 {
4540 enum rtx_code code = GET_CODE (operands[0]);
4541 rtx operand0 = operands[1];
4542 rtx operand1 = operands[2];
4543 rtx label = operands[3];
4544
4545 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4546 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4547
4548 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4549 gen_rtx_IF_THEN_ELSE (VOIDmode,
4550 gen_rtx_fmt_ee (NE,
4551 VOIDmode,
4552 gen_rtx_REG (CCFPmode, 0),
4553 const0_rtx),
4554 gen_rtx_LABEL_REF (VOIDmode, label),
4555 pc_rtx)));
4556
4557 }
4558
4559 /* Adjust the cost of a scheduling dependency. Return the new cost of
4560 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
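/* For instance (latency figures illustrative, not taken from the
   pipeline descriptions): if a preceding FP multiply has a default
   latency of 3 cycles and INSN is an fpload whose target is one of
   the multiply's operands, the anti-dependence case below yields a
   cost of 3 - 1 = 2 cycles.  */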
4561
4562 static int
4563 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4564 {
4565 enum attr_type attr_type;
4566
4567 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4568 true dependencies as they are described with bypasses now. */
4569 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4570 return cost;
4571
4572 if (! recog_memoized (insn))
4573 return 0;
4574
4575 attr_type = get_attr_type (insn);
4576
4577 switch (REG_NOTE_KIND (link))
4578 {
4579 case REG_DEP_ANTI:
4580 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4581 cycles later. */
4582
4583 if (attr_type == TYPE_FPLOAD)
4584 {
4585 rtx pat = PATTERN (insn);
4586 rtx dep_pat = PATTERN (dep_insn);
4587 if (GET_CODE (pat) == PARALLEL)
4588 {
4589 /* This happens for the fldXs,mb patterns. */
4590 pat = XVECEXP (pat, 0, 0);
4591 }
4592 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4593 /* If this happens, we have to extend this to schedule
4594 optimally. Return 0 for now. */
4595 return 0;
4596
4597 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4598 {
4599 if (! recog_memoized (dep_insn))
4600 return 0;
4601 switch (get_attr_type (dep_insn))
4602 {
4603 case TYPE_FPALU:
4604 case TYPE_FPMULSGL:
4605 case TYPE_FPMULDBL:
4606 case TYPE_FPDIVSGL:
4607 case TYPE_FPDIVDBL:
4608 case TYPE_FPSQRTSGL:
4609 case TYPE_FPSQRTDBL:
4610 /* A fpload can't be issued until one cycle before a
4611 preceding arithmetic operation has finished if
4612 the target of the fpload is any of the sources
4613 (or destination) of the arithmetic operation. */
4614 return insn_default_latency (dep_insn) - 1;
4615
4616 default:
4617 return 0;
4618 }
4619 }
4620 }
4621 else if (attr_type == TYPE_FPALU)
4622 {
4623 rtx pat = PATTERN (insn);
4624 rtx dep_pat = PATTERN (dep_insn);
4625 if (GET_CODE (pat) == PARALLEL)
4626 {
4627 /* This happens for the fldXs,mb patterns. */
4628 pat = XVECEXP (pat, 0, 0);
4629 }
4630 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4631 /* If this happens, we have to extend this to schedule
4632 optimally. Return 0 for now. */
4633 return 0;
4634
4635 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4636 {
4637 if (! recog_memoized (dep_insn))
4638 return 0;
4639 switch (get_attr_type (dep_insn))
4640 {
4641 case TYPE_FPDIVSGL:
4642 case TYPE_FPDIVDBL:
4643 case TYPE_FPSQRTSGL:
4644 case TYPE_FPSQRTDBL:
4645 /* An ALU flop can't be issued until two cycles before a
4646 preceding divide or sqrt operation has finished if
4647 the target of the ALU flop is any of the sources
4648 (or destination) of the divide or sqrt operation. */
4649 return insn_default_latency (dep_insn) - 2;
4650
4651 default:
4652 return 0;
4653 }
4654 }
4655 }
4656
4657 /* For other anti dependencies, the cost is 0. */
4658 return 0;
4659
4660 case REG_DEP_OUTPUT:
4661 /* Output dependency; DEP_INSN writes a register that INSN writes some
4662 cycles later. */
4663 if (attr_type == TYPE_FPLOAD)
4664 {
4665 rtx pat = PATTERN (insn);
4666 rtx dep_pat = PATTERN (dep_insn);
4667 if (GET_CODE (pat) == PARALLEL)
4668 {
4669 /* This happens for the fldXs,mb patterns. */
4670 pat = XVECEXP (pat, 0, 0);
4671 }
4672 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4673 /* If this happens, we have to extend this to schedule
4674 optimally. Return 0 for now. */
4675 return 0;
4676
4677 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4678 {
4679 if (! recog_memoized (dep_insn))
4680 return 0;
4681 switch (get_attr_type (dep_insn))
4682 {
4683 case TYPE_FPALU:
4684 case TYPE_FPMULSGL:
4685 case TYPE_FPMULDBL:
4686 case TYPE_FPDIVSGL:
4687 case TYPE_FPDIVDBL:
4688 case TYPE_FPSQRTSGL:
4689 case TYPE_FPSQRTDBL:
4690 /* A fpload can't be issued until one cycle before a
4691 preceding arithmetic operation has finished if
4692 the target of the fpload is the destination of the
4693 arithmetic operation.
4694
4695 Exception: For PA7100LC, PA7200 and PA7300, the cost
4696 is 3 cycles, unless they bundle together. We also
4697 pay the penalty if the second insn is a fpload. */
4698 return insn_default_latency (dep_insn) - 1;
4699
4700 default:
4701 return 0;
4702 }
4703 }
4704 }
4705 else if (attr_type == TYPE_FPALU)
4706 {
4707 rtx pat = PATTERN (insn);
4708 rtx dep_pat = PATTERN (dep_insn);
4709 if (GET_CODE (pat) == PARALLEL)
4710 {
4711 /* This happens for the fldXs,mb patterns. */
4712 pat = XVECEXP (pat, 0, 0);
4713 }
4714 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4715 /* If this happens, we have to extend this to schedule
4716 optimally. Return 0 for now. */
4717 return 0;
4718
4719 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4720 {
4721 if (! recog_memoized (dep_insn))
4722 return 0;
4723 switch (get_attr_type (dep_insn))
4724 {
4725 case TYPE_FPDIVSGL:
4726 case TYPE_FPDIVDBL:
4727 case TYPE_FPSQRTSGL:
4728 case TYPE_FPSQRTDBL:
4729 /* An ALU flop can't be issued until two cycles before a
4730 preceding divide or sqrt operation has finished if
4731 the target of the ALU flop is also the target of
4732 the divide or sqrt operation. */
4733 return insn_default_latency (dep_insn) - 2;
4734
4735 default:
4736 return 0;
4737 }
4738 }
4739 }
4740
4741 /* For other output dependencies, the cost is 0. */
4742 return 0;
4743
4744 default:
4745 gcc_unreachable ();
4746 }
4747 }
4748
4749 /* Adjust scheduling priorities. We use this to try and keep addil
4750 and the next use of %r1 close together. */
4751 static int
4752 pa_adjust_priority (rtx insn, int priority)
4753 {
4754 rtx set = single_set (insn);
4755 rtx src, dest;
4756 if (set)
4757 {
4758 src = SET_SRC (set);
4759 dest = SET_DEST (set);
4760 if (GET_CODE (src) == LO_SUM
4761 && symbolic_operand (XEXP (src, 1), VOIDmode)
4762 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4763 priority >>= 3;
4764
4765 else if (GET_CODE (src) == MEM
4766 && GET_CODE (XEXP (src, 0)) == LO_SUM
4767 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4768 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4769 priority >>= 1;
4770
4771 else if (GET_CODE (dest) == MEM
4772 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4773 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4774 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4775 priority >>= 3;
4776 }
4777 return priority;
4778 }
4779
4780 /* The 700 can only issue a single insn at a time.
4781 The 7XXX processors can issue two insns at a time.
4782 The 8000 can issue 4 insns at a time. */
4783 static int
4784 pa_issue_rate (void)
4785 {
4786 switch (pa_cpu)
4787 {
4788 case PROCESSOR_700: return 1;
4789 case PROCESSOR_7100: return 2;
4790 case PROCESSOR_7100LC: return 2;
4791 case PROCESSOR_7200: return 2;
4792 case PROCESSOR_7300: return 2;
4793 case PROCESSOR_8000: return 4;
4794
4795 default:
4796 gcc_unreachable ();
4797 }
4798 }
4799
4800
4801
4802 /* Return any length adjustment needed by INSN which already has its length
4803 computed as LENGTH. Return zero if no adjustment is necessary.
4804
4805 For the PA: function calls, millicode calls, and backwards short
4806 conditional branches with unfilled delay slots need an adjustment by +1
4807 (to account for the NOP which will be inserted into the instruction stream).
4808
4809 Also compute the length of an inline block move here as it is too
4810 complicated to express as a length attribute in pa.md. */
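/* For example, a short backwards conditional branch with an unfilled
   delay slot is counted at 4 bytes by the length attribute but
   occupies 8 once the nop is inserted, so the cases below return the
   extra 4 bytes.  */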
4811 int
4812 pa_adjust_insn_length (rtx insn, int length)
4813 {
4814 rtx pat = PATTERN (insn);
4815
4816 /* Jumps inside switch tables which have unfilled delay slots need
4817 adjustment. */
4818 if (GET_CODE (insn) == JUMP_INSN
4819 && GET_CODE (pat) == PARALLEL
4820 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4821 return 4;
4822 /* Millicode insn with an unfilled delay slot. */
4823 else if (GET_CODE (insn) == INSN
4824 && GET_CODE (pat) != SEQUENCE
4825 && GET_CODE (pat) != USE
4826 && GET_CODE (pat) != CLOBBER
4827 && get_attr_type (insn) == TYPE_MILLI)
4828 return 4;
4829 /* Block move pattern. */
4830 else if (GET_CODE (insn) == INSN
4831 && GET_CODE (pat) == PARALLEL
4832 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4833 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4834 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4835 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4836 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4837 return compute_movmem_length (insn) - 4;
4838 /* Block clear pattern. */
4839 else if (GET_CODE (insn) == INSN
4840 && GET_CODE (pat) == PARALLEL
4841 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4842 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4843 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4844 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4845 return compute_clrmem_length (insn) - 4;
4846 /* Conditional branch with an unfilled delay slot. */
4847 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4848 {
4849 /* Adjust a short backwards conditional with an unfilled delay slot. */
4850 if (GET_CODE (pat) == SET
4851 && length == 4
4852 && JUMP_LABEL (insn) != NULL_RTX
4853 && ! forward_branch_p (insn))
4854 return 4;
4855 else if (GET_CODE (pat) == PARALLEL
4856 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4857 && length == 4)
4858 return 4;
4859 /* Adjust dbra insn with short backwards conditional branch with
4860 unfilled delay slot -- only for case where counter is in a
4861 general register. */
4862 else if (GET_CODE (pat) == PARALLEL
4863 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4864 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4865 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4866 && length == 4
4867 && ! forward_branch_p (insn))
4868 return 4;
4869 else
4870 return 0;
4871 }
4872 return 0;
4873 }
4874
4875 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4876
4877 static bool
4878 pa_print_operand_punct_valid_p (unsigned char code)
4879 {
4880 if (code == '@'
4881 || code == '#'
4882 || code == '*'
4883 || code == '^')
4884 return true;
4885
4886 return false;
4887 }
4888
4889 /* Print operand X (an rtx) in assembler syntax to file FILE.
4890 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4891 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4892
4893 void
4894 print_operand (FILE *file, rtx x, int code)
4895 {
4896 switch (code)
4897 {
4898 case '#':
4899 /* Output a 'nop' if there's nothing for the delay slot. */
4900 if (dbr_sequence_length () == 0)
4901 fputs ("\n\tnop", file);
4902 return;
4903 case '*':
4904 /* Output a nullification completer if there's nothing for the
4905 delay slot or nullification is requested. */
4906 if (dbr_sequence_length () == 0
4907 || (final_sequence
4908 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4909 fputs (",n", file);
4910 return;
4911 case 'R':
4912 /* Print out the second register name of a register pair.
4913 I.e., R (6) => 7. */
4914 fputs (reg_names[REGNO (x) + 1], file);
4915 return;
4916 case 'r':
4917 /* A register or zero. */
4918 if (x == const0_rtx
4919 || (x == CONST0_RTX (DFmode))
4920 || (x == CONST0_RTX (SFmode)))
4921 {
4922 fputs ("%r0", file);
4923 return;
4924 }
4925 else
4926 break;
4927 case 'f':
4928 /* A register or zero (floating point). */
4929 if (x == const0_rtx
4930 || (x == CONST0_RTX (DFmode))
4931 || (x == CONST0_RTX (SFmode)))
4932 {
4933 fputs ("%fr0", file);
4934 return;
4935 }
4936 else
4937 break;
4938 case 'A':
4939 {
4940 rtx xoperands[2];
4941
4942 xoperands[0] = XEXP (XEXP (x, 0), 0);
4943 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4944 output_global_address (file, xoperands[1], 0);
4945 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4946 return;
4947 }
4948
4949 case 'C': /* Plain (C)ondition */
4950 case 'X':
4951 switch (GET_CODE (x))
4952 {
4953 case EQ:
4954 fputs ("=", file); break;
4955 case NE:
4956 fputs ("<>", file); break;
4957 case GT:
4958 fputs (">", file); break;
4959 case GE:
4960 fputs (">=", file); break;
4961 case GEU:
4962 fputs (">>=", file); break;
4963 case GTU:
4964 fputs (">>", file); break;
4965 case LT:
4966 fputs ("<", file); break;
4967 case LE:
4968 fputs ("<=", file); break;
4969 case LEU:
4970 fputs ("<<=", file); break;
4971 case LTU:
4972 fputs ("<<", file); break;
4973 default:
4974 gcc_unreachable ();
4975 }
4976 return;
4977 case 'N': /* Condition, (N)egated */
4978 switch (GET_CODE (x))
4979 {
4980 case EQ:
4981 fputs ("<>", file); break;
4982 case NE:
4983 fputs ("=", file); break;
4984 case GT:
4985 fputs ("<=", file); break;
4986 case GE:
4987 fputs ("<", file); break;
4988 case GEU:
4989 fputs ("<<", file); break;
4990 case GTU:
4991 fputs ("<<=", file); break;
4992 case LT:
4993 fputs (">=", file); break;
4994 case LE:
4995 fputs (">", file); break;
4996 case LEU:
4997 fputs (">>", file); break;
4998 case LTU:
4999 fputs (">>=", file); break;
5000 default:
5001 gcc_unreachable ();
5002 }
5003 return;
5004 /* For floating point comparisons. Note that the output
5005 predicates are the complement of the desired mode. The
5006 conditions for GT, GE, LT, LE and LTGT cause an invalid
5007 operation exception if the result is unordered and this
5008 exception is enabled in the floating-point status register. */
5009 case 'Y':
5010 switch (GET_CODE (x))
5011 {
5012 case EQ:
5013 fputs ("!=", file); break;
5014 case NE:
5015 fputs ("=", file); break;
5016 case GT:
5017 fputs ("!>", file); break;
5018 case GE:
5019 fputs ("!>=", file); break;
5020 case LT:
5021 fputs ("!<", file); break;
5022 case LE:
5023 fputs ("!<=", file); break;
5024 case LTGT:
5025 fputs ("!<>", file); break;
5026 case UNLE:
5027 fputs ("!?<=", file); break;
5028 case UNLT:
5029 fputs ("!?<", file); break;
5030 case UNGE:
5031 fputs ("!?>=", file); break;
5032 case UNGT:
5033 fputs ("!?>", file); break;
5034 case UNEQ:
5035 fputs ("!?=", file); break;
5036 case UNORDERED:
5037 fputs ("!?", file); break;
5038 case ORDERED:
5039 fputs ("?", file); break;
5040 default:
5041 gcc_unreachable ();
5042 }
5043 return;
5044 case 'S': /* Condition, operands are (S)wapped. */
5045 switch (GET_CODE (x))
5046 {
5047 case EQ:
5048 fputs ("=", file); break;
5049 case NE:
5050 fputs ("<>", file); break;
5051 case GT:
5052 fputs ("<", file); break;
5053 case GE:
5054 fputs ("<=", file); break;
5055 case GEU:
5056 fputs ("<<=", file); break;
5057 case GTU:
5058 fputs ("<<", file); break;
5059 case LT:
5060 fputs (">", file); break;
5061 case LE:
5062 fputs (">=", file); break;
5063 case LEU:
5064 fputs (">>=", file); break;
5065 case LTU:
5066 fputs (">>", file); break;
5067 default:
5068 gcc_unreachable ();
5069 }
5070 return;
5071 case 'B': /* Condition, (B)oth swapped and negate. */
5072 switch (GET_CODE (x))
5073 {
5074 case EQ:
5075 fputs ("<>", file); break;
5076 case NE:
5077 fputs ("=", file); break;
5078 case GT:
5079 fputs (">=", file); break;
5080 case GE:
5081 fputs (">", file); break;
5082 case GEU:
5083 fputs (">>", file); break;
5084 case GTU:
5085 fputs (">>=", file); break;
5086 case LT:
5087 fputs ("<=", file); break;
5088 case LE:
5089 fputs ("<", file); break;
5090 case LEU:
5091 fputs ("<<", file); break;
5092 case LTU:
5093 fputs ("<<=", file); break;
5094 default:
5095 gcc_unreachable ();
5096 }
5097 return;
5098 case 'k':
5099 gcc_assert (GET_CODE (x) == CONST_INT);
5100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5101 return;
5102 case 'Q':
5103 gcc_assert (GET_CODE (x) == CONST_INT);
5104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5105 return;
5106 case 'L':
5107 gcc_assert (GET_CODE (x) == CONST_INT);
5108 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5109 return;
5110 case 'O':
5111 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5112 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5113 return;
5114 case 'p':
5115 gcc_assert (GET_CODE (x) == CONST_INT);
5116 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5117 return;
5118 case 'P':
5119 gcc_assert (GET_CODE (x) == CONST_INT);
5120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5121 return;
5122 case 'I':
5123 if (GET_CODE (x) == CONST_INT)
5124 fputs ("i", file);
5125 return;
5126 case 'M':
5127 case 'F':
5128 switch (GET_CODE (XEXP (x, 0)))
5129 {
5130 case PRE_DEC:
5131 case PRE_INC:
5132 if (ASSEMBLER_DIALECT == 0)
5133 fputs ("s,mb", file);
5134 else
5135 fputs (",mb", file);
5136 break;
5137 case POST_DEC:
5138 case POST_INC:
5139 if (ASSEMBLER_DIALECT == 0)
5140 fputs ("s,ma", file);
5141 else
5142 fputs (",ma", file);
5143 break;
5144 case PLUS:
5145 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5146 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5147 {
5148 if (ASSEMBLER_DIALECT == 0)
5149 fputs ("x", file);
5150 }
5151 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5152 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5153 {
5154 if (ASSEMBLER_DIALECT == 0)
5155 fputs ("x,s", file);
5156 else
5157 fputs (",s", file);
5158 }
5159 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5160 fputs ("s", file);
5161 break;
5162 default:
5163 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5164 fputs ("s", file);
5165 break;
5166 }
5167 return;
5168 case 'G':
5169 output_global_address (file, x, 0);
5170 return;
5171 case 'H':
5172 output_global_address (file, x, 1);
5173 return;
5174 case 0: /* Don't do anything special */
5175 break;
5176 case 'Z':
5177 {
5178 unsigned op[3];
5179 compute_zdepwi_operands (INTVAL (x), op);
5180 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5181 return;
5182 }
5183 case 'z':
5184 {
5185 unsigned op[3];
5186 compute_zdepdi_operands (INTVAL (x), op);
5187 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5188 return;
5189 }
5190 case 'c':
5191 /* We can get here from a .vtable_inherit due to our
5192 CONSTANT_ADDRESS_P rejecting perfectly good constant
5193 addresses. */
5194 break;
5195 default:
5196 gcc_unreachable ();
5197 }
5198 if (GET_CODE (x) == REG)
5199 {
5200 fputs (reg_names [REGNO (x)], file);
5201 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5202 {
5203 fputs ("R", file);
5204 return;
5205 }
5206 if (FP_REG_P (x)
5207 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5208 && (REGNO (x) & 1) == 0)
5209 fputs ("L", file);
5210 }
5211 else if (GET_CODE (x) == MEM)
5212 {
5213 int size = GET_MODE_SIZE (GET_MODE (x));
5214 rtx base = NULL_RTX;
5215 switch (GET_CODE (XEXP (x, 0)))
5216 {
5217 case PRE_DEC:
5218 case POST_DEC:
5219 base = XEXP (XEXP (x, 0), 0);
5220 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5221 break;
5222 case PRE_INC:
5223 case POST_INC:
5224 base = XEXP (XEXP (x, 0), 0);
5225 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5226 break;
5227 case PLUS:
5228 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5229 fprintf (file, "%s(%s)",
5230 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5231 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5232 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5233 fprintf (file, "%s(%s)",
5234 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5235 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5236 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5238 {
5239 /* Because the REG_POINTER flag can get lost during reload,
5240 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5241 index and base registers in the combined move patterns. */
5242 rtx base = XEXP (XEXP (x, 0), 1);
5243 rtx index = XEXP (XEXP (x, 0), 0);
5244
5245 fprintf (file, "%s(%s)",
5246 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5247 }
5248 else
5249 output_address (XEXP (x, 0));
5250 break;
5251 default:
5252 output_address (XEXP (x, 0));
5253 break;
5254 }
5255 }
5256 else
5257 output_addr_const (file, x);
5258 }
5259
5260 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5261
5262 void
5263 output_global_address (FILE *file, rtx x, int round_constant)
5264 {
5265
5266 /* Imagine (high (const (plus ...))). */
5267 if (GET_CODE (x) == HIGH)
5268 x = XEXP (x, 0);
5269
5270 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5271 output_addr_const (file, x);
5272 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5273 {
5274 output_addr_const (file, x);
5275 fputs ("-$global$", file);
5276 }
5277 else if (GET_CODE (x) == CONST)
5278 {
5279 const char *sep = "";
5280 int offset = 0; /* assembler wants -$global$ at end */
5281 rtx base = NULL_RTX;
5282
5283 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5284 {
5285 case SYMBOL_REF:
5286 base = XEXP (XEXP (x, 0), 0);
5287 output_addr_const (file, base);
5288 break;
5289 case CONST_INT:
5290 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5291 break;
5292 default:
5293 gcc_unreachable ();
5294 }
5295
5296 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5297 {
5298 case SYMBOL_REF:
5299 base = XEXP (XEXP (x, 0), 1);
5300 output_addr_const (file, base);
5301 break;
5302 case CONST_INT:
5303 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5304 break;
5305 default:
5306 gcc_unreachable ();
5307 }
5308
5309 /* How bogus. The compiler is apparently responsible for
5310 rounding the constant if it uses an LR field selector.
5311
5312 The linker and/or assembler seem a better place since
5313 they have to do this kind of thing already.
5314
5315 If we fail to do this, HP's optimizing linker may eliminate
5316 an addil, but not update the ldw/stw/ldo instruction that
5317 uses the result of the addil. */
5318 if (round_constant)
5319 offset = ((offset + 0x1000) & ~0x1fff);
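/* Worked example: an offset of 0x2345 becomes
   (0x2345 + 0x1000) & ~0x1fff = 0x3345 & ~0x1fff = 0x2000,
   i.e. the offset is rounded to the nearest multiple of 0x2000 (8k). */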
5320
5321 switch (GET_CODE (XEXP (x, 0)))
5322 {
5323 case PLUS:
5324 if (offset < 0)
5325 {
5326 offset = -offset;
5327 sep = "-";
5328 }
5329 else
5330 sep = "+";
5331 break;
5332
5333 case MINUS:
5334 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5335 sep = "-";
5336 break;
5337
5338 default:
5339 gcc_unreachable ();
5340 }
5341
5342 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5343 fputs ("-$global$", file);
5344 if (offset)
5345 fprintf (file, "%s%d", sep, offset);
5346 }
5347 else
5348 output_addr_const (file, x);
5349 }
5350
5351 /* Output boilerplate text to appear at the beginning of the file.
5352 There are several possible versions. */
5353 #define aputs(x) fputs(x, asm_out_file)
5354 static inline void
5355 pa_file_start_level (void)
5356 {
5357 if (TARGET_64BIT)
5358 aputs ("\t.LEVEL 2.0w\n");
5359 else if (TARGET_PA_20)
5360 aputs ("\t.LEVEL 2.0\n");
5361 else if (TARGET_PA_11)
5362 aputs ("\t.LEVEL 1.1\n");
5363 else
5364 aputs ("\t.LEVEL 1.0\n");
5365 }
5366
5367 static inline void
5368 pa_file_start_space (int sortspace)
5369 {
5370 aputs ("\t.SPACE $PRIVATE$");
5371 if (sortspace)
5372 aputs (",SORT=16");
5373 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5374 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5375 "\n\t.SPACE $TEXT$");
5376 if (sortspace)
5377 aputs (",SORT=8");
5378 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5379 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5380 }
5381
5382 static inline void
5383 pa_file_start_file (int want_version)
5384 {
5385 if (write_symbols != NO_DEBUG)
5386 {
5387 output_file_directive (asm_out_file, main_input_filename);
5388 if (want_version)
5389 aputs ("\t.version\t\"01.01\"\n");
5390 }
5391 }
5392
5393 static inline void
5394 pa_file_start_mcount (const char *aswhat)
5395 {
5396 if (profile_flag)
5397 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5398 }
5399
5400 static void
5401 pa_elf_file_start (void)
5402 {
5403 pa_file_start_level ();
5404 pa_file_start_mcount ("ENTRY");
5405 pa_file_start_file (0);
5406 }
5407
5408 static void
5409 pa_som_file_start (void)
5410 {
5411 pa_file_start_level ();
5412 pa_file_start_space (0);
5413 aputs ("\t.IMPORT $global$,DATA\n"
5414 "\t.IMPORT $$dyncall,MILLICODE\n");
5415 pa_file_start_mcount ("CODE");
5416 pa_file_start_file (0);
5417 }
5418
5419 static void
5420 pa_linux_file_start (void)
5421 {
5422 pa_file_start_file (1);
5423 pa_file_start_level ();
5424 pa_file_start_mcount ("CODE");
5425 }
5426
5427 static void
5428 pa_hpux64_gas_file_start (void)
5429 {
5430 pa_file_start_level ();
5431 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5432 if (profile_flag)
5433 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5434 #endif
5435 pa_file_start_file (1);
5436 }
5437
5438 static void
5439 pa_hpux64_hpas_file_start (void)
5440 {
5441 pa_file_start_level ();
5442 pa_file_start_space (1);
5443 pa_file_start_mcount ("CODE");
5444 pa_file_start_file (0);
5445 }
5446 #undef aputs
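
/* Illustrative sketch (composed from the routines above, not verbatim
   compiler output): for a PA 1.1 SOM target with no profiling and no
   debug info, pa_som_file_start produces roughly

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
	.SPACE $TEXT$
	.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */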
5447
5448 /* Search the deferred plabel list for SYMBOL and return its internal
5449 label. If an entry for SYMBOL is not found, a new entry is created. */
5450
5451 rtx
5452 get_deferred_plabel (rtx symbol)
5453 {
5454 const char *fname = XSTR (symbol, 0);
5455 size_t i;
5456
5457 /* See if we have already put this function on the list of deferred
5458 plabels. This list is generally small, so a linear search is not
5459 too ugly. If it proves too slow, replace it with something faster. */
5460 for (i = 0; i < n_deferred_plabels; i++)
5461 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5462 break;
5463
5464 /* If the deferred plabel list is empty, or this entry was not found
5465 on the list, create a new entry on the list. */
5466 if (deferred_plabels == NULL || i == n_deferred_plabels)
5467 {
5468 tree id;
5469
5470 if (deferred_plabels == 0)
5471 deferred_plabels = ggc_alloc_deferred_plabel ();
5472 else
5473 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5474 deferred_plabels,
5475 n_deferred_plabels + 1);
5476
5477 i = n_deferred_plabels++;
5478 deferred_plabels[i].internal_label = gen_label_rtx ();
5479 deferred_plabels[i].symbol = symbol;
5480
5481 /* Gross. We have just implicitly taken the address of this
5482 function. Mark it in the same manner as assemble_name. */
5483 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5484 if (id)
5485 mark_referenced (id);
5486 }
5487
5488 return deferred_plabels[i].internal_label;
5489 }
5490
5491 static void
5492 output_deferred_plabels (void)
5493 {
5494 size_t i;
5495
5496 /* If we have some deferred plabels, then we need to switch into the
5497 data or readonly data section, and align it to a word boundary
5498 (4 or 8 bytes) before outputting the deferred plabels. */
5499 if (n_deferred_plabels)
5500 {
5501 switch_to_section (flag_pic ? data_section : readonly_data_section);
5502 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5503 }
5504
5505 /* Now output the deferred plabels. */
5506 for (i = 0; i < n_deferred_plabels; i++)
5507 {
5508 targetm.asm_out.internal_label (asm_out_file, "L",
5509 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5510 assemble_integer (deferred_plabels[i].symbol,
5511 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5512 }
5513 }
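
/* Sketch of the output: with one deferred plabel for a function "foo"
   on a 32-bit non-PIC target, the readonly data section is selected,
   aligned to a word, and each entry emits its internal label followed
   by a word holding the symbol, roughly

   L$0042:
	.word foo

   (the label number is arbitrary and "foo" is a placeholder name;
   exact directives depend on the object format).  */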
5514
5515 #if HPUX_LONG_DOUBLE_LIBRARY
5516 /* Initialize optabs to point to HPUX long double emulation routines. */
5517 static void
5518 pa_hpux_init_libfuncs (void)
5519 {
5520 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5521 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5522 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5523 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5524 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5525 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5526 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5527 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5528 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5529
5530 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5531 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5532 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5533 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5534 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5535 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5536 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5537
5538 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5539 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5540 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5541 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5542
5543 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5544 ? "__U_Qfcnvfxt_quad_to_sgl"
5545 : "_U_Qfcnvfxt_quad_to_sgl");
5546 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5547 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5548 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5549
5550 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5551 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5552 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5553 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5554 }
5555 #endif
5556
5557 /* HP's millicode routines mean something special to the assembler.
5558 Keep track of which ones we have used. */
5559
5560 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5561 static void import_milli (enum millicodes);
5562 static char imported[(int) end1000];
5563 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5564 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5565 #define MILLI_START 10
5566
5567 static void
5568 import_milli (enum millicodes code)
5569 {
5570 char str[sizeof (import_string)];
5571
5572 if (!imported[(int) code])
5573 {
5574 imported[(int) code] = 1;
5575 strcpy (str, import_string);
5576 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5577 output_asm_insn (str, 0);
5578 }
5579 }
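
/* For example, import_milli (mulI) copies the 4-character name over
   the dots in the template, emitting

	.IMPORT $$mulI,MILLICODE

   exactly once per output file; later requests find imported[mulI]
   already set and emit nothing.  */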
5580
5581 /* The register constraints have put the operands and return value in
5582 the proper registers. */
5583
5584 const char *
5585 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5586 {
5587 import_milli (mulI);
5588 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5589 }
5590
5591 /* Emit the rtl for doing a division by a constant. */
5592
5593 /* Do magic division millicodes exist for this value? */
5594 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5595
5596 /* We'll use an array to keep track of the magic millicodes and
5597 whether or not we've used them already. [n][0] is signed, [n][1] is
5598 unsigned. */
5599
5600 static int div_milli[16][2];
5601
5602 int
5603 emit_hpdiv_const (rtx *operands, int unsignedp)
5604 {
5605 if (GET_CODE (operands[2]) == CONST_INT
5606 && INTVAL (operands[2]) > 0
5607 && INTVAL (operands[2]) < 16
5608 && magic_milli[INTVAL (operands[2])])
5609 {
5610 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5611
5612 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5613 emit
5614 (gen_rtx_PARALLEL
5615 (VOIDmode,
5616 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5617 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5618 SImode,
5619 gen_rtx_REG (SImode, 26),
5620 operands[2])),
5621 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5622 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5623 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5624 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5625 gen_rtx_CLOBBER (VOIDmode, ret))));
5626 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5627 return 1;
5628 }
5629 return 0;
5630 }
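
/* Sketch of the effect: for x / 3 (magic_milli[3] is nonzero), the
   expansion behaves like

	copy x,%r26		; dividend in %r26
	... millicode call to $$divI_3 ...
	copy %r29,result	; quotient comes back in %r29

   with the PARALLEL listing the registers the millicode clobbers so
   nothing is kept live across the call.  The actual call and its
   .IMPORT directive are emitted later by output_div_insn.  */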
5631
5632 const char *
5633 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5634 {
5635 int divisor;
5636
5637 /* If the divisor is a constant, try to use one of the special
5638 opcodes. */
5639 if (GET_CODE (operands[0]) == CONST_INT)
5640 {
5641 static char buf[100];
5642 divisor = INTVAL (operands[0]);
5643 if (!div_milli[divisor][unsignedp])
5644 {
5645 div_milli[divisor][unsignedp] = 1;
5646 if (unsignedp)
5647 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5648 else
5649 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5650 }
5651 if (unsignedp)
5652 {
5653 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5654 INTVAL (operands[0]));
5655 return output_millicode_call (insn,
5656 gen_rtx_SYMBOL_REF (SImode, buf));
5657 }
5658 else
5659 {
5660 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5661 INTVAL (operands[0]));
5662 return output_millicode_call (insn,
5663 gen_rtx_SYMBOL_REF (SImode, buf));
5664 }
5665 }
5666 /* Divisor isn't a special constant. */
5667 else
5668 {
5669 if (unsignedp)
5670 {
5671 import_milli (divU);
5672 return output_millicode_call (insn,
5673 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5674 }
5675 else
5676 {
5677 import_milli (divI);
5678 return output_millicode_call (insn,
5679 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5680 }
5681 }
5682 }
5683
5684 /* Output a $$rem millicode to do mod. */
5685
5686 const char *
5687 output_mod_insn (int unsignedp, rtx insn)
5688 {
5689 if (unsignedp)
5690 {
5691 import_milli (remU);
5692 return output_millicode_call (insn,
5693 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5694 }
5695 else
5696 {
5697 import_milli (remI);
5698 return output_millicode_call (insn,
5699 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5700 }
5701 }
5702
5703 void
5704 output_arg_descriptor (rtx call_insn)
5705 {
5706 const char *arg_regs[4];
5707 enum machine_mode arg_mode;
5708 rtx link;
5709 int i, output_flag = 0;
5710 int regno;
5711
5712 /* We neither need nor want argument location descriptors for the
5713 64-bit runtime environment or the ELF32 environment. */
5714 if (TARGET_64BIT || TARGET_ELF32)
5715 return;
5716
5717 for (i = 0; i < 4; i++)
5718 arg_regs[i] = 0;
5719
5720 /* Specify explicitly that no argument relocations should take place
5721 if using the portable runtime calling conventions. */
5722 if (TARGET_PORTABLE_RUNTIME)
5723 {
5724 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5725 asm_out_file);
5726 return;
5727 }
5728
5729 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5730 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5731 link; link = XEXP (link, 1))
5732 {
5733 rtx use = XEXP (link, 0);
5734
5735 if (! (GET_CODE (use) == USE
5736 && GET_CODE (XEXP (use, 0)) == REG
5737 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5738 continue;
5739
5740 arg_mode = GET_MODE (XEXP (use, 0));
5741 regno = REGNO (XEXP (use, 0));
5742 if (regno >= 23 && regno <= 26)
5743 {
5744 arg_regs[26 - regno] = "GR";
5745 if (arg_mode == DImode)
5746 arg_regs[25 - regno] = "GR";
5747 }
5748 else if (regno >= 32 && regno <= 39)
5749 {
5750 if (arg_mode == SFmode)
5751 arg_regs[(regno - 32) / 2] = "FR";
5752 else
5753 {
5754 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5755 arg_regs[(regno - 34) / 2] = "FR";
5756 arg_regs[(regno - 34) / 2 + 1] = "FU";
5757 #else
5758 arg_regs[(regno - 34) / 2] = "FU";
5759 arg_regs[(regno - 34) / 2 + 1] = "FR";
5760 #endif
5761 }
5762 }
5763 }
5764 fputs ("\t.CALL ", asm_out_file);
5765 for (i = 0; i < 4; i++)
5766 {
5767 if (arg_regs[i])
5768 {
5769 if (output_flag++)
5770 fputc (',', asm_out_file);
5771 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5772 }
5773 }
5774 fputc ('\n', asm_out_file);
5775 }
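
/* Examples derived from the mapping above: a call passing two ints in
   %r26 and %r25 gets

	.CALL ARGW0=GR,ARGW1=GR

   while a call passing a single double in the first floating-point
   argument pair gets

	.CALL ARGW0=FR,ARGW1=FU

   (or FU,FR when HP_FP_ARG_DESCRIPTOR_REVERSED is defined).  */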
5776 \f
5777 /* Inform reload about cases where moving X with a mode MODE to a register in
5778 RCLASS requires an extra scratch or immediate register. Return the class
5779 needed for the immediate register. */
5780
5781 static reg_class_t
5782 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5783 enum machine_mode mode, secondary_reload_info *sri)
5784 {
5785 int regno;
5786 enum reg_class rclass = (enum reg_class) rclass_i;
5787
5788 /* Handle the easy stuff first. */
5789 if (rclass == R1_REGS)
5790 return NO_REGS;
5791
5792 if (REG_P (x))
5793 {
5794 regno = REGNO (x);
5795 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5796 return NO_REGS;
5797 }
5798 else
5799 regno = -1;
5800
5801 /* If we have something like (mem (mem (...))), we can safely assume the
5802 inner MEM will end up in a general register after reloading, so there's
5803 no need for a secondary reload. */
5804 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5805 return NO_REGS;
5806
5807 /* Trying to load a constant into a FP register during PIC code
5808 generation requires %r1 as a scratch register. */
5809 if (flag_pic
5810 && (mode == SImode || mode == DImode)
5811 && FP_REG_CLASS_P (rclass)
5812 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5813 {
5814 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5815 : CODE_FOR_reload_indi_r1);
5816 return NO_REGS;
5817 }
5818
5819 /* Secondary reloads of symbolic operands require %r1 as a scratch
5820 register when we're generating PIC code and when the operand isn't
5821 readonly. */
5822 if (symbolic_expression_p (x))
5823 {
5824 if (GET_CODE (x) == HIGH)
5825 x = XEXP (x, 0);
5826
5827 if (flag_pic || !read_only_operand (x, VOIDmode))
5828 {
5829 gcc_assert (mode == SImode || mode == DImode);
5830 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5831 : CODE_FOR_reload_indi_r1);
5832 return NO_REGS;
5833 }
5834 }
5835
5836 /* Profiling showed the PA port spends about 1.3% of its compilation
5837 time in true_regnum from calls inside pa_secondary_reload_class. */
5838 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5839 regno = true_regnum (x);
5840
5841 /* In order to allow 14-bit displacements in integer loads and stores,
5842 we need to prevent reload from generating out of range integer mode
5843 loads and stores to the floating point registers. Previously, we
5844 used to call for a secondary reload and have emit_move_sequence()
5845 fix the instruction sequence. However, reload occasionally wouldn't
5846 generate the reload and we would end up with an invalid REG+D memory
5847 address. So, now we use an intermediate general register for most
5848 memory loads and stores. */
5849 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5850 && GET_MODE_CLASS (mode) == MODE_INT
5851 && FP_REG_CLASS_P (rclass))
5852 {
5853 /* Reload passes (mem:SI (reg/f:DI 30 %r30)) when it wants to check
5854 the secondary reload needed for a pseudo. It never passes a
5855 REG+D address. */
5856 if (GET_CODE (x) == MEM)
5857 {
5858 x = XEXP (x, 0);
5859
5860 /* We don't need an intermediate for indexed and LO_SUM DLT
5861 memory addresses. When INT14_OK_STRICT is true, it might
5862 appear that we could directly allow register indirect
5863 memory addresses. However, this doesn't work because we
5864 don't support SUBREGs in floating-point register copies
5865 and reload doesn't tell us when it's going to use a SUBREG. */
5866 if (IS_INDEX_ADDR_P (x)
5867 || IS_LO_SUM_DLT_ADDR_P (x))
5868 return NO_REGS;
5869
5870 /* Otherwise, we need an intermediate general register. */
5871 return GENERAL_REGS;
5872 }
5873
5874 /* Request a secondary reload with a general scratch register
5875 for everything else. ??? Could symbolic operands be handled
5876 directly when generating non-pic PA 2.0 code? */
5877 sri->icode = (in_p
5878 ? direct_optab_handler (reload_in_optab, mode)
5879 : direct_optab_handler (reload_out_optab, mode));
5880 return NO_REGS;
5881 }
5882
5883 /* A SAR<->FP register copy requires an intermediate general register
5884 and secondary memory. We need a secondary reload with a general
5885 scratch register for spills. */
5886 if (rclass == SHIFT_REGS)
5887 {
5888 /* Handle spill. */
5889 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
5890 {
5891 sri->icode = (in_p
5892 ? direct_optab_handler (reload_in_optab, mode)
5893 : direct_optab_handler (reload_out_optab, mode));
5894 return NO_REGS;
5895 }
5896
5897 /* Handle FP copy. */
5898 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
5899 return GENERAL_REGS;
5900 }
5901
5902 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5903 && REGNO_REG_CLASS (regno) == SHIFT_REGS
5904 && FP_REG_CLASS_P (rclass))
5905 return GENERAL_REGS;
5906
5907 return NO_REGS;
5908 }
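
/* Sketch of the PIC constant case handled above: for something like

	(set (reg:SI fr22) (const_int 1234))

   with flag_pic set (register name illustrative), we return NO_REGS
   but set sri->icode to CODE_FOR_reload_insi_r1, so reload routes the
   constant through the %r1 scratch before copying it into the FP
   register.  */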
5909
5910 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5911 is only marked as live on entry by df-scan when it is a fixed
5912 register. It isn't a fixed register in the 64-bit runtime,
5913 so we need to mark it here. */
5914
5915 static void
5916 pa_extra_live_on_entry (bitmap regs)
5917 {
5918 if (TARGET_64BIT)
5919 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5920 }
5921
5922 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5923 to prevent it from being deleted. */
5924
5925 rtx
5926 pa_eh_return_handler_rtx (void)
5927 {
5928 rtx tmp;
5929
5930 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
5931 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5932 tmp = gen_rtx_MEM (word_mode, tmp);
5933 tmp->volatil = 1;
5934 return tmp;
5935 }
5936
5937 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5938 by invisible reference. As a GCC extension, we also pass anything
5939 with a zero or variable size by reference.
5940
5941 The 64-bit runtime does not describe passing any types by invisible
5942 reference. The internals of GCC can't currently handle passing
5943 empty structures, and zero or variable length arrays when they are
5944 not passed entirely on the stack or by reference. Thus, as a GCC
5945 extension, we pass these types by reference. The HP compiler doesn't
5946 support these types, so hopefully there shouldn't be any compatibility
5947 issues. This may have to be revisited when HP releases a C99 compiler
5948 or updates the ABI. */
5949
5950 static bool
5951 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5952 enum machine_mode mode, const_tree type,
5953 bool named ATTRIBUTE_UNUSED)
5954 {
5955 HOST_WIDE_INT size;
5956
5957 if (type)
5958 size = int_size_in_bytes (type);
5959 else
5960 size = GET_MODE_SIZE (mode);
5961
5962 if (TARGET_64BIT)
5963 return size <= 0;
5964 else
5965 return size <= 0 || size > 8;
5966 }
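
/* For instance, a 12-byte struct is passed by invisible reference in
   the 32-bit runtime (size > 8) but by value in the 64-bit runtime,
   while a zero-sized object is passed by reference in both.  */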
5967
5968 enum direction
5969 function_arg_padding (enum machine_mode mode, const_tree type)
5970 {
5971 if (mode == BLKmode
5972 || (TARGET_64BIT
5973 && type
5974 && (AGGREGATE_TYPE_P (type)
5975 || TREE_CODE (type) == COMPLEX_TYPE
5976 || TREE_CODE (type) == VECTOR_TYPE)))
5977 {
5978 /* Return none if justification is not required. */
5979 if (type
5980 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5981 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5982 return none;
5983
5984 /* The directions set here are ignored when a BLKmode argument larger
5985 than a word is placed in a register. Different code is used for
5986 the stack and registers. This makes it difficult to have a
5987 consistent data representation for both the stack and registers.
5988 For both runtimes, the justification and padding for arguments on
5989 the stack and in registers should be identical. */
5990 if (TARGET_64BIT)
5991 /* The 64-bit runtime specifies left justification for aggregates. */
5992 return upward;
5993 else
5994 /* The 32-bit runtime architecture specifies right justification.
5995 When the argument is passed on the stack, the argument is padded
5996 with garbage on the left. The HP compiler pads with zeros. */
5997 return downward;
5998 }
5999
6000 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6001 return downward;
6002 else
6003 return none;
6004 }
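
/* For example, a 3-byte BLKmode argument is padded downward in the
   32-bit runtime (right-justified in its word, garbage above it) and
   upward in the 64-bit runtime, while a 4-byte aggregate in the
   32-bit runtime is already a multiple of PARM_BOUNDARY and needs no
   justification.  */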
6005
6006 \f
6007 /* Do what is necessary for `va_start'. We look at the current function
6008 to determine if stdargs or varargs is used and fill in an initial
6009 va_list. A pointer to this constructor is returned. */
6010
6011 static rtx
6012 hppa_builtin_saveregs (void)
6013 {
6014 rtx offset, dest;
6015 tree fntype = TREE_TYPE (current_function_decl);
6016 int argadj = ((!stdarg_p (fntype))
6017 ? UNITS_PER_WORD : 0);
6018
6019 if (argadj)
6020 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
6021 else
6022 offset = crtl->args.arg_offset_rtx;
6023
6024 if (TARGET_64BIT)
6025 {
6026 int i, off;
6027
6028 /* Adjust for varargs/stdarg differences. */
6029 if (argadj)
6030 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
6031 else
6032 offset = crtl->args.arg_offset_rtx;
6033
6034 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6035 from the incoming arg pointer and growing to larger addresses. */
6036 for (i = 26, off = -64; i >= 19; i--, off += 8)
6037 emit_move_insn (gen_rtx_MEM (word_mode,
6038 plus_constant (arg_pointer_rtx, off)),
6039 gen_rtx_REG (word_mode, i));
6040
6041 /* The incoming args pointer points just beyond the flushback area;
6042 normally this is not a serious concern. However, when we are doing
6043 varargs/stdargs we want to make the arg pointer point to the start
6044 of the incoming argument area. */
6045 emit_move_insn (virtual_incoming_args_rtx,
6046 plus_constant (arg_pointer_rtx, -64));
6047
6048 /* Now return a pointer to the first anonymous argument. */
6049 return copy_to_reg (expand_binop (Pmode, add_optab,
6050 virtual_incoming_args_rtx,
6051 offset, 0, 0, OPTAB_LIB_WIDEN));
6052 }
6053
6054 /* Store general registers on the stack. */
6055 dest = gen_rtx_MEM (BLKmode,
6056 plus_constant (crtl->args.internal_arg_pointer,
6057 -16));
6058 set_mem_alias_set (dest, get_varargs_alias_set ());
6059 set_mem_align (dest, BITS_PER_WORD);
6060 move_block_from_reg (23, dest, 4);
6061
6062 /* move_block_from_reg will emit code to store the argument registers
6063 individually as scalar stores.
6064
6065 However, other insns may later load from the same addresses for
6066 a structure load (passing a struct to a varargs routine).
6067
6068 The alias code assumes that such aliasing can never happen, so we
6069 have to keep memory referencing insns from moving up beyond the
6070 last argument register store. So we emit a blockage insn here. */
6071 emit_insn (gen_blockage ());
6072
6073 return copy_to_reg (expand_binop (Pmode, add_optab,
6074 crtl->args.internal_arg_pointer,
6075 offset, 0, 0, OPTAB_LIB_WIDEN));
6076 }
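
/* Layout sketch for the 64-bit case above: the save loop stores

	%r26 -> -64(arg pointer)
	%r25 -> -56(arg pointer)
	  ...
	%r19 ->  -8(arg pointer)

   so the eight argument registers land in memory in argument order,
   contiguous with any arguments already passed on the stack.  */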
6077
6078 static void
6079 hppa_va_start (tree valist, rtx nextarg)
6080 {
6081 nextarg = expand_builtin_saveregs ();
6082 std_expand_builtin_va_start (valist, nextarg);
6083 }
6084
6085 static tree
6086 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6087 gimple_seq *post_p)
6088 {
6089 if (TARGET_64BIT)
6090 {
6091 /* Args grow upward. We can use the generic routines. */
6092 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6093 }
6094 else /* !TARGET_64BIT */
6095 {
6096 tree ptr = build_pointer_type (type);
6097 tree valist_type;
6098 tree t, u;
6099 unsigned int size, ofs;
6100 bool indirect;
6101
6102 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6103 if (indirect)
6104 {
6105 type = ptr;
6106 ptr = build_pointer_type (type);
6107 }
6108 size = int_size_in_bytes (type);
6109 valist_type = TREE_TYPE (valist);
6110
6111 /* Args grow down. Not handled by generic routines. */
6112
6113 u = fold_convert (sizetype, size_in_bytes (type));
6114 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6115 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6116
6117 /* Align to 4 or 8 byte boundary depending on argument size. */
6118
6119 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6120 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6121 t = fold_convert (valist_type, t);
6122
6123 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6124
6125 ofs = (8 - size) % 4;
6126 if (ofs != 0)
6127 {
6128 u = size_int (ofs);
6129 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6130 }
6131
6132 t = fold_convert (ptr, t);
6133 t = build_va_arg_indirect_ref (t);
6134
6135 if (indirect)
6136 t = build_va_arg_indirect_ref (t);
6137
6138 return t;
6139 }
6140 }
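
/* Worked example of the 32-bit arithmetic above, for a 2-byte
   argument (C-like sketch):

	valist = (valist - 2) & -4;	// step down and align to 4
	addr = valist + 2;		// ofs = (8 - 2) % 4 == 2

   leaving the halfword right-justified in its 4-byte slot, matching
   the downward padding used for small arguments.  */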
6141
6142 /* True if MODE is valid for the target. By "valid", we mean able to
6143 be manipulated in non-trivial ways. In particular, this means all
6144 the arithmetic is supported.
6145
6146 Currently, TImode is not valid as the HP 64-bit runtime documentation
6147 doesn't document the alignment and calling conventions for this type.
6148 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6149 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6150
6151 static bool
6152 pa_scalar_mode_supported_p (enum machine_mode mode)
6153 {
6154 int precision = GET_MODE_PRECISION (mode);
6155
6156 switch (GET_MODE_CLASS (mode))
6157 {
6158 case MODE_PARTIAL_INT:
6159 case MODE_INT:
6160 if (precision == CHAR_TYPE_SIZE)
6161 return true;
6162 if (precision == SHORT_TYPE_SIZE)
6163 return true;
6164 if (precision == INT_TYPE_SIZE)
6165 return true;
6166 if (precision == LONG_TYPE_SIZE)
6167 return true;
6168 if (precision == LONG_LONG_TYPE_SIZE)
6169 return true;
6170 return false;
6171
6172 case MODE_FLOAT:
6173 if (precision == FLOAT_TYPE_SIZE)
6174 return true;
6175 if (precision == DOUBLE_TYPE_SIZE)
6176 return true;
6177 if (precision == LONG_DOUBLE_TYPE_SIZE)
6178 return true;
6179 return false;
6180
6181 case MODE_DECIMAL_FLOAT:
6182 return false;
6183
6184 default:
6185 gcc_unreachable ();
6186 }
6187 }
6188
6189 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6190 it branches into the delay slot. Otherwise, return FALSE. */
6191
6192 static bool
6193 branch_to_delay_slot_p (rtx insn)
6194 {
6195 rtx jump_insn;
6196
6197 if (dbr_sequence_length ())
6198 return FALSE;
6199
6200 jump_insn = next_active_insn (JUMP_LABEL (insn));
6201 while (insn)
6202 {
6203 insn = next_active_insn (insn);
6204 if (jump_insn == insn)
6205 return TRUE;
6206
6207 /* We can't rely on the length of asms. So, we return FALSE when
6208 the branch is followed by an asm. */
6209 if (!insn
6210 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6211 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6212 || get_attr_length (insn) > 0)
6213 break;
6214 }
6215
6216 return FALSE;
6217 }
6218
6219 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6220
6221 This occurs when INSN has an unfilled delay slot and is followed
6222 by an asm. Disaster can occur if the asm is empty and the jump
6223 branches into the delay slot. So, we add a nop in the delay slot
6224 when this occurs. */
6225
6226 static bool
6227 branch_needs_nop_p (rtx insn)
6228 {
6229 rtx jump_insn;
6230
6231 if (dbr_sequence_length ())
6232 return FALSE;
6233
6234 jump_insn = next_active_insn (JUMP_LABEL (insn));
6235 while (insn)
6236 {
6237 insn = next_active_insn (insn);
6238 if (!insn || jump_insn == insn)
6239 return TRUE;
6240
6241 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6242 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6243 && get_attr_length (insn) > 0)
6244 break;
6245 }
6246
6247 return FALSE;
6248 }
6249
6250 /* Return TRUE if INSN, a forward jump insn, can use nullification
6251 to skip the following instruction. This avoids an extra cycle due
6252 to a mis-predicted branch when we fall through. */
6253
6254 static bool
6255 use_skip_p (rtx insn)
6256 {
6257 rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6258
6259 while (insn)
6260 {
6261 insn = next_active_insn (insn);
6262
6263 /* We can't rely on the length of asms, so we can't skip asms. */
6264 if (!insn
6265 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6266 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6267 break;
6268 if (get_attr_length (insn) == 4
6269 && jump_insn == next_active_insn (insn))
6270 return TRUE;
6271 if (get_attr_length (insn) > 0)
6272 break;
6273 }
6274
6275 return FALSE;
6276 }
6277
6278 /* This routine handles all the normal conditional branch sequences we
6279 might need to generate. It handles compare immediate vs compare
6280 register, nullification of delay slots, varying length branches,
6281 negated branches, and all combinations of the above. It returns the
6282 output appropriate to emit the branch corresponding to all given
6283 parameters. */
6284
6285 const char *
6286 output_cbranch (rtx *operands, int negated, rtx insn)
6287 {
6288 static char buf[100];
6289 bool useskip;
6290 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6291 int length = get_attr_length (insn);
6292 int xdelay;
6293
6294 /* A conditional branch to the following instruction (e.g. the delay slot)
6295 is asking for a disaster. This can happen when not optimizing and
6296 when jump optimization fails.
6297
6298 While it is usually safe to emit nothing, this can fail if the
6299 preceding instruction is a nullified branch with an empty delay
6300 slot and the same branch target as this branch. We could check
6301 for this but jump optimization should eliminate nop jumps. It
6302 is always safe to emit a nop. */
6303 if (branch_to_delay_slot_p (insn))
6304 return "nop";
6305
6306 /* The doubleword form of the cmpib instruction doesn't have the LEU
6307 and GTU conditions while the cmpb instruction does. Since we accept
6308 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6309 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6310 operands[2] = gen_rtx_REG (DImode, 0);
6311 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6312 operands[1] = gen_rtx_REG (DImode, 0);
6313
6314 /* If this is a long branch with its delay slot unfilled, set `nullify'
6315 as it can nullify the delay slot and save a nop. */
6316 if (length == 8 && dbr_sequence_length () == 0)
6317 nullify = 1;
6318
6319 /* If this is a short forward conditional branch which did not get
6320 its delay slot filled, the delay slot can still be nullified. */
6321 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6322 nullify = forward_branch_p (insn);
6323
6324 /* A forward branch over a single nullified insn can be done with a
6325 comclr instruction. This avoids a single cycle penalty due to a
6326 mis-predicted branch if we fall through (branch not taken). */
6327 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6328
6329 switch (length)
6330 {
6331 /* All short conditional branches except backwards with an unfilled
6332 delay slot. */
6333 case 4:
6334 if (useskip)
6335 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6336 else
6337 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6338 if (GET_MODE (operands[1]) == DImode)
6339 strcat (buf, "*");
6340 if (negated)
6341 strcat (buf, "%B3");
6342 else
6343 strcat (buf, "%S3");
6344 if (useskip)
6345 strcat (buf, " %2,%r1,%%r0");
6346 else if (nullify)
6347 {
6348 if (branch_needs_nop_p (insn))
6349 strcat (buf, ",n %2,%r1,%0%#");
6350 else
6351 strcat (buf, ",n %2,%r1,%0");
6352 }
6353 else
6354 strcat (buf, " %2,%r1,%0");
6355 break;
6356
6357 /* All long conditionals. Note a short backward branch with an
6358 unfilled delay slot is treated just like a long backward branch
6359 with an unfilled delay slot. */
6360 case 8:
6361 /* Handle weird backwards branch with a filled delay slot
6362 which is nullified. */
6363 if (dbr_sequence_length () != 0
6364 && ! forward_branch_p (insn)
6365 && nullify)
6366 {
6367 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6368 if (GET_MODE (operands[1]) == DImode)
6369 strcat (buf, "*");
6370 if (negated)
6371 strcat (buf, "%S3");
6372 else
6373 strcat (buf, "%B3");
6374 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6375 }
6376 /* Handle short backwards branch with an unfilled delay slot.
6377 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6378 taken and untaken branches. */
6379 else if (dbr_sequence_length () == 0
6380 && ! forward_branch_p (insn)
6381 && INSN_ADDRESSES_SET_P ()
6382 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6383 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6384 {
6385 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6386 if (GET_MODE (operands[1]) == DImode)
6387 strcat (buf, "*");
6388 if (negated)
6389 strcat (buf, "%B3 %2,%r1,%0%#");
6390 else
6391 strcat (buf, "%S3 %2,%r1,%0%#");
6392 }
6393 else
6394 {
6395 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6396 if (GET_MODE (operands[1]) == DImode)
6397 strcat (buf, "*");
6398 if (negated)
6399 strcat (buf, "%S3");
6400 else
6401 strcat (buf, "%B3");
6402 if (nullify)
6403 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6404 else
6405 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6406 }
6407 break;
6408
6409 default:
6410 /* The reversed conditional branch must branch over one additional
6411 instruction if the delay slot is filled and needs to be extracted
6412 by output_lbranch. If the delay slot is empty or this is a
6413 nullified forward branch, the instruction after the reversed
6414 conditional branch must be nullified. */
6415 if (dbr_sequence_length () == 0
6416 || (nullify && forward_branch_p (insn)))
6417 {
6418 nullify = 1;
6419 xdelay = 0;
6420 operands[4] = GEN_INT (length);
6421 }
6422 else
6423 {
6424 xdelay = 1;
6425 operands[4] = GEN_INT (length + 4);
6426 }
6427
6428 /* Create a reversed conditional branch which branches around
6429 the following insns. */
6430 if (GET_MODE (operands[1]) != DImode)
6431 {
6432 if (nullify)
6433 {
6434 if (negated)
6435 strcpy (buf,
6436 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6437 else
6438 strcpy (buf,
6439 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6440 }
6441 else
6442 {
6443 if (negated)
6444 strcpy (buf,
6445 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6446 else
6447 strcpy (buf,
6448 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6449 }
6450 }
6451 else
6452 {
6453 if (nullify)
6454 {
6455 if (negated)
6456 strcpy (buf,
6457 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6458 else
6459 strcpy (buf,
6460 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6461 }
6462 else
6463 {
6464 if (negated)
6465 strcpy (buf,
6466 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6467 else
6468 strcpy (buf,
6469 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6470 }
6471 }
6472
6473 output_asm_insn (buf, operands);
6474 return output_lbranch (operands[0], insn, xdelay);
6475 }
6476 return buf;
6477 }
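
/* To make the templates above concrete: a short forward nullified
   branch on word operands returns the string

	"{com%I2b,|cmp%I2b,}%S3,n %2,%r1,%0"

   which might print as, e.g.,

	cmpb,<,n %r25,%r26,L$0005

   The {old|new} braces pick the PA 1.x or PA 2.0 mnemonic via the
   assembler dialect, and %I2 switches to the immediate form (comib
   or cmpib) when operand 2 is a constant.  The label number is, of
   course, illustrative.  */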
6478
6479 /* This routine handles output of long unconditional branches that
6480 exceed the maximum range of a simple branch instruction. Since
6481 we don't have a register available for the branch, we save register
6482 %r1 in the frame marker, load the branch destination DEST into %r1,
6483 execute the branch, and restore %r1 in the delay slot of the branch.
6484
6485 Since long branches may have an insn in the delay slot and the
6486 delay slot is used to restore %r1, we in general need to extract
6487 this insn and execute it before the branch. However, to facilitate
6488 use of this function by conditional branches, we also provide an
6489 option to not extract the delay insn so that it will be emitted
6490 after the long branch. So, if there is an insn in the delay slot,
6491 it is extracted if XDELAY is nonzero.
6492
6493 The lengths of the various long-branch sequences are 20, 16 and 24
6494 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6495
6496 const char *
6497 output_lbranch (rtx dest, rtx insn, int xdelay)
6498 {
6499 rtx xoperands[2];
6500
6501 xoperands[0] = dest;
6502
6503 /* First, free up the delay slot. */
6504 if (xdelay && dbr_sequence_length () != 0)
6505 {
6506 /* We can't handle a jump in the delay slot. */
6507 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6508
6509 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6510 optimize, 0, NULL);
6511
6512 /* Now delete the delay insn. */
6513 SET_INSN_DELETED (NEXT_INSN (insn));
6514 }
6515
6516 /* Output an insn to save %r1. The runtime documentation doesn't
6517 specify whether the "Clean Up" slot in the caller's frame can
6518 be clobbered by the callee. It isn't copied by HP's builtin
6519 alloca, so this suggests that it can be clobbered if necessary.
6520 The "Static Link" location is copied by HP builtin alloca, so
6521 we avoid using it. Using the cleanup slot might be a problem
6522 if we have to interoperate with languages that pass cleanup
6523 information. However, it should be possible to handle these
6524 situations with GCC's asm feature.
6525
6526 The "Current RP" slot is reserved for the called procedure, so
6527 we try to use it when we don't have a frame of our own. It's
6528 rather unlikely that we won't have a frame when we need to emit
6529 a very long branch.
6530
6531 Really the way to go long term is a register scavenger; go to
6532 the target of the jump and find a register which we can use
6533 as a scratch to hold the value in %r1. Then, we wouldn't have
6534 to free up the delay slot or clobber a slot that may be needed
6535 for other purposes. */
6536 if (TARGET_64BIT)
6537 {
6538 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6539 /* Use the return pointer slot in the frame marker. */
6540 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6541 else
6542 /* Use the slot at -40 in the frame marker since HP builtin
6543 alloca doesn't copy it. */
6544 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6545 }
6546 else
6547 {
6548 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6549 /* Use the return pointer slot in the frame marker. */
6550 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6551 else
6552 /* Use the "Clean Up" slot in the frame marker. In GCC,
6553 the only other use of this location is for copying a
6554 floating point double argument from a floating-point
6555 register to two general registers. The copy is done
6556 as an "atomic" operation when outputting a call, so it
6557 won't interfere with our using the location here. */
6558 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6559 }
6560
6561 if (TARGET_PORTABLE_RUNTIME)
6562 {
6563 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6564 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6565 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6566 }
6567 else if (flag_pic)
6568 {
6569 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6570 if (TARGET_SOM || !TARGET_GAS)
6571 {
6572 xoperands[1] = gen_label_rtx ();
6573 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6574 targetm.asm_out.internal_label (asm_out_file, "L",
6575 CODE_LABEL_NUMBER (xoperands[1]));
6576 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6577 }
6578 else
6579 {
6580 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6581 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6582 }
6583 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6584 }
6585 else
6586 /* Now output a very long branch to the original target. */
6587 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6588
6589 /* Now restore the value of %r1 in the delay slot. */
6590 if (TARGET_64BIT)
6591 {
6592 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6593 return "ldd -16(%%r30),%%r1";
6594 else
6595 return "ldd -40(%%r30),%%r1";
6596 }
6597 else
6598 {
6599 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6600 return "ldw -20(%%r30),%%r1";
6601 else
6602 return "ldw -12(%%r30),%%r1";
6603 }
6604 }
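
/* Putting the pieces together, the 16-byte non-PIC sequence for a
   frameless 32-bit function looks like

	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'target,%r1
	be R'target(%sr4,%r1)	; inter-space branch through %r1
	ldw -20(%r30),%r1	; restore %r1 in the delay slot

   The PIC form instead materializes the pc-relative address with the
   bl .+8/addil/ldo sequence, giving the 24-byte length noted above.  */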
6605
6606 /* This routine handles all the branch-on-bit conditional branch sequences we
6607 might need to generate. It handles nullification of delay slots,
6608 varying length branches, negated branches and all combinations of the
6609 above. It returns the appropriate output template to emit the branch. */
6610
6611 const char *
6612 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6613 {
6614 static char buf[100];
6615 bool useskip;
6616 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6617 int length = get_attr_length (insn);
6618 int xdelay;
6619
6620 /* A conditional branch to the following instruction (e.g. the delay slot) is
6621 asking for a disaster. I do not think this can happen as this pattern
6622 is only used when optimizing; jump optimization should eliminate the
6623 jump. But be prepared just in case. */
6624
6625 if (branch_to_delay_slot_p (insn))
6626 return "nop";
6627
6628 /* If this is a long branch with its delay slot unfilled, set `nullify'
6629 as it can nullify the delay slot and save a nop. */
6630 if (length == 8 && dbr_sequence_length () == 0)
6631 nullify = 1;
6632
6633 /* If this is a short forward conditional branch which did not get
6634 its delay slot filled, the delay slot can still be nullified. */
6635 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6636 nullify = forward_branch_p (insn);
6637
6638 /* A forward branch over a single nullified insn can be done with an
6639 extrs instruction. This avoids a single cycle penalty due to a
6640 mis-predicted branch if we fall through (branch not taken). */
6641 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6642
6643 switch (length)
6644 {
6645
6646 /* All short conditional branches except backwards with an unfilled
6647 delay slot. */
6648 case 4:
6649 if (useskip)
6650 strcpy (buf, "{extrs,|extrw,s,}");
6651 else
6652 strcpy (buf, "bb,");
6653 if (useskip && GET_MODE (operands[0]) == DImode)
6654 strcpy (buf, "extrd,s,*");
6655 else if (GET_MODE (operands[0]) == DImode)
6656 strcpy (buf, "bb,*");
6657 if ((which == 0 && negated)
6658 || (which == 1 && ! negated))
6659 strcat (buf, ">=");
6660 else
6661 strcat (buf, "<");
6662 if (useskip)
6663 strcat (buf, " %0,%1,1,%%r0");
6664 else if (nullify && negated)
6665 {
6666 if (branch_needs_nop_p (insn))
6667 strcat (buf, ",n %0,%1,%3%#");
6668 else
6669 strcat (buf, ",n %0,%1,%3");
6670 }
6671 else if (nullify && ! negated)
6672 {
6673 if (branch_needs_nop_p (insn))
6674 strcat (buf, ",n %0,%1,%2%#");
6675 else
6676 strcat (buf, ",n %0,%1,%2");
6677 }
6678 else if (! nullify && negated)
6679 strcat (buf, " %0,%1,%3");
6680 else if (! nullify && ! negated)
6681 strcat (buf, " %0,%1,%2");
6682 break;
6683
6684 /* All long conditionals. Note a short backward branch with an
6685 unfilled delay slot is treated just like a long backward branch
6686 with an unfilled delay slot. */
6687 case 8:
6688 /* Handle weird backwards branch with a filled delay slot
6689 which is nullified. */
6690 if (dbr_sequence_length () != 0
6691 && ! forward_branch_p (insn)
6692 && nullify)
6693 {
6694 strcpy (buf, "bb,");
6695 if (GET_MODE (operands[0]) == DImode)
6696 strcat (buf, "*");
6697 if ((which == 0 && negated)
6698 || (which == 1 && ! negated))
6699 strcat (buf, "<");
6700 else
6701 strcat (buf, ">=");
6702 if (negated)
6703 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6704 else
6705 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6706 }
6707 /* Handle short backwards branch with an unfilled delay slot.
6708 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6709 taken and untaken branches. */
6710 else if (dbr_sequence_length () == 0
6711 && ! forward_branch_p (insn)
6712 && INSN_ADDRESSES_SET_P ()
6713 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6714 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6715 {
6716 strcpy (buf, "bb,");
6717 if (GET_MODE (operands[0]) == DImode)
6718 strcat (buf, "*");
6719 if ((which == 0 && negated)
6720 || (which == 1 && ! negated))
6721 strcat (buf, ">=");
6722 else
6723 strcat (buf, "<");
6724 if (negated)
6725 strcat (buf, " %0,%1,%3%#");
6726 else
6727 strcat (buf, " %0,%1,%2%#");
6728 }
6729 else
6730 {
6731 if (GET_MODE (operands[0]) == DImode)
6732 strcpy (buf, "extrd,s,*");
6733 else
6734 strcpy (buf, "{extrs,|extrw,s,}");
6735 if ((which == 0 && negated)
6736 || (which == 1 && ! negated))
6737 strcat (buf, "<");
6738 else
6739 strcat (buf, ">=");
6740 if (nullify && negated)
6741 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6742 else if (nullify && ! negated)
6743 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6744 else if (negated)
6745 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6746 else
6747 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6748 }
6749 break;
6750
6751 default:
6752 /* The reversed conditional branch must branch over one additional
6753 instruction if the delay slot is filled and needs to be extracted
6754 by output_lbranch. If the delay slot is empty or this is a
6755 nullified forward branch, the instruction after the reversed
6756 conditional branch must be nullified. */
6757 if (dbr_sequence_length () == 0
6758 || (nullify && forward_branch_p (insn)))
6759 {
6760 nullify = 1;
6761 xdelay = 0;
6762 operands[4] = GEN_INT (length);
6763 }
6764 else
6765 {
6766 xdelay = 1;
6767 operands[4] = GEN_INT (length + 4);
6768 }
6769
6770 if (GET_MODE (operands[0]) == DImode)
6771 strcpy (buf, "bb,*");
6772 else
6773 strcpy (buf, "bb,");
6774 if ((which == 0 && negated)
6775 || (which == 1 && !negated))
6776 strcat (buf, "<");
6777 else
6778 strcat (buf, ">=");
6779 if (nullify)
6780 strcat (buf, ",n %0,%1,.+%4");
6781 else
6782 strcat (buf, " %0,%1,.+%4");
6783 output_asm_insn (buf, operands);
6784 return output_lbranch (negated ? operands[3] : operands[2],
6785 insn, xdelay);
6786 }
6787 return buf;
6788 }
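
/* Example of the short form built above: with the bit number in
   operand 1, the output might be

	bb,< %r4,5,L$0011

   which branches when bit 5 of %r4 (numbering from the most
   significant bit) is one; bb,>= branches when it is zero.  */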
6789
6790 /* This routine handles all the branch-on-variable-bit conditional branch
6791 sequences we might need to generate. It handles nullification of delay
6792 slots, varying length branches, negated branches and all combinations
6793 of the above. it returns the appropriate output template to emit the
6794 branch. */
6795
6796 const char *
6797 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6798 {
6799 static char buf[100];
6800 bool useskip;
6801 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6802 int length = get_attr_length (insn);
6803 int xdelay;
6804
6805 /* A conditional branch to the following instruction (e.g. the delay slot) is
6806 asking for a disaster. I do not think this can happen as this pattern
6807 is only used when optimizing; jump optimization should eliminate the
6808 jump. But be prepared just in case. */
6809
6810 if (branch_to_delay_slot_p (insn))
6811 return "nop";
6812
6813 /* If this is a long branch with its delay slot unfilled, set `nullify'
6814 as it can nullify the delay slot and save a nop. */
6815 if (length == 8 && dbr_sequence_length () == 0)
6816 nullify = 1;
6817
6818 /* If this is a short forward conditional branch which did not get
6819 its delay slot filled, the delay slot can still be nullified. */
6820 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6821 nullify = forward_branch_p (insn);
6822
6823 /* A forward branch over a single nullified insn can be done with an
6824 extrs instruction. This avoids a single cycle penalty due to a
6825 mis-predicted branch if we fall through (branch not taken). */
6826 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6827
6828 switch (length)
6829 {
6830
6831 /* All short conditional branches except backwards with an unfilled
6832 delay slot. */
6833 case 4:
6834 if (useskip)
6835 strcpy (buf, "{vextrs,|extrw,s,}");
6836 else
6837 strcpy (buf, "{bvb,|bb,}");
6838 if (useskip && GET_MODE (operands[0]) == DImode)
6839 strcpy (buf, "extrd,s,*");
6840 else if (GET_MODE (operands[0]) == DImode)
6841 strcpy (buf, "bb,*");
6842 if ((which == 0 && negated)
6843 || (which == 1 && ! negated))
6844 strcat (buf, ">=");
6845 else
6846 strcat (buf, "<");
6847 if (useskip)
6848 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6849 else if (nullify && negated)
6850 {
6851 if (branch_needs_nop_p (insn))
6852 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6853 else
6854 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6855 }
6856 else if (nullify && ! negated)
6857 {
6858 if (branch_needs_nop_p (insn))
6859 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6860 else
6861 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6862 }
6863 else if (! nullify && negated)
6864 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6865 else if (! nullify && ! negated)
6866 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6867 break;
6868
6869 /* All long conditionals. Note a short backward branch with an
6870 unfilled delay slot is treated just like a long backward branch
6871 with an unfilled delay slot. */
6872 case 8:
6873 /* Handle weird backwards branch with a filled delay slot
6874 which is nullified. */
6875 if (dbr_sequence_length () != 0
6876 && ! forward_branch_p (insn)
6877 && nullify)
6878 {
6879 strcpy (buf, "{bvb,|bb,}");
6880 if (GET_MODE (operands[0]) == DImode)
6881 strcat (buf, "*");
6882 if ((which == 0 && negated)
6883 || (which == 1 && ! negated))
6884 strcat (buf, "<");
6885 else
6886 strcat (buf, ">=");
6887 if (negated)
6888 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6889 else
6890 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6891 }
6892 /* Handle short backwards branch with an unfilled delay slot.
6893 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6894 taken and untaken branches. */
6895 else if (dbr_sequence_length () == 0
6896 && ! forward_branch_p (insn)
6897 && INSN_ADDRESSES_SET_P ()
6898 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6899 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6900 {
6901 strcpy (buf, "{bvb,|bb,}");
6902 if (GET_MODE (operands[0]) == DImode)
6903 strcat (buf, "*");
6904 if ((which == 0 && negated)
6905 || (which == 1 && ! negated))
6906 strcat (buf, ">=");
6907 else
6908 strcat (buf, "<");
6909 if (negated)
6910 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6911 else
6912 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6913 }
6914 else
6915 {
6916 strcpy (buf, "{vextrs,|extrw,s,}");
6917 if (GET_MODE (operands[0]) == DImode)
6918 strcpy (buf, "extrd,s,*");
6919 if ((which == 0 && negated)
6920 || (which == 1 && ! negated))
6921 strcat (buf, "<");
6922 else
6923 strcat (buf, ">=");
6924 if (nullify && negated)
6925 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6926 else if (nullify && ! negated)
6927 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6928 else if (negated)
6929 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6930 else
6931 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6932 }
6933 break;
6934
6935 default:
6936 /* The reversed conditional branch must branch over one additional
6937 instruction if the delay slot is filled and needs to be extracted
6938 by output_lbranch. If the delay slot is empty or this is a
6939 nullified forward branch, the instruction after the reversed
6940 conditional branch must be nullified. */
6941 if (dbr_sequence_length () == 0
6942 || (nullify && forward_branch_p (insn)))
6943 {
6944 nullify = 1;
6945 xdelay = 0;
6946 operands[4] = GEN_INT (length);
6947 }
6948 else
6949 {
6950 xdelay = 1;
6951 operands[4] = GEN_INT (length + 4);
6952 }
6953
6954 if (GET_MODE (operands[0]) == DImode)
6955 strcpy (buf, "bb,*");
6956 else
6957 strcpy (buf, "{bvb,|bb,}");
6958 if ((which == 0 && negated)
6959 || (which == 1 && !negated))
6960 strcat (buf, "<");
6961 else
6962 strcat (buf, ">=");
6963 if (nullify)
6964 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6965 else
6966 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6967 output_asm_insn (buf, operands);
6968 return output_lbranch (negated ? operands[3] : operands[2],
6969 insn, xdelay);
6970 }
6971 return buf;
6972 }
6973
6974 /* Return the output template for emitting a dbra type insn.
6975
6976 Note it may perform some output operations on its own before
6977 returning the final output string. */
6978 const char *
6979 output_dbra (rtx *operands, rtx insn, int which_alternative)
6980 {
6981 int length = get_attr_length (insn);
6982
6983 /* A conditional branch to the following instruction (e.g. the delay slot) is
6984 asking for a disaster. Be prepared! */
6985
6986 if (branch_to_delay_slot_p (insn))
6987 {
6988 if (which_alternative == 0)
6989 return "ldo %1(%0),%0";
6990 else if (which_alternative == 1)
6991 {
6992 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6993 output_asm_insn ("ldw -16(%%r30),%4", operands);
6994 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6995 return "{fldws|fldw} -16(%%r30),%0";
6996 }
6997 else
6998 {
6999 output_asm_insn ("ldw %0,%4", operands);
7000 return "ldo %1(%4),%4\n\tstw %4,%0";
7001 }
7002 }
7003
7004 if (which_alternative == 0)
7005 {
7006 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7007 int xdelay;
7008
7009 /* If this is a long branch with its delay slot unfilled, set `nullify'
7010 as it can nullify the delay slot and save a nop. */
7011 if (length == 8 && dbr_sequence_length () == 0)
7012 nullify = 1;
7013
7014 /* If this is a short forward conditional branch which did not get
7015 its delay slot filled, the delay slot can still be nullified. */
7016 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7017 nullify = forward_branch_p (insn);
7018
7019 switch (length)
7020 {
7021 case 4:
7022 if (nullify)
7023 {
7024 if (branch_needs_nop_p (insn))
7025 return "addib,%C2,n %1,%0,%3%#";
7026 else
7027 return "addib,%C2,n %1,%0,%3";
7028 }
7029 else
7030 return "addib,%C2 %1,%0,%3";
7031
7032 case 8:
7033 /* Handle weird backwards branch with a filled delay slot
7034 which is nullified. */
7035 if (dbr_sequence_length () != 0
7036 && ! forward_branch_p (insn)
7037 && nullify)
7038 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7039 /* Handle short backwards branch with an unfilled delay slot.
7040 Using an addb;nop rather than addi;bl saves 1 cycle for both
7041 taken and untaken branches. */
7042 else if (dbr_sequence_length () == 0
7043 && ! forward_branch_p (insn)
7044 && INSN_ADDRESSES_SET_P ()
7045 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7046 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7047 return "addib,%C2 %1,%0,%3%#";
7048
7049 /* Handle normal cases. */
7050 if (nullify)
7051 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7052 else
7053 return "addi,%N2 %1,%0,%0\n\tb %3";
7054
7055 default:
7056 /* The reversed conditional branch must branch over one additional
7057 instruction if the delay slot is filled and needs to be extracted
7058 by output_lbranch. If the delay slot is empty or this is a
7059 nullified forward branch, the instruction after the reversed
7060 conditional branch must be nullified. */
7061 if (dbr_sequence_length () == 0
7062 || (nullify && forward_branch_p (insn)))
7063 {
7064 nullify = 1;
7065 xdelay = 0;
7066 operands[4] = GEN_INT (length);
7067 }
7068 else
7069 {
7070 xdelay = 1;
7071 operands[4] = GEN_INT (length + 4);
7072 }
7073
7074 if (nullify)
7075 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7076 else
7077 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7078
7079 return output_lbranch (operands[3], insn, xdelay);
7080 }
7081
7082 }
7083 /* Deal with gross reload from FP register case. */
7084 else if (which_alternative == 1)
7085 {
7086 /* Move loop counter from FP register to MEM then into a GR,
7087 increment the GR, store the GR into MEM, and finally reload
7088 the FP register from MEM from within the branch's delay slot. */
7089 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7090 operands);
7091 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7092 if (length == 24)
7093 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7094 else if (length == 28)
7095 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7096 else
7097 {
7098 operands[5] = GEN_INT (length - 16);
7099 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7100 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7101 return output_lbranch (operands[3], insn, 0);
7102 }
7103 }
7104 /* Deal with gross reload from memory case. */
7105 else
7106 {
7107 /* Reload loop counter from memory, the store back to memory
7108 happens in the branch's delay slot. */
7109 output_asm_insn ("ldw %0,%4", operands);
7110 if (length == 12)
7111 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7112 else if (length == 16)
7113 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7114 else
7115 {
7116 operands[5] = GEN_INT (length - 4);
7117 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7118 return output_lbranch (operands[3], insn, 0);
7119 }
7120 }
7121 }
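
/* Example of the common register alternative above: the short form
   "addib,%C2 %1,%0,%3" might print as

	addib,> -1,%r3,L$0010

   adding -1 to the loop counter in %r3 and branching on the result
   in a single instruction (operands and label illustrative).  */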
7122
7123 /* Return the output template for emitting a movb type insn.
7124
7125 Note it may perform some output operations on its own before
7126 returning the final output string. */
7127 const char *
7128 output_movb (rtx *operands, rtx insn, int which_alternative,
7129 int reverse_comparison)
7130 {
7131 int length = get_attr_length (insn);
7132
7133 /* A conditional branch to the following instruction (e.g. the delay slot) is
7134 asking for a disaster. Be prepared! */
7135
7136 if (branch_to_delay_slot_p (insn))
7137 {
7138 if (which_alternative == 0)
7139 return "copy %1,%0";
7140 else if (which_alternative == 1)
7141 {
7142 output_asm_insn ("stw %1,-16(%%r30)", operands);
7143 return "{fldws|fldw} -16(%%r30),%0";
7144 }
7145 else if (which_alternative == 2)
7146 return "stw %1,%0";
7147 else
7148 return "mtsar %r1";
7149 }
7150
7151 /* Support the second variant. */
7152 if (reverse_comparison)
7153 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7154
7155 if (which_alternative == 0)
7156 {
7157 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7158 int xdelay;
7159
7160 /* If this is a long branch with its delay slot unfilled, set `nullify'
7161 as it can nullify the delay slot and save a nop. */
7162 if (length == 8 && dbr_sequence_length () == 0)
7163 nullify = 1;
7164
7165 /* If this is a short forward conditional branch which did not get
7166 its delay slot filled, the delay slot can still be nullified. */
7167 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7168 nullify = forward_branch_p (insn);
7169
7170 switch (length)
7171 {
7172 case 4:
7173 if (nullify)
7174 {
7175 if (branch_needs_nop_p (insn))
7176 return "movb,%C2,n %1,%0,%3%#";
7177 else
7178 return "movb,%C2,n %1,%0,%3";
7179 }
7180 else
7181 return "movb,%C2 %1,%0,%3";
7182
7183 case 8:
7184 /* Handle weird backwards branch with a filled delay slot
7185 which is nullified. */
7186 if (dbr_sequence_length () != 0
7187 && ! forward_branch_p (insn)
7188 && nullify)
7189 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7190
7191 /* Handle short backwards branch with an unfilled delay slot.
7192 Using a movb;nop rather than or;bl saves 1 cycle for both
7193 taken and untaken branches. */
7194 else if (dbr_sequence_length () == 0
7195 && ! forward_branch_p (insn)
7196 && INSN_ADDRESSES_SET_P ()
7197 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7198 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7199 return "movb,%C2 %1,%0,%3%#";
7200 /* Handle normal cases. */
7201 if (nullify)
7202 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7203 else
7204 return "or,%N2 %1,%%r0,%0\n\tb %3";
7205
7206 default:
7207 /* The reversed conditional branch must branch over one additional
7208 instruction if the delay slot is filled and needs to be extracted
7209 by output_lbranch. If the delay slot is empty or this is a
7210 nullified forward branch, the instruction after the reversed
7211 conditional branch must be nullified. */
7212 if (dbr_sequence_length () == 0
7213 || (nullify && forward_branch_p (insn)))
7214 {
7215 nullify = 1;
7216 xdelay = 0;
7217 operands[4] = GEN_INT (length);
7218 }
7219 else
7220 {
7221 xdelay = 1;
7222 operands[4] = GEN_INT (length + 4);
7223 }
7224
7225 if (nullify)
7226 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7227 else
7228 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7229
7230 return output_lbranch (operands[3], insn, xdelay);
7231 }
7232 }
7233 /* Deal with gross reload for FP destination register case. */
7234 else if (which_alternative == 1)
7235 {
7236 /* Move source register to MEM, perform the branch test, then
7237 finally load the FP register from MEM from within the branch's
7238 delay slot. */
7239 output_asm_insn ("stw %1,-16(%%r30)", operands);
7240 if (length == 12)
7241 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7242 else if (length == 16)
7243 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7244 else
7245 {
7246 operands[4] = GEN_INT (length - 4);
7247 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7248 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7249 return output_lbranch (operands[3], insn, 0);
7250 }
7251 }
7252 /* Deal with gross reload from memory case. */
7253 else if (which_alternative == 2)
7254 {
7255 /* Reload the loop counter from memory; the store back to memory
7256 happens in the branch's delay slot. */
7257 if (length == 8)
7258 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7259 else if (length == 12)
7260 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7261 else
7262 {
7263 operands[4] = GEN_INT (length);
7264 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7265 operands);
7266 return output_lbranch (operands[3], insn, 0);
7267 }
7268 }
7269 /* Handle SAR as a destination. */
7270 else
7271 {
7272 if (length == 8)
7273 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7274 else if (length == 12)
7275 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7276 else
7277 {
7278 operands[4] = GEN_INT (length);
7279 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7280 operands);
7281 return output_lbranch (operands[3], insn, 0);
7282 }
7283 }
7284 }
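
/* Illustrative note (hypothetical operands): the short form above
   collapses the copy and the branch into one instruction, e.g.

       movb,= %r4,%r5,L$0010

   which copies %r4 into %r5 and branches to L$0010 when the moved
   value is zero.  The reload alternatives bounce the value through
   the stack slot at -16(%r30) instead.  */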
7285
7286 /* Copy any FP arguments in INSN into integer registers. */
7287 static void
7288 copy_fp_args (rtx insn)
7289 {
7290 rtx link;
7291 rtx xoperands[2];
7292
7293 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7294 {
7295 int arg_mode, regno;
7296 rtx use = XEXP (link, 0);
7297
7298 if (! (GET_CODE (use) == USE
7299 && GET_CODE (XEXP (use, 0)) == REG
7300 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7301 continue;
7302
7303 arg_mode = GET_MODE (XEXP (use, 0));
7304 regno = REGNO (XEXP (use, 0));
7305
7306 /* Is it a floating point register? */
7307 if (regno >= 32 && regno <= 39)
7308 {
7309 /* Copy the FP register into an integer register via memory. */
7310 if (arg_mode == SFmode)
7311 {
7312 xoperands[0] = XEXP (use, 0);
7313 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7314 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7315 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7316 }
7317 else
7318 {
7319 xoperands[0] = XEXP (use, 0);
7320 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7321 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7322 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7323 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7324 }
7325 }
7326 }
7327 }
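
/* Sample sequence (hypothetical registers): an SFmode argument is
   copied with two instructions of the form

       fstws %frX,-16(%sr0,%r30)  ; spill the FP argument
       ldw -16(%sr0,%r30),%rY     ; reload it into the matching GR

   while a DFmode argument takes an fstds plus two ldw instructions,
   one for each word of the double.  */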
7328
7329 /* Compute length of the FP argument copy sequence for INSN. */
7330 static int
7331 length_fp_args (rtx insn)
7332 {
7333 int length = 0;
7334 rtx link;
7335
7336 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7337 {
7338 int arg_mode, regno;
7339 rtx use = XEXP (link, 0);
7340
7341 if (! (GET_CODE (use) == USE
7342 && GET_CODE (XEXP (use, 0)) == REG
7343 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7344 continue;
7345
7346 arg_mode = GET_MODE (XEXP (use, 0));
7347 regno = REGNO (XEXP (use, 0));
7348
7349 /* Is it a floating point register? */
7350 if (regno >= 32 && regno <= 39)
7351 {
7352 if (arg_mode == SFmode)
7353 length += 8;
7354 else
7355 length += 12;
7356 }
7357 }
7358
7359 return length;
7360 }
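
/* Worked example (illustrative): a call passing one SFmode and one
   DFmode argument in FP registers yields 8 + 12 = 20 bytes of copy
   code, which attr_length_call below adds to the 32-bit plabel
   sequence estimate on SOM targets.  */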
7361
7362 /* Return the attribute length for the millicode call instruction INSN.
7363 The length must match the code generated by output_millicode_call.
7364 We include the delay slot in the returned length as it is better to
7365 overestimate the length than to underestimate it. */
7366
7367 int
7368 attr_length_millicode_call (rtx insn)
7369 {
7370 unsigned long distance = -1;
7371 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7372
7373 if (INSN_ADDRESSES_SET_P ())
7374 {
7375 distance = (total + insn_current_reference_address (insn));
7376 if (distance < total)
7377 distance = -1;
7378 }
7379
7380 if (TARGET_64BIT)
7381 {
7382 if (!TARGET_LONG_CALLS && distance < 7600000)
7383 return 8;
7384
7385 return 20;
7386 }
7387 else if (TARGET_PORTABLE_RUNTIME)
7388 return 24;
7389 else
7390 {
7391 if (!TARGET_LONG_CALLS && distance < 240000)
7392 return 8;
7393
7394 if (TARGET_LONG_ABS_CALL && !flag_pic)
7395 return 12;
7396
7397 return 24;
7398 }
7399 }
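
/* For reference (illustrative): the 8-byte estimate corresponds to a
   single "{bl|b,l} $$millicode,%r31" plus its delay slot, the 12-byte
   estimate to the ldil/ble long absolute form, and the 20- and
   24-byte estimates to the pc-relative, PIC and portable-runtime
   sequences emitted by output_millicode_call below.  */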
7400
7401 /* INSN is a function call. It may have an unconditional jump
7402 in its delay slot.
7403
7404 CALL_DEST is the routine we are calling. */
7405
7406 const char *
7407 output_millicode_call (rtx insn, rtx call_dest)
7408 {
7409 int attr_length = get_attr_length (insn);
7410 int seq_length = dbr_sequence_length ();
7411 int distance;
7412 rtx seq_insn;
7413 rtx xoperands[3];
7414
7415 xoperands[0] = call_dest;
7416 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7417
7418 /* Handle the common case where we are sure that the branch will
7419 reach the beginning of the $CODE$ subspace. The within reach
7420 form of the $$sh_func_adrs call has a length of 28. Because
7421 it has an attribute type of multi, it never has a nonzero
7422 sequence length. The length of the $$sh_func_adrs is the same
7423 as certain out of reach PIC calls to other routines. */
7424 if (!TARGET_LONG_CALLS
7425 && ((seq_length == 0
7426 && (attr_length == 12
7427 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7428 || (seq_length != 0 && attr_length == 8)))
7429 {
7430 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7431 }
7432 else
7433 {
7434 if (TARGET_64BIT)
7435 {
7436 /* It might seem that one insn could be saved by accessing
7437 the millicode function using the linkage table. However,
7438 this doesn't work in shared libraries and other dynamically
7439 loaded objects. Using a pc-relative sequence also avoids
7440 problems related to the implicit use of the gp register. */
7441 output_asm_insn ("b,l .+8,%%r1", xoperands);
7442
7443 if (TARGET_GAS)
7444 {
7445 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7446 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7447 }
7448 else
7449 {
7450 xoperands[1] = gen_label_rtx ();
7451 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7452 targetm.asm_out.internal_label (asm_out_file, "L",
7453 CODE_LABEL_NUMBER (xoperands[1]));
7454 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7455 }
7456
7457 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7458 }
7459 else if (TARGET_PORTABLE_RUNTIME)
7460 {
7461 /* Pure portable runtime doesn't allow be/ble; we also don't
7462 have PIC support in the assembler/linker, so this sequence
7463 is needed. */
7464
7465 /* Get the address of our target into %r1. */
7466 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7467 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7468
7469 /* Get our return address into %r31. */
7470 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7471 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7472
7473 /* Jump to our target address in %r1. */
7474 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7475 }
7476 else if (!flag_pic)
7477 {
7478 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7479 if (TARGET_PA_20)
7480 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7481 else
7482 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7483 }
7484 else
7485 {
7486 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7487 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7488
7489 if (TARGET_SOM || !TARGET_GAS)
7490 {
7491 /* The HP assembler can generate relocations for the
7492 difference of two symbols. GAS can do this for a
7493 millicode symbol but not an arbitrary external
7494 symbol when generating SOM output. */
7495 xoperands[1] = gen_label_rtx ();
7496 targetm.asm_out.internal_label (asm_out_file, "L",
7497 CODE_LABEL_NUMBER (xoperands[1]));
7498 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7499 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7500 }
7501 else
7502 {
7503 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7504 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7505 xoperands);
7506 }
7507
7508 /* Jump to our target address in %r1. */
7509 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7510 }
7511 }
7512
7513 if (seq_length == 0)
7514 output_asm_insn ("nop", xoperands);
7515
7516 /* We are done if there isn't a jump in the delay slot. */
7517 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7518 return "";
7519
7520 /* This call has an unconditional jump in its delay slot. */
7521 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7522
7523 /* See if the return address can be adjusted. Use the containing
7524 sequence insn's address. */
7525 if (INSN_ADDRESSES_SET_P ())
7526 {
7527 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7528 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7529 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7530
7531 if (VAL_14_BITS_P (distance))
7532 {
7533 xoperands[1] = gen_label_rtx ();
7534 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7535 targetm.asm_out.internal_label (asm_out_file, "L",
7536 CODE_LABEL_NUMBER (xoperands[1]));
7537 }
7538 else
7539 /* ??? This branch may not reach its target. */
7540 output_asm_insn ("nop\n\tb,n %0", xoperands);
7541 }
7542 else
7543 /* ??? This branch may not reach its target. */
7544 output_asm_insn ("nop\n\tb,n %0", xoperands);
7545
7546 /* Delete the jump. */
7547 SET_INSN_DELETED (NEXT_INSN (insn));
7548
7549 return "";
7550 }
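
/* Example of the long non-PIC form above (hypothetical millicode
   routine):

       ldil L'$$remI,%r1
       ble R'$$remI(%sr4,%r1)
       nop                        ; delay slot when nothing is filled

   where $$remI is one of the standard millicode entry points.  */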
7551
7552 /* Return the attribute length of the call instruction INSN. The SIBCALL
7553 flag indicates whether INSN is a regular call or a sibling call. The
7554 length returned must be longer than the code actually generated by
7555 output_call. Since branch shortening is done before delay branch
7556 sequencing, there is no way to determine whether or not the delay
7557 slot will be filled during branch shortening. Even when the delay
7558 slot is filled, we may have to add a nop if the delay slot contains
7559 a branch that can't reach its target. Thus, we always have to include
7560 the delay slot in the length estimate. This used to be done in
7561 pa_adjust_insn_length but we do it here now as some sequences always
7562 fill the delay slot and we can save four bytes in the estimate for
7563 these sequences. */
7564
7565 int
7566 attr_length_call (rtx insn, int sibcall)
7567 {
7568 int local_call;
7569 rtx call, call_dest;
7570 tree call_decl;
7571 int length = 0;
7572 rtx pat = PATTERN (insn);
7573 unsigned long distance = -1;
7574
7575 gcc_assert (GET_CODE (insn) == CALL_INSN);
7576
7577 if (INSN_ADDRESSES_SET_P ())
7578 {
7579 unsigned long total;
7580
7581 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7582 distance = (total + insn_current_reference_address (insn));
7583 if (distance < total)
7584 distance = -1;
7585 }
7586
7587 gcc_assert (GET_CODE (pat) == PARALLEL);
7588
7589 /* Get the call rtx. */
7590 call = XVECEXP (pat, 0, 0);
7591 if (GET_CODE (call) == SET)
7592 call = SET_SRC (call);
7593
7594 gcc_assert (GET_CODE (call) == CALL);
7595
7596 /* Determine if this is a local call. */
7597 call_dest = XEXP (XEXP (call, 0), 0);
7598 call_decl = SYMBOL_REF_DECL (call_dest);
7599 local_call = call_decl && targetm.binds_local_p (call_decl);
7600
7601 /* pc-relative branch. */
7602 if (!TARGET_LONG_CALLS
7603 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7604 || distance < 240000))
7605 length += 8;
7606
7607 /* 64-bit plabel sequence. */
7608 else if (TARGET_64BIT && !local_call)
7609 length += sibcall ? 28 : 24;
7610
7611 /* non-pic long absolute branch sequence. */
7612 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7613 length += 12;
7614
7615 /* long pc-relative branch sequence. */
7616 else if (TARGET_LONG_PIC_SDIFF_CALL
7617 || (TARGET_GAS && !TARGET_SOM
7618 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7619 {
7620 length += 20;
7621
7622 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7623 length += 8;
7624 }
7625
7626 /* 32-bit plabel sequence. */
7627 else
7628 {
7629 length += 32;
7630
7631 if (TARGET_SOM)
7632 length += length_fp_args (insn);
7633
7634 if (flag_pic)
7635 length += 4;
7636
7637 if (!TARGET_PA_20)
7638 {
7639 if (!sibcall)
7640 length += 8;
7641
7642 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7643 length += 8;
7644 }
7645 }
7646
7647 return length;
7648 }
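
/* Worked example (illustrative): an out-of-reach external call on
   32-bit SOM with -fPIC and without PA 2.0 falls into the plabel
   case: 32 bytes, plus the FP argument copies, plus 4 for PIC, plus
   up to 16 more for the pre-2.0 return address and space register
   fixups.  */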
7649
7650 /* INSN is a function call. It may have an unconditional jump
7651 in its delay slot.
7652
7653 CALL_DEST is the routine we are calling. */
7654
7655 const char *
7656 output_call (rtx insn, rtx call_dest, int sibcall)
7657 {
7658 int delay_insn_deleted = 0;
7659 int delay_slot_filled = 0;
7660 int seq_length = dbr_sequence_length ();
7661 tree call_decl = SYMBOL_REF_DECL (call_dest);
7662 int local_call = call_decl && targetm.binds_local_p (call_decl);
7663 rtx xoperands[2];
7664
7665 xoperands[0] = call_dest;
7666
7667 /* Handle the common case where we're sure that the branch will reach
7668 the beginning of the "$CODE$" subspace. This is the beginning of
7669 the current function if we are in a named section. */
7670 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7671 {
7672 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7673 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7674 }
7675 else
7676 {
7677 if (TARGET_64BIT && !local_call)
7678 {
7679 /* ??? As far as I can tell, the HP linker doesn't support the
7680 long pc-relative sequence described in the 64-bit runtime
7681 architecture. So, we use a slightly longer indirect call. */
7682 xoperands[0] = get_deferred_plabel (call_dest);
7683 xoperands[1] = gen_label_rtx ();
7684
7685 /* If this isn't a sibcall, we put the load of %r27 into the
7686 delay slot. We can't do this in a sibcall as we don't
7687 have a second call-clobbered scratch register available. */
7688 if (seq_length != 0
7689 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7690 && !sibcall)
7691 {
7692 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7693 optimize, 0, NULL);
7694
7695 /* Now delete the delay insn. */
7696 SET_INSN_DELETED (NEXT_INSN (insn));
7697 delay_insn_deleted = 1;
7698 }
7699
7700 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7701 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7702 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7703
7704 if (sibcall)
7705 {
7706 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7707 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7708 output_asm_insn ("bve (%%r1)", xoperands);
7709 }
7710 else
7711 {
7712 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7713 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7714 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7715 delay_slot_filled = 1;
7716 }
7717 }
7718 else
7719 {
7720 int indirect_call = 0;
7721
7722 /* Emit a long call. There are several different sequences
7723 of increasing length and complexity. In most cases,
7724 they don't allow an instruction in the delay slot. */
7725 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7726 && !TARGET_LONG_PIC_SDIFF_CALL
7727 && !(TARGET_GAS && !TARGET_SOM
7728 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7729 && !TARGET_64BIT)
7730 indirect_call = 1;
7731
7732 if (seq_length != 0
7733 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7734 && !sibcall
7735 && (!TARGET_PA_20
7736 || indirect_call
7737 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7738 {
7739 /* A non-jump insn in the delay slot. By definition we can
7740 emit this insn before the call (and in fact before argument
7741 relocation). */
7742 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7743 NULL);
7744
7745 /* Now delete the delay insn. */
7746 SET_INSN_DELETED (NEXT_INSN (insn));
7747 delay_insn_deleted = 1;
7748 }
7749
7750 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7751 {
7752 /* This is the best sequence for making long calls in
7753 non-pic code. Unfortunately, GNU ld doesn't provide
7754 the stub needed for external calls, and GAS's support
7755 for this with the SOM linker is buggy. It is safe
7756 to use this for local calls. */
7757 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7758 if (sibcall)
7759 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7760 else
7761 {
7762 if (TARGET_PA_20)
7763 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7764 xoperands);
7765 else
7766 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7767
7768 output_asm_insn ("copy %%r31,%%r2", xoperands);
7769 delay_slot_filled = 1;
7770 }
7771 }
7772 else
7773 {
7774 if (TARGET_LONG_PIC_SDIFF_CALL)
7775 {
7776 /* The HP assembler and linker can handle relocations
7777 for the difference of two symbols. The HP assembler
7778 recognizes the sequence as a pc-relative call and
7779 the linker provides stubs when needed. */
7780 xoperands[1] = gen_label_rtx ();
7781 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7782 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7783 targetm.asm_out.internal_label (asm_out_file, "L",
7784 CODE_LABEL_NUMBER (xoperands[1]));
7785 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7786 }
7787 else if (TARGET_GAS && !TARGET_SOM
7788 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7789 {
7790 /* GAS currently can't generate the relocations that
7791 are needed for the SOM linker under HP-UX using this
7792 sequence. The GNU linker doesn't generate the stubs
7793 that are needed for external calls on TARGET_ELF32
7794 with this sequence. For now, we have to use a
7795 longer plabel sequence when using GAS. */
7796 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7797 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7798 xoperands);
7799 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7800 xoperands);
7801 }
7802 else
7803 {
7804 /* Emit a long plabel-based call sequence. This is
7805 essentially an inline implementation of $$dyncall.
7806 We don't actually try to call $$dyncall as this is
7807 as difficult as calling the function itself. */
7808 xoperands[0] = get_deferred_plabel (call_dest);
7809 xoperands[1] = gen_label_rtx ();
7810
7811 /* Since the call is indirect, FP arguments in registers
7812 need to be copied to the general registers. Then, the
7813 argument relocation stub will copy them back. */
7814 if (TARGET_SOM)
7815 copy_fp_args (insn);
7816
7817 if (flag_pic)
7818 {
7819 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7820 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7821 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7822 }
7823 else
7824 {
7825 output_asm_insn ("addil LR'%0-$global$,%%r27",
7826 xoperands);
7827 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7828 xoperands);
7829 }
7830
7831 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7832 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7833 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7834 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7835
7836 if (!sibcall && !TARGET_PA_20)
7837 {
7838 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7839 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7840 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7841 else
7842 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7843 }
7844 }
7845
7846 if (TARGET_PA_20)
7847 {
7848 if (sibcall)
7849 output_asm_insn ("bve (%%r1)", xoperands);
7850 else
7851 {
7852 if (indirect_call)
7853 {
7854 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7855 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7856 delay_slot_filled = 1;
7857 }
7858 else
7859 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7860 }
7861 }
7862 else
7863 {
7864 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7865 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7866 xoperands);
7867
7868 if (sibcall)
7869 {
7870 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7871 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7872 else
7873 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7874 }
7875 else
7876 {
7877 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7878 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7879 else
7880 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7881
7882 if (indirect_call)
7883 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7884 else
7885 output_asm_insn ("copy %%r31,%%r2", xoperands);
7886 delay_slot_filled = 1;
7887 }
7888 }
7889 }
7890 }
7891 }
7892
7893 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7894 output_asm_insn ("nop", xoperands);
7895
7896 /* We are done if there isn't a jump in the delay slot. */
7897 if (seq_length == 0
7898 || delay_insn_deleted
7899 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7900 return "";
7901
7902 /* A sibcall should never have a branch in the delay slot. */
7903 gcc_assert (!sibcall);
7904
7905 /* This call has an unconditional jump in its delay slot. */
7906 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7907
7908 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7909 {
7910 /* See if the return address can be adjusted. Use the containing
7911 sequence insn's address. This would break the regular call/return
7912 relationship assumed by the table-based EH unwinder, so only do that
7913 if the call is not possibly throwing. */
7914 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7915 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7916 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7917
7918 if (VAL_14_BITS_P (distance)
7919 && !(can_throw_internal (insn) || can_throw_external (insn)))
7920 {
7921 xoperands[1] = gen_label_rtx ();
7922 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7923 targetm.asm_out.internal_label (asm_out_file, "L",
7924 CODE_LABEL_NUMBER (xoperands[1]));
7925 }
7926 else
7927 output_asm_insn ("nop\n\tb,n %0", xoperands);
7928 }
7929 else
7930 output_asm_insn ("b,n %0", xoperands);
7931
7932 /* Delete the jump. */
7933 SET_INSN_DELETED (NEXT_INSN (insn));
7934
7935 return "";
7936 }
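
/* Example of the short form above (hypothetical callee): when the
   target is within reach, the entire call is

       bl foo,%r2
       nop                        ; delay slot when nothing is filled

   with %r2 receiving the return point, or %r0 for a sibcall.  */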
7937
7938 /* Return the attribute length of the indirect call instruction INSN.
7939 The length must match the code generated by output_indirect_call.
7940 The returned length includes the delay slot. Currently, the delay
7941 slot of an indirect call sequence is not exposed and it is used by
7942 the sequence itself. */
7943
7944 int
7945 attr_length_indirect_call (rtx insn)
7946 {
7947 unsigned long distance = -1;
7948 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7949
7950 if (INSN_ADDRESSES_SET_P ())
7951 {
7952 distance = (total + insn_current_reference_address (insn));
7953 if (distance < total)
7954 distance = -1;
7955 }
7956
7957 if (TARGET_64BIT)
7958 return 12;
7959
7960 if (TARGET_FAST_INDIRECT_CALLS
7961 || (!TARGET_PORTABLE_RUNTIME
7962 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7963 || distance < 240000)))
7964 return 8;
7965
7966 if (flag_pic)
7967 return 24;
7968
7969 if (TARGET_PORTABLE_RUNTIME)
7970 return 20;
7971
7972 /* Out of reach, can use ble. */
7973 return 12;
7974 }
7975
7976 const char *
7977 output_indirect_call (rtx insn, rtx call_dest)
7978 {
7979 rtx xoperands[1];
7980
7981 if (TARGET_64BIT)
7982 {
7983 xoperands[0] = call_dest;
7984 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7985 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7986 return "";
7987 }
7988
7989 /* First the special case for kernels, level 0 systems, etc. */
7990 if (TARGET_FAST_INDIRECT_CALLS)
7991 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7992
7993 /* Now the normal case -- we can reach $$dyncall directly or
7994 we're sure that we can get there via a long-branch stub.
7995
7996 No need to check target flags as the length uniquely identifies
7997 the remaining cases. */
7998 if (attr_length_indirect_call (insn) == 8)
7999 {
8000 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8001 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8002 variant of the B,L instruction can't be used on the SOM target. */
8003 if (TARGET_PA_20 && !TARGET_SOM)
8004 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8005 else
8006 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8007 }
8008
8009 /* Long millicode call, but we are not generating PIC or portable runtime
8010 code. */
8011 if (attr_length_indirect_call (insn) == 12)
8012 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8013
8014 /* Long millicode call for portable runtime. */
8015 if (attr_length_indirect_call (insn) == 20)
8016 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8017
8018 /* We need a long PIC call to $$dyncall. */
8019 xoperands[0] = NULL_RTX;
8020 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8021 if (TARGET_SOM || !TARGET_GAS)
8022 {
8023 xoperands[0] = gen_label_rtx ();
8024 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
8025 targetm.asm_out.internal_label (asm_out_file, "L",
8026 CODE_LABEL_NUMBER (xoperands[0]));
8027 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8028 }
8029 else
8030 {
8031 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8032 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8033 xoperands);
8034 }
8035 output_asm_insn ("blr %%r0,%%r2", xoperands);
8036 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8037 return "";
8038 }
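
/* Usage note (illustrative): callers of $$dyncall place the function
   pointer, possibly a plabel address, in %r22 beforehand, e.g.

       ldw RT'fptr(%r1),%r22      ; hypothetical pointer load
       bl $$dyncall,%r31          ; the 8-byte form selected above
       copy %r31,%r2

   $$dyncall dereferences plabels and handles inter-space branches.  */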
8039
8040 /* Return the total length of the save and restore instructions needed for
8041 the data linkage table pointer (i.e., the PIC register) across the call
8042 instruction INSN. No-return calls do not require a save and restore.
8043 In addition, we may be able to avoid the save and restore for calls
8044 within the same translation unit. */
8045
8046 int
8047 attr_length_save_restore_dltp (rtx insn)
8048 {
8049 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
8050 return 0;
8051
8052 return 8;
8053 }
8054
8055 /* In HPUX 8.0's shared library scheme, special relocations are needed
8056 for function labels if they might be passed to a function
8057 in a shared library (because shared libraries don't live in code
8058 space), and special magic is needed to construct their address. */
8059
8060 void
8061 hppa_encode_label (rtx sym)
8062 {
8063 const char *str = XSTR (sym, 0);
8064 int len = strlen (str) + 1;
8065 char *newstr, *p;
8066
8067 p = newstr = XALLOCAVEC (char, len + 1);
8068 *p++ = '@';
8069 strcpy (p, str);
8070
8071 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8072 }
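
/* Minimal sketch of the effect (hypothetical symbol, not in the
   original source):

       rtx sym = gen_rtx_SYMBOL_REF (Pmode, "foo");
       hppa_encode_label (sym);

   XSTR (sym, 0) is now "@foo"; pa_strip_name_encoding below recovers
   the plain name when the label is written out.  */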
8073
8074 static void
8075 pa_encode_section_info (tree decl, rtx rtl, int first)
8076 {
8077 int old_referenced = 0;
8078
8079 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8080 old_referenced
8081 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8082
8083 default_encode_section_info (decl, rtl, first);
8084
8085 if (first && TEXT_SPACE_P (decl))
8086 {
8087 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8088 if (TREE_CODE (decl) == FUNCTION_DECL)
8089 hppa_encode_label (XEXP (rtl, 0));
8090 }
8091 else if (old_referenced)
8092 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8093 }
8094
8095 /* This is roughly the inverse of pa_encode_section_info. */
8096
8097 static const char *
8098 pa_strip_name_encoding (const char *str)
8099 {
8100 str += (*str == '@');
8101 str += (*str == '*');
8102 return str;
8103 }
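
/* E.g. "@foo", "*foo" and "@*foo" all strip to "foo"; the '@' added
   by hppa_encode_label is removed before any '*' user-label prefix.  */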
8104
8105 /* Returns 1 if OP is a function label involved in a simple addition
8106 with a constant. Used to keep certain patterns from matching
8107 during instruction combination. */
8108 int
8109 is_function_label_plus_const (rtx op)
8110 {
8111 /* Strip off any CONST. */
8112 if (GET_CODE (op) == CONST)
8113 op = XEXP (op, 0);
8114
8115 return (GET_CODE (op) == PLUS
8116 && function_label_operand (XEXP (op, 0), VOIDmode)
8117 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8118 }
8119
8120 /* Output assembly code for a thunk to FUNCTION. */
8121
8122 static void
8123 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8124 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8125 tree function)
8126 {
8127 static unsigned int current_thunk_number;
8128 int val_14 = VAL_14_BITS_P (delta);
8129 unsigned int old_last_address = last_address, nbytes = 0;
8130 char label[16];
8131 rtx xoperands[4];
8132
8133 xoperands[0] = XEXP (DECL_RTL (function), 0);
8134 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8135 xoperands[2] = GEN_INT (delta);
8136
8137 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8138 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8139
8140 /* Output the thunk. We know that the function is in the same
8141 translation unit (i.e., the same space) as the thunk, and that
8142 thunks are output after their method. Thus, we don't need an
8143 external branch to reach the function. With SOM and GAS,
8144 functions and thunks are effectively in different sections.
8145 Thus, we can always use an IA-relative branch and the linker
8146 will add a long branch stub if necessary.
8147
8148 However, we have to be careful when generating PIC code on the
8149 SOM port to ensure that the sequence does not transfer to an
8150 import stub for the target function as this could clobber the
8151 return value saved at SP-24. This would also apply to the
8152 32-bit linux port if the multi-space model is implemented. */
8153 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8154 && !(flag_pic && TREE_PUBLIC (function))
8155 && (TARGET_GAS || last_address < 262132))
8156 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8157 && ((targetm_common.have_named_sections
8158 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8159 /* The GNU 64-bit linker has rather poor stub management.
8160 So, we use a long branch from thunks that aren't in
8161 the same section as the target function. */
8162 && ((!TARGET_64BIT
8163 && (DECL_SECTION_NAME (thunk_fndecl)
8164 != DECL_SECTION_NAME (function)))
8165 || ((DECL_SECTION_NAME (thunk_fndecl)
8166 == DECL_SECTION_NAME (function))
8167 && last_address < 262132)))
8168 || (targetm_common.have_named_sections
8169 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8170 && DECL_SECTION_NAME (function) == NULL
8171 && last_address < 262132)
8172 || (!targetm_common.have_named_sections
8173 && last_address < 262132))))
8174 {
8175 if (!val_14)
8176 output_asm_insn ("addil L'%2,%%r26", xoperands);
8177
8178 output_asm_insn ("b %0", xoperands);
8179
8180 if (val_14)
8181 {
8182 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8183 nbytes += 8;
8184 }
8185 else
8186 {
8187 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8188 nbytes += 12;
8189 }
8190 }
8191 else if (TARGET_64BIT)
8192 {
8193 /* We only have one call-clobbered scratch register, so we can't
8194 make use of the delay slot if delta doesn't fit in 14 bits. */
8195 if (!val_14)
8196 {
8197 output_asm_insn ("addil L'%2,%%r26", xoperands);
8198 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8199 }
8200
8201 output_asm_insn ("b,l .+8,%%r1", xoperands);
8202
8203 if (TARGET_GAS)
8204 {
8205 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8206 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8207 }
8208 else
8209 {
8210 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8211 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8212 }
8213
8214 if (val_14)
8215 {
8216 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8217 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8218 nbytes += 20;
8219 }
8220 else
8221 {
8222 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8223 nbytes += 24;
8224 }
8225 }
8226 else if (TARGET_PORTABLE_RUNTIME)
8227 {
8228 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8229 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8230
8231 if (!val_14)
8232 output_asm_insn ("addil L'%2,%%r26", xoperands);
8233
8234 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8235
8236 if (val_14)
8237 {
8238 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8239 nbytes += 16;
8240 }
8241 else
8242 {
8243 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8244 nbytes += 20;
8245 }
8246 }
8247 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8248 {
8249 /* The function is accessible from outside this module. The only
8250 way to avoid an import stub between the thunk and function is to
8251 call the function directly with an indirect sequence similar to
8252 that used by $$dyncall. This is possible because $$dyncall acts
8253 as the import stub in an indirect call. */
8254 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8255 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8256 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8257 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8258 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8259 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8260 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8261 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8262 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8263
8264 if (!val_14)
8265 {
8266 output_asm_insn ("addil L'%2,%%r26", xoperands);
8267 nbytes += 4;
8268 }
8269
8270 if (TARGET_PA_20)
8271 {
8272 output_asm_insn ("bve (%%r22)", xoperands);
8273 nbytes += 36;
8274 }
8275 else if (TARGET_NO_SPACE_REGS)
8276 {
8277 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8278 nbytes += 36;
8279 }
8280 else
8281 {
8282 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8283 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8284 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8285 nbytes += 44;
8286 }
8287
8288 if (val_14)
8289 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8290 else
8291 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8292 }
8293 else if (flag_pic)
8294 {
8295 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8296
8297 if (TARGET_SOM || !TARGET_GAS)
8298 {
8299 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8300 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8301 }
8302 else
8303 {
8304 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8305 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8306 }
8307
8308 if (!val_14)
8309 output_asm_insn ("addil L'%2,%%r26", xoperands);
8310
8311 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8312
8313 if (val_14)
8314 {
8315 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8316 nbytes += 20;
8317 }
8318 else
8319 {
8320 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8321 nbytes += 24;
8322 }
8323 }
8324 else
8325 {
8326 if (!val_14)
8327 output_asm_insn ("addil L'%2,%%r26", xoperands);
8328
8329 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8330 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8331
8332 if (val_14)
8333 {
8334 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8335 nbytes += 12;
8336 }
8337 else
8338 {
8339 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8340 nbytes += 16;
8341 }
8342 }
8343
8344 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8345
8346 if (TARGET_SOM && TARGET_GAS)
8347 {
8348 /* We are done with this subspace except possibly for some additional
8349 debug information. Forget that we are in this subspace to ensure
8350 that the next function is output in its own subspace. */
8351 in_section = NULL;
8352 cfun->machine->in_nsubspa = 2;
8353 }
8354
8355 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8356 {
8357 switch_to_section (data_section);
8358 output_asm_insn (".align 4", xoperands);
8359 ASM_OUTPUT_LABEL (file, label);
8360 output_asm_insn (".word P'%0", xoperands);
8361 }
8362
8363 current_thunk_number++;
8364 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8365 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8366 last_address += nbytes;
8367 if (old_last_address > last_address)
8368 last_address = UINT_MAX;
8369 update_total_code_bytes (nbytes);
8370 }
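
/* Illustrative thunk body (hypothetical names): in the short non-PIC
   case with a 14-bit delta, the entire thunk is

       b foo                      ; branch straight to the target
       ldo 8(%r26),%r26           ; adjust `this' in the delay slot

   The longer forms load the target address first and adjust `this'
   around an indirect branch.  */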
8371
8372 /* Only direct calls to static functions are allowed to be sibling (tail)
8373 call optimized.
8374
8375 This restriction is necessary because some linker generated stubs will
8376 store return pointers into rp' in some cases which might clobber a
8377 live value already in rp'.
8378
8379 In a sibcall the current function and the target function share stack
8380 space. Thus if the path to the current function and the path to the
8381 target function save a value in rp', they save the value into the
8382 same stack slot, which has undesirable consequences.
8383
8384 Because of the deferred binding nature of shared libraries any function
8385 with external scope could be in a different load module and thus require
8386 rp' to be saved when calling that function. So sibcall optimizations
8387 can only be safe for static functions.
8388
8389 Note that GCC never needs return value relocations, so we don't have to
8390 worry about static calls with return value relocations (which require
8391 saving rp').
8392
8393 It is safe to perform a sibcall optimization when the target function
8394 will never return. */
8395 static bool
8396 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8397 {
8398 if (TARGET_PORTABLE_RUNTIME)
8399 return false;
8400
8401 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8402 single subspace mode and the call is not indirect. As far as I know,
8403 there is no operating system support for the multiple subspace mode.
8404 It might be possible to support indirect calls if we didn't use
8405 $$dyncall (see the indirect sequence generated in output_call). */
8406 if (TARGET_ELF32)
8407 return (decl != NULL_TREE);
8408
8409 /* Sibcalls are not ok because the arg pointer register is not a fixed
8410 register. This prevents the sibcall optimization from occurring. In
8411 addition, there are problems with stub placement using GNU ld. This
8412 is because a normal sibcall branch uses a 17-bit relocation while
8413 a regular call branch uses a 22-bit relocation. As a result, more
8414 care needs to be taken in the placement of long-branch stubs. */
8415 if (TARGET_64BIT)
8416 return false;
8417
8418 /* Sibcalls are only ok within a translation unit. */
8419 return (decl && !TREE_PUBLIC (decl));
8420 }
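
/* Example (illustrative): a tail call to a function with internal
   linkage, such as "static int f (int);", may take the sibcall path
   in output_call, while a call to an extern function keeps the normal
   sequence so a linker stub can safely save and restore %r2 (rp).  */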
8421
8422 /* ??? Addition is not commutative on the PA due to the weird implicit
8423 space register selection rules for memory addresses. Therefore, we
8424 don't consider a + b == b + a, as this might be inside a MEM. */
8425 static bool
8426 pa_commutative_p (const_rtx x, int outer_code)
8427 {
8428 return (COMMUTATIVE_P (x)
8429 && (TARGET_NO_SPACE_REGS
8430 || (outer_code != UNKNOWN && outer_code != MEM)
8431 || GET_CODE (x) != PLUS));
8432 }
8433
8434 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8435 use in fmpyadd instructions. */
8436 int
8437 fmpyaddoperands (rtx *operands)
8438 {
8439 enum machine_mode mode = GET_MODE (operands[0]);
8440
8441 /* Must be a floating point mode. */
8442 if (mode != SFmode && mode != DFmode)
8443 return 0;
8444
8445 /* All modes must be the same. */
8446 if (! (mode == GET_MODE (operands[1])
8447 && mode == GET_MODE (operands[2])
8448 && mode == GET_MODE (operands[3])
8449 && mode == GET_MODE (operands[4])
8450 && mode == GET_MODE (operands[5])))
8451 return 0;
8452
8453 /* All operands must be registers. */
8454 if (! (GET_CODE (operands[1]) == REG
8455 && GET_CODE (operands[2]) == REG
8456 && GET_CODE (operands[3]) == REG
8457 && GET_CODE (operands[4]) == REG
8458 && GET_CODE (operands[5]) == REG))
8459 return 0;
8460
8461 /* Only 2 real operands to the addition. One of the input operands must
8462 be the same as the output operand. */
8463 if (! rtx_equal_p (operands[3], operands[4])
8464 && ! rtx_equal_p (operands[3], operands[5]))
8465 return 0;
8466
8467 /* Inout operand of add cannot conflict with any operands from multiply. */
8468 if (rtx_equal_p (operands[3], operands[0])
8469 || rtx_equal_p (operands[3], operands[1])
8470 || rtx_equal_p (operands[3], operands[2]))
8471 return 0;
8472
8473 /* multiply cannot feed into addition operands. */
8474 if (rtx_equal_p (operands[4], operands[0])
8475 || rtx_equal_p (operands[5], operands[0]))
8476 return 0;
8477
8478 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8479 if (mode == SFmode
8480 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8481 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8482 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8483 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8484 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8485 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8486 return 0;
8487
8488 /* Passed. Operands are suitable for fmpyadd. */
8489 return 1;
8490 }
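
/* Illustrative pairing (hypothetical registers): the checks above
   allow, e.g., "t = a * b" together with "acc = acc + c" to issue as
   a single fmpyadd, provided t, a and b are disjoint from acc and c;
   the multiply result may not feed the add because both halves
   execute in parallel.  */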
8491
8492 #if !defined(USE_COLLECT2)
8493 static void
8494 pa_asm_out_constructor (rtx symbol, int priority)
8495 {
8496 if (!function_label_operand (symbol, VOIDmode))
8497 hppa_encode_label (symbol);
8498
8499 #ifdef CTORS_SECTION_ASM_OP
8500 default_ctor_section_asm_out_constructor (symbol, priority);
8501 #else
8502 # ifdef TARGET_ASM_NAMED_SECTION
8503 default_named_section_asm_out_constructor (symbol, priority);
8504 # else
8505 default_stabs_asm_out_constructor (symbol, priority);
8506 # endif
8507 #endif
8508 }
8509
8510 static void
8511 pa_asm_out_destructor (rtx symbol, int priority)
8512 {
8513 if (!function_label_operand (symbol, VOIDmode))
8514 hppa_encode_label (symbol);
8515
8516 #ifdef DTORS_SECTION_ASM_OP
8517 default_dtor_section_asm_out_destructor (symbol, priority);
8518 #else
8519 # ifdef TARGET_ASM_NAMED_SECTION
8520 default_named_section_asm_out_destructor (symbol, priority);
8521 # else
8522 default_stabs_asm_out_destructor (symbol, priority);
8523 # endif
8524 #endif
8525 }
8526 #endif
8527
8528 /* This function places uninitialized global data in the bss section.
8529 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8530 function on the SOM port to prevent uninitialized global data from
8531 being placed in the data section. */
8532
8533 void
8534 pa_asm_output_aligned_bss (FILE *stream,
8535 const char *name,
8536 unsigned HOST_WIDE_INT size,
8537 unsigned int align)
8538 {
8539 switch_to_section (bss_section);
8540 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8541
8542 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8543 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8544 #endif
8545
8546 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8547 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8548 #endif
8549
8550 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8551 ASM_OUTPUT_LABEL (stream, name);
8552 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8553 }
8554
8555 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8556 that doesn't allow the alignment of global common storage to be directly
8557 specified. The SOM linker aligns common storage based on the rounded
8558 value of the NUM_BYTES parameter in the .comm directive. It's not
8559 possible to use the .align directive as it doesn't affect the alignment
8560 of the label associated with a .comm directive. */
8561
8562 void
8563 pa_asm_output_aligned_common (FILE *stream,
8564 const char *name,
8565 unsigned HOST_WIDE_INT size,
8566 unsigned int align)
8567 {
8568 unsigned int max_common_align;
8569
8570 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8571 if (align > max_common_align)
8572 {
8573 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8574 "for global common data. Using %u",
8575 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8576 align = max_common_align;
8577 }
8578
8579 switch_to_section (bss_section);
8580
8581 assemble_name (stream, name);
8582 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8583 MAX (size, align / BITS_PER_UNIT));
8584 }
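
/* Sample output (hypothetical symbol): for "buf" with size 4 and a
   requested 8-byte alignment this emits

       buf	.comm 8

   i.e. the size is bumped to the alignment so that the SOM linker's
   size-based rounding yields the desired alignment.  */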
8585
8586 /* We can't use .comm for local common storage as the SOM linker effectively
8587 treats the symbol as universal and uses the same storage for local symbols
8588 with the same name in different object files. The .block directive
8589 reserves an uninitialized block of storage. However, it's not common
8590 storage. Fortunately, GCC never requests common storage with the same
8591 name in any given translation unit. */
8592
8593 void
8594 pa_asm_output_aligned_local (FILE *stream,
8595 const char *name,
8596 unsigned HOST_WIDE_INT size,
8597 unsigned int align)
8598 {
8599 switch_to_section (bss_section);
8600 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8601
8602 #ifdef LOCAL_ASM_OP
8603 fprintf (stream, "%s", LOCAL_ASM_OP);
8604 assemble_name (stream, name);
8605 fprintf (stream, "\n");
8606 #endif
8607
8608 ASM_OUTPUT_LABEL (stream, name);
8609 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8610 }
8611
8612 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8613 use in fmpysub instructions. */
8614 int
8615 fmpysuboperands (rtx *operands)
8616 {
8617 enum machine_mode mode = GET_MODE (operands[0]);
8618
8619 /* Must be a floating point mode. */
8620 if (mode != SFmode && mode != DFmode)
8621 return 0;
8622
8623 /* All modes must be the same. */
8624 if (! (mode == GET_MODE (operands[1])
8625 && mode == GET_MODE (operands[2])
8626 && mode == GET_MODE (operands[3])
8627 && mode == GET_MODE (operands[4])
8628 && mode == GET_MODE (operands[5])))
8629 return 0;
8630
8631 /* All operands must be registers. */
8632 if (! (GET_CODE (operands[1]) == REG
8633 && GET_CODE (operands[2]) == REG
8634 && GET_CODE (operands[3]) == REG
8635 && GET_CODE (operands[4]) == REG
8636 && GET_CODE (operands[5]) == REG))
8637 return 0;
8638
8639 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8640 operation, so operands[4] must be the same as operands[3]. */
8641 if (! rtx_equal_p (operands[3], operands[4]))
8642 return 0;
8643
8644 /* multiply cannot feed into subtraction. */
8645 if (rtx_equal_p (operands[5], operands[0]))
8646 return 0;
8647
8648 /* Inout operand of sub cannot conflict with any operands from multiply. */
8649 if (rtx_equal_p (operands[3], operands[0])
8650 || rtx_equal_p (operands[3], operands[1])
8651 || rtx_equal_p (operands[3], operands[2]))
8652 return 0;
8653
8654 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8655 if (mode == SFmode
8656 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8657 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8658 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8659 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8660 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8661 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8662 return 0;
8663
8664 /* Passed. Operands are suitable for fmpysub. */
8665 return 1;
8666 }
8667
8668 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8669 constants for shadd instructions. */
8670 int
8671 shadd_constant_p (int val)
8672 {
8673 if (val == 2 || val == 4 || val == 8)
8674 return 1;
8675 else
8676 return 0;
8677 }
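
/* E.g. an index scaled by 4 matches the shadd constant 4 and maps to
   "sh2add %r25,%r26,%r26", a shift left by two bits plus an add.  */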
8678
8679 /* Return TRUE if INSN branches forward. */
8680
8681 static bool
8682 forward_branch_p (rtx insn)
8683 {
8684 rtx lab = JUMP_LABEL (insn);
8685
8686 /* The INSN must have a jump label. */
8687 gcc_assert (lab != NULL_RTX);
8688
8689 if (INSN_ADDRESSES_SET_P ())
8690 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8691
8692 while (insn)
8693 {
8694 if (insn == lab)
8695 return true;
8696 else
8697 insn = NEXT_INSN (insn);
8698 }
8699
8700 return false;
8701 }
8702
8703 /* Return 1 if INSN is in the delay slot of a call instruction. */
8704 int
8705 jump_in_call_delay (rtx insn)
8706 {
8707
8708 if (GET_CODE (insn) != JUMP_INSN)
8709 return 0;
8710
8711 if (PREV_INSN (insn)
8712 && PREV_INSN (PREV_INSN (insn))
8713 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8714 {
8715 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8716
8717 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8718 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8719
8720 }
8721 else
8722 return 0;
8723 }
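
/* For reference: a filled delay slot is represented as a SEQUENCE
   rtx, e.g. (sequence [(call_insn ...) (jump_insn ...)]), so element
   1 of the SEQUENCE is the insn in the delay slot tested above.  */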
8724
8725 /* Output an unconditional move and branch insn. */
8726
8727 const char *
8728 output_parallel_movb (rtx *operands, rtx insn)
8729 {
8730 int length = get_attr_length (insn);
8731
8732 /* These are the cases in which we win. */
8733 if (length == 4)
8734 return "mov%I1b,tr %1,%0,%2";
8735
8736 /* None of the following cases win, but they don't lose either. */
8737 if (length == 8)
8738 {
8739 if (dbr_sequence_length () == 0)
8740 {
8741 /* Nothing in the delay slot, fake it by putting the combined
8742 insn (the copy or add) in the delay slot of a bl. */
8743 if (GET_CODE (operands[1]) == CONST_INT)
8744 return "b %2\n\tldi %1,%0";
8745 else
8746 return "b %2\n\tcopy %1,%0";
8747 }
8748 else
8749 {
8750 /* Something in the delay slot, but we've got a long branch. */
8751 if (GET_CODE (operands[1]) == CONST_INT)
8752 return "ldi %1,%0\n\tb %2";
8753 else
8754 return "copy %1,%0\n\tb %2";
8755 }
8756 }
8757
8758 if (GET_CODE (operands[1]) == CONST_INT)
8759 output_asm_insn ("ldi %1,%0", operands);
8760 else
8761 output_asm_insn ("copy %1,%0", operands);
8762 return output_lbranch (operands[2], insn, 1);
8763 }
8764
8765 /* Output an unconditional add and branch insn. */
8766
8767 const char *
8768 output_parallel_addb (rtx *operands, rtx insn)
8769 {
8770 int length = get_attr_length (insn);
8771
8772 /* To make life easy we want operand0 to be the shared input/output
8773 operand and operand1 to be the readonly operand. */
8774 if (operands[0] == operands[1])
8775 operands[1] = operands[2];
8776
8777 /* These are the cases in which we win. */
8778 if (length == 4)
8779 return "add%I1b,tr %1,%0,%3";
8780
8781 /* None of the following cases win, but they don't lose either. */
8782 if (length == 8)
8783 {
8784 if (dbr_sequence_length () == 0)
8785 /* Nothing in the delay slot, fake it by putting the combined
8786 insn (the copy or add) in the delay slot of a bl. */
8787 return "b %3\n\tadd%I1 %1,%0,%0";
8788 else
8789 /* Something in the delay slot, but we've got a long branch. */
8790 return "add%I1 %1,%0,%0\n\tb %3";
8791 }
8792
8793 output_asm_insn ("add%I1 %1,%0,%0", operands);
8794 return output_lbranch (operands[3], insn, 1);
8795 }
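
/* Illustrative short form (hypothetical operands): the length-4 case
   emits a single combined instruction such as

       addb,tr %r5,%r4,L$0030

   which adds %r5 into %r4 and branches unconditionally (the "true"
   condition) to L$0030.  */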
8796
8797 /* Return nonzero if INSN (a jump insn) immediately follows a call
8798 to a named function. This is used to avoid filling the delay slot
8799 of the jump since it can usually be eliminated by modifying RP in
8800 the delay slot of the call. */
8801
8802 int
8803 following_call (rtx insn)
8804 {
8805 if (! TARGET_JUMP_IN_DELAY)
8806 return 0;
8807
8808 /* Find the previous real insn, skipping NOTEs. */
8809 insn = PREV_INSN (insn);
8810 while (insn && GET_CODE (insn) == NOTE)
8811 insn = PREV_INSN (insn);
8812
8813 /* Check for CALL_INSNs and millicode calls. */
8814 if (insn
8815 && ((GET_CODE (insn) == CALL_INSN
8816 && get_attr_type (insn) != TYPE_DYNCALL)
8817 || (GET_CODE (insn) == INSN
8818 && GET_CODE (PATTERN (insn)) != SEQUENCE
8819 && GET_CODE (PATTERN (insn)) != USE
8820 && GET_CODE (PATTERN (insn)) != CLOBBER
8821 && get_attr_type (insn) == TYPE_MILLI)))
8822 return 1;
8823
8824 return 0;
8825 }
8826
8827 /* We use this hook to perform a PA specific optimization which is difficult
8828 to do in earlier passes.
8829
8830 We want the delay slots of branches within jump tables to be filled.
8831 None of the compiler passes at the moment even has the notion that a
8832 PA jump table doesn't contain addresses, but instead contains actual
8833 instructions!
8834
8835 Because we actually jump into the table, the addresses of each entry
8836 must stay constant in relation to the beginning of the table (which
8837 itself must stay constant relative to the instruction to jump into
8838 it). I don't believe we can guarantee earlier passes of the compiler
8839 will adhere to those rules.
8840
8841 So, late in the compilation process we find all the jump tables, and
8842 expand them into real code -- e.g. each entry in the jump table vector
8843 will get an appropriate label followed by a jump to the final target.
8844
8845 Reorg and the final jump pass can then optimize these branches and
8846 fill their delay slots. We end up with smaller, more efficient code.
8847
8848 The jump instructions within the table are special; we must be able
8849 to identify them during assembly output (if the jumps don't get filled
8850 we need to emit a nop rather than nullifying the delay slot). We
8851 identify jumps in switch tables by using insns with the attribute
8852 type TYPE_BTABLE_BRANCH.
8853
8854 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8855 insns. This serves two purposes, first it prevents jump.c from
8856 noticing that the last N entries in the table jump to the instruction
8857 immediately after the table and deleting the jumps. Second, those
8858 insns mark where we should emit .begin_brtab and .end_brtab directives
8859 when using GAS (allows for better link time optimizations). */
8860
8861 static void
8862 pa_reorg (void)
8863 {
8864 rtx insn;
8865
8866 remove_useless_addtr_insns (1);
8867
8868 if (pa_cpu < PROCESSOR_8000)
8869 pa_combine_instructions ();
8870
8871
8872 /* This is fairly cheap, so always run it if optimizing. */
8873 if (optimize > 0 && !TARGET_BIG_SWITCH)
8874 {
8875 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8876 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8877 {
8878 rtx pattern, tmp, location, label;
8879 unsigned int length, i;
8880
8881 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8882 if (GET_CODE (insn) != JUMP_INSN
8883 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8884 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8885 continue;
8886
8887 /* Emit marker for the beginning of the branch table. */
8888 emit_insn_before (gen_begin_brtab (), insn);
8889
8890 pattern = PATTERN (insn);
8891 location = PREV_INSN (insn);
8892 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8893
8894 for (i = 0; i < length; i++)
8895 {
8896 /* Emit a label before each jump to keep jump.c from
8897 removing this code. */
8898 tmp = gen_label_rtx ();
8899 LABEL_NUSES (tmp) = 1;
8900 emit_label_after (tmp, location);
8901 location = NEXT_INSN (location);
8902
8903 if (GET_CODE (pattern) == ADDR_VEC)
8904 label = XEXP (XVECEXP (pattern, 0, i), 0);
8905 else
8906 label = XEXP (XVECEXP (pattern, 1, i), 0);
8907
8908 tmp = gen_short_jump (label);
8909
8910 /* Emit the jump itself. */
8911 tmp = emit_jump_insn_after (tmp, location);
8912 JUMP_LABEL (tmp) = label;
8913 LABEL_NUSES (label)++;
8914 location = NEXT_INSN (location);
8915
8916 /* Emit a BARRIER after the jump. */
8917 emit_barrier_after (location);
8918 location = NEXT_INSN (location);
8919 }
8920
8921 /* Emit marker for the end of the branch table. */
8922 emit_insn_before (gen_end_brtab (), location);
8923 location = NEXT_INSN (location);
8924 emit_barrier_after (location);
8925
8926 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8927 delete_insn (insn);
8928 }
8929 }
8930 else
8931 {
8932 /* Still need brtab marker insns. FIXME: the presence of these
8933 markers disables output of the branch table to readonly memory,
8934 and any alignment directives that might be needed. Possibly,
8935 the begin_brtab insn should be output before the label for the
8936 table. This doesn't matter at the moment since the tables are
8937 always output in the text section. */
8938 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8939 {
8940 /* Find an ADDR_VEC insn. */
8941 if (GET_CODE (insn) != JUMP_INSN
8942 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8943 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8944 continue;
8945
8946 /* Now generate markers for the beginning and end of the
8947 branch table. */
8948 emit_insn_before (gen_begin_brtab (), insn);
8949 emit_insn_after (gen_end_brtab (), insn);
8950 }
8951 }
8952 }
8953
8954 /* The PA has a number of odd instructions which can perform multiple
8955 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8956 it may be profitable to combine two instructions into one instruction
8957 with two outputs. It's not profitable on PA2.0 machines because the
8958 two outputs would take two slots in the reorder buffers.
8959
8960 This routine finds instructions which can be combined and combines
8961 them. We only support some of the potential combinations, and we
8962 only try common ways to find suitable instructions.
8963
8964 * addb can add two registers or a register and a small integer
8965 and jump to a nearby (+-8k) location. Normally the jump to the
8966 nearby location is conditional on the result of the add, but by
8967 using the "true" condition we can make the jump unconditional.
8968 Thus addb can perform two independent operations in one insn.
8969
8970 * movb is similar to addb in that it can perform a reg->reg
8971 or small immediate->reg copy and jump to a nearby (+-8k) location.
8972
8973 * fmpyadd and fmpysub can perform a FP multiply and either an
8974 FP add or FP sub if the operands of the multiply and add/sub are
8975 independent (there are other minor restrictions). Note both
8976 the fmpy and fadd/fsub can in theory move to better spots according
8977 to data dependencies, but for now we require the fmpy stay at a
8978 fixed location.
8979
8980 * Many of the memory operations can perform pre & post updates
8981 of index registers. GCC's pre/post increment/decrement addressing
8982 is far too simple to take advantage of all the possibilities. This
8983 pass may not be suitable since those insns may not be independent.
8984
8985 * comclr can compare two ints or an int and a register, nullify
8986 the following instruction and zero some other register. This
8987 is more difficult to use as it's harder to find an insn which
8988 will generate a comclr than to find something like an unconditional
8989 branch. (conditional moves & long branches create comclr insns).
8990
8991 * Most arithmetic operations can conditionally skip the next
8992 instruction. They can be viewed as "perform this operation
8993 and conditionally jump to this nearby location" (where nearby
8994 is one insn away). These are difficult to use due to the
8995 branch length restrictions. */
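
/* A sketch of the fmpyadd case (illustrative RTL, not from a real
   dump): an FP multiply and an independent FP add such as

       (set (reg:DF fr22) (mult:DF (reg:DF fr4) (reg:DF fr5)))
       (set (reg:DF fr23) (plus:DF (reg:DF fr6) (reg:DF fr7)))

   are wrapped into a single two-element PARALLEL by the code below
   and handed to recog; if the pattern matches and the constraints
   are satisfied, the pair is emitted as one insn with two outputs.  */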
8996
8997 static void
8998 pa_combine_instructions (void)
8999 {
9000 rtx anchor, new_rtx;
9001
9002 /* This can get expensive since the basic algorithm is on the
9003 order of O(n^2) (or worse). Only do it for -O2 or higher
9004 levels of optimization. */
9005 if (optimize < 2)
9006 return;
9007
9008 /* Walk down the list of insns looking for "anchor" insns which
9009 may be combined with "floating" insns. As the name implies,
9010 "anchor" instructions don't move, while "floating" insns may
9011 move around. */
9012 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9013 new_rtx = make_insn_raw (new_rtx);
9014
9015 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9016 {
9017 enum attr_pa_combine_type anchor_attr;
9018 enum attr_pa_combine_type floater_attr;
9019
9020 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9021 Also ignore any special USE insns. */
9022 if ((GET_CODE (anchor) != INSN
9023 && GET_CODE (anchor) != JUMP_INSN
9024 && GET_CODE (anchor) != CALL_INSN)
9025 || GET_CODE (PATTERN (anchor)) == USE
9026 || GET_CODE (PATTERN (anchor)) == CLOBBER
9027 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9028 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9029 continue;
9030
9031 anchor_attr = get_attr_pa_combine_type (anchor);
9032 /* See if anchor is an insn suitable for combination. */
9033 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9034 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9035 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9036 && ! forward_branch_p (anchor)))
9037 {
9038 rtx floater;
9039
9040 for (floater = PREV_INSN (anchor);
9041 floater;
9042 floater = PREV_INSN (floater))
9043 {
9044 if (GET_CODE (floater) == NOTE
9045 || (GET_CODE (floater) == INSN
9046 && (GET_CODE (PATTERN (floater)) == USE
9047 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9048 continue;
9049
9050 /* Anything except a regular INSN will stop our search. */
9051 if (GET_CODE (floater) != INSN
9052 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9053 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9054 {
9055 floater = NULL_RTX;
9056 break;
9057 }
9058
9059 /* See if FLOATER is suitable for combination with the
9060 anchor. */
9061 floater_attr = get_attr_pa_combine_type (floater);
9062 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9063 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9064 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9065 && floater_attr == PA_COMBINE_TYPE_FMPY))
9066 {
9067 /* If ANCHOR and FLOATER can be combined, then we're
9068 done with this pass. */
9069 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9070 SET_DEST (PATTERN (floater)),
9071 XEXP (SET_SRC (PATTERN (floater)), 0),
9072 XEXP (SET_SRC (PATTERN (floater)), 1)))
9073 break;
9074 }
9075
9076 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9077 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9078 {
9079 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9080 {
9081 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9082 SET_DEST (PATTERN (floater)),
9083 XEXP (SET_SRC (PATTERN (floater)), 0),
9084 XEXP (SET_SRC (PATTERN (floater)), 1)))
9085 break;
9086 }
9087 else
9088 {
9089 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9090 SET_DEST (PATTERN (floater)),
9091 SET_SRC (PATTERN (floater)),
9092 SET_SRC (PATTERN (floater))))
9093 break;
9094 }
9095 }
9096 }
9097
9098 /* If we didn't find anything on the backwards scan, try forwards. */
9099 if (!floater
9100 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9101 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9102 {
9103 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9104 {
9105 if (GET_CODE (floater) == NOTE
9106 || (GET_CODE (floater) == INSN
9107 && (GET_CODE (PATTERN (floater)) == USE
9108 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9110 continue;
9111
9112 /* Anything except a regular INSN will stop our search. */
9113 if (GET_CODE (floater) != INSN
9114 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9115 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9116 {
9117 floater = NULL_RTX;
9118 break;
9119 }
9120
9121 /* See if FLOATER is suitable for combination with the
9122 anchor. */
9123 floater_attr = get_attr_pa_combine_type (floater);
9124 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9125 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9126 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9127 && floater_attr == PA_COMBINE_TYPE_FMPY))
9128 {
9129 /* If ANCHOR and FLOATER can be combined, then we're
9130 done with this pass. */
9131 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9132 SET_DEST (PATTERN (floater)),
9133 XEXP (SET_SRC (PATTERN (floater)),
9134 0),
9135 XEXP (SET_SRC (PATTERN (floater)),
9136 1)))
9137 break;
9138 }
9139 }
9140 }
9141
9142 /* FLOATER will be nonzero if we found a suitable floating
9143 insn for combination with ANCHOR. */
9144 if (floater
9145 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9146 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9147 {
9148 /* Emit the new instruction and delete the old anchor. */
9149 emit_insn_before (gen_rtx_PARALLEL
9150 (VOIDmode,
9151 gen_rtvec (2, PATTERN (anchor),
9152 PATTERN (floater))),
9153 anchor);
9154
9155 SET_INSN_DELETED (anchor);
9156
9157 /* Emit a special USE insn for FLOATER, then delete
9158 the floating insn. */
9159 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9160 delete_insn (floater);
9161
9162 continue;
9163 }
9164 else if (floater
9165 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9166 {
9167 rtx temp;
9168 /* Emit the new jump instruction and delete the old anchor. */
9169 temp
9170 = emit_jump_insn_before (gen_rtx_PARALLEL
9171 (VOIDmode,
9172 gen_rtvec (2, PATTERN (anchor),
9173 PATTERN (floater))),
9174 anchor);
9175
9176 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9177 SET_INSN_DELETED (anchor);
9178
9179 /* Emit a special USE insn for FLOATER, then delete
9180 the floating insn. */
9181 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9182 delete_insn (floater);
9183 continue;
9184 }
9185 }
9186 }
9187 }
9188
9189 static int
9190 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9191 rtx src1, rtx src2)
9192 {
9193 int insn_code_number;
9194 rtx start, end;
9195
9196 /* Create a PARALLEL with the patterns of ANCHOR and
9197 FLOATER, try to recognize it, then test constraints
9198 for the resulting pattern.
9199
9200 If the pattern doesn't match or the constraints
9201 aren't met keep searching for a suitable floater
9202 insn. */
9203 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9204 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9205 INSN_CODE (new_rtx) = -1;
9206 insn_code_number = recog_memoized (new_rtx);
9207 if (insn_code_number < 0
9208 || (extract_insn (new_rtx), ! constrain_operands (1)))
9209 return 0;
9210
9211 if (reversed)
9212 {
9213 start = anchor;
9214 end = floater;
9215 }
9216 else
9217 {
9218 start = floater;
9219 end = anchor;
9220 }
9221
9222 /* There are up to three operands to consider. One
9223 output and two inputs.
9224
9225 The output must not be used between FLOATER & ANCHOR
9226 exclusive. The inputs must not be set between
9227 FLOATER and ANCHOR exclusive. */
9228
9229 if (reg_used_between_p (dest, start, end))
9230 return 0;
9231
9232 if (reg_set_between_p (src1, start, end))
9233 return 0;
9234
9235 if (reg_set_between_p (src2, start, end))
9236 return 0;
9237
9238 /* If we get here, then everything is good. */
9239 return 1;
9240 }
9241
9242 /* Return nonzero if references for INSN are delayed.
9243
9244 Millicode insns are actually function calls with some special
9245 constraints on arguments and register usage.
9246
9247 Millicode calls always expect their arguments in the integer argument
9248 registers, and always return their result in %r29 (ret1). They
9249 are expected to clobber their arguments, %r1, %r29, and the return
9250 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9251
9252 This function tells reorg that references to arguments by millicode
9253 calls do not appear to happen until after the millicode call.
9254 This allows reorg to put insns which set the argument registers into the
9255 delay slot of the millicode call -- thus they act more like traditional
9256 CALL_INSNs.
9257
9258 Note we cannot consider side effects of the insn to be delayed because
9259 the branch and link insn will clobber the return pointer. If we happened
9260 to use the return pointer in the delay slot of the call, then we lose.
9261
9262 get_attr_type will try to recognize the given insn, so make sure to
9263 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9264 in particular. */
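
/* For instance (an illustrative sketch): a 32-bit integer multiply
   may expand to a call to the $$mulI millicode routine, with the
   operands in %r26 and %r25 and the result in %r29.  Because this
   function reports the argument references as delayed, an insn such
   as

       ldi 42,%r25

   can be moved into the delay slot of the branch-and-link to
   $$mulI.  */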
9265 int
9266 insn_refs_are_delayed (rtx insn)
9267 {
9268 return ((GET_CODE (insn) == INSN
9269 && GET_CODE (PATTERN (insn)) != SEQUENCE
9270 && GET_CODE (PATTERN (insn)) != USE
9271 && GET_CODE (PATTERN (insn)) != CLOBBER
9272 && get_attr_type (insn) == TYPE_MILLI));
9273 }
9274
9275 /* Promote the return value, but not the arguments. */
9276
9277 static enum machine_mode
9278 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9279 enum machine_mode mode,
9280 int *punsignedp ATTRIBUTE_UNUSED,
9281 const_tree fntype ATTRIBUTE_UNUSED,
9282 int for_return)
9283 {
9284 if (for_return == 0)
9285 return mode;
9286 return promote_mode (type, mode, punsignedp);
9287 }
9288
9289 /* On the HP-PA the value is found in register(s) 28(-29), unless
9290 the mode is SF or DF. Then the value is returned in fr4 (32).
9291
9292 This must perform the same promotions as PROMOTE_MODE, else promoting
9293 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9294
9295 Small structures must be returned in a PARALLEL on PA64 in order
9296 to match the HP Compiler ABI. */
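
/* Worked example (illustrative): on PA64 a 12-byte struct comes back
   left justified in GRs 28 and 29 as

       (parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
                      (expr_list (reg:DI 29) (const_int 8))])

   while with the 32-bit ABI a 6-byte struct is returned right
   justified in the register pair starting at GR 28.  */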
9297
9298 static rtx
9299 pa_function_value (const_tree valtype,
9300 const_tree func ATTRIBUTE_UNUSED,
9301 bool outgoing ATTRIBUTE_UNUSED)
9302 {
9303 enum machine_mode valmode;
9304
9305 if (AGGREGATE_TYPE_P (valtype)
9306 || TREE_CODE (valtype) == COMPLEX_TYPE
9307 || TREE_CODE (valtype) == VECTOR_TYPE)
9308 {
9309 if (TARGET_64BIT)
9310 {
9311 /* Aggregates with a size less than or equal to 128 bits are
9312 returned in GR 28(-29). They are left justified. The pad
9313 bits are undefined. Larger aggregates are returned in
9314 memory. */
9315 rtx loc[2];
9316 int i, offset = 0;
9317 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9318
9319 for (i = 0; i < ub; i++)
9320 {
9321 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9322 gen_rtx_REG (DImode, 28 + i),
9323 GEN_INT (offset));
9324 offset += 8;
9325 }
9326
9327 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9328 }
9329 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9330 {
9331 /* Aggregates 5 to 8 bytes in size are returned in general
9332 registers r28-r29 in the same manner as other non-
9333 floating-point objects. The data is right-justified and
9334 zero-extended to 64 bits. This is opposite to the normal
9335 justification used on big endian targets and requires
9336 special treatment. */
9337 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9338 gen_rtx_REG (DImode, 28), const0_rtx);
9339 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9340 }
9341 }
9342
9343 if ((INTEGRAL_TYPE_P (valtype)
9344 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9345 || POINTER_TYPE_P (valtype))
9346 valmode = word_mode;
9347 else
9348 valmode = TYPE_MODE (valtype);
9349
9350 if (TREE_CODE (valtype) == REAL_TYPE
9351 && !AGGREGATE_TYPE_P (valtype)
9352 && TYPE_MODE (valtype) != TFmode
9353 && !TARGET_SOFT_FLOAT)
9354 return gen_rtx_REG (valmode, 32);
9355
9356 return gen_rtx_REG (valmode, 28);
9357 }
9358
9359 /* Implement the TARGET_LIBCALL_VALUE hook. */
9360
9361 static rtx
9362 pa_libcall_value (enum machine_mode mode,
9363 const_rtx fun ATTRIBUTE_UNUSED)
9364 {
9365 if (! TARGET_SOFT_FLOAT
9366 && (mode == SFmode || mode == DFmode))
9367 return gen_rtx_REG (mode, 32);
9368 else
9369 return gen_rtx_REG (mode, 28);
9370 }
9371
9372 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9373
9374 static bool
9375 pa_function_value_regno_p (const unsigned int regno)
9376 {
9377 if (regno == 28
9378 || (! TARGET_SOFT_FLOAT && regno == 32))
9379 return true;
9380
9381 return false;
9382 }
9383
9384 /* Update the data in CUM to advance over an argument
9385 of mode MODE and data type TYPE.
9386 (TYPE is null for libcalls where that information may not be available.) */
9387
9388 static void
9389 pa_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9390 const_tree type, bool named ATTRIBUTE_UNUSED)
9391 {
9392 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9393 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9394
9395 cum->nargs_prototype--;
9396 cum->words += (arg_size
9397 + ((cum->words & 01)
9398 && type != NULL_TREE
9399 && arg_size > 1));
9400 }
9401
9402 /* Return the location of a parameter that is passed in a register or NULL
9403 if the parameter has any component that is passed in memory.
9404
9405 This is new code and will be pushed into the net sources after
9406 further testing.
9407
9408 ??? We might want to restructure this so that it looks more like other
9409 ports. */
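
/* Worked example (illustrative, using the numbering in the code
   below): with the 32-bit ABI a DFmode argument gets gpr_reg_base 25
   and fpr_reg_base 34 when it starts the argument list, and 23/38
   otherwise, while a single-word integer as the first argument lands
   in GR 26.  */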
9410 static rtx
9411 pa_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9412 const_tree type, bool named ATTRIBUTE_UNUSED)
9413 {
9414 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9415 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9416 int alignment = 0;
9417 int arg_size;
9418 int fpr_reg_base;
9419 int gpr_reg_base;
9420 rtx retval;
9421
9422 if (mode == VOIDmode)
9423 return NULL_RTX;
9424
9425 arg_size = FUNCTION_ARG_SIZE (mode, type);
9426
9427 /* If this arg would be passed partially or totally on the stack, then
9428 this routine should return zero. pa_arg_partial_bytes will
9429 handle arguments which are split between regs and stack slots if
9430 the ABI mandates split arguments. */
9431 if (!TARGET_64BIT)
9432 {
9433 /* The 32-bit ABI does not split arguments. */
9434 if (cum->words + arg_size > max_arg_words)
9435 return NULL_RTX;
9436 }
9437 else
9438 {
9439 if (arg_size > 1)
9440 alignment = cum->words & 1;
9441 if (cum->words + alignment >= max_arg_words)
9442 return NULL_RTX;
9443 }
9444
9445 /* The 32-bit and 64-bit ABIs are rather different,
9446 particularly in their handling of FP registers. We might
9447 be able to cleverly share code between them, but I'm not
9448 going to bother in the hope that splitting them up results
9449 in code that is more easily understood. */
9450
9451 if (TARGET_64BIT)
9452 {
9453 /* Advance the base registers to their current locations.
9454
9455 Remember, gprs grow towards smaller register numbers while
9456 fprs grow towards higher register numbers. Also remember that
9457 although FP regs are 32-bit addressable, we pretend that
9458 the registers are 64 bits wide. */
9459 gpr_reg_base = 26 - cum->words;
9460 fpr_reg_base = 32 + cum->words;
9461
9462 /* Arguments wider than one word and small aggregates need special
9463 treatment. */
9464 if (arg_size > 1
9465 || mode == BLKmode
9466 || (type && (AGGREGATE_TYPE_P (type)
9467 || TREE_CODE (type) == COMPLEX_TYPE
9468 || TREE_CODE (type) == VECTOR_TYPE)))
9469 {
9470 /* Double-extended precision (80-bit), quad-precision (128-bit)
9471 and aggregates including complex numbers are aligned on
9472 128-bit boundaries. The first eight 64-bit argument slots
9473 are associated one-to-one with general registers r26
9474 through r19, and also with floating-point registers fr4
9475 through fr11. Arguments larger than one word are always
9476 passed in general registers.
9477
9478 Using a PARALLEL with a word mode register results in left
9479 justified data on a big-endian target. */
9480
9481 rtx loc[8];
9482 int i, offset = 0, ub = arg_size;
9483
9484 /* Align the base register. */
9485 gpr_reg_base -= alignment;
9486
9487 ub = MIN (ub, max_arg_words - cum->words - alignment);
9488 for (i = 0; i < ub; i++)
9489 {
9490 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9491 gen_rtx_REG (DImode, gpr_reg_base),
9492 GEN_INT (offset));
9493 gpr_reg_base -= 1;
9494 offset += 8;
9495 }
9496
9497 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9498 }
9499 }
9500 else
9501 {
9502 /* If the argument is larger than a word, then we know precisely
9503 which registers we must use. */
9504 if (arg_size > 1)
9505 {
9506 if (cum->words)
9507 {
9508 gpr_reg_base = 23;
9509 fpr_reg_base = 38;
9510 }
9511 else
9512 {
9513 gpr_reg_base = 25;
9514 fpr_reg_base = 34;
9515 }
9516
9517 /* Structures 5 to 8 bytes in size are passed in the general
9518 registers in the same manner as other non-floating-point
9519 objects. The data is right-justified and zero-extended
9520 to 64 bits. This is opposite to the normal justification
9521 used on big endian targets and requires special treatment.
9522 We now define BLOCK_REG_PADDING to pad these objects.
9523 Aggregates, complex and vector types are passed in the same
9524 manner as structures. */
9525 if (mode == BLKmode
9526 || (type && (AGGREGATE_TYPE_P (type)
9527 || TREE_CODE (type) == COMPLEX_TYPE
9528 || TREE_CODE (type) == VECTOR_TYPE)))
9529 {
9530 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9531 gen_rtx_REG (DImode, gpr_reg_base),
9532 const0_rtx);
9533 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9534 }
9535 }
9536 else
9537 {
9538 /* We have a single word (32 bits). A simple computation
9539 will get us the register #s we need. */
9540 gpr_reg_base = 26 - cum->words;
9541 fpr_reg_base = 32 + 2 * cum->words;
9542 }
9543 }
9544
9545 /* Determine if the argument needs to be passed in both general and
9546 floating point registers. */
9547 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9548 /* If we are doing soft-float with portable runtime, then there
9549 is no need to worry about FP regs. */
9550 && !TARGET_SOFT_FLOAT
9551 /* The parameter must be some kind of scalar float, else we just
9552 pass it in integer registers. */
9553 && GET_MODE_CLASS (mode) == MODE_FLOAT
9554 /* The target function must not have a prototype. */
9555 && cum->nargs_prototype <= 0
9556 /* libcalls do not need to pass items in both FP and general
9557 registers. */
9558 && type != NULL_TREE
9559 /* All this hair applies to "outgoing" args only. This includes
9560 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9561 && !cum->incoming)
9562 /* Also pass outgoing floating arguments in both registers in indirect
9563 calls with the 32-bit ABI and the HP assembler since there is no
9564 way to specify the argument locations in static functions. */
9565 || (!TARGET_64BIT
9566 && !TARGET_GAS
9567 && !cum->incoming
9568 && cum->indirect
9569 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9570 {
9571 retval
9572 = gen_rtx_PARALLEL
9573 (mode,
9574 gen_rtvec (2,
9575 gen_rtx_EXPR_LIST (VOIDmode,
9576 gen_rtx_REG (mode, fpr_reg_base),
9577 const0_rtx),
9578 gen_rtx_EXPR_LIST (VOIDmode,
9579 gen_rtx_REG (mode, gpr_reg_base),
9580 const0_rtx)));
9581 }
9582 else
9583 {
9584 /* See if we should pass this parameter in a general register. */
9585 if (TARGET_SOFT_FLOAT
9586 /* Indirect calls in the normal 32bit ABI require all arguments
9587 to be passed in general registers. */
9588 || (!TARGET_PORTABLE_RUNTIME
9589 && !TARGET_64BIT
9590 && !TARGET_ELF32
9591 && cum->indirect)
9592 /* If the parameter is not a scalar floating-point parameter,
9593 then it belongs in GPRs. */
9594 || GET_MODE_CLASS (mode) != MODE_FLOAT
9595 /* Structure with single SFmode field belongs in GPR. */
9596 || (type && AGGREGATE_TYPE_P (type)))
9597 retval = gen_rtx_REG (mode, gpr_reg_base);
9598 else
9599 retval = gen_rtx_REG (mode, fpr_reg_base);
9600 }
9601 return retval;
9602 }
9603
9604 /* Arguments larger than one word are double word aligned. */
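
/* For instance (illustrative): a 16-byte aggregate is aligned to
   MAX_PARM_BOUNDARY, while zero-sized and variable-sized types fall
   back to PARM_BOUNDARY and are treated as single words.  */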
9605
9606 static unsigned int
9607 pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9608 {
9609 bool singleword = (type
9610 ? (integer_zerop (TYPE_SIZE (type))
9611 || !TREE_CONSTANT (TYPE_SIZE (type))
9612 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9613 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9614
9615 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9616 }
9617
9618 /* If this arg would be passed totally in registers or totally on the stack,
9619 then this routine should return zero. */
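
/* Worked example (illustrative): on the 64-bit target with
   max_arg_words == 8, a 4-word aggregate starting at cum->words == 6
   is split -- (8 - 6) * UNITS_PER_WORD == 16 bytes go in registers,
   the rest on the stack -- whereas the same aggregate starting at
   cum->words == 8 goes entirely on the stack and 0 is returned.  */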
9620
9621 static int
9622 pa_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
9623 tree type, bool named ATTRIBUTE_UNUSED)
9624 {
9625 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9626 unsigned int max_arg_words = 8;
9627 unsigned int offset = 0;
9628
9629 if (!TARGET_64BIT)
9630 return 0;
9631
9632 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9633 offset = 1;
9634
9635 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9636 /* Arg fits fully into registers. */
9637 return 0;
9638 else if (cum->words + offset >= max_arg_words)
9639 /* Arg fully on the stack. */
9640 return 0;
9641 else
9642 /* Arg is split. */
9643 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9644 }
9645
9646
9647 /* A get_unnamed_section callback for switching to the text section.
9648
9649 This function is only used with SOM. Because we don't support
9650 named subspaces, we can only create a new subspace or switch back
9651 to the default text subspace. */
9652
9653 static void
9654 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9655 {
9656 gcc_assert (TARGET_SOM);
9657 if (TARGET_GAS)
9658 {
9659 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9660 {
9661 /* We only want to emit a .nsubspa directive once at the
9662 start of the function. */
9663 cfun->machine->in_nsubspa = 1;
9664
9665 /* Create a new subspace for the text. This provides
9666 better stub placement and one-only functions. */
9667 if (cfun->decl
9668 && DECL_ONE_ONLY (cfun->decl)
9669 && !DECL_WEAK (cfun->decl))
9670 {
9671 output_section_asm_op ("\t.SPACE $TEXT$\n"
9672 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9673 "ACCESS=44,SORT=24,COMDAT");
9674 return;
9675 }
9676 }
9677 else
9678 {
9679 /* Either there isn't a current function, or the body of the current
9680 function has been completed. So, we are changing to the
9681 text section to output debugging information. Thus, we
9682 need to forget that we are in the text section so that
9683 varasm.c will call us when text_section is selected again. */
9684 gcc_assert (!cfun || !cfun->machine
9685 || cfun->machine->in_nsubspa == 2);
9686 in_section = NULL;
9687 }
9688 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9689 return;
9690 }
9691 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9692 }
9693
9694 /* A get_unnamed_section callback for switching to comdat data
9695 sections. This function is only used with SOM. */
9696
9697 static void
9698 som_output_comdat_data_section_asm_op (const void *data)
9699 {
9700 in_section = NULL;
9701 output_section_asm_op (data);
9702 }
9703
9704 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9705
9706 static void
9707 pa_som_asm_init_sections (void)
9708 {
9709 text_section
9710 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9711
9712 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9713 is not being generated. */
9714 som_readonly_data_section
9715 = get_unnamed_section (0, output_section_asm_op,
9716 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9717
9718 /* When secondary definitions are not supported, SOM makes readonly
9719 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9720 the comdat flag. */
9721 som_one_only_readonly_data_section
9722 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9723 "\t.SPACE $TEXT$\n"
9724 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9725 "ACCESS=0x2c,SORT=16,COMDAT");
9726
9727
9728 /* When secondary definitions are not supported, SOM makes data one-only
9729 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9730 som_one_only_data_section
9731 = get_unnamed_section (SECTION_WRITE,
9732 som_output_comdat_data_section_asm_op,
9733 "\t.SPACE $PRIVATE$\n"
9734 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9735 "ACCESS=31,SORT=24,COMDAT");
9736
9737 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9738 which reference data within the $TEXT$ space (for example constant
9739 strings in the $LIT$ subspace).
9740
9741 The assemblers (GAS and HP as) both have problems with handling
9742 the difference of two symbols which is the other correct way to
9743 reference constant data during PIC code generation.
9744
9745 So, there's no way to reference constant data which is in the
9746 $TEXT$ space during PIC generation. Instead place all constant
9747 data into the $PRIVATE$ subspace (this reduces sharing, but it
9748 works correctly). */
9749 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9750
9751 /* We must not have a reference to an external symbol defined in a
9752 shared library in a readonly section, else the SOM linker will
9753 complain.
9754
9755 So, we force exception information into the data section. */
9756 exception_section = data_section;
9757 }
9758
9759 /* On hpux10, the linker will give an error if we have a reference
9760 in the read-only data section to a symbol defined in a shared
9761 library. Therefore, expressions that might require a reloc can
9762 not be placed in the read-only data section. */
9763
9764 static section *
9765 pa_select_section (tree exp, int reloc,
9766 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9767 {
9768 if (TREE_CODE (exp) == VAR_DECL
9769 && TREE_READONLY (exp)
9770 && !TREE_THIS_VOLATILE (exp)
9771 && DECL_INITIAL (exp)
9772 && (DECL_INITIAL (exp) == error_mark_node
9773 || TREE_CONSTANT (DECL_INITIAL (exp)))
9774 && !reloc)
9775 {
9776 if (TARGET_SOM
9777 && DECL_ONE_ONLY (exp)
9778 && !DECL_WEAK (exp))
9779 return som_one_only_readonly_data_section;
9780 else
9781 return readonly_data_section;
9782 }
9783 else if (CONSTANT_CLASS_P (exp) && !reloc)
9784 return readonly_data_section;
9785 else if (TARGET_SOM
9786 && TREE_CODE (exp) == VAR_DECL
9787 && DECL_ONE_ONLY (exp)
9788 && !DECL_WEAK (exp))
9789 return som_one_only_data_section;
9790 else
9791 return data_section;
9792 }
9793
9794 static void
9795 pa_globalize_label (FILE *stream, const char *name)
9796 {
9797 /* We only handle DATA objects here; functions are globalized in
9798 ASM_DECLARE_FUNCTION_NAME. */
9799 if (! FUNCTION_NAME_P (name))
9800 {
9801 fputs ("\t.EXPORT ", stream);
9802 assemble_name (stream, name);
9803 fputs (",DATA\n", stream);
9804 }
9805 }
9806
9807 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9808
9809 static rtx
9810 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9811 int incoming ATTRIBUTE_UNUSED)
9812 {
9813 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9814 }
9815
9816 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9817
9818 bool
9819 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9820 {
9821 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9822 PA64 ABI says that objects larger than 128 bits are returned in memory.
9823 Note, int_size_in_bytes can return -1 if the size of the object is
9824 variable or larger than the maximum value that can be expressed as
9825 a HOST_WIDE_INT. It can also return zero for an empty type. The
9826 simplest way to handle variable and empty types is to pass them in
9827 memory. This avoids problems in defining the boundaries of argument
9828 slots, allocating registers, etc. */
9829 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9830 || int_size_in_bytes (type) <= 0);
9831 }
9832
9833 /* Structure to hold declaration and name of external symbols that are
9834 emitted by GCC. We generate a vector of these symbols and output them
9835 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9836 This avoids putting out names that are never really used. */
9837
9838 typedef struct GTY(()) extern_symbol
9839 {
9840 tree decl;
9841 const char *name;
9842 } extern_symbol;
9843
9844 /* Define gc'd vector type for extern_symbol. */
9845 DEF_VEC_O(extern_symbol);
9846 DEF_VEC_ALLOC_O(extern_symbol,gc);
9847
9848 /* Vector of extern_symbol pointers. */
9849 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9850
9851 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9852 /* Mark DECL (name NAME) as an external reference (assembler output
9853 file FILE). This saves the names to output at the end of the file
9854 if actually referenced. */
9855
9856 void
9857 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9858 {
9859 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9860
9861 gcc_assert (file == asm_out_file);
9862 p->decl = decl;
9863 p->name = name;
9864 }
9865
9866 /* Output text required at the end of an assembler file.
9867 This includes deferred plabels and .import directives for
9868 all external symbols that were actually referenced. */
9869
9870 static void
9871 pa_hpux_file_end (void)
9872 {
9873 unsigned int i;
9874 extern_symbol *p;
9875
9876 if (!NO_DEFERRED_PROFILE_COUNTERS)
9877 output_deferred_profile_counters ();
9878
9879 output_deferred_plabels ();
9880
9881 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9882 {
9883 tree decl = p->decl;
9884
9885 if (!TREE_ASM_WRITTEN (decl)
9886 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9887 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9888 }
9889
9890 VEC_free (extern_symbol, gc, extern_symbols);
9891 }
9892 #endif
9893
9894 /* Return true if a change from mode FROM to mode TO for a register
9895 in register class RCLASS is invalid. */
9896
9897 bool
9898 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9899 enum reg_class rclass)
9900 {
9901 if (from == to)
9902 return false;
9903
9904 /* Reject changes to/from complex and vector modes. */
9905 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9906 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9907 return true;
9908
9909 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9910 return false;
9911
9912 /* There is no way to load QImode or HImode values directly from
9913 memory. SImode loads to the FP registers are not zero extended.
9914 On the 64-bit target, this conflicts with the definition of
9915 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9916 with different sizes in the floating-point registers. */
9917 if (MAYBE_FP_REG_CLASS_P (rclass))
9918 return true;
9919
9920 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9921 in specific sets of registers. Thus, we cannot allow changing
9922 to a larger mode when it's larger than a word. */
9923 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9924 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9925 return true;
9926
9927 return false;
9928 }
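
/* For example (illustrative): reinterpreting a DFmode value held in
   a floating-point register as SImode is rejected above because the
   mode sizes differ in a MAYBE_FP_REG_CLASS, whereas a same-size
   change such as DImode <-> DFmode is allowed.  */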
9929
9930 /* Returns TRUE if it is a good idea to tie two pseudo registers
9931 when one has mode MODE1 and one has mode MODE2.
9932 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9933 for any hard reg, then this must be FALSE for correct output.
9934
9935 We should return FALSE for QImode and HImode because these modes
9936 are not ok in the floating-point registers. However, this prevents
9937 tying these modes to SImode and DImode in the general registers.
9938 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9939 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9940 in the floating-point registers. */
9941
9942 bool
9943 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9944 {
9945 /* Don't tie modes in different classes. */
9946 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9947 return false;
9948
9949 return true;
9950 }
9951
9952 \f
9953 /* Length in units of the trampoline instruction code. */
9954
9955 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9956
9957
9958 /* Output assembler code for a block containing the constant parts
9959 of a trampoline, leaving space for the variable parts.
9960
9961 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9962 and then branches to the specified routine.
9963
9964 This code template is copied from text segment to stack location
9965 and then patched with pa_trampoline_init to contain valid values,
9966 and then entered as a subroutine.
9967
9968 It is best to keep this as small as possible to avoid having to
9969 flush multiple lines in the cache. */
9970
9971 static void
9972 pa_asm_trampoline_template (FILE *f)
9973 {
9974 if (!TARGET_64BIT)
9975 {
9976 fputs ("\tldw 36(%r22),%r21\n", f);
9977 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9978 if (ASSEMBLER_DIALECT == 0)
9979 fputs ("\tdepi 0,31,2,%r21\n", f);
9980 else
9981 fputs ("\tdepwi 0,31,2,%r21\n", f);
9982 fputs ("\tldw 4(%r21),%r19\n", f);
9983 fputs ("\tldw 0(%r21),%r21\n", f);
9984 if (TARGET_PA_20)
9985 {
9986 fputs ("\tbve (%r21)\n", f);
9987 fputs ("\tldw 40(%r22),%r29\n", f);
9988 fputs ("\t.word 0\n", f);
9989 fputs ("\t.word 0\n", f);
9990 }
9991 else
9992 {
9993 fputs ("\tldsid (%r21),%r1\n", f);
9994 fputs ("\tmtsp %r1,%sr0\n", f);
9995 fputs ("\tbe 0(%sr0,%r21)\n", f);
9996 fputs ("\tldw 40(%r22),%r29\n", f);
9997 }
9998 fputs ("\t.word 0\n", f);
9999 fputs ("\t.word 0\n", f);
10000 fputs ("\t.word 0\n", f);
10001 fputs ("\t.word 0\n", f);
10002 }
10003 else
10004 {
10005 fputs ("\t.dword 0\n", f);
10006 fputs ("\t.dword 0\n", f);
10007 fputs ("\t.dword 0\n", f);
10008 fputs ("\t.dword 0\n", f);
10009 fputs ("\tmfia %r31\n", f);
10010 fputs ("\tldd 24(%r31),%r1\n", f);
10011 fputs ("\tldd 24(%r1),%r27\n", f);
10012 fputs ("\tldd 16(%r1),%r1\n", f);
10013 fputs ("\tbve (%r1)\n", f);
10014 fputs ("\tldd 32(%r31),%r31\n", f);
10015 fputs ("\t.dword 0 ; fptr\n", f);
10016 fputs ("\t.dword 0 ; static link\n", f);
10017 }
10018 }
10019
10020 /* Emit RTL insns to initialize the variable parts of a trampoline.
10021 FNADDR is an RTX for the address of the function's pure code.
10022 CXT is an RTX for the static chain value for the function.
10023
10024 Move the function address to the trampoline template at offset 36.
10025 Move the static chain value to trampoline template at offset 40.
10026 Move the trampoline address to trampoline template at offset 44.
10027 Move r19 to trampoline template at offset 48. The latter two
10028 words create a plabel for the indirect call to the trampoline.
10029
10030 A similar sequence is used for the 64-bit port but the plabel is
10031 at the beginning of the trampoline.
10032
10033 Finally, the cache entries for the trampoline code are flushed.
10034 This is necessary to ensure that the trampoline instruction sequence
10035 is written to memory prior to any attempts at prefetching the code
10036 sequence. */
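
/* Layout sketch (illustrative), matching the moves below:

       32-bit: word at offset 36 = function address, 40 = static
               chain, 44 = trampoline address, 48 = %r19 (44/48
               form the plabel used for the indirect call).
       64-bit: dword at offset 16 = code address (start + 32),
               24 = %r27; 56 = function address, 64 = static chain.
               The 16/24 pair is the plabel mentioned above.  */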
10037
10038 static void
10039 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10040 {
10041 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10042 rtx start_addr = gen_reg_rtx (Pmode);
10043 rtx end_addr = gen_reg_rtx (Pmode);
10044 rtx line_length = gen_reg_rtx (Pmode);
10045 rtx r_tramp, tmp;
10046
10047 emit_block_move (m_tramp, assemble_trampoline_template (),
10048 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10049 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10050
10051 if (!TARGET_64BIT)
10052 {
10053 tmp = adjust_address (m_tramp, Pmode, 36);
10054 emit_move_insn (tmp, fnaddr);
10055 tmp = adjust_address (m_tramp, Pmode, 40);
10056 emit_move_insn (tmp, chain_value);
10057
10058 /* Create a fat pointer for the trampoline. */
10059 tmp = adjust_address (m_tramp, Pmode, 44);
10060 emit_move_insn (tmp, r_tramp);
10061 tmp = adjust_address (m_tramp, Pmode, 48);
10062 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10063
10064 /* fdc and fic only use registers for the address to flush;
10065 they do not accept integer displacements. We align the
10066 start and end addresses to the beginning of their respective
10067 cache lines to minimize the number of lines flushed. */
10068 emit_insn (gen_andsi3 (start_addr, r_tramp,
10069 GEN_INT (-MIN_CACHELINE_SIZE)));
10070 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
10071 emit_insn (gen_andsi3 (end_addr, tmp,
10072 GEN_INT (-MIN_CACHELINE_SIZE)));
10073 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10074 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10075 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10076 gen_reg_rtx (Pmode),
10077 gen_reg_rtx (Pmode)));
10078 }
10079 else
10080 {
10081 tmp = adjust_address (m_tramp, Pmode, 56);
10082 emit_move_insn (tmp, fnaddr);
10083 tmp = adjust_address (m_tramp, Pmode, 64);
10084 emit_move_insn (tmp, chain_value);
10085
10086 /* Create a fat pointer for the trampoline. */
10087 tmp = adjust_address (m_tramp, Pmode, 16);
10088 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10089 tmp = adjust_address (m_tramp, Pmode, 24);
10090 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10091
10092 /* fdc and fic only use registers for the address to flush;
10093 they do not accept integer displacements. We align the
10094 start and end addresses to the beginning of their respective
10095 cache lines to minimize the number of lines flushed. */
10096 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10097 emit_insn (gen_anddi3 (start_addr, tmp,
10098 GEN_INT (-MIN_CACHELINE_SIZE)));
10099 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10100 emit_insn (gen_anddi3 (end_addr, tmp,
10101 GEN_INT (-MIN_CACHELINE_SIZE)));
10102 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10103 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10104 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10105 gen_reg_rtx (Pmode),
10106 gen_reg_rtx (Pmode)));
10107 }
10108 }
10109
10110 /* Perform any machine-specific adjustment in the address of the trampoline.
10111 ADDR contains the address that was passed to pa_trampoline_init.
10112 Adjust the trampoline address to point to the plabel at offset 44; the 2 added below marks the address as a plabel pointer. */
10113
10114 static rtx
10115 pa_trampoline_adjust_address (rtx addr)
10116 {
10117 if (!TARGET_64BIT)
10118 addr = memory_address (Pmode, plus_constant (addr, 46));
10119 return addr;
10120 }
10121
10122 static rtx
10123 pa_delegitimize_address (rtx orig_x)
10124 {
10125 rtx x = delegitimize_mem_from_attrs (orig_x);
10126
10127 if (GET_CODE (x) == LO_SUM
10128 && GET_CODE (XEXP (x, 1)) == UNSPEC
10129 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10130 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10131 return x;
10132 }
10133 \f
10134 static rtx
10135 pa_internal_arg_pointer (void)
10136 {
10137 /* The argument pointer and the hard frame pointer are the same in
10138 the 32-bit runtime, so we don't need a copy. */
10139 if (TARGET_64BIT)
10140 return copy_to_reg (virtual_incoming_args_rtx);
10141 else
10142 return virtual_incoming_args_rtx;
10143 }
10144
10145 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10146 Frame pointer elimination is automatically handled. */
10147
10148 static bool
10149 pa_can_eliminate (const int from, const int to)
10150 {
10151 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10152 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10153 return false;
10154
10155 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10156 ? ! frame_pointer_needed
10157 : true);
10158 }
10159
10160 /* Define the offset between two registers, FROM to be eliminated and its
10161 replacement TO, at the start of a routine. */
10162 HOST_WIDE_INT
10163 pa_initial_elimination_offset (int from, int to)
10164 {
10165 HOST_WIDE_INT offset;
10166
10167 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10168 && to == STACK_POINTER_REGNUM)
10169 offset = -compute_frame_size (get_frame_size (), 0);
10170 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10171 offset = 0;
10172 else
10173 gcc_unreachable ();
10174
10175 return offset;
10176 }
10177
10178 static void
10179 pa_conditional_register_usage (void)
10180 {
10181 int i;
10182
10183 if (!TARGET_64BIT && !TARGET_PA_11)
10184 {
10185 for (i = 56; i <= FP_REG_LAST; i++)
10186 fixed_regs[i] = call_used_regs[i] = 1;
10187 for (i = 33; i < 56; i += 2)
10188 fixed_regs[i] = call_used_regs[i] = 1;
10189 }
10190 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10191 {
10192 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10193 fixed_regs[i] = call_used_regs[i] = 1;
10194 }
10195 if (flag_pic)
10196 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10197 }
10198
10199 /* Target hook for c_mode_for_suffix. */
10200
10201 static enum machine_mode
10202 pa_c_mode_for_suffix (char suffix)
10203 {
10204 if (HPUX_LONG_DOUBLE_LIBRARY)
10205 {
10206 if (suffix == 'q')
10207 return TFmode;
10208 }
10209
10210 return VOIDmode;
10211 }
10212
10213 /* Target hook for function_section. */
10214
10215 static section *
10216 pa_function_section (tree decl, enum node_frequency freq,
10217 bool startup, bool exit)
10218 {
10219 /* Put functions in text section if target doesn't have named sections. */
10220 if (!targetm_common.have_named_sections)
10221 return text_section;
10222
10223 /* Force nested functions into the same section as the containing
10224 function. */
10225 if (decl
10226 && DECL_SECTION_NAME (decl) == NULL_TREE
10227 && DECL_CONTEXT (decl) != NULL_TREE
10228 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10229 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10230 return function_section (DECL_CONTEXT (decl));
10231
10232 /* Otherwise, use the default function section. */
10233 return default_function_section (decl, freq, startup, exit);
10234 }
10235
10236 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10237
10238 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10239 that need more than three instructions to load prior to reload. This
10240 limit is somewhat arbitrary. It takes three instructions to load a
10241 CONST_INT from memory but two are memory accesses. It may be better
10242 to increase the allowed range for CONST_INTS. We may also be able
10243 to handle CONST_DOUBLES. */
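
/* For instance (an illustrative sketch): in 64-bit mode a wide
   constant such as 0x123456789abcdef0 fails the CONST_INT tests
   below, so it is marked illegitimate and loaded from memory rather
   than synthesized inline with a long instruction sequence.  */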
10244
10245 static bool
10246 pa_legitimate_constant_p (enum machine_mode mode, rtx x)
10247 {
10248 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10249 return false;
10250
10251 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10252 return false;
10253
10254 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10255 return false;
10256
10257 if (TARGET_64BIT
10258 && HOST_BITS_PER_WIDE_INT > 32
10259 && GET_CODE (x) == CONST_INT
10260 && !reload_in_progress
10261 && !reload_completed
10262 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10263 && !cint_ok_for_move (INTVAL (x)))
10264 return false;
10265
10266 if (function_label_operand (x, mode))
10267 return false;
10268
10269 return true;
10270 }
10271
10272 #include "gt-pa.h"