/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2015 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "rtl.h"
#include "df.h"
#include "regs.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
\f
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
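
  /* Illustrative examples (not from the original source; the exact
     register names accepted are whatever decode_reg_name recognizes):
     -mfixed-range=fr4-fr31 fixes all of fr4..fr31, and
     -mfixed-range=fr4-fr11,fr20-fr31 fixes two separate ranges in one
     option, as permitted by the grammar above.  */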
  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now, and we always generate PIC
     code when in 64-bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
\f
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
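
/* Illustrative example (not from the original source): 0x12345800 is
   accepted, since its low 11 bits are zero and bit 31 is clear, while
   0x80000000 (as a positive HOST_WIDE_INT) is rejected because its
   sign changes when the value is truncated to 32 bits and then
   extended back to 64.  */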
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
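
/* Worked example (illustrative, not from the original source):
   x = 0xf0 is a shifted 5-bit field 01111 and is accepted -- here
   lsb_mask = 0x10 and t = (0x0f + 0x10) & ~0xf = 0x10, a power of two.
   x = 0x21 (0b100001) gives t = 3 and is rejected, because no
   sign-extended 5-bit field can have bit 5 set while bit 4 is clear.  */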
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
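
/* Worked example (illustrative, not from the original source): a mask
   whose complement is 0xff0 -- one contiguous run of ones -- is
   accepted: adding the run's least significant bit (0x10) yields
   0x1000, a power of two.  A mask whose complement is 0xffa has two
   separate runs of ones and fails the power-of-two test below.  */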
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
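
/* Illustrative example (not from the original source): MASK must be a
   single contiguous run of ones.  0x6 (0b110) passes because
   0x6 + 0x2 = 0x8 is a power of two; 0x5 (0b101) fails because
   0x5 + 0x1 = 0x6 is not.  */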
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
\f
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have a label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
        tmp = gen_reg_rtx (Pmode);
        if (flag_pic)
          emit_insn (gen_tgd_load_pic (tmp, addr));
        else
          emit_insn (gen_tgd_load (tmp, addr));
        ret = hppa_tls_call (tmp);
        break;

      case TLS_MODEL_LOCAL_DYNAMIC:
        ret = gen_reg_rtx (Pmode);
        tmp = gen_reg_rtx (Pmode);
        start_sequence ();
        if (flag_pic)
          emit_insn (gen_tld_load_pic (tmp, addr));
        else
          emit_insn (gen_tld_load (tmp, addr));
        t1 = hppa_tls_call (tmp);
        insn = get_insns ();
        end_sequence ();
        t2 = gen_reg_rtx (Pmode);
        emit_libcall_block (insn, t2, t1,
                            gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                            UNSPEC_TLSLDBASE));
        emit_insn (gen_tld_offset_load (ret, addr, t2));
        break;

      case TLS_MODEL_INITIAL_EXEC:
        tp = gen_reg_rtx (Pmode);
        tmp = gen_reg_rtx (Pmode);
        ret = gen_reg_rtx (Pmode);
        emit_insn (gen_tp_load (tp));
        if (flag_pic)
          emit_insn (gen_tie_load_pic (tmp, addr));
        else
          emit_insn (gen_tie_load (tmp, addr));
        emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
        break;

      case TLS_MODEL_LOCAL_EXEC:
        tp = gen_reg_rtx (Pmode);
        ret = gen_reg_rtx (Pmode);
        emit_insn (gen_tp_load (tp));
        emit_insn (gen_tle_load (ret, addr, tp));
        break;

      default:
        gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
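
/* Illustrative examples (not from the original source): both
   (ashift (reg) (const_int 2)) and (mult (reg) (const_int 4)) satisfy
   this predicate, since a shift by 2 and a multiply by 4 both match
   the scale of a shadd instruction.  */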
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
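
/* Worked example (illustrative, not from the original source): for an
   SImode reference at X + 100000, mask is 0x3fff, and
   100000 & 0x3fff = 1696, which is below the halfway point of 8192,
   so we round down to Y = 100000 & ~0x3fff = 98304.  Z = X + 98304 is
   computed once, and the reference becomes memory (Z + 1696), whose
   displacement fits in 14 bits.  */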

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF).  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try to rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big but can be divided evenly by shadd_const,
	     the quotient can be added to (reg).  This allows more scaled
	     indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST, and LO_SUM addresses are cost 1,
   a HIGH is cost 2, and everything else (including addresses
   involving symbolic constants) is cost 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
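      /* For instance (illustrative): a DImode multiply on a 32-bit
	 target gives factor = 8 / 4 = 2, so the cost below is
	 2 * 2 = 4 times the SImode multiply cost.  */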
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && (MEM_P (operand1)
	  || (GET_CODE (operand1) == SUBREG
	      && MEM_P (XEXP (operand1, 0))))
      && !floating_point_store_memory_operand (operand1, mode))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (reg_plus_base_memory_operand (operand1, mode)
	  && !(TARGET_PA_20
	       && !TARGET_ELF32
	       && INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && (MEM_P (operand0)
	       || (GET_CODE (operand0) == SUBREG
		   && MEM_P (XEXP (operand0, 0))))
	   && !floating_point_store_memory_operand (operand0, mode))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (reg_plus_base_memory_operand (operand0, mode)
	  && !(TARGET_PA_20
	       && !TARGET_ELF32
	       && INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.  For the most part,
     this is only necessary for SImode and DImode.

     Use scratch_reg to hold the address of the memory location.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      if (operand1 == CONST0_RTX (mode))
	{
	  emit_insn (gen_rtx_SET (operand0, operand1));
	  return 1;
	}

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      pa_emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
1836 the ultimate destination. */
1837 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1838
1839 emit_move_insn (scratch_reg, operand1);
1840 }
1841
1842 /* And emit the insn to set the ultimate destination. We know that
1843 the scratch register has the same mode as the destination at this
1844 point. */
1845 emit_move_insn (operand0, scratch_reg);
1846 return 1;
1847 }
1848 /* Handle the most common case: storing into a register. */
1849 else if (register_operand (operand0, mode))
1850 {
1851 /* Legitimize TLS symbol references. This happens for references
1852 that aren't legitimate constants. */
1853 if (PA_SYMBOL_REF_TLS_P (operand1))
1854 operand1 = legitimize_tls_address (operand1);
1855
1856 if (register_operand (operand1, mode)
1857 || (GET_CODE (operand1) == CONST_INT
1858 && pa_cint_ok_for_move (INTVAL (operand1)))
1859 || (operand1 == CONST0_RTX (mode))
1860 || (GET_CODE (operand1) == HIGH
1861 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1862 /* Only `general_operands' can come here, so MEM is ok. */
1863 || GET_CODE (operand1) == MEM)
1864 {
1865 /* Various sets are created during RTL generation which don't
1866 have the REG_POINTER flag correctly set. After the CSE pass,
1867 instruction recognition can fail if we don't consistently
1868 set this flag when performing register copies. This should
1869 also improve the opportunities for creating insns that use
1870 unscaled indexing. */
1871 if (REG_P (operand0) && REG_P (operand1))
1872 {
1873 if (REG_POINTER (operand1)
1874 && !REG_POINTER (operand0)
1875 && !HARD_REGISTER_P (operand0))
1876 copy_reg_pointer (operand0, operand1);
1877 }
1878
1879 /* When MEMs are broken out, the REG_POINTER flag doesn't
1880 get set. In some cases, we can set the REG_POINTER flag
1881 from the declaration for the MEM. */
1882 if (REG_P (operand0)
1883 && GET_CODE (operand1) == MEM
1884 && !REG_POINTER (operand0))
1885 {
1886 tree decl = MEM_EXPR (operand1);
1887
1888 /* Set the register pointer flag and register alignment
1889 if the declaration for this memory reference is a
1890 pointer type. */
1891 if (decl)
1892 {
1893 tree type;
1894
1895 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1896 tree operand 1. */
1897 if (TREE_CODE (decl) == COMPONENT_REF)
1898 decl = TREE_OPERAND (decl, 1);
1899
1900 type = TREE_TYPE (decl);
1901 type = strip_array_types (type);
1902
1903 if (POINTER_TYPE_P (type))
1904 {
1905 int align;
1906
1907 type = TREE_TYPE (type);
1908 /* Using TYPE_ALIGN_OK is rather conservative as
1909 only the ada frontend actually sets it. */
1910 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1911 : BITS_PER_UNIT);
1912 mark_reg_pointer (operand0, align);
1913 }
1914 }
1915 }
1916
1917 emit_insn (gen_rtx_SET (operand0, operand1));
1918 return 1;
1919 }
1920 }
1921 else if (GET_CODE (operand0) == MEM)
1922 {
1923 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1924 && !(reload_in_progress || reload_completed))
1925 {
1926 rtx temp = gen_reg_rtx (DFmode);
1927
1928 emit_insn (gen_rtx_SET (temp, operand1));
1929 emit_insn (gen_rtx_SET (operand0, temp));
1930 return 1;
1931 }
1932 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1933 {
1934 /* Run this case quickly. */
1935 emit_insn (gen_rtx_SET (operand0, operand1));
1936 return 1;
1937 }
1938 if (! (reload_in_progress || reload_completed))
1939 {
1940 operands[0] = validize_mem (operand0);
1941 operands[1] = operand1 = force_reg (mode, operand1);
1942 }
1943 }
1944
1945 /* Simplify the source if we need to.
1946 Note we do have to handle function labels here, even though we do
1947 not consider them legitimate constants. Loop optimizations can
1948 call the emit_move_* routines with one as a source. */
1949 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1950 || (GET_CODE (operand1) == HIGH
1951 && symbolic_operand (XEXP (operand1, 0), mode))
1952 || function_label_operand (operand1, VOIDmode)
1953 || tls_referenced_p (operand1))
1954 {
1955 int ishighonly = 0;
1956
1957 if (GET_CODE (operand1) == HIGH)
1958 {
1959 ishighonly = 1;
1960 operand1 = XEXP (operand1, 0);
1961 }
1962 if (symbolic_operand (operand1, mode))
1963 {
1964 /* Argh. The assembler and linker can't handle arithmetic
1965 involving plabels.
1966
1967 So we force the plabel into memory, load operand0 from
1968 the memory location, then add in the constant part. */
1969 if ((GET_CODE (operand1) == CONST
1970 && GET_CODE (XEXP (operand1, 0)) == PLUS
1971 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1972 VOIDmode))
1973 || function_label_operand (operand1, VOIDmode))
1974 {
1975 rtx temp, const_part;
1976
1977 /* Figure out what (if any) scratch register to use. */
1978 if (reload_in_progress || reload_completed)
1979 {
1980 scratch_reg = scratch_reg ? scratch_reg : operand0;
1981 /* SCRATCH_REG will hold an address and maybe the actual
1982 data. We want it in WORD_MODE regardless of what mode it
1983 was originally given to us. */
1984 scratch_reg = force_mode (word_mode, scratch_reg);
1985 }
1986 else if (flag_pic)
1987 scratch_reg = gen_reg_rtx (Pmode);
1988
1989 if (GET_CODE (operand1) == CONST)
1990 {
1991 /* Save away the constant part of the expression. */
1992 const_part = XEXP (XEXP (operand1, 0), 1);
1993 gcc_assert (GET_CODE (const_part) == CONST_INT);
1994
1995 /* Force the function label into memory. */
1996 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1997 }
1998 else
1999 {
2000 /* No constant part. */
2001 const_part = NULL_RTX;
2002
2003 /* Force the function label into memory. */
2004 temp = force_const_mem (mode, operand1);
2005 }
2006
2007
2008 /* Get the address of the memory location. PIC-ify it if
2009 necessary. */
2010 temp = XEXP (temp, 0);
2011 if (flag_pic)
2012 temp = legitimize_pic_address (temp, mode, scratch_reg);
2013
2014 /* Put the address of the memory location into our destination
2015 register. */
2016 operands[1] = temp;
2017 pa_emit_move_sequence (operands, mode, scratch_reg);
2018
2019 /* Now load from the memory location into our destination
2020 register. */
2021 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2022 pa_emit_move_sequence (operands, mode, scratch_reg);
2023
2024 /* And add back in the constant part. */
2025 if (const_part != NULL_RTX)
2026 expand_inc (operand0, const_part);
2027
2028 return 1;
2029 }
2030
2031 if (flag_pic)
2032 {
2033 rtx_insn *insn;
2034 rtx temp;
2035
2036 if (reload_in_progress || reload_completed)
2037 {
2038 temp = scratch_reg ? scratch_reg : operand0;
2039 /* TEMP will hold an address and maybe the actual
2040 data. We want it in WORD_MODE regardless of what mode it
2041 was originally given to us. */
2042 temp = force_mode (word_mode, temp);
2043 }
2044 else
2045 temp = gen_reg_rtx (Pmode);
2046
2047 /* Force (const (plus (symbol) (const_int))) to memory
2048 if the const_int will not fit in 14 bits. Although
2049 this requires a relocation, the instruction sequence
2050 needed to load the value is shorter. */
2051 if (GET_CODE (operand1) == CONST
2052 && GET_CODE (XEXP (operand1, 0)) == PLUS
2053 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2054 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2055 {
2056 rtx x, m = force_const_mem (mode, operand1);
2057
2058 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2059 x = replace_equiv_address (m, x);
2060 insn = emit_move_insn (operand0, x);
2061 }
2062 else
2063 {
2064 operands[1] = legitimize_pic_address (operand1, mode, temp);
2065 if (REG_P (operand0) && REG_P (operands[1]))
2066 copy_reg_pointer (operand0, operands[1]);
2067 insn = emit_move_insn (operand0, operands[1]);
2068 }
2069
2070 /* Put a REG_EQUAL note on this insn. */
2071 set_unique_reg_note (insn, REG_EQUAL, operand1);
2072 }
2073 /* On the HPPA, references to data space are supposed to use dp,
2074 register 27, but showing it in the RTL inhibits various cse
2075 and loop optimizations. */
2076 else
2077 {
2078 rtx temp, set;
2079
2080 if (reload_in_progress || reload_completed)
2081 {
2082 temp = scratch_reg ? scratch_reg : operand0;
2083 /* TEMP will hold an address and maybe the actual
2084 data. We want it in WORD_MODE regardless of what mode it
2085 was originally given to us. */
2086 temp = force_mode (word_mode, temp);
2087 }
2088 else
2089 temp = gen_reg_rtx (mode);
2090
2091 /* Loading a SYMBOL_REF into a register makes that register
2092 safe to be used as the base in an indexed address.
2093
2094 Don't mark hard registers though. That loses. */
2095 if (GET_CODE (operand0) == REG
2096 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2097 mark_reg_pointer (operand0, BITS_PER_UNIT);
2098 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2099 mark_reg_pointer (temp, BITS_PER_UNIT);
2100
2101 if (ishighonly)
2102 set = gen_rtx_SET (operand0, temp);
2103 else
2104 set = gen_rtx_SET (operand0,
2105 gen_rtx_LO_SUM (mode, temp, operand1));
2106
2107 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2108 emit_insn (set);
2109
2110 }
2111 return 1;
2112 }
2113 else if (tls_referenced_p (operand1))
2114 {
2115 rtx tmp = operand1;
2116 rtx addend = NULL;
2117
2118 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2119 {
2120 addend = XEXP (XEXP (tmp, 0), 1);
2121 tmp = XEXP (XEXP (tmp, 0), 0);
2122 }
2123
2124 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2125 tmp = legitimize_tls_address (tmp);
2126 if (addend)
2127 {
2128 tmp = gen_rtx_PLUS (mode, tmp, addend);
2129 tmp = force_operand (tmp, operands[0]);
2130 }
2131 operands[1] = tmp;
2132 }
2133 else if (GET_CODE (operand1) != CONST_INT
2134 || !pa_cint_ok_for_move (INTVAL (operand1)))
2135 {
2136 rtx temp;
2137 rtx_insn *insn;
2138 rtx op1 = operand1;
2139 HOST_WIDE_INT value = 0;
2140 HOST_WIDE_INT insv = 0;
2141 int insert = 0;
2142
2143 if (GET_CODE (operand1) == CONST_INT)
2144 value = INTVAL (operand1);
2145
2146 if (TARGET_64BIT
2147 && GET_CODE (operand1) == CONST_INT
2148 && HOST_BITS_PER_WIDE_INT > 32
2149 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2150 {
2151 HOST_WIDE_INT nval;
2152
2153 /* Extract the low-order 32 bits of the value and sign extend.
2154 If the new value is the same as the original value, we can
2155 use the original value as-is. If the new value is
2156 different, we use it and insert the most significant 32 bits
2157 of the original value into the final result. */
2158 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2159 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2160 if (value != nval)
2161 {
2162 #if HOST_BITS_PER_WIDE_INT > 32
2163 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2164 #endif
2165 insert = 1;
2166 value = nval;
2167 operand1 = GEN_INT (nval);
2168 }
2169 }
2170
2171 if (reload_in_progress || reload_completed)
2172 temp = scratch_reg ? scratch_reg : operand0;
2173 else
2174 temp = gen_reg_rtx (mode);
2175
2176 /* We don't directly split DImode constants on 32-bit targets
2177 because PLUS uses an 11-bit immediate and the insn sequence
2178 generated is not as efficient as the one using HIGH/LO_SUM. */
2179 if (GET_CODE (operand1) == CONST_INT
2180 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2181 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2182 && !insert)
2183 {
2184 /* Directly break constant into high and low parts. This
2185 provides better optimization opportunities because various
2186 passes recognize constants split with PLUS but not LO_SUM.
2187 We use a 14-bit signed low part except when the addition
2188 of 0x4000 to the high part might change the sign of the
2189 high part. */
2190 HOST_WIDE_INT low = value & 0x3fff;
2191 HOST_WIDE_INT high = value & ~ 0x3fff;
2192
2193 if (low >= 0x2000)
2194 {
2195 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2196 high += 0x2000;
2197 else
2198 high += 0x4000;
2199 }
2200
2201 low = value - high;
2202
2203 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2204 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2205 }
2206 else
2207 {
2208 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2209 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2210 }
2211
2212 insn = emit_move_insn (operands[0], operands[1]);
2213
2214 /* Now insert the most significant 32 bits of the value
2215 into the register. When we don't have a second register
2216 available, it could take up to nine instructions to load
2217 a 64-bit integer constant. Prior to reload, we force
2218 constants that would take more than three instructions
2219 to load to the constant pool. During and after reload,
2220 we have to handle all possible values. */
2221 if (insert)
2222 {
2223 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2224 register and the value to be inserted is outside the
2225 range that can be loaded with three depdi instructions. */
2226 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2227 {
2228 operand1 = GEN_INT (insv);
2229
2230 emit_insn (gen_rtx_SET (temp,
2231 gen_rtx_HIGH (mode, operand1)));
2232 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2233 if (mode == DImode)
2234 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2235 const0_rtx, temp));
2236 else
2237 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2238 const0_rtx, temp));
2239 }
2240 else
2241 {
2242 int len = 5, pos = 27;
2243
2244 /* Insert the bits using the depdi instruction. */
2245 while (pos >= 0)
2246 {
2247 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2248 HOST_WIDE_INT sign = v5 < 0;
2249
2250 /* Left extend the insertion. */
2251 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2252 while (pos > 0 && (insv & 1) == sign)
2253 {
2254 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2255 len += 1;
2256 pos -= 1;
2257 }
2258
2259 if (mode == DImode)
2260 insn = emit_insn (gen_insvdi (operand0,
2261 GEN_INT (len),
2262 GEN_INT (pos),
2263 GEN_INT (v5)));
2264 else
2265 insn = emit_insn (gen_insvsi (operand0,
2266 GEN_INT (len),
2267 GEN_INT (pos),
2268 GEN_INT (v5)));
2269
2270 len = pos > 0 && pos < 5 ? pos : 5;
2271 pos -= len;
2272 }
2273 }
2274 }
2275
2276 set_unique_reg_note (insn, REG_EQUAL, op1);
2277
2278 return 1;
2279 }
2280 }
2281 /* Now have insn-emit do whatever it normally does. */
2282 return 0;
2283 }
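
/* A worked example of the PLUS split above (illustrative values):
   0x12347fff yields low = 0x3fff >= 0x2000, so high becomes
   0x12348000 and low becomes -1, giving "ldil L'0x12348000,%r"
   followed by "ldo -1(%r),%r". The sketch below restates the split
   in isolation; it is not code from this port, and the HImode and
   0x7fffc000 overflow guards handled above are deliberately omitted. */
#if 0
static void
split_high_low14 (long value, long *high, long *low)
{
  *low = value & 0x3fff;
  *high = value & ~0x3fff;
  /* Keep the low part a signed 14-bit value in [-0x2000, 0x1fff]. */
  if (*low >= 0x2000)
    *high += 0x4000;
  *low = value - *high;
}
#endif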
2284
2285 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2286 it will need a link/runtime reloc). */
2287
2288 int
2289 pa_reloc_needed (tree exp)
2290 {
2291 int reloc = 0;
2292
2293 switch (TREE_CODE (exp))
2294 {
2295 case ADDR_EXPR:
2296 return 1;
2297
2298 case POINTER_PLUS_EXPR:
2299 case PLUS_EXPR:
2300 case MINUS_EXPR:
2301 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2302 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2303 break;
2304
2305 CASE_CONVERT:
2306 case NON_LVALUE_EXPR:
2307 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2308 break;
2309
2310 case CONSTRUCTOR:
2311 {
2312 tree value;
2313 unsigned HOST_WIDE_INT ix;
2314
2315 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2316 if (value)
2317 reloc |= pa_reloc_needed (value);
2318 }
2319 break;
2320
2321 case ERROR_MARK:
2322 break;
2323
2324 default:
2325 break;
2326 }
2327 return reloc;
2328 }
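
/* For example (hypothetical C input), the initializer of
   "static int *p = &x[2];" contains an ADDR_EXPR underneath a
   POINTER_PLUS_EXPR, so pa_reloc_needed returns nonzero for it,
   while a purely arithmetic initializer such as "static int n = 3 + 4;"
   contains no ADDR_EXPR and needs no reloc. */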
2329
2330 \f
2331 /* Return the best assembler insn template
2332 for moving operands[1] into operands[0] as a fullword. */
2333 const char *
2334 pa_singlemove_string (rtx *operands)
2335 {
2336 HOST_WIDE_INT intval;
2337
2338 if (GET_CODE (operands[0]) == MEM)
2339 return "stw %r1,%0";
2340 if (GET_CODE (operands[1]) == MEM)
2341 return "ldw %1,%0";
2342 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2343 {
2344 long i;
2345 REAL_VALUE_TYPE d;
2346
2347 gcc_assert (GET_MODE (operands[1]) == SFmode);
2348
2349 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2350 bit pattern. */
2351 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2352 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2353
2354 operands[1] = GEN_INT (i);
2355 /* Fall through to CONST_INT case. */
2356 }
2357 if (GET_CODE (operands[1]) == CONST_INT)
2358 {
2359 intval = INTVAL (operands[1]);
2360
2361 if (VAL_14_BITS_P (intval))
2362 return "ldi %1,%0";
2363 else if ((intval & 0x7ff) == 0)
2364 return "ldil L'%1,%0";
2365 else if (pa_zdepi_cint_p (intval))
2366 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2367 else
2368 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2369 }
2370 return "copy %1,%0";
2371 }
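
/* Examples of the template selection above (illustrative constants):
   0x1234 fits in 14 bits, so "ldi" suffices; 0x12345800 has its low
   11 bits clear, so a single "ldil" works; 0x000f8000 is a short
   bit string that "zdepi" can deposit; anything else, e.g.
   0x12345678, takes the two-insn "ldil"/"ldo" sequence. */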
2372 \f
2373
2374 /* Compute position (in OP[1]) and width (in OP[2])
2375 useful for copying IMM to a register using the zdepi
2376 instructions. Store the immediate value to insert in OP[0]. */
2377 static void
2378 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2379 {
2380 int lsb, len;
2381
2382 /* Find the least significant set bit in IMM. */
2383 for (lsb = 0; lsb < 32; lsb++)
2384 {
2385 if ((imm & 1) != 0)
2386 break;
2387 imm >>= 1;
2388 }
2389
2390 /* Choose variants based on *sign* of the 5-bit field. */
2391 if ((imm & 0x10) == 0)
2392 len = (lsb <= 28) ? 4 : 32 - lsb;
2393 else
2394 {
2395 /* Find the width of the bitstring in IMM. */
2396 for (len = 5; len < 32 - lsb; len++)
2397 {
2398 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2399 break;
2400 }
2401
2402 /* Sign extend IMM as a 5-bit value. */
2403 imm = (imm & 0xf) - 0x10;
2404 }
2405
2406 op[0] = imm;
2407 op[1] = 31 - lsb;
2408 op[2] = len;
2409 }
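
/* Worked example (illustrative): IMM = 0x380 (binary 1110000000).
   The least significant set bit is at lsb = 7 and the shifted value
   is 7, whose bit 4 is clear, so len = 4. Thus op[0] = 7,
   op[1] = 31 - 7 = 24 and op[2] = 4, i.e. "zdepi 7,24,4,%r" deposits
   the 4-bit field 0111 with its low bit at machine bit 7,
   reconstructing 0x380. compute_zdepdi_operands below follows the
   same scheme with positions measured from bit 63. */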
2410
2411 /* Compute position (in OP[1]) and width (in OP[2])
2412 useful for copying IMM to a register using the depdi,z
2413 instructions. Store the immediate value to insert in OP[0]. */
2414
2415 static void
2416 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2417 {
2418 int lsb, len, maxlen;
2419
2420 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2421
2422 /* Find the least significant set bit in IMM. */
2423 for (lsb = 0; lsb < maxlen; lsb++)
2424 {
2425 if ((imm & 1) != 0)
2426 break;
2427 imm >>= 1;
2428 }
2429
2430 /* Choose variants based on *sign* of the 5-bit field. */
2431 if ((imm & 0x10) == 0)
2432 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2433 else
2434 {
2435 /* Find the width of the bitstring in IMM. */
2436 for (len = 5; len < maxlen - lsb; len++)
2437 {
2438 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2439 break;
2440 }
2441
2442 /* Extend length if host is narrow and IMM is negative. */
2443 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2444 len += 32;
2445
2446 /* Sign extend IMM as a 5-bit value. */
2447 imm = (imm & 0xf) - 0x10;
2448 }
2449
2450 op[0] = imm;
2451 op[1] = 63 - lsb;
2452 op[2] = len;
2453 }
2454
2455 /* Output assembler code to perform a doubleword move insn
2456 with operands OPERANDS. */
2457
2458 const char *
2459 pa_output_move_double (rtx *operands)
2460 {
2461 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2462 rtx latehalf[2];
2463 rtx addreg0 = 0, addreg1 = 0;
2464
2465 /* First classify both operands. */
2466
2467 if (REG_P (operands[0]))
2468 optype0 = REGOP;
2469 else if (offsettable_memref_p (operands[0]))
2470 optype0 = OFFSOP;
2471 else if (GET_CODE (operands[0]) == MEM)
2472 optype0 = MEMOP;
2473 else
2474 optype0 = RNDOP;
2475
2476 if (REG_P (operands[1]))
2477 optype1 = REGOP;
2478 else if (CONSTANT_P (operands[1]))
2479 optype1 = CNSTOP;
2480 else if (offsettable_memref_p (operands[1]))
2481 optype1 = OFFSOP;
2482 else if (GET_CODE (operands[1]) == MEM)
2483 optype1 = MEMOP;
2484 else
2485 optype1 = RNDOP;
2486
2487 /* Check for cases that the operand constraints are not
2488 supposed to allow. */
2489 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2490
2491 /* Handle copies between general and floating registers. */
2492
2493 if (optype0 == REGOP && optype1 == REGOP
2494 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2495 {
2496 if (FP_REG_P (operands[0]))
2497 {
2498 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2499 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2500 return "{fldds|fldd} -16(%%sp),%0";
2501 }
2502 else
2503 {
2504 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2505 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2506 return "{ldws|ldw} -12(%%sp),%R0";
2507 }
2508 }
2509
2510 /* Handle auto-decrementing and auto-incrementing loads and stores
2511 specifically, since the structure of the function doesn't work
2512 for them without major modification. Do it better when we teach
2513 this port about the general inc/dec addressing of PA.
2514 (This was written by tege. Chide him if it doesn't work.) */
2515
2516 if (optype0 == MEMOP)
2517 {
2518 /* We have to output the address syntax ourselves, since print_operand
2519 doesn't deal with the addresses we want to use. Fix this later. */
2520
2521 rtx addr = XEXP (operands[0], 0);
2522 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2523 {
2524 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2525
2526 operands[0] = XEXP (addr, 0);
2527 gcc_assert (GET_CODE (operands[1]) == REG
2528 && GET_CODE (operands[0]) == REG);
2529
2530 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2531
2532 /* No overlap between high target register and address
2533 register. (We do this in a non-obvious way to
2534 save a register file writeback) */
2535 if (GET_CODE (addr) == POST_INC)
2536 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2537 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2538 }
2539 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2540 {
2541 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2542
2543 operands[0] = XEXP (addr, 0);
2544 gcc_assert (GET_CODE (operands[1]) == REG
2545 && GET_CODE (operands[0]) == REG);
2546
2547 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2548 /* No overlap between high target register and address
2549 register. (We do this in a non-obvious way to save a
2550 register file writeback) */
2551 if (GET_CODE (addr) == PRE_INC)
2552 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2553 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2554 }
2555 }
2556 if (optype1 == MEMOP)
2557 {
2558 /* We have to output the address syntax ourselves, since print_operand
2559 doesn't deal with the addresses we want to use. Fix this later. */
2560
2561 rtx addr = XEXP (operands[1], 0);
2562 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2563 {
2564 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2565
2566 operands[1] = XEXP (addr, 0);
2567 gcc_assert (GET_CODE (operands[0]) == REG
2568 && GET_CODE (operands[1]) == REG);
2569
2570 if (!reg_overlap_mentioned_p (high_reg, addr))
2571 {
2572 /* No overlap between high target register and address
2573 register. (We do this in a non-obvious way to
2574 save a register file writeback) */
2575 if (GET_CODE (addr) == POST_INC)
2576 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2577 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2578 }
2579 else
2580 {
2581 /* This is an undefined situation. We should load into the
2582 address register *and* update that register. Probably
2583 we don't need to handle this at all. */
2584 if (GET_CODE (addr) == POST_INC)
2585 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2586 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2587 }
2588 }
2589 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2590 {
2591 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2592
2593 operands[1] = XEXP (addr, 0);
2594 gcc_assert (GET_CODE (operands[0]) == REG
2595 && GET_CODE (operands[1]) == REG);
2596
2597 if (!reg_overlap_mentioned_p (high_reg, addr))
2598 {
2599 /* No overlap between high target register and address
2600 register. (We do this in a non-obvious way to
2601 save a register file writeback) */
2602 if (GET_CODE (addr) == PRE_INC)
2603 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2604 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2605 }
2606 else
2607 {
2608 /* This is an undefined situation. We should load into the
2609 address register *and* update that register. Probably
2610 we don't need to handle this at all. */
2611 if (GET_CODE (addr) == PRE_INC)
2612 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2613 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2614 }
2615 }
2616 else if (GET_CODE (addr) == PLUS
2617 && GET_CODE (XEXP (addr, 0)) == MULT)
2618 {
2619 rtx xoperands[4];
2620
2621 /* Load address into left half of destination register. */
2622 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2623 xoperands[1] = XEXP (addr, 1);
2624 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2625 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2626 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2627 xoperands);
2628 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2629 }
2630 else if (GET_CODE (addr) == PLUS
2631 && REG_P (XEXP (addr, 0))
2632 && REG_P (XEXP (addr, 1)))
2633 {
2634 rtx xoperands[3];
2635
2636 /* Load address into left half of destination register. */
2637 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2638 xoperands[1] = XEXP (addr, 0);
2639 xoperands[2] = XEXP (addr, 1);
2640 output_asm_insn ("{addl|add,l} %1,%2,%0",
2641 xoperands);
2642 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2643 }
2644 }
2645
2646 /* If an operand is an unoffsettable memory ref, find a register
2647 we can increment temporarily to make it refer to the second word. */
2648
2649 if (optype0 == MEMOP)
2650 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2651
2652 if (optype1 == MEMOP)
2653 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2654
2655 /* Ok, we can do one word at a time.
2656 Normally we do the low-numbered word first.
2657
2658 In either case, set up in LATEHALF the operands to use
2659 for the high-numbered word and in some cases alter the
2660 operands in OPERANDS to be suitable for the low-numbered word. */
2661
2662 if (optype0 == REGOP)
2663 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2664 else if (optype0 == OFFSOP)
2665 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2666 else
2667 latehalf[0] = operands[0];
2668
2669 if (optype1 == REGOP)
2670 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2671 else if (optype1 == OFFSOP)
2672 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2673 else if (optype1 == CNSTOP)
2674 split_double (operands[1], &operands[1], &latehalf[1]);
2675 else
2676 latehalf[1] = operands[1];
2677
2678 /* If the first move would clobber the source of the second one,
2679 do them in the other order.
2680
2681 This can happen in two cases:
2682
2683 mem -> register where the first half of the destination register
2684 is the same register used in the memory's address. Reload
2685 can create such insns.
2686
2687 mem in this case will be either register indirect or register
2688 indirect plus a valid offset.
2689
2690 register -> register move where REGNO(dst) == REGNO(src + 1)
2691 someone (Tim/Tege?) claimed this can happen for parameter loads.
2692
2693 Handle mem -> register case first. */
2694 if (optype0 == REGOP
2695 && (optype1 == MEMOP || optype1 == OFFSOP)
2696 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2697 {
2698 /* Do the late half first. */
2699 if (addreg1)
2700 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2701 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2702
2703 /* Then clobber. */
2704 if (addreg1)
2705 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2706 return pa_singlemove_string (operands);
2707 }
2708
2709 /* Now handle register -> register case. */
2710 if (optype0 == REGOP && optype1 == REGOP
2711 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2712 {
2713 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2714 return pa_singlemove_string (operands);
2715 }
2716
2717 /* Normal case: do the two words, low-numbered first. */
2718
2719 output_asm_insn (pa_singlemove_string (operands), operands);
2720
2721 /* Make any unoffsettable addresses point at high-numbered word. */
2722 if (addreg0)
2723 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2724 if (addreg1)
2725 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2726
2727 /* Do that word. */
2728 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2729
2730 /* Undo the adds we just did. */
2731 if (addreg0)
2732 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2733 if (addreg1)
2734 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2735
2736 return "";
2737 }
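
/* An example of the clobber ordering above (illustrative register
   numbers): moving the offsettable ref 4(%r3) into the pair %r3/%r4
   would clobber the base register if the low word were loaded first,
   so the halves are emitted in the other order:

       ldw 8(%r3),%r4    ; late half first
       ldw 4(%r3),%r3    ; then the clobbering low word

   Likewise a pair copy with REGNO (dst) == REGNO (src) + 1, such as
   %r5/%r6 <- %r4/%r5, emits "copy %r5,%r6" before "copy %r4,%r5". */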
2738 \f
2739 const char *
2740 pa_output_fp_move_double (rtx *operands)
2741 {
2742 if (FP_REG_P (operands[0]))
2743 {
2744 if (FP_REG_P (operands[1])
2745 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2746 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2747 else
2748 output_asm_insn ("fldd%F1 %1,%0", operands);
2749 }
2750 else if (FP_REG_P (operands[1]))
2751 {
2752 output_asm_insn ("fstd%F0 %1,%0", operands);
2753 }
2754 else
2755 {
2756 rtx xoperands[2];
2757
2758 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2759
2760 /* This is a pain. You have to be prepared to deal with an
2761 arbitrary address here including pre/post increment/decrement.
2762
2763 So avoid this in the MD. */
2764 gcc_assert (GET_CODE (operands[0]) == REG);
2765
2766 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2767 xoperands[0] = operands[0];
2768 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2769 }
2770 return "";
2771 }
2772 \f
2773 /* Return a REG that occurs in ADDR with coefficient 1.
2774 ADDR can be effectively incremented by incrementing REG. */
2775
2776 static rtx
2777 find_addr_reg (rtx addr)
2778 {
2779 while (GET_CODE (addr) == PLUS)
2780 {
2781 if (GET_CODE (XEXP (addr, 0)) == REG)
2782 addr = XEXP (addr, 0);
2783 else if (GET_CODE (XEXP (addr, 1)) == REG)
2784 addr = XEXP (addr, 1);
2785 else if (CONSTANT_P (XEXP (addr, 0)))
2786 addr = XEXP (addr, 1);
2787 else if (CONSTANT_P (XEXP (addr, 1)))
2788 addr = XEXP (addr, 0);
2789 else
2790 gcc_unreachable ();
2791 }
2792 gcc_assert (GET_CODE (addr) == REG);
2793 return addr;
2794 }
2795
2796 /* Emit code to perform a block move.
2797
2798 OPERANDS[0] is the destination pointer as a REG, clobbered.
2799 OPERANDS[1] is the source pointer as a REG, clobbered.
2800 OPERANDS[2] is a register for temporary storage.
2801 OPERANDS[3] is a register for temporary storage.
2802 OPERANDS[4] is the size as a CONST_INT.
2803 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2804 OPERANDS[6] is another temporary register. */
2805
2806 const char *
2807 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2808 {
2809 int align = INTVAL (operands[5]);
2810 unsigned long n_bytes = INTVAL (operands[4]);
2811
2812 /* We can't move more than a word at a time because the PA
2813 has no integer move insns wider than a word. (Could use fp mem ops?) */
2814 if (align > (TARGET_64BIT ? 8 : 4))
2815 align = (TARGET_64BIT ? 8 : 4);
2816
2817 /* Note that we know each loop below will execute at least twice
2818 (else we would have open-coded the copy). */
2819 switch (align)
2820 {
2821 case 8:
2822 /* Pre-adjust the loop counter. */
2823 operands[4] = GEN_INT (n_bytes - 16);
2824 output_asm_insn ("ldi %4,%2", operands);
2825
2826 /* Copying loop. */
2827 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2828 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2829 output_asm_insn ("std,ma %3,8(%0)", operands);
2830 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2831 output_asm_insn ("std,ma %6,8(%0)", operands);
2832
2833 /* Handle the residual. There could be up to 15 bytes of
2834 residual to copy! */
2835 if (n_bytes % 16 != 0)
2836 {
2837 operands[4] = GEN_INT (n_bytes % 8);
2838 if (n_bytes % 16 >= 8)
2839 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2840 if (n_bytes % 8 != 0)
2841 output_asm_insn ("ldd 0(%1),%6", operands);
2842 if (n_bytes % 16 >= 8)
2843 output_asm_insn ("std,ma %3,8(%0)", operands);
2844 if (n_bytes % 8 != 0)
2845 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2846 }
2847 return "";
2848
2849 case 4:
2850 /* Pre-adjust the loop counter. */
2851 operands[4] = GEN_INT (n_bytes - 8);
2852 output_asm_insn ("ldi %4,%2", operands);
2853
2854 /* Copying loop. */
2855 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2856 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2857 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2858 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2859 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2860
2861 /* Handle the residual. There could be up to 7 bytes of
2862 residual to copy! */
2863 if (n_bytes % 8 != 0)
2864 {
2865 operands[4] = GEN_INT (n_bytes % 4);
2866 if (n_bytes % 8 >= 4)
2867 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2868 if (n_bytes % 4 != 0)
2869 output_asm_insn ("ldw 0(%1),%6", operands);
2870 if (n_bytes % 8 >= 4)
2871 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2872 if (n_bytes % 4 != 0)
2873 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2874 }
2875 return "";
2876
2877 case 2:
2878 /* Pre-adjust the loop counter. */
2879 operands[4] = GEN_INT (n_bytes - 4);
2880 output_asm_insn ("ldi %4,%2", operands);
2881
2882 /* Copying loop. */
2883 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2884 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2885 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2886 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2887 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2888
2889 /* Handle the residual. */
2890 if (n_bytes % 4 != 0)
2891 {
2892 if (n_bytes % 4 >= 2)
2893 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2894 if (n_bytes % 2 != 0)
2895 output_asm_insn ("ldb 0(%1),%6", operands);
2896 if (n_bytes % 4 >= 2)
2897 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2898 if (n_bytes % 2 != 0)
2899 output_asm_insn ("stb %6,0(%0)", operands);
2900 }
2901 return "";
2902
2903 case 1:
2904 /* Pre-adjust the loop counter. */
2905 operands[4] = GEN_INT (n_bytes - 2);
2906 output_asm_insn ("ldi %4,%2", operands);
2907
2908 /* Copying loop. */
2909 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2910 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2911 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2912 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2913 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2914
2915 /* Handle the residual. */
2916 if (n_bytes % 2 != 0)
2917 {
2918 output_asm_insn ("ldb 0(%1),%3", operands);
2919 output_asm_insn ("stb %3,0(%0)", operands);
2920 }
2921 return "";
2922
2923 default:
2924 gcc_unreachable ();
2925 }
2926 }
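
/* Illustrative expansion for align = 4 and n_bytes = 23, shown with
   the %-placeholders used in the templates above:

       ldi 15,%2            ; counter = 23 - 8
       ldws,ma 4(%1),%3     ; copying loop, two words per iteration
       ldws,ma 4(%1),%6
       stws,ma %3,4(%0)
       addib,>= -8,%2,.-12
       stws,ma %6,4(%0)
       ldws,ma 4(%1),%3     ; residual word, since 23 % 8 >= 4
       ldw 0(%1),%6         ; plus 23 % 4 = 3 trailing bytes
       stws,ma %3,4(%0)
       stbys,e %6,3(%0)     ; store the trailing bytes */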
2927
2928 /* Count the number of insns necessary to handle this block move.
2929
2930 Basic structure is the same as pa_output_block_move, except that
2931 we count insns rather than emit them. */
2932
2933 static int
2934 compute_movmem_length (rtx_insn *insn)
2935 {
2936 rtx pat = PATTERN (insn);
2937 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2938 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2939 unsigned int n_insns = 0;
2940
2941 /* We can't move more than a word at a time because the PA
2942 has no integer move insns wider than a word. (Could use fp mem ops?) */
2943 if (align > (TARGET_64BIT ? 8 : 4))
2944 align = (TARGET_64BIT ? 8 : 4);
2945
2946 /* The basic copying loop. */
2947 n_insns = 6;
2948
2949 /* Residuals. */
2950 if (n_bytes % (2 * align) != 0)
2951 {
2952 if ((n_bytes % (2 * align)) >= align)
2953 n_insns += 2;
2954
2955 if ((n_bytes % align) != 0)
2956 n_insns += 2;
2957 }
2958
2959 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2960 return n_insns * 4;
2961 }
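
/* For the align = 4, n_bytes = 23 example above: 6 insns for the
   basic loop, plus 2 because 23 % 8 = 7 >= 4, plus 2 because
   23 % 4 != 0, giving 10 insns and a length of 40 bytes. */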
2962
2963 /* Emit code to perform a block clear.
2964
2965 OPERANDS[0] is the destination pointer as a REG, clobbered.
2966 OPERANDS[1] is a register for temporary storage.
2967 OPERANDS[2] is the size as a CONST_INT.
2968 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2969
2970 const char *
2971 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2972 {
2973 int align = INTVAL (operands[3]);
2974 unsigned long n_bytes = INTVAL (operands[2]);
2975
2976 /* We can't clear more than a word at a time because the PA
2977 has no integer move insns wider than a word. */
2978 if (align > (TARGET_64BIT ? 8 : 4))
2979 align = (TARGET_64BIT ? 8 : 4);
2980
2981 /* Note that we know each loop below will execute at least twice
2982 (else we would have open-coded the clear). */
2983 switch (align)
2984 {
2985 case 8:
2986 /* Pre-adjust the loop counter. */
2987 operands[2] = GEN_INT (n_bytes - 16);
2988 output_asm_insn ("ldi %2,%1", operands);
2989
2990 /* Loop. */
2991 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2992 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2993 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2994
2995 /* Handle the residual. There could be up to 15 bytes of
2996 residual to clear! */
2997 if (n_bytes % 16 != 0)
2998 {
2999 operands[2] = GEN_INT (n_bytes % 8);
3000 if (n_bytes % 16 >= 8)
3001 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3002 if (n_bytes % 8 != 0)
3003 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3004 }
3005 return "";
3006
3007 case 4:
3008 /* Pre-adjust the loop counter. */
3009 operands[2] = GEN_INT (n_bytes - 8);
3010 output_asm_insn ("ldi %2,%1", operands);
3011
3012 /* Loop. */
3013 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3014 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3015 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3016
3017 /* Handle the residual. There could be up to 7 bytes of
3018 residual to clear! */
3019 if (n_bytes % 8 != 0)
3020 {
3021 operands[2] = GEN_INT (n_bytes % 4);
3022 if (n_bytes % 8 >= 4)
3023 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3024 if (n_bytes % 4 != 0)
3025 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3026 }
3027 return "";
3028
3029 case 2:
3030 /* Pre-adjust the loop counter. */
3031 operands[2] = GEN_INT (n_bytes - 4);
3032 output_asm_insn ("ldi %2,%1", operands);
3033
3034 /* Loop. */
3035 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3036 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3037 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3038
3039 /* Handle the residual. */
3040 if (n_bytes % 4 != 0)
3041 {
3042 if (n_bytes % 4 >= 2)
3043 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3044 if (n_bytes % 2 != 0)
3045 output_asm_insn ("stb %%r0,0(%0)", operands);
3046 }
3047 return "";
3048
3049 case 1:
3050 /* Pre-adjust the loop counter. */
3051 operands[2] = GEN_INT (n_bytes - 2);
3052 output_asm_insn ("ldi %2,%1", operands);
3053
3054 /* Loop. */
3055 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3056 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3057 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3058
3059 /* Handle the residual. */
3060 if (n_bytes % 2 != 0)
3061 output_asm_insn ("stb %%r0,0(%0)", operands);
3062
3063 return "";
3064
3065 default:
3066 gcc_unreachable ();
3067 }
3068 }
3069
3070 /* Count the number of insns necessary to handle this block clear.
3071
3072 Basic structure is the same as pa_output_block_clear, except that
3073 we count insns rather than emit them. */
3074
3075 static int
3076 compute_clrmem_length (rtx_insn *insn)
3077 {
3078 rtx pat = PATTERN (insn);
3079 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3080 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3081 unsigned int n_insns = 0;
3082
3083 /* We can't clear more than a word at a time because the PA
3084 has no integer move insns wider than a word. */
3085 if (align > (TARGET_64BIT ? 8 : 4))
3086 align = (TARGET_64BIT ? 8 : 4);
3087
3088 /* The basic loop. */
3089 n_insns = 4;
3090
3091 /* Residuals. */
3092 if (n_bytes % (2 * align) != 0)
3093 {
3094 if ((n_bytes % (2 * align)) >= align)
3095 n_insns++;
3096
3097 if ((n_bytes % align) != 0)
3098 n_insns++;
3099 }
3100
3101 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3102 return n_insns * 4;
3103 }
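
/* Example (illustrative): align = 4 and n_bytes = 13 gives 4 insns
   for the basic loop, plus one because 13 % 8 = 5 >= 4, plus one
   because 13 % 4 != 0, for 6 insns and a length of 24 bytes. */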
3104 \f
3105
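/* Return a string to perform a bitwise-and of operands[1] with
   operands[2], storing the result in operands[0]. */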
3106 const char *
3107 pa_output_and (rtx *operands)
3108 {
3109 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3110 {
3111 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3112 int ls0, ls1, ms0, p, len;
3113
3114 for (ls0 = 0; ls0 < 32; ls0++)
3115 if ((mask & (1 << ls0)) == 0)
3116 break;
3117
3118 for (ls1 = ls0; ls1 < 32; ls1++)
3119 if ((mask & (1 << ls1)) != 0)
3120 break;
3121
3122 for (ms0 = ls1; ms0 < 32; ms0++)
3123 if ((mask & (1 << ms0)) == 0)
3124 break;
3125
3126 gcc_assert (ms0 == 32);
3127
3128 if (ls1 == 32)
3129 {
3130 len = ls0;
3131
3132 gcc_assert (len);
3133
3134 operands[2] = GEN_INT (len);
3135 return "{extru|extrw,u} %1,31,%2,%0";
3136 }
3137 else
3138 {
3139 /* We could use this `depi' for the case above as well, but `depi'
3140 requires one more register file access than an `extru'. */
3141
3142 p = 31 - ls0;
3143 len = ls1 - ls0;
3144
3145 operands[2] = GEN_INT (p);
3146 operands[3] = GEN_INT (len);
3147 return "{depi|depwi} 0,%2,%3,%0";
3148 }
3149 }
3150 else
3151 return "and %1,%2,%0";
3152 }
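
/* Examples (illustrative masks): 0x0000007f is a low-order run of
   ones (ls0 = 7, ls1 = 32), giving the extract "extru %1,31,7,%0";
   0xffff00ff has an interior run of zeros (ls0 = 8, ls1 = 16),
   giving the deposit "depi 0,23,8,%0", which clears the 8-bit field
   whose rightmost bit is machine bit 8. */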
3153
3154 /* Return a string to perform a bitwise-and of operands[1] with
3155 operands[2], storing the result in operands[0]. */
3156 const char *
3157 pa_output_64bit_and (rtx *operands)
3158 {
3159 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3160 {
3161 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3162 int ls0, ls1, ms0, p, len;
3163
3164 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3165 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3166 break;
3167
3168 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3169 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3170 break;
3171
3172 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3173 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3174 break;
3175
3176 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3177
3178 if (ls1 == HOST_BITS_PER_WIDE_INT)
3179 {
3180 len = ls0;
3181
3182 gcc_assert (len);
3183
3184 operands[2] = GEN_INT (len);
3185 return "extrd,u %1,63,%2,%0";
3186 }
3187 else
3188 {
3189 /* We could use this `depdi' for the case above as well, but
3190 `depdi' requires one more register file access than an `extrd,u'. */
3191
3192 p = 63 - ls0;
3193 len = ls1 - ls0;
3194
3195 operands[2] = GEN_INT (p);
3196 operands[3] = GEN_INT (len);
3197 return "depdi 0,%2,%3,%0";
3198 }
3199 }
3200 else
3201 return "and %1,%2,%0";
3202 }
3203
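/* Return a string to perform a bitwise inclusive-or of operands[1]
   with operands[2], storing the result in operands[0]. Operands[2]
   is expected to be zero or a single contiguous run of one bits. */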
3204 const char *
3205 pa_output_ior (rtx *operands)
3206 {
3207 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3208 int bs0, bs1, p, len;
3209
3210 if (INTVAL (operands[2]) == 0)
3211 return "copy %1,%0";
3212
3213 for (bs0 = 0; bs0 < 32; bs0++)
3214 if ((mask & (1 << bs0)) != 0)
3215 break;
3216
3217 for (bs1 = bs0; bs1 < 32; bs1++)
3218 if ((mask & (1 << bs1)) == 0)
3219 break;
3220
3221 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3222
3223 p = 31 - bs0;
3224 len = bs1 - bs0;
3225
3226 operands[2] = GEN_INT (p);
3227 operands[3] = GEN_INT (len);
3228 return "{depi|depwi} -1,%2,%3,%0";
3229 }
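
/* Example (illustrative): mask 0x00ff0000 is a single run of ones
   (bs0 = 16, bs1 = 24), so the result is "depi -1,15,8,%0", which
   deposits eight one bits into the field whose rightmost bit is
   machine bit 16. */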
3230
3231 /* Return a string to perform a bitwise inclusive-or of operands[1]
3232 with operands[2], storing the result in operands[0]. */
3233 const char *
3234 pa_output_64bit_ior (rtx *operands)
3235 {
3236 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3237 int bs0, bs1, p, len;
3238
3239 if (INTVAL (operands[2]) == 0)
3240 return "copy %1,%0";
3241
3242 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3243 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3244 break;
3245
3246 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3247 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3248 break;
3249
3250 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3251 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3252
3253 p = 63 - bs0;
3254 len = bs1 - bs0;
3255
3256 operands[2] = GEN_INT (p);
3257 operands[3] = GEN_INT (len);
3258 return "depdi -1,%2,%3,%0";
3259 }
3260 \f
3261 /* Target hook for assembling integer objects. This code handles
3262 aligned SI and DI integers specially since function references
3263 must be preceded by P%. */
3264
3265 static bool
3266 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3267 {
3268 if (size == UNITS_PER_WORD
3269 && aligned_p
3270 && function_label_operand (x, VOIDmode))
3271 {
3272 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3273
3274 /* We don't want an OPD when generating fast indirect calls. */
3275 if (!TARGET_FAST_INDIRECT_CALLS)
3276 fputs ("P%", asm_out_file);
3277
3278 output_addr_const (asm_out_file, x);
3279 fputc ('\n', asm_out_file);
3280 return true;
3281 }
3282 return default_assemble_integer (x, size, aligned_p);
3283 }
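
/* For example, a word-sized, aligned reference to function foo is
   emitted as "\t.word\tP%foo" ("\t.dword\t" when words are 8 bytes),
   so that the reference resolves through a function descriptor;
   with fast indirect calls enabled, the "P%" prefix is omitted. */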
3284 \f
3285 /* Output an ascii string. */
3286 void
3287 pa_output_ascii (FILE *file, const char *p, int size)
3288 {
3289 int i;
3290 int chars_output;
3291 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3292
3293 /* The HP assembler can only take strings of 256 characters at one
3294 time. This is a limitation on input line length, *not* the
3295 length of the string. Sigh. Even worse, it seems that the
3296 restriction is in number of input characters (see \xnn &
3297 \whatever). So we have to do this very carefully. */
3298
3299 fputs ("\t.STRING \"", file);
3300
3301 chars_output = 0;
3302 for (i = 0; i < size; i += 4)
3303 {
3304 int co = 0;
3305 int io = 0;
3306 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3307 {
3308 register unsigned int c = (unsigned char) p[i + io];
3309
3310 if (c == '\"' || c == '\\')
3311 partial_output[co++] = '\\';
3312 if (c >= ' ' && c < 0177)
3313 partial_output[co++] = c;
3314 else
3315 {
3316 unsigned int hexd;
3317 partial_output[co++] = '\\';
3318 partial_output[co++] = 'x';
3319 hexd = c / 16 - 0 + '0';
3320 if (hexd > '9')
3321 hexd -= '9' - 'a' + 1;
3322 partial_output[co++] = hexd;
3323 hexd = c % 16 - 0 + '0';
3324 if (hexd > '9')
3325 hexd -= '9' - 'a' + 1;
3326 partial_output[co++] = hexd;
3327 }
3328 }
3329 if (chars_output + co > 243)
3330 {
3331 fputs ("\"\n\t.STRING \"", file);
3332 chars_output = 0;
3333 }
3334 fwrite (partial_output, 1, (size_t) co, file);
3335 chars_output += co;
3336 co = 0;
3337 }
3338 fputs ("\"\n", file);
3339 }
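
/* For example, the input bytes 'a', '"', '\n' are emitted as

       .STRING "a\"\x0a"

   the quote gains a backslash escape, the newline becomes a \x hex
   escape, and a fresh .STRING directive is started whenever the
   current one nears the assembler's input line limit. */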
3340
3341 /* Try to rewrite floating point comparisons & branches to avoid
3342 useless add,tr insns.
3343
3344 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3345 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3346 first attempt to remove useless add,tr insns. It is zero
3347 for the second pass as reorg sometimes leaves bogus REG_DEAD
3348 notes lying around.
3349
3350 When CHECK_NOTES is zero we can only eliminate add,tr insns
3351 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3352 instructions. */
3353 static void
3354 remove_useless_addtr_insns (int check_notes)
3355 {
3356 rtx_insn *insn;
3357 static int pass = 0;
3358
3359 /* This is fairly cheap, so always run it when optimizing. */
3360 if (optimize > 0)
3361 {
3362 int fcmp_count = 0;
3363 int fbranch_count = 0;
3364
3365 /* Walk all the insns in this function looking for fcmp & fbranch
3366 instructions. Keep track of how many of each we find. */
3367 for (insn = get_insns (); insn; insn = next_insn (insn))
3368 {
3369 rtx tmp;
3370
3371 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3372 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3373 continue;
3374
3375 tmp = PATTERN (insn);
3376
3377 /* It must be a set. */
3378 if (GET_CODE (tmp) != SET)
3379 continue;
3380
3381 /* If the destination is CCFP, then we've found an fcmp insn. */
3382 tmp = SET_DEST (tmp);
3383 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3384 {
3385 fcmp_count++;
3386 continue;
3387 }
3388
3389 tmp = PATTERN (insn);
3390 /* If this is an fbranch instruction, bump the fbranch counter. */
3391 if (GET_CODE (tmp) == SET
3392 && SET_DEST (tmp) == pc_rtx
3393 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3394 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3395 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3396 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3397 {
3398 fbranch_count++;
3399 continue;
3400 }
3401 }
3402
3403
3404 /* Find all floating point compare + branch insns. If possible,
3405 reverse the comparison & the branch to avoid add,tr insns. */
3406 for (insn = get_insns (); insn; insn = next_insn (insn))
3407 {
3408 rtx tmp;
3409 rtx_insn *next;
3410
3411 /* Ignore anything that isn't an INSN. */
3412 if (! NONJUMP_INSN_P (insn))
3413 continue;
3414
3415 tmp = PATTERN (insn);
3416
3417 /* It must be a set. */
3418 if (GET_CODE (tmp) != SET)
3419 continue;
3420
3421 /* The destination must be CCFP, which is register zero. */
3422 tmp = SET_DEST (tmp);
3423 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3424 continue;
3425
3426 /* INSN should be a set of CCFP.
3427
3428 See if the result of this insn is used in a reversed FP
3429 conditional branch. If so, reverse our condition and
3430 the branch. Doing so avoids useless add,tr insns. */
3431 next = next_insn (insn);
3432 while (next)
3433 {
3434 /* Jumps, calls and labels stop our search. */
3435 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3436 break;
3437
3438 /* As does another fcmp insn. */
3439 if (NONJUMP_INSN_P (next)
3440 && GET_CODE (PATTERN (next)) == SET
3441 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3442 && REGNO (SET_DEST (PATTERN (next))) == 0)
3443 break;
3444
3445 next = next_insn (next);
3446 }
3447
3448 /* Is NEXT_INSN a branch? */
3449 if (next && JUMP_P (next))
3450 {
3451 rtx pattern = PATTERN (next);
3452
3453 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3454 and CCFP dies, then reverse our conditional and the branch
3455 to avoid the add,tr. */
3456 if (GET_CODE (pattern) == SET
3457 && SET_DEST (pattern) == pc_rtx
3458 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3459 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3460 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3461 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3462 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3463 && (fcmp_count == fbranch_count
3464 || (check_notes
3465 && find_regno_note (next, REG_DEAD, 0))))
3466 {
3467 /* Reverse the branch. */
3468 tmp = XEXP (SET_SRC (pattern), 1);
3469 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3470 XEXP (SET_SRC (pattern), 2) = tmp;
3471 INSN_CODE (next) = -1;
3472
3473 /* Reverse our condition. */
3474 tmp = PATTERN (insn);
3475 PUT_CODE (XEXP (tmp, 1),
3476 (reverse_condition_maybe_unordered
3477 (GET_CODE (XEXP (tmp, 1)))));
3478 }
3479 }
3480 }
3481 }
3482
3483 pass = !pass;
3484
3485 }
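
/* For example (illustrative), given an fcmp that sets CCFP with code
   LT followed by a branch written in the reversed sense (its taken
   and fall-through arms swapped), the loop above swaps the arms back
   and rewrites the compare via reverse_condition_maybe_unordered, so
   LT becomes UNGE and NaN behavior is preserved, letting the normal
   fbranch pattern apply without an add,tr. */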
3486 \f
3487 /* You may have trouble believing this, but this is the 32 bit HP-PA
3488 stack layout. Wow.
3489
3490 Offset Contents
3491
3492 Variable arguments (optional; any number may be allocated)
3493
3494 SP-(4*(N+9)) arg word N
3495 : :
3496 SP-56 arg word 5
3497 SP-52 arg word 4
3498
3499 Fixed arguments (must be allocated; may remain unused)
3500
3501 SP-48 arg word 3
3502 SP-44 arg word 2
3503 SP-40 arg word 1
3504 SP-36 arg word 0
3505
3506 Frame Marker
3507
3508 SP-32 External Data Pointer (DP)
3509 SP-28 External sr4
3510 SP-24 External/stub RP (RP')
3511 SP-20 Current RP
3512 SP-16 Static Link
3513 SP-12 Clean up
3514 SP-8 Calling Stub RP (RP'')
3515 SP-4 Previous SP
3516
3517 Top of Frame
3518
3519 SP-0 Stack Pointer (points to next available address)
3520
3521 */
3522
3523 /* This function saves registers as follows. Registers marked with ' are
3524 this function's registers (as opposed to the previous function's).
3525 If a frame_pointer isn't needed, r4 is saved as a general register;
3526 the space for the frame pointer is still allocated, though, to keep
3527 things simple.
3528
3529
3530 Top of Frame
3531
3532 SP (FP') Previous FP
3533 SP + 4 Alignment filler (sigh)
3534 SP + 8 Space for locals reserved here.
3535 .
3536 .
3537 .
3538 SP + n All call saved registers used.
3539 .
3540 .
3541 .
3542 SP + o All call saved fp registers used.
3543 .
3544 .
3545 .
3546 SP + p (SP') points to next available address.
3547
3548 */
3549
3550 /* Global variables set by output_function_prologue(). */
3551 /* Size of frame. Need to know this to emit return insns from
3552 leaf procedures. */
3553 static HOST_WIDE_INT actual_fsize, local_fsize;
3554 static int save_fregs;
3555
3556 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3557 Handle the case where DISP > 8k by using the add_high_const patterns.
3558
3559 Note that in the DISP > 8k case, we leave the high part of the address
3560 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3561
3562 static void
3563 store_reg (int reg, HOST_WIDE_INT disp, int base)
3564 {
3565 rtx dest, src, basereg;
3566 rtx_insn *insn;
3567
3568 src = gen_rtx_REG (word_mode, reg);
3569 basereg = gen_rtx_REG (Pmode, base);
3570 if (VAL_14_BITS_P (disp))
3571 {
3572 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3573 insn = emit_move_insn (dest, src);
3574 }
3575 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3576 {
3577 rtx delta = GEN_INT (disp);
3578 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3579
3580 emit_move_insn (tmpreg, delta);
3581 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3582 if (DO_FRAME_NOTES)
3583 {
3584 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3585 gen_rtx_SET (tmpreg,
3586 gen_rtx_PLUS (Pmode, basereg, delta)));
3587 RTX_FRAME_RELATED_P (insn) = 1;
3588 }
3589 dest = gen_rtx_MEM (word_mode, tmpreg);
3590 insn = emit_move_insn (dest, src);
3591 }
3592 else
3593 {
3594 rtx delta = GEN_INT (disp);
3595 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3596 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3597
3598 emit_move_insn (tmpreg, high);
3599 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3600 insn = emit_move_insn (dest, src);
3601 if (DO_FRAME_NOTES)
3602 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3603 gen_rtx_SET (gen_rtx_MEM (word_mode,
3604 gen_rtx_PLUS (word_mode,
3605 basereg,
3606 delta)),
3607 src));
3608 }
3609
3610 if (DO_FRAME_NOTES)
3611 RTX_FRAME_RELATED_P (insn) = 1;
3612 }
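
/* Illustrative sketch (assumed operands, not from the source): on a
   32-bit target, storing %r3 at an SP-relative displacement of 20480
   -- too large for the 14-bit immediate field -- takes the HIGH/LO_SUM
   path above and corresponds to assembly along the lines of

       addil L'20480,%r30     ; %r1 = %r30 + left portion of disp
       stw %r3,R'20480(%r1)   ; store at %r1 + right portion of disp

   whereas a small displacement such as -20 needs only

       stw %r3,-20(%r30)

   The particular registers and displacement here are hypothetical.  */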
3613
3614 /* Emit RTL to store REG at the memory location specified by BASE and then
3615 add MOD to BASE. MOD must be <= 8k. */
3616
3617 static void
3618 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3619 {
3620 rtx basereg, srcreg, delta;
3621 rtx_insn *insn;
3622
3623 gcc_assert (VAL_14_BITS_P (mod));
3624
3625 basereg = gen_rtx_REG (Pmode, base);
3626 srcreg = gen_rtx_REG (word_mode, reg);
3627 delta = GEN_INT (mod);
3628
3629 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3630 if (DO_FRAME_NOTES)
3631 {
3632 RTX_FRAME_RELATED_P (insn) = 1;
3633
3634 /* RTX_FRAME_RELATED_P must be set on each frame related set
3635 in a parallel with more than one element. */
3636 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3637 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3638 }
3639 }
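
/* A minimal usage sketch (assumed values): with BASE = %r30, REG = %r1
   and MOD = 128, the pattern emitted above corresponds to

       stwm %r1,128(%r30)     ; *%r30 = %r1, then %r30 += 128

   i.e. the store and the stack adjustment happen as one instruction,
   which is why MOD must fit the 14-bit displacement field.  */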
3640
3641 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3642 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3643 whether to add a frame note or not.
3644
3645 In the DISP > 8k case, we leave the high part of the address in %r1.
3646 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3647
3648 static void
3649 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3650 {
3651 rtx_insn *insn;
3652
3653 if (VAL_14_BITS_P (disp))
3654 {
3655 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3656 plus_constant (Pmode,
3657 gen_rtx_REG (Pmode, base), disp));
3658 }
3659 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3660 {
3661 rtx basereg = gen_rtx_REG (Pmode, base);
3662 rtx delta = GEN_INT (disp);
3663 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3664
3665 emit_move_insn (tmpreg, delta);
3666 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3667 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3668 if (DO_FRAME_NOTES)
3669 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3670 gen_rtx_SET (tmpreg,
3671 gen_rtx_PLUS (Pmode, basereg, delta)));
3672 }
3673 else
3674 {
3675 rtx basereg = gen_rtx_REG (Pmode, base);
3676 rtx delta = GEN_INT (disp);
3677 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3678
3679 emit_move_insn (tmpreg,
3680 gen_rtx_PLUS (Pmode, basereg,
3681 gen_rtx_HIGH (Pmode, delta)));
3682 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3683 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3684 }
3685
3686 if (DO_FRAME_NOTES && note)
3687 RTX_FRAME_RELATED_P (insn) = 1;
3688 }
3689
3690 HOST_WIDE_INT
3691 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3692 {
3693 int freg_saved = 0;
3694 int i, j;
3695
3696 /* The code in pa_expand_prologue and pa_expand_epilogue must
3697 be consistent with the rounding and size calculation done here.
3698 Change them at the same time. */
3699
3700 /* We do our own stack alignment. First, round the size of the
3701 stack locals up to a word boundary. */
3702 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3703
3704 /* Space for previous frame pointer + filler. If any frame is
3705 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3706 waste some space here for the sake of HP compatibility. The
3707 first slot is only used when the frame pointer is needed. */
3708 if (size || frame_pointer_needed)
3709 size += STARTING_FRAME_OFFSET;
3710
3711 /* If the current function calls __builtin_eh_return, then we need
3712 to allocate stack space for registers that will hold data for
3713 the exception handler. */
3714 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3715 {
3716 unsigned int i;
3717
3718 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3719 continue;
3720 size += i * UNITS_PER_WORD;
3721 }
3722
3723 /* Account for space used by the callee general register saves. */
3724 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3725 if (df_regs_ever_live_p (i))
3726 size += UNITS_PER_WORD;
3727
3728 /* Account for space used by the callee floating point register saves. */
3729 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3730 if (df_regs_ever_live_p (i)
3731 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3732 {
3733 freg_saved = 1;
3734
3735 /* We always save both halves of the FP register, so always
3736 increment the frame size by 8 bytes. */
3737 size += 8;
3738 }
3739
3740 /* If any of the floating registers are saved, account for the
3741 alignment needed for the floating point register save block. */
3742 if (freg_saved)
3743 {
3744 size = (size + 7) & ~7;
3745 if (fregs_live)
3746 *fregs_live = 1;
3747 }
3748
3749 /* The various ABIs include space for the outgoing parameters in the
3750 size of the current function's stack frame. We don't need to align
3751 for the outgoing arguments as their alignment is set by the final
3752 rounding for the frame as a whole. */
3753 size += crtl->outgoing_args_size;
3754
3755 /* Allocate space for the fixed frame marker. This space must be
3756 allocated for any function that makes calls or allocates
3757 stack space. */
3758 if (!crtl->is_leaf || size)
3759 size += TARGET_64BIT ? 48 : 32;
3760
3761 /* Finally, round to the preferred stack boundary. */
3762 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3763 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3764 }
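
/* Worked example under assumed parameters (32-bit target,
   UNITS_PER_WORD == 4, 64-byte preferred stack boundary): a non-leaf
   function with 9 bytes of locals first rounds 9 up to 12 via
   (9 + 4 - 1) & ~(4 - 1), then adds STARTING_FRAME_OFFSET, any
   register-save space, outgoing argument space and the 32-byte frame
   marker, and finally rounds the total up to a multiple of 64.  */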
3765
3766 /* Generate the assembly code for function entry. FILE is a stdio
3767 stream to output the code to. SIZE is an int: how many units of
3768 temporary storage to allocate.
3769
3770 Refer to the array `regs_ever_live' to determine which registers to
3771 save; `regs_ever_live[I]' is nonzero if register number I is ever
3772 used in the function. This function is responsible for knowing
3773 which registers should not be saved even if used. */
3774
3775 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3776 of memory. If any fpu reg is used in the function, we allocate
3777 such a block here, at the bottom of the frame, just in case it's needed.
3778
3779 If this function is a leaf procedure, then we may choose not
3780 to do a "save" insn. The decision about whether or not
3781 to do this is made in regclass.c. */
3782
3783 static void
3784 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3785 {
3786 /* The function's label and associated .PROC must never be
3787 separated and must be output *after* any profiling declarations
3788 to avoid changing spaces/subspaces within a procedure. */
3789 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3790 fputs ("\t.PROC\n", file);
3791
3792 /* pa_expand_prologue does the dirty work now. We just need
3793 to output the assembler directives which denote the start
3794 of a function. */
3795 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3796 if (crtl->is_leaf)
3797 fputs (",NO_CALLS", file);
3798 else
3799 fputs (",CALLS", file);
3800 if (rp_saved)
3801 fputs (",SAVE_RP", file);
3802
3803 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3804 at the beginning of the frame and that it is used as the frame
3805 pointer for the frame. We do this because our current frame
3806 layout doesn't conform to that specified in the HP runtime
3807 documentation and we need a way to indicate to programs such as
3808 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3809 isn't used by HP compilers but is supported by the assembler.
3810 However, SAVE_SP is supposed to indicate that the previous stack
3811 pointer has been saved in the frame marker. */
3812 if (frame_pointer_needed)
3813 fputs (",SAVE_SP", file);
3814
3815 /* Pass on information about the number of callee register saves
3816 performed in the prologue.
3817
3818 The compiler is supposed to pass the highest register number
3819 saved, the assembler then has to adjust that number before
3820 entering it into the unwind descriptor (to account for any
3821 caller saved registers with lower register numbers than the
3822 first callee saved register). */
3823 if (gr_saved)
3824 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3825
3826 if (fr_saved)
3827 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3828
3829 fputs ("\n\t.ENTRY\n", file);
3830
3831 remove_useless_addtr_insns (0);
3832 }
3833
3834 void
3835 pa_expand_prologue (void)
3836 {
3837 int merge_sp_adjust_with_store = 0;
3838 HOST_WIDE_INT size = get_frame_size ();
3839 HOST_WIDE_INT offset;
3840 int i;
3841 rtx tmpreg;
3842 rtx_insn *insn;
3843
3844 gr_saved = 0;
3845 fr_saved = 0;
3846 save_fregs = 0;
3847
3848 /* Compute total size for frame pointer, filler, locals and rounding to
3849 the next word boundary. Similar code appears in pa_compute_frame_size
3850 and must be changed in tandem with this code. */
3851 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3852 if (local_fsize || frame_pointer_needed)
3853 local_fsize += STARTING_FRAME_OFFSET;
3854
3855 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3856 if (flag_stack_usage_info)
3857 current_function_static_stack_size = actual_fsize;
3858
3859 /* Compute a few things we will use often. */
3860 tmpreg = gen_rtx_REG (word_mode, 1);
3861
3862 /* Save RP first. The calling conventions manual states RP will
3863 always be stored into the caller's frame at sp - 20 or sp - 16
3864 depending on which ABI is in use. */
3865 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3866 {
3867 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3868 rp_saved = true;
3869 }
3870 else
3871 rp_saved = false;
3872
3873 /* Allocate the local frame and set up the frame pointer if needed. */
3874 if (actual_fsize != 0)
3875 {
3876 if (frame_pointer_needed)
3877 {
3878 /* Copy the old frame pointer temporarily into %r1. Set up the
3879 new stack pointer, then store away the saved old frame pointer
3880 into the stack at sp and at the same time update the stack
3881 	     pointer by actual_fsize bytes.  There are two versions: the
3882 	     first handles small (<8k) frames; the second handles large
3883 	     (>=8k) frames.  */
3884 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3885 if (DO_FRAME_NOTES)
3886 RTX_FRAME_RELATED_P (insn) = 1;
3887
3888 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3889 if (DO_FRAME_NOTES)
3890 RTX_FRAME_RELATED_P (insn) = 1;
3891
3892 if (VAL_14_BITS_P (actual_fsize))
3893 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3894 else
3895 {
3896 /* It is incorrect to store the saved frame pointer at *sp,
3897 then increment sp (writes beyond the current stack boundary).
3898
3899 So instead use stwm to store at *sp and post-increment the
3900 stack pointer as an atomic operation. Then increment sp to
3901 finish allocating the new frame. */
3902 HOST_WIDE_INT adjust1 = 8192 - 64;
3903 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3904
3905 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3906 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3907 adjust2, 1);
3908 }
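
	  /* Illustrative numbers: actual_fsize == 20000 splits into
	     adjust1 == 8192 - 64 == 8128, which still fits the 14-bit
	     displacement field of stwm, and adjust2 == 11872, added by
	     the ordinary set_reg_plus_d sequence afterwards.  */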
3909
3910 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3911 we need to store the previous stack pointer (frame pointer)
3912 into the frame marker on targets that use the HP unwind
3913 library. This allows the HP unwind library to be used to
3914 unwind GCC frames. However, we are not fully compatible
3915 with the HP library because our frame layout differs from
3916 that specified in the HP runtime specification.
3917
3918 We don't want a frame note on this instruction as the frame
3919 marker moves during dynamic stack allocation.
3920
3921 This instruction also serves as a blockage to prevent
3922 register spills from being scheduled before the stack
3923 pointer is raised. This is necessary as we store
3924 registers using the frame pointer as a base register,
3925 and the frame pointer is set before sp is raised. */
3926 if (TARGET_HPUX_UNWIND_LIBRARY)
3927 {
3928 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3929 GEN_INT (TARGET_64BIT ? -8 : -4));
3930
3931 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3932 hard_frame_pointer_rtx);
3933 }
3934 else
3935 emit_insn (gen_blockage ());
3936 }
3937       /* No frame pointer needed.  */
3938 else
3939 {
3940 /* In some cases we can perform the first callee register save
3941 	     and allocate the stack frame at the same time.   If so, just
3942 make a note of it and defer allocating the frame until saving
3943 the callee registers. */
3944 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3945 merge_sp_adjust_with_store = 1;
3946 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
3947 bytes. */
3948 else
3949 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3950 actual_fsize, 1);
3951 }
3952 }
3953
3954 /* Normal register save.
3955
3956 Do not save the frame pointer in the frame_pointer_needed case. It
3957 was done earlier. */
3958 if (frame_pointer_needed)
3959 {
3960 offset = local_fsize;
3961
3962 /* Saving the EH return data registers in the frame is the simplest
3963 way to get the frame unwind information emitted. We put them
3964 just before the general registers. */
3965 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3966 {
3967 unsigned int i, regno;
3968
3969 for (i = 0; ; ++i)
3970 {
3971 regno = EH_RETURN_DATA_REGNO (i);
3972 if (regno == INVALID_REGNUM)
3973 break;
3974
3975 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3976 offset += UNITS_PER_WORD;
3977 }
3978 }
3979
3980 for (i = 18; i >= 4; i--)
3981 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3982 {
3983 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3984 offset += UNITS_PER_WORD;
3985 gr_saved++;
3986 }
3987 /* Account for %r3 which is saved in a special place. */
3988 gr_saved++;
3989 }
3990 /* No frame pointer needed. */
3991 else
3992 {
3993 offset = local_fsize - actual_fsize;
3994
3995 /* Saving the EH return data registers in the frame is the simplest
3996 way to get the frame unwind information emitted. */
3997 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3998 {
3999 unsigned int i, regno;
4000
4001 for (i = 0; ; ++i)
4002 {
4003 regno = EH_RETURN_DATA_REGNO (i);
4004 if (regno == INVALID_REGNUM)
4005 break;
4006
4007 /* If merge_sp_adjust_with_store is nonzero, then we can
4008 optimize the first save. */
4009 if (merge_sp_adjust_with_store)
4010 {
4011 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4012 merge_sp_adjust_with_store = 0;
4013 }
4014 else
4015 store_reg (regno, offset, STACK_POINTER_REGNUM);
4016 offset += UNITS_PER_WORD;
4017 }
4018 }
4019
4020 for (i = 18; i >= 3; i--)
4021 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4022 {
4023 /* If merge_sp_adjust_with_store is nonzero, then we can
4024 optimize the first GR save. */
4025 if (merge_sp_adjust_with_store)
4026 {
4027 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4028 merge_sp_adjust_with_store = 0;
4029 }
4030 else
4031 store_reg (i, offset, STACK_POINTER_REGNUM);
4032 offset += UNITS_PER_WORD;
4033 gr_saved++;
4034 }
4035
4036 /* If we wanted to merge the SP adjustment with a GR save, but we never
4037 did any GR saves, then just emit the adjustment here. */
4038 if (merge_sp_adjust_with_store)
4039 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4040 actual_fsize, 1);
4041 }
4042
4043 /* The hppa calling conventions say that %r19, the pic offset
4044 register, is saved at sp - 32 (in this function's frame)
4045 when generating PIC code. FIXME: What is the correct thing
4046 to do for functions which make no calls and allocate no
4047 frame? Do we need to allocate a frame, or can we just omit
4048 the save? For now we'll just omit the save.
4049
4050 We don't want a note on this insn as the frame marker can
4051 move if there is a dynamic stack allocation. */
4052 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4053 {
4054 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4055
4056 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4057
4058 }
4059
4060 /* Align pointer properly (doubleword boundary). */
4061 offset = (offset + 7) & ~7;
4062
4063 /* Floating point register store. */
4064 if (save_fregs)
4065 {
4066 rtx base;
4067
4068 /* First get the frame or stack pointer to the start of the FP register
4069 save area. */
4070 if (frame_pointer_needed)
4071 {
4072 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4073 base = hard_frame_pointer_rtx;
4074 }
4075 else
4076 {
4077 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4078 base = stack_pointer_rtx;
4079 }
4080
4081 /* Now actually save the FP registers. */
4082 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4083 {
4084 if (df_regs_ever_live_p (i)
4085 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4086 {
4087 rtx addr, reg;
4088 rtx_insn *insn;
4089 addr = gen_rtx_MEM (DFmode,
4090 gen_rtx_POST_INC (word_mode, tmpreg));
4091 reg = gen_rtx_REG (DFmode, i);
4092 insn = emit_move_insn (addr, reg);
4093 if (DO_FRAME_NOTES)
4094 {
4095 RTX_FRAME_RELATED_P (insn) = 1;
4096 if (TARGET_64BIT)
4097 {
4098 rtx mem = gen_rtx_MEM (DFmode,
4099 plus_constant (Pmode, base,
4100 offset));
4101 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4102 gen_rtx_SET (mem, reg));
4103 }
4104 else
4105 {
4106 rtx meml = gen_rtx_MEM (SFmode,
4107 plus_constant (Pmode, base,
4108 offset));
4109 rtx memr = gen_rtx_MEM (SFmode,
4110 plus_constant (Pmode, base,
4111 offset + 4));
4112 rtx regl = gen_rtx_REG (SFmode, i);
4113 rtx regr = gen_rtx_REG (SFmode, i + 1);
4114 rtx setl = gen_rtx_SET (meml, regl);
4115 rtx setr = gen_rtx_SET (memr, regr);
4116 rtvec vec;
4117
4118 RTX_FRAME_RELATED_P (setl) = 1;
4119 RTX_FRAME_RELATED_P (setr) = 1;
4120 vec = gen_rtvec (2, setl, setr);
4121 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4122 gen_rtx_SEQUENCE (VOIDmode, vec));
4123 }
4124 }
4125 offset += GET_MODE_SIZE (DFmode);
4126 fr_saved++;
4127 }
4128 }
4129 }
4130 }
4131
4132 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4133 Handle case where DISP > 8k by using the add_high_const patterns. */
4134
4135 static void
4136 load_reg (int reg, HOST_WIDE_INT disp, int base)
4137 {
4138 rtx dest = gen_rtx_REG (word_mode, reg);
4139 rtx basereg = gen_rtx_REG (Pmode, base);
4140 rtx src;
4141
4142 if (VAL_14_BITS_P (disp))
4143 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4144 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4145 {
4146 rtx delta = GEN_INT (disp);
4147 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4148
4149 emit_move_insn (tmpreg, delta);
4150 if (TARGET_DISABLE_INDEXING)
4151 {
4152 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4153 src = gen_rtx_MEM (word_mode, tmpreg);
4154 }
4155 else
4156 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4157 }
4158 else
4159 {
4160 rtx delta = GEN_INT (disp);
4161 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4162 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4163
4164 emit_move_insn (tmpreg, high);
4165 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4166 }
4167
4168 emit_move_insn (dest, src);
4169 }
4170
4171 /* Update the total code bytes output to the text section. */
4172
4173 static void
4174 update_total_code_bytes (unsigned int nbytes)
4175 {
4176 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4177 && !IN_NAMED_SECTION_P (cfun->decl))
4178 {
4179 unsigned int old_total = total_code_bytes;
4180
4181 total_code_bytes += nbytes;
4182
4183 /* Be prepared to handle overflows. */
4184 if (old_total > total_code_bytes)
4185 total_code_bytes = UINT_MAX;
4186 }
4187 }
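
/* Overflow example: with 32-bit unsigned arithmetic, old_total ==
   0xfffffff0 plus nbytes == 0x20 wraps around to 0x10; the wrap is
   detected because the new total is smaller than the old one, and the
   counter saturates at UINT_MAX instead.  */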
4188
4189 /* This function generates the assembly code for function exit.
4190 Args are as for output_function_prologue ().
4191
4192 The function epilogue should not depend on the current stack
4193 pointer! It should use the frame pointer only. This is mandatory
4194 because of alloca; we also take advantage of it to omit stack
4195 adjustments before returning. */
4196
4197 static void
4198 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4199 {
4200 rtx_insn *insn = get_last_insn ();
4201 bool extra_nop;
4202
4203 /* pa_expand_epilogue does the dirty work now. We just need
4204 to output the assembler directives which denote the end
4205 of a function.
4206
4207 To make debuggers happy, emit a nop if the epilogue was completely
4208 eliminated due to a volatile call as the last insn in the
4209 current function. That way the return address (in %r2) will
4210 always point to a valid instruction in the current function. */
4211
4212 /* Get the last real insn. */
4213 if (NOTE_P (insn))
4214 insn = prev_real_insn (insn);
4215
4216 /* If it is a sequence, then look inside. */
4217 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4218     insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4219
4220 /* If insn is a CALL_INSN, then it must be a call to a volatile
4221 function (otherwise there would be epilogue insns). */
4222 if (insn && CALL_P (insn))
4223 {
4224 fputs ("\tnop\n", file);
4225 extra_nop = true;
4226 }
4227 else
4228 extra_nop = false;
4229
4230 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4231
4232 if (TARGET_SOM && TARGET_GAS)
4233 {
4234 /* We are done with this subspace except possibly for some additional
4235 debug information. Forget that we are in this subspace to ensure
4236 that the next function is output in its own subspace. */
4237 in_section = NULL;
4238 cfun->machine->in_nsubspa = 2;
4239 }
4240
4241 /* Thunks do their own insn accounting. */
4242 if (cfun->is_thunk)
4243 return;
4244
4245 if (INSN_ADDRESSES_SET_P ())
4246 {
4247 last_address = extra_nop ? 4 : 0;
4248 insn = get_last_nonnote_insn ();
4249 if (insn)
4250 {
4251 last_address += INSN_ADDRESSES (INSN_UID (insn));
4252 if (INSN_P (insn))
4253 last_address += insn_default_length (insn);
4254 }
4255 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4256 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4257 }
4258 else
4259 last_address = UINT_MAX;
4260
4261 /* Finally, update the total number of code bytes output so far. */
4262 update_total_code_bytes (last_address);
4263 }
4264
4265 void
4266 pa_expand_epilogue (void)
4267 {
4268 rtx tmpreg;
4269 HOST_WIDE_INT offset;
4270 HOST_WIDE_INT ret_off = 0;
4271 int i;
4272 int merge_sp_adjust_with_load = 0;
4273
4274 /* We will use this often. */
4275 tmpreg = gen_rtx_REG (word_mode, 1);
4276
4277 /* Try to restore RP early to avoid load/use interlocks when
4278 RP gets used in the return (bv) instruction. This appears to still
4279 be necessary even when we schedule the prologue and epilogue. */
4280 if (rp_saved)
4281 {
4282 ret_off = TARGET_64BIT ? -16 : -20;
4283 if (frame_pointer_needed)
4284 {
4285 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4286 ret_off = 0;
4287 }
4288 else
4289 {
4290 /* No frame pointer, and stack is smaller than 8k. */
4291 if (VAL_14_BITS_P (ret_off - actual_fsize))
4292 {
4293 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4294 ret_off = 0;
4295 }
4296 }
4297 }
4298
4299 /* General register restores. */
4300 if (frame_pointer_needed)
4301 {
4302 offset = local_fsize;
4303
4304 /* If the current function calls __builtin_eh_return, then we need
4305 to restore the saved EH data registers. */
4306 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4307 {
4308 unsigned int i, regno;
4309
4310 for (i = 0; ; ++i)
4311 {
4312 regno = EH_RETURN_DATA_REGNO (i);
4313 if (regno == INVALID_REGNUM)
4314 break;
4315
4316 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4317 offset += UNITS_PER_WORD;
4318 }
4319 }
4320
4321 for (i = 18; i >= 4; i--)
4322 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4323 {
4324 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4325 offset += UNITS_PER_WORD;
4326 }
4327 }
4328 else
4329 {
4330 offset = local_fsize - actual_fsize;
4331
4332 /* If the current function calls __builtin_eh_return, then we need
4333 to restore the saved EH data registers. */
4334 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4335 {
4336 unsigned int i, regno;
4337
4338 for (i = 0; ; ++i)
4339 {
4340 regno = EH_RETURN_DATA_REGNO (i);
4341 if (regno == INVALID_REGNUM)
4342 break;
4343
4344 /* Only for the first load.
4345 merge_sp_adjust_with_load holds the register load
4346 with which we will merge the sp adjustment. */
4347 if (merge_sp_adjust_with_load == 0
4348 && local_fsize == 0
4349 && VAL_14_BITS_P (-actual_fsize))
4350 merge_sp_adjust_with_load = regno;
4351 else
4352 load_reg (regno, offset, STACK_POINTER_REGNUM);
4353 offset += UNITS_PER_WORD;
4354 }
4355 }
4356
4357 for (i = 18; i >= 3; i--)
4358 {
4359 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4360 {
4361 /* Only for the first load.
4362 merge_sp_adjust_with_load holds the register load
4363 with which we will merge the sp adjustment. */
4364 if (merge_sp_adjust_with_load == 0
4365 && local_fsize == 0
4366 && VAL_14_BITS_P (-actual_fsize))
4367 merge_sp_adjust_with_load = i;
4368 else
4369 load_reg (i, offset, STACK_POINTER_REGNUM);
4370 offset += UNITS_PER_WORD;
4371 }
4372 }
4373 }
4374
4375 /* Align pointer properly (doubleword boundary). */
4376 offset = (offset + 7) & ~7;
4377
4378 /* FP register restores. */
4379 if (save_fregs)
4380 {
4381 /* Adjust the register to index off of. */
4382 if (frame_pointer_needed)
4383 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4384 else
4385 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4386
4387 /* Actually do the restores now. */
4388 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4389 if (df_regs_ever_live_p (i)
4390 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4391 {
4392 rtx src = gen_rtx_MEM (DFmode,
4393 gen_rtx_POST_INC (word_mode, tmpreg));
4394 rtx dest = gen_rtx_REG (DFmode, i);
4395 emit_move_insn (dest, src);
4396 }
4397 }
4398
4399 /* Emit a blockage insn here to keep these insns from being moved to
4400 an earlier spot in the epilogue, or into the main instruction stream.
4401
4402 This is necessary as we must not cut the stack back before all the
4403 restores are finished. */
4404 emit_insn (gen_blockage ());
4405
4406 /* Reset stack pointer (and possibly frame pointer). The stack
4407 pointer is initially set to fp + 64 to avoid a race condition. */
4408 if (frame_pointer_needed)
4409 {
4410 rtx delta = GEN_INT (-64);
4411
4412 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4413 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4414 stack_pointer_rtx, delta));
4415 }
4416 /* If we were deferring a callee register restore, do it now. */
4417 else if (merge_sp_adjust_with_load)
4418 {
4419 rtx delta = GEN_INT (-actual_fsize);
4420 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4421
4422 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4423 }
4424 else if (actual_fsize != 0)
4425 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4426 - actual_fsize, 0);
4427
4428 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4429 frame greater than 8k), do so now. */
4430 if (ret_off != 0)
4431 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4432
4433 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4434 {
4435 rtx sa = EH_RETURN_STACKADJ_RTX;
4436
4437 emit_insn (gen_blockage ());
4438 emit_insn (TARGET_64BIT
4439 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4440 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4441 }
4442 }
4443
4444 bool
4445 pa_can_use_return_insn (void)
4446 {
4447 if (!reload_completed)
4448 return false;
4449
4450 if (frame_pointer_needed)
4451 return false;
4452
4453 if (df_regs_ever_live_p (2))
4454 return false;
4455
4456 if (crtl->profile)
4457 return false;
4458
4459 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4460 }
4461
4462 rtx
4463 hppa_pic_save_rtx (void)
4464 {
4465 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4466 }
4467
4468 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4469 #define NO_DEFERRED_PROFILE_COUNTERS 0
4470 #endif
4471
4472
4473 /* Vector of funcdef numbers. */
4474 static vec<int> funcdef_nos;
4475
4476 /* Output deferred profile counters. */
4477 static void
4478 output_deferred_profile_counters (void)
4479 {
4480 unsigned int i;
4481 int align, n;
4482
4483 if (funcdef_nos.is_empty ())
4484 return;
4485
4486 switch_to_section (data_section);
4487 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4488 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4489
4490 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4491 {
4492 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4493 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4494 }
4495
4496 funcdef_nos.release ();
4497 }
4498
4499 void
4500 hppa_profile_hook (int label_no)
4501 {
4502 /* We use SImode for the address of the function in both 32 and
4503 64-bit code to avoid having to provide DImode versions of the
4504 lcla2 and load_offset_label_address insn patterns. */
4505 rtx reg = gen_reg_rtx (SImode);
4506 rtx_code_label *label_rtx = gen_label_rtx ();
4507 rtx begin_label_rtx;
4508 rtx_insn *call_insn;
4509 char begin_label_name[16];
4510
4511 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4512 label_no);
4513 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4514
4515 if (TARGET_64BIT)
4516 emit_move_insn (arg_pointer_rtx,
4517 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4518 GEN_INT (64)));
4519
4520 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4521
4522 /* The address of the function is loaded into %r25 with an instruction-
4523 relative sequence that avoids the use of relocations. The sequence
4524 is split so that the load_offset_label_address instruction can
4525 occupy the delay slot of the call to _mcount. */
4526 if (TARGET_PA_20)
4527 emit_insn (gen_lcla2 (reg, label_rtx));
4528 else
4529 emit_insn (gen_lcla1 (reg, label_rtx));
4530
4531 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4532 reg, begin_label_rtx, label_rtx));
4533
4534 #if !NO_DEFERRED_PROFILE_COUNTERS
4535 {
4536 rtx count_label_rtx, addr, r24;
4537 char count_label_name[16];
4538
4539 funcdef_nos.safe_push (label_no);
4540 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4541 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4542
4543 addr = force_reg (Pmode, count_label_rtx);
4544 r24 = gen_rtx_REG (Pmode, 24);
4545 emit_move_insn (r24, addr);
4546
4547 call_insn =
4548 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4549 gen_rtx_SYMBOL_REF (Pmode,
4550 "_mcount")),
4551 GEN_INT (TARGET_64BIT ? 24 : 12)));
4552
4553 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4554 }
4555 #else
4556
4557 call_insn =
4558 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4559 gen_rtx_SYMBOL_REF (Pmode,
4560 "_mcount")),
4561 GEN_INT (TARGET_64BIT ? 16 : 8)));
4562
4563 #endif
4564
4565 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4566 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4567
4568 /* Indicate the _mcount call cannot throw, nor will it execute a
4569 non-local goto. */
4570 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4571 }
4572
4573 /* Fetch the return address for the frame COUNT steps up from
4574 the current frame, after the prologue. FRAMEADDR is the
4575 frame pointer of the COUNT frame.
4576
4577 We want to ignore any export stub remnants here. To handle this,
4578 we examine the code at the return address, and if it is an export
4579 stub, we return a memory rtx for the stub return address stored
4580 at frame-24.
4581
4582 The value returned is used in two different ways:
4583
4584 1. To find a function's caller.
4585
4586 2. To change the return address for a function.
4587
4588 This function handles most instances of case 1; however, it will
4589 fail if there are two levels of stubs to execute on the return
4590 path. The only way I believe that can happen is if the return value
4591 needs a parameter relocation, which never happens for C code.
4592
4593 This function handles most instances of case 2; however, it will
4594 fail if we did not originally have stub code on the return path
4595 but will need stub code on the new return path. This can happen if
4596 the caller & callee are both in the main program, but the new
4597 return location is in a shared library. */
4598
4599 rtx
4600 pa_return_addr_rtx (int count, rtx frameaddr)
4601 {
4602 rtx label;
4603 rtx rp;
4604 rtx saved_rp;
4605 rtx ins;
4606
4607 /* The instruction stream at the return address of a PA1.X export stub is:
4608
4609 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4610 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4611 0x00011820 | stub+16: mtsp r1,sr0
4612 0xe0400002 | stub+20: be,n 0(sr0,rp)
4613
4614 0xe0400002 must be specified as -532676606 so that it won't be
4615 rejected as an invalid immediate operand on 64-bit hosts.
4616
4617 The instruction stream at the return address of a PA2.0 export stub is:
4618
4619 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4620 0xe840d002 | stub+12: bve,n (rp)
4621 */
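
/* Why the negative literals: 0xe0400002 and 0xe840d002 do not fit in a
   signed 32-bit value, so on a 64-bit host they must be written as the
   sign-extended constants -532676606 (0xe0400002 - 2^32) and
   -398405630 (0xe840d002 - 2^32) for GEN_INT to produce the SImode
   values compared below.  */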
4622
4623 HOST_WIDE_INT insns[4];
4624 int i, len;
4625
4626 if (count != 0)
4627 return NULL_RTX;
4628
4629 rp = get_hard_reg_initial_val (Pmode, 2);
4630
4631 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4632 return rp;
4633
4634 /* If there is no export stub then just use the value saved from
4635 the return pointer register. */
4636
4637 saved_rp = gen_reg_rtx (Pmode);
4638 emit_move_insn (saved_rp, rp);
4639
4640 /* Get pointer to the instruction stream. We have to mask out the
4641 privilege level from the two low order bits of the return address
4642 pointer here so that ins will point to the start of the first
4643 instruction that would have been executed if we returned. */
4644 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4645 label = gen_label_rtx ();
4646
4647 if (TARGET_PA_20)
4648 {
4649       insns[0] = 0x4bc23fd1;	/* ldw -18(sr0,sp),rp */
4650       insns[1] = -398405630;	/* 0xe840d002: bve,n (rp) */
4651       len = 2;
4652 }
4653 else
4654 {
4655       insns[0] = 0x4bc23fd1;	/* ldw -18(sr0,sp),rp */
4656       insns[1] = 0x004010a1;	/* ldsid (sr0,rp),r1 */
4657       insns[2] = 0x00011820;	/* mtsp r1,sr0 */
4658       insns[3] = -532676606;	/* 0xe0400002: be,n 0(sr0,rp) */
4659 len = 4;
4660 }
4661
4662 /* Check the instruction stream at the normal return address for the
4663      export stub.  If it is an export stub, then our return address is
4664 really in -24[frameaddr]. */
4665
4666 for (i = 0; i < len; i++)
4667 {
4668 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4669 rtx op1 = GEN_INT (insns[i]);
4670 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4671 }
4672
4673 /* Here we know that our return address points to an export
4674 stub. We don't want to return the address of the export stub,
4675 but rather the return address of the export stub. That return
4676 address is stored at -24[frameaddr]. */
4677
4678 emit_move_insn (saved_rp,
4679 gen_rtx_MEM (Pmode,
4680 memory_address (Pmode,
4681 plus_constant (Pmode, frameaddr,
4682 -24))));
4683
4684 emit_label (label);
4685
4686 return saved_rp;
4687 }
4688
4689 void
4690 pa_emit_bcond_fp (rtx operands[])
4691 {
4692 enum rtx_code code = GET_CODE (operands[0]);
4693 rtx operand0 = operands[1];
4694 rtx operand1 = operands[2];
4695 rtx label = operands[3];
4696
4697 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4698 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4699
4700 emit_jump_insn (gen_rtx_SET (pc_rtx,
4701 gen_rtx_IF_THEN_ELSE (VOIDmode,
4702 gen_rtx_fmt_ee (NE,
4703 VOIDmode,
4704 gen_rtx_REG (CCFPmode, 0),
4705 const0_rtx),
4706 gen_rtx_LABEL_REF (VOIDmode, label),
4707 pc_rtx)));
4708
4709 }
4710
4711 /* Adjust the cost of a scheduling dependency. Return the new cost of
4712 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4713
4714 static int
4715 pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4716 {
4717 enum attr_type attr_type;
4718
4719 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4720 true dependencies as they are described with bypasses now. */
4721 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4722 return cost;
4723
4724 if (! recog_memoized (insn))
4725 return 0;
4726
4727 attr_type = get_attr_type (insn);
4728
4729 switch (REG_NOTE_KIND (link))
4730 {
4731 case REG_DEP_ANTI:
4732 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4733 cycles later. */
4734
4735 if (attr_type == TYPE_FPLOAD)
4736 {
4737 rtx pat = PATTERN (insn);
4738 rtx dep_pat = PATTERN (dep_insn);
4739 if (GET_CODE (pat) == PARALLEL)
4740 {
4741 /* This happens for the fldXs,mb patterns. */
4742 pat = XVECEXP (pat, 0, 0);
4743 }
4744 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4745 /* If this happens, we have to extend this to schedule
4746 optimally. Return 0 for now. */
4747 return 0;
4748
4749 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4750 {
4751 if (! recog_memoized (dep_insn))
4752 return 0;
4753 switch (get_attr_type (dep_insn))
4754 {
4755 case TYPE_FPALU:
4756 case TYPE_FPMULSGL:
4757 case TYPE_FPMULDBL:
4758 case TYPE_FPDIVSGL:
4759 case TYPE_FPDIVDBL:
4760 case TYPE_FPSQRTSGL:
4761 case TYPE_FPSQRTDBL:
4762 /* A fpload can't be issued until one cycle before a
4763 preceding arithmetic operation has finished if
4764 the target of the fpload is any of the sources
4765 (or destination) of the arithmetic operation. */
4766 return insn_default_latency (dep_insn) - 1;
4767
4768 default:
4769 return 0;
4770 }
4771 }
4772 }
4773 else if (attr_type == TYPE_FPALU)
4774 {
4775 rtx pat = PATTERN (insn);
4776 rtx dep_pat = PATTERN (dep_insn);
4777 if (GET_CODE (pat) == PARALLEL)
4778 {
4779 /* This happens for the fldXs,mb patterns. */
4780 pat = XVECEXP (pat, 0, 0);
4781 }
4782 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4783 /* If this happens, we have to extend this to schedule
4784 optimally. Return 0 for now. */
4785 return 0;
4786
4787 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4788 {
4789 if (! recog_memoized (dep_insn))
4790 return 0;
4791 switch (get_attr_type (dep_insn))
4792 {
4793 case TYPE_FPDIVSGL:
4794 case TYPE_FPDIVDBL:
4795 case TYPE_FPSQRTSGL:
4796 case TYPE_FPSQRTDBL:
4797 /* An ALU flop can't be issued until two cycles before a
4798 preceding divide or sqrt operation has finished if
4799 the target of the ALU flop is any of the sources
4800 (or destination) of the divide or sqrt operation. */
4801 return insn_default_latency (dep_insn) - 2;
4802
4803 default:
4804 return 0;
4805 }
4806 }
4807 }
4808
4809 /* For other anti dependencies, the cost is 0. */
4810 return 0;
4811
4812 case REG_DEP_OUTPUT:
4813 /* Output dependency; DEP_INSN writes a register that INSN writes some
4814 cycles later. */
4815 if (attr_type == TYPE_FPLOAD)
4816 {
4817 rtx pat = PATTERN (insn);
4818 rtx dep_pat = PATTERN (dep_insn);
4819 if (GET_CODE (pat) == PARALLEL)
4820 {
4821 /* This happens for the fldXs,mb patterns. */
4822 pat = XVECEXP (pat, 0, 0);
4823 }
4824 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4825 /* If this happens, we have to extend this to schedule
4826 optimally. Return 0 for now. */
4827 return 0;
4828
4829 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4830 {
4831 if (! recog_memoized (dep_insn))
4832 return 0;
4833 switch (get_attr_type (dep_insn))
4834 {
4835 case TYPE_FPALU:
4836 case TYPE_FPMULSGL:
4837 case TYPE_FPMULDBL:
4838 case TYPE_FPDIVSGL:
4839 case TYPE_FPDIVDBL:
4840 case TYPE_FPSQRTSGL:
4841 case TYPE_FPSQRTDBL:
4842 /* A fpload can't be issued until one cycle before a
4843 preceding arithmetic operation has finished if
4844 the target of the fpload is the destination of the
4845 arithmetic operation.
4846
4847 Exception: For PA7100LC, PA7200 and PA7300, the cost
4848 is 3 cycles, unless they bundle together. We also
4849 pay the penalty if the second insn is a fpload. */
4850 return insn_default_latency (dep_insn) - 1;
4851
4852 default:
4853 return 0;
4854 }
4855 }
4856 }
4857 else if (attr_type == TYPE_FPALU)
4858 {
4859 rtx pat = PATTERN (insn);
4860 rtx dep_pat = PATTERN (dep_insn);
4861 if (GET_CODE (pat) == PARALLEL)
4862 {
4863 /* This happens for the fldXs,mb patterns. */
4864 pat = XVECEXP (pat, 0, 0);
4865 }
4866 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4867 /* If this happens, we have to extend this to schedule
4868 optimally. Return 0 for now. */
4869 return 0;
4870
4871 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4872 {
4873 if (! recog_memoized (dep_insn))
4874 return 0;
4875 switch (get_attr_type (dep_insn))
4876 {
4877 case TYPE_FPDIVSGL:
4878 case TYPE_FPDIVDBL:
4879 case TYPE_FPSQRTSGL:
4880 case TYPE_FPSQRTDBL:
4881 /* An ALU flop can't be issued until two cycles before a
4882 preceding divide or sqrt operation has finished if
4883 the target of the ALU flop is also the target of
4884 the divide or sqrt operation. */
4885 return insn_default_latency (dep_insn) - 2;
4886
4887 default:
4888 return 0;
4889 }
4890 }
4891 }
4892
4893 /* For other output dependencies, the cost is 0. */
4894 return 0;
4895
4896 default:
4897 gcc_unreachable ();
4898 }
4899 }
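
/* Worked example (the latency is assumed, not taken from the .md
   files): if an fpload overwrites a source register of a preceding FP
   multiply whose default latency is 3 cycles, the REG_DEP_ANTI case
   above returns 3 - 1 == 2, so the load may issue one cycle before the
   multiply completes but no earlier.  */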
4900
4901 /* Adjust scheduling priorities. We use this to try and keep addil
4902 and the next use of %r1 close together. */
4903 static int
4904 pa_adjust_priority (rtx_insn *insn, int priority)
4905 {
4906 rtx set = single_set (insn);
4907 rtx src, dest;
4908 if (set)
4909 {
4910 src = SET_SRC (set);
4911 dest = SET_DEST (set);
4912 if (GET_CODE (src) == LO_SUM
4913 && symbolic_operand (XEXP (src, 1), VOIDmode)
4914 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4915 priority >>= 3;
4916
4917 else if (GET_CODE (src) == MEM
4918 && GET_CODE (XEXP (src, 0)) == LO_SUM
4919 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4920 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4921 priority >>= 1;
4922
4923 else if (GET_CODE (dest) == MEM
4924 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4925 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4926 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4927 priority >>= 3;
4928 }
4929 return priority;
4930 }
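
/* E.g. priority >>= 3 maps a priority of 64 down to 8.  The shift
   amounts are heuristic; the aim, per the comment above, is simply to
   keep the addil that computes %r1 and the LO_SUM insn that consumes
   it from drifting apart in the schedule.  */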
4931
4932 /* The 700 can only issue a single insn at a time.
4933 The 7XXX processors can issue two insns at a time.
4934 The 8000 can issue 4 insns at a time. */
4935 static int
4936 pa_issue_rate (void)
4937 {
4938 switch (pa_cpu)
4939 {
4940 case PROCESSOR_700: return 1;
4941 case PROCESSOR_7100: return 2;
4942 case PROCESSOR_7100LC: return 2;
4943 case PROCESSOR_7200: return 2;
4944 case PROCESSOR_7300: return 2;
4945 case PROCESSOR_8000: return 4;
4946
4947 default:
4948 gcc_unreachable ();
4949 }
4950 }
4951
4952
4953
4954 /* Return any length plus adjustment needed by INSN which already has
4955 its length computed as LENGTH. Return LENGTH if no adjustment is
4956 necessary.
4957
4958 Also compute the length of an inline block move here as it is too
4959 complicated to express as a length attribute in pa.md. */
4960 int
4961 pa_adjust_insn_length (rtx_insn *insn, int length)
4962 {
4963 rtx pat = PATTERN (insn);
4964
4965 /* If length is negative or undefined, provide initial length. */
4966 if ((unsigned int) length >= INT_MAX)
4967 {
4968 if (GET_CODE (pat) == SEQUENCE)
4969 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
4970
4971 switch (get_attr_type (insn))
4972 {
4973 case TYPE_MILLI:
4974 length = pa_attr_length_millicode_call (insn);
4975 break;
4976 case TYPE_CALL:
4977 length = pa_attr_length_call (insn, 0);
4978 break;
4979 case TYPE_SIBCALL:
4980 length = pa_attr_length_call (insn, 1);
4981 break;
4982 case TYPE_DYNCALL:
4983 length = pa_attr_length_indirect_call (insn);
4984 break;
4985 case TYPE_SH_FUNC_ADRS:
4986 length = pa_attr_length_millicode_call (insn) + 20;
4987 break;
4988 default:
4989 gcc_unreachable ();
4990 }
4991 }
4992
4993 /* Block move pattern. */
4994 if (NONJUMP_INSN_P (insn)
4995 && GET_CODE (pat) == PARALLEL
4996 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4997 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4998 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4999 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5000 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5001 length += compute_movmem_length (insn) - 4;
5002 /* Block clear pattern. */
5003 else if (NONJUMP_INSN_P (insn)
5004 && GET_CODE (pat) == PARALLEL
5005 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5006 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5007 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5008 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5009 length += compute_clrmem_length (insn) - 4;
5010 /* Conditional branch with an unfilled delay slot. */
5011 else if (JUMP_P (insn) && ! simplejump_p (insn))
5012 {
5013 /* Adjust a short backwards conditional with an unfilled delay slot. */
5014 if (GET_CODE (pat) == SET
5015 && length == 4
5016 && JUMP_LABEL (insn) != NULL_RTX
5017 && ! forward_branch_p (insn))
5018 length += 4;
5019 else if (GET_CODE (pat) == PARALLEL
5020 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5021 && length == 4)
5022 length += 4;
5023 /* Adjust dbra insn with short backwards conditional branch with
5024 unfilled delay slot -- only for case where counter is in a
5025 	     general register.  */
5026 else if (GET_CODE (pat) == PARALLEL
5027 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5028 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5029 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5030 && length == 4
5031 && ! forward_branch_p (insn))
5032 length += 4;
5033 }
5034 return length;
5035 }
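
/* Example adjustment: a short backward conditional branch whose delay
   slot could not be filled comes in with the default length of 4 and
   leaves with 8, covering the extra instruction needed for the
   unfilled slot.  */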
5036
5037 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5038
5039 static bool
5040 pa_print_operand_punct_valid_p (unsigned char code)
5041 {
5042 if (code == '@'
5043 || code == '#'
5044 || code == '*'
5045 || code == '^')
5046 return true;
5047
5048 return false;
5049 }
5050
5051 /* Print operand X (an rtx) in assembler syntax to file FILE.
5052 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5053 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5054
5055 void
5056 pa_print_operand (FILE *file, rtx x, int code)
5057 {
5058 switch (code)
5059 {
5060 case '#':
5061 /* Output a 'nop' if there's nothing for the delay slot. */
5062 if (dbr_sequence_length () == 0)
5063 fputs ("\n\tnop", file);
5064 return;
5065 case '*':
5066       /* Output a nullification completer if there's nothing for the
5067 	 delay slot or nullification is requested.  */
5068       if (dbr_sequence_length () == 0
5069 	  || (final_sequence
5070 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5071 fputs (",n", file);
5072 return;
5073 case 'R':
5074 /* Print out the second register name of a register pair.
5075 I.e., R (6) => 7. */
5076 fputs (reg_names[REGNO (x) + 1], file);
5077 return;
5078 case 'r':
5079 /* A register or zero. */
5080 if (x == const0_rtx
5081 || (x == CONST0_RTX (DFmode))
5082 || (x == CONST0_RTX (SFmode)))
5083 {
5084 fputs ("%r0", file);
5085 return;
5086 }
5087 else
5088 break;
5089 case 'f':
5090 /* A register or zero (floating point). */
5091 if (x == const0_rtx
5092 || (x == CONST0_RTX (DFmode))
5093 || (x == CONST0_RTX (SFmode)))
5094 {
5095 fputs ("%fr0", file);
5096 return;
5097 }
5098 else
5099 break;
5100 case 'A':
5101 {
5102 rtx xoperands[2];
5103
5104 xoperands[0] = XEXP (XEXP (x, 0), 0);
5105 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5106 pa_output_global_address (file, xoperands[1], 0);
5107 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5108 return;
5109 }
5110
5111 case 'C': /* Plain (C)ondition */
5112 case 'X':
5113 switch (GET_CODE (x))
5114 {
5115 case EQ:
5116 fputs ("=", file); break;
5117 case NE:
5118 fputs ("<>", file); break;
5119 case GT:
5120 fputs (">", file); break;
5121 case GE:
5122 fputs (">=", file); break;
5123 case GEU:
5124 fputs (">>=", file); break;
5125 case GTU:
5126 fputs (">>", file); break;
5127 case LT:
5128 fputs ("<", file); break;
5129 case LE:
5130 fputs ("<=", file); break;
5131 case LEU:
5132 fputs ("<<=", file); break;
5133 case LTU:
5134 fputs ("<<", file); break;
5135 default:
5136 gcc_unreachable ();
5137 }
5138 return;
5139 case 'N': /* Condition, (N)egated */
5140 switch (GET_CODE (x))
5141 {
5142 case EQ:
5143 fputs ("<>", file); break;
5144 case NE:
5145 fputs ("=", file); break;
5146 case GT:
5147 fputs ("<=", file); break;
5148 case GE:
5149 fputs ("<", file); break;
5150 case GEU:
5151 fputs ("<<", file); break;
5152 case GTU:
5153 fputs ("<<=", file); break;
5154 case LT:
5155 fputs (">=", file); break;
5156 case LE:
5157 fputs (">", file); break;
5158 case LEU:
5159 fputs (">>", file); break;
5160 case LTU:
5161 fputs (">>=", file); break;
5162 default:
5163 gcc_unreachable ();
5164 }
5165 return;
5166 /* For floating point comparisons. Note that the output
5167 predicates are the complement of the desired mode. The
5168 conditions for GT, GE, LT, LE and LTGT cause an invalid
5169 operation exception if the result is unordered and this
5170 exception is enabled in the floating-point status register. */
5171 case 'Y':
5172 switch (GET_CODE (x))
5173 {
5174 case EQ:
5175 fputs ("!=", file); break;
5176 case NE:
5177 fputs ("=", file); break;
5178 case GT:
5179 fputs ("!>", file); break;
5180 case GE:
5181 fputs ("!>=", file); break;
5182 case LT:
5183 fputs ("!<", file); break;
5184 case LE:
5185 fputs ("!<=", file); break;
5186 case LTGT:
5187 fputs ("!<>", file); break;
5188 case UNLE:
5189 fputs ("!?<=", file); break;
5190 case UNLT:
5191 fputs ("!?<", file); break;
5192 case UNGE:
5193 fputs ("!?>=", file); break;
5194 case UNGT:
5195 fputs ("!?>", file); break;
5196 case UNEQ:
5197 fputs ("!?=", file); break;
5198 case UNORDERED:
5199 fputs ("!?", file); break;
5200 case ORDERED:
5201 fputs ("?", file); break;
5202 default:
5203 gcc_unreachable ();
5204 }
5205 return;
5206 case 'S': /* Condition, operands are (S)wapped. */
5207 switch (GET_CODE (x))
5208 {
5209 case EQ:
5210 fputs ("=", file); break;
5211 case NE:
5212 fputs ("<>", file); break;
5213 case GT:
5214 fputs ("<", file); break;
5215 case GE:
5216 fputs ("<=", file); break;
5217 case GEU:
5218 fputs ("<<=", file); break;
5219 case GTU:
5220 fputs ("<<", file); break;
5221 case LT:
5222 fputs (">", file); break;
5223 case LE:
5224 fputs (">=", file); break;
5225 case LEU:
5226 fputs (">>=", file); break;
5227 case LTU:
5228 fputs (">>", file); break;
5229 default:
5230 gcc_unreachable ();
5231 }
5232 return;
5233 case 'B': /* Condition, (B)oth swapped and negate. */
5234 switch (GET_CODE (x))
5235 {
5236 case EQ:
5237 fputs ("<>", file); break;
5238 case NE:
5239 fputs ("=", file); break;
5240 case GT:
5241 fputs (">=", file); break;
5242 case GE:
5243 fputs (">", file); break;
5244 case GEU:
5245 fputs (">>", file); break;
5246 case GTU:
5247 fputs (">>=", file); break;
5248 case LT:
5249 fputs ("<=", file); break;
5250 case LE:
5251 fputs ("<", file); break;
5252 case LEU:
5253 fputs ("<<", file); break;
5254 case LTU:
5255 fputs ("<<=", file); break;
5256 default:
5257 gcc_unreachable ();
5258 }
5259 return;
5260 case 'k':
5261 gcc_assert (GET_CODE (x) == CONST_INT);
5262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5263 return;
5264 case 'Q':
5265 gcc_assert (GET_CODE (x) == CONST_INT);
5266 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5267 return;
5268 case 'L':
5269 gcc_assert (GET_CODE (x) == CONST_INT);
5270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5271 return;
5272 case 'o':
5273 gcc_assert (GET_CODE (x) == CONST_INT
5274 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5275 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5276 return;
5277 case 'O':
5278 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5279 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5280 return;
5281 case 'p':
5282 gcc_assert (GET_CODE (x) == CONST_INT);
5283 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5284 return;
5285 case 'P':
5286 gcc_assert (GET_CODE (x) == CONST_INT);
5287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5288 return;
5289 case 'I':
5290 if (GET_CODE (x) == CONST_INT)
5291 fputs ("i", file);
5292 return;
5293 case 'M':
5294 case 'F':
5295 switch (GET_CODE (XEXP (x, 0)))
5296 {
5297 case PRE_DEC:
5298 case PRE_INC:
5299 if (ASSEMBLER_DIALECT == 0)
5300 fputs ("s,mb", file);
5301 else
5302 fputs (",mb", file);
5303 break;
5304 case POST_DEC:
5305 case POST_INC:
5306 if (ASSEMBLER_DIALECT == 0)
5307 fputs ("s,ma", file);
5308 else
5309 fputs (",ma", file);
5310 break;
5311 case PLUS:
5312 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5313 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5314 {
5315 if (ASSEMBLER_DIALECT == 0)
5316 fputs ("x", file);
5317 }
5318 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5319 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5320 {
5321 if (ASSEMBLER_DIALECT == 0)
5322 fputs ("x,s", file);
5323 else
5324 fputs (",s", file);
5325 }
5326 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5327 fputs ("s", file);
5328 break;
5329 default:
5330 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5331 fputs ("s", file);
5332 break;
5333 }
5334 return;
5335 case 'G':
5336 pa_output_global_address (file, x, 0);
5337 return;
5338 case 'H':
5339 pa_output_global_address (file, x, 1);
5340 return;
5341 case 0: /* Don't do anything special */
5342 break;
5343 case 'Z':
5344 {
5345 unsigned op[3];
5346 compute_zdepwi_operands (INTVAL (x), op);
5347 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5348 return;
5349 }
5350 case 'z':
5351 {
5352 unsigned op[3];
5353 compute_zdepdi_operands (INTVAL (x), op);
5354 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5355 return;
5356 }
5357 case 'c':
5358 /* We can get here from a .vtable_inherit due to our
5359 CONSTANT_ADDRESS_P rejecting perfectly good constant
5360 addresses. */
5361 break;
5362 default:
5363 gcc_unreachable ();
5364 }
5365 if (GET_CODE (x) == REG)
5366 {
5367 fputs (reg_names [REGNO (x)], file);
5368 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5369 {
5370 fputs ("R", file);
5371 return;
5372 }
5373 if (FP_REG_P (x)
5374 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5375 && (REGNO (x) & 1) == 0)
5376 fputs ("L", file);
5377 }
5378 else if (GET_CODE (x) == MEM)
5379 {
5380 int size = GET_MODE_SIZE (GET_MODE (x));
5381 rtx base = NULL_RTX;
5382 switch (GET_CODE (XEXP (x, 0)))
5383 {
5384 case PRE_DEC:
5385 case POST_DEC:
5386 base = XEXP (XEXP (x, 0), 0);
5387 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5388 break;
5389 case PRE_INC:
5390 case POST_INC:
5391 base = XEXP (XEXP (x, 0), 0);
5392 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5393 break;
5394 case PLUS:
5395 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5396 fprintf (file, "%s(%s)",
5397 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5398 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5399 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5400 fprintf (file, "%s(%s)",
5401 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5402 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5403 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5404 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5405 {
5406 /* Because the REG_POINTER flag can get lost during reload,
5407 pa_legitimate_address_p canonicalizes the order of the
5408 index and base registers in the combined move patterns. */
5409 rtx base = XEXP (XEXP (x, 0), 1);
5410 rtx index = XEXP (XEXP (x, 0), 0);
5411
5412 fprintf (file, "%s(%s)",
5413 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5414 }
5415 else
5416 output_address (XEXP (x, 0));
5417 break;
5418 default:
5419 output_address (XEXP (x, 0));
5420 break;
5421 }
5422 }
5423 else
5424 output_addr_const (file, x);
5425 }
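
/* Usage sketch: with X a (gt (reg) (const_int 0)) comparison, "%C"
   prints ">", "%N" prints the negated condition "<=", "%S" prints the
   swapped-operand condition "<", and "%B" prints the swapped and
   negated condition ">=".  */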
5426
5427 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5428
5429 void
5430 pa_output_global_address (FILE *file, rtx x, int round_constant)
5431 {
5432
5433 /* Imagine (high (const (plus ...))). */
5434 if (GET_CODE (x) == HIGH)
5435 x = XEXP (x, 0);
5436
5437 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5438 output_addr_const (file, x);
5439 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5440 {
5441 output_addr_const (file, x);
5442 fputs ("-$global$", file);
5443 }
5444 else if (GET_CODE (x) == CONST)
5445 {
5446 const char *sep = "";
5447 int offset = 0; /* assembler wants -$global$ at end */
5448 rtx base = NULL_RTX;
5449
5450 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5451 {
5452 case LABEL_REF:
5453 case SYMBOL_REF:
5454 base = XEXP (XEXP (x, 0), 0);
5455 output_addr_const (file, base);
5456 break;
5457 case CONST_INT:
5458 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5459 break;
5460 default:
5461 gcc_unreachable ();
5462 }
5463
5464 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5465 {
5466 case LABEL_REF:
5467 case SYMBOL_REF:
5468 base = XEXP (XEXP (x, 0), 1);
5469 output_addr_const (file, base);
5470 break;
5471 case CONST_INT:
5472 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5473 break;
5474 default:
5475 gcc_unreachable ();
5476 }
5477
5478 /* How bogus. The compiler is apparently responsible for
5479 rounding the constant if it uses an LR field selector.
5480
5481 The linker and/or assembler seem a better place since
5482 they have to do this kind of thing already.
5483
5484 If we fail to do this, HP's optimizing linker may eliminate
5485 an addil, but not update the ldw/stw/ldo instruction that
5486 uses the result of the addil. */
5487 if (round_constant)
5488 offset = ((offset + 0x1000) & ~0x1fff);
5489
5490 switch (GET_CODE (XEXP (x, 0)))
5491 {
5492 case PLUS:
5493 if (offset < 0)
5494 {
5495 offset = -offset;
5496 sep = "-";
5497 }
5498 else
5499 sep = "+";
5500 break;
5501
5502 case MINUS:
5503 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5504 sep = "-";
5505 break;
5506
5507 default:
5508 gcc_unreachable ();
5509 }
5510
5511 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5512 fputs ("-$global$", file);
5513 if (offset)
5514 fprintf (file, "%s%d", sep, offset);
5515 }
5516 else
5517 output_addr_const (file, x);
5518 }
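/* Worked example (illustrative): with round_constant set, an offset of
   0x2345 becomes (0x2345 + 0x1000) & ~0x1fff == 0x2000, i.e. the offset
   is rounded to the nearest multiple of 0x2000 so that the addil and the
   dependent ldw/stw/ldo agree on the same rounded base.  */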
5519
5520 /* Output boilerplate text to appear at the beginning of the file.
5521 There are several possible versions. */
5522 #define aputs(x) fputs(x, asm_out_file)
5523 static inline void
5524 pa_file_start_level (void)
5525 {
5526 if (TARGET_64BIT)
5527 aputs ("\t.LEVEL 2.0w\n");
5528 else if (TARGET_PA_20)
5529 aputs ("\t.LEVEL 2.0\n");
5530 else if (TARGET_PA_11)
5531 aputs ("\t.LEVEL 1.1\n");
5532 else
5533 aputs ("\t.LEVEL 1.0\n");
5534 }
5535
5536 static inline void
5537 pa_file_start_space (int sortspace)
5538 {
5539 aputs ("\t.SPACE $PRIVATE$");
5540 if (sortspace)
5541 aputs (",SORT=16");
5542 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5543 if (flag_tm)
5544 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5545 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5546 "\n\t.SPACE $TEXT$");
5547 if (sortspace)
5548 aputs (",SORT=8");
5549 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5550 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5551 }
5552
5553 static inline void
5554 pa_file_start_file (int want_version)
5555 {
5556 if (write_symbols != NO_DEBUG)
5557 {
5558 output_file_directive (asm_out_file, main_input_filename);
5559 if (want_version)
5560 aputs ("\t.version\t\"01.01\"\n");
5561 }
5562 }
5563
5564 static inline void
5565 pa_file_start_mcount (const char *aswhat)
5566 {
5567 if (profile_flag)
5568 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5569 }
5570
5571 static void
5572 pa_elf_file_start (void)
5573 {
5574 pa_file_start_level ();
5575 pa_file_start_mcount ("ENTRY");
5576 pa_file_start_file (0);
5577 }
5578
5579 static void
5580 pa_som_file_start (void)
5581 {
5582 pa_file_start_level ();
5583 pa_file_start_space (0);
5584 aputs ("\t.IMPORT $global$,DATA\n"
5585 "\t.IMPORT $$dyncall,MILLICODE\n");
5586 pa_file_start_mcount ("CODE");
5587 pa_file_start_file (0);
5588 }
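/* Illustrative output (not emitted verbatim by this function): for a
   32-bit PA 1.1 SOM target without -p or -g, the boilerplate above
   amounts to roughly

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
	.SPACE $TEXT$
	.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */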
5589
5590 static void
5591 pa_linux_file_start (void)
5592 {
5593 pa_file_start_file (1);
5594 pa_file_start_level ();
5595 pa_file_start_mcount ("CODE");
5596 }
5597
5598 static void
5599 pa_hpux64_gas_file_start (void)
5600 {
5601 pa_file_start_level ();
5602 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5603 if (profile_flag)
5604 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5605 #endif
5606 pa_file_start_file (1);
5607 }
5608
5609 static void
5610 pa_hpux64_hpas_file_start (void)
5611 {
5612 pa_file_start_level ();
5613 pa_file_start_space (1);
5614 pa_file_start_mcount ("CODE");
5615 pa_file_start_file (0);
5616 }
5617 #undef aputs
5618
5619 /* Search the deferred plabel list for SYMBOL and return its internal
5620 label. If an entry for SYMBOL is not found, a new entry is created. */
5621
5622 rtx
5623 pa_get_deferred_plabel (rtx symbol)
5624 {
5625 const char *fname = XSTR (symbol, 0);
5626 size_t i;
5627
5628 /* See if we have already put this function on the list of deferred
5629 plabels. This list is generally small, so a linear search is not
5630 too ugly. If it proves too slow, replace it with something faster. */
5631 for (i = 0; i < n_deferred_plabels; i++)
5632 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5633 break;
5634
5635 /* If the deferred plabel list is empty, or this entry was not found
5636 on the list, create a new entry on the list. */
5637 if (deferred_plabels == NULL || i == n_deferred_plabels)
5638 {
5639 tree id;
5640
5641 if (deferred_plabels == 0)
5642 deferred_plabels = ggc_alloc<deferred_plabel> ();
5643 else
5644 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5645 deferred_plabels,
5646 n_deferred_plabels + 1);
5647
5648 i = n_deferred_plabels++;
5649 deferred_plabels[i].internal_label = gen_label_rtx ();
5650 deferred_plabels[i].symbol = symbol;
5651
5652 /* Gross. We have just implicitly taken the address of this
5653 function. Mark it in the same manner as assemble_name. */
5654 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5655 if (id)
5656 mark_referenced (id);
5657 }
5658
5659 return deferred_plabels[i].internal_label;
5660 }
5661
5662 static void
5663 output_deferred_plabels (void)
5664 {
5665 size_t i;
5666
5667 /* If we have some deferred plabels, then we need to switch into the
5668 data or readonly data section, and align it to a 4 byte (32-bit) or
5669 8 byte (64-bit) boundary before outputting the deferred plabels. */
5670 if (n_deferred_plabels)
5671 {
5672 switch_to_section (flag_pic ? data_section : readonly_data_section);
5673 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5674 }
5675
5676 /* Now output the deferred plabels. */
5677 for (i = 0; i < n_deferred_plabels; i++)
5678 {
5679 targetm.asm_out.internal_label (asm_out_file, "L",
5680 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5681 assemble_integer (deferred_plabels[i].symbol,
5682 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5683 }
5684 }
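/* The emitted table is one word (or doubleword) per entry; for a deferred
   plabel referencing foo it looks something like

	L$0012:
		.word	foo

   where the label name and the .word/.dword directive are target-specific
   and the number 0012 is purely illustrative.  */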
5685
5686 /* Initialize optabs to point to emulation routines. */
5687
5688 static void
5689 pa_init_libfuncs (void)
5690 {
5691 if (HPUX_LONG_DOUBLE_LIBRARY)
5692 {
5693 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5694 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5695 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5696 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5697 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5698 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5699 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5700 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5701 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5702
5703 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5704 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5705 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5706 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5707 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5708 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5709 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5710
5711 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5712 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5713 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5714 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5715
5716 set_conv_libfunc (sfix_optab, SImode, TFmode,
5717 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5718 : "_U_Qfcnvfxt_quad_to_sgl");
5719 set_conv_libfunc (sfix_optab, DImode, TFmode,
5720 "_U_Qfcnvfxt_quad_to_dbl");
5721 set_conv_libfunc (ufix_optab, SImode, TFmode,
5722 "_U_Qfcnvfxt_quad_to_usgl");
5723 set_conv_libfunc (ufix_optab, DImode, TFmode,
5724 "_U_Qfcnvfxt_quad_to_udbl");
5725
5726 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5727 "_U_Qfcnvxf_sgl_to_quad");
5728 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5729 "_U_Qfcnvxf_dbl_to_quad");
5730 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5731 "_U_Qfcnvxf_usgl_to_quad");
5732 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5733 "_U_Qfcnvxf_udbl_to_quad");
5734 }
5735
5736 if (TARGET_SYNC_LIBCALL)
5737 init_sync_libfuncs (UNITS_PER_WORD);
5738 }
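#if 0
/* Illustrative sketch, not part of the build: on HP-UX targets where
   long double is the 128-bit TFmode type, the optab mappings above mean
   a quad-precision add such as the (hypothetical) function below is
   compiled as a libcall to _U_Qfadd rather than inline FP code.  */
long double
quad_add_example (long double a, long double b)
{
  return a + b;	/* expands to a call to _U_Qfadd */
}
#endif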
5739
5740 /* HP's millicode routines mean something special to the assembler.
5741 Keep track of which ones we have used. */
5742
5743 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5744 static void import_milli (enum millicodes);
5745 static char imported[(int) end1000];
5746 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5747 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5748 #define MILLI_START 10
5749
5750 static void
5751 import_milli (enum millicodes code)
5752 {
5753 char str[sizeof (import_string)];
5754
5755 if (!imported[(int) code])
5756 {
5757 imported[(int) code] = 1;
5758 strcpy (str, import_string);
5759 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5760 output_asm_insn (str, 0);
5761 }
5762 }
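/* Worked example: import_milli (mulI) copies "mulI" over the four dots
   at offset MILLI_START (10) in the template, emitting

	.IMPORT $$mulI,MILLICODE

   and the imported[] flag ensures the directive is output at most once
   per millicode routine.  */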
5763
5764 /* The register constraints have put the operands and return value in
5765 the proper registers. */
5766
5767 const char *
5768 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5769 {
5770 import_milli (mulI);
5771 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5772 }
5773
5774 /* Emit the rtl for doing a division by a constant. */
5775
5776 /* Do magic division millicodes exist for this value? */
5777 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5778
5779 /* We'll use an array to keep track of the magic millicodes and
5780 whether or not we've used them already. [n][0] is signed, [n][1] is
5781 unsigned. */
5782
5783 static int div_milli[16][2];
5784
5785 int
5786 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5787 {
5788 if (GET_CODE (operands[2]) == CONST_INT
5789 && INTVAL (operands[2]) > 0
5790 && INTVAL (operands[2]) < 16
5791 && pa_magic_milli[INTVAL (operands[2])])
5792 {
5793 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5794
5795 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5796 emit
5797 (gen_rtx_PARALLEL
5798 (VOIDmode,
5799 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5800 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5801 SImode,
5802 gen_rtx_REG (SImode, 26),
5803 operands[2])),
5804 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5805 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5806 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5807 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5808 gen_rtx_CLOBBER (VOIDmode, ret))));
5809 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5810 return 1;
5811 }
5812 return 0;
5813 }
5814
5815 const char *
5816 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5817 {
5818 int divisor;
5819
5820 /* If the divisor is a constant, try to use one of the special
5821 opcodes.  */
5822 if (GET_CODE (operands[0]) == CONST_INT)
5823 {
5824 static char buf[100];
5825 divisor = INTVAL (operands[0]);
5826 if (!div_milli[divisor][unsignedp])
5827 {
5828 div_milli[divisor][unsignedp] = 1;
5829 if (unsignedp)
5830 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5831 else
5832 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5833 }
5834 if (unsignedp)
5835 {
5836 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5837 INTVAL (operands[0]));
5838 return pa_output_millicode_call (insn,
5839 gen_rtx_SYMBOL_REF (SImode, buf));
5840 }
5841 else
5842 {
5843 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5844 INTVAL (operands[0]));
5845 return pa_output_millicode_call (insn,
5846 gen_rtx_SYMBOL_REF (SImode, buf));
5847 }
5848 }
5849 /* Divisor isn't a special constant. */
5850 else
5851 {
5852 if (unsignedp)
5853 {
5854 import_milli (divU);
5855 return pa_output_millicode_call (insn,
5856 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5857 }
5858 else
5859 {
5860 import_milli (divI);
5861 return pa_output_millicode_call (insn,
5862 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5863 }
5864 }
5865 }
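/* For example, a division by the magic constant 7 first emits

	.IMPORT $$divI_7,MILLICODE

   (once per divisor/signedness pair, tracked in div_milli) and then a
   millicode call to $$divI_7, whereas a division by a non-magic value
   such as 11 falls back to the generic $$divI routine.  */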
5866
5867 /* Output a $$rem millicode to do mod. */
5868
5869 const char *
5870 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5871 {
5872 if (unsignedp)
5873 {
5874 import_milli (remU);
5875 return pa_output_millicode_call (insn,
5876 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5877 }
5878 else
5879 {
5880 import_milli (remI);
5881 return pa_output_millicode_call (insn,
5882 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5883 }
5884 }
5885
5886 void
5887 pa_output_arg_descriptor (rtx_insn *call_insn)
5888 {
5889 const char *arg_regs[4];
5890 machine_mode arg_mode;
5891 rtx link;
5892 int i, output_flag = 0;
5893 int regno;
5894
5895 /* We neither need nor want argument location descriptors for the
5896 64-bit runtime environment or the ELF32 environment. */
5897 if (TARGET_64BIT || TARGET_ELF32)
5898 return;
5899
5900 for (i = 0; i < 4; i++)
5901 arg_regs[i] = 0;
5902
5903 /* Specify explicitly that no argument relocations should take place
5904 if using the portable runtime calling conventions. */
5905 if (TARGET_PORTABLE_RUNTIME)
5906 {
5907 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5908 asm_out_file);
5909 return;
5910 }
5911
5912 gcc_assert (CALL_P (call_insn));
5913 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5914 link; link = XEXP (link, 1))
5915 {
5916 rtx use = XEXP (link, 0);
5917
5918 if (! (GET_CODE (use) == USE
5919 && GET_CODE (XEXP (use, 0)) == REG
5920 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5921 continue;
5922
5923 arg_mode = GET_MODE (XEXP (use, 0));
5924 regno = REGNO (XEXP (use, 0));
5925 if (regno >= 23 && regno <= 26)
5926 {
5927 arg_regs[26 - regno] = "GR";
5928 if (arg_mode == DImode)
5929 arg_regs[25 - regno] = "GR";
5930 }
5931 else if (regno >= 32 && regno <= 39)
5932 {
5933 if (arg_mode == SFmode)
5934 arg_regs[(regno - 32) / 2] = "FR";
5935 else
5936 {
5937 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5938 arg_regs[(regno - 34) / 2] = "FR";
5939 arg_regs[(regno - 34) / 2 + 1] = "FU";
5940 #else
5941 arg_regs[(regno - 34) / 2] = "FU";
5942 arg_regs[(regno - 34) / 2 + 1] = "FR";
5943 #endif
5944 }
5945 }
5946 }
5947 fputs ("\t.CALL ", asm_out_file);
5948 for (i = 0; i < 4; i++)
5949 {
5950 if (arg_regs[i])
5951 {
5952 if (output_flag++)
5953 fputc (',', asm_out_file);
5954 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5955 }
5956 }
5957 fputc ('\n', asm_out_file);
5958 }
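/* For example, a call passing two word-sized integer arguments in %r26
   and %r25 produces

	.CALL ARGW0=GR,ARGW1=GR

   which tells the SOM linker which argument relocations are permitted
   at the call site.  */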
5959 \f
5960 /* Inform reload about cases where moving X with a mode MODE to or from
5961 a register in RCLASS requires an extra scratch or immediate register.
5962 Return the class needed for the immediate register. */
5963
5964 static reg_class_t
5965 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5966 machine_mode mode, secondary_reload_info *sri)
5967 {
5968 int regno;
5969 enum reg_class rclass = (enum reg_class) rclass_i;
5970
5971 /* Handle the easy stuff first. */
5972 if (rclass == R1_REGS)
5973 return NO_REGS;
5974
5975 if (REG_P (x))
5976 {
5977 regno = REGNO (x);
5978 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5979 return NO_REGS;
5980 }
5981 else
5982 regno = -1;
5983
5984 /* If we have something like (mem (mem (...)), we can safely assume the
5985 inner MEM will end up in a general register after reloading, so there's
5986 no need for a secondary reload. */
5987 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5988 return NO_REGS;
5989
5990 /* Trying to load a constant into a FP register during PIC code
5991 generation requires %r1 as a scratch register. For float modes,
5992 the only legitimate constant is CONST0_RTX. However, there are
5993 a few patterns that accept constant double operands. */
5994 if (flag_pic
5995 && FP_REG_CLASS_P (rclass)
5996 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5997 {
5998 switch (mode)
5999 {
6000 case SImode:
6001 sri->icode = CODE_FOR_reload_insi_r1;
6002 break;
6003
6004 case DImode:
6005 sri->icode = CODE_FOR_reload_indi_r1;
6006 break;
6007
6008 case SFmode:
6009 sri->icode = CODE_FOR_reload_insf_r1;
6010 break;
6011
6012 case DFmode:
6013 sri->icode = CODE_FOR_reload_indf_r1;
6014 break;
6015
6016 default:
6017 gcc_unreachable ();
6018 }
6019 return NO_REGS;
6020 }
6021
6022 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6023 register when we're generating PIC code or when the operand isn't
6024 readonly. */
6025 if (pa_symbolic_expression_p (x))
6026 {
6027 if (GET_CODE (x) == HIGH)
6028 x = XEXP (x, 0);
6029
6030 if (flag_pic || !read_only_operand (x, VOIDmode))
6031 {
6032 switch (mode)
6033 {
6034 case SImode:
6035 sri->icode = CODE_FOR_reload_insi_r1;
6036 break;
6037
6038 case DImode:
6039 sri->icode = CODE_FOR_reload_indi_r1;
6040 break;
6041
6042 default:
6043 gcc_unreachable ();
6044 }
6045 return NO_REGS;
6046 }
6047 }
6048
6049 /* Profiling showed the PA port spends about 1.3% of its compilation
6050 time in true_regnum from calls inside pa_secondary_reload_class. */
6051 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6052 regno = true_regnum (x);
6053
6054 /* Handle reloads for floating point loads and stores. */
6055 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6056 && FP_REG_CLASS_P (rclass))
6057 {
6058 if (MEM_P (x))
6059 {
6060 x = XEXP (x, 0);
6061
6062 /* We don't need a secondary reload for indexed memory addresses.
6063
6064 When INT14_OK_STRICT is true, it might appear that we could
6065 directly allow register indirect memory addresses. However,
6066 this doesn't work because we don't support SUBREGs in
6067 floating-point register copies and reload doesn't tell us
6068 when it's going to use a SUBREG. */
6069 if (IS_INDEX_ADDR_P (x))
6070 return NO_REGS;
6071 }
6072
6073 /* Request a secondary reload with a general scratch register
6074 for everything else. ??? Could symbolic operands be handled
6075 directly when generating non-pic PA 2.0 code? */
6076 sri->icode = (in_p
6077 ? direct_optab_handler (reload_in_optab, mode)
6078 : direct_optab_handler (reload_out_optab, mode));
6079 return NO_REGS;
6080 }
6081
6082 /* A SAR<->FP register copy requires an intermediate general register
6083 and secondary memory. We need a secondary reload with a general
6084 scratch register for spills. */
6085 if (rclass == SHIFT_REGS)
6086 {
6087 /* Handle spill. */
6088 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6089 {
6090 sri->icode = (in_p
6091 ? direct_optab_handler (reload_in_optab, mode)
6092 : direct_optab_handler (reload_out_optab, mode));
6093 return NO_REGS;
6094 }
6095
6096 /* Handle FP copy. */
6097 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6098 return GENERAL_REGS;
6099 }
6100
6101 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6102 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6103 && FP_REG_CLASS_P (rclass))
6104 return GENERAL_REGS;
6105
6106 return NO_REGS;
6107 }
6108
6109 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6110 is only marked as live on entry by df-scan when it is a fixed
6111 register. It isn't a fixed register in the 64-bit runtime,
6112 so we need to mark it here. */
6113
6114 static void
6115 pa_extra_live_on_entry (bitmap regs)
6116 {
6117 if (TARGET_64BIT)
6118 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6119 }
6120
6121 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6122 to prevent it from being deleted. */
6123
6124 rtx
6125 pa_eh_return_handler_rtx (void)
6126 {
6127 rtx tmp;
6128
6129 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6130 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6131 tmp = gen_rtx_MEM (word_mode, tmp);
6132 MEM_VOLATILE_P (tmp) = 1;
6133 return tmp;
6134 }
6135
6136 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6137 by invisible reference. As a GCC extension, we also pass anything
6138 with a zero or variable size by reference.
6139
6140 The 64-bit runtime does not describe passing any types by invisible
6141 reference. The internals of GCC can't currently handle passing
6142 empty structures, and zero- or variable-length arrays when they are
6143 not passed entirely on the stack or by reference. Thus, as a GCC
6144 extension, we pass these types by reference. The HP compiler doesn't
6145 support these types, so hopefully there shouldn't be any compatibility
6146 issues. This may have to be revisited when HP releases a C99 compiler
6147 or updates the ABI. */
6148
6149 static bool
6150 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6151 machine_mode mode, const_tree type,
6152 bool named ATTRIBUTE_UNUSED)
6153 {
6154 HOST_WIDE_INT size;
6155
6156 if (type)
6157 size = int_size_in_bytes (type);
6158 else
6159 size = GET_MODE_SIZE (mode);
6160
6161 if (TARGET_64BIT)
6162 return size <= 0;
6163 else
6164 return size <= 0 || size > 8;
6165 }
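#if 0
/* Illustrative sketch, not part of the build: under the 32-bit runtime
   the 12-byte struct below is passed by invisible reference (size > 8)
   while the 8-byte struct is passed by value; under the 64-bit runtime
   only zero- or variable-sized objects go by reference.  The type names
   are hypothetical.  */
struct twelve { int a, b, c; };		/* by reference on 32-bit */
struct eight { int a, b; };		/* by value on both runtimes */
#endif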
6166
6167 enum direction
6168 pa_function_arg_padding (machine_mode mode, const_tree type)
6169 {
6170 if (mode == BLKmode
6171 || (TARGET_64BIT
6172 && type
6173 && (AGGREGATE_TYPE_P (type)
6174 || TREE_CODE (type) == COMPLEX_TYPE
6175 || TREE_CODE (type) == VECTOR_TYPE)))
6176 {
6177 /* Return none if justification is not required. */
6178 if (type
6179 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6180 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6181 return none;
6182
6183 /* The directions set here are ignored when a BLKmode argument larger
6184 than a word is placed in a register. Different code is used for
6185 the stack and registers. This makes it difficult to have a
6186 consistent data representation for both the stack and registers.
6187 For both runtimes, the justification and padding for arguments on
6188 the stack and in registers should be identical. */
6189 if (TARGET_64BIT)
6190 /* The 64-bit runtime specifies left justification for aggregates. */
6191 return upward;
6192 else
6193 /* The 32-bit runtime architecture specifies right justification.
6194 When the argument is passed on the stack, the argument is padded
6195 with garbage on the left. The HP compiler pads with zeros. */
6196 return downward;
6197 }
6198
6199 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6200 return downward;
6201 else
6202 return none;
6203 }
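/* For example, a char argument on the 32-bit runtime has
   GET_MODE_BITSIZE (QImode) < PARM_BOUNDARY and therefore pads downward,
   i.e. it is right-justified within its argument word, while the 64-bit
   runtime left-justifies (pads upward) BLKmode aggregates.  */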
6204
6205 \f
6206 /* Do what is necessary for `va_start'. We look at the current function
6207 to determine if stdargs or varargs is used and fill in an initial
6208 va_list. A pointer to this constructor is returned. */
6209
6210 static rtx
6211 hppa_builtin_saveregs (void)
6212 {
6213 rtx offset, dest;
6214 tree fntype = TREE_TYPE (current_function_decl);
6215 int argadj = ((!stdarg_p (fntype))
6216 ? UNITS_PER_WORD : 0);
6217
6218 if (argadj)
6219 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6220 else
6221 offset = crtl->args.arg_offset_rtx;
6222
6223 if (TARGET_64BIT)
6224 {
6225 int i, off;
6226
6227 /* Adjust for varargs/stdarg differences. */
6228 if (argadj)
6229 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6230 else
6231 offset = crtl->args.arg_offset_rtx;
6232
6233 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6234 from the incoming arg pointer and growing to larger addresses. */
6235 for (i = 26, off = -64; i >= 19; i--, off += 8)
6236 emit_move_insn (gen_rtx_MEM (word_mode,
6237 plus_constant (Pmode,
6238 arg_pointer_rtx, off)),
6239 gen_rtx_REG (word_mode, i));
6240
6241 /* The incoming args pointer points just beyond the flushback area;
6242 normally this is not a serious concern. However, when we are doing
6243 varargs/stdargs we want to make the arg pointer point to the start
6244 of the incoming argument area. */
6245 emit_move_insn (virtual_incoming_args_rtx,
6246 plus_constant (Pmode, arg_pointer_rtx, -64));
6247
6248 /* Now return a pointer to the first anonymous argument. */
6249 return copy_to_reg (expand_binop (Pmode, add_optab,
6250 virtual_incoming_args_rtx,
6251 offset, 0, 0, OPTAB_LIB_WIDEN));
6252 }
6253
6254 /* Store general registers on the stack. */
6255 dest = gen_rtx_MEM (BLKmode,
6256 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6257 -16));
6258 set_mem_alias_set (dest, get_varargs_alias_set ());
6259 set_mem_align (dest, BITS_PER_WORD);
6260 move_block_from_reg (23, dest, 4);
6261
6262 /* move_block_from_reg will emit code to store the argument registers
6263 individually as scalar stores.
6264
6265 However, other insns may later load from the same addresses for
6266 a structure load (passing a struct to a varargs routine).
6267
6268 The alias code assumes that such aliasing can never happen, so we
6269 have to keep memory referencing insns from moving up beyond the
6270 last argument register store. So we emit a blockage insn here. */
6271 emit_insn (gen_blockage ());
6272
6273 return copy_to_reg (expand_binop (Pmode, add_optab,
6274 crtl->args.internal_arg_pointer,
6275 offset, 0, 0, OPTAB_LIB_WIDEN));
6276 }
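/* Illustrative 32-bit layout after the block move above: %r23, %r24,
   %r25 and %r26 are stored at internal_arg_pointer -16, -12, -8 and -4
   respectively, so the first argument word (%r26) lands at -4 and the
   save area lines up with the normal downward-growing argument slots.  */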
6277
6278 static void
6279 hppa_va_start (tree valist, rtx nextarg)
6280 {
6281 nextarg = expand_builtin_saveregs ();
6282 std_expand_builtin_va_start (valist, nextarg);
6283 }
6284
6285 static tree
6286 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6287 gimple_seq *post_p)
6288 {
6289 if (TARGET_64BIT)
6290 {
6291 /* Args grow upward. We can use the generic routines. */
6292 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6293 }
6294 else /* !TARGET_64BIT */
6295 {
6296 tree ptr = build_pointer_type (type);
6297 tree valist_type;
6298 tree t, u;
6299 unsigned int size, ofs;
6300 bool indirect;
6301
6302 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6303 if (indirect)
6304 {
6305 type = ptr;
6306 ptr = build_pointer_type (type);
6307 }
6308 size = int_size_in_bytes (type);
6309 valist_type = TREE_TYPE (valist);
6310
6311 /* Args grow down. Not handled by generic routines. */
6312
6313 u = fold_convert (sizetype, size_in_bytes (type));
6314 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6315 t = fold_build_pointer_plus (valist, u);
6316
6317 /* Align to 4 or 8 byte boundary depending on argument size. */
6318
6319 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6320 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6321 t = fold_convert (valist_type, t);
6322
6323 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6324
6325 ofs = (8 - size) % 4;
6326 if (ofs != 0)
6327 t = fold_build_pointer_plus_hwi (t, ofs);
6328
6329 t = fold_convert (ptr, t);
6330 t = build_va_arg_indirect_ref (t);
6331
6332 if (indirect)
6333 t = build_va_arg_indirect_ref (t);
6334
6335 return t;
6336 }
6337 }
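/* Worked example for the 32-bit case above: for a 1-byte argument,
   valist is decremented by 1 and rounded down to a 4-byte boundary,
   and ofs = (8 - 1) % 4 == 3 then advances the pointer past the pad
   bytes so it addresses the right-justified byte itself.  For an
   8-byte argument, valist drops by 8 to an 8-byte boundary and ofs
   is 0.  */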
6338
6339 /* True if MODE is valid for the target. By "valid", we mean able to
6340 be manipulated in non-trivial ways. In particular, this means all
6341 the arithmetic is supported.
6342
6343 Currently, TImode is not valid as the HP 64-bit runtime documentation
6344 doesn't document the alignment and calling conventions for this type.
6345 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6346 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6347
6348 static bool
6349 pa_scalar_mode_supported_p (machine_mode mode)
6350 {
6351 int precision = GET_MODE_PRECISION (mode);
6352
6353 switch (GET_MODE_CLASS (mode))
6354 {
6355 case MODE_PARTIAL_INT:
6356 case MODE_INT:
6357 if (precision == CHAR_TYPE_SIZE)
6358 return true;
6359 if (precision == SHORT_TYPE_SIZE)
6360 return true;
6361 if (precision == INT_TYPE_SIZE)
6362 return true;
6363 if (precision == LONG_TYPE_SIZE)
6364 return true;
6365 if (precision == LONG_LONG_TYPE_SIZE)
6366 return true;
6367 return false;
6368
6369 case MODE_FLOAT:
6370 if (precision == FLOAT_TYPE_SIZE)
6371 return true;
6372 if (precision == DOUBLE_TYPE_SIZE)
6373 return true;
6374 if (precision == LONG_DOUBLE_TYPE_SIZE)
6375 return true;
6376 return false;
6377
6378 case MODE_DECIMAL_FLOAT:
6379 return false;
6380
6381 default:
6382 gcc_unreachable ();
6383 }
6384 }
6385
6386 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6387 it branches into the delay slot. Otherwise, return FALSE. */
6388
6389 static bool
6390 branch_to_delay_slot_p (rtx_insn *insn)
6391 {
6392 rtx_insn *jump_insn;
6393
6394 if (dbr_sequence_length ())
6395 return FALSE;
6396
6397 jump_insn = next_active_insn (JUMP_LABEL (insn));
6398 while (insn)
6399 {
6400 insn = next_active_insn (insn);
6401 if (jump_insn == insn)
6402 return TRUE;
6403
6404 /* We can't rely on the length of asms. So, we return FALSE when
6405 the branch is followed by an asm. */
6406 if (!insn
6407 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6408 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6409 || get_attr_length (insn) > 0)
6410 break;
6411 }
6412
6413 return FALSE;
6414 }
6415
6416 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6417
6418 This occurs when INSN has an unfilled delay slot and is followed
6419 by an asm. Disaster can occur if the asm is empty and the jump
6420 branches into the delay slot. So, we add a nop in the delay slot
6421 when this occurs. */
6422
6423 static bool
6424 branch_needs_nop_p (rtx_insn *insn)
6425 {
6426 rtx_insn *jump_insn;
6427
6428 if (dbr_sequence_length ())
6429 return FALSE;
6430
6431 jump_insn = next_active_insn (JUMP_LABEL (insn));
6432 while (insn)
6433 {
6434 insn = next_active_insn (insn);
6435 if (!insn || jump_insn == insn)
6436 return TRUE;
6437
6438 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6439 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6440 && get_attr_length (insn) > 0)
6441 break;
6442 }
6443
6444 return FALSE;
6445 }
6446
6447 /* Return TRUE if INSN, a forward jump insn, can use nullification
6448 to skip the following instruction. This avoids an extra cycle due
6449 to a mis-predicted branch when we fall through. */
6450
6451 static bool
6452 use_skip_p (rtx_insn *insn)
6453 {
6454 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6455
6456 while (insn)
6457 {
6458 insn = next_active_insn (insn);
6459
6460 /* We can't rely on the length of asms, so we can't skip asms. */
6461 if (!insn
6462 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6463 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6464 break;
6465 if (get_attr_length (insn) == 4
6466 && jump_insn == next_active_insn (insn))
6467 return TRUE;
6468 if (get_attr_length (insn) > 0)
6469 break;
6470 }
6471
6472 return FALSE;
6473 }
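/* Illustrative transformation (register numbers and label are
   hypothetical): a forward branch over a single nullified insn,

	cmpb,=,n %r4,%r5,L$0010
	insn
 L$0010:

   can instead be emitted as a compare-and-clear that conditionally
   nullifies the next instruction,

	cmpclr,= %r4,%r5,%r0
	insn

   avoiding the mis-predicted branch penalty on the fall-through path.  */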
6474
6475 /* This routine handles all the normal conditional branch sequences we
6476 might need to generate. It handles compare immediate vs compare
6477 register, nullification of delay slots, varying length branches,
6478 negated branches, and all combinations of the above. It returns the
6479 output appropriate to emit the branch corresponding to all given
6480 parameters. */
6481
6482 const char *
6483 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6484 {
6485 static char buf[100];
6486 bool useskip;
6487 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6488 int length = get_attr_length (insn);
6489 int xdelay;
6490
6491 /* A conditional branch to the following instruction (e.g. the delay slot)
6492 is asking for a disaster. This can happen when not optimizing and
6493 when jump optimization fails.
6494
6495 While it is usually safe to emit nothing, this can fail if the
6496 preceding instruction is a nullified branch with an empty delay
6497 slot and the same branch target as this branch. We could check
6498 for this but jump optimization should eliminate nop jumps. It
6499 is always safe to emit a nop. */
6500 if (branch_to_delay_slot_p (insn))
6501 return "nop";
6502
6503 /* The doubleword form of the cmpib instruction doesn't have the LEU
6504 and GTU conditions while the cmpb instruction does. Since we accept
6505 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6506 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6507 operands[2] = gen_rtx_REG (DImode, 0);
6508 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6509 operands[1] = gen_rtx_REG (DImode, 0);
6510
6511 /* If this is a long branch with its delay slot unfilled, set `nullify'
6512 as it can nullify the delay slot and save a nop. */
6513 if (length == 8 && dbr_sequence_length () == 0)
6514 nullify = 1;
6515
6516 /* If this is a short forward conditional branch which did not get
6517 its delay slot filled, the delay slot can still be nullified. */
6518 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6519 nullify = forward_branch_p (insn);
6520
6521 /* A forward branch over a single nullified insn can be done with a
6522 comclr instruction. This avoids a single cycle penalty due to
6523 mis-predicted branch if we fall through (branch not taken). */
6524 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6525
6526 switch (length)
6527 {
6528 /* All short conditional branches except backwards with an unfilled
6529 delay slot. */
6530 case 4:
6531 if (useskip)
6532 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6533 else
6534 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6535 if (GET_MODE (operands[1]) == DImode)
6536 strcat (buf, "*");
6537 if (negated)
6538 strcat (buf, "%B3");
6539 else
6540 strcat (buf, "%S3");
6541 if (useskip)
6542 strcat (buf, " %2,%r1,%%r0");
6543 else if (nullify)
6544 {
6545 if (branch_needs_nop_p (insn))
6546 strcat (buf, ",n %2,%r1,%0%#");
6547 else
6548 strcat (buf, ",n %2,%r1,%0");
6549 }
6550 else
6551 strcat (buf, " %2,%r1,%0");
6552 break;
6553
6554 /* All long conditionals. Note a short backward branch with an
6555 unfilled delay slot is treated just like a long backward branch
6556 with an unfilled delay slot. */
6557 case 8:
6558 /* Handle weird backwards branch with a filled delay slot
6559 which is nullified. */
6560 if (dbr_sequence_length () != 0
6561 && ! forward_branch_p (insn)
6562 && nullify)
6563 {
6564 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6565 if (GET_MODE (operands[1]) == DImode)
6566 strcat (buf, "*");
6567 if (negated)
6568 strcat (buf, "%S3");
6569 else
6570 strcat (buf, "%B3");
6571 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6572 }
6573 /* Handle short backwards branch with an unfilled delay slot.
6574 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6575 taken and untaken branches. */
6576 else if (dbr_sequence_length () == 0
6577 && ! forward_branch_p (insn)
6578 && INSN_ADDRESSES_SET_P ()
6579 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6580 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6581 {
6582 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6583 if (GET_MODE (operands[1]) == DImode)
6584 strcat (buf, "*");
6585 if (negated)
6586 strcat (buf, "%B3 %2,%r1,%0%#");
6587 else
6588 strcat (buf, "%S3 %2,%r1,%0%#");
6589 }
6590 else
6591 {
6592 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6593 if (GET_MODE (operands[1]) == DImode)
6594 strcat (buf, "*");
6595 if (negated)
6596 strcat (buf, "%S3");
6597 else
6598 strcat (buf, "%B3");
6599 if (nullify)
6600 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6601 else
6602 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6603 }
6604 break;
6605
6606 default:
6607 /* The reversed conditional branch must branch over one additional
6608 instruction if the delay slot is filled and needs to be extracted
6609 by pa_output_lbranch. If the delay slot is empty or this is a
6610 nullified forward branch, the instruction after the reversed
6611 conditional branch must be nullified. */
6612 if (dbr_sequence_length () == 0
6613 || (nullify && forward_branch_p (insn)))
6614 {
6615 nullify = 1;
6616 xdelay = 0;
6617 operands[4] = GEN_INT (length);
6618 }
6619 else
6620 {
6621 xdelay = 1;
6622 operands[4] = GEN_INT (length + 4);
6623 }
6624
6625 /* Create a reversed conditional branch which branches around
6626 the following insns. */
6627 if (GET_MODE (operands[1]) != DImode)
6628 {
6629 if (nullify)
6630 {
6631 if (negated)
6632 strcpy (buf,
6633 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6634 else
6635 strcpy (buf,
6636 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6637 }
6638 else
6639 {
6640 if (negated)
6641 strcpy (buf,
6642 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6643 else
6644 strcpy (buf,
6645 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6646 }
6647 }
6648 else
6649 {
6650 if (nullify)
6651 {
6652 if (negated)
6653 strcpy (buf,
6654 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6655 else
6656 strcpy (buf,
6657 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6658 }
6659 else
6660 {
6661 if (negated)
6662 strcpy (buf,
6663 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6664 else
6665 strcpy (buf,
6666 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6667 }
6668 }
6669
6670 output_asm_insn (buf, operands);
6671 return pa_output_lbranch (operands[0], insn, xdelay);
6672 }
6673 return buf;
6674 }
6675
6676 /* This routine handles output of long unconditional branches that
6677 exceed the maximum range of a simple branch instruction. Since
6678 we don't have a register available for the branch, we save register
6679 %r1 in the frame marker, load the branch destination DEST into %r1,
6680 execute the branch, and restore %r1 in the delay slot of the branch.
6681
6682 Since long branches may have an insn in the delay slot and the
6683 delay slot is used to restore %r1, we in general need to extract
6684 this insn and execute it before the branch. However, to facilitate
6685 use of this function by conditional branches, we also provide an
6686 option to not extract the delay insn so that it will be emitted
6687 after the long branch. So, if there is an insn in the delay slot,
6688 it is extracted if XDELAY is nonzero.
6689
6690 The lengths of the various long-branch sequences are 20, 16 and 24
6691 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6692
6693 const char *
6694 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6695 {
6696 rtx xoperands[2];
6697
6698 xoperands[0] = dest;
6699
6700 /* First, free up the delay slot. */
6701 if (xdelay && dbr_sequence_length () != 0)
6702 {
6703 /* We can't handle a jump in the delay slot. */
6704 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6705
6706 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6707 optimize, 0, NULL);
6708
6709 /* Now delete the delay insn. */
6710 SET_INSN_DELETED (NEXT_INSN (insn));
6711 }
6712
6713 /* Output an insn to save %r1. The runtime documentation doesn't
6714 specify whether the "Clean Up" slot in the caller's frame can
6715 be clobbered by the callee. It isn't copied by HP's builtin
6716 alloca, so this suggests that it can be clobbered if necessary.
6717 The "Static Link" location is copied by HP builtin alloca, so
6718 we avoid using it. Using the cleanup slot might be a problem
6719 if we have to interoperate with languages that pass cleanup
6720 information. However, it should be possible to handle these
6721 situations with GCC's asm feature.
6722
6723 The "Current RP" slot is reserved for the called procedure, so
6724 we try to use it when we don't have a frame of our own. It's
6725 rather unlikely that we won't have a frame when we need to emit
6726 a very long branch.
6727
6728 Really the way to go long term is a register scavenger; go to
6729 the target of the jump and find a register which we can use
6730 as a scratch to hold the value in %r1. Then, we wouldn't have
6731 to free up the delay slot or clobber a slot that may be needed
6732 for other purposes. */
6733 if (TARGET_64BIT)
6734 {
6735 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6736 /* Use the return pointer slot in the frame marker. */
6737 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6738 else
6739 /* Use the slot at -40 in the frame marker since HP builtin
6740 alloca doesn't copy it. */
6741 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6742 }
6743 else
6744 {
6745 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6746 /* Use the return pointer slot in the frame marker. */
6747 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6748 else
6749 /* Use the "Clean Up" slot in the frame marker. In GCC,
6750 the only other use of this location is for copying a
6751 floating point double argument from a floating-point
6752 register to two general registers. The copy is done
6753 as an "atomic" operation when outputting a call, so it
6754 won't interfere with our using the location here. */
6755 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6756 }
6757
6758 if (TARGET_PORTABLE_RUNTIME)
6759 {
6760 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6761 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6762 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6763 }
6764 else if (flag_pic)
6765 {
6766 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6767 if (TARGET_SOM || !TARGET_GAS)
6768 {
6769 xoperands[1] = gen_label_rtx ();
6770 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6771 targetm.asm_out.internal_label (asm_out_file, "L",
6772 CODE_LABEL_NUMBER (xoperands[1]));
6773 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6774 }
6775 else
6776 {
6777 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6778 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6779 }
6780 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6781 }
6782 else
6783 /* Now output a very long branch to the original target. */
6784 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6785
6786 /* Now restore the value of %r1 in the delay slot. */
6787 if (TARGET_64BIT)
6788 {
6789 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6790 return "ldd -16(%%r30),%%r1";
6791 else
6792 return "ldd -40(%%r30),%%r1";
6793 }
6794 else
6795 {
6796 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6797 return "ldw -20(%%r30),%%r1";
6798 else
6799 return "ldw -12(%%r30),%%r1";
6800 }
6801 }
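/* For instance, the 16-byte non-PIC sequence emitted above for a
   function with a frame is roughly

	stw %r1,-12(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -12(%r30),%r1	; restore %r1 in the delay slot

   with the portable-runtime and PIC variants adding one or two more
   instructions, matching the 20- and 24-byte lengths noted above.  */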
6802
6803 /* This routine handles all the branch-on-bit conditional branch sequences we
6804 might need to generate. It handles nullification of delay slots,
6805 varying length branches, negated branches and all combinations of the
6806 above. It returns the appropriate output template to emit the branch. */
6807
6808 const char *
6809 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6810 {
6811 static char buf[100];
6812 bool useskip;
6813 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6814 int length = get_attr_length (insn);
6815 int xdelay;
6816
6817 /* A conditional branch to the following instruction (e.g. the delay slot) is
6818 asking for a disaster. I do not think this can happen as this pattern
6819 is only used when optimizing; jump optimization should eliminate the
6820 jump. But be prepared just in case. */
6821
6822 if (branch_to_delay_slot_p (insn))
6823 return "nop";
6824
6825 /* If this is a long branch with its delay slot unfilled, set `nullify'
6826 as it can nullify the delay slot and save a nop. */
6827 if (length == 8 && dbr_sequence_length () == 0)
6828 nullify = 1;
6829
6830 /* If this is a short forward conditional branch which did not get
6831 its delay slot filled, the delay slot can still be nullified. */
6832 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6833 nullify = forward_branch_p (insn);
6834
6835 /* A forward branch over a single nullified insn can be done with an
6836 extrs instruction. This avoids a single cycle penalty due to
6837 mis-predicted branch if we fall through (branch not taken). */
6838 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6839
6840 switch (length)
6841 {
6842
6843 /* All short conditional branches except backwards with an unfilled
6844 delay slot. */
6845 case 4:
6846 if (useskip)
6847 strcpy (buf, "{extrs,|extrw,s,}");
6848 else
6849 strcpy (buf, "bb,");
6850 if (useskip && GET_MODE (operands[0]) == DImode)
6851 strcpy (buf, "extrd,s,*");
6852 else if (GET_MODE (operands[0]) == DImode)
6853 strcpy (buf, "bb,*");
6854 if ((which == 0 && negated)
6855 || (which == 1 && ! negated))
6856 strcat (buf, ">=");
6857 else
6858 strcat (buf, "<");
6859 if (useskip)
6860 strcat (buf, " %0,%1,1,%%r0");
6861 else if (nullify && negated)
6862 {
6863 if (branch_needs_nop_p (insn))
6864 strcat (buf, ",n %0,%1,%3%#");
6865 else
6866 strcat (buf, ",n %0,%1,%3");
6867 }
6868 else if (nullify && ! negated)
6869 {
6870 if (branch_needs_nop_p (insn))
6871 strcat (buf, ",n %0,%1,%2%#");
6872 else
6873 strcat (buf, ",n %0,%1,%2");
6874 }
6875 else if (! nullify && negated)
6876 strcat (buf, " %0,%1,%3");
6877 else if (! nullify && ! negated)
6878 strcat (buf, " %0,%1,%2");
6879 break;
6880
6881 /* All long conditionals. Note a short backward branch with an
6882 unfilled delay slot is treated just like a long backward branch
6883 with an unfilled delay slot. */
6884 case 8:
6885 /* Handle weird backwards branch with a filled delay slot
6886 which is nullified. */
6887 if (dbr_sequence_length () != 0
6888 && ! forward_branch_p (insn)
6889 && nullify)
6890 {
6891 strcpy (buf, "bb,");
6892 if (GET_MODE (operands[0]) == DImode)
6893 strcat (buf, "*");
6894 if ((which == 0 && negated)
6895 || (which == 1 && ! negated))
6896 strcat (buf, "<");
6897 else
6898 strcat (buf, ">=");
6899 if (negated)
6900 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6901 else
6902 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6903 }
6904 /* Handle short backwards branch with an unfilled delay slot.
6905 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6906 taken and untaken branches. */
6907 else if (dbr_sequence_length () == 0
6908 && ! forward_branch_p (insn)
6909 && INSN_ADDRESSES_SET_P ()
6910 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6911 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6912 {
6913 strcpy (buf, "bb,");
6914 if (GET_MODE (operands[0]) == DImode)
6915 strcat (buf, "*");
6916 if ((which == 0 && negated)
6917 || (which == 1 && ! negated))
6918 strcat (buf, ">=");
6919 else
6920 strcat (buf, "<");
6921 if (negated)
6922 strcat (buf, " %0,%1,%3%#");
6923 else
6924 strcat (buf, " %0,%1,%2%#");
6925 }
6926 else
6927 {
6928 if (GET_MODE (operands[0]) == DImode)
6929 strcpy (buf, "extrd,s,*");
6930 else
6931 strcpy (buf, "{extrs,|extrw,s,}");
6932 if ((which == 0 && negated)
6933 || (which == 1 && ! negated))
6934 strcat (buf, "<");
6935 else
6936 strcat (buf, ">=");
6937 if (nullify && negated)
6938 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6939 else if (nullify && ! negated)
6940 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6941 else if (negated)
6942 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6943 else
6944 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6945 }
6946 break;
6947
6948 default:
6949 /* The reversed conditional branch must branch over one additional
6950 instruction if the delay slot is filled and needs to be extracted
6951 by pa_output_lbranch. If the delay slot is empty or this is a
6952 nullified forward branch, the instruction after the reversed
6953 conditional branch must be nullified. */
6954 if (dbr_sequence_length () == 0
6955 || (nullify && forward_branch_p (insn)))
6956 {
6957 nullify = 1;
6958 xdelay = 0;
6959 operands[4] = GEN_INT (length);
6960 }
6961 else
6962 {
6963 xdelay = 1;
6964 operands[4] = GEN_INT (length + 4);
6965 }
6966
6967 if (GET_MODE (operands[0]) == DImode)
6968 strcpy (buf, "bb,*");
6969 else
6970 strcpy (buf, "bb,");
6971 if ((which == 0 && negated)
6972 || (which == 1 && !negated))
6973 strcat (buf, "<");
6974 else
6975 strcat (buf, ">=");
6976 if (nullify)
6977 strcat (buf, ",n %0,%1,.+%4");
6978 else
6979 strcat (buf, " %0,%1,.+%4");
6980 output_asm_insn (buf, operands);
6981 return pa_output_lbranch (negated ? operands[3] : operands[2],
6982 insn, xdelay);
6983 }
6984 return buf;
6985 }
6986
6987 /* This routine handles all the branch-on-variable-bit conditional branch
6988 sequences we might need to generate. It handles nullification of delay
6989 slots, varying length branches, negated branches and all combinations
6990 of the above. It returns the appropriate output template to emit the
6991 branch. */
6992
6993 const char *
6994 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
6995 int which)
6996 {
6997 static char buf[100];
6998 bool useskip;
6999 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7000 int length = get_attr_length (insn);
7001 int xdelay;
7002
7003 /* A conditional branch to the following instruction (e.g. the delay slot) is
7004 asking for a disaster. I do not think this can happen as this pattern
7005 is only used when optimizing; jump optimization should eliminate the
7006 jump. But be prepared just in case. */
7007
7008 if (branch_to_delay_slot_p (insn))
7009 return "nop";
7010
7011 /* If this is a long branch with its delay slot unfilled, set `nullify'
7012 as it can nullify the delay slot and save a nop. */
7013 if (length == 8 && dbr_sequence_length () == 0)
7014 nullify = 1;
7015
7016 /* If this is a short forward conditional branch which did not get
7017 its delay slot filled, the delay slot can still be nullified. */
7018 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7019 nullify = forward_branch_p (insn);
7020
7021 /* A forward branch over a single nullified insn can be done with an
7022 extrs instruction. This avoids a single cycle penalty due to
7023 mis-predicted branch if we fall through (branch not taken). */
7024 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7025
7026 switch (length)
7027 {
7028
7029 /* All short conditional branches except backwards with an unfilled
7030 delay slot. */
7031 case 4:
7032 if (useskip)
7033 strcpy (buf, "{vextrs,|extrw,s,}");
7034 else
7035 strcpy (buf, "{bvb,|bb,}");
7036 if (useskip && GET_MODE (operands[0]) == DImode)
7037 strcpy (buf, "extrd,s,*");
7038 else if (GET_MODE (operands[0]) == DImode)
7039 strcpy (buf, "bb,*");
7040 if ((which == 0 && negated)
7041 || (which == 1 && ! negated))
7042 strcat (buf, ">=");
7043 else
7044 strcat (buf, "<");
7045 if (useskip)
7046 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7047 else if (nullify && negated)
7048 {
7049 if (branch_needs_nop_p (insn))
7050 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7051 else
7052 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7053 }
7054 else if (nullify && ! negated)
7055 {
7056 if (branch_needs_nop_p (insn))
7057 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7058 else
7059 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7060 }
7061 else if (! nullify && negated)
7062 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7063 else if (! nullify && ! negated)
7064 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7065 break;
7066
7067 /* All long conditionals. Note a short backward branch with an
7068 unfilled delay slot is treated just like a long backward branch
7069 with an unfilled delay slot. */
7070 case 8:
7071 /* Handle weird backwards branch with a filled delay slot
7072 which is nullified. */
7073 if (dbr_sequence_length () != 0
7074 && ! forward_branch_p (insn)
7075 && nullify)
7076 {
7077 strcpy (buf, "{bvb,|bb,}");
7078 if (GET_MODE (operands[0]) == DImode)
7079 strcat (buf, "*");
7080 if ((which == 0 && negated)
7081 || (which == 1 && ! negated))
7082 strcat (buf, "<");
7083 else
7084 strcat (buf, ">=");
7085 if (negated)
7086 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7087 else
7088 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7089 }
7090 /* Handle short backwards branch with an unfilled delay slot.
7091 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7092 taken and untaken branches. */
7093 else if (dbr_sequence_length () == 0
7094 && ! forward_branch_p (insn)
7095 && INSN_ADDRESSES_SET_P ()
7096 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7097 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7098 {
7099 strcpy (buf, "{bvb,|bb,}");
7100 if (GET_MODE (operands[0]) == DImode)
7101 strcat (buf, "*");
7102 if ((which == 0 && negated)
7103 || (which == 1 && ! negated))
7104 strcat (buf, ">=");
7105 else
7106 strcat (buf, "<");
7107 if (negated)
7108 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7109 else
7110 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7111 }
7112 else
7113 {
7114 strcpy (buf, "{vextrs,|extrw,s,}");
7115 if (GET_MODE (operands[0]) == DImode)
7116 strcpy (buf, "extrd,s,*");
7117 if ((which == 0 && negated)
7118 || (which == 1 && ! negated))
7119 strcat (buf, "<");
7120 else
7121 strcat (buf, ">=");
7122 if (nullify && negated)
7123 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7124 else if (nullify && ! negated)
7125 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7126 else if (negated)
7127 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7128 else
7129 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7130 }
7131 break;
7132
7133 default:
7134 /* The reversed conditional branch must branch over one additional
7135 instruction if the delay slot is filled and needs to be extracted
7136 by pa_output_lbranch. If the delay slot is empty or this is a
7137 nullified forward branch, the instruction after the reversed
7138 conditional branch must be nullified. */
7139 if (dbr_sequence_length () == 0
7140 || (nullify && forward_branch_p (insn)))
7141 {
7142 nullify = 1;
7143 xdelay = 0;
7144 operands[4] = GEN_INT (length);
7145 }
7146 else
7147 {
7148 xdelay = 1;
7149 operands[4] = GEN_INT (length + 4);
7150 }
7151
7152 if (GET_MODE (operands[0]) == DImode)
7153 strcpy (buf, "bb,*");
7154 else
7155 strcpy (buf, "{bvb,|bb,}");
7156 if ((which == 0 && negated)
7157 || (which == 1 && !negated))
7158 strcat (buf, "<");
7159 else
7160 strcat (buf, ">=");
7161 if (nullify)
7162 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7163 else
7164 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7165 output_asm_insn (buf, operands);
7166 return pa_output_lbranch (negated ? operands[3] : operands[2],
7167 insn, xdelay);
7168 }
7169 return buf;
7170 }
7171
7172 /* Return the output template for emitting a dbra type insn.
7173
7174 Note it may perform some output operations on its own before
7175 returning the final output string. */
7176 const char *
7177 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7178 {
7179 int length = get_attr_length (insn);
7180
7181 /* A conditional branch to the following instruction (e.g. the delay slot) is
7182 asking for a disaster. Be prepared! */
7183
7184 if (branch_to_delay_slot_p (insn))
7185 {
7186 if (which_alternative == 0)
7187 return "ldo %1(%0),%0";
7188 else if (which_alternative == 1)
7189 {
7190 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7191 output_asm_insn ("ldw -16(%%r30),%4", operands);
7192 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7193 return "{fldws|fldw} -16(%%r30),%0";
7194 }
7195 else
7196 {
7197 output_asm_insn ("ldw %0,%4", operands);
7198 return "ldo %1(%4),%4\n\tstw %4,%0";
7199 }
7200 }
7201
7202 if (which_alternative == 0)
7203 {
7204 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7205 int xdelay;
7206
7207 /* If this is a long branch with its delay slot unfilled, set `nullify'
7208 as it can nullify the delay slot and save a nop. */
7209 if (length == 8 && dbr_sequence_length () == 0)
7210 nullify = 1;
7211
7212 /* If this is a short forward conditional branch which did not get
7213 its delay slot filled, the delay slot can still be nullified. */
7214 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7215 nullify = forward_branch_p (insn);
7216
7217 switch (length)
7218 {
7219 case 4:
7220 if (nullify)
7221 {
7222 if (branch_needs_nop_p (insn))
7223 return "addib,%C2,n %1,%0,%3%#";
7224 else
7225 return "addib,%C2,n %1,%0,%3";
7226 }
7227 else
7228 return "addib,%C2 %1,%0,%3";
7229
7230 case 8:
7231 /* Handle weird backwards branch with a filled delay slot
7232 which is nullified. */
7233 if (dbr_sequence_length () != 0
7234 && ! forward_branch_p (insn)
7235 && nullify)
7236 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7237 /* Handle short backwards branch with an unfilled delay slot.
7238 Using a addb;nop rather than addi;bl saves 1 cycle for both
7239 taken and untaken branches. */
7240 else if (dbr_sequence_length () == 0
7241 && ! forward_branch_p (insn)
7242 && INSN_ADDRESSES_SET_P ()
7243 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7244 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7245 return "addib,%C2 %1,%0,%3%#";
7246
7247 /* Handle normal cases. */
7248 if (nullify)
7249 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7250 else
7251 return "addi,%N2 %1,%0,%0\n\tb %3";
7252
7253 default:
7254 /* The reversed conditional branch must branch over one additional
7255 instruction if the delay slot is filled and needs to be extracted
7256 by pa_output_lbranch. If the delay slot is empty or this is a
7257 nullified forward branch, the instruction after the reversed
7258 condition branch must be nullified. */
7259 if (dbr_sequence_length () == 0
7260 || (nullify && forward_branch_p (insn)))
7261 {
7262 nullify = 1;
7263 xdelay = 0;
7264 operands[4] = GEN_INT (length);
7265 }
7266 else
7267 {
7268 xdelay = 1;
7269 operands[4] = GEN_INT (length + 4);
7270 }
7271
7272 if (nullify)
7273 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7274 else
7275 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7276
7277 return pa_output_lbranch (operands[3], insn, xdelay);
7278 }
7279
7280 }
7281 /* Deal with gross reload from FP register case. */
7282 else if (which_alternative == 1)
7283 {
7284 /* Move loop counter from FP register to MEM then into a GR,
7285 increment the GR, store the GR into MEM, and finally reload
7286 the FP register from MEM from within the branch's delay slot. */
7287 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7288 operands);
7289 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7290 if (length == 24)
7291 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7292 else if (length == 28)
7293 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7294 else
7295 {
7296 operands[5] = GEN_INT (length - 16);
7297 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7298 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7299 return pa_output_lbranch (operands[3], insn, 0);
7300 }
7301 }
7302 /* Deal with gross reload from memory case. */
7303 else
7304 {
7305 /* Reload loop counter from memory, the store back to memory
7306 happens in the branch's delay slot. */
7307 output_asm_insn ("ldw %0,%4", operands);
7308 if (length == 12)
7309 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7310 else if (length == 16)
7311 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7312 else
7313 {
7314 operands[5] = GEN_INT (length - 4);
7315 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7316 return pa_output_lbranch (operands[3], insn, 0);
7317 }
7318 }
7319 }
7320
7321 /* Return the output template for emitting a movb type insn.
7322
7323 Note it may perform some output operations on its own before
7324 returning the final output string. */
7325 const char *
7326 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7327 int reverse_comparison)
7328 {
7329 int length = get_attr_length (insn);
7330
7331 /* A conditional branch to the following instruction (i.e., the delay slot) is
7332 asking for a disaster. Be prepared! */
7333
7334 if (branch_to_delay_slot_p (insn))
7335 {
7336 if (which_alternative == 0)
7337 return "copy %1,%0";
7338 else if (which_alternative == 1)
7339 {
7340 output_asm_insn ("stw %1,-16(%%r30)", operands);
7341 return "{fldws|fldw} -16(%%r30),%0";
7342 }
7343 else if (which_alternative == 2)
7344 return "stw %1,%0";
7345 else
7346 return "mtsar %r1";
7347 }
7348
7349 /* Support the second variant. */
7350 if (reverse_comparison)
7351 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7352
7353 if (which_alternative == 0)
7354 {
7355 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7356 int xdelay;
7357
7358 /* If this is a long branch with its delay slot unfilled, set `nullify'
7359 as it can nullify the delay slot and save a nop. */
7360 if (length == 8 && dbr_sequence_length () == 0)
7361 nullify = 1;
7362
7363 /* If this is a short forward conditional branch which did not get
7364 its delay slot filled, the delay slot can still be nullified. */
7365 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7366 nullify = forward_branch_p (insn);
7367
7368 switch (length)
7369 {
7370 case 4:
7371 if (nullify)
7372 {
7373 if (branch_needs_nop_p (insn))
7374 return "movb,%C2,n %1,%0,%3%#";
7375 else
7376 return "movb,%C2,n %1,%0,%3";
7377 }
7378 else
7379 return "movb,%C2 %1,%0,%3";
7380
7381 case 8:
7382 /* Handle weird backwards branch with a filled delay slot
7383 which is nullified. */
7384 if (dbr_sequence_length () != 0
7385 && ! forward_branch_p (insn)
7386 && nullify)
7387 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7388
7389 /* Handle short backwards branch with an unfilled delay slot.
7390 Using a movb;nop rather than an or;bl saves 1 cycle for both
7391 taken and untaken branches. */
7392 else if (dbr_sequence_length () == 0
7393 && ! forward_branch_p (insn)
7394 && INSN_ADDRESSES_SET_P ()
7395 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7396 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7397 return "movb,%C2 %1,%0,%3%#";
7398 /* Handle normal cases. */
7399 if (nullify)
7400 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7401 else
7402 return "or,%N2 %1,%%r0,%0\n\tb %3";
7403
7404 default:
7405 /* The reversed conditional branch must branch over one additional
7406 instruction if the delay slot is filled and needs to be extracted
7407 by pa_output_lbranch. If the delay slot is empty or this is a
7408 nullified forward branch, the instruction after the reversed
7409 condition branch must be nullified. */
7410 if (dbr_sequence_length () == 0
7411 || (nullify && forward_branch_p (insn)))
7412 {
7413 nullify = 1;
7414 xdelay = 0;
7415 operands[4] = GEN_INT (length);
7416 }
7417 else
7418 {
7419 xdelay = 1;
7420 operands[4] = GEN_INT (length + 4);
7421 }
7422
7423 if (nullify)
7424 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7425 else
7426 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7427
7428 return pa_output_lbranch (operands[3], insn, xdelay);
7429 }
7430 }
7431 /* Deal with gross reload for FP destination register case. */
7432 else if (which_alternative == 1)
7433 {
7434 /* Move source register to MEM, perform the branch test, then
7435 finally load the FP register from MEM from within the branch's
7436 delay slot. */
7437 output_asm_insn ("stw %1,-16(%%r30)", operands);
7438 if (length == 12)
7439 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7440 else if (length == 16)
7441 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7442 else
7443 {
7444 operands[4] = GEN_INT (length - 4);
7445 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7446 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7447 return pa_output_lbranch (operands[3], insn, 0);
7448 }
7449 }
7450 /* Deal with gross reload from memory case. */
7451 else if (which_alternative == 2)
7452 {
7453 /* Reload loop counter from memory, the store back to memory
7454 happens in the branch's delay slot. */
7455 if (length == 8)
7456 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7457 else if (length == 12)
7458 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7459 else
7460 {
7461 operands[4] = GEN_INT (length);
7462 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7463 operands);
7464 return pa_output_lbranch (operands[3], insn, 0);
7465 }
7466 }
7467 /* Handle SAR as a destination. */
7468 else
7469 {
7470 if (length == 8)
7471 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7472 else if (length == 12)
7473 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7474 else
7475 {
7476 operands[4] = GEN_INT (length);
7477 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7478 operands);
7479 return pa_output_lbranch (operands[3], insn, 0);
7480 }
7481 }
7482 }
7483
7484 /* Copy any FP arguments in INSN into integer registers. */
7485 static void
7486 copy_fp_args (rtx_insn *insn)
7487 {
7488 rtx link;
7489 rtx xoperands[2];
7490
7491 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7492 {
7493 int arg_mode, regno;
7494 rtx use = XEXP (link, 0);
7495
7496 if (! (GET_CODE (use) == USE
7497 && GET_CODE (XEXP (use, 0)) == REG
7498 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7499 continue;
7500
7501 arg_mode = GET_MODE (XEXP (use, 0));
7502 regno = REGNO (XEXP (use, 0));
7503
7504 /* Is it a floating point register? */
7505 if (regno >= 32 && regno <= 39)
7506 {
7507 /* Copy the FP register into an integer register via memory. */
7508 if (arg_mode == SFmode)
7509 {
7510 xoperands[0] = XEXP (use, 0);
7511 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7512 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7513 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7514 }
7515 else
7516 {
7517 xoperands[0] = XEXP (use, 0);
7518 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7519 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7520 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7521 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7522 }
7523 }
7524 }
7525 }
7526
7527 /* Compute length of the FP argument copy sequence for INSN. */
7528 static int
7529 length_fp_args (rtx_insn *insn)
7530 {
7531 int length = 0;
7532 rtx link;
7533
7534 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7535 {
7536 int arg_mode, regno;
7537 rtx use = XEXP (link, 0);
7538
7539 if (! (GET_CODE (use) == USE
7540 && GET_CODE (XEXP (use, 0)) == REG
7541 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7542 continue;
7543
7544 arg_mode = GET_MODE (XEXP (use, 0));
7545 regno = REGNO (XEXP (use, 0));
7546
7547 /* Is it a floating point register? */
7548 if (regno >= 32 && regno <= 39)
7549 {
7550 if (arg_mode == SFmode)
7551 length += 8;
7552 else
7553 length += 12;
7554 }
7555 }
7556
7557 return length;
7558 }
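
/* Illustration (hypothetical helper, fenced off from the build): the
   byte counts above mirror the sequences emitted by copy_fp_args --
   two 4-byte instructions (fstw + ldw) for an SFmode argument and
   three (fstd plus two ldws) for a DFmode argument.  */
#if 0
static int
fp_arg_copy_length (machine_mode arg_mode)
{
  /* Every PA instruction is 4 bytes wide.  */
  return 4 * (arg_mode == SFmode ? 2 : 3);
}
#endif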
7559
7560 /* Return the attribute length for the millicode call instruction INSN.
7561 The length must match the code generated by pa_output_millicode_call.
7562 We include the delay slot in the returned length as it is better to
7563 overestimate the length than to underestimate it. */
7564
7565 int
7566 pa_attr_length_millicode_call (rtx_insn *insn)
7567 {
7568 unsigned long distance = -1;
7569 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7570
7571 if (INSN_ADDRESSES_SET_P ())
7572 {
7573 distance = (total + insn_current_reference_address (insn));
7574 if (distance < total)
7575 distance = -1;
7576 }
7577
7578 if (TARGET_64BIT)
7579 {
7580 if (!TARGET_LONG_CALLS && distance < 7600000)
7581 return 8;
7582
7583 return 20;
7584 }
7585 else if (TARGET_PORTABLE_RUNTIME)
7586 return 24;
7587 else
7588 {
7589 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7590 return 8;
7591
7592 if (!flag_pic)
7593 return 12;
7594
7595 return 24;
7596 }
7597 }
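
/* Illustration (hypothetical helper, fenced off from the build): the
   distance computation above relies on unsigned wraparound.  DISTANCE
   starts at -1 (ULONG_MAX, i.e., "unknown, assume the worst case") and
   is only refined when insn addresses are available; if the sum wraps,
   it compares less than TOTAL and is forced back to the worst case.  */
#if 0
static unsigned long
branch_distance_estimate (unsigned long total, unsigned long ref_addr)
{
  unsigned long distance = total + ref_addr;

  /* The addition overflowed; treat the distance as unknown.  */
  if (distance < total)
    distance = -1;
  return distance;
}
#endif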
7598
7599 /* INSN is a function call.
7600
7601 CALL_DEST is the routine we are calling. */
7602
7603 const char *
7604 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7605 {
7606 int attr_length = get_attr_length (insn);
7607 int seq_length = dbr_sequence_length ();
7608 rtx xoperands[3];
7609
7610 xoperands[0] = call_dest;
7611 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7612
7613 /* Handle the common case where we are sure that the branch will
7614 reach the beginning of the $CODE$ subspace. The within reach
7615 form of the $$sh_func_adrs call has a length of 28. Because it
7616 has an attribute type of sh_func_adrs, it never has a nonzero
7617 sequence length (i.e., the delay slot is never filled). */
7618 if (!TARGET_LONG_CALLS
7619 && (attr_length == 8
7620 || (attr_length == 28
7621 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7622 {
7623 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7624 }
7625 else
7626 {
7627 if (TARGET_64BIT)
7628 {
7629 /* It might seem that one insn could be saved by accessing
7630 the millicode function using the linkage table. However,
7631 this doesn't work in shared libraries and other dynamically
7632 loaded objects. Using a pc-relative sequence also avoids
7633 problems related to the implicit use of the gp register. */
7634 output_asm_insn ("b,l .+8,%%r1", xoperands);
7635
7636 if (TARGET_GAS)
7637 {
7638 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7639 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7640 }
7641 else
7642 {
7643 xoperands[1] = gen_label_rtx ();
7644 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7645 targetm.asm_out.internal_label (asm_out_file, "L",
7646 CODE_LABEL_NUMBER (xoperands[1]));
7647 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7648 }
7649
7650 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7651 }
7652 else if (TARGET_PORTABLE_RUNTIME)
7653 {
7654 /* Pure portable runtime doesn't allow be/ble; we also don't
7655 have PIC support in the assembler/linker, so this sequence
7656 is needed. */
7657
7658 /* Get the address of our target into %r1. */
7659 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7660 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7661
7662 /* Get our return address into %r31. */
7663 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7664 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7665
7666 /* Jump to our target address in %r1. */
7667 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7668 }
7669 else if (!flag_pic)
7670 {
7671 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7672 if (TARGET_PA_20)
7673 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7674 else
7675 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7676 }
7677 else
7678 {
7679 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7680 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7681
7682 if (TARGET_SOM || !TARGET_GAS)
7683 {
7684 /* The HP assembler can generate relocations for the
7685 difference of two symbols. GAS can do this for a
7686 millicode symbol but not an arbitrary external
7687 symbol when generating SOM output. */
7688 xoperands[1] = gen_label_rtx ();
7689 targetm.asm_out.internal_label (asm_out_file, "L",
7690 CODE_LABEL_NUMBER (xoperands[1]));
7691 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7692 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7693 }
7694 else
7695 {
7696 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7697 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7698 xoperands);
7699 }
7700
7701 /* Jump to our target address in %r1. */
7702 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7703 }
7704 }
7705
7706 if (seq_length == 0)
7707 output_asm_insn ("nop", xoperands);
7708
7709 return "";
7710 }
7711
7712 /* Return the attribute length of the call instruction INSN. The SIBCALL
7713 flag indicates whether INSN is a regular call or a sibling call. The
7714 length returned must be longer than the code actually generated by
7715 pa_output_call. Since branch shortening is done before delay branch
7716 sequencing, there is no way to determine whether or not the delay
7717 slot will be filled during branch shortening. Even when the delay
7718 slot is filled, we may have to add a nop if the delay slot contains
7719 a branch that can't reach its target. Thus, we always have to include
7720 the delay slot in the length estimate. This used to be done in
7721 pa_adjust_insn_length but we do it here now as some sequences always
7722 fill the delay slot and we can save four bytes in the estimate for
7723 these sequences. */
7724
7725 int
7726 pa_attr_length_call (rtx_insn *insn, int sibcall)
7727 {
7728 int local_call;
7729 rtx call, call_dest;
7730 tree call_decl;
7731 int length = 0;
7732 rtx pat = PATTERN (insn);
7733 unsigned long distance = -1;
7734
7735 gcc_assert (CALL_P (insn));
7736
7737 if (INSN_ADDRESSES_SET_P ())
7738 {
7739 unsigned long total;
7740
7741 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7742 distance = (total + insn_current_reference_address (insn));
7743 if (distance < total)
7744 distance = -1;
7745 }
7746
7747 gcc_assert (GET_CODE (pat) == PARALLEL);
7748
7749 /* Get the call rtx. */
7750 call = XVECEXP (pat, 0, 0);
7751 if (GET_CODE (call) == SET)
7752 call = SET_SRC (call);
7753
7754 gcc_assert (GET_CODE (call) == CALL);
7755
7756 /* Determine if this is a local call. */
7757 call_dest = XEXP (XEXP (call, 0), 0);
7758 call_decl = SYMBOL_REF_DECL (call_dest);
7759 local_call = call_decl && targetm.binds_local_p (call_decl);
7760
7761 /* pc-relative branch. */
7762 if (!TARGET_LONG_CALLS
7763 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7764 || distance < MAX_PCREL17F_OFFSET))
7765 length += 8;
7766
7767 /* 64-bit plabel sequence. */
7768 else if (TARGET_64BIT && !local_call)
7769 length += sibcall ? 28 : 24;
7770
7771 /* non-pic long absolute branch sequence. */
7772 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7773 length += 12;
7774
7775 /* long pc-relative branch sequence. */
7776 else if (TARGET_LONG_PIC_SDIFF_CALL
7777 || (TARGET_GAS && !TARGET_SOM
7778 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7779 {
7780 length += 20;
7781
7782 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7783 length += 8;
7784 }
7785
7786 /* 32-bit plabel sequence. */
7787 else
7788 {
7789 length += 32;
7790
7791 if (TARGET_SOM)
7792 length += length_fp_args (insn);
7793
7794 if (flag_pic)
7795 length += 4;
7796
7797 if (!TARGET_PA_20)
7798 {
7799 if (!sibcall)
7800 length += 8;
7801
7802 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7803 length += 8;
7804 }
7805 }
7806
7807 return length;
7808 }
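
/* Worked example (editorial, not in the original source): for a PIC,
   non-sibling call through the 32-bit plabel sequence on a pre-2.0 SOM
   target with one DFmode FP argument, the estimate above is
   32 (base) + 12 (FP argument copy) + 4 (PIC) + 8 (!sibcall)
   + 8 (space register sequence) = 64 bytes.  */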
7809
7810 /* INSN is a function call.
7811
7812 CALL_DEST is the routine we are calling. */
7813
7814 const char *
7815 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7816 {
7817 int seq_length = dbr_sequence_length ();
7818 tree call_decl = SYMBOL_REF_DECL (call_dest);
7819 int local_call = call_decl && targetm.binds_local_p (call_decl);
7820 rtx xoperands[2];
7821
7822 xoperands[0] = call_dest;
7823
7824 /* Handle the common case where we're sure that the branch will reach
7825 the beginning of the "$CODE$" subspace. This is the beginning of
7826 the current function if we are in a named section. */
7827 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7828 {
7829 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7830 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7831 }
7832 else
7833 {
7834 if (TARGET_64BIT && !local_call)
7835 {
7836 /* ??? As far as I can tell, the HP linker doesn't support the
7837 long pc-relative sequence described in the 64-bit runtime
7838 architecture. So, we use a slightly longer indirect call. */
7839 xoperands[0] = pa_get_deferred_plabel (call_dest);
7840 xoperands[1] = gen_label_rtx ();
7841
7842 /* If this isn't a sibcall, we put the load of %r27 into the
7843 delay slot. We can't do this in a sibcall as we don't
7844 have a second call-clobbered scratch register available.
7845 We don't need to do anything when generating fast indirect
7846 calls. */
7847 if (seq_length != 0 && !sibcall)
7848 {
7849 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7850 optimize, 0, NULL);
7851
7852 /* Now delete the delay insn. */
7853 SET_INSN_DELETED (NEXT_INSN (insn));
7854 seq_length = 0;
7855 }
7856
7857 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7858 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7859 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7860
7861 if (sibcall)
7862 {
7863 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7864 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7865 output_asm_insn ("bve (%%r1)", xoperands);
7866 }
7867 else
7868 {
7869 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7870 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7871 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7872 seq_length = 1;
7873 }
7874 }
7875 else
7876 {
7877 int indirect_call = 0;
7878
7879 /* Emit a long call. There are several different sequences
7880 of increasing length and complexity. In most cases,
7881 they don't allow an instruction in the delay slot. */
7882 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7883 && !TARGET_LONG_PIC_SDIFF_CALL
7884 && !(TARGET_GAS && !TARGET_SOM
7885 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7886 && !TARGET_64BIT)
7887 indirect_call = 1;
7888
7889 if (seq_length != 0
7890 && !sibcall
7891 && (!TARGET_PA_20
7892 || indirect_call
7893 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7894 {
7895 /* A non-jump insn in the delay slot. By definition we can
7896 emit this insn before the call (and in fact before the
7897 argument relocation). */
7898 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7899 NULL);
7900
7901 /* Now delete the delay insn. */
7902 SET_INSN_DELETED (NEXT_INSN (insn));
7903 seq_length = 0;
7904 }
7905
7906 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7907 {
7908 /* This is the best sequence for making long calls in
7909 non-pic code. Unfortunately, GNU ld doesn't provide
7910 the stub needed for external calls, and GAS's support
7911 for this with the SOM linker is buggy. It is safe
7912 to use this for local calls. */
7913 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7914 if (sibcall)
7915 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7916 else
7917 {
7918 if (TARGET_PA_20)
7919 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7920 xoperands);
7921 else
7922 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7923
7924 output_asm_insn ("copy %%r31,%%r2", xoperands);
7925 seq_length = 1;
7926 }
7927 }
7928 else
7929 {
7930 if (TARGET_LONG_PIC_SDIFF_CALL)
7931 {
7932 /* The HP assembler and linker can handle relocations
7933 for the difference of two symbols. The HP assembler
7934 recognizes the sequence as a pc-relative call and
7935 the linker provides stubs when needed. */
7936 xoperands[1] = gen_label_rtx ();
7937 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7938 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7939 targetm.asm_out.internal_label (asm_out_file, "L",
7940 CODE_LABEL_NUMBER (xoperands[1]));
7941 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7942 }
7943 else if (TARGET_GAS && !TARGET_SOM
7944 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7945 {
7946 /* GAS currently can't generate the relocations that
7947 are needed for the SOM linker under HP-UX using this
7948 sequence. The GNU linker doesn't generate the stubs
7949 that are needed for external calls on TARGET_ELF32
7950 with this sequence. For now, we have to use a
7951 longer plabel sequence when using GAS. */
7952 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7953 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7954 xoperands);
7955 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7956 xoperands);
7957 }
7958 else
7959 {
7960 /* Emit a long plabel-based call sequence. This is
7961 essentially an inline implementation of $$dyncall.
7962 We don't actually try to call $$dyncall as this is
7963 as difficult as calling the function itself. */
7964 xoperands[0] = pa_get_deferred_plabel (call_dest);
7965 xoperands[1] = gen_label_rtx ();
7966
7967 /* Since the call is indirect, FP arguments in registers
7968 need to be copied to the general registers. Then, the
7969 argument relocation stub will copy them back. */
7970 if (TARGET_SOM)
7971 copy_fp_args (insn);
7972
7973 if (flag_pic)
7974 {
7975 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7976 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7977 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7978 }
7979 else
7980 {
7981 output_asm_insn ("addil LR'%0-$global$,%%r27",
7982 xoperands);
7983 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7984 xoperands);
7985 }
7986
7987 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7988 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7989 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7990 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7991
7992 if (!sibcall && !TARGET_PA_20)
7993 {
7994 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7995 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7996 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7997 else
7998 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7999 }
8000 }
8001
8002 if (TARGET_PA_20)
8003 {
8004 if (sibcall)
8005 output_asm_insn ("bve (%%r1)", xoperands);
8006 else
8007 {
8008 if (indirect_call)
8009 {
8010 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8011 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8012 seq_length = 1;
8013 }
8014 else
8015 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8016 }
8017 }
8018 else
8019 {
8020 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8021 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8022 xoperands);
8023
8024 if (sibcall)
8025 {
8026 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8027 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8028 else
8029 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8030 }
8031 else
8032 {
8033 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8034 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8035 else
8036 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8037
8038 if (indirect_call)
8039 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8040 else
8041 output_asm_insn ("copy %%r31,%%r2", xoperands);
8042 seq_length = 1;
8043 }
8044 }
8045 }
8046 }
8047 }
8048
8049 if (seq_length == 0)
8050 output_asm_insn ("nop", xoperands);
8051
8052 return "";
8053 }
8054
8055 /* Return the attribute length of the indirect call instruction INSN.
8056 The length must match the code generated by pa_output_indirect_call.
8057 The returned length includes the delay slot. Currently, the delay
8058 slot of an indirect call sequence is not exposed and it is used by
8059 the sequence itself. */
8060
8061 int
8062 pa_attr_length_indirect_call (rtx_insn *insn)
8063 {
8064 unsigned long distance = -1;
8065 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8066
8067 if (INSN_ADDRESSES_SET_P ())
8068 {
8069 distance = (total + insn_current_reference_address (insn));
8070 if (distance < total)
8071 distance = -1;
8072 }
8073
8074 if (TARGET_64BIT)
8075 return 12;
8076
8077 if (TARGET_FAST_INDIRECT_CALLS
8078 || (!TARGET_LONG_CALLS
8079 && !TARGET_PORTABLE_RUNTIME
8080 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8081 || distance < MAX_PCREL17F_OFFSET)))
8082 return 8;
8083
8084 if (flag_pic)
8085 return 20;
8086
8087 if (TARGET_PORTABLE_RUNTIME)
8088 return 16;
8089
8090 /* Out of reach, can use ble. */
8091 return 12;
8092 }
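
/* Editorial note: the lengths returned above double as a dispatch key
   in pa_output_indirect_call below -- 8 selects the short $$dyncall
   branch, 12 the ldil/ble absolute sequence (and, on 64-bit targets,
   the ldd/bve,l sequence), 16 the portable runtime sequence, and 20
   the long PIC sequence.  */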
8093
8094 const char *
8095 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8096 {
8097 rtx xoperands[1];
8098
8099 if (TARGET_64BIT)
8100 {
8101 xoperands[0] = call_dest;
8102 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8103 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8104 return "";
8105 }
8106
8107 /* First the special case for kernels, level 0 systems, etc. */
8108 if (TARGET_FAST_INDIRECT_CALLS)
8109 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8110
8111 /* Now the normal case -- we can reach $$dyncall directly or
8112 we're sure that we can get there via a long-branch stub.
8113
8114 No need to check target flags as the length uniquely identifies
8115 the remaining cases. */
8116 if (pa_attr_length_indirect_call (insn) == 8)
8117 {
8118 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8119 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8120 variant of the B,L instruction can't be used on the SOM target. */
8121 if (TARGET_PA_20 && !TARGET_SOM)
8122 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8123 else
8124 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8125 }
8126
8127 /* Long millicode call, but we are not generating PIC or portable runtime
8128 code. */
8129 if (pa_attr_length_indirect_call (insn) == 12)
8130 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8131
8132 /* Long millicode call for portable runtime. */
8133 if (pa_attr_length_indirect_call (insn) == 16)
8134 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8135
8136 /* We need a long PIC call to $$dyncall. */
8137 xoperands[0] = NULL_RTX;
8138 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8139 if (TARGET_SOM || !TARGET_GAS)
8140 {
8141 xoperands[0] = gen_label_rtx ();
8142 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8143 targetm.asm_out.internal_label (asm_out_file, "L",
8144 CODE_LABEL_NUMBER (xoperands[0]));
8145 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8146 }
8147 else
8148 {
8149 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8150 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8151 xoperands);
8152 }
8153 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8154 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8155 return "";
8156 }
8157
8158 /* In HP-UX 8.0's shared library scheme, special relocations are needed
8159 for function labels if they might be passed to a function
8160 in a shared library (because shared libraries don't live in code
8161 space), and special magic is needed to construct their address. */
8162
8163 void
8164 pa_encode_label (rtx sym)
8165 {
8166 const char *str = XSTR (sym, 0);
8167 int len = strlen (str) + 1;
8168 char *newstr, *p;
8169
8170 p = newstr = XALLOCAVEC (char, len + 1);
8171 *p++ = '@';
8172 strcpy (p, str);
8173
8174 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8175 }
8176
8177 static void
8178 pa_encode_section_info (tree decl, rtx rtl, int first)
8179 {
8180 int old_referenced = 0;
8181
8182 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8183 old_referenced
8184 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8185
8186 default_encode_section_info (decl, rtl, first);
8187
8188 if (first && TEXT_SPACE_P (decl))
8189 {
8190 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8191 if (TREE_CODE (decl) == FUNCTION_DECL)
8192 pa_encode_label (XEXP (rtl, 0));
8193 }
8194 else if (old_referenced)
8195 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8196 }
8197
8198 /* This is sort of inverse to pa_encode_section_info. */
8199
8200 static const char *
8201 pa_strip_name_encoding (const char *str)
8202 {
8203 str += (*str == '@');
8204 str += (*str == '*');
8205 return str;
8206 }
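
/* Illustration (hypothetical check, fenced off from the build):
   pa_encode_label and pa_strip_name_encoding form a round trip --
   "foo" encodes to "@foo" and strips back to "foo"; a leading '*'
   from assembler-name encodings is removed as well.  */
#if 0
static void
check_encoding_round_trip (void)
{
  gcc_assert (strcmp (pa_strip_name_encoding ("@foo"), "foo") == 0);
  gcc_assert (strcmp (pa_strip_name_encoding ("*bar"), "bar") == 0);
}
#endif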
8207
8208 /* Returns 1 if OP is a function label involved in a simple addition
8209 with a constant. Used to keep certain patterns from matching
8210 during instruction combination. */
8211 int
8212 pa_is_function_label_plus_const (rtx op)
8213 {
8214 /* Strip off any CONST. */
8215 if (GET_CODE (op) == CONST)
8216 op = XEXP (op, 0);
8217
8218 return (GET_CODE (op) == PLUS
8219 && function_label_operand (XEXP (op, 0), VOIDmode)
8220 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8221 }
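
/* Illustration (editorial): the predicate above matches RTL of the
   shape

     (const (plus (symbol_ref "@func") (const_int 4)))

   as well as the bare PLUS without the CONST wrapper, provided the
   symbol satisfies function_label_operand.  */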
8222
8223 /* Output assembly code for a thunk to FUNCTION. */
8224
8225 static void
8226 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8227 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8228 tree function)
8229 {
8230 static unsigned int current_thunk_number;
8231 int val_14 = VAL_14_BITS_P (delta);
8232 unsigned int old_last_address = last_address, nbytes = 0;
8233 char label[16];
8234 rtx xoperands[4];
8235
8236 xoperands[0] = XEXP (DECL_RTL (function), 0);
8237 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8238 xoperands[2] = GEN_INT (delta);
8239
8240 final_start_function (emit_barrier (), file, 1);
8241
8242 /* Output the thunk. We know that the function is in the same
8243 translation unit (i.e., the same space) as the thunk, and that
8244 thunks are output after their method. Thus, we don't need an
8245 external branch to reach the function. With SOM and GAS,
8246 functions and thunks are effectively in different sections.
8247 Thus, we can always use an IA-relative branch and the linker
8248 will add a long branch stub if necessary.
8249
8250 However, we have to be careful when generating PIC code on the
8251 SOM port to ensure that the sequence does not transfer to an
8252 import stub for the target function as this could clobber the
8253 return value saved at SP-24. This would also apply to the
8254 32-bit linux port if the multi-space model is implemented. */
8255 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8256 && !(flag_pic && TREE_PUBLIC (function))
8257 && (TARGET_GAS || last_address < 262132))
8258 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8259 && ((targetm_common.have_named_sections
8260 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8261 /* The GNU 64-bit linker has rather poor stub management.
8262 So, we use a long branch from thunks that aren't in
8263 the same section as the target function. */
8264 && ((!TARGET_64BIT
8265 && (DECL_SECTION_NAME (thunk_fndecl)
8266 != DECL_SECTION_NAME (function)))
8267 || ((DECL_SECTION_NAME (thunk_fndecl)
8268 == DECL_SECTION_NAME (function))
8269 && last_address < 262132)))
8270 /* In this case, we need to be able to reach the start of
8271 the stub table even though the function is likely closer
8272 and can be jumped to directly. */
8273 || (targetm_common.have_named_sections
8274 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8275 && DECL_SECTION_NAME (function) == NULL
8276 && total_code_bytes < MAX_PCREL17F_OFFSET)
8277 /* Likewise. */
8278 || (!targetm_common.have_named_sections
8279 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8280 {
8281 if (!val_14)
8282 output_asm_insn ("addil L'%2,%%r26", xoperands);
8283
8284 output_asm_insn ("b %0", xoperands);
8285
8286 if (val_14)
8287 {
8288 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8289 nbytes += 8;
8290 }
8291 else
8292 {
8293 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8294 nbytes += 12;
8295 }
8296 }
8297 else if (TARGET_64BIT)
8298 {
8299 /* We only have one call-clobbered scratch register, so we can't
8300 make use of the delay slot if delta doesn't fit in 14 bits. */
8301 if (!val_14)
8302 {
8303 output_asm_insn ("addil L'%2,%%r26", xoperands);
8304 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8305 }
8306
8307 output_asm_insn ("b,l .+8,%%r1", xoperands);
8308
8309 if (TARGET_GAS)
8310 {
8311 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8312 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8313 }
8314 else
8315 {
8316 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8317 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8318 }
8319
8320 if (val_14)
8321 {
8322 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8323 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8324 nbytes += 20;
8325 }
8326 else
8327 {
8328 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8329 nbytes += 24;
8330 }
8331 }
8332 else if (TARGET_PORTABLE_RUNTIME)
8333 {
8334 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8335 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8336
8337 if (!val_14)
8338 output_asm_insn ("addil L'%2,%%r26", xoperands);
8339
8340 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8341
8342 if (val_14)
8343 {
8344 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8345 nbytes += 16;
8346 }
8347 else
8348 {
8349 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8350 nbytes += 20;
8351 }
8352 }
8353 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8354 {
8355 /* The function is accessible from outside this module. The only
8356 way to avoid an import stub between the thunk and function is to
8357 call the function directly with an indirect sequence similar to
8358 that used by $$dyncall. This is possible because $$dyncall acts
8359 as the import stub in an indirect call. */
8360 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8361 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8362 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8363 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8364 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8365 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8366 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8367 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8368 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8369
8370 if (!val_14)
8371 {
8372 output_asm_insn ("addil L'%2,%%r26", xoperands);
8373 nbytes += 4;
8374 }
8375
8376 if (TARGET_PA_20)
8377 {
8378 output_asm_insn ("bve (%%r22)", xoperands);
8379 nbytes += 36;
8380 }
8381 else if (TARGET_NO_SPACE_REGS)
8382 {
8383 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8384 nbytes += 36;
8385 }
8386 else
8387 {
8388 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8389 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8390 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8391 nbytes += 44;
8392 }
8393
8394 if (val_14)
8395 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8396 else
8397 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8398 }
8399 else if (flag_pic)
8400 {
8401 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8402
8403 if (TARGET_SOM || !TARGET_GAS)
8404 {
8405 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8406 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8407 }
8408 else
8409 {
8410 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8411 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8412 }
8413
8414 if (!val_14)
8415 output_asm_insn ("addil L'%2,%%r26", xoperands);
8416
8417 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8418
8419 if (val_14)
8420 {
8421 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8422 nbytes += 20;
8423 }
8424 else
8425 {
8426 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8427 nbytes += 24;
8428 }
8429 }
8430 else
8431 {
8432 if (!val_14)
8433 output_asm_insn ("addil L'%2,%%r26", xoperands);
8434
8435 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8436 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8437
8438 if (val_14)
8439 {
8440 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8441 nbytes += 12;
8442 }
8443 else
8444 {
8445 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8446 nbytes += 16;
8447 }
8448 }
8449
8450 final_end_function ();
8451
8452 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8453 {
8454 switch_to_section (data_section);
8455 output_asm_insn (".align 4", xoperands);
8456 ASM_OUTPUT_LABEL (file, label);
8457 output_asm_insn (".word P'%0", xoperands);
8458 }
8459
8460 current_thunk_number++;
8461 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8462 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8463 last_address += nbytes;
8464 if (old_last_address > last_address)
8465 last_address = UINT_MAX;
8466 update_total_code_bytes (nbytes);
8467 }
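
/* Illustration (editorial): in the short-branch case above, a thunk
   whose DELTA fits in 14 bits reduces to

	b	function
	ldo	delta(%r26),%r26

   adjusting the incoming this-pointer in %r26 from the branch's delay
   slot; the remaining arms differ only in how the target address is
   materialized.  */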
8468
8469 /* Only direct calls to static functions are allowed to be sibling (tail)
8470 call optimized.
8471
8472 This restriction is necessary because some linker generated stubs will
8473 store return pointers into rp' in some cases which might clobber a
8474 live value already in rp'.
8475
8476 In a sibcall the current function and the target function share stack
8477 space. Thus if the path to the current function and the path to the
8478 target function save a value in rp', they save the value into the
8479 same stack slot, which has undesirable consequences.
8480
8481 Because of the deferred binding nature of shared libraries any function
8482 with external scope could be in a different load module and thus require
8483 rp' to be saved when calling that function. So sibcall optimizations
8484 can only be safe for static function.
8485
8486 Note that GCC never needs return value relocations, so we don't have to
8487 worry about static calls with return value relocations (which require
8488 saving rp').
8489
8490 It is safe to perform a sibcall optimization when the target function
8491 will never return. */
8492 static bool
8493 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8494 {
8495 if (TARGET_PORTABLE_RUNTIME)
8496 return false;
8497
8498 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8499 single subspace mode and the call is not indirect. As far as I know,
8500 there is no operating system support for the multiple subspace mode.
8501 It might be possible to support indirect calls if we didn't use
8502 $$dyncall (see the indirect sequence generated in pa_output_call). */
8503 if (TARGET_ELF32)
8504 return (decl != NULL_TREE);
8505
8506 /* Sibcalls are not ok because the arg pointer register is not a fixed
8507 register. This prevents the sibcall optimization from occurring. In
8508 addition, there are problems with stub placement using GNU ld. This
8509 is because a normal sibcall branch uses a 17-bit relocation while
8510 a regular call branch uses a 22-bit relocation. As a result, more
8511 care needs to be taken in the placement of long-branch stubs. */
8512 if (TARGET_64BIT)
8513 return false;
8514
8515 /* Sibcalls are only ok within a translation unit. */
8516 return (decl && !TREE_PUBLIC (decl));
8517 }
8518
8519 /* ??? Addition is not commutative on the PA due to the weird implicit
8520 space register selection rules for memory addresses. Therefore, we
8521 don't consider a + b == b + a, as this might be inside a MEM. */
8522 static bool
8523 pa_commutative_p (const_rtx x, int outer_code)
8524 {
8525 return (COMMUTATIVE_P (x)
8526 && (TARGET_NO_SPACE_REGS
8527 || (outer_code != UNKNOWN && outer_code != MEM)
8528 || GET_CODE (x) != PLUS));
8529 }
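
/* Illustration (editorial): the effect of the predicate above is that
   (plus (reg A) (reg B)) is treated as commutative in ordinary
   arithmetic contexts but not where it could end up inside a MEM,
   since the implicit space register is selected from the base operand.
   With TARGET_NO_SPACE_REGS the distinction disappears.  */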
8530
8531 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8532 use in fmpyadd instructions. */
8533 int
8534 pa_fmpyaddoperands (rtx *operands)
8535 {
8536 machine_mode mode = GET_MODE (operands[0]);
8537
8538 /* Must be a floating point mode. */
8539 if (mode != SFmode && mode != DFmode)
8540 return 0;
8541
8542 /* All modes must be the same. */
8543 if (! (mode == GET_MODE (operands[1])
8544 && mode == GET_MODE (operands[2])
8545 && mode == GET_MODE (operands[3])
8546 && mode == GET_MODE (operands[4])
8547 && mode == GET_MODE (operands[5])))
8548 return 0;
8549
8550 /* All operands must be registers. */
8551 if (! (GET_CODE (operands[1]) == REG
8552 && GET_CODE (operands[2]) == REG
8553 && GET_CODE (operands[3]) == REG
8554 && GET_CODE (operands[4]) == REG
8555 && GET_CODE (operands[5]) == REG))
8556 return 0;
8557
8558 /* Only 2 real operands to the addition. One of the input operands must
8559 be the same as the output operand. */
8560 if (! rtx_equal_p (operands[3], operands[4])
8561 && ! rtx_equal_p (operands[3], operands[5]))
8562 return 0;
8563
8564 /* Inout operand of add cannot conflict with any operands from multiply. */
8565 if (rtx_equal_p (operands[3], operands[0])
8566 || rtx_equal_p (operands[3], operands[1])
8567 || rtx_equal_p (operands[3], operands[2]))
8568 return 0;
8569
8570 /* multiply cannot feed into addition operands. */
8571 if (rtx_equal_p (operands[4], operands[0])
8572 || rtx_equal_p (operands[5], operands[0]))
8573 return 0;
8574
8575 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8576 if (mode == SFmode
8577 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8578 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8579 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8580 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8581 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8582 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8583 return 0;
8584
8585 /* Passed. Operands are suitable for fmpyadd. */
8586 return 1;
8587 }
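
/* Illustration (editorial, hypothetical register choices): a valid
   operand set for the predicate above is

     fr4 = fr5 * fr6	(operands[0..2], the multiply)
     fr7 = fr7 + fr8	(operands[3..5], the add)

   where the add reuses its own output (operands[3] == operands[4]) and
   no add operand overlaps the multiply, so both can issue as a single
   fmpyadd.  */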
8588
8589 #if !defined(USE_COLLECT2)
8590 static void
8591 pa_asm_out_constructor (rtx symbol, int priority)
8592 {
8593 if (!function_label_operand (symbol, VOIDmode))
8594 pa_encode_label (symbol);
8595
8596 #ifdef CTORS_SECTION_ASM_OP
8597 default_ctor_section_asm_out_constructor (symbol, priority);
8598 #else
8599 # ifdef TARGET_ASM_NAMED_SECTION
8600 default_named_section_asm_out_constructor (symbol, priority);
8601 # else
8602 default_stabs_asm_out_constructor (symbol, priority);
8603 # endif
8604 #endif
8605 }
8606
8607 static void
8608 pa_asm_out_destructor (rtx symbol, int priority)
8609 {
8610 if (!function_label_operand (symbol, VOIDmode))
8611 pa_encode_label (symbol);
8612
8613 #ifdef DTORS_SECTION_ASM_OP
8614 default_dtor_section_asm_out_destructor (symbol, priority);
8615 #else
8616 # ifdef TARGET_ASM_NAMED_SECTION
8617 default_named_section_asm_out_destructor (symbol, priority);
8618 # else
8619 default_stabs_asm_out_destructor (symbol, priority);
8620 # endif
8621 #endif
8622 }
8623 #endif
8624
8625 /* This function places uninitialized global data in the bss section.
8626 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8627 function on the SOM port to prevent uninitialized global data from
8628 being placed in the data section. */
8629
8630 void
8631 pa_asm_output_aligned_bss (FILE *stream,
8632 const char *name,
8633 unsigned HOST_WIDE_INT size,
8634 unsigned int align)
8635 {
8636 switch_to_section (bss_section);
8637 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8638
8639 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8640 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8641 #endif
8642
8643 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8644 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8645 #endif
8646
8647 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8648 ASM_OUTPUT_LABEL (stream, name);
8649 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8650 }
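
/* Illustration (editorial): for a 16-byte object requiring 8-byte
   alignment, the function above emits roughly

	.align 8
	(.type/.size directives where supported)
	.align 8
   name:
	.block 16

   within the bss section.  */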
8651
8652 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8653 that doesn't allow the alignment of global common storage to be directly
8654 specified. The SOM linker aligns common storage based on the rounded
8655 value of the NUM_BYTES parameter in the .comm directive. It's not
8656 possible to use the .align directive as it doesn't affect the alignment
8657 of the label associated with a .comm directive. */
8658
8659 void
8660 pa_asm_output_aligned_common (FILE *stream,
8661 const char *name,
8662 unsigned HOST_WIDE_INT size,
8663 unsigned int align)
8664 {
8665 unsigned int max_common_align;
8666
8667 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8668 if (align > max_common_align)
8669 {
8670 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8671 "for global common data. Using %u",
8672 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8673 align = max_common_align;
8674 }
8675
8676 switch_to_section (bss_section);
8677
8678 assemble_name (stream, name);
8679 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8680 MAX (size, align / BITS_PER_UNIT));
8681 }
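
/* Worked example (editorial): a 2-byte object requiring 8-byte
   alignment is emitted above as "name	.comm 8"; the SOM linker then
   derives the needed alignment from the rounded size, which is why the
   size is padded with MAX instead of using .align.  */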
8682
8683 /* We can't use .comm for local common storage as the SOM linker effectively
8684 treats the symbol as universal and uses the same storage for local symbols
8685 with the same name in different object files. The .block directive
8686 reserves an uninitialized block of storage. However, it's not common
8687 storage. Fortunately, GCC never requests common storage with the same
8688 name in any given translation unit. */
8689
8690 void
8691 pa_asm_output_aligned_local (FILE *stream,
8692 const char *name,
8693 unsigned HOST_WIDE_INT size,
8694 unsigned int align)
8695 {
8696 switch_to_section (bss_section);
8697 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8698
8699 #ifdef LOCAL_ASM_OP
8700 fprintf (stream, "%s", LOCAL_ASM_OP);
8701 assemble_name (stream, name);
8702 fprintf (stream, "\n");
8703 #endif
8704
8705 ASM_OUTPUT_LABEL (stream, name);
8706 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8707 }
8708
8709 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8710 use in fmpysub instructions. */
8711 int
8712 pa_fmpysuboperands (rtx *operands)
8713 {
8714 machine_mode mode = GET_MODE (operands[0]);
8715
8716 /* Must be a floating point mode. */
8717 if (mode != SFmode && mode != DFmode)
8718 return 0;
8719
8720 /* All modes must be the same. */
8721 if (! (mode == GET_MODE (operands[1])
8722 && mode == GET_MODE (operands[2])
8723 && mode == GET_MODE (operands[3])
8724 && mode == GET_MODE (operands[4])
8725 && mode == GET_MODE (operands[5])))
8726 return 0;
8727
8728 /* All operands must be registers. */
8729 if (! (GET_CODE (operands[1]) == REG
8730 && GET_CODE (operands[2]) == REG
8731 && GET_CODE (operands[3]) == REG
8732 && GET_CODE (operands[4]) == REG
8733 && GET_CODE (operands[5]) == REG))
8734 return 0;
8735
8736 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8737 operation, so operands[4] must be the same as operand[3]. */
8738 if (! rtx_equal_p (operands[3], operands[4]))
8739 return 0;
8740
8741 /* multiply cannot feed into subtraction. */
8742 if (rtx_equal_p (operands[5], operands[0]))
8743 return 0;
8744
8745 /* Inout operand of sub cannot conflict with any operands from multiply. */
8746 if (rtx_equal_p (operands[3], operands[0])
8747 || rtx_equal_p (operands[3], operands[1])
8748 || rtx_equal_p (operands[3], operands[2]))
8749 return 0;
8750
8751 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8752 if (mode == SFmode
8753 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8754 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8755 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8756 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8757 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8758 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8759 return 0;
8760
8761 /* Passed. Operands are suitable for fmpysub. */
8762 return 1;
8763 }
8764
8765 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8766 constants for a MULT embedded inside a memory address. */
8767 int
8768 pa_mem_shadd_constant_p (int val)
8769 {
8770 if (val == 2 || val == 4 || val == 8)
8771 return 1;
8772 else
8773 return 0;
8774 }
8775
8776 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8777 constants for shadd instructions. */
8778 int
8779 pa_shadd_constant_p (int val)
8780 {
8781 if (val == 1 || val == 2 || val == 3)
8782 return 1;
8783 else
8784 return 0;
8785 }
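
/* Illustration (hypothetical check, fenced off from the build): the
   two predicates above are related by SCALE = 1 << SHIFT -- a shadd
   shift count of 1, 2 or 3 corresponds to a MULT factor of 2, 4 or 8
   inside a memory address.  */
#if 0
static void
check_shadd_scaling (void)
{
  int shift;

  for (shift = 1; shift <= 3; shift++)
    gcc_assert (pa_shadd_constant_p (shift)
		&& pa_mem_shadd_constant_p (1 << shift));
}
#endif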
8786
8787 /* Return TRUE if INSN branches forward. */
8788
8789 static bool
8790 forward_branch_p (rtx_insn *insn)
8791 {
8792 rtx lab = JUMP_LABEL (insn);
8793
8794 /* The INSN must have a jump label. */
8795 gcc_assert (lab != NULL_RTX);
8796
8797 if (INSN_ADDRESSES_SET_P ())
8798 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8799
8800 while (insn)
8801 {
8802 if (insn == lab)
8803 return true;
8804 else
8805 insn = NEXT_INSN (insn);
8806 }
8807
8808 return false;
8809 }
8810
8811 /* Output an unconditional move and branch insn. */
8812
8813 const char *
8814 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8815 {
8816 int length = get_attr_length (insn);
8817
8818 /* These are the cases in which we win. */
8819 if (length == 4)
8820 return "mov%I1b,tr %1,%0,%2";
8821
8822 /* None of the following cases win, but they don't lose either. */
8823 if (length == 8)
8824 {
8825 if (dbr_sequence_length () == 0)
8826 {
8827 /* Nothing in the delay slot, fake it by putting the combined
8828 insn (the copy or add) in the delay slot of a bl. */
8829 if (GET_CODE (operands[1]) == CONST_INT)
8830 return "b %2\n\tldi %1,%0";
8831 else
8832 return "b %2\n\tcopy %1,%0";
8833 }
8834 else
8835 {
8836 /* Something in the delay slot, but we've got a long branch. */
8837 if (GET_CODE (operands[1]) == CONST_INT)
8838 return "ldi %1,%0\n\tb %2";
8839 else
8840 return "copy %1,%0\n\tb %2";
8841 }
8842 }
8843
8844 if (GET_CODE (operands[1]) == CONST_INT)
8845 output_asm_insn ("ldi %1,%0", operands);
8846 else
8847 output_asm_insn ("copy %1,%0", operands);
8848 return pa_output_lbranch (operands[2], insn, 1);
8849 }
8850
8851 /* Output an unconditional add and branch insn. */
8852
8853 const char *
8854 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8855 {
8856 int length = get_attr_length (insn);
8857
8858 /* To make life easy we want operand0 to be the shared input/output
8859 operand and operand1 to be the read-only operand. */
8860 if (operands[0] == operands[1])
8861 operands[1] = operands[2];
8862
8863 /* These are the cases in which we win. */
8864 if (length == 4)
8865 return "add%I1b,tr %1,%0,%3";
8866
8867 /* None of the following cases win, but they don't lose either. */
8868 if (length == 8)
8869 {
8870 if (dbr_sequence_length () == 0)
8871 /* Nothing in the delay slot, fake it by putting the combined
8872 insn (the copy or add) in the delay slot of a bl. */
8873 return "b %3\n\tadd%I1 %1,%0,%0";
8874 else
8875 /* Something in the delay slot, but we've got a long branch. */
8876 return "add%I1 %1,%0,%0\n\tb %3";
8877 }
8878
8879 output_asm_insn ("add%I1 %1,%0,%0", operands);
8880 return pa_output_lbranch (operands[3], insn, 1);
8881 }
8882
8883 /* We use this hook to perform a PA specific optimization which is difficult
8884 to do in earlier passes. */
8885
8886 static void
8887 pa_reorg (void)
8888 {
8889 remove_useless_addtr_insns (1);
8890
8891 if (pa_cpu < PROCESSOR_8000)
8892 pa_combine_instructions ();
8893 }
8894
8895 /* The PA has a number of odd instructions which can perform multiple
8896 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8897 it may be profitable to combine two instructions into one instruction
8898 with two outputs. It's not profitable on PA2.0 machines because
8899 two outputs would take two slots in the reorder buffers.
8900
8901 This routine finds instructions which can be combined and combines
8902 them. We only support some of the potential combinations, and we
8903 only try common ways to find suitable instructions.
8904
8905 * addb can add two registers or a register and a small integer
8906 and jump to a nearby (+-8k) location. Normally the jump to the
8907 nearby location is conditional on the result of the add, but by
8908 using the "true" condition we can make the jump unconditional.
8909 Thus addb can perform two independent operations in one insn
(an example follows this comment).
8910
8911 * movb is similar to addb in that it can perform a reg->reg
8912 or small immediate->reg copy and jump to a nearby (+-8k) location.
8913
8914 * fmpyadd and fmpysub can perform a FP multiply and either an
8915 FP add or FP sub if the operands of the multiply and add/sub are
8916 independent (there are other minor restrictions). Note both
8917 the fmpy and fadd/fsub can in theory move to better spots according
8918 to data dependencies, but for now we require the fmpy stay at a
8919 fixed location.
8920
8921 * Many of the memory operations can perform pre & post updates
8922 of index registers. GCC's pre/post increment/decrement addressing
8923 is far too simple to take advantage of all the possibilities. This
8924 pass may not be suitable since those insns may not be independent.
8925
8926 * comclr can compare two integer registers, or a register and a
8927 small immediate, nullify the following instruction and zero some other register. This
8928 is more difficult to use as it's harder to find an insn which
8929 will generate a comclr than finding something like an unconditional
8930 branch. (conditional moves & long branches create comclr insns).
8931
8932 * Most arithmetic operations can conditionally skip the next
8933 instruction. They can be viewed as "perform this operation
8934 and conditionally jump to this nearby location" (where nearby
8935 is one insn away). These are difficult to use due to the
8936 branch length restrictions. */
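
/* Illustration (editorial): as an example of the addb case above, the
   pair

	add	%r2,%r1,%r1
	b	label

   can be rewritten as the single insn

	addb,tr	%r2,%r1,label

   using the always-true condition to make the branch unconditional.  */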
8937
8938 static void
8939 pa_combine_instructions (void)
8940 {
8941 rtx_insn *anchor;
8942
8943 /* This can get expensive since the basic algorithm is
8944 O(n^2) (or worse). Only do it for -O2 or higher
8945 levels of optimization. */
8946 if (optimize < 2)
8947 return;
8948
8949 /* Walk down the list of insns looking for "anchor" insns which
8950 may be combined with "floating" insns. As the name implies,
8951 "anchor" instructions don't move, while "floating" insns may
8952 move around. */
8953 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8954 rtx_insn *new_rtx = make_insn_raw (par);
8955
8956 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8957 {
8958 enum attr_pa_combine_type anchor_attr;
8959 enum attr_pa_combine_type floater_attr;
8960
8961 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8962 Also ignore any special USE insns. */
8963 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
8964 || GET_CODE (PATTERN (anchor)) == USE
8965 || GET_CODE (PATTERN (anchor)) == CLOBBER)
8966 continue;
8967
8968 anchor_attr = get_attr_pa_combine_type (anchor);
8969 /* See if anchor is an insn suitable for combination. */
8970 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8971 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8972 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8973 && ! forward_branch_p (anchor)))
8974 {
8975 rtx_insn *floater;
8976
8977 for (floater = PREV_INSN (anchor);
8978 floater;
8979 floater = PREV_INSN (floater))
8980 {
8981 if (NOTE_P (floater)
8982 || (NONJUMP_INSN_P (floater)
8983 && (GET_CODE (PATTERN (floater)) == USE
8984 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8985 continue;
8986
8987 /* Anything except a regular INSN will stop our search. */
8988 if (! NONJUMP_INSN_P (floater))
8989 {
8990 floater = NULL;
8991 break;
8992 }
8993
8994 /* See if FLOATER is suitable for combination with the
8995 anchor. */
8996 floater_attr = get_attr_pa_combine_type (floater);
8997 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8998 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8999 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9000 && floater_attr == PA_COMBINE_TYPE_FMPY))
9001 {
9002 /* If ANCHOR and FLOATER can be combined, then we're
9003 done with this pass. */
9004 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9005 SET_DEST (PATTERN (floater)),
9006 XEXP (SET_SRC (PATTERN (floater)), 0),
9007 XEXP (SET_SRC (PATTERN (floater)), 1)))
9008 break;
9009 }
9010
9011 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9012 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9013 {
9014 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9015 {
9016 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9017 SET_DEST (PATTERN (floater)),
9018 XEXP (SET_SRC (PATTERN (floater)), 0),
9019 XEXP (SET_SRC (PATTERN (floater)), 1)))
9020 break;
9021 }
9022 else
9023 {
9024 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9025 SET_DEST (PATTERN (floater)),
9026 SET_SRC (PATTERN (floater)),
9027 SET_SRC (PATTERN (floater))))
9028 break;
9029 }
9030 }
9031 }
9032
9033 /* If we didn't find anything on the backwards scan, try forwards. */
9034 if (!floater
9035 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9036 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9037 {
9038 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9039 {
9040 if (NOTE_P (floater)
9041 || (NONJUMP_INSN_P (floater)
9042 && (GET_CODE (PATTERN (floater)) == USE
9043 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9045 continue;
9046
9047 /* Anything except a regular INSN will stop our search. */
9048 if (! NONJUMP_INSN_P (floater))
9049 {
9050 floater = NULL;
9051 break;
9052 }
9053
9054 /* See if FLOATER is suitable for combination with the
9055 anchor. */
9056 floater_attr = get_attr_pa_combine_type (floater);
9057 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9058 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9059 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9060 && floater_attr == PA_COMBINE_TYPE_FMPY))
9061 {
9062 /* If ANCHOR and FLOATER can be combined, then we're
9063 done with this pass. */
9064 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9065 SET_DEST (PATTERN (floater)),
9066 XEXP (SET_SRC (PATTERN (floater)),
9067 0),
9068 XEXP (SET_SRC (PATTERN (floater)),
9069 1)))
9070 break;
9071 }
9072 }
9073 }
9074
9075 /* FLOATER will be nonzero if we found a suitable floating
9076 insn for combination with ANCHOR. */
9077 if (floater
9078 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9079 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9080 {
9081 /* Emit the new instruction and delete the old anchor. */
9082 emit_insn_before (gen_rtx_PARALLEL
9083 (VOIDmode,
9084 gen_rtvec (2, PATTERN (anchor),
9085 PATTERN (floater))),
9086 anchor);
9087
9088 SET_INSN_DELETED (anchor);
9089
9090 /* Emit a special USE insn for FLOATER, then delete
9091 the floating insn. */
9092 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9093 delete_insn (floater);
9094
9095 continue;
9096 }
9097 else if (floater
9098 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9099 {
9100 rtx temp;
9101 /* Emit the new_jump instruction and delete the old anchor. */
9102 temp
9103 = emit_jump_insn_before (gen_rtx_PARALLEL
9104 (VOIDmode,
9105 gen_rtvec (2, PATTERN (anchor),
9106 PATTERN (floater))),
9107 anchor);
9108
9109 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9110 SET_INSN_DELETED (anchor);
9111
9112 /* Emit a special USE insn for FLOATER, then delete
9113 the floating insn. */
9114 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9115 delete_insn (floater);
9116 continue;
9117 }
9118 }
9119 }
9120 }
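
/* Illustration only (operand syntax is approximate; see the PA-RISC
   1.1 manuals for the exact fmpyadd/fmpysub encodings): given an
   independent multiply/add pair such as

	fmpy,dbl %fr4,%fr5,%fr6
	... insns that neither read %fr6 nor write %fr4 or %fr5 ...
	fadd,dbl %fr7,%fr8,%fr9

   the loop above wraps the two patterns in a single two-output
   PARALLEL for the fmpyadd pattern in pa.md to match, and replaces
   the floater with a USE so that no insn slot disappears from the
   stream.  */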
9121
9122 static int
9123 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9124 int reversed, rtx dest,
9125 rtx src1, rtx src2)
9126 {
9127 int insn_code_number;
9128 rtx_insn *start, *end;
9129
9130 /* Create a PARALLEL with the patterns of ANCHOR and
9131 FLOATER, try to recognize it, then test constraints
9132 for the resulting pattern.
9133
9134 If the pattern doesn't match or the constraints
9135 aren't met keep searching for a suitable floater
9136 insn. */
9137 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9138 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9139 INSN_CODE (new_rtx) = -1;
9140 insn_code_number = recog_memoized (new_rtx);
9141 basic_block bb = BLOCK_FOR_INSN (anchor);
9142 if (insn_code_number < 0
9143 || (extract_insn (new_rtx),
9144 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9145 return 0;
9146
9147 if (reversed)
9148 {
9149 start = anchor;
9150 end = floater;
9151 }
9152 else
9153 {
9154 start = floater;
9155 end = anchor;
9156 }
9157
9158 /* There are up to three operands to consider: one
9159 output and two inputs.
9160
9161 The output must not be used between FLOATER & ANCHOR
9162 exclusive. The inputs must not be set between
9163 FLOATER and ANCHOR exclusive. */
9164
9165 if (reg_used_between_p (dest, start, end))
9166 return 0;
9167
9168 if (reg_set_between_p (src1, start, end))
9169 return 0;
9170
9171 if (reg_set_between_p (src2, start, end))
9172 return 0;
9173
9174 /* If we get here, then everything is good. */
9175 return 1;
9176 }
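
/* A minimal, compiled-out sketch of the range test above: between the
   first and second insn of the pair (exclusive), the combined output
   must not be read and the inputs must not be written.  All toy_*
   names are hypothetical and exist only for illustration.  */
#if 0
struct toy_insn
{
  unsigned int reads;	/* bitmask of registers read */
  unsigned int writes;	/* bitmask of registers written */
};

static int
toy_pair_independent_p (const struct toy_insn *insns, int first, int last,
			int dest, int src1, int src2)
{
  int i;

  for (i = first + 1; i < last; i++)
    {
      /* Mirrors reg_used_between_p on DEST.  */
      if (insns[i].reads & (1U << dest))
	return 0;
      /* Mirrors reg_set_between_p on SRC1 and SRC2.  */
      if (insns[i].writes & ((1U << src1) | (1U << src2)))
	return 0;
    }
  return 1;
}
#endif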
9177
9178 /* Return nonzero if references for INSN are delayed.
9179
9180 Millicode insns are actually function calls with some special
9181 constraints on arguments and register usage.
9182
9183 Millicode calls always expect their arguments in the integer argument
9184 registers, and always return their result in %r29 (ret1). They
9185 are expected to clobber their arguments, %r1, %r29, and the return
9186 pointer, which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9187
9188 This function tells reorg that the references to arguments and
9189 millicode calls do not appear to happen until after the millicode call.
9190 This allows reorg to put insns which set the argument registers into the
9191 delay slot of the millicode call -- thus they act more like traditional
9192 CALL_INSNs.
9193
9194 Note we cannot consider side effects of the insn to be delayed because
9195 the branch and link insn will clobber the return pointer. If we happened
9196 to use the return pointer in the delay slot of the call, then we lose.
9197
9198 get_attr_type will try to recognize the given insn, so make sure to
9199 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9200 in particular. */
9201 int
9202 pa_insn_refs_are_delayed (rtx_insn *insn)
9203 {
9204 return ((NONJUMP_INSN_P (insn)
9205 && GET_CODE (PATTERN (insn)) != SEQUENCE
9206 && GET_CODE (PATTERN (insn)) != USE
9207 && GET_CODE (PATTERN (insn)) != CLOBBER
9208 && get_attr_type (insn) == TYPE_MILLI));
9209 }
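
/* Schematic example (the millicode name is real, the scheduling is
   illustrative): because argument references are treated as delayed,
   reorg is free to fill the delay slot of a millicode call with the
   insn that sets up its argument, e.g.

	bl $$mulI,%r31
	ldi 7,%r26	; argument set up in the delay slot  */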
9210
9211 /* Promote the return value, but not the arguments. */
9212
9213 static machine_mode
9214 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9215 machine_mode mode,
9216 int *punsignedp ATTRIBUTE_UNUSED,
9217 const_tree fntype ATTRIBUTE_UNUSED,
9218 int for_return)
9219 {
9220 if (for_return == 0)
9221 return mode;
9222 return promote_mode (type, mode, punsignedp);
9223 }
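
/* For example, a function returning `short' has its result widened to
   word_mode (and thus to all of %r28), while a `short' argument is
   left in HImode because FOR_RETURN is 0 for arguments.  */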
9224
9225 /* On the HP-PA the value is found in register(s) 28(-29), unless
9226 the mode is SF or DF. Then the value is returned in fr4 (32).
9227
9228 This must perform the same promotions as PROMOTE_MODE, else promoting
9229 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9230
9231 Small structures must be returned in a PARALLEL on PA64 in order
9232 to match the HP Compiler ABI. */
9233
9234 static rtx
9235 pa_function_value (const_tree valtype,
9236 const_tree func ATTRIBUTE_UNUSED,
9237 bool outgoing ATTRIBUTE_UNUSED)
9238 {
9239 machine_mode valmode;
9240
9241 if (AGGREGATE_TYPE_P (valtype)
9242 || TREE_CODE (valtype) == COMPLEX_TYPE
9243 || TREE_CODE (valtype) == VECTOR_TYPE)
9244 {
9245 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9246
9247 /* Handle aggregates that fit exactly in a word or double word. */
9248 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9249 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9250
9251 if (TARGET_64BIT)
9252 {
9253 /* Aggregates with a size less than or equal to 128 bits are
9254 returned in GR 28(-29). They are left justified. The pad
9255 bits are undefined. Larger aggregates are returned in
9256 memory. */
9257 rtx loc[2];
9258 int i, offset = 0;
9259 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9260
9261 for (i = 0; i < ub; i++)
9262 {
9263 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9264 gen_rtx_REG (DImode, 28 + i),
9265 GEN_INT (offset));
9266 offset += 8;
9267 }
9268
9269 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9270 }
9271 else if (valsize > UNITS_PER_WORD)
9272 {
9273 /* Aggregates 5 to 8 bytes in size are returned in general
9274 registers r28-r29 in the same manner as other
9275 non-floating-point objects. The data is right-justified and
9276 zero-extended to 64 bits. This is opposite to the normal
9277 justification used on big endian targets and requires
9278 special treatment. */
9279 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9280 gen_rtx_REG (DImode, 28), const0_rtx);
9281 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9282 }
9283 }
9284
9285 if ((INTEGRAL_TYPE_P (valtype)
9286 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9287 || POINTER_TYPE_P (valtype))
9288 valmode = word_mode;
9289 else
9290 valmode = TYPE_MODE (valtype);
9291
9292 if (TREE_CODE (valtype) == REAL_TYPE
9293 && !AGGREGATE_TYPE_P (valtype)
9294 && TYPE_MODE (valtype) != TFmode
9295 && !TARGET_SOFT_FLOAT)
9296 return gen_rtx_REG (valmode, 32);
9297
9298 return gen_rtx_REG (valmode, 28);
9299 }
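
/* Illustrative cases of the above: `char' and `int' results come back
   in %r28 in word_mode; a `double' result comes back in %fr4 (GCC
   register 32) unless TARGET_SOFT_FLOAT; and on PA64 a 12-byte struct
   is returned left-justified in %r28-%r29 via the PARALLEL built
   above.  */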
9300
9301 /* Implement the TARGET_LIBCALL_VALUE hook. */
9302
9303 static rtx
9304 pa_libcall_value (machine_mode mode,
9305 const_rtx fun ATTRIBUTE_UNUSED)
9306 {
9307 if (! TARGET_SOFT_FLOAT
9308 && (mode == SFmode || mode == DFmode))
9309 return gen_rtx_REG (mode, 32);
9310 else
9311 return gen_rtx_REG (mode, 28);
9312 }
9313
9314 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9315
9316 static bool
9317 pa_function_value_regno_p (const unsigned int regno)
9318 {
9319 if (regno == 28
9320 || (! TARGET_SOFT_FLOAT && regno == 32))
9321 return true;
9322
9323 return false;
9324 }
9325
9326 /* Update the data in CUM to advance over an argument
9327 of mode MODE and data type TYPE.
9328 (TYPE is null for libcalls where that information may not be available.) */
9329
9330 static void
9331 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9332 const_tree type, bool named ATTRIBUTE_UNUSED)
9333 {
9334 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9335 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9336
9337 cum->nargs_prototype--;
9338 cum->words += (arg_size
9339 + ((cum->words & 01)
9340 && type != NULL_TREE
9341 && arg_size > 1));
9342 }
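
/* Worked example of the expression above: with cum->words == 1, a
   `double' argument (arg_size == 2) advances cum->words to 4, i.e.
   one word of alignment padding plus the two words of data.  */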
9343
9344 /* Return the location of a parameter that is passed in a register or NULL
9345 if the parameter has any component that is passed in memory.
9346
9347 This is new code and will be pushed into the net sources after
9348 further testing.
9349
9350 ??? We might want to restructure this so that it looks more like other
9351 ports. */
9352 static rtx
9353 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9354 const_tree type, bool named ATTRIBUTE_UNUSED)
9355 {
9356 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9357 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9358 int alignment = 0;
9359 int arg_size;
9360 int fpr_reg_base;
9361 int gpr_reg_base;
9362 rtx retval;
9363
9364 if (mode == VOIDmode)
9365 return NULL_RTX;
9366
9367 arg_size = FUNCTION_ARG_SIZE (mode, type);
9368
9369 /* If this arg would be passed partially or totally on the stack, then
9370 this routine should return zero. pa_arg_partial_bytes will
9371 handle arguments which are split between regs and stack slots if
9372 the ABI mandates split arguments. */
9373 if (!TARGET_64BIT)
9374 {
9375 /* The 32-bit ABI does not split arguments. */
9376 if (cum->words + arg_size > max_arg_words)
9377 return NULL_RTX;
9378 }
9379 else
9380 {
9381 if (arg_size > 1)
9382 alignment = cum->words & 1;
9383 if (cum->words + alignment >= max_arg_words)
9384 return NULL_RTX;
9385 }
9386
9387 /* The 32-bit ABIs and the 64-bit ABIs are rather different,
9388 particularly in their handling of FP registers. We might
9389 be able to cleverly share code between them, but I'm not
9390 going to bother in the hope that splitting them up results
9391 in code that is more easily understood. */
9392
9393 if (TARGET_64BIT)
9394 {
9395 /* Advance the base registers to their current locations.
9396
9397 Remember, gprs grow towards smaller register numbers while
9398 fprs grow to higher register numbers. Also remember that
9399 although FP regs are 32-bit addressable, we pretend that
9400 the registers are 64 bits wide.
9401 gpr_reg_base = 26 - cum->words;
9402 fpr_reg_base = 32 + cum->words;
9403
9404 /* Arguments wider than one word and small aggregates need special
9405 treatment. */
9406 if (arg_size > 1
9407 || mode == BLKmode
9408 || (type && (AGGREGATE_TYPE_P (type)
9409 || TREE_CODE (type) == COMPLEX_TYPE
9410 || TREE_CODE (type) == VECTOR_TYPE)))
9411 {
9412 /* Double-extended precision (80-bit), quad-precision (128-bit)
9413 and aggregates including complex numbers are aligned on
9414 128-bit boundaries. The first eight 64-bit argument slots
9415 are associated one-to-one, with general registers r26
9416 through r19, and also with floating-point registers fr4
9417 through fr11. Arguments larger than one word are always
9418 passed in general registers.
9419
9420 Using a PARALLEL with a word mode register results in left
9421 justified data on a big-endian target. */
9422
9423 rtx loc[8];
9424 int i, offset = 0, ub = arg_size;
9425
9426 /* Align the base register. */
9427 gpr_reg_base -= alignment;
9428
9429 ub = MIN (ub, max_arg_words - cum->words - alignment);
9430 for (i = 0; i < ub; i++)
9431 {
9432 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9433 gen_rtx_REG (DImode, gpr_reg_base),
9434 GEN_INT (offset));
9435 gpr_reg_base -= 1;
9436 offset += 8;
9437 }
9438
9439 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9440 }
9441 }
9442 else
9443 {
9444 /* If the argument is larger than a word, then we know precisely
9445 which registers we must use. */
9446 if (arg_size > 1)
9447 {
9448 if (cum->words)
9449 {
9450 gpr_reg_base = 23;
9451 fpr_reg_base = 38;
9452 }
9453 else
9454 {
9455 gpr_reg_base = 25;
9456 fpr_reg_base = 34;
9457 }
9458
9459 /* Structures 5 to 8 bytes in size are passed in the general
9460 registers in the same manner as other non-floating-point
9461 objects. The data is right-justified and zero-extended
9462 to 64 bits. This is opposite to the normal justification
9463 used on big endian targets and requires special treatment.
9464 We now define BLOCK_REG_PADDING to pad these objects.
9465 Aggregates, complex and vector types are passed in the same
9466 manner as structures. */
9467 if (mode == BLKmode
9468 || (type && (AGGREGATE_TYPE_P (type)
9469 || TREE_CODE (type) == COMPLEX_TYPE
9470 || TREE_CODE (type) == VECTOR_TYPE)))
9471 {
9472 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9473 gen_rtx_REG (DImode, gpr_reg_base),
9474 const0_rtx);
9475 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9476 }
9477 }
9478 else
9479 {
9480 /* We have a single word (32 bits). A simple computation
9481 will get us the register #s we need. */
9482 gpr_reg_base = 26 - cum->words;
9483 fpr_reg_base = 32 + 2 * cum->words;
9484 }
9485 }
9486
9487 /* Determine if the argument needs to be passed in both general and
9488 floating point registers. */
9489 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9490 /* If we are doing soft-float with portable runtime, then there
9491 is no need to worry about FP regs. */
9492 && !TARGET_SOFT_FLOAT
9493 /* The parameter must be some kind of scalar float, else we just
9494 pass it in integer registers. */
9495 && GET_MODE_CLASS (mode) == MODE_FLOAT
9496 /* The target function must not have a prototype. */
9497 && cum->nargs_prototype <= 0
9498 /* libcalls do not need to pass items in both FP and general
9499 registers. */
9500 && type != NULL_TREE
9501 /* All this hair applies to "outgoing" args only. This includes
9502 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9503 && !cum->incoming)
9504 /* Also pass outgoing floating arguments in both registers in indirect
9505 calls with the 32 bit ABI and the HP assembler since there is no
9506 way to specify the argument locations in static functions.
9507 || (!TARGET_64BIT
9508 && !TARGET_GAS
9509 && !cum->incoming
9510 && cum->indirect
9511 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9512 {
9513 retval
9514 = gen_rtx_PARALLEL
9515 (mode,
9516 gen_rtvec (2,
9517 gen_rtx_EXPR_LIST (VOIDmode,
9518 gen_rtx_REG (mode, fpr_reg_base),
9519 const0_rtx),
9520 gen_rtx_EXPR_LIST (VOIDmode,
9521 gen_rtx_REG (mode, gpr_reg_base),
9522 const0_rtx)));
9523 }
9524 else
9525 {
9526 /* See if we should pass this parameter in a general register. */
9527 if (TARGET_SOFT_FLOAT
9528 /* Indirect calls in the normal 32-bit ABI require all arguments
9529 to be passed in general registers. */
9530 || (!TARGET_PORTABLE_RUNTIME
9531 && !TARGET_64BIT
9532 && !TARGET_ELF32
9533 && cum->indirect)
9534 /* If the parameter is not a scalar floating-point parameter,
9535 then it belongs in GPRs. */
9536 || GET_MODE_CLASS (mode) != MODE_FLOAT
9537 /* Structure with single SFmode field belongs in GPR. */
9538 || (type && AGGREGATE_TYPE_P (type)))
9539 retval = gen_rtx_REG (mode, gpr_reg_base);
9540 else
9541 retval = gen_rtx_REG (mode, fpr_reg_base);
9542 }
9543 return retval;
9544 }
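
/* Illustrative example: for an unprototyped 32-bit call such as
   `foo (1.5)', the `double' takes the PARALLEL path above and is
   passed redundantly in both the GPR pair starting at %r25 and the
   first FP argument register (GCC register 34 here), so the callee
   may fetch it from either register file.  */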
9545
9546 /* Arguments larger than one word are double word aligned. */
9547
9548 static unsigned int
9549 pa_function_arg_boundary (machine_mode mode, const_tree type)
9550 {
9551 bool singleword = (type
9552 ? (integer_zerop (TYPE_SIZE (type))
9553 || !TREE_CONSTANT (TYPE_SIZE (type))
9554 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9555 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9556
9557 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9558 }
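
/* E.g. a 4-byte `int' gets PARM_BOUNDARY, while an 8-byte `double'
   on the 32-bit port gets MAX_PARM_BOUNDARY (a double-word slot).  */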
9559
9560 /* If this arg would be passed totally in registers or totally on the stack,
9561 then this routine should return zero. */
9562
9563 static int
9564 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9565 tree type, bool named ATTRIBUTE_UNUSED)
9566 {
9567 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9568 unsigned int max_arg_words = 8;
9569 unsigned int offset = 0;
9570
9571 if (!TARGET_64BIT)
9572 return 0;
9573
9574 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9575 offset = 1;
9576
9577 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9578 /* Arg fits fully into registers. */
9579 return 0;
9580 else if (cum->words + offset >= max_arg_words)
9581 /* Arg fully on the stack. */
9582 return 0;
9583 else
9584 /* Arg is split. */
9585 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9586 }
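
/* Worked example (TARGET_64BIT only): with cum->words == 6 and a
   32-byte aggregate (four words), offset stays 0; 6 + 4 > 8 while
   6 < 8, so the argument is split and (8 - 6) * UNITS_PER_WORD == 16
   bytes travel in registers, with the remaining 16 on the stack.  */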
9587
9588
9589 /* A get_unnamed_section callback for switching to the text section.
9590
9591 This function is only used with SOM. Because we don't support
9592 named subspaces, we can only create a new subspace or switch back
9593 to the default text subspace. */
9594
9595 static void
9596 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9597 {
9598 gcc_assert (TARGET_SOM);
9599 if (TARGET_GAS)
9600 {
9601 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9602 {
9603 /* We only want to emit a .nsubspa directive once at the
9604 start of the function. */
9605 cfun->machine->in_nsubspa = 1;
9606
9607 /* Create a new subspace for the text. This provides
9608 better stub placement and one-only functions. */
9609 if (cfun->decl
9610 && DECL_ONE_ONLY (cfun->decl)
9611 && !DECL_WEAK (cfun->decl))
9612 {
9613 output_section_asm_op ("\t.SPACE $TEXT$\n"
9614 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9615 "ACCESS=44,SORT=24,COMDAT");
9616 return;
9617 }
9618 }
9619 else
9620 {
9621 /* There isn't a current function or the body of the current
9622 function has been completed. So, we are changing to the
9623 text section to output debugging information. Thus, we
9624 need to forget that we are in the text section so that
9625 varasm.c will call us when text_section is selected again. */
9626 gcc_assert (!cfun || !cfun->machine
9627 || cfun->machine->in_nsubspa == 2);
9628 in_section = NULL;
9629 }
9630 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9631 return;
9632 }
9633 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9634 }
9635
9636 /* A get_unnamed_section callback for switching to comdat data
9637 sections. This function is only used with SOM. */
9638
9639 static void
9640 som_output_comdat_data_section_asm_op (const void *data)
9641 {
9642 in_section = NULL;
9643 output_section_asm_op (data);
9644 }
9645
9646 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
9647
9648 static void
9649 pa_som_asm_init_sections (void)
9650 {
9651 text_section
9652 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9653
9654 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9655 is not being generated. */
9656 som_readonly_data_section
9657 = get_unnamed_section (0, output_section_asm_op,
9658 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9659
9660 /* When secondary definitions are not supported, SOM makes readonly
9661 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9662 the comdat flag. */
9663 som_one_only_readonly_data_section
9664 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9665 "\t.SPACE $TEXT$\n"
9666 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9667 "ACCESS=0x2c,SORT=16,COMDAT");
9668
9670 /* When secondary definitions are not supported, SOM makes data one-only
9671 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9672 som_one_only_data_section
9673 = get_unnamed_section (SECTION_WRITE,
9674 som_output_comdat_data_section_asm_op,
9675 "\t.SPACE $PRIVATE$\n"
9676 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9677 "ACCESS=31,SORT=24,COMDAT");
9678
9679 if (flag_tm)
9680 som_tm_clone_table_section
9681 = get_unnamed_section (0, output_section_asm_op,
9682 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9683
9684 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9685 which reference data within the $TEXT$ space (for example constant
9686 strings in the $LIT$ subspace).
9687
9688 The assemblers (GAS and HP as) both have problems with handling
9689 the difference of two symbols which is the other correct way to
9690 reference constant data during PIC code generation.
9691
9692 So, there's no way to reference constant data which is in the
9693 $TEXT$ space during PIC generation. Instead place all constant
9694 data into the $PRIVATE$ subspace (this reduces sharing, but it
9695 works correctly). */
9696 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9697
9698 /* We must not have a reference to an external symbol defined in a
9699 shared library in a readonly section, else the SOM linker will
9700 complain.
9701
9702 So, we force exception information into the data section. */
9703 exception_section = data_section;
9704 }
9705
9706 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9707
9708 static section *
9709 pa_som_tm_clone_table_section (void)
9710 {
9711 return som_tm_clone_table_section;
9712 }
9713
9714 /* On hpux10, the linker will give an error if we have a reference
9715 in the read-only data section to a symbol defined in a shared
9716 library. Therefore, expressions that might require a reloc can
9717 not be placed in the read-only data section. */
9718
9719 static section *
9720 pa_select_section (tree exp, int reloc,
9721 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9722 {
9723 if (TREE_CODE (exp) == VAR_DECL
9724 && TREE_READONLY (exp)
9725 && !TREE_THIS_VOLATILE (exp)
9726 && DECL_INITIAL (exp)
9727 && (DECL_INITIAL (exp) == error_mark_node
9728 || TREE_CONSTANT (DECL_INITIAL (exp)))
9729 && !reloc)
9730 {
9731 if (TARGET_SOM
9732 && DECL_ONE_ONLY (exp)
9733 && !DECL_WEAK (exp))
9734 return som_one_only_readonly_data_section;
9735 else
9736 return readonly_data_section;
9737 }
9738 else if (CONSTANT_CLASS_P (exp) && !reloc)
9739 return readonly_data_section;
9740 else if (TARGET_SOM
9741 && TREE_CODE (exp) == VAR_DECL
9742 && DECL_ONE_ONLY (exp)
9743 && !DECL_WEAK (exp))
9744 return som_one_only_data_section;
9745 else
9746 return data_section;
9747 }
9748
9749 /* Implement pa_reloc_rw_mask. */
9750
9751 static int
9752 pa_reloc_rw_mask (void)
9753 {
9754 /* We force (const (plus (symbol) (const_int))) to memory when the
9755 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9756 handle this construct in read-only memory and we want to avoid
9757 this for ELF. So, we always force an RTX needing relocation to
9758 the data section. */
9759 return 3;
9760 }
9761
9762 static void
9763 pa_globalize_label (FILE *stream, const char *name)
9764 {
9765 /* We only handle DATA objects here, functions are globalized in
9766 ASM_DECLARE_FUNCTION_NAME. */
9767 if (! FUNCTION_NAME_P (name))
9768 {
9769 fputs ("\t.EXPORT ", stream);
9770 assemble_name (stream, name);
9771 fputs (",DATA\n", stream);
9772 }
9773 }
9774
9775 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9776
9777 static rtx
9778 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9779 int incoming ATTRIBUTE_UNUSED)
9780 {
9781 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9782 }
9783
9784 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9785
9786 bool
9787 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9788 {
9789 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9790 PA64 ABI says that objects larger than 128 bits are returned in memory.
9791 Note, int_size_in_bytes can return -1 if the size of the object is
9792 variable or larger than the maximum value that can be expressed as
9793 a HOST_WIDE_INT. It can also return zero for an empty type. The
9794 simplest way to handle variable and empty types is to pass them in
9795 memory. This avoids problems in defining the boundaries of argument
9796 slots, allocating registers, etc. */
9797 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9798 || int_size_in_bytes (type) <= 0);
9799 }
9800
9801 /* Structure to hold declaration and name of external symbols that are
9802 emitted by GCC. We generate a vector of these symbols and output them
9803 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9804 This avoids putting out names that are never really used. */
9805
9806 typedef struct GTY(()) extern_symbol
9807 {
9808 tree decl;
9809 const char *name;
9810 } extern_symbol;
9811
9812 /* Garbage-collected vector of extern_symbol entries. */
9815 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9816
9817 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9818 /* Mark DECL (name NAME) as an external reference (assembler output
9819 file FILE). This saves the names to output at the end of the file
9820 if actually referenced. */
9821
9822 void
9823 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9824 {
9825 gcc_assert (file == asm_out_file);
9826 extern_symbol p = {decl, name};
9827 vec_safe_push (extern_symbols, p);
9828 }
9829
9830 /* Output text required at the end of an assembler file.
9831 This includes deferred plabels and .import directives for
9832 all external symbols that were actually referenced. */
9833
9834 static void
9835 pa_hpux_file_end (void)
9836 {
9837 unsigned int i;
9838 extern_symbol *p;
9839
9840 if (!NO_DEFERRED_PROFILE_COUNTERS)
9841 output_deferred_profile_counters ();
9842
9843 output_deferred_plabels ();
9844
9845 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9846 {
9847 tree decl = p->decl;
9848
9849 if (!TREE_ASM_WRITTEN (decl)
9850 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9851 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9852 }
9853
9854 vec_free (extern_symbols);
9855 }
9856 #endif
9857
9858 /* Return true if a change from mode FROM to mode TO for a register
9859 in register class RCLASS is invalid. */
9860
9861 bool
9862 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9863 enum reg_class rclass)
9864 {
9865 if (from == to)
9866 return false;
9867
9868 /* Reject changes to/from complex and vector modes. */
9869 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9870 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9871 return true;
9872
9873 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9874 return false;
9875
9876 /* There is no way to load QImode or HImode values directly from
9877 memory. SImode loads to the FP registers are not zero extended.
9878 On the 64-bit target, this conflicts with the definition of
9879 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9880 with different sizes in the floating-point registers. */
9881 if (MAYBE_FP_REG_CLASS_P (rclass))
9882 return true;
9883
9884 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9885 in specific sets of registers. Thus, we cannot allow changing
9886 to a larger mode when it's larger than a word. */
9887 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9888 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9889 return true;
9890
9891 return false;
9892 }
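
/* Examples: SImode <-> SFmode is allowed in any class since the sizes
   match; SImode -> DImode is rejected in classes that may contain FP
   registers, and on the 32-bit port it is also rejected elsewhere
   because DImode is wider than a word.  */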
9893
9894 /* Returns TRUE if it is a good idea to tie two pseudo registers
9895 when one has mode MODE1 and one has mode MODE2.
9896 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9897 for any hard reg, then this must be FALSE for correct output.
9898
9899 We should return FALSE for QImode and HImode because these modes
9900 are not ok in the floating-point registers. However, this prevents
9901 tying these modes to SImode and DImode in the general registers.
9902 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9903 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9904 in the floating-point registers. */
9905
9906 bool
9907 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9908 {
9909 /* Don't tie modes in different classes. */
9910 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9911 return false;
9912
9913 return true;
9914 }
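
/* E.g. SImode and DImode tie (both MODE_INT), while SImode and
   SFmode do not since their mode classes differ.  */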
9915
9916 \f
9917 /* Length in units of the trampoline instruction code. */
9918
9919 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9920
9921
9922 /* Output assembler code for a block containing the constant parts
9923 of a trampoline, leaving space for the variable parts.
9924
9925 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9926 and then branches to the specified routine.
9927
9928 This code template is copied from the text segment to a stack
9929 location, patched by pa_trampoline_init to contain valid values,
9930 and then entered as a subroutine.
9931
9932 It is best to keep this as small as possible to avoid having to
9933 flush multiple lines in the cache. */
9934
9935 static void
9936 pa_asm_trampoline_template (FILE *f)
9937 {
9938 if (!TARGET_64BIT)
9939 {
9940 fputs ("\tldw 36(%r22),%r21\n", f);
9941 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9942 if (ASSEMBLER_DIALECT == 0)
9943 fputs ("\tdepi 0,31,2,%r21\n", f);
9944 else
9945 fputs ("\tdepwi 0,31,2,%r21\n", f);
9946 fputs ("\tldw 4(%r21),%r19\n", f);
9947 fputs ("\tldw 0(%r21),%r21\n", f);
9948 if (TARGET_PA_20)
9949 {
9950 fputs ("\tbve (%r21)\n", f);
9951 fputs ("\tldw 40(%r22),%r29\n", f);
9952 fputs ("\t.word 0\n", f);
9953 fputs ("\t.word 0\n", f);
9954 }
9955 else
9956 {
9957 fputs ("\tldsid (%r21),%r1\n", f);
9958 fputs ("\tmtsp %r1,%sr0\n", f);
9959 fputs ("\tbe 0(%sr0,%r21)\n", f);
9960 fputs ("\tldw 40(%r22),%r29\n", f);
9961 }
9962 fputs ("\t.word 0\n", f);
9963 fputs ("\t.word 0\n", f);
9964 fputs ("\t.word 0\n", f);
9965 fputs ("\t.word 0\n", f);
9966 }
9967 else
9968 {
9969 fputs ("\t.dword 0\n", f);
9970 fputs ("\t.dword 0\n", f);
9971 fputs ("\t.dword 0\n", f);
9972 fputs ("\t.dword 0\n", f);
9973 fputs ("\tmfia %r31\n", f);
9974 fputs ("\tldd 24(%r31),%r1\n", f);
9975 fputs ("\tldd 24(%r1),%r27\n", f);
9976 fputs ("\tldd 16(%r1),%r1\n", f);
9977 fputs ("\tbve (%r1)\n", f);
9978 fputs ("\tldd 32(%r31),%r31\n", f);
9979 fputs ("\t.dword 0 ; fptr\n", f);
9980 fputs ("\t.dword 0 ; static link\n", f);
9981 }
9982 }
9983
9984 /* Emit RTL insns to initialize the variable parts of a trampoline.
9985 FNADDR is an RTX for the address of the function's pure code.
9986 CXT is an RTX for the static chain value for the function.
9987
9988 Move the function address to the trampoline template at offset 36.
9989 Move the static chain value to trampoline template at offset 40.
9990 Move the trampoline address to trampoline template at offset 44.
9991 Move r19 to trampoline template at offset 48. The latter two
9992 words create a plabel for the indirect call to the trampoline.
9993
9994 A similar sequence is used for the 64-bit port but the plabel is
9995 at the beginning of the trampoline.
9996
9997 Finally, the cache entries for the trampoline code are flushed.
9998 This is necessary to ensure that the trampoline instruction sequence
9999 is written to memory prior to any attempts at prefetching the code
10000 sequence. */
10001
10002 static void
10003 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10004 {
10005 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10006 rtx start_addr = gen_reg_rtx (Pmode);
10007 rtx end_addr = gen_reg_rtx (Pmode);
10008 rtx line_length = gen_reg_rtx (Pmode);
10009 rtx r_tramp, tmp;
10010
10011 emit_block_move (m_tramp, assemble_trampoline_template (),
10012 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10013 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10014
10015 if (!TARGET_64BIT)
10016 {
10017 tmp = adjust_address (m_tramp, Pmode, 36);
10018 emit_move_insn (tmp, fnaddr);
10019 tmp = adjust_address (m_tramp, Pmode, 40);
10020 emit_move_insn (tmp, chain_value);
10021
10022 /* Create a fat pointer for the trampoline. */
10023 tmp = adjust_address (m_tramp, Pmode, 44);
10024 emit_move_insn (tmp, r_tramp);
10025 tmp = adjust_address (m_tramp, Pmode, 48);
10026 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10027
10028 /* fdc and fic only use registers for the address to flush,
10029 they do not accept integer displacements. We align the
10030 start and end addresses to the beginning of their respective
10031 cache lines to minimize the number of lines flushed. */
10032 emit_insn (gen_andsi3 (start_addr, r_tramp,
10033 GEN_INT (-MIN_CACHELINE_SIZE)));
10034 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10035 TRAMPOLINE_CODE_SIZE-1));
10036 emit_insn (gen_andsi3 (end_addr, tmp,
10037 GEN_INT (-MIN_CACHELINE_SIZE)));
10038 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10039 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10040 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10041 gen_reg_rtx (Pmode),
10042 gen_reg_rtx (Pmode)));
10043 }
10044 else
10045 {
10046 tmp = adjust_address (m_tramp, Pmode, 56);
10047 emit_move_insn (tmp, fnaddr);
10048 tmp = adjust_address (m_tramp, Pmode, 64);
10049 emit_move_insn (tmp, chain_value);
10050
10051 /* Create a fat pointer for the trampoline. */
10052 tmp = adjust_address (m_tramp, Pmode, 16);
10053 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10054 r_tramp, 32)));
10055 tmp = adjust_address (m_tramp, Pmode, 24);
10056 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10057
10058 /* fdc and fic only use registers for the address to flush,
10059 they do not accept integer displacements. We align the
10060 start and end addresses to the beginning of their respective
10061 cache lines to minimize the number of lines flushed. */
10062 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10063 emit_insn (gen_anddi3 (start_addr, tmp,
10064 GEN_INT (-MIN_CACHELINE_SIZE)));
10065 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10066 TRAMPOLINE_CODE_SIZE - 1));
10067 emit_insn (gen_anddi3 (end_addr, tmp,
10068 GEN_INT (-MIN_CACHELINE_SIZE)));
10069 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10070 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10071 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10072 gen_reg_rtx (Pmode),
10073 gen_reg_rtx (Pmode)));
10074 }
10075
10076 #ifdef HAVE_ENABLE_EXECUTE_STACK
10077 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10078 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10079 #endif
10080 }
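
/* Compiled-out usage sketch: GNU C code like the following is what
   forces a trampoline onto the stack and exercises the initialization
   above (illustrative only, not part of the port).  */
#if 0
static int
toy_outer (int x)
{
  int toy_inner (int y) { return x + y; }	/* needs the static chain */
  int (*fp) (int) = toy_inner;	/* address taken, so a trampoline */

  return fp (1);
}
#endif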
10081
10082 /* Perform any machine-specific adjustment in the address of the trampoline.
10083 ADDR contains the address that was passed to pa_trampoline_init.
10084 Adjust the trampoline address to point to the plabel at offset 44. */
10085
10086 static rtx
10087 pa_trampoline_adjust_address (rtx addr)
10088 {
10089 if (!TARGET_64BIT)
10090 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10091 return addr;
10092 }
10093
10094 static rtx
10095 pa_delegitimize_address (rtx orig_x)
10096 {
10097 rtx x = delegitimize_mem_from_attrs (orig_x);
10098
10099 if (GET_CODE (x) == LO_SUM
10100 && GET_CODE (XEXP (x, 1)) == UNSPEC
10101 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10102 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10103 return x;
10104 }
10105 \f
10106 static rtx
10107 pa_internal_arg_pointer (void)
10108 {
10109 /* The argument pointer and the hard frame pointer are the same in
10110 the 32-bit runtime, so we don't need a copy. */
10111 if (TARGET_64BIT)
10112 return copy_to_reg (virtual_incoming_args_rtx);
10113 else
10114 return virtual_incoming_args_rtx;
10115 }
10116
10117 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10118 Frame pointer elimination is automatically handled. */
10119
10120 static bool
10121 pa_can_eliminate (const int from, const int to)
10122 {
10123 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10124 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10125 return false;
10126
10127 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10128 ? ! frame_pointer_needed
10129 : true);
10130 }
10131
10132 /* Define the offset between two registers, FROM to be eliminated and its
10133 replacement TO, at the start of a routine. */
10134 HOST_WIDE_INT
10135 pa_initial_elimination_offset (int from, int to)
10136 {
10137 HOST_WIDE_INT offset;
10138
10139 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10140 && to == STACK_POINTER_REGNUM)
10141 offset = -pa_compute_frame_size (get_frame_size (), 0);
10142 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10143 offset = 0;
10144 else
10145 gcc_unreachable ();
10146
10147 return offset;
10148 }
10149
10150 static void
10151 pa_conditional_register_usage (void)
10152 {
10153 int i;
10154
10155 if (!TARGET_64BIT && !TARGET_PA_11)
10156 {
10157 for (i = 56; i <= FP_REG_LAST; i++)
10158 fixed_regs[i] = call_used_regs[i] = 1;
10159 for (i = 33; i < 56; i += 2)
10160 fixed_regs[i] = call_used_regs[i] = 1;
10161 }
10162 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10163 {
10164 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10165 fixed_regs[i] = call_used_regs[i] = 1;
10166 }
10167 if (flag_pic)
10168 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10169 }
10170
10171 /* Target hook for c_mode_for_suffix. */
10172
10173 static machine_mode
10174 pa_c_mode_for_suffix (char suffix)
10175 {
10176 if (HPUX_LONG_DOUBLE_LIBRARY)
10177 {
10178 if (suffix == 'q')
10179 return TFmode;
10180 }
10181
10182 return VOIDmode;
10183 }
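
/* E.g. with an HP-UX long-double library, the constant `1.0q' is
   given TFmode (128-bit); for any other suffix this hook declines by
   returning VOIDmode.  */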
10184
10185 /* Target hook for function_section. */
10186
10187 static section *
10188 pa_function_section (tree decl, enum node_frequency freq,
10189 bool startup, bool exit)
10190 {
10191 /* Put functions in text section if target doesn't have named sections. */
10192 if (!targetm_common.have_named_sections)
10193 return text_section;
10194
10195 /* Force nested functions into the same section as the containing
10196 function. */
10197 if (decl
10198 && DECL_SECTION_NAME (decl) == NULL
10199 && DECL_CONTEXT (decl) != NULL_TREE
10200 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10201 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10202 return function_section (DECL_CONTEXT (decl));
10203
10204 /* Otherwise, use the default function section. */
10205 return default_function_section (decl, freq, startup, exit);
10206 }
10207
10208 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10209
10210 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10211 that need more than three instructions to load prior to reload. This
10212 limit is somewhat arbitrary. It takes three instructions to load a
10213 CONST_INT from memory but two are memory accesses. It may be better
10214 to increase the allowed range for CONST_INTS. We may also be able
10215 to handle CONST_DOUBLES. */
10216
10217 static bool
10218 pa_legitimate_constant_p (machine_mode mode, rtx x)
10219 {
10220 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10221 return false;
10222
10223 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10224 return false;
10225
10226 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10227 legitimate constants. The other variants can't be handled by
10228 the move patterns after reload starts. */
10229 if (tls_referenced_p (x))
10230 return false;
10231
10232 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10233 return false;
10234
10235 if (TARGET_64BIT
10236 && HOST_BITS_PER_WIDE_INT > 32
10237 && GET_CODE (x) == CONST_INT
10238 && !reload_in_progress
10239 && !reload_completed
10240 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10241 && !pa_cint_ok_for_move (INTVAL (x)))
10242 return false;
10243
10244 if (function_label_operand (x, mode))
10245 return false;
10246
10247 return true;
10248 }
10249
10250 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10251
10252 static unsigned int
10253 pa_section_type_flags (tree decl, const char *name, int reloc)
10254 {
10255 unsigned int flags;
10256
10257 flags = default_section_type_flags (decl, name, reloc);
10258
10259 /* Function labels are placed in the constant pool. This can
10260 cause a section conflict if decls are put in ".data.rel.ro"
10261 or ".data.rel.ro.local" using the __attribute__ construct. */
10262 if (strcmp (name, ".data.rel.ro") == 0
10263 || strcmp (name, ".data.rel.ro.local") == 0)
10264 flags |= SECTION_WRITE | SECTION_RELRO;
10265
10266 return flags;
10267 }
10268
10269 /* pa_legitimate_address_p recognizes an RTL expression that is a
10270 valid memory address for an instruction. The MODE argument is the
10271 machine mode for the MEM expression that wants to use this address.
10272
10273 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10274 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10275 available with floating point loads and stores, and integer loads.
10276 We get better code by allowing indexed addresses in the initial
10277 RTL generation.
10278
10279 The acceptance of indexed addresses as legitimate implies that we
10280 must provide patterns for doing indexed integer stores, or the move
10281 expanders must force the address of an indexed store to a register.
10282 We have adopted the latter approach.
10283
10284 Another function of pa_legitimate_address_p is to ensure that
10285 the base register is a valid pointer for indexed instructions.
10286 On targets that have non-equivalent space registers, we have to
10287 know at the time of assembler output which register in a REG+REG
10288 pair is the base register. The REG_POINTER flag is sometimes lost
10289 in reload and the following passes, so it can't be relied on during
10290 code generation. Thus, we either have to canonicalize the order
10291 of the registers in REG+REG indexed addresses, or treat REG+REG
10292 addresses separately and provide patterns for both permutations.
10293
10294 The latter approach requires several hundred additional lines of
10295 code in pa.md. The downside to canonicalizing is that a PLUS
10296 in the wrong order can't combine to form a scaled indexed
10297 memory operand. As we won't need to canonicalize the operands if
10298 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10299
10300 We initially break out scaled indexed addresses in canonical order
10301 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10302 scaled indexed addresses during RTL generation. However, fold_rtx
10303 has its own opinion on how the operands of a PLUS should be ordered.
10304 If one of the operands is equivalent to a constant, it will make
10305 that operand the second operand. As the base register is likely to
10306 be equivalent to a SYMBOL_REF, we have made it the second operand.
10307
10308 pa_legitimate_address_p accepts REG+REG as legitimate when the
10309 operands are in the order INDEX+BASE on targets with non-equivalent
10310 space registers, and in any order on targets with equivalent space
10311 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10312
10313 We treat a SYMBOL_REF as legitimate if it is part of the current
10314 function's constant-pool, because such addresses can actually be
10315 output as REG+SMALLINT. */
10316
10317 static bool
10318 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10319 {
10320 if ((REG_P (x)
10321 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10322 : REG_OK_FOR_BASE_P (x)))
10323 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10324 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10325 && REG_P (XEXP (x, 0))
10326 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10327 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10328 return true;
10329
10330 if (GET_CODE (x) == PLUS)
10331 {
10332 rtx base, index;
10333
10334 /* For REG+REG, the base register should be in XEXP (x, 1),
10335 so check it first. */
10336 if (REG_P (XEXP (x, 1))
10337 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10338 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10339 base = XEXP (x, 1), index = XEXP (x, 0);
10340 else if (REG_P (XEXP (x, 0))
10341 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10342 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10343 base = XEXP (x, 0), index = XEXP (x, 1);
10344 else
10345 return false;
10346
10347 if (GET_CODE (index) == CONST_INT)
10348 {
10349 if (INT_5_BITS (index))
10350 return true;
10351
10352 /* When INT14_OK_STRICT is false, a secondary reload is needed
10353 to adjust the displacement of SImode and DImode floating point
10354 instructions but this may fail when the register also needs
10355 reloading. So, we return false when STRICT is true. We
10356 also reject long displacements for float mode addresses since
10357 the majority of accesses will use floating point instructions
10358 that don't support 14-bit offsets. */
10359 if (!INT14_OK_STRICT
10360 && (strict || !(reload_in_progress || reload_completed))
10361 && mode != QImode
10362 && mode != HImode)
10363 return false;
10364
10365 return base14_operand (index, mode);
10366 }
10367
10368 if (!TARGET_DISABLE_INDEXING
10369 /* Only accept the "canonical" INDEX+BASE operand order
10370 on targets with non-equivalent space registers. */
10371 && (TARGET_NO_SPACE_REGS
10372 ? REG_P (index)
10373 : (base == XEXP (x, 1) && REG_P (index)
10374 && (reload_completed
10375 || (reload_in_progress && HARD_REGISTER_P (base))
10376 || REG_POINTER (base))
10377 && (reload_completed
10378 || (reload_in_progress && HARD_REGISTER_P (index))
10379 || !REG_POINTER (index))))
10380 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10381 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10382 : REG_OK_FOR_INDEX_P (index))
10383 && borx_reg_operand (base, Pmode)
10384 && borx_reg_operand (index, Pmode))
10385 return true;
10386
10387 if (!TARGET_DISABLE_INDEXING
10388 && GET_CODE (index) == MULT
10389 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10390 && REG_P (XEXP (index, 0))
10391 && GET_MODE (XEXP (index, 0)) == Pmode
10392 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10393 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10394 && GET_CODE (XEXP (index, 1)) == CONST_INT
10395 && INTVAL (XEXP (index, 1))
10396 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10397 && borx_reg_operand (base, Pmode))
10398 return true;
10399
10400 return false;
10401 }
10402
10403 if (GET_CODE (x) == LO_SUM)
10404 {
10405 rtx y = XEXP (x, 0);
10406
10407 if (GET_CODE (y) == SUBREG)
10408 y = SUBREG_REG (y);
10409
10410 if (REG_P (y)
10411 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10412 : REG_OK_FOR_BASE_P (y)))
10413 {
10414 /* Needed for -fPIC */
10415 if (mode == Pmode
10416 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10417 return true;
10418
10419 if (!INT14_OK_STRICT
10420 && (strict || !(reload_in_progress || reload_completed))
10421 && mode != QImode
10422 && mode != HImode)
10423 return false;
10424
10425 if (CONSTANT_P (XEXP (x, 1)))
10426 return true;
10427 }
10428 return false;
10429 }
10430
10431 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10432 return true;
10433
10434 return false;
10435 }
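
/* Illustrative forms accepted above (the RTL is schematic):

	(mem:SI (plus (reg) (const_int 12)))		REG+SMALLINT
	(mem:SF (plus (reg idx) (reg base)))		REG+REG
	(mem:DF (plus (mult (reg) (const_int 8))
		      (reg base)))			scaled index

   When INT14_OK_STRICT is false, a displacement needing 14 bits is
   refused under STRICT for modes wider than HImode so that reload
   can repair the address.  */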
10436
10437 /* Look for machine dependent ways to make the invalid address AD a
10438 valid address.
10439
10440 For the PA, transform:
10441
10442 memory(X + <large int>)
10443
10444 into:
10445
10446 if (<large int> & mask) >= (mask + 1) / 2
10447 Y = (<large int> & ~mask) + mask + 1 Round up.
10448 else
10449 Y = (<large int> & ~mask) Round down.
10450 Z = X + Y
10451 memory (Z + (<large int> - Y));
10452
10453 This makes reload inheritance and reload_cse work better since Z
10454 can be reused.
10455
10456 There may be more opportunities to improve code with this hook. */
10457
10458 rtx
10459 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10460 int opnum, int type,
10461 int ind_levels ATTRIBUTE_UNUSED)
10462 {
10463 long offset, newoffset, mask;
10464 rtx new_rtx, temp = NULL_RTX;
10465
10466 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10467 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10468
10469 if (optimize && GET_CODE (ad) == PLUS)
10470 temp = simplify_binary_operation (PLUS, Pmode,
10471 XEXP (ad, 0), XEXP (ad, 1));
10472
10473 new_rtx = temp ? temp : ad;
10474
10475 if (optimize
10476 && GET_CODE (new_rtx) == PLUS
10477 && GET_CODE (XEXP (new_rtx, 0)) == REG
10478 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10479 {
10480 offset = INTVAL (XEXP ((new_rtx), 1));
10481
10482 /* Choose rounding direction. Round up if we are >= halfway. */
10483 if ((offset & mask) >= ((mask + 1) / 2))
10484 newoffset = (offset & ~mask) + mask + 1;
10485 else
10486 newoffset = offset & ~mask;
10487
10488 /* Ensure that long displacements are aligned. */
10489 if (mask == 0x3fff
10490 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10491 || (TARGET_64BIT && (mode) == DImode)))
10492 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10493
10494 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10495 {
10496 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10497 GEN_INT (newoffset));
10498 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10499 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10500 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10501 opnum, (enum reload_type) type);
10502 return ad;
10503 }
10504 }
10505
10506 return NULL_RTX;
10507 }
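
/* Worked example: for an SFmode access memory (%r3 + 100) with
   INT14_OK_STRICT false, mask is 0x1f; 100 & 0x1f == 4 is below the
   halfway point 16, so Y rounds down to 96.  The address becomes
   (%r3 + 96) + 4, the (%r3 + 96) part is pushed as a reload, and a
   neighboring access such as %r3 + 104 can then inherit it.  */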
10508
10509 /* Output address vector. */
10510
10511 void
10512 pa_output_addr_vec (rtx lab, rtx body)
10513 {
10514 int idx, vlen = XVECLEN (body, 0);
10515
10516 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10517 if (TARGET_GAS)
10518 fputs ("\t.begin_brtab\n", asm_out_file);
10519 for (idx = 0; idx < vlen; idx++)
10520 {
10521 ASM_OUTPUT_ADDR_VEC_ELT
10522 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10523 }
10524 if (TARGET_GAS)
10525 fputs ("\t.end_brtab\n", asm_out_file);
10526 }
10527
10528 /* Output address difference vector. */
10529
10530 void
10531 pa_output_addr_diff_vec (rtx lab, rtx body)
10532 {
10533 rtx base = XEXP (XEXP (body, 0), 0);
10534 int idx, vlen = XVECLEN (body, 1);
10535
10536 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10537 if (TARGET_GAS)
10538 fputs ("\t.begin_brtab\n", asm_out_file);
10539 for (idx = 0; idx < vlen; idx++)
10540 {
10541 ASM_OUTPUT_ADDR_DIFF_ELT
10542 (asm_out_file,
10543 body,
10544 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10545 CODE_LABEL_NUMBER (base));
10546 }
10547 if (TARGET_GAS)
10548 fputs ("\t.end_brtab\n", asm_out_file);
10549 }
10550
10551 #include "gt-pa.h"