1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "memmodel.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "df.h"
30 #include "tm_p.h"
31 #include "stringpool.h"
32 #include "attribs.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "alias.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "varasm.h"
43 #include "calls.h"
44 #include "output.h"
45 #include "except.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "reload.h"
49 #include "common/common-target.h"
50 #include "langhooks.h"
51 #include "cfgrtl.h"
52 #include "opts.h"
53 #include "builtins.h"
54
55 /* This file should be included last. */
56 #include "target-def.h"
57
58 /* Return nonzero if there is a bypass for the output of
59 OUT_INSN and the fp store IN_INSN. */
60 int
61 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
62 {
63 machine_mode store_mode;
64 machine_mode other_mode;
65 rtx set;
66
67 if (recog_memoized (in_insn) < 0
68 || (get_attr_type (in_insn) != TYPE_FPSTORE
69 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
70 || recog_memoized (out_insn) < 0)
71 return 0;
72
73 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
74
75 set = single_set (out_insn);
76 if (!set)
77 return 0;
78
79 other_mode = GET_MODE (SET_SRC (set));
80
81 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
82 }
83
84
85 #ifndef DO_FRAME_NOTES
86 #ifdef INCOMING_RETURN_ADDR_RTX
87 #define DO_FRAME_NOTES 1
88 #else
89 #define DO_FRAME_NOTES 0
90 #endif
91 #endif
92
93 static void pa_option_override (void);
94 static void copy_reg_pointer (rtx, rtx);
95 static void fix_range (const char *);
96 static int hppa_register_move_cost (machine_mode mode, reg_class_t,
97 reg_class_t);
98 static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
99 static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
100 static inline rtx force_mode (machine_mode, rtx);
101 static void pa_reorg (void);
102 static void pa_combine_instructions (void);
103 static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
104 rtx, rtx);
105 static bool forward_branch_p (rtx_insn *);
106 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
107 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
108 static int compute_movmem_length (rtx_insn *);
109 static int compute_clrmem_length (rtx_insn *);
110 static bool pa_assemble_integer (rtx, unsigned int, int);
111 static void remove_useless_addtr_insns (int);
112 static void store_reg (int, HOST_WIDE_INT, int);
113 static void store_reg_modify (int, int, HOST_WIDE_INT);
114 static void load_reg (int, HOST_WIDE_INT, int);
115 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
116 static rtx pa_function_value (const_tree, const_tree, bool);
117 static rtx pa_libcall_value (machine_mode, const_rtx);
118 static bool pa_function_value_regno_p (const unsigned int);
119 static void pa_output_function_prologue (FILE *);
120 static void update_total_code_bytes (unsigned int);
121 static void pa_output_function_epilogue (FILE *);
122 static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
123 static int pa_adjust_priority (rtx_insn *, int);
124 static int pa_issue_rate (void);
125 static int pa_reloc_rw_mask (void);
126 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
127 static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
128 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
129 ATTRIBUTE_UNUSED;
130 static void pa_encode_section_info (tree, rtx, int);
131 static const char *pa_strip_name_encoding (const char *);
132 static bool pa_function_ok_for_sibcall (tree, tree);
133 static void pa_globalize_label (FILE *, const char *)
134 ATTRIBUTE_UNUSED;
135 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
136 HOST_WIDE_INT, tree);
137 #if !defined(USE_COLLECT2)
138 static void pa_asm_out_constructor (rtx, int);
139 static void pa_asm_out_destructor (rtx, int);
140 #endif
141 static void pa_init_builtins (void);
142 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
143 static rtx hppa_builtin_saveregs (void);
144 static void hppa_va_start (tree, rtx);
145 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
146 static bool pa_scalar_mode_supported_p (scalar_mode);
147 static bool pa_commutative_p (const_rtx x, int outer_code);
148 static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
149 static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
150 static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
151 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
152 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
153 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
154 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
155 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
156 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
157 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
158 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
159 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
160 static void output_deferred_plabels (void);
161 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
162 #ifdef ASM_OUTPUT_EXTERNAL_REAL
163 static void pa_hpux_file_end (void);
164 #endif
165 static void pa_init_libfuncs (void);
166 static rtx pa_struct_value_rtx (tree, int);
167 static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
168 const_tree, bool);
169 static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
170 tree, bool);
171 static void pa_function_arg_advance (cumulative_args_t, machine_mode,
172 const_tree, bool);
173 static rtx pa_function_arg (cumulative_args_t, machine_mode,
174 const_tree, bool);
175 static pad_direction pa_function_arg_padding (machine_mode, const_tree);
176 static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
177 static struct machine_function * pa_init_machine_status (void);
178 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
179 machine_mode,
180 secondary_reload_info *);
181 static void pa_extra_live_on_entry (bitmap);
182 static machine_mode pa_promote_function_mode (const_tree,
183 machine_mode, int *,
184 const_tree, int);
185
186 static void pa_asm_trampoline_template (FILE *);
187 static void pa_trampoline_init (rtx, tree, rtx);
188 static rtx pa_trampoline_adjust_address (rtx);
189 static rtx pa_delegitimize_address (rtx);
190 static bool pa_print_operand_punct_valid_p (unsigned char);
191 static rtx pa_internal_arg_pointer (void);
192 static bool pa_can_eliminate (const int, const int);
193 static void pa_conditional_register_usage (void);
194 static machine_mode pa_c_mode_for_suffix (char);
195 static section *pa_function_section (tree, enum node_frequency, bool, bool);
196 static bool pa_cannot_force_const_mem (machine_mode, rtx);
197 static bool pa_legitimate_constant_p (machine_mode, rtx);
198 static unsigned int pa_section_type_flags (tree, const char *, int);
199 static bool pa_legitimate_address_p (machine_mode, rtx, bool);
200 static bool pa_callee_copies (cumulative_args_t, machine_mode,
201 const_tree, bool);
202 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
203 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
204 static bool pa_modes_tieable_p (machine_mode, machine_mode);
205
206 /* The following extra sections are only used for SOM. */
207 static GTY(()) section *som_readonly_data_section;
208 static GTY(()) section *som_one_only_readonly_data_section;
209 static GTY(()) section *som_one_only_data_section;
210 static GTY(()) section *som_tm_clone_table_section;
211
212 /* Counts for the number of callee-saved general and floating point
213 registers which were saved by the current function's prologue. */
214 static int gr_saved, fr_saved;
215
216 /* Boolean indicating whether the return pointer was saved by the
217 current function's prologue. */
218 static bool rp_saved;
219
220 static rtx find_addr_reg (rtx);
221
222 /* Keep track of the number of bytes we have output in the CODE subspace
223 during this compilation so we'll know when to emit inline long-calls. */
224 unsigned long total_code_bytes;
225
226 /* The last address of the previous function plus the number of bytes in
227 associated thunks that have been output. This is used to determine if
228 a thunk can use an IA-relative branch to reach its target function. */
229 static unsigned int last_address;
230
231 /* Variables to handle plabels that we discover are necessary at assembly
232 output time. They are output after the current function. */
233 struct GTY(()) deferred_plabel
234 {
235 rtx internal_label;
236 rtx symbol;
237 };
238 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
239 deferred_plabels;
240 static size_t n_deferred_plabels = 0;
241 \f
242 /* Initialize the GCC target structure. */
243
244 #undef TARGET_OPTION_OVERRIDE
245 #define TARGET_OPTION_OVERRIDE pa_option_override
246
247 #undef TARGET_ASM_ALIGNED_HI_OP
248 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
249 #undef TARGET_ASM_ALIGNED_SI_OP
250 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
251 #undef TARGET_ASM_ALIGNED_DI_OP
252 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
253 #undef TARGET_ASM_UNALIGNED_HI_OP
254 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
255 #undef TARGET_ASM_UNALIGNED_SI_OP
256 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
257 #undef TARGET_ASM_UNALIGNED_DI_OP
258 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
259 #undef TARGET_ASM_INTEGER
260 #define TARGET_ASM_INTEGER pa_assemble_integer
261
262 #undef TARGET_ASM_FUNCTION_PROLOGUE
263 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
264 #undef TARGET_ASM_FUNCTION_EPILOGUE
265 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
266
267 #undef TARGET_FUNCTION_VALUE
268 #define TARGET_FUNCTION_VALUE pa_function_value
269 #undef TARGET_LIBCALL_VALUE
270 #define TARGET_LIBCALL_VALUE pa_libcall_value
271 #undef TARGET_FUNCTION_VALUE_REGNO_P
272 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
273
274 #undef TARGET_LEGITIMIZE_ADDRESS
275 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
276
277 #undef TARGET_SCHED_ADJUST_COST
278 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
279 #undef TARGET_SCHED_ADJUST_PRIORITY
280 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
281 #undef TARGET_SCHED_ISSUE_RATE
282 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
283
284 #undef TARGET_ENCODE_SECTION_INFO
285 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
286 #undef TARGET_STRIP_NAME_ENCODING
287 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
288
289 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
290 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
291
292 #undef TARGET_COMMUTATIVE_P
293 #define TARGET_COMMUTATIVE_P pa_commutative_p
294
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
297 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
298 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
299
300 #undef TARGET_ASM_FILE_END
301 #ifdef ASM_OUTPUT_EXTERNAL_REAL
302 #define TARGET_ASM_FILE_END pa_hpux_file_end
303 #else
304 #define TARGET_ASM_FILE_END output_deferred_plabels
305 #endif
306
307 #undef TARGET_ASM_RELOC_RW_MASK
308 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
309
310 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
311 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
312
313 #if !defined(USE_COLLECT2)
314 #undef TARGET_ASM_CONSTRUCTOR
315 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
316 #undef TARGET_ASM_DESTRUCTOR
317 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
318 #endif
319
320 #undef TARGET_INIT_BUILTINS
321 #define TARGET_INIT_BUILTINS pa_init_builtins
322
323 #undef TARGET_EXPAND_BUILTIN
324 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
325
326 #undef TARGET_REGISTER_MOVE_COST
327 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
328 #undef TARGET_RTX_COSTS
329 #define TARGET_RTX_COSTS hppa_rtx_costs
330 #undef TARGET_ADDRESS_COST
331 #define TARGET_ADDRESS_COST hppa_address_cost
332
333 #undef TARGET_MACHINE_DEPENDENT_REORG
334 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
335
336 #undef TARGET_INIT_LIBFUNCS
337 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
338
339 #undef TARGET_PROMOTE_FUNCTION_MODE
340 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
341 #undef TARGET_PROMOTE_PROTOTYPES
342 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
343
344 #undef TARGET_STRUCT_VALUE_RTX
345 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
346 #undef TARGET_RETURN_IN_MEMORY
347 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
348 #undef TARGET_MUST_PASS_IN_STACK
349 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
350 #undef TARGET_PASS_BY_REFERENCE
351 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
352 #undef TARGET_CALLEE_COPIES
353 #define TARGET_CALLEE_COPIES pa_callee_copies
354 #undef TARGET_ARG_PARTIAL_BYTES
355 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
356 #undef TARGET_FUNCTION_ARG
357 #define TARGET_FUNCTION_ARG pa_function_arg
358 #undef TARGET_FUNCTION_ARG_ADVANCE
359 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
360 #undef TARGET_FUNCTION_ARG_PADDING
361 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
362 #undef TARGET_FUNCTION_ARG_BOUNDARY
363 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
364
365 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
366 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
367 #undef TARGET_EXPAND_BUILTIN_VA_START
368 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
369 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
370 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
371
372 #undef TARGET_SCALAR_MODE_SUPPORTED_P
373 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
374
375 #undef TARGET_CANNOT_FORCE_CONST_MEM
376 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
377
378 #undef TARGET_SECONDARY_RELOAD
379 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
380
381 #undef TARGET_EXTRA_LIVE_ON_ENTRY
382 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
383
384 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
385 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
386 #undef TARGET_TRAMPOLINE_INIT
387 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
388 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
389 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
390 #undef TARGET_DELEGITIMIZE_ADDRESS
391 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
392 #undef TARGET_INTERNAL_ARG_POINTER
393 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
394 #undef TARGET_CAN_ELIMINATE
395 #define TARGET_CAN_ELIMINATE pa_can_eliminate
396 #undef TARGET_CONDITIONAL_REGISTER_USAGE
397 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
398 #undef TARGET_C_MODE_FOR_SUFFIX
399 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
400 #undef TARGET_ASM_FUNCTION_SECTION
401 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
402
403 #undef TARGET_LEGITIMATE_CONSTANT_P
404 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
405 #undef TARGET_SECTION_TYPE_FLAGS
406 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
407 #undef TARGET_LEGITIMATE_ADDRESS_P
408 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
409
410 #undef TARGET_LRA_P
411 #define TARGET_LRA_P hook_bool_void_false
412
413 #undef TARGET_HARD_REGNO_NREGS
414 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
415 #undef TARGET_HARD_REGNO_MODE_OK
416 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
417 #undef TARGET_MODES_TIEABLE_P
418 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
419
420 struct gcc_target targetm = TARGET_INITIALIZER;
421 \f
422 /* Parse the -mfixed-range= option string. */
423
424 static void
425 fix_range (const char *const_str)
426 {
427 int i, first, last;
428 char *str, *dash, *comma;
429
430 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
431 REG2 are either register names or register numbers. The effect
432 of this option is to mark the registers in the range from REG1 to
433 REG2 as ``fixed'' so they won't be used by the compiler. This is
434 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
435
436 i = strlen (const_str);
437 str = (char *) alloca (i + 1);
438 memcpy (str, const_str, i + 1);
439
440 while (1)
441 {
442 dash = strchr (str, '-');
443 if (!dash)
444 {
445 warning (0, "value of -mfixed-range must have form REG1-REG2");
446 return;
447 }
448 *dash = '\0';
449
450 comma = strchr (dash + 1, ',');
451 if (comma)
452 *comma = '\0';
453
454 first = decode_reg_name (str);
455 if (first < 0)
456 {
457 warning (0, "unknown register name: %s", str);
458 return;
459 }
460
461 last = decode_reg_name (dash + 1);
462 if (last < 0)
463 {
464 warning (0, "unknown register name: %s", dash + 1);
465 return;
466 }
467
468 *dash = '-';
469
470 if (first > last)
471 {
472 warning (0, "%s-%s is an empty range", str, dash + 1);
473 return;
474 }
475
476 for (i = first; i <= last; ++i)
477 fixed_regs[i] = call_used_regs[i] = 1;
478
479 if (!comma)
480 break;
481
482 *comma = ',';
483 str = comma + 1;
484 }
485
486 /* Check if all floating point registers have been fixed. */
487 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
488 if (!fixed_regs[i])
489 break;
490
491 if (i > FP_REG_LAST)
492 target_flags |= MASK_DISABLE_FPREGS;
493 }
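/* As an illustration, assuming the standard PA register names:

     -mfixed-range=fr4-fr31

   marks fr4 through fr31 as fixed and call-used so the register
   allocator never touches them (e.g., for kernel mode code), and if
   the ranges end up covering everything from FP_REG_FIRST to
   FP_REG_LAST (as this one does on the PA), MASK_DISABLE_FPREGS is
   set as well.  Several ranges may be given, e.g.
   -mfixed-range=fr4-fr15,fr20-fr31.  */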
494
495 /* Implement the TARGET_OPTION_OVERRIDE hook. */
496
497 static void
498 pa_option_override (void)
499 {
500 unsigned int i;
501 cl_deferred_option *opt;
502 vec<cl_deferred_option> *v
503 = (vec<cl_deferred_option> *) pa_deferred_options;
504
505 if (v)
506 FOR_EACH_VEC_ELT (*v, i, opt)
507 {
508 switch (opt->opt_index)
509 {
510 case OPT_mfixed_range_:
511 fix_range (opt->arg);
512 break;
513
514 default:
515 gcc_unreachable ();
516 }
517 }
518
519 if (flag_pic && TARGET_PORTABLE_RUNTIME)
520 {
521 warning (0, "PIC code generation is not supported in the portable runtime model");
522 }
523
524 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
525 {
526 warning (0, "PIC code generation is not compatible with fast indirect calls");
527 }
528
529 if (! TARGET_GAS && write_symbols != NO_DEBUG)
530 {
531 warning (0, "-g is only supported when using GAS on this processor,");
532 warning (0, "-g option disabled");
533 write_symbols = NO_DEBUG;
534 }
535
536 /* We only support the "big PIC" model now. And we always generate PIC
537 code when in 64-bit mode. */
538 if (flag_pic == 1 || TARGET_64BIT)
539 flag_pic = 2;
540
541 /* Disable -freorder-blocks-and-partition as we don't support hot and
542 cold partitioning. */
543 if (flag_reorder_blocks_and_partition)
544 {
545 inform (input_location,
546 "-freorder-blocks-and-partition does not work "
547 "on this architecture");
548 flag_reorder_blocks_and_partition = 0;
549 flag_reorder_blocks = 1;
550 }
551
552 /* We can't guarantee that .dword is available for 32-bit targets. */
553 if (UNITS_PER_WORD == 4)
554 targetm.asm_out.aligned_op.di = NULL;
555
556 /* The unaligned ops are only available when using GAS. */
557 if (!TARGET_GAS)
558 {
559 targetm.asm_out.unaligned_op.hi = NULL;
560 targetm.asm_out.unaligned_op.si = NULL;
561 targetm.asm_out.unaligned_op.di = NULL;
562 }
563
564 init_machine_status = pa_init_machine_status;
565 }
566
567 enum pa_builtins
568 {
569 PA_BUILTIN_COPYSIGNQ,
570 PA_BUILTIN_FABSQ,
571 PA_BUILTIN_INFQ,
572 PA_BUILTIN_HUGE_VALQ,
573 PA_BUILTIN_max
574 };
575
576 static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
577
578 static void
579 pa_init_builtins (void)
580 {
581 #ifdef DONT_HAVE_FPUTC_UNLOCKED
582 {
583 tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
584 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
585 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
586 }
587 #endif
588 #if TARGET_HPUX_11
589 {
590 tree decl;
591
592 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
593 set_user_assembler_name (decl, "_Isfinite");
594 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
595 set_user_assembler_name (decl, "_Isfinitef");
596 }
597 #endif
598
599 if (HPUX_LONG_DOUBLE_LIBRARY)
600 {
601 tree decl, ftype;
602
603 /* Under HPUX, the __float128 type is a synonym for "long double". */
604 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
605 "__float128");
606
607 /* TFmode support builtins. */
608 ftype = build_function_type_list (long_double_type_node,
609 long_double_type_node,
610 NULL_TREE);
611 decl = add_builtin_function ("__builtin_fabsq", ftype,
612 PA_BUILTIN_FABSQ, BUILT_IN_MD,
613 "_U_Qfabs", NULL_TREE);
614 TREE_READONLY (decl) = 1;
615 pa_builtins[PA_BUILTIN_FABSQ] = decl;
616
617 ftype = build_function_type_list (long_double_type_node,
618 long_double_type_node,
619 long_double_type_node,
620 NULL_TREE);
621 decl = add_builtin_function ("__builtin_copysignq", ftype,
622 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
623 "_U_Qfcopysign", NULL_TREE);
624 TREE_READONLY (decl) = 1;
625 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
626
627 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
628 decl = add_builtin_function ("__builtin_infq", ftype,
629 PA_BUILTIN_INFQ, BUILT_IN_MD,
630 NULL, NULL_TREE);
631 pa_builtins[PA_BUILTIN_INFQ] = decl;
632
633 decl = add_builtin_function ("__builtin_huge_valq", ftype,
634 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
635 NULL, NULL_TREE);
636 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
637 }
638 }
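/* With the HP-UX long-double library, code can then use these builtins
   directly, for example:

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, x);

   fabsq and copysignq expand to calls to _U_Qfabs and _U_Qfcopysign;
   infq and huge_valq are expanded inline to a constant load by
   pa_expand_builtin below.  */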
639
640 static rtx
641 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
642 machine_mode mode ATTRIBUTE_UNUSED,
643 int ignore ATTRIBUTE_UNUSED)
644 {
645 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
646 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
647
648 switch (fcode)
649 {
650 case PA_BUILTIN_FABSQ:
651 case PA_BUILTIN_COPYSIGNQ:
652 return expand_call (exp, target, ignore);
653
654 case PA_BUILTIN_INFQ:
655 case PA_BUILTIN_HUGE_VALQ:
656 {
657 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
658 REAL_VALUE_TYPE inf;
659 rtx tmp;
660
661 real_inf (&inf);
662 tmp = const_double_from_real_value (inf, target_mode);
663
664 tmp = validize_mem (force_const_mem (target_mode, tmp));
665
666 if (target == 0)
667 target = gen_reg_rtx (target_mode);
668
669 emit_move_insn (target, tmp);
670 return target;
671 }
672
673 default:
674 gcc_unreachable ();
675 }
676
677 return NULL_RTX;
678 }
679
680 /* Function to init struct machine_function.
681 This will be called, via a pointer variable,
682 from push_function_context. */
683
684 static struct machine_function *
685 pa_init_machine_status (void)
686 {
687 return ggc_cleared_alloc<machine_function> ();
688 }
689
690 /* If FROM is a probable pointer register, mark TO as a probable
691 pointer register with the same pointer alignment as FROM. */
692
693 static void
694 copy_reg_pointer (rtx to, rtx from)
695 {
696 if (REG_POINTER (from))
697 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
698 }
699
700 /* Return 1 if X contains a symbolic expression. We know these
701 expressions will have one of a few well defined forms, so
702 we need only check those forms. */
703 int
704 pa_symbolic_expression_p (rtx x)
705 {
706
707 /* Strip off any HIGH. */
708 if (GET_CODE (x) == HIGH)
709 x = XEXP (x, 0);
710
711 return symbolic_operand (x, VOIDmode);
712 }
713
714 /* Accept any constant that can be moved in one instruction into a
715 general register. */
716 int
717 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
718 {
719 /* OK if ldo, ldil, or zdepi, can be used. */
720 return (VAL_14_BITS_P (ival)
721 || pa_ldil_cint_p (ival)
722 || pa_zdepi_cint_p (ival));
723 }
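/* For example, one constant per instruction class accepted above:

     8191        fits in 14 signed bits, loadable with ldo
     0x55550000  low 11 bits zero, loadable with ldil
     0x00ff0000  contiguous run of ones, loadable with zdepi
                 (deposit the 5-bit value -1 with field length 8)  */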
724 \f
725 /* True iff ldil can be used to load this CONST_INT. The least
726 significant 11 bits of the value must be zero and the value must
727 not change sign when extended from 32 to 64 bits. */
728 int
729 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
730 {
731 unsigned HOST_WIDE_INT x;
732
733 x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
734 return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
735 }
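/* A worked example, assuming a 64-bit HOST_WIDE_INT: the mask above
   selects bits 0-10 and bits 31-63, so IVAL is accepted when its low
   11 bits are clear and bits 31-63 are all equal:

     0x0000000012345000   x == 0           accepted
     0x0000000012345800   x == 0x800       rejected (low bits set)
     0xffffffff80001000   x == -1 << 31    accepted (sign extension)  */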
736
737 /* True iff zdepi can be used to generate this CONST_INT.
738 zdepi first sign extends a 5-bit signed number to a given field
739 length, then places this field anywhere in a zero. */
740 int
741 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
742 {
743 unsigned HOST_WIDE_INT lsb_mask, t;
744
745 /* This might not be obvious, but it's at least fast.
746 This function is critical; we don't have the time loops would take. */
747 lsb_mask = x & -x;
748 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
749 /* Return true iff t is a power of two. */
750 return ((t & (t - 1)) == 0);
751 }
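/* Two worked examples of the bit trick above:

     x = 0x3f0 (a run of six ones): lsb_mask = 0x10,
       t = (0x3f + 0x10) & ~0xf = 0x40, a power of two, so x is
       accepted; zdepi sign extends the 5-bit value -1 to a field of
       six ones and deposits it at bit 4.

     x = 0x201 (two ones separated by eight zeros): lsb_mask = 0x1,
       t = 0x21, not a power of two, so x is rejected; no sign-extended
       5-bit field produces this pattern.  */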
752
753 /* True iff depi or extru can be used to compute (reg & mask).
754 Accept bit patterns like these:
755 0....01....1
756 1....10....0
757 1..10..01..1 */
758 int
759 pa_and_mask_p (unsigned HOST_WIDE_INT mask)
760 {
761 mask = ~mask;
762 mask += mask & -mask;
763 return (mask & (mask - 1)) == 0;
764 }
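/* Worked example, with masks written as sign-extended HOST_WIDE_INTs:
   mask = ~0xf0 (pattern 1..10..01..1, zeros in bits 4-7) gives
   ~mask = 0xf0, a single contiguous run of ones; adding its low bit
   yields 0x100, a power of two, so the mask is accepted.  By contrast,
   mask = 0xb (0...01011) gives a non-contiguous ~mask and is
   rejected.  */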
765
766 /* True iff depi can be used to compute (reg | MASK). */
767 int
768 pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
769 {
770 mask += mask & -mask;
771 return (mask & (mask - 1)) == 0;
772 }
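/* The same contiguous-run test, applied to MASK directly: depi can set
   a single field of ones, so 0x00ff0000 is accepted while 0x00ff00ff
   (two separate fields) is rejected and needs two insns.  */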
773 \f
774 /* Legitimize PIC addresses. If the address is already
775 position-independent, we return ORIG. Newly generated
776 position-independent addresses go to REG. If we need more
777 than one register, we lose. */
778
779 static rtx
780 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
781 {
782 rtx pic_ref = orig;
783
784 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
785
786 /* Labels need special handling. */
787 if (pic_label_operand (orig, mode))
788 {
789 rtx_insn *insn;
790
791 /* We do not want to go through the movXX expanders here since that
792 would create recursion.
793
794 Nor do we really want to call a generator for a named pattern
795 since that requires multiple patterns if we want to support
796 multiple word sizes.
797
798 So instead we just emit the raw set, which avoids the movXX
799 expanders completely. */
800 mark_reg_pointer (reg, BITS_PER_UNIT);
801 insn = emit_insn (gen_rtx_SET (reg, orig));
802
803 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
804 add_reg_note (insn, REG_EQUAL, orig);
805
806 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
807 and update LABEL_NUSES because this is not done automatically. */
808 if (reload_in_progress || reload_completed)
809 {
810 /* Extract LABEL_REF. */
811 if (GET_CODE (orig) == CONST)
812 orig = XEXP (XEXP (orig, 0), 0);
813 /* Extract CODE_LABEL. */
814 orig = XEXP (orig, 0);
815 add_reg_note (insn, REG_LABEL_OPERAND, orig);
816 /* Make sure we have a label and not a note. */
817 if (LABEL_P (orig))
818 LABEL_NUSES (orig)++;
819 }
820 crtl->uses_pic_offset_table = 1;
821 return reg;
822 }
823 if (GET_CODE (orig) == SYMBOL_REF)
824 {
825 rtx_insn *insn;
826 rtx tmp_reg;
827
828 gcc_assert (reg);
829
830 /* Before reload, allocate a temporary register for the intermediate
831 result. This allows the sequence to be deleted when the final
832 result is unused and the insns are trivially dead. */
833 tmp_reg = ((reload_in_progress || reload_completed)
834 ? reg : gen_reg_rtx (Pmode));
835
836 if (function_label_operand (orig, VOIDmode))
837 {
838 /* Force function label into memory in word mode. */
839 orig = XEXP (force_const_mem (word_mode, orig), 0);
840 /* Load plabel address from DLT. */
841 emit_move_insn (tmp_reg,
842 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
843 gen_rtx_HIGH (word_mode, orig)));
844 pic_ref
845 = gen_const_mem (Pmode,
846 gen_rtx_LO_SUM (Pmode, tmp_reg,
847 gen_rtx_UNSPEC (Pmode,
848 gen_rtvec (1, orig),
849 UNSPEC_DLTIND14R)));
850 emit_move_insn (reg, pic_ref);
851 /* Now load address of function descriptor. */
852 pic_ref = gen_rtx_MEM (Pmode, reg);
853 }
854 else
855 {
856 /* Load symbol reference from DLT. */
857 emit_move_insn (tmp_reg,
858 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
859 gen_rtx_HIGH (word_mode, orig)));
860 pic_ref
861 = gen_const_mem (Pmode,
862 gen_rtx_LO_SUM (Pmode, tmp_reg,
863 gen_rtx_UNSPEC (Pmode,
864 gen_rtvec (1, orig),
865 UNSPEC_DLTIND14R)));
866 }
867
868 crtl->uses_pic_offset_table = 1;
869 mark_reg_pointer (reg, BITS_PER_UNIT);
870 insn = emit_move_insn (reg, pic_ref);
871
872 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
873 set_unique_reg_note (insn, REG_EQUAL, orig);
874
875 return reg;
876 }
877 else if (GET_CODE (orig) == CONST)
878 {
879 rtx base;
880
881 if (GET_CODE (XEXP (orig, 0)) == PLUS
882 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
883 return orig;
884
885 gcc_assert (reg);
886 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
887
888 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
889 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
890 base == reg ? 0 : reg);
891
892 if (GET_CODE (orig) == CONST_INT)
893 {
894 if (INT_14_BITS (orig))
895 return plus_constant (Pmode, base, INTVAL (orig));
896 orig = force_reg (Pmode, orig);
897 }
898 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
899 /* Likewise, should we set special REG_NOTEs here? */
900 }
901
902 return pic_ref;
903 }
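/* A sketch of the code generated by the SYMBOL_REF case above for a
   plain data symbol S (roughly, an addil/ldw pair in the 32-bit ABI):

     tmp_reg = pic_offset_table_rtx + high (S)
     reg     = mem (lo_sum (tmp_reg, unspec [S] UNSPEC_DLTIND14R))

   i.e. the address of S is loaded from its DLT entry, addressed off
   the PIC register.  For a function symbol, an additional load through
   the plabel fetches the address of the function descriptor.  */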
904
905 static GTY(()) rtx gen_tls_tga;
906
907 static rtx
908 gen_tls_get_addr (void)
909 {
910 if (!gen_tls_tga)
911 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
912 return gen_tls_tga;
913 }
914
915 static rtx
916 hppa_tls_call (rtx arg)
917 {
918 rtx ret;
919
920 ret = gen_reg_rtx (Pmode);
921 emit_library_call_value (gen_tls_get_addr (), ret,
922 LCT_CONST, Pmode, arg, Pmode);
923
924 return ret;
925 }
926
927 static rtx
928 legitimize_tls_address (rtx addr)
929 {
930 rtx ret, tmp, t1, t2, tp;
931 rtx_insn *insn;
932
933 /* Currently, we can't handle anything but a SYMBOL_REF. */
934 if (GET_CODE (addr) != SYMBOL_REF)
935 return addr;
936
937 switch (SYMBOL_REF_TLS_MODEL (addr))
938 {
939 case TLS_MODEL_GLOBAL_DYNAMIC:
940 tmp = gen_reg_rtx (Pmode);
941 if (flag_pic)
942 emit_insn (gen_tgd_load_pic (tmp, addr));
943 else
944 emit_insn (gen_tgd_load (tmp, addr));
945 ret = hppa_tls_call (tmp);
946 break;
947
948 case TLS_MODEL_LOCAL_DYNAMIC:
949 ret = gen_reg_rtx (Pmode);
950 tmp = gen_reg_rtx (Pmode);
951 start_sequence ();
952 if (flag_pic)
953 emit_insn (gen_tld_load_pic (tmp, addr));
954 else
955 emit_insn (gen_tld_load (tmp, addr));
956 t1 = hppa_tls_call (tmp);
957 insn = get_insns ();
958 end_sequence ();
959 t2 = gen_reg_rtx (Pmode);
960 emit_libcall_block (insn, t2, t1,
961 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
962 UNSPEC_TLSLDBASE));
963 emit_insn (gen_tld_offset_load (ret, addr, t2));
964 break;
965
966 case TLS_MODEL_INITIAL_EXEC:
967 tp = gen_reg_rtx (Pmode);
968 tmp = gen_reg_rtx (Pmode);
969 ret = gen_reg_rtx (Pmode);
970 emit_insn (gen_tp_load (tp));
971 if (flag_pic)
972 emit_insn (gen_tie_load_pic (tmp, addr));
973 else
974 emit_insn (gen_tie_load (tmp, addr));
975 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
976 break;
977
978 case TLS_MODEL_LOCAL_EXEC:
979 tp = gen_reg_rtx (Pmode);
980 ret = gen_reg_rtx (Pmode);
981 emit_insn (gen_tp_load (tp));
982 emit_insn (gen_tle_load (ret, addr, tp));
983 break;
984
985 default:
986 gcc_unreachable ();
987 }
988
989 return ret;
990 }
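/* Summarizing the cases above:

     global dynamic: tgd_load{,_pic}, then a __tls_get_addr call
     local dynamic:  tld_load{,_pic}, one __tls_get_addr call for the
                     module base, then tld_offset_load for the symbol
     initial exec:   tp_load, tie_load{,_pic}, then an add
     local exec:     tp_load, then tle_load  */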
991
992 /* Helper for hppa_legitimize_address. Given X, return true if it
993 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
994
995 These respectively represent canonical shift-add rtxs or scaled
996 memory addresses. */
997 static bool
998 mem_shadd_or_shadd_rtx_p (rtx x)
999 {
1000 return ((GET_CODE (x) == ASHIFT
1001 || GET_CODE (x) == MULT)
1002 && GET_CODE (XEXP (x, 1)) == CONST_INT
1003 && ((GET_CODE (x) == ASHIFT
1004 && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
1005 || (GET_CODE (x) == MULT
1006 && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
1007 }
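/* For example, these two forms describe the same scale by four:

     (ashift (reg) (const_int 2))   canonical shift-add form
     (mult (reg) (const_int 4))     form appearing inside a MEM

   Both are accepted here so that shadd insns and scaled indexed
   addresses can be formed from either representation.  */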
1008
1009 /* Try machine-dependent ways of modifying an illegitimate address
1010 to be legitimate. If we find one, return the new, valid address.
1011 This macro is used in only one place: `memory_address' in explow.c.
1012
1013 OLDX is the address as it was before break_out_memory_refs was called.
1014 In some cases it is useful to look at this to decide what needs to be done.
1015
1016 It is always safe for this macro to do nothing. It exists to recognize
1017 opportunities to optimize the output.
1018
1019 For the PA, transform:
1020
1021 memory(X + <large int>)
1022
1023 into:
1024
1025 if (<large int> & mask) >= 16
1026 Y = (<large int> & ~mask) + mask + 1 Round up.
1027 else
1028 Y = (<large int> & ~mask) Round down.
1029 Z = X + Y
1030 memory (Z + (<large int> - Y));
1031
1032 This is for CSE to find several similar references, and only use one Z.
1033
1034 X can either be a SYMBOL_REF or REG, but because combine cannot
1035 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1036 D will not fit in 14 bits.
1037
1038 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1039 0x1f as the mask.
1040
1041 MODE_INT references allow displacements which fit in 14 bits, so use
1042 0x3fff as the mask.
1043
1044 This relies on the fact that most mode MODE_FLOAT references will use FP
1045 registers and most mode MODE_INT references will use integer registers.
1046 (In the rare case of an FP register used in an integer MODE, we depend
1047 on secondary reloads to clean things up.)
1048
1049
1050 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1051 manner if Y is 2, 4, or 8 (this allows more shadd insns and shifted
1052 indexed addressing modes to be used).
1053
1054 Note that the addresses passed into hppa_legitimize_address always
1055 come from a MEM, so we only have to match the MULT form on incoming
1056 addresses. But to be future proof we also match the ASHIFT form.
1057
1058 However, this routine always places those shift-add sequences into
1059 registers, so we have to generate the ASHIFT form as our output.
1060
1061 Put X and Z into registers. Then put the entire expression into
1062 a register. */
1063
1064 rtx
1065 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1066 machine_mode mode)
1067 {
1068 rtx orig = x;
1069
1070 /* We need to canonicalize the order of operands in unscaled indexed
1071 addresses since the code that checks if an address is valid doesn't
1072 always try both orders. */
1073 if (!TARGET_NO_SPACE_REGS
1074 && GET_CODE (x) == PLUS
1075 && GET_MODE (x) == Pmode
1076 && REG_P (XEXP (x, 0))
1077 && REG_P (XEXP (x, 1))
1078 && REG_POINTER (XEXP (x, 0))
1079 && !REG_POINTER (XEXP (x, 1)))
1080 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1081
1082 if (tls_referenced_p (x))
1083 return legitimize_tls_address (x);
1084 else if (flag_pic)
1085 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1086
1087 /* Strip off CONST. */
1088 if (GET_CODE (x) == CONST)
1089 x = XEXP (x, 0);
1090
1091 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1092 That should always be safe. */
1093 if (GET_CODE (x) == PLUS
1094 && GET_CODE (XEXP (x, 0)) == REG
1095 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1096 {
1097 rtx reg = force_reg (Pmode, XEXP (x, 1));
1098 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1099 }
1100
1101 /* Note we must reject symbols which represent function addresses
1102 since the assembler/linker can't handle arithmetic on plabels. */
1103 if (GET_CODE (x) == PLUS
1104 && GET_CODE (XEXP (x, 1)) == CONST_INT
1105 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1106 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1107 || GET_CODE (XEXP (x, 0)) == REG))
1108 {
1109 rtx int_part, ptr_reg;
1110 int newoffset;
1111 int offset = INTVAL (XEXP (x, 1));
1112 int mask;
1113
1114 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1115 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
1116
1117 /* Choose which way to round the offset. Round up if we
1118 are >= halfway to the next boundary. */
1119 if ((offset & mask) >= ((mask + 1) / 2))
1120 newoffset = (offset & ~ mask) + mask + 1;
1121 else
1122 newoffset = (offset & ~ mask);
1123
1124 /* If the newoffset will not fit in 14 bits (ldo), then
1125 handling this would take 4 or 5 instructions (2 to load
1126 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1127 add the new offset and the SYMBOL_REF.) Combine can
1128 not handle 4->2 or 5->2 combinations, so do not create
1129 them. */
1130 if (! VAL_14_BITS_P (newoffset)
1131 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1132 {
1133 rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
1134 rtx tmp_reg
1135 = force_reg (Pmode,
1136 gen_rtx_HIGH (Pmode, const_part));
1137 ptr_reg
1138 = force_reg (Pmode,
1139 gen_rtx_LO_SUM (Pmode,
1140 tmp_reg, const_part));
1141 }
1142 else
1143 {
1144 if (! VAL_14_BITS_P (newoffset))
1145 int_part = force_reg (Pmode, GEN_INT (newoffset));
1146 else
1147 int_part = GEN_INT (newoffset);
1148
1149 ptr_reg = force_reg (Pmode,
1150 gen_rtx_PLUS (Pmode,
1151 force_reg (Pmode, XEXP (x, 0)),
1152 int_part));
1153 }
1154 return plus_constant (Pmode, ptr_reg, offset - newoffset);
1155 }
1156
1157 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1158
1159 if (GET_CODE (x) == PLUS
1160 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1161 && (OBJECT_P (XEXP (x, 1))
1162 || GET_CODE (XEXP (x, 1)) == SUBREG)
1163 && GET_CODE (XEXP (x, 1)) != CONST)
1164 {
1165 /* If we were given a MULT, we must fix the constant
1166 as we're going to create the ASHIFT form. */
1167 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1168 if (GET_CODE (XEXP (x, 0)) == MULT)
1169 shift_val = exact_log2 (shift_val);
1170
1171 rtx reg1, reg2;
1172 reg1 = XEXP (x, 1);
1173 if (GET_CODE (reg1) != REG)
1174 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1175
1176 reg2 = XEXP (XEXP (x, 0), 0);
1177 if (GET_CODE (reg2) != REG)
1178 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1179
1180 return force_reg (Pmode,
1181 gen_rtx_PLUS (Pmode,
1182 gen_rtx_ASHIFT (Pmode, reg2,
1183 GEN_INT (shift_val)),
1184 reg1));
1185 }
1186
1187 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1188
1189 Only do so for floating point modes since this is more speculative
1190 and we lose if it's an integer store. */
1191 if (GET_CODE (x) == PLUS
1192 && GET_CODE (XEXP (x, 0)) == PLUS
1193 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
1194 && (mode == SFmode || mode == DFmode))
1195 {
1196 int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
1197
1198 /* If we were given a MULT, we must fix the constant
1199 as we're going to create the ASHIFT form. */
1200 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
1201 shift_val = exact_log2 (shift_val);
1202
1203 /* Try and figure out what to use as a base register. */
1204 rtx reg1, reg2, base, idx;
1205
1206 reg1 = XEXP (XEXP (x, 0), 1);
1207 reg2 = XEXP (x, 1);
1208 base = NULL_RTX;
1209 idx = NULL_RTX;
1210
1211 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1212 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1213 it's a base register below. */
1214 if (GET_CODE (reg1) != REG)
1215 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1216
1217 if (GET_CODE (reg2) != REG)
1218 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1219
1220 /* Figure out what the base and index are. */
1221
1222 if (GET_CODE (reg1) == REG
1223 && REG_POINTER (reg1))
1224 {
1225 base = reg1;
1226 idx = gen_rtx_PLUS (Pmode,
1227 gen_rtx_ASHIFT (Pmode,
1228 XEXP (XEXP (XEXP (x, 0), 0), 0),
1229 GEN_INT (shift_val)),
1230 XEXP (x, 1));
1231 }
1232 else if (GET_CODE (reg2) == REG
1233 && REG_POINTER (reg2))
1234 {
1235 base = reg2;
1236 idx = XEXP (x, 0);
1237 }
1238
1239 if (base == 0)
1240 return orig;
1241
1242 /* If the index adds a large constant, try to scale the
1243 constant so that it can be loaded with only one insn. */
1244 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1245 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1246 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1247 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1248 {
1249 /* Divide the CONST_INT by the scale factor, then add it to A. */
1250 int val = INTVAL (XEXP (idx, 1));
1251 val /= (1 << shift_val);
1252
1253 reg1 = XEXP (XEXP (idx, 0), 0);
1254 if (GET_CODE (reg1) != REG)
1255 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1256
1257 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1258
1259 /* We can now generate a simple scaled indexed address. */
1260 return
1261 force_reg
1262 (Pmode, gen_rtx_PLUS (Pmode,
1263 gen_rtx_ASHIFT (Pmode, reg1,
1264 GEN_INT (shift_val)),
1265 base));
1266 }
1267
1268 /* If B + C is still a valid base register, then add them. */
1269 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1270 && INTVAL (XEXP (idx, 1)) <= 4096
1271 && INTVAL (XEXP (idx, 1)) >= -4096)
1272 {
1273 rtx reg1, reg2;
1274
1275 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1276
1277 reg2 = XEXP (XEXP (idx, 0), 0);
1278 if (GET_CODE (reg2) != CONST_INT)
1279 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1280
1281 return force_reg (Pmode,
1282 gen_rtx_PLUS (Pmode,
1283 gen_rtx_ASHIFT (Pmode, reg2,
1284 GEN_INT (shift_val)),
1285 reg1));
1286 }
1287
1288 /* Get the index into a register, then add the base + index and
1289 return a register holding the result. */
1290
1291 /* First get A into a register. */
1292 reg1 = XEXP (XEXP (idx, 0), 0);
1293 if (GET_CODE (reg1) != REG)
1294 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1295
1296 /* And get B into a register. */
1297 reg2 = XEXP (idx, 1);
1298 if (GET_CODE (reg2) != REG)
1299 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1300
1301 reg1 = force_reg (Pmode,
1302 gen_rtx_PLUS (Pmode,
1303 gen_rtx_ASHIFT (Pmode, reg1,
1304 GEN_INT (shift_val)),
1305 reg2));
1306
1307 /* Add the result to our base register and return. */
1308 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1309
1310 }
1311
1312 /* Uh-oh. We might have an address for x[n-100000]. This needs
1313 special handling to avoid creating an indexed memory address
1314 with x-100000 as the base.
1315
1316 If the constant part is small enough, then it's still safe because
1317 there is a guard page at the beginning and end of the data segment.
1318
1319 Scaled references are common enough that we want to try and rearrange the
1320 terms so that we can use indexing for these addresses too. Only
1321 do the optimization for floating point modes. */
1322
1323 if (GET_CODE (x) == PLUS
1324 && pa_symbolic_expression_p (XEXP (x, 1)))
1325 {
1326 /* Ugly. We modify things here so that the address offset specified
1327 by the index expression is computed first, then added to x to form
1328 the entire address. */
1329
1330 rtx regx1, regx2, regy1, regy2, y;
1331
1332 /* Strip off any CONST. */
1333 y = XEXP (x, 1);
1334 if (GET_CODE (y) == CONST)
1335 y = XEXP (y, 0);
1336
1337 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1338 {
1339 /* See if this looks like
1340 (plus (mult (reg) (mem_shadd_const))
1341 (const (plus (symbol_ref) (const_int))))
1342
1343 Where const_int is small. In that case the const
1344 expression is a valid pointer for indexing.
1345
1346 If const_int is big, but can be divided evenly by shadd_const
1347 and added to (reg). This allows more scaled indexed addresses. */
1348 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1349 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1350 && GET_CODE (XEXP (y, 1)) == CONST_INT
1351 && INTVAL (XEXP (y, 1)) >= -4096
1352 && INTVAL (XEXP (y, 1)) <= 4095)
1353 {
1354 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1355
1356 /* If we were given a MULT, we must fix the constant
1357 as we're going to create the ASHIFT form. */
1358 if (GET_CODE (XEXP (x, 0)) == MULT)
1359 shift_val = exact_log2 (shift_val);
1360
1361 rtx reg1, reg2;
1362
1363 reg1 = XEXP (x, 1);
1364 if (GET_CODE (reg1) != REG)
1365 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1366
1367 reg2 = XEXP (XEXP (x, 0), 0);
1368 if (GET_CODE (reg2) != REG)
1369 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1370
1371 return
1372 force_reg (Pmode,
1373 gen_rtx_PLUS (Pmode,
1374 gen_rtx_ASHIFT (Pmode,
1375 reg2,
1376 GEN_INT (shift_val)),
1377 reg1));
1378 }
1379 else if ((mode == DFmode || mode == SFmode)
1380 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1381 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1382 && GET_CODE (XEXP (y, 1)) == CONST_INT
1383 && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
1384 {
1385 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1386
1387 /* If we were given a MULT, we must fix the constant
1388 as we're going to create the ASHIFT form. */
1389 if (GET_CODE (XEXP (x, 0)) == MULT)
1390 shift_val = exact_log2 (shift_val);
1391
1392 regx1
1393 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1394 / INTVAL (XEXP (XEXP (x, 0), 1))));
1395 regx2 = XEXP (XEXP (x, 0), 0);
1396 if (GET_CODE (regx2) != REG)
1397 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1398 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1399 regx2, regx1));
1400 return
1401 force_reg (Pmode,
1402 gen_rtx_PLUS (Pmode,
1403 gen_rtx_ASHIFT (Pmode, regx2,
1404 GEN_INT (shift_val)),
1405 force_reg (Pmode, XEXP (y, 0))));
1406 }
1407 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1408 && INTVAL (XEXP (y, 1)) >= -4096
1409 && INTVAL (XEXP (y, 1)) <= 4095)
1410 {
1411 /* This is safe because of the guard page at the
1412 beginning and end of the data space. Just
1413 return the original address. */
1414 return orig;
1415 }
1416 else
1417 {
1418 /* Doesn't look like one we can optimize. */
1419 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1420 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1421 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1422 regx1 = force_reg (Pmode,
1423 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1424 regx1, regy2));
1425 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1426 }
1427 }
1428 }
1429
1430 return orig;
1431 }
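/* A worked example of the <large int> transformation described above,
   for a MODE_INT reference (mask = 0x3fff):

     memory (X + 0x10010)

   has 0x10010 & 0x3fff == 0x10, which is below halfway, so we round
   down: Y = 0x10000, Z = X + Y, and the reference becomes
   memory (Z + 0x10), whose displacement fits in 14 bits.  A nearby
   memory (X + 0x10020) computes the same Z, letting CSE share the
   single expensive addition.  */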
1432
1433 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1434
1435 Compute extra cost of moving data between one register class
1436 and another.
1437
1438 Make moves from SAR so expensive they should never happen. We used to
1439 have 0xffff here, but that generates overflow in rare cases.
1440
1441 Copies involving a FP register and a non-FP register are relatively
1442 expensive because they must go through memory.
1443
1444 Other copies are reasonably cheap. */
1445
1446 static int
1447 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
1448 reg_class_t from, reg_class_t to)
1449 {
1450 if (from == SHIFT_REGS)
1451 return 0x100;
1452 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1453 return 18;
1454 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1455 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1456 return 16;
1457 else
1458 return 2;
1459 }
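/* To summarize the costs returned above, where a plain reg-reg copy
   costs 2:

     from SHIFT_REGS (SAR)      0x100  (effectively never)
     FP to SHIFT_REGS           18
     between FP and non-FP      16     (must go through memory)
     everything else            2  */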
1460
1461 /* For the HPPA, REG, REG+CONST and LO_SUM addresses cost 1,
1462 and addresses involving symbolic constants cost 2.
1463
1464 PIC addresses are very expensive.
1465
1466 It is no coincidence that this has the same structure
1467 as pa_legitimate_address_p. */
1468
1469 static int
1470 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1471 addr_space_t as ATTRIBUTE_UNUSED,
1472 bool speed ATTRIBUTE_UNUSED)
1473 {
1474 switch (GET_CODE (X))
1475 {
1476 case REG:
1477 case PLUS:
1478 case LO_SUM:
1479 return 1;
1480 case HIGH:
1481 return 2;
1482 default:
1483 return 4;
1484 }
1485 }
1486
1487 /* Compute a (partial) cost for rtx X. Return true if the complete
1488 cost has been computed, and false if subexpressions should be
1489 scanned. In either case, *TOTAL contains the cost result. */
1490
1491 static bool
1492 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1493 int opno ATTRIBUTE_UNUSED,
1494 int *total, bool speed ATTRIBUTE_UNUSED)
1495 {
1496 int factor;
1497 int code = GET_CODE (x);
1498
1499 switch (code)
1500 {
1501 case CONST_INT:
1502 if (INTVAL (x) == 0)
1503 *total = 0;
1504 else if (INT_14_BITS (x))
1505 *total = 1;
1506 else
1507 *total = 2;
1508 return true;
1509
1510 case HIGH:
1511 *total = 2;
1512 return true;
1513
1514 case CONST:
1515 case LABEL_REF:
1516 case SYMBOL_REF:
1517 *total = 4;
1518 return true;
1519
1520 case CONST_DOUBLE:
1521 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1522 && outer_code != SET)
1523 *total = 0;
1524 else
1525 *total = 8;
1526 return true;
1527
1528 case MULT:
1529 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1530 {
1531 *total = COSTS_N_INSNS (3);
1532 return true;
1533 }
1534
1535 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1536 factor = GET_MODE_SIZE (mode) / 4;
1537 if (factor == 0)
1538 factor = 1;
1539
1540 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1541 *total = factor * factor * COSTS_N_INSNS (8);
1542 else
1543 *total = factor * factor * COSTS_N_INSNS (20);
1544 return true;
1545
1546 case DIV:
1547 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1548 {
1549 *total = COSTS_N_INSNS (14);
1550 return true;
1551 }
1552 /* FALLTHRU */
1553
1554 case UDIV:
1555 case MOD:
1556 case UMOD:
1557 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1558 factor = GET_MODE_SIZE (mode) / 4;
1559 if (factor == 0)
1560 factor = 1;
1561
1562 *total = factor * factor * COSTS_N_INSNS (60);
1563 return true;
1564
1565 case PLUS: /* this includes shNadd insns */
1566 case MINUS:
1567 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1568 {
1569 *total = COSTS_N_INSNS (3);
1570 return true;
1571 }
1572
1573 /* A size N times larger than UNITS_PER_WORD needs N times as
1574 many insns, taking N times as long. */
1575 factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
1576 if (factor == 0)
1577 factor = 1;
1578 *total = factor * COSTS_N_INSNS (1);
1579 return true;
1580
1581 case ASHIFT:
1582 case ASHIFTRT:
1583 case LSHIFTRT:
1584 *total = COSTS_N_INSNS (1);
1585 return true;
1586
1587 default:
1588 return false;
1589 }
1590 }
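/* For example, a DImode MULT on a 32-bit target has factor
   GET_MODE_SIZE / 4 == 2, so with PA 1.1 FP multiplies available its
   cost is 2 * 2 * COSTS_N_INSNS (8) == COSTS_N_INSNS (32), four times
   the SImode cost, in line with the O(N*N) note above.  */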
1591
1592 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1593 new rtx with the correct mode. */
1594 static inline rtx
1595 force_mode (machine_mode mode, rtx orig)
1596 {
1597 if (mode == GET_MODE (orig))
1598 return orig;
1599
1600 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1601
1602 return gen_rtx_REG (mode, REGNO (orig));
1603 }
1604
1605 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1606
1607 static bool
1608 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1609 {
1610 return tls_referenced_p (x);
1611 }
1612
1613 /* Emit insns to move operands[1] into operands[0].
1614
1615 Return 1 if we have written out everything that needs to be done to
1616 do the move. Otherwise, return 0 and the caller will emit the move
1617 normally.
1618
1619 Note SCRATCH_REG may not be in the proper mode depending on how it
1620 will be used. This routine is responsible for creating a new copy
1621 of SCRATCH_REG in the proper mode. */
1622
1623 int
1624 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1625 {
1626 register rtx operand0 = operands[0];
1627 register rtx operand1 = operands[1];
1628 register rtx tem;
1629
1630 /* We can only handle indexed addresses in the destination operand
1631 of floating point stores. Thus, we need to break out indexed
1632 addresses from the destination operand. */
1633 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1634 {
1635 gcc_assert (can_create_pseudo_p ());
1636
1637 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1638 operand0 = replace_equiv_address (operand0, tem);
1639 }
1640
1641 /* On targets with non-equivalent space registers, break out unscaled
1642 indexed addresses from the source operand before the final CSE.
1643 We have to do this because the REG_POINTER flag is not correctly
1644 carried through various optimization passes and CSE may substitute
1645 a pseudo without the pointer set for one with the pointer set. As
1646 a result, we lose various opportunities to create insns with
1647 unscaled indexed addresses. */
1648 if (!TARGET_NO_SPACE_REGS
1649 && !cse_not_expected
1650 && GET_CODE (operand1) == MEM
1651 && GET_CODE (XEXP (operand1, 0)) == PLUS
1652 && REG_P (XEXP (XEXP (operand1, 0), 0))
1653 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1654 operand1
1655 = replace_equiv_address (operand1,
1656 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1657
1658 if (scratch_reg
1659 && reload_in_progress && GET_CODE (operand0) == REG
1660 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1661 operand0 = reg_equiv_mem (REGNO (operand0));
1662 else if (scratch_reg
1663 && reload_in_progress && GET_CODE (operand0) == SUBREG
1664 && GET_CODE (SUBREG_REG (operand0)) == REG
1665 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1666 {
1667 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1668 the code which tracks sets/uses for delete_output_reload. */
1669 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1670 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1671 SUBREG_BYTE (operand0));
1672 operand0 = alter_subreg (&temp, true);
1673 }
1674
1675 if (scratch_reg
1676 && reload_in_progress && GET_CODE (operand1) == REG
1677 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1678 operand1 = reg_equiv_mem (REGNO (operand1));
1679 else if (scratch_reg
1680 && reload_in_progress && GET_CODE (operand1) == SUBREG
1681 && GET_CODE (SUBREG_REG (operand1)) == REG
1682 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1683 {
1684 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1685 the code which tracks sets/uses for delete_output_reload. */
1686 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1687 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1688 SUBREG_BYTE (operand1));
1689 operand1 = alter_subreg (&temp, true);
1690 }
1691
1692 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1693 && ((tem = find_replacement (&XEXP (operand0, 0)))
1694 != XEXP (operand0, 0)))
1695 operand0 = replace_equiv_address (operand0, tem);
1696
1697 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1698 && ((tem = find_replacement (&XEXP (operand1, 0)))
1699 != XEXP (operand1, 0)))
1700 operand1 = replace_equiv_address (operand1, tem);
1701
1702 /* Handle secondary reloads for loads/stores of FP registers from
1703 REG+D addresses where D does not fit in 5 or 14 bits, including
1704 (subreg (mem (addr))) cases, and reloads for other unsupported
1705 memory operands. */
1706 if (scratch_reg
1707 && FP_REG_P (operand0)
1708 && (MEM_P (operand1)
1709 || (GET_CODE (operand1) == SUBREG
1710 && MEM_P (XEXP (operand1, 0)))))
1711 {
1712 rtx op1 = operand1;
1713
1714 if (GET_CODE (op1) == SUBREG)
1715 op1 = XEXP (op1, 0);
1716
1717 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1718 {
1719 if (!(TARGET_PA_20
1720 && !TARGET_ELF32
1721 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1722 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1723 {
1724 /* SCRATCH_REG will hold an address and maybe the actual data.
1725 We want it in WORD_MODE regardless of what mode it was
1726 originally given to us. */
1727 scratch_reg = force_mode (word_mode, scratch_reg);
1728
1729 /* D might not fit in 14 bits either; for such cases load D
1730 into scratch reg. */
1731 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1732 {
1733 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1734 emit_move_insn (scratch_reg,
1735 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1736 Pmode,
1737 XEXP (XEXP (op1, 0), 0),
1738 scratch_reg));
1739 }
1740 else
1741 emit_move_insn (scratch_reg, XEXP (op1, 0));
1742 emit_insn (gen_rtx_SET (operand0,
1743 replace_equiv_address (op1, scratch_reg)));
1744 return 1;
1745 }
1746 }
1747 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1748 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1749 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1750 {
1751 /* Load memory address into SCRATCH_REG. */
1752 scratch_reg = force_mode (word_mode, scratch_reg);
1753 emit_move_insn (scratch_reg, XEXP (op1, 0));
1754 emit_insn (gen_rtx_SET (operand0,
1755 replace_equiv_address (op1, scratch_reg)));
1756 return 1;
1757 }
1758 }
1759 else if (scratch_reg
1760 && FP_REG_P (operand1)
1761 && (MEM_P (operand0)
1762 || (GET_CODE (operand0) == SUBREG
1763 && MEM_P (XEXP (operand0, 0)))))
1764 {
1765 rtx op0 = operand0;
1766
1767 if (GET_CODE (op0) == SUBREG)
1768 op0 = XEXP (op0, 0);
1769
1770 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1771 {
1772 if (!(TARGET_PA_20
1773 && !TARGET_ELF32
1774 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1775 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1776 {
1777 /* SCRATCH_REG will hold an address and maybe the actual data.
1778 We want it in WORD_MODE regardless of what mode it was
1779 originally given to us. */
1780 scratch_reg = force_mode (word_mode, scratch_reg);
1781
1782 /* D might not fit in 14 bits either; for such cases load D
1783 into scratch reg. */
1784 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1785 {
1786 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1787 emit_move_insn (scratch_reg,
1788 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1789 Pmode,
1790 XEXP (XEXP (op0, 0), 0),
1791 scratch_reg));
1792 }
1793 else
1794 emit_move_insn (scratch_reg, XEXP (op0, 0));
1795 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1796 operand1));
1797 return 1;
1798 }
1799 }
1800 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1801 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1802 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1803 {
1804 /* Load memory address into SCRATCH_REG. */
1805 scratch_reg = force_mode (word_mode, scratch_reg);
1806 emit_move_insn (scratch_reg, XEXP (op0, 0));
1807 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1808 operand1));
1809 return 1;
1810 }
1811 }
1812 /* Handle secondary reloads for loads of FP registers from constant
1813 expressions by forcing the constant into memory. For the most part,
1814 this is only necessary for SImode and DImode.
1815
1816 Use scratch_reg to hold the address of the memory location. */
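/* A hedged sketch of this path (added commentary): reloading
   (set (reg:SF fr4) (const_double 1.5)) forces 1.5 into the constant
   pool, loads the pool address into the scratch register, and then
   loads the FP register from memory, producing something like

       ldil L'L$C0000,%r1
       ldo R'L$C0000(%r1),%r1
       fldw 0(%r1),%fr4

   The label and register names above are illustrative only.  */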
1817 else if (scratch_reg
1818 && CONSTANT_P (operand1)
1819 && FP_REG_P (operand0))
1820 {
1821 rtx const_mem, xoperands[2];
1822
1823 if (operand1 == CONST0_RTX (mode))
1824 {
1825 emit_insn (gen_rtx_SET (operand0, operand1));
1826 return 1;
1827 }
1828
1829 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1830 it in WORD_MODE regardless of what mode it was originally given
1831 to us. */
1832 scratch_reg = force_mode (word_mode, scratch_reg);
1833
1834 /* Force the constant into memory and put the address of the
1835 memory location into scratch_reg. */
1836 const_mem = force_const_mem (mode, operand1);
1837 xoperands[0] = scratch_reg;
1838 xoperands[1] = XEXP (const_mem, 0);
1839 pa_emit_move_sequence (xoperands, Pmode, 0);
1840
1841 /* Now load the destination register. */
1842 emit_insn (gen_rtx_SET (operand0,
1843 replace_equiv_address (const_mem, scratch_reg)));
1844 return 1;
1845 }
1846 /* Handle secondary reloads for SAR. These occur when trying to load
1847 the SAR from memory or a constant. */
1848 else if (scratch_reg
1849 && GET_CODE (operand0) == REG
1850 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1851 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1852 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1853 {
1854 /* D might not fit in 14 bits either; for such cases load D into
1855 scratch reg. */
1856 if (GET_CODE (operand1) == MEM
1857 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1858 {
1859 /* We are reloading the address into the scratch register, so we
1860 want to make sure the scratch register is a full register. */
1861 scratch_reg = force_mode (word_mode, scratch_reg);
1862
1863 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1864 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1865 0)),
1866 Pmode,
1867 XEXP (XEXP (operand1, 0),
1868 0),
1869 scratch_reg));
1870
1871 /* Now we are going to load the scratch register from memory,
1872 we want to load it in the same width as the original MEM,
1873 which must be the same as the width of the ultimate destination,
1874 OPERAND0. */
1875 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1876
1877 emit_move_insn (scratch_reg,
1878 replace_equiv_address (operand1, scratch_reg));
1879 }
1880 else
1881 {
1882 /* We want to load the scratch register using the same mode as
1883 the ultimate destination. */
1884 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1885
1886 emit_move_insn (scratch_reg, operand1);
1887 }
1888
1889 /* And emit the insn to set the ultimate destination. We know that
1890 the scratch register has the same mode as the destination at this
1891 point. */
1892 emit_move_insn (operand0, scratch_reg);
1893 return 1;
1894 }
1895
1896 /* Handle the most common case: storing into a register. */
1897 if (register_operand (operand0, mode))
1898 {
1899 /* Legitimize TLS symbol references. This happens for references
1900 that aren't a legitimate constant. */
1901 if (PA_SYMBOL_REF_TLS_P (operand1))
1902 operand1 = legitimize_tls_address (operand1);
1903
1904 if (register_operand (operand1, mode)
1905 || (GET_CODE (operand1) == CONST_INT
1906 && pa_cint_ok_for_move (UINTVAL (operand1)))
1907 || (operand1 == CONST0_RTX (mode))
1908 || (GET_CODE (operand1) == HIGH
1909 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1910 /* Only `general_operands' can come here, so MEM is ok. */
1911 || GET_CODE (operand1) == MEM)
1912 {
1913 /* Various sets are created during RTL generation which don't
1914 have the REG_POINTER flag correctly set. After the CSE pass,
1915 instruction recognition can fail if we don't consistently
1916 set this flag when performing register copies. This should
1917 also improve the opportunities for creating insns that use
1918 unscaled indexing. */
1919 if (REG_P (operand0) && REG_P (operand1))
1920 {
1921 if (REG_POINTER (operand1)
1922 && !REG_POINTER (operand0)
1923 && !HARD_REGISTER_P (operand0))
1924 copy_reg_pointer (operand0, operand1);
1925 }
1926
1927 /* When MEMs are broken out, the REG_POINTER flag doesn't
1928 get set. In some cases, we can set the REG_POINTER flag
1929 from the declaration for the MEM. */
1930 if (REG_P (operand0)
1931 && GET_CODE (operand1) == MEM
1932 && !REG_POINTER (operand0))
1933 {
1934 tree decl = MEM_EXPR (operand1);
1935
1936 /* Set the register pointer flag and register alignment
1937 if the declaration for this memory reference is a
1938 pointer type. */
1939 if (decl)
1940 {
1941 tree type;
1942
1943 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1944 tree operand 1. */
1945 if (TREE_CODE (decl) == COMPONENT_REF)
1946 decl = TREE_OPERAND (decl, 1);
1947
1948 type = TREE_TYPE (decl);
1949 type = strip_array_types (type);
1950
1951 if (POINTER_TYPE_P (type))
1952 mark_reg_pointer (operand0, BITS_PER_UNIT);
1953 }
1954 }
1955
1956 emit_insn (gen_rtx_SET (operand0, operand1));
1957 return 1;
1958 }
1959 }
1960 else if (GET_CODE (operand0) == MEM)
1961 {
1962 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1963 && !(reload_in_progress || reload_completed))
1964 {
1965 rtx temp = gen_reg_rtx (DFmode);
1966
1967 emit_insn (gen_rtx_SET (temp, operand1));
1968 emit_insn (gen_rtx_SET (operand0, temp));
1969 return 1;
1970 }
1971 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1972 {
1973 /* Run this case quickly. */
1974 emit_insn (gen_rtx_SET (operand0, operand1));
1975 return 1;
1976 }
1977 if (! (reload_in_progress || reload_completed))
1978 {
1979 operands[0] = validize_mem (operand0);
1980 operands[1] = operand1 = force_reg (mode, operand1);
1981 }
1982 }
1983
1984 /* Simplify the source if we need to.
1985 Note we do have to handle function labels here, even though we do
1986 not consider them legitimate constants. Loop optimizations can
1987 call the emit_move_xxx with one as a source. */
1988 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1989 || (GET_CODE (operand1) == HIGH
1990 && symbolic_operand (XEXP (operand1, 0), mode))
1991 || function_label_operand (operand1, VOIDmode)
1992 || tls_referenced_p (operand1))
1993 {
1994 int ishighonly = 0;
1995
1996 if (GET_CODE (operand1) == HIGH)
1997 {
1998 ishighonly = 1;
1999 operand1 = XEXP (operand1, 0);
2000 }
2001 if (symbolic_operand (operand1, mode))
2002 {
2003 /* Argh. The assembler and linker can't handle arithmetic
2004 involving plabels.
2005
2006 So we force the plabel into memory, load operand0 from
2007 the memory location, then add in the constant part. */
2008 if ((GET_CODE (operand1) == CONST
2009 && GET_CODE (XEXP (operand1, 0)) == PLUS
2010 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2011 VOIDmode))
2012 || function_label_operand (operand1, VOIDmode))
2013 {
2014 rtx temp, const_part;
2015
2016 /* Figure out what (if any) scratch register to use. */
2017 if (reload_in_progress || reload_completed)
2018 {
2019 scratch_reg = scratch_reg ? scratch_reg : operand0;
2020 /* SCRATCH_REG will hold an address and maybe the actual
2021 data. We want it in WORD_MODE regardless of what mode it
2022 was originally given to us. */
2023 scratch_reg = force_mode (word_mode, scratch_reg);
2024 }
2025 else if (flag_pic)
2026 scratch_reg = gen_reg_rtx (Pmode);
2027
2028 if (GET_CODE (operand1) == CONST)
2029 {
2030 /* Save away the constant part of the expression. */
2031 const_part = XEXP (XEXP (operand1, 0), 1);
2032 gcc_assert (GET_CODE (const_part) == CONST_INT);
2033
2034 /* Force the function label into memory. */
2035 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2036 }
2037 else
2038 {
2039 /* No constant part. */
2040 const_part = NULL_RTX;
2041
2042 /* Force the function label into memory. */
2043 temp = force_const_mem (mode, operand1);
2044 }
2045
2046
2047 /* Get the address of the memory location. PIC-ify it if
2048 necessary. */
2049 temp = XEXP (temp, 0);
2050 if (flag_pic)
2051 temp = legitimize_pic_address (temp, mode, scratch_reg);
2052
2053 /* Put the address of the memory location into our destination
2054 register. */
2055 operands[1] = temp;
2056 pa_emit_move_sequence (operands, mode, scratch_reg);
2057
2058 /* Now load from the memory location into our destination
2059 register. */
2060 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2061 pa_emit_move_sequence (operands, mode, scratch_reg);
2062
2063 /* And add back in the constant part. */
2064 if (const_part != NULL_RTX)
2065 expand_inc (operand0, const_part);
2066
2067 return 1;
2068 }
2069
2070 if (flag_pic)
2071 {
2072 rtx_insn *insn;
2073 rtx temp;
2074
2075 if (reload_in_progress || reload_completed)
2076 {
2077 temp = scratch_reg ? scratch_reg : operand0;
2078 /* TEMP will hold an address and maybe the actual
2079 data. We want it in WORD_MODE regardless of what mode it
2080 was originally given to us. */
2081 temp = force_mode (word_mode, temp);
2082 }
2083 else
2084 temp = gen_reg_rtx (Pmode);
2085
2086 /* Force (const (plus (symbol) (const_int))) to memory
2087 if the const_int will not fit in 14 bits. Although
2088 this requires a relocation, the instruction sequence
2089 needed to load the value is shorter. */
2090 if (GET_CODE (operand1) == CONST
2091 && GET_CODE (XEXP (operand1, 0)) == PLUS
2092 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2093 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2094 {
2095 rtx x, m = force_const_mem (mode, operand1);
2096
2097 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2098 x = replace_equiv_address (m, x);
2099 insn = emit_move_insn (operand0, x);
2100 }
2101 else
2102 {
2103 operands[1] = legitimize_pic_address (operand1, mode, temp);
2104 if (REG_P (operand0) && REG_P (operands[1]))
2105 copy_reg_pointer (operand0, operands[1]);
2106 insn = emit_move_insn (operand0, operands[1]);
2107 }
2108
2109 /* Put a REG_EQUAL note on this insn. */
2110 set_unique_reg_note (insn, REG_EQUAL, operand1);
2111 }
2112 /* On the HPPA, references to data space are supposed to use dp,
2113 register 27, but showing it in the RTL inhibits various cse
2114 and loop optimizations. */
2115 else
2116 {
2117 rtx temp, set;
2118
2119 if (reload_in_progress || reload_completed)
2120 {
2121 temp = scratch_reg ? scratch_reg : operand0;
2122 /* TEMP will hold an address and maybe the actual
2123 data. We want it in WORD_MODE regardless of what mode it
2124 was originally given to us. */
2125 temp = force_mode (word_mode, temp);
2126 }
2127 else
2128 temp = gen_reg_rtx (mode);
2129
2130 /* Loading a SYMBOL_REF into a register makes that register
2131 safe to be used as the base in an indexed address.
2132
2133 Don't mark hard registers though. That loses. */
2134 if (GET_CODE (operand0) == REG
2135 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2136 mark_reg_pointer (operand0, BITS_PER_UNIT);
2137 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2138 mark_reg_pointer (temp, BITS_PER_UNIT);
2139
2140 if (ishighonly)
2141 set = gen_rtx_SET (operand0, temp);
2142 else
2143 set = gen_rtx_SET (operand0,
2144 gen_rtx_LO_SUM (mode, temp, operand1));
2145
2146 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2147 emit_insn (set);
2148
2149 }
2150 return 1;
2151 }
2152 else if (tls_referenced_p (operand1))
2153 {
2154 rtx tmp = operand1;
2155 rtx addend = NULL;
2156
2157 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2158 {
2159 addend = XEXP (XEXP (tmp, 0), 1);
2160 tmp = XEXP (XEXP (tmp, 0), 0);
2161 }
2162
2163 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2164 tmp = legitimize_tls_address (tmp);
2165 if (addend)
2166 {
2167 tmp = gen_rtx_PLUS (mode, tmp, addend);
2168 tmp = force_operand (tmp, operands[0]);
2169 }
2170 operands[1] = tmp;
2171 }
2172 else if (GET_CODE (operand1) != CONST_INT
2173 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2174 {
2175 rtx temp;
2176 rtx_insn *insn;
2177 rtx op1 = operand1;
2178 HOST_WIDE_INT value = 0;
2179 HOST_WIDE_INT insv = 0;
2180 int insert = 0;
2181
2182 if (GET_CODE (operand1) == CONST_INT)
2183 value = INTVAL (operand1);
2184
2185 if (TARGET_64BIT
2186 && GET_CODE (operand1) == CONST_INT
2187 && HOST_BITS_PER_WIDE_INT > 32
2188 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2189 {
2190 HOST_WIDE_INT nval;
2191
2192 /* Extract the low order 32 bits of the value and sign extend.
2193 If the new value is the same as the original value, we can
2194 use the original value as-is. If the new value is
2195 different, we use it and insert the most-significant 32-bits
2196 of the original value into the final result. */
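/* Worked example (added commentary): for value = 0x123456789, the
   sign-extended low-order 32 bits give nval = 0x23456789, which
   differs from the original value, so insv becomes the upper 32
   bits (0x1).  The code below first loads 0x23456789 and later
   inserts 0x1 into bits 32..63 of the result.  */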
2197 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2198 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2199 if (value != nval)
2200 {
2201 #if HOST_BITS_PER_WIDE_INT > 32
2202 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2203 #endif
2204 insert = 1;
2205 value = nval;
2206 operand1 = GEN_INT (nval);
2207 }
2208 }
2209
2210 if (reload_in_progress || reload_completed)
2211 temp = scratch_reg ? scratch_reg : operand0;
2212 else
2213 temp = gen_reg_rtx (mode);
2214
2215 /* We don't directly split DImode constants on 32-bit targets
2216 because PLUS uses an 11-bit immediate and the insn sequence
2217 generated is not as efficient as the one using HIGH/LO_SUM. */
2218 if (GET_CODE (operand1) == CONST_INT
2219 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2220 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2221 && !insert)
2222 {
2223 /* Directly break constant into high and low parts. This
2224 provides better optimization opportunities because various
2225 passes recognize constants split with PLUS but not LO_SUM.
2226 We use a 14-bit signed low part except when the addition
2227 of 0x4000 to the high part might change the sign of the
2228 high part. */
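/* Worked example (added commentary): value = 0x12345 gives
   low = 0x2345 >= 0x2000, so high is rounded up to 0x14000 and
   low becomes 0x12345 - 0x14000 = -7355, which fits in 14 bits;
   the resulting sequence is roughly

       ldil L'0x14000,%tmp
       ldo -7355(%tmp),%dest

   where %tmp and %dest are placeholder register names.  */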
2229 HOST_WIDE_INT low = value & 0x3fff;
2230 HOST_WIDE_INT high = value & ~ 0x3fff;
2231
2232 if (low >= 0x2000)
2233 {
2234 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2235 high += 0x2000;
2236 else
2237 high += 0x4000;
2238 }
2239
2240 low = value - high;
2241
2242 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2243 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2244 }
2245 else
2246 {
2247 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2248 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2249 }
2250
2251 insn = emit_move_insn (operands[0], operands[1]);
2252
2253 /* Now insert the most significant 32 bits of the value
2254 into the register. When we don't have a second register
2255 available, it could take up to nine instructions to load
2256 a 64-bit integer constant. Prior to reload, we force
2257 constants that would take more than three instructions
2258 to load to the constant pool. During and after reload,
2259 we have to handle all possible values. */
2260 if (insert)
2261 {
2262 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2263 register and the value to be inserted is outside the
2264 range that can be loaded with three depdi instructions. */
2265 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2266 {
2267 operand1 = GEN_INT (insv);
2268
2269 emit_insn (gen_rtx_SET (temp,
2270 gen_rtx_HIGH (mode, operand1)));
2271 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2272 if (mode == DImode)
2273 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2274 const0_rtx, temp));
2275 else
2276 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2277 const0_rtx, temp));
2278 }
2279 else
2280 {
2281 int len = 5, pos = 27;
2282
2283 /* Insert the bits using the depdi instruction. */
2284 while (pos >= 0)
2285 {
2286 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2287 HOST_WIDE_INT sign = v5 < 0;
2288
2289 /* Left extend the insertion. */
2290 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2291 while (pos > 0 && (insv & 1) == sign)
2292 {
2293 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2294 len += 1;
2295 pos -= 1;
2296 }
2297
2298 if (mode == DImode)
2299 insn = emit_insn (gen_insvdi (operand0,
2300 GEN_INT (len),
2301 GEN_INT (pos),
2302 GEN_INT (v5)));
2303 else
2304 insn = emit_insn (gen_insvsi (operand0,
2305 GEN_INT (len),
2306 GEN_INT (pos),
2307 GEN_INT (v5)));
2308
2309 len = pos > 0 && pos < 5 ? pos : 5;
2310 pos -= len;
2311 }
2312 }
2313 }
2314
2315 set_unique_reg_note (insn, REG_EQUAL, op1);
2316
2317 return 1;
2318 }
2319 }
2320 /* Now have insn-emit do whatever it normally does. */
2321 return 0;
2322 }
2323
2324 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2325 it will need a link/runtime reloc). */
2326
2327 int
2328 pa_reloc_needed (tree exp)
2329 {
2330 int reloc = 0;
2331
2332 switch (TREE_CODE (exp))
2333 {
2334 case ADDR_EXPR:
2335 return 1;
2336
2337 case POINTER_PLUS_EXPR:
2338 case PLUS_EXPR:
2339 case MINUS_EXPR:
2340 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2341 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2342 break;
2343
2344 CASE_CONVERT:
2345 case NON_LVALUE_EXPR:
2346 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2347 break;
2348
2349 case CONSTRUCTOR:
2350 {
2351 tree value;
2352 unsigned HOST_WIDE_INT ix;
2353
2354 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2355 if (value)
2356 reloc |= pa_reloc_needed (value);
2357 }
2358 break;
2359
2360 case ERROR_MARK:
2361 break;
2362
2363 default:
2364 break;
2365 }
2366 return reloc;
2367 }
2368
2369 \f
2370 /* Return the best assembler insn template
2371 for moving operands[1] into operands[0] as a fullword. */
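/* For instance (added commentary, not from the original source):
   a 14-bit constant such as 42 yields "ldi"; a constant whose low
   11 bits are zero, such as 0x12800, yields a single "ldil"; and a
   constant like 0x12345, which satisfies neither test and is not a
   zdepi candidate, needs the two-insn ldil L'/ldo R' pair.  */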
2372 const char *
2373 pa_singlemove_string (rtx *operands)
2374 {
2375 HOST_WIDE_INT intval;
2376
2377 if (GET_CODE (operands[0]) == MEM)
2378 return "stw %r1,%0";
2379 if (GET_CODE (operands[1]) == MEM)
2380 return "ldw %1,%0";
2381 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2382 {
2383 long i;
2384
2385 gcc_assert (GET_MODE (operands[1]) == SFmode);
2386
2387 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2388 bit pattern. */
2389 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2390
2391 operands[1] = GEN_INT (i);
2392 /* Fall through to CONST_INT case. */
2393 }
2394 if (GET_CODE (operands[1]) == CONST_INT)
2395 {
2396 intval = INTVAL (operands[1]);
2397
2398 if (VAL_14_BITS_P (intval))
2399 return "ldi %1,%0";
2400 else if ((intval & 0x7ff) == 0)
2401 return "ldil L'%1,%0";
2402 else if (pa_zdepi_cint_p (intval))
2403 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2404 else
2405 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2406 }
2407 return "copy %1,%0";
2408 }
2409 \f
2410
2411 /* Compute position (in OP[1]) and width (in OP[2])
2412 useful for copying IMM to a register using the zdepi
2413 instruction. Store the immediate value to insert in OP[0]. */
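/* Worked example (added commentary): IMM = 0x1e0 has its least
   significant set bit at position 5; the shifted value 0xf has bit 4
   clear, so the result is op[0] = 0xf, op[1] = 26, op[2] = 4.  For
   IMM = 0x3e0 the shifted value 0x1f has bit 4 set; the bitstring
   width is 5 and the field is sign-extended to -1, giving
   op[0] = -1, op[1] = 26, op[2] = 5.  */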
2414 static void
2415 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2416 {
2417 int lsb, len;
2418
2419 /* Find the least significant set bit in IMM. */
2420 for (lsb = 0; lsb < 32; lsb++)
2421 {
2422 if ((imm & 1) != 0)
2423 break;
2424 imm >>= 1;
2425 }
2426
2427 /* Choose variants based on *sign* of the 5-bit field. */
2428 if ((imm & 0x10) == 0)
2429 len = (lsb <= 28) ? 4 : 32 - lsb;
2430 else
2431 {
2432 /* Find the width of the bitstring in IMM. */
2433 for (len = 5; len < 32 - lsb; len++)
2434 {
2435 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2436 break;
2437 }
2438
2439 /* Sign extend IMM as a 5-bit value. */
2440 imm = (imm & 0xf) - 0x10;
2441 }
2442
2443 op[0] = imm;
2444 op[1] = 31 - lsb;
2445 op[2] = len;
2446 }
2447
2448 /* Compute position (in OP[1]) and width (in OP[2])
2449 useful for copying IMM to a register using the depdi,z
2450 instruction. Store the immediate value to insert in OP[0]. */
2451
2452 static void
2453 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2454 {
2455 int lsb, len, maxlen;
2456
2457 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2458
2459 /* Find the least significant set bit in IMM. */
2460 for (lsb = 0; lsb < maxlen; lsb++)
2461 {
2462 if ((imm & 1) != 0)
2463 break;
2464 imm >>= 1;
2465 }
2466
2467 /* Choose variants based on *sign* of the 5-bit field. */
2468 if ((imm & 0x10) == 0)
2469 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2470 else
2471 {
2472 /* Find the width of the bitstring in IMM. */
2473 for (len = 5; len < maxlen - lsb; len++)
2474 {
2475 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2476 break;
2477 }
2478
2479 /* Extend length if host is narrow and IMM is negative. */
2480 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2481 len += 32;
2482
2483 /* Sign extend IMM as a 5-bit value. */
2484 imm = (imm & 0xf) - 0x10;
2485 }
2486
2487 op[0] = imm;
2488 op[1] = 63 - lsb;
2489 op[2] = len;
2490 }
2491
2492 /* Output assembler code to perform a doubleword move insn
2493 with operands OPERANDS. */
2494
2495 const char *
2496 pa_output_move_double (rtx *operands)
2497 {
2498 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2499 rtx latehalf[2];
2500 rtx addreg0 = 0, addreg1 = 0;
2501 int highonly = 0;
2502
2503 /* First classify both operands. */
2504
2505 if (REG_P (operands[0]))
2506 optype0 = REGOP;
2507 else if (offsettable_memref_p (operands[0]))
2508 optype0 = OFFSOP;
2509 else if (GET_CODE (operands[0]) == MEM)
2510 optype0 = MEMOP;
2511 else
2512 optype0 = RNDOP;
2513
2514 if (REG_P (operands[1]))
2515 optype1 = REGOP;
2516 else if (CONSTANT_P (operands[1]))
2517 optype1 = CNSTOP;
2518 else if (offsettable_memref_p (operands[1]))
2519 optype1 = OFFSOP;
2520 else if (GET_CODE (operands[1]) == MEM)
2521 optype1 = MEMOP;
2522 else
2523 optype1 = RNDOP;
2524
2525 /* Check for the cases that the operand constraints are not
2526 supposed to allow to happen. */
2527 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2528
2529 /* Handle copies between general and floating registers. */
2530
2531 if (optype0 == REGOP && optype1 == REGOP
2532 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2533 {
2534 if (FP_REG_P (operands[0]))
2535 {
2536 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2537 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2538 return "{fldds|fldd} -16(%%sp),%0";
2539 }
2540 else
2541 {
2542 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2543 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2544 return "{ldws|ldw} -12(%%sp),%R0";
2545 }
2546 }
2547
2548 /* Handle auto decrementing and incrementing loads and stores
2549 specifically, since the structure of the function doesn't work
2550 for them without major modification. Do it better when we teach
2551 this port about the general inc/dec addressing of PA.
2552 (This was written by tege. Chide him if it doesn't work.) */
2553
2554 if (optype0 == MEMOP)
2555 {
2556 /* We have to output the address syntax ourselves, since print_operand
2557 doesn't deal with the addresses we want to use. Fix this later. */
2558
2559 rtx addr = XEXP (operands[0], 0);
2560 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2561 {
2562 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2563
2564 operands[0] = XEXP (addr, 0);
2565 gcc_assert (GET_CODE (operands[1]) == REG
2566 && GET_CODE (operands[0]) == REG);
2567
2568 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2569
2570 /* No overlap between high target register and address
2571 register. (We do this in a non-obvious way to
2572 save a register file writeback) */
2573 if (GET_CODE (addr) == POST_INC)
2574 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2575 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2576 }
2577 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2578 {
2579 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2580
2581 operands[0] = XEXP (addr, 0);
2582 gcc_assert (GET_CODE (operands[1]) == REG
2583 && GET_CODE (operands[0]) == REG);
2584
2585 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2586 /* No overlap between high target register and address
2587 register. (We do this in a non-obvious way to save a
2588 register file writeback) */
2589 if (GET_CODE (addr) == PRE_INC)
2590 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2591 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2592 }
2593 }
2594 if (optype1 == MEMOP)
2595 {
2596 /* We have to output the address syntax ourselves, since print_operand
2597 doesn't deal with the addresses we want to use. Fix this later. */
2598
2599 rtx addr = XEXP (operands[1], 0);
2600 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2601 {
2602 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2603
2604 operands[1] = XEXP (addr, 0);
2605 gcc_assert (GET_CODE (operands[0]) == REG
2606 && GET_CODE (operands[1]) == REG);
2607
2608 if (!reg_overlap_mentioned_p (high_reg, addr))
2609 {
2610 /* No overlap between high target register and address
2611 register. (We do this in a non-obvious way to
2612 save a register file writeback) */
2613 if (GET_CODE (addr) == POST_INC)
2614 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2615 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2616 }
2617 else
2618 {
2619 /* This is an undefined situation. We should load into the
2620 address register *and* update that register. Probably
2621 we don't need to handle this at all. */
2622 if (GET_CODE (addr) == POST_INC)
2623 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2624 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2625 }
2626 }
2627 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2628 {
2629 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2630
2631 operands[1] = XEXP (addr, 0);
2632 gcc_assert (GET_CODE (operands[0]) == REG
2633 && GET_CODE (operands[1]) == REG);
2634
2635 if (!reg_overlap_mentioned_p (high_reg, addr))
2636 {
2637 /* No overlap between high target register and address
2638 register. (We do this in a non-obvious way to
2639 save a register file writeback) */
2640 if (GET_CODE (addr) == PRE_INC)
2641 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2642 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2643 }
2644 else
2645 {
2646 /* This is an undefined situation. We should load into the
2647 address register *and* update that register. Probably
2648 we don't need to handle this at all. */
2649 if (GET_CODE (addr) == PRE_INC)
2650 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2651 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2652 }
2653 }
2654 else if (GET_CODE (addr) == PLUS
2655 && GET_CODE (XEXP (addr, 0)) == MULT)
2656 {
2657 rtx xoperands[4];
2658
2659 /* Load address into left half of destination register. */
2660 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2661 xoperands[1] = XEXP (addr, 1);
2662 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2663 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2664 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2665 xoperands);
2666 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2667 }
2668 else if (GET_CODE (addr) == PLUS
2669 && REG_P (XEXP (addr, 0))
2670 && REG_P (XEXP (addr, 1)))
2671 {
2672 rtx xoperands[3];
2673
2674 /* Load address into left half of destination register. */
2675 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2676 xoperands[1] = XEXP (addr, 0);
2677 xoperands[2] = XEXP (addr, 1);
2678 output_asm_insn ("{addl|add,l} %1,%2,%0",
2679 xoperands);
2680 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2681 }
2682 }
2683
2684 /* If an operand is an unoffsettable memory ref, find a register
2685 we can increment temporarily to make it refer to the second word. */
2686
2687 if (optype0 == MEMOP)
2688 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2689
2690 if (optype1 == MEMOP)
2691 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2692
2693 /* Ok, we can do one word at a time.
2694 Normally we do the low-numbered word first.
2695
2696 In either case, set up in LATEHALF the operands to use
2697 for the high-numbered word and in some cases alter the
2698 operands in OPERANDS to be suitable for the low-numbered word. */
2699
2700 if (optype0 == REGOP)
2701 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2702 else if (optype0 == OFFSOP)
2703 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2704 else
2705 latehalf[0] = operands[0];
2706
2707 if (optype1 == REGOP)
2708 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2709 else if (optype1 == OFFSOP)
2710 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2711 else if (optype1 == CNSTOP)
2712 {
2713 if (GET_CODE (operands[1]) == HIGH)
2714 {
2715 operands[1] = XEXP (operands[1], 0);
2716 highonly = 1;
2717 }
2718 split_double (operands[1], &operands[1], &latehalf[1]);
2719 }
2720 else
2721 latehalf[1] = operands[1];
2722
2723 /* If the first move would clobber the source of the second one,
2724 do them in the other order.
2725
2726 This can happen in two cases:
2727
2728 mem -> register where the first half of the destination register
2729 is the same register used in the memory's address. Reload
2730 can create such insns.
2731
2732 mem in this case will be either register indirect or register
2733 indirect plus a valid offset.
2734
2735 register -> register move where REGNO(dst) == REGNO(src + 1)
2736 someone (Tim/Tege?) claimed this can happen for parameter loads.
2737
2738 Handle mem -> register case first. */
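/* For example (added commentary): a reload such as
   (set (reg:DI %r4) (mem:DI (reg:SI %r4))) must emit
   "ldw 4(%r4),%r5" before "ldw 0(%r4),%r4"; doing the low-numbered
   word first would clobber the address register.  */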
2739 if (optype0 == REGOP
2740 && (optype1 == MEMOP || optype1 == OFFSOP)
2741 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2742 {
2743 /* Do the late half first. */
2744 if (addreg1)
2745 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2746 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2747
2748 /* Then clobber. */
2749 if (addreg1)
2750 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2751 return pa_singlemove_string (operands);
2752 }
2753
2754 /* Now handle register -> register case. */
2755 if (optype0 == REGOP && optype1 == REGOP
2756 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2757 {
2758 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2759 return pa_singlemove_string (operands);
2760 }
2761
2762 /* Normal case: do the two words, low-numbered first. */
2763
2764 output_asm_insn (pa_singlemove_string (operands), operands);
2765
2766 /* Make any unoffsettable addresses point at high-numbered word. */
2767 if (addreg0)
2768 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2769 if (addreg1)
2770 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2771
2772 /* Do high-numbered word. */
2773 if (highonly)
2774 output_asm_insn ("ldil L'%1,%0", latehalf);
2775 else
2776 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2777
2778 /* Undo the adds we just did. */
2779 if (addreg0)
2780 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2781 if (addreg1)
2782 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2783
2784 return "";
2785 }
2786 \f
2787 const char *
2788 pa_output_fp_move_double (rtx *operands)
2789 {
2790 if (FP_REG_P (operands[0]))
2791 {
2792 if (FP_REG_P (operands[1])
2793 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2794 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2795 else
2796 output_asm_insn ("fldd%F1 %1,%0", operands);
2797 }
2798 else if (FP_REG_P (operands[1]))
2799 {
2800 output_asm_insn ("fstd%F0 %1,%0", operands);
2801 }
2802 else
2803 {
2804 rtx xoperands[2];
2805
2806 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2807
2808 /* This is a pain. You have to be prepared to deal with an
2809 arbitrary address here including pre/post increment/decrement.
2810
2811 So avoid this in the MD. */
2812 gcc_assert (GET_CODE (operands[0]) == REG);
2813
2814 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2815 xoperands[0] = operands[0];
2816 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2817 }
2818 return "";
2819 }
2820 \f
2821 /* Return a REG that occurs in ADDR with coefficient 1.
2822 ADDR can be effectively incremented by incrementing REG. */
2823
2824 static rtx
2825 find_addr_reg (rtx addr)
2826 {
2827 while (GET_CODE (addr) == PLUS)
2828 {
2829 if (GET_CODE (XEXP (addr, 0)) == REG)
2830 addr = XEXP (addr, 0);
2831 else if (GET_CODE (XEXP (addr, 1)) == REG)
2832 addr = XEXP (addr, 1);
2833 else if (CONSTANT_P (XEXP (addr, 0)))
2834 addr = XEXP (addr, 1);
2835 else if (CONSTANT_P (XEXP (addr, 1)))
2836 addr = XEXP (addr, 0);
2837 else
2838 gcc_unreachable ();
2839 }
2840 gcc_assert (GET_CODE (addr) == REG);
2841 return addr;
2842 }
2843
2844 /* Emit code to perform a block move.
2845
2846 OPERANDS[0] is the destination pointer as a REG, clobbered.
2847 OPERANDS[1] is the source pointer as a REG, clobbered.
2848 OPERANDS[2] is a register for temporary storage.
2849 OPERANDS[3] is a register for temporary storage.
2850 OPERANDS[4] is the size as a CONST_INT
2851 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2852 OPERANDS[6] is another temporary register. */
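/* Worked example (added commentary): for n_bytes = 23 and align = 4,
   the loop counter is preset to 15 and the unrolled loop copies 8
   bytes per iteration, 16 bytes in total.  The residual pass then
   copies one more word (since 23 % 8 >= 4) and the final
   23 % 4 = 3 bytes with a single "ldw" and "{stbys|stby},e" pair.  */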
2853
2854 const char *
2855 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2856 {
2857 int align = INTVAL (operands[5]);
2858 unsigned long n_bytes = INTVAL (operands[4]);
2859
2860 /* We can't move more than a word at a time because the PA
2861 has no integer move insns longer than a word. (Could use fp mem ops?) */
2862 if (align > (TARGET_64BIT ? 8 : 4))
2863 align = (TARGET_64BIT ? 8 : 4);
2864
2865 /* Note that we know each loop below will execute at least twice
2866 (else we would have open-coded the copy). */
2867 switch (align)
2868 {
2869 case 8:
2870 /* Pre-adjust the loop counter. */
2871 operands[4] = GEN_INT (n_bytes - 16);
2872 output_asm_insn ("ldi %4,%2", operands);
2873
2874 /* Copying loop. */
2875 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2876 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2877 output_asm_insn ("std,ma %3,8(%0)", operands);
2878 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2879 output_asm_insn ("std,ma %6,8(%0)", operands);
2880
2881 /* Handle the residual. There could be up to 15 bytes of
2882 residual to copy! */
2883 if (n_bytes % 16 != 0)
2884 {
2885 operands[4] = GEN_INT (n_bytes % 8);
2886 if (n_bytes % 16 >= 8)
2887 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2888 if (n_bytes % 8 != 0)
2889 output_asm_insn ("ldd 0(%1),%6", operands);
2890 if (n_bytes % 16 >= 8)
2891 output_asm_insn ("std,ma %3,8(%0)", operands);
2892 if (n_bytes % 8 != 0)
2893 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2894 }
2895 return "";
2896
2897 case 4:
2898 /* Pre-adjust the loop counter. */
2899 operands[4] = GEN_INT (n_bytes - 8);
2900 output_asm_insn ("ldi %4,%2", operands);
2901
2902 /* Copying loop. */
2903 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2904 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2905 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2906 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2907 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2908
2909 /* Handle the residual. There could be up to 7 bytes of
2910 residual to copy! */
2911 if (n_bytes % 8 != 0)
2912 {
2913 operands[4] = GEN_INT (n_bytes % 4);
2914 if (n_bytes % 8 >= 4)
2915 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2916 if (n_bytes % 4 != 0)
2917 output_asm_insn ("ldw 0(%1),%6", operands);
2918 if (n_bytes % 8 >= 4)
2919 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2920 if (n_bytes % 4 != 0)
2921 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2922 }
2923 return "";
2924
2925 case 2:
2926 /* Pre-adjust the loop counter. */
2927 operands[4] = GEN_INT (n_bytes - 4);
2928 output_asm_insn ("ldi %4,%2", operands);
2929
2930 /* Copying loop. */
2931 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2932 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2933 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2934 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2935 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2936
2937 /* Handle the residual. */
2938 if (n_bytes % 4 != 0)
2939 {
2940 if (n_bytes % 4 >= 2)
2941 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2942 if (n_bytes % 2 != 0)
2943 output_asm_insn ("ldb 0(%1),%6", operands);
2944 if (n_bytes % 4 >= 2)
2945 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2946 if (n_bytes % 2 != 0)
2947 output_asm_insn ("stb %6,0(%0)", operands);
2948 }
2949 return "";
2950
2951 case 1:
2952 /* Pre-adjust the loop counter. */
2953 operands[4] = GEN_INT (n_bytes - 2);
2954 output_asm_insn ("ldi %4,%2", operands);
2955
2956 /* Copying loop. */
2957 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2958 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2959 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2960 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2961 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2962
2963 /* Handle the residual. */
2964 if (n_bytes % 2 != 0)
2965 {
2966 output_asm_insn ("ldb 0(%1),%3", operands);
2967 output_asm_insn ("stb %3,0(%0)", operands);
2968 }
2969 return "";
2970
2971 default:
2972 gcc_unreachable ();
2973 }
2974 }
2975
2976 /* Count the number of insns necessary to handle this block move.
2977
2978 Basic structure is the same as pa_output_block_move, except that we
2979 count insns rather than emit them. */
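/* For the n_bytes = 23, align = 4 example above (added commentary):
   the basic loop accounts for 6 insns, the word-sized residual adds
   2 and the sub-word residual adds 2 more, giving 10 insns or 40
   bytes of code.  */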
2980
2981 static int
2982 compute_movmem_length (rtx_insn *insn)
2983 {
2984 rtx pat = PATTERN (insn);
2985 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2986 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2987 unsigned int n_insns = 0;
2988
2989 /* We can't move more than a word at a time because the PA
2990 has no integer move insns longer than a word. (Could use fp mem ops?) */
2991 if (align > (TARGET_64BIT ? 8 : 4))
2992 align = (TARGET_64BIT ? 8 : 4);
2993
2994 /* The basic copying loop. */
2995 n_insns = 6;
2996
2997 /* Residuals. */
2998 if (n_bytes % (2 * align) != 0)
2999 {
3000 if ((n_bytes % (2 * align)) >= align)
3001 n_insns += 2;
3002
3003 if ((n_bytes % align) != 0)
3004 n_insns += 2;
3005 }
3006
3007 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3008 return n_insns * 4;
3009 }
3010
3011 /* Emit code to perform a block clear.
3012
3013 OPERANDS[0] is the destination pointer as a REG, clobbered.
3014 OPERANDS[1] is a register for temporary storage.
3015 OPERANDS[2] is the size as a CONST_INT
3016 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3017
3018 const char *
3019 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3020 {
3021 int align = INTVAL (operands[3]);
3022 unsigned long n_bytes = INTVAL (operands[2]);
3023
3024 /* We can't clear more than a word at a time because the PA
3025 has no integer move insns longer than a word. */
3026 if (align > (TARGET_64BIT ? 8 : 4))
3027 align = (TARGET_64BIT ? 8 : 4);
3028
3029 /* Note that we know each loop below will execute at least twice
3030 (else we would have open-coded the clear). */
3031 switch (align)
3032 {
3033 case 8:
3034 /* Pre-adjust the loop counter. */
3035 operands[2] = GEN_INT (n_bytes - 16);
3036 output_asm_insn ("ldi %2,%1", operands);
3037
3038 /* Loop. */
3039 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3040 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3041 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3042
3043 /* Handle the residual. There could be up to 15 bytes of
3044 residual to clear! */
3045 if (n_bytes % 16 != 0)
3046 {
3047 operands[2] = GEN_INT (n_bytes % 8);
3048 if (n_bytes % 16 >= 8)
3049 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3050 if (n_bytes % 8 != 0)
3051 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3052 }
3053 return "";
3054
3055 case 4:
3056 /* Pre-adjust the loop counter. */
3057 operands[2] = GEN_INT (n_bytes - 8);
3058 output_asm_insn ("ldi %2,%1", operands);
3059
3060 /* Loop. */
3061 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3062 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3063 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3064
3065 /* Handle the residual. There could be up to 7 bytes of
3066 residual to clear! */
3067 if (n_bytes % 8 != 0)
3068 {
3069 operands[2] = GEN_INT (n_bytes % 4);
3070 if (n_bytes % 8 >= 4)
3071 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3072 if (n_bytes % 4 != 0)
3073 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3074 }
3075 return "";
3076
3077 case 2:
3078 /* Pre-adjust the loop counter. */
3079 operands[2] = GEN_INT (n_bytes - 4);
3080 output_asm_insn ("ldi %2,%1", operands);
3081
3082 /* Loop. */
3083 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3084 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3085 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3086
3087 /* Handle the residual. */
3088 if (n_bytes % 4 != 0)
3089 {
3090 if (n_bytes % 4 >= 2)
3091 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3092 if (n_bytes % 2 != 0)
3093 output_asm_insn ("stb %%r0,0(%0)", operands);
3094 }
3095 return "";
3096
3097 case 1:
3098 /* Pre-adjust the loop counter. */
3099 operands[2] = GEN_INT (n_bytes - 2);
3100 output_asm_insn ("ldi %2,%1", operands);
3101
3102 /* Loop. */
3103 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3104 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3105 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3106
3107 /* Handle the residual. */
3108 if (n_bytes % 2 != 0)
3109 output_asm_insn ("stb %%r0,0(%0)", operands);
3110
3111 return "";
3112
3113 default:
3114 gcc_unreachable ();
3115 }
3116 }
3117
3118 /* Count the number of insns necessary to handle this block clear.
3119 
3120 Basic structure is the same as pa_output_block_clear, except that we
3121 count insns rather than emit them. */
3122
3123 static int
3124 compute_clrmem_length (rtx_insn *insn)
3125 {
3126 rtx pat = PATTERN (insn);
3127 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3128 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3129 unsigned int n_insns = 0;
3130
3131 /* We can't clear more than a word at a time because the PA
3132 has no integer move insns longer than a word. */
3133 if (align > (TARGET_64BIT ? 8 : 4))
3134 align = (TARGET_64BIT ? 8 : 4);
3135
3136 /* The basic loop. */
3137 n_insns = 4;
3138
3139 /* Residuals. */
3140 if (n_bytes % (2 * align) != 0)
3141 {
3142 if ((n_bytes % (2 * align)) >= align)
3143 n_insns++;
3144
3145 if ((n_bytes % align) != 0)
3146 n_insns++;
3147 }
3148
3149 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3150 return n_insns * 4;
3151 }
3152 \f
3153
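/* Output an AND of a register with a constant mask.  Illustrative
   cases (added commentary, not from the original source): a mask of
   low-order ones such as 0x3f becomes "{extru|extrw,u} %1,31,6,%0",
   while a mask with a single hole such as 0xffff00ff becomes
   "{depi|depwi} 0,23,8,%0", clearing bits 8..15.  Register masks fall
   through to a plain "and".  */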
3154 const char *
3155 pa_output_and (rtx *operands)
3156 {
3157 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3158 {
3159 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3160 int ls0, ls1, ms0, p, len;
3161
3162 for (ls0 = 0; ls0 < 32; ls0++)
3163 if ((mask & (1 << ls0)) == 0)
3164 break;
3165
3166 for (ls1 = ls0; ls1 < 32; ls1++)
3167 if ((mask & (1 << ls1)) != 0)
3168 break;
3169
3170 for (ms0 = ls1; ms0 < 32; ms0++)
3171 if ((mask & (1 << ms0)) == 0)
3172 break;
3173
3174 gcc_assert (ms0 == 32);
3175
3176 if (ls1 == 32)
3177 {
3178 len = ls0;
3179
3180 gcc_assert (len);
3181
3182 operands[2] = GEN_INT (len);
3183 return "{extru|extrw,u} %1,31,%2,%0";
3184 }
3185 else
3186 {
3187 /* We could use this `depi' for the case above as well, but `depi'
3188 requires one more register file access than an `extru'. */
3189
3190 p = 31 - ls0;
3191 len = ls1 - ls0;
3192
3193 operands[2] = GEN_INT (p);
3194 operands[3] = GEN_INT (len);
3195 return "{depi|depwi} 0,%2,%3,%0";
3196 }
3197 }
3198 else
3199 return "and %1,%2,%0";
3200 }
3201
3202 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3203 storing the result in operands[0]. */
3204 const char *
3205 pa_output_64bit_and (rtx *operands)
3206 {
3207 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3208 {
3209 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3210 int ls0, ls1, ms0, p, len;
3211
3212 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3213 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3214 break;
3215
3216 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3217 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3218 break;
3219
3220 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3221 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3222 break;
3223
3224 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3225
3226 if (ls1 == HOST_BITS_PER_WIDE_INT)
3227 {
3228 len = ls0;
3229
3230 gcc_assert (len);
3231
3232 operands[2] = GEN_INT (len);
3233 return "extrd,u %1,63,%2,%0";
3234 }
3235 else
3236 {
3237 /* We could use this `depdi' for the case above as well, but `depdi'
3238 requires one more register file access than an `extrd,u'. */
3239
3240 p = 63 - ls0;
3241 len = ls1 - ls0;
3242
3243 operands[2] = GEN_INT (p);
3244 operands[3] = GEN_INT (len);
3245 return "depdi 0,%2,%3,%0";
3246 }
3247 }
3248 else
3249 return "and %1,%2,%0";
3250 }
3251
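/* Output an inclusive-or of a register with a constant mask, which
   callers are expected to restrict to a single contiguous run of ones
   (note the assertion below).  For example (added commentary): mask
   0x00000ff0 has bits 4..11 set and yields "{depi|depwi} -1,27,8,%0".
   A zero mask degenerates to a plain copy.  */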
3252 const char *
3253 pa_output_ior (rtx *operands)
3254 {
3255 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3256 int bs0, bs1, p, len;
3257
3258 if (INTVAL (operands[2]) == 0)
3259 return "copy %1,%0";
3260
3261 for (bs0 = 0; bs0 < 32; bs0++)
3262 if ((mask & (1 << bs0)) != 0)
3263 break;
3264
3265 for (bs1 = bs0; bs1 < 32; bs1++)
3266 if ((mask & (1 << bs1)) == 0)
3267 break;
3268
3269 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3270
3271 p = 31 - bs0;
3272 len = bs1 - bs0;
3273
3274 operands[2] = GEN_INT (p);
3275 operands[3] = GEN_INT (len);
3276 return "{depi|depwi} -1,%2,%3,%0";
3277 }
3278
3279 /* Return a string to perform a bitwise inclusive-or of operands[1] with operands[2]
3280 storing the result in operands[0]. */
3281 const char *
3282 pa_output_64bit_ior (rtx *operands)
3283 {
3284 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3285 int bs0, bs1, p, len;
3286
3287 if (INTVAL (operands[2]) == 0)
3288 return "copy %1,%0";
3289
3290 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3291 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3292 break;
3293
3294 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3295 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3296 break;
3297
3298 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3299 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3300
3301 p = 63 - bs0;
3302 len = bs1 - bs0;
3303
3304 operands[2] = GEN_INT (p);
3305 operands[3] = GEN_INT (len);
3306 return "depdi -1,%2,%3,%0";
3307 }
3308 \f
3309 /* Target hook for assembling integer objects. This code handles
3310 aligned SI and DI integers specially since function references
3311 must be preceded by P%. */
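/* For example (added commentary): a word-sized, aligned initializer
   such as

       void (*fp) (void) = foo;

   is emitted as "\t.word\tP%foo" (".dword" on 64-bit targets) so that
   the linker builds an OPD for the function, unless fast indirect
   calls are enabled; other references fall through to
   default_assemble_integer.  */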
3312
3313 static bool
3314 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3315 {
3316 bool result;
3317 tree decl = NULL;
3318
3319 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3320 assemble_external and set the SYMBOL_REF_DECL to NULL before
3321 calling output_addr_const. Otherwise, it may call assemble_external
3322 in the midst of outputting the assembler code for the SYMBOL_REF.
3323 We restore the SYMBOL_REF_DECL after the output is done. */
3324 if (GET_CODE (x) == SYMBOL_REF)
3325 {
3326 decl = SYMBOL_REF_DECL (x);
3327 if (decl)
3328 {
3329 assemble_external (decl);
3330 SET_SYMBOL_REF_DECL (x, NULL);
3331 }
3332 }
3333
3334 if (size == UNITS_PER_WORD
3335 && aligned_p
3336 && function_label_operand (x, VOIDmode))
3337 {
3338 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3339
3340 /* We don't want an OPD when generating fast indirect calls. */
3341 if (!TARGET_FAST_INDIRECT_CALLS)
3342 fputs ("P%", asm_out_file);
3343
3344 output_addr_const (asm_out_file, x);
3345 fputc ('\n', asm_out_file);
3346 result = true;
3347 }
3348 else
3349 result = default_assemble_integer (x, size, aligned_p);
3350
3351 if (decl)
3352 SET_SYMBOL_REF_DECL (x, decl);
3353
3354 return result;
3355 }
3356 \f
3357 /* Output an ascii string. */
3358 void
3359 pa_output_ascii (FILE *file, const char *p, int size)
3360 {
3361 int i;
3362 int chars_output;
3363 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3364
3365 /* The HP assembler can only take strings of 256 characters at one
3366 time. This is a limitation on input line length, *not* the
3367 length of the string. Sigh. Even worse, it seems that the
3368 restriction is in number of input characters (see \xnn &
3369 \whatever). So we have to do this very carefully. */
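/* For instance (added commentary): the five input bytes a b " c \n
   are emitted as

	.STRING "ab\"c\x0a"

   with quotes and backslashes escaped, non-printing characters
   rendered as \xNN, and a fresh .STRING directive started whenever a
   chunk would push the current line past 243 output characters.  */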
3370
3371 fputs ("\t.STRING \"", file);
3372
3373 chars_output = 0;
3374 for (i = 0; i < size; i += 4)
3375 {
3376 int co = 0;
3377 int io = 0;
3378 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3379 {
3380 register unsigned int c = (unsigned char) p[i + io];
3381
3382 if (c == '\"' || c == '\\')
3383 partial_output[co++] = '\\';
3384 if (c >= ' ' && c < 0177)
3385 partial_output[co++] = c;
3386 else
3387 {
3388 unsigned int hexd;
3389 partial_output[co++] = '\\';
3390 partial_output[co++] = 'x';
3391 hexd = c / 16 - 0 + '0';
3392 if (hexd > '9')
3393 hexd -= '9' - 'a' + 1;
3394 partial_output[co++] = hexd;
3395 hexd = c % 16 - 0 + '0';
3396 if (hexd > '9')
3397 hexd -= '9' - 'a' + 1;
3398 partial_output[co++] = hexd;
3399 }
3400 }
3401 if (chars_output + co > 243)
3402 {
3403 fputs ("\"\n\t.STRING \"", file);
3404 chars_output = 0;
3405 }
3406 fwrite (partial_output, 1, (size_t) co, file);
3407 chars_output += co;
3408 co = 0;
3409 }
3410 fputs ("\"\n", file);
3411 }
3412
3413 /* Try to rewrite floating point comparisons & branches to avoid
3414 useless add,tr insns.
3415
3416 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3417 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3418 first attempt to remove useless add,tr insns. It is zero
3419 for the second pass as reorg sometimes leaves bogus REG_DEAD
3420 notes lying around.
3421
3422 When CHECK_NOTES is zero we can only eliminate add,tr insns
3423 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3424 instructions. */
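/* A sketch of the rewrite (added commentary, not from the original
   source): an fcmp like (set (reg 0) (ge ...)) followed by a branch
   in reversed form, (if_then_else (ne (reg 0) 0) (pc) (label_ref)),
   has its branch arms swapped and GE replaced by its maybe-unordered
   reverse UNLT, so the branch can be emitted directly without an
   add,tr filler.  */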
3425 static void
3426 remove_useless_addtr_insns (int check_notes)
3427 {
3428 rtx_insn *insn;
3429 static int pass = 0;
3430
3431 /* This is fairly cheap, so always run it when optimizing. */
3432 if (optimize > 0)
3433 {
3434 int fcmp_count = 0;
3435 int fbranch_count = 0;
3436
3437 /* Walk all the insns in this function looking for fcmp & fbranch
3438 instructions. Keep track of how many of each we find. */
3439 for (insn = get_insns (); insn; insn = next_insn (insn))
3440 {
3441 rtx tmp;
3442
3443 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3444 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3445 continue;
3446
3447 tmp = PATTERN (insn);
3448
3449 /* It must be a set. */
3450 if (GET_CODE (tmp) != SET)
3451 continue;
3452
3453 /* If the destination is CCFP, then we've found an fcmp insn. */
3454 tmp = SET_DEST (tmp);
3455 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3456 {
3457 fcmp_count++;
3458 continue;
3459 }
3460
3461 tmp = PATTERN (insn);
3462 /* If this is an fbranch instruction, bump the fbranch counter. */
3463 if (GET_CODE (tmp) == SET
3464 && SET_DEST (tmp) == pc_rtx
3465 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3466 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3467 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3468 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3469 {
3470 fbranch_count++;
3471 continue;
3472 }
3473 }
3474
3475
3476 /* Find all floating point compare + branch insns. If possible,
3477 reverse the comparison & the branch to avoid add,tr insns. */
3478 for (insn = get_insns (); insn; insn = next_insn (insn))
3479 {
3480 rtx tmp;
3481 rtx_insn *next;
3482
3483 /* Ignore anything that isn't an INSN. */
3484 if (! NONJUMP_INSN_P (insn))
3485 continue;
3486
3487 tmp = PATTERN (insn);
3488
3489 /* It must be a set. */
3490 if (GET_CODE (tmp) != SET)
3491 continue;
3492
3493 /* The destination must be CCFP, which is register zero. */
3494 tmp = SET_DEST (tmp);
3495 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3496 continue;
3497
3498 /* INSN should be a set of CCFP.
3499
3500 See if the result of this insn is used in a reversed FP
3501 conditional branch. If so, reverse our condition and
3502 the branch. Doing so avoids useless add,tr insns. */
3503 next = next_insn (insn);
3504 while (next)
3505 {
3506 /* Jumps, calls and labels stop our search. */
3507 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3508 break;
3509
3510 /* As does another fcmp insn. */
3511 if (NONJUMP_INSN_P (next)
3512 && GET_CODE (PATTERN (next)) == SET
3513 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3514 && REGNO (SET_DEST (PATTERN (next))) == 0)
3515 break;
3516
3517 next = next_insn (next);
3518 }
3519
3520 /* Is NEXT a branch? */
3521 if (next && JUMP_P (next))
3522 {
3523 rtx pattern = PATTERN (next);
3524
3525 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3526 and CCFP dies, then reverse our conditional and the branch
3527 to avoid the add,tr. */
3528 if (GET_CODE (pattern) == SET
3529 && SET_DEST (pattern) == pc_rtx
3530 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3531 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3532 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3533 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3534 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3535 && (fcmp_count == fbranch_count
3536 || (check_notes
3537 && find_regno_note (next, REG_DEAD, 0))))
3538 {
3539 /* Reverse the branch. */
3540 tmp = XEXP (SET_SRC (pattern), 1);
3541 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3542 XEXP (SET_SRC (pattern), 2) = tmp;
3543 INSN_CODE (next) = -1;
3544
3545 /* Reverse our condition. */
3546 tmp = PATTERN (insn);
3547 PUT_CODE (XEXP (tmp, 1),
3548 (reverse_condition_maybe_unordered
3549 (GET_CODE (XEXP (tmp, 1)))));
3550 }
3551 }
3552 }
3553 }
3554
3555 pass = !pass;
3556
3557 }
3558 \f
3559 /* You may have trouble believing this, but this is the 32 bit HP-PA
3560 stack layout. Wow.
3561
3562 Offset Contents
3563
3564 Variable arguments (optional; any number may be allocated)
3565
3566 SP-(4*(N+9)) arg word N
3567 : :
3568 SP-56 arg word 5
3569 SP-52 arg word 4
3570
3571 Fixed arguments (must be allocated; may remain unused)
3572
3573 SP-48 arg word 3
3574 SP-44 arg word 2
3575 SP-40 arg word 1
3576 SP-36 arg word 0
3577
3578 Frame Marker
3579
3580 SP-32 External Data Pointer (DP)
3581 SP-28 External sr4
3582 SP-24 External/stub RP (RP')
3583 SP-20 Current RP
3584 SP-16 Static Link
3585 SP-12 Clean up
3586 SP-8 Calling Stub RP (RP'')
3587 SP-4 Previous SP
3588
3589 Top of Frame
3590
3591 SP-0 Stack Pointer (points to next available address)
3592
3593 */
3594
3595 /* This function saves registers as follows. Registers marked with ' are
3596 this function's registers (as opposed to the previous function's).
3597 If a frame_pointer isn't needed, r4 is saved as a general register;
3598 the space for the frame pointer is still allocated, though, to keep
3599 things simple.
3600
3601
3602 Top of Frame
3603
3604 SP (FP') Previous FP
3605 SP + 4 Alignment filler (sigh)
3606 SP + 8 Space for locals reserved here.
3607 .
3608 .
3609 .
3610 SP + n All call saved registers used.
3611 .
3612 .
3613 .
3614 SP + o All call saved fp registers used.
3615 .
3616 .
3617 .
3618 SP + p (SP') points to next available address.
3619
3620 */
3621
3622 /* Global variables set by output_function_prologue(). */
3623 /* Size of frame. Need to know this to emit return insns from
3624 leaf procedures. */
3625 static HOST_WIDE_INT actual_fsize, local_fsize;
3626 static int save_fregs;
3627
3628 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3629 Handle case where DISP > 8k by using the add_high_const patterns.
3630
3631 Note in DISP > 8k case, we will leave the high part of the address
3632 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3633
3634 static void
3635 store_reg (int reg, HOST_WIDE_INT disp, int base)
3636 {
3637 rtx dest, src, basereg;
3638 rtx_insn *insn;
3639
3640 src = gen_rtx_REG (word_mode, reg);
3641 basereg = gen_rtx_REG (Pmode, base);
3642 if (VAL_14_BITS_P (disp))
3643 {
3644 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3645 insn = emit_move_insn (dest, src);
3646 }
3647 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3648 {
3649 rtx delta = GEN_INT (disp);
3650 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3651
3652 emit_move_insn (tmpreg, delta);
3653 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3654 if (DO_FRAME_NOTES)
3655 {
3656 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3657 gen_rtx_SET (tmpreg,
3658 gen_rtx_PLUS (Pmode, basereg, delta)));
3659 RTX_FRAME_RELATED_P (insn) = 1;
3660 }
3661 dest = gen_rtx_MEM (word_mode, tmpreg);
3662 insn = emit_move_insn (dest, src);
3663 }
3664 else
3665 {
3666 rtx delta = GEN_INT (disp);
3667 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3668 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3669
3670 emit_move_insn (tmpreg, high);
3671 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3672 insn = emit_move_insn (dest, src);
3673 if (DO_FRAME_NOTES)
3674 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3675 gen_rtx_SET (gen_rtx_MEM (word_mode,
3676 gen_rtx_PLUS (word_mode,
3677 basereg,
3678 delta)),
3679 src));
3680 }
3681
3682 if (DO_FRAME_NOTES)
3683 RTX_FRAME_RELATED_P (insn) = 1;
3684 }
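/* Illustrative sketch (not a literal dump of emitted code): for the
   DISP > 8k case above, store_reg (3, 0x12345, STACK_POINTER_REGNUM)
   amounts to

       addil L'0x12345,%sp        ; %r1 = %sp + left (high) part
       stw %r3,R'0x12345(%r1)     ; right (low) part folded into the store

   where L'/R' are the standard field selectors.  VAL_14_BITS_P accepts
   displacements in [-8192, 8191], the range of a 14-bit signed field,
   so smaller offsets take the single-instruction path instead.  */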
3685
3686 /* Emit RTL to store REG at the memory location specified by BASE and then
3687 add MOD to BASE. MOD must be <= 8k. */
3688
3689 static void
3690 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3691 {
3692 rtx basereg, srcreg, delta;
3693 rtx_insn *insn;
3694
3695 gcc_assert (VAL_14_BITS_P (mod));
3696
3697 basereg = gen_rtx_REG (Pmode, base);
3698 srcreg = gen_rtx_REG (word_mode, reg);
3699 delta = GEN_INT (mod);
3700
3701 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3702 if (DO_FRAME_NOTES)
3703 {
3704 RTX_FRAME_RELATED_P (insn) = 1;
3705
3706 /* RTX_FRAME_RELATED_P must be set on each frame related set
3707 in a parallel with more than one element. */
3708 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3709 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3710 }
3711 }
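/* As a sketch of the intent (again not a literal dump),
   store_reg_modify (STACK_POINTER_REGNUM, 1, 64) is equivalent to

       stwm %r1,64(%sp)    ; store %r1 at *%sp, then %sp += 64

   The store and the stack adjustment happen in a single instruction,
   which is what lets the prologue below merge the first register save
   with the frame allocation.  */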
3712
3713 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3714 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3715 whether to add a frame note or not.
3716
3717 In the DISP > 8k case, we leave the high part of the address in %r1.
3718 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3719
3720 static void
3721 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3722 {
3723 rtx_insn *insn;
3724
3725 if (VAL_14_BITS_P (disp))
3726 {
3727 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3728 plus_constant (Pmode,
3729 gen_rtx_REG (Pmode, base), disp));
3730 }
3731 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3732 {
3733 rtx basereg = gen_rtx_REG (Pmode, base);
3734 rtx delta = GEN_INT (disp);
3735 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3736
3737 emit_move_insn (tmpreg, delta);
3738 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3739 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3740 if (DO_FRAME_NOTES)
3741 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3742 gen_rtx_SET (tmpreg,
3743 gen_rtx_PLUS (Pmode, basereg, delta)));
3744 }
3745 else
3746 {
3747 rtx basereg = gen_rtx_REG (Pmode, base);
3748 rtx delta = GEN_INT (disp);
3749 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3750
3751 emit_move_insn (tmpreg,
3752 gen_rtx_PLUS (Pmode, basereg,
3753 gen_rtx_HIGH (Pmode, delta)));
3754 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3755 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3756 }
3757
3758 if (DO_FRAME_NOTES && note)
3759 RTX_FRAME_RELATED_P (insn) = 1;
3760 }
3761
3762 HOST_WIDE_INT
3763 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3764 {
3765 int freg_saved = 0;
3766 int i, j;
3767
3768 /* The code in pa_expand_prologue and pa_expand_epilogue must
3769 be consistent with the rounding and size calculation done here.
3770 Change them at the same time. */
3771
3772 /* We do our own stack alignment. First, round the size of the
3773 stack locals up to a word boundary. */
3774 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3775
3776 /* Space for previous frame pointer + filler. If any frame is
3777 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3778 waste some space here for the sake of HP compatibility. The
3779 first slot is only used when the frame pointer is needed. */
3780 if (size || frame_pointer_needed)
3781 size += STARTING_FRAME_OFFSET;
3782
3783 /* If the current function calls __builtin_eh_return, then we need
3784 to allocate stack space for registers that will hold data for
3785 the exception handler. */
3786 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3787 {
3788 unsigned int i;
3789
3790 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3791 continue;
3792 size += i * UNITS_PER_WORD;
3793 }
3794
3795 /* Account for space used by the callee general register saves. */
3796 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3797 if (df_regs_ever_live_p (i))
3798 size += UNITS_PER_WORD;
3799
3800 /* Account for space used by the callee floating point register saves. */
3801 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3802 if (df_regs_ever_live_p (i)
3803 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3804 {
3805 freg_saved = 1;
3806
3807 /* We always save both halves of the FP register, so always
3808 increment the frame size by 8 bytes. */
3809 size += 8;
3810 }
3811
3812 /* If any of the floating registers are saved, account for the
3813 alignment needed for the floating point register save block. */
3814 if (freg_saved)
3815 {
3816 size = (size + 7) & ~7;
3817 if (fregs_live)
3818 *fregs_live = 1;
3819 }
3820
3821 /* The various ABIs include space for the outgoing parameters in the
3822 size of the current function's stack frame. We don't need to align
3823 for the outgoing arguments as their alignment is set by the final
3824 rounding for the frame as a whole. */
3825 size += crtl->outgoing_args_size;
3826
3827 /* Allocate space for the fixed frame marker. This space must be
3828 allocated for any function that makes calls or allocates
3829 stack space. */
3830 if (!crtl->is_leaf || size)
3831 size += TARGET_64BIT ? 48 : 32;
3832
3833 /* Finally, round to the preferred stack boundary. */
3834 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3835 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3836 }
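/* A worked 32-bit example (assumed parameters: UNITS_PER_WORD = 4,
   STARTING_FRAME_OFFSET = 8, 32-byte frame marker, 64-byte preferred
   stack boundary): with 10 bytes of locals, two callee GR saves, no FP
   saves and 16 bytes of outgoing arguments, the computation is

       12 (locals, word-rounded) + 8 (FP slot + filler)  = 20
       20 + 2 * 4 (GR saves)                             = 28
       28 + 16 (outgoing args) + 32 (frame marker)       = 76
       76 rounded up to the 64-byte boundary             = 128.  */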
3837
3838 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3839 of memory. If any fpu reg is used in the function, we allocate
3840 such a block here, at the bottom of the frame, just in case it's needed.
3841
3842 If this function is a leaf procedure, then we may choose not
3843 to do a "save" insn. The decision about whether or not
3844 to do this is made in regclass.c. */
3845
3846 static void
3847 pa_output_function_prologue (FILE *file)
3848 {
3849 /* The function's label and associated .PROC must never be
3850 separated and must be output *after* any profiling declarations
3851 to avoid changing spaces/subspaces within a procedure. */
3852 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3853 fputs ("\t.PROC\n", file);
3854
3855 /* pa_expand_prologue does the dirty work now. We just need
3856 to output the assembler directives which denote the start
3857 of a function. */
3858 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3859 if (crtl->is_leaf)
3860 fputs (",NO_CALLS", file);
3861 else
3862 fputs (",CALLS", file);
3863 if (rp_saved)
3864 fputs (",SAVE_RP", file);
3865
3866 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3867 at the beginning of the frame and that it is used as the frame
3868 pointer for the frame. We do this because our current frame
3869 layout doesn't conform to that specified in the HP runtime
3870 documentation and we need a way to indicate to programs such as
3871 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3872 isn't used by HP compilers but is supported by the assembler.
3873 However, SAVE_SP is supposed to indicate that the previous stack
3874 pointer has been saved in the frame marker. */
3875 if (frame_pointer_needed)
3876 fputs (",SAVE_SP", file);
3877
3878 /* Pass on information about the number of callee register saves
3879 performed in the prologue.
3880
3881 The compiler is supposed to pass the highest register number
3882 saved, the assembler then has to adjust that number before
3883 entering it into the unwind descriptor (to account for any
3884 caller saved registers with lower register numbers than the
3885 first callee saved register). */
3886 if (gr_saved)
3887 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3888
3889 if (fr_saved)
3890 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3891
3892 fputs ("\n\t.ENTRY\n", file);
3893
3894 remove_useless_addtr_insns (0);
3895 }
3896
3897 void
3898 pa_expand_prologue (void)
3899 {
3900 int merge_sp_adjust_with_store = 0;
3901 HOST_WIDE_INT size = get_frame_size ();
3902 HOST_WIDE_INT offset;
3903 int i;
3904 rtx tmpreg;
3905 rtx_insn *insn;
3906
3907 gr_saved = 0;
3908 fr_saved = 0;
3909 save_fregs = 0;
3910
3911 /* Compute total size for frame pointer, filler, locals and rounding to
3912 the next word boundary. Similar code appears in pa_compute_frame_size
3913 and must be changed in tandem with this code. */
3914 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3915 if (local_fsize || frame_pointer_needed)
3916 local_fsize += STARTING_FRAME_OFFSET;
3917
3918 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3919 if (flag_stack_usage_info)
3920 current_function_static_stack_size = actual_fsize;
3921
3922 /* Compute a few things we will use often. */
3923 tmpreg = gen_rtx_REG (word_mode, 1);
3924
3925 /* Save RP first. The calling conventions manual states RP will
3926 always be stored into the caller's frame at sp - 20 or sp - 16
3927 depending on which ABI is in use. */
3928 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3929 {
3930 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3931 rp_saved = true;
3932 }
3933 else
3934 rp_saved = false;
3935
3936 /* Allocate the local frame and set up the frame pointer if needed. */
3937 if (actual_fsize != 0)
3938 {
3939 if (frame_pointer_needed)
3940 {
3941 /* Copy the old frame pointer temporarily into %r1. Set up the
3942 new stack pointer, then store away the saved old frame pointer
3943 into the stack at sp and at the same time update the stack
3944 pointer by actual_fsize bytes. Two versions, first
3945 handles small (<8k) frames. The second handles large (>=8k)
3946 frames. */
3947 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3948 if (DO_FRAME_NOTES)
3949 RTX_FRAME_RELATED_P (insn) = 1;
3950
3951 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3952 if (DO_FRAME_NOTES)
3953 RTX_FRAME_RELATED_P (insn) = 1;
3954
3955 if (VAL_14_BITS_P (actual_fsize))
3956 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3957 else
3958 {
3959 /* It is incorrect to store the saved frame pointer at *sp,
3960 then increment sp (writes beyond the current stack boundary).
3961
3962 So instead use stwm to store at *sp and post-increment the
3963 stack pointer as an atomic operation. Then increment sp to
3964 finish allocating the new frame. */
3965 HOST_WIDE_INT adjust1 = 8192 - 64;
3966 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3967
3968 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3969 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3970 adjust2, 1);
3971 }
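          /* Illustrative numbers for the split above: with
             actual_fsize = 20000, adjust1 = 8192 - 64 = 8128 is the
             largest stwm displacement that both fits the 14-bit field
             and preserves 64-byte stack alignment, and the remaining
             adjust2 = 11872 is added with a separate addil/ldo
             sequence via set_reg_plus_d.  */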
3972
3973 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3974 we need to store the previous stack pointer (frame pointer)
3975 into the frame marker on targets that use the HP unwind
3976 library. This allows the HP unwind library to be used to
3977 unwind GCC frames. However, we are not fully compatible
3978 with the HP library because our frame layout differs from
3979 that specified in the HP runtime specification.
3980
3981 We don't want a frame note on this instruction as the frame
3982 marker moves during dynamic stack allocation.
3983
3984 This instruction also serves as a blockage to prevent
3985 register spills from being scheduled before the stack
3986 pointer is raised. This is necessary as we store
3987 registers using the frame pointer as a base register,
3988 and the frame pointer is set before sp is raised. */
3989 if (TARGET_HPUX_UNWIND_LIBRARY)
3990 {
3991 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3992 GEN_INT (TARGET_64BIT ? -8 : -4));
3993
3994 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3995 hard_frame_pointer_rtx);
3996 }
3997 else
3998 emit_insn (gen_blockage ());
3999 }
4000 /* no frame pointer needed. */
4001 else
4002 {
4003 /* In some cases we can perform the first callee register save
4004 and allocating the stack frame at the same time. If so, just
4005 make a note of it and defer allocating the frame until saving
4006 the callee registers. */
4007 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4008 merge_sp_adjust_with_store = 1;
4009 /* Cannot optimize. Adjust the stack frame by actual_fsize
4010 bytes. */
4011 else
4012 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4013 actual_fsize, 1);
4014 }
4015 }
4016
4017 /* Normal register save.
4018
4019 Do not save the frame pointer in the frame_pointer_needed case. It
4020 was done earlier. */
4021 if (frame_pointer_needed)
4022 {
4023 offset = local_fsize;
4024
4025 /* Saving the EH return data registers in the frame is the simplest
4026 way to get the frame unwind information emitted. We put them
4027 just before the general registers. */
4028 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4029 {
4030 unsigned int i, regno;
4031
4032 for (i = 0; ; ++i)
4033 {
4034 regno = EH_RETURN_DATA_REGNO (i);
4035 if (regno == INVALID_REGNUM)
4036 break;
4037
4038 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4039 offset += UNITS_PER_WORD;
4040 }
4041 }
4042
4043 for (i = 18; i >= 4; i--)
4044 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4045 {
4046 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4047 offset += UNITS_PER_WORD;
4048 gr_saved++;
4049 }
4050 /* Account for %r3 which is saved in a special place. */
4051 gr_saved++;
4052 }
4053 /* No frame pointer needed. */
4054 else
4055 {
4056 offset = local_fsize - actual_fsize;
4057
4058 /* Saving the EH return data registers in the frame is the simplest
4059 way to get the frame unwind information emitted. */
4060 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4061 {
4062 unsigned int i, regno;
4063
4064 for (i = 0; ; ++i)
4065 {
4066 regno = EH_RETURN_DATA_REGNO (i);
4067 if (regno == INVALID_REGNUM)
4068 break;
4069
4070 /* If merge_sp_adjust_with_store is nonzero, then we can
4071 optimize the first save. */
4072 if (merge_sp_adjust_with_store)
4073 {
4074 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4075 merge_sp_adjust_with_store = 0;
4076 }
4077 else
4078 store_reg (regno, offset, STACK_POINTER_REGNUM);
4079 offset += UNITS_PER_WORD;
4080 }
4081 }
4082
4083 for (i = 18; i >= 3; i--)
4084 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4085 {
4086 /* If merge_sp_adjust_with_store is nonzero, then we can
4087 optimize the first GR save. */
4088 if (merge_sp_adjust_with_store)
4089 {
4090 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4091 merge_sp_adjust_with_store = 0;
4092 }
4093 else
4094 store_reg (i, offset, STACK_POINTER_REGNUM);
4095 offset += UNITS_PER_WORD;
4096 gr_saved++;
4097 }
4098
4099 /* If we wanted to merge the SP adjustment with a GR save, but we never
4100 did any GR saves, then just emit the adjustment here. */
4101 if (merge_sp_adjust_with_store)
4102 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4103 actual_fsize, 1);
4104 }
4105
4106 /* The hppa calling conventions say that %r19, the pic offset
4107 register, is saved at sp - 32 (in this function's frame)
4108 when generating PIC code. FIXME: What is the correct thing
4109 to do for functions which make no calls and allocate no
4110 frame? Do we need to allocate a frame, or can we just omit
4111 the save? For now we'll just omit the save.
4112
4113 We don't want a note on this insn as the frame marker can
4114 move if there is a dynamic stack allocation. */
4115 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4116 {
4117 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4118
4119 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4120
4121 }
4122
4123 /* Align pointer properly (doubleword boundary). */
4124 offset = (offset + 7) & ~7;
4125
4126 /* Floating point register store. */
4127 if (save_fregs)
4128 {
4129 rtx base;
4130
4131 /* First get the frame or stack pointer to the start of the FP register
4132 save area. */
4133 if (frame_pointer_needed)
4134 {
4135 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4136 base = hard_frame_pointer_rtx;
4137 }
4138 else
4139 {
4140 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4141 base = stack_pointer_rtx;
4142 }
4143
4144 /* Now actually save the FP registers. */
4145 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4146 {
4147 if (df_regs_ever_live_p (i)
4148 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4149 {
4150 rtx addr, reg;
4151 rtx_insn *insn;
4152 addr = gen_rtx_MEM (DFmode,
4153 gen_rtx_POST_INC (word_mode, tmpreg));
4154 reg = gen_rtx_REG (DFmode, i);
4155 insn = emit_move_insn (addr, reg);
4156 if (DO_FRAME_NOTES)
4157 {
4158 RTX_FRAME_RELATED_P (insn) = 1;
4159 if (TARGET_64BIT)
4160 {
4161 rtx mem = gen_rtx_MEM (DFmode,
4162 plus_constant (Pmode, base,
4163 offset));
4164 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4165 gen_rtx_SET (mem, reg));
4166 }
4167 else
4168 {
4169 rtx meml = gen_rtx_MEM (SFmode,
4170 plus_constant (Pmode, base,
4171 offset));
4172 rtx memr = gen_rtx_MEM (SFmode,
4173 plus_constant (Pmode, base,
4174 offset + 4));
4175 rtx regl = gen_rtx_REG (SFmode, i);
4176 rtx regr = gen_rtx_REG (SFmode, i + 1);
4177 rtx setl = gen_rtx_SET (meml, regl);
4178 rtx setr = gen_rtx_SET (memr, regr);
4179 rtvec vec;
4180
4181 RTX_FRAME_RELATED_P (setl) = 1;
4182 RTX_FRAME_RELATED_P (setr) = 1;
4183 vec = gen_rtvec (2, setl, setr);
4184 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4185 gen_rtx_SEQUENCE (VOIDmode, vec));
4186 }
4187 }
4188 offset += GET_MODE_SIZE (DFmode);
4189 fr_saved++;
4190 }
4191 }
4192 }
4193 }
4194
4195 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4196 Handle case where DISP > 8k by using the add_high_const patterns. */
4197
4198 static void
4199 load_reg (int reg, HOST_WIDE_INT disp, int base)
4200 {
4201 rtx dest = gen_rtx_REG (word_mode, reg);
4202 rtx basereg = gen_rtx_REG (Pmode, base);
4203 rtx src;
4204
4205 if (VAL_14_BITS_P (disp))
4206 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4207 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4208 {
4209 rtx delta = GEN_INT (disp);
4210 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4211
4212 emit_move_insn (tmpreg, delta);
4213 if (TARGET_DISABLE_INDEXING)
4214 {
4215 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4216 src = gen_rtx_MEM (word_mode, tmpreg);
4217 }
4218 else
4219 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4220 }
4221 else
4222 {
4223 rtx delta = GEN_INT (disp);
4224 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4225 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4226
4227 emit_move_insn (tmpreg, high);
4228 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4229 }
4230
4231 emit_move_insn (dest, src);
4232 }
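/* Note on the 64-bit large-displacement path above: normally the load
   can use a (reg + reg) indexed address; when indexing is disabled
   (-mdisable-indexing), the full address is formed in %r1 first and a
   plain register-indirect load is used instead.  */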
4233
4234 /* Update the total code bytes output to the text section. */
4235
4236 static void
4237 update_total_code_bytes (unsigned int nbytes)
4238 {
4239 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4240 && !IN_NAMED_SECTION_P (cfun->decl))
4241 {
4242 unsigned int old_total = total_code_bytes;
4243
4244 total_code_bytes += nbytes;
4245
4246 /* Be prepared to handle overflows. */
4247 if (old_total > total_code_bytes)
4248 total_code_bytes = UINT_MAX;
4249 }
4250 }
4251
4252 /* This function generates the assembly code for function exit.
4253 Args are as for output_function_prologue ().
4254
4255 The function epilogue should not depend on the current stack
4256 pointer! It should use the frame pointer only. This is mandatory
4257 because of alloca; we also take advantage of it to omit stack
4258 adjustments before returning. */
4259
4260 static void
4261 pa_output_function_epilogue (FILE *file)
4262 {
4263 rtx_insn *insn = get_last_insn ();
4264 bool extra_nop;
4265
4266 /* pa_expand_epilogue does the dirty work now. We just need
4267 to output the assembler directives which denote the end
4268 of a function.
4269
4270 To make debuggers happy, emit a nop if the epilogue was completely
4271 eliminated due to a volatile call as the last insn in the
4272 current function. That way the return address (in %r2) will
4273 always point to a valid instruction in the current function. */
4274
4275 /* Get the last real insn. */
4276 if (NOTE_P (insn))
4277 insn = prev_real_insn (insn);
4278
4279 /* If it is a sequence, then look inside. */
4280 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4281 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4282
4283 /* If insn is a CALL_INSN, then it must be a call to a volatile
4284 function (otherwise there would be epilogue insns). */
4285 if (insn && CALL_P (insn))
4286 {
4287 fputs ("\tnop\n", file);
4288 extra_nop = true;
4289 }
4290 else
4291 extra_nop = false;
4292
4293 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4294
4295 if (TARGET_SOM && TARGET_GAS)
4296 {
4297 /* We are done with this subspace except possibly for some additional
4298 debug information. Forget that we are in this subspace to ensure
4299 that the next function is output in its own subspace. */
4300 in_section = NULL;
4301 cfun->machine->in_nsubspa = 2;
4302 }
4303
4304 /* Thunks do their own insn accounting. */
4305 if (cfun->is_thunk)
4306 return;
4307
4308 if (INSN_ADDRESSES_SET_P ())
4309 {
4310 last_address = extra_nop ? 4 : 0;
4311 insn = get_last_nonnote_insn ();
4312 if (insn)
4313 {
4314 last_address += INSN_ADDRESSES (INSN_UID (insn));
4315 if (INSN_P (insn))
4316 last_address += insn_default_length (insn);
4317 }
4318 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4319 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4320 }
4321 else
4322 last_address = UINT_MAX;
4323
4324 /* Finally, update the total number of code bytes output so far. */
4325 update_total_code_bytes (last_address);
4326 }
4327
4328 void
4329 pa_expand_epilogue (void)
4330 {
4331 rtx tmpreg;
4332 HOST_WIDE_INT offset;
4333 HOST_WIDE_INT ret_off = 0;
4334 int i;
4335 int merge_sp_adjust_with_load = 0;
4336
4337 /* We will use this often. */
4338 tmpreg = gen_rtx_REG (word_mode, 1);
4339
4340 /* Try to restore RP early to avoid load/use interlocks when
4341 RP gets used in the return (bv) instruction. This appears to still
4342 be necessary even when we schedule the prologue and epilogue. */
4343 if (rp_saved)
4344 {
4345 ret_off = TARGET_64BIT ? -16 : -20;
4346 if (frame_pointer_needed)
4347 {
4348 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4349 ret_off = 0;
4350 }
4351 else
4352 {
4353 /* No frame pointer, and stack is smaller than 8k. */
4354 if (VAL_14_BITS_P (ret_off - actual_fsize))
4355 {
4356 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4357 ret_off = 0;
4358 }
4359 }
4360 }
4361
4362 /* General register restores. */
4363 if (frame_pointer_needed)
4364 {
4365 offset = local_fsize;
4366
4367 /* If the current function calls __builtin_eh_return, then we need
4368 to restore the saved EH data registers. */
4369 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4370 {
4371 unsigned int i, regno;
4372
4373 for (i = 0; ; ++i)
4374 {
4375 regno = EH_RETURN_DATA_REGNO (i);
4376 if (regno == INVALID_REGNUM)
4377 break;
4378
4379 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4380 offset += UNITS_PER_WORD;
4381 }
4382 }
4383
4384 for (i = 18; i >= 4; i--)
4385 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4386 {
4387 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4388 offset += UNITS_PER_WORD;
4389 }
4390 }
4391 else
4392 {
4393 offset = local_fsize - actual_fsize;
4394
4395 /* If the current function calls __builtin_eh_return, then we need
4396 to restore the saved EH data registers. */
4397 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4398 {
4399 unsigned int i, regno;
4400
4401 for (i = 0; ; ++i)
4402 {
4403 regno = EH_RETURN_DATA_REGNO (i);
4404 if (regno == INVALID_REGNUM)
4405 break;
4406
4407 /* Only for the first load.
4408 merge_sp_adjust_with_load holds the number of the register
4409 whose load we will merge with the sp adjustment. */
4410 if (merge_sp_adjust_with_load == 0
4411 && local_fsize == 0
4412 && VAL_14_BITS_P (-actual_fsize))
4413 merge_sp_adjust_with_load = regno;
4414 else
4415 load_reg (regno, offset, STACK_POINTER_REGNUM);
4416 offset += UNITS_PER_WORD;
4417 }
4418 }
4419
4420 for (i = 18; i >= 3; i--)
4421 {
4422 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4423 {
4424 /* Only for the first load.
4425 merge_sp_adjust_with_load holds the number of the register
4426 whose load we will merge with the sp adjustment. */
4427 if (merge_sp_adjust_with_load == 0
4428 && local_fsize == 0
4429 && VAL_14_BITS_P (-actual_fsize))
4430 merge_sp_adjust_with_load = i;
4431 else
4432 load_reg (i, offset, STACK_POINTER_REGNUM);
4433 offset += UNITS_PER_WORD;
4434 }
4435 }
4436 }
4437
4438 /* Align pointer properly (doubleword boundary). */
4439 offset = (offset + 7) & ~7;
4440
4441 /* FP register restores. */
4442 if (save_fregs)
4443 {
4444 /* Adjust the register to index off of. */
4445 if (frame_pointer_needed)
4446 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4447 else
4448 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4449
4450 /* Actually do the restores now. */
4451 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4452 if (df_regs_ever_live_p (i)
4453 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4454 {
4455 rtx src = gen_rtx_MEM (DFmode,
4456 gen_rtx_POST_INC (word_mode, tmpreg));
4457 rtx dest = gen_rtx_REG (DFmode, i);
4458 emit_move_insn (dest, src);
4459 }
4460 }
4461
4462 /* Emit a blockage insn here to keep these insns from being moved to
4463 an earlier spot in the epilogue, or into the main instruction stream.
4464
4465 This is necessary as we must not cut the stack back before all the
4466 restores are finished. */
4467 emit_insn (gen_blockage ());
4468
4469 /* Reset stack pointer (and possibly frame pointer). The stack
4470 pointer is initially set to fp + 64 to avoid a race condition. */
4471 if (frame_pointer_needed)
4472 {
4473 rtx delta = GEN_INT (-64);
4474
4475 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4476 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4477 stack_pointer_rtx, delta));
4478 }
4479 /* If we were deferring a callee register restore, do it now. */
4480 else if (merge_sp_adjust_with_load)
4481 {
4482 rtx delta = GEN_INT (-actual_fsize);
4483 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4484
4485 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4486 }
4487 else if (actual_fsize != 0)
4488 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4489 - actual_fsize, 0);
4490
4491 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4492 frame greater than 8k), do so now. */
4493 if (ret_off != 0)
4494 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4495
4496 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4497 {
4498 rtx sa = EH_RETURN_STACKADJ_RTX;
4499
4500 emit_insn (gen_blockage ());
4501 emit_insn (TARGET_64BIT
4502 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4503 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4504 }
4505 }
4506
4507 bool
4508 pa_can_use_return_insn (void)
4509 {
4510 if (!reload_completed)
4511 return false;
4512
4513 if (frame_pointer_needed)
4514 return false;
4515
4516 if (df_regs_ever_live_p (2))
4517 return false;
4518
4519 if (crtl->profile)
4520 return false;
4521
4522 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4523 }
4524
4525 rtx
4526 hppa_pic_save_rtx (void)
4527 {
4528 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4529 }
4530
4531 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4532 #define NO_DEFERRED_PROFILE_COUNTERS 0
4533 #endif
4534
4535
4536 /* Vector of funcdef numbers. */
4537 static vec<int> funcdef_nos;
4538
4539 /* Output deferred profile counters. */
4540 static void
4541 output_deferred_profile_counters (void)
4542 {
4543 unsigned int i;
4544 int align, n;
4545
4546 if (funcdef_nos.is_empty ())
4547 return;
4548
4549 switch_to_section (data_section);
4550 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4551 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4552
4553 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4554 {
4555 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4556 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4557 }
4558
4559 funcdef_nos.release ();
4560 }
4561
4562 void
4563 hppa_profile_hook (int label_no)
4564 {
4565 /* We use SImode for the address of the function in both 32 and
4566 64-bit code to avoid having to provide DImode versions of the
4567 lcla2 and load_offset_label_address insn patterns. */
4568 rtx reg = gen_reg_rtx (SImode);
4569 rtx_code_label *label_rtx = gen_label_rtx ();
4570 rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount"));
4571 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4572 rtx arg_bytes, begin_label_rtx;
4573 rtx_insn *call_insn;
4574 char begin_label_name[16];
4575 bool use_mcount_pcrel_call;
4576
4577 /* If we can reach _mcount with a pc-relative call, we can optimize
4578 loading the address of the current function. This requires linker
4579 long branch stub support. */
4580 if (!TARGET_PORTABLE_RUNTIME
4581 && !TARGET_LONG_CALLS
4582 && (TARGET_SOM || flag_function_sections))
4583 use_mcount_pcrel_call = TRUE;
4584 else
4585 use_mcount_pcrel_call = FALSE;
4586
4587 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4588 label_no);
4589 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4590
4591 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4592
4593 if (!use_mcount_pcrel_call)
4594 {
4595 /* The address of the function is loaded into %r25 with an instruction-
4596 relative sequence that avoids the use of relocations. The sequence
4597 is split so that the load_offset_label_address instruction can
4598 occupy the delay slot of the call to _mcount. */
4599 if (TARGET_PA_20)
4600 emit_insn (gen_lcla2 (reg, label_rtx));
4601 else
4602 emit_insn (gen_lcla1 (reg, label_rtx));
4603
4604 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4605 reg,
4606 begin_label_rtx,
4607 label_rtx));
4608 }
4609
4610 if (!NO_DEFERRED_PROFILE_COUNTERS)
4611 {
4612 rtx count_label_rtx, addr, r24;
4613 char count_label_name[16];
4614
4615 funcdef_nos.safe_push (label_no);
4616 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4617 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4618 ggc_strdup (count_label_name));
4619
4620 addr = force_reg (Pmode, count_label_rtx);
4621 r24 = gen_rtx_REG (Pmode, 24);
4622 emit_move_insn (r24, addr);
4623
4624 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4625 if (use_mcount_pcrel_call)
4626 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4627 begin_label_rtx));
4628 else
4629 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4630
4631 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4632 }
4633 else
4634 {
4635 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4636 if (use_mcount_pcrel_call)
4637 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4638 begin_label_rtx));
4639 else
4640 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4641 }
4642
4643 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4644 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4645
4646 /* Indicate the _mcount call cannot throw, nor will it execute a
4647 non-local goto. */
4648 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4649
4650 /* Allocate space for fixed arguments. */
4651 if (reg_parm_stack_space > crtl->outgoing_args_size)
4652 crtl->outgoing_args_size = reg_parm_stack_space;
4653 }
4654
4655 /* Fetch the return address for the frame COUNT steps up from
4656 the current frame, after the prologue. FRAMEADDR is the
4657 frame pointer of the COUNT frame.
4658
4659 We want to ignore any export stub remnants here. To handle this,
4660 we examine the code at the return address, and if it is an export
4661 stub, we return a memory rtx for the stub return address stored
4662 at frame-24.
4663
4664 The value returned is used in two different ways:
4665
4666 1. To find a function's caller.
4667
4668 2. To change the return address for a function.
4669
4670 This function handles most instances of case 1; however, it will
4671 fail if there are two levels of stubs to execute on the return
4672 path. The only way I believe that can happen is if the return value
4673 needs a parameter relocation, which never happens for C code.
4674
4675 This function handles most instances of case 2; however, it will
4676 fail if we did not originally have stub code on the return path
4677 but will need stub code on the new return path. This can happen if
4678 the caller & callee are both in the main program, but the new
4679 return location is in a shared library. */
4680
4681 rtx
4682 pa_return_addr_rtx (int count, rtx frameaddr)
4683 {
4684 rtx label;
4685 rtx rp;
4686 rtx saved_rp;
4687 rtx ins;
4688
4689 /* The instruction stream at the return address of a PA1.X export stub is:
4690
4691 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4692 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4693 0x00011820 | stub+16: mtsp r1,sr0
4694 0xe0400002 | stub+20: be,n 0(sr0,rp)
4695
4696 0xe0400002 must be specified as -532676606 so that it won't be
4697 rejected as an invalid immediate operand on 64-bit hosts.
4698
4699 The instruction stream at the return address of a PA2.0 export stub is:
4700
4701 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4702 0xe840d002 | stub+12: bve,n (rp)
4703 Likewise, 0xe840d002 must be specified as -398405630. */
4704
4705 HOST_WIDE_INT insns[4];
4706 int i, len;
4707
4708 if (count != 0)
4709 return NULL_RTX;
4710
4711 rp = get_hard_reg_initial_val (Pmode, 2);
4712
4713 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4714 return rp;
4715
4716 /* If there is no export stub then just use the value saved from
4717 the return pointer register. */
4718
4719 saved_rp = gen_reg_rtx (Pmode);
4720 emit_move_insn (saved_rp, rp);
4721
4722 /* Get pointer to the instruction stream. We have to mask out the
4723 privilege level from the two low order bits of the return address
4724 pointer here so that ins will point to the start of the first
4725 instruction that would have been executed if we returned. */
4726 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4727 label = gen_label_rtx ();
4728
4729 if (TARGET_PA_20)
4730 {
4731 insns[0] = 0x4bc23fd1;
4732 insns[1] = -398405630;
4733 len = 2;
4734 }
4735 else
4736 {
4737 insns[0] = 0x4bc23fd1;
4738 insns[1] = 0x004010a1;
4739 insns[2] = 0x00011820;
4740 insns[3] = -532676606;
4741 len = 4;
4742 }
4743
4744 /* Check the instruction stream at the normal return address for the
4745 export stub. If it is an export stub, then our return address is
4746 really in -24[frameaddr]. */
4747
4748 for (i = 0; i < len; i++)
4749 {
4750 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4751 rtx op1 = GEN_INT (insns[i]);
4752 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4753 }
4754
4755 /* Here we know that our return address points to an export
4756 stub. We don't want to return the address of the export stub,
4757 but rather the return address of the export stub. That return
4758 address is stored at -24[frameaddr]. */
4759
4760 emit_move_insn (saved_rp,
4761 gen_rtx_MEM (Pmode,
4762 memory_address (Pmode,
4763 plus_constant (Pmode, frameaddr,
4764 -24))));
4765
4766 emit_label (label);
4767
4768 return saved_rp;
4769 }
4770
4771 void
4772 pa_emit_bcond_fp (rtx operands[])
4773 {
4774 enum rtx_code code = GET_CODE (operands[0]);
4775 rtx operand0 = operands[1];
4776 rtx operand1 = operands[2];
4777 rtx label = operands[3];
4778
4779 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4780 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4781
4782 emit_jump_insn (gen_rtx_SET (pc_rtx,
4783 gen_rtx_IF_THEN_ELSE (VOIDmode,
4784 gen_rtx_fmt_ee (NE,
4785 VOIDmode,
4786 gen_rtx_REG (CCFPmode, 0),
4787 const0_rtx),
4788 gen_rtx_LABEL_REF (VOIDmode, label),
4789 pc_rtx)));
4790
4791 }
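/* Schematically, for operands[0] = (lt ...) the RTL generated above is

       (set (reg:CCFP 0) (lt:CCFP op0 op1))
       (set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
                               (label_ref label)
                               (pc)))

   i.e. the fcmp sets CCFP register 0 and the branch tests it for NE.  */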
4792
4793 /* Adjust the cost of a scheduling dependency. Return the new cost of
4794 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4795
4796 static int
4797 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4798 unsigned int)
4799 {
4800 enum attr_type attr_type;
4801
4802 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4803 true dependencies as they are described with bypasses now. */
4804 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4805 return cost;
4806
4807 if (! recog_memoized (insn))
4808 return 0;
4809
4810 attr_type = get_attr_type (insn);
4811
4812 switch (dep_type)
4813 {
4814 case REG_DEP_ANTI:
4815 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4816 cycles later. */
4817
4818 if (attr_type == TYPE_FPLOAD)
4819 {
4820 rtx pat = PATTERN (insn);
4821 rtx dep_pat = PATTERN (dep_insn);
4822 if (GET_CODE (pat) == PARALLEL)
4823 {
4824 /* This happens for the fldXs,mb patterns. */
4825 pat = XVECEXP (pat, 0, 0);
4826 }
4827 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4828 /* If this happens, we have to extend this to schedule
4829 optimally. Return 0 for now. */
4830 return 0;
4831
4832 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4833 {
4834 if (! recog_memoized (dep_insn))
4835 return 0;
4836 switch (get_attr_type (dep_insn))
4837 {
4838 case TYPE_FPALU:
4839 case TYPE_FPMULSGL:
4840 case TYPE_FPMULDBL:
4841 case TYPE_FPDIVSGL:
4842 case TYPE_FPDIVDBL:
4843 case TYPE_FPSQRTSGL:
4844 case TYPE_FPSQRTDBL:
4845 /* A fpload can't be issued until one cycle before a
4846 preceding arithmetic operation has finished if
4847 the target of the fpload is any of the sources
4848 (or destination) of the arithmetic operation. */
4849 return insn_default_latency (dep_insn) - 1;
4850
4851 default:
4852 return 0;
4853 }
4854 }
4855 }
4856 else if (attr_type == TYPE_FPALU)
4857 {
4858 rtx pat = PATTERN (insn);
4859 rtx dep_pat = PATTERN (dep_insn);
4860 if (GET_CODE (pat) == PARALLEL)
4861 {
4862 /* This happens for the fldXs,mb patterns. */
4863 pat = XVECEXP (pat, 0, 0);
4864 }
4865 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4866 /* If this happens, we have to extend this to schedule
4867 optimally. Return 0 for now. */
4868 return 0;
4869
4870 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4871 {
4872 if (! recog_memoized (dep_insn))
4873 return 0;
4874 switch (get_attr_type (dep_insn))
4875 {
4876 case TYPE_FPDIVSGL:
4877 case TYPE_FPDIVDBL:
4878 case TYPE_FPSQRTSGL:
4879 case TYPE_FPSQRTDBL:
4880 /* An ALU flop can't be issued until two cycles before a
4881 preceding divide or sqrt operation has finished if
4882 the target of the ALU flop is any of the sources
4883 (or destination) of the divide or sqrt operation. */
4884 return insn_default_latency (dep_insn) - 2;
4885
4886 default:
4887 return 0;
4888 }
4889 }
4890 }
4891
4892 /* For other anti dependencies, the cost is 0. */
4893 return 0;
4894
4895 case REG_DEP_OUTPUT:
4896 /* Output dependency; DEP_INSN writes a register that INSN writes some
4897 cycles later. */
4898 if (attr_type == TYPE_FPLOAD)
4899 {
4900 rtx pat = PATTERN (insn);
4901 rtx dep_pat = PATTERN (dep_insn);
4902 if (GET_CODE (pat) == PARALLEL)
4903 {
4904 /* This happens for the fldXs,mb patterns. */
4905 pat = XVECEXP (pat, 0, 0);
4906 }
4907 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4908 /* If this happens, we have to extend this to schedule
4909 optimally. Return 0 for now. */
4910 return 0;
4911
4912 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4913 {
4914 if (! recog_memoized (dep_insn))
4915 return 0;
4916 switch (get_attr_type (dep_insn))
4917 {
4918 case TYPE_FPALU:
4919 case TYPE_FPMULSGL:
4920 case TYPE_FPMULDBL:
4921 case TYPE_FPDIVSGL:
4922 case TYPE_FPDIVDBL:
4923 case TYPE_FPSQRTSGL:
4924 case TYPE_FPSQRTDBL:
4925 /* A fpload can't be issued until one cycle before a
4926 preceding arithmetic operation has finished if
4927 the target of the fpload is the destination of the
4928 arithmetic operation.
4929
4930 Exception: For PA7100LC, PA7200 and PA7300, the cost
4931 is 3 cycles, unless they bundle together. We also
4932 pay the penalty if the second insn is a fpload. */
4933 return insn_default_latency (dep_insn) - 1;
4934
4935 default:
4936 return 0;
4937 }
4938 }
4939 }
4940 else if (attr_type == TYPE_FPALU)
4941 {
4942 rtx pat = PATTERN (insn);
4943 rtx dep_pat = PATTERN (dep_insn);
4944 if (GET_CODE (pat) == PARALLEL)
4945 {
4946 /* This happens for the fldXs,mb patterns. */
4947 pat = XVECEXP (pat, 0, 0);
4948 }
4949 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4950 /* If this happens, we have to extend this to schedule
4951 optimally. Return 0 for now. */
4952 return 0;
4953
4954 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4955 {
4956 if (! recog_memoized (dep_insn))
4957 return 0;
4958 switch (get_attr_type (dep_insn))
4959 {
4960 case TYPE_FPDIVSGL:
4961 case TYPE_FPDIVDBL:
4962 case TYPE_FPSQRTSGL:
4963 case TYPE_FPSQRTDBL:
4964 /* An ALU flop can't be issued until two cycles before a
4965 preceding divide or sqrt operation has finished if
4966 the target of the ALU flop is also the target of
4967 the divide or sqrt operation. */
4968 return insn_default_latency (dep_insn) - 2;
4969
4970 default:
4971 return 0;
4972 }
4973 }
4974 }
4975
4976 /* For other output dependencies, the cost is 0. */
4977 return 0;
4978
4979 default:
4980 gcc_unreachable ();
4981 }
4982 }
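/* Example of the anti-dependence adjustment above (the latency value is
   hypothetical): if INSN is an fpload whose target register is a source
   of a preceding fpdiv DEP_INSN with a default latency of 8 cycles, the
   returned cost is 8 - 1 = 7.  */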
4983
4984 /* Adjust scheduling priorities. We use this to try and keep addil
4985 and the next use of %r1 close together. */
4986 static int
4987 pa_adjust_priority (rtx_insn *insn, int priority)
4988 {
4989 rtx set = single_set (insn);
4990 rtx src, dest;
4991 if (set)
4992 {
4993 src = SET_SRC (set);
4994 dest = SET_DEST (set);
4995 if (GET_CODE (src) == LO_SUM
4996 && symbolic_operand (XEXP (src, 1), VOIDmode)
4997 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4998 priority >>= 3;
4999
5000 else if (GET_CODE (src) == MEM
5001 && GET_CODE (XEXP (src, 0)) == LO_SUM
5002 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
5003 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
5004 priority >>= 1;
5005
5006 else if (GET_CODE (dest) == MEM
5007 && GET_CODE (XEXP (dest, 0)) == LO_SUM
5008 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
5009 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
5010 priority >>= 3;
5011 }
5012 return priority;
5013 }
5014
5015 /* The 700 can only issue a single insn at a time.
5016 The 7XXX processors can issue two insns at a time.
5017 The 8000 can issue 4 insns at a time. */
5018 static int
5019 pa_issue_rate (void)
5020 {
5021 switch (pa_cpu)
5022 {
5023 case PROCESSOR_700: return 1;
5024 case PROCESSOR_7100: return 2;
5025 case PROCESSOR_7100LC: return 2;
5026 case PROCESSOR_7200: return 2;
5027 case PROCESSOR_7300: return 2;
5028 case PROCESSOR_8000: return 4;
5029
5030 default:
5031 gcc_unreachable ();
5032 }
5033 }
5034
5035
5036
5037 /* Return LENGTH plus any length adjustment needed by INSN, which
5038 already has its length computed as LENGTH. Return LENGTH if no
5039 adjustment is necessary.
5040
5041 Also compute the length of an inline block move here as it is too
5042 complicated to express as a length attribute in pa.md. */
5043 int
5044 pa_adjust_insn_length (rtx_insn *insn, int length)
5045 {
5046 rtx pat = PATTERN (insn);
5047
5048 /* If length is negative or undefined, provide initial length. */
5049 if ((unsigned int) length >= INT_MAX)
5050 {
5051 if (GET_CODE (pat) == SEQUENCE)
5052 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5053
5054 switch (get_attr_type (insn))
5055 {
5056 case TYPE_MILLI:
5057 length = pa_attr_length_millicode_call (insn);
5058 break;
5059 case TYPE_CALL:
5060 length = pa_attr_length_call (insn, 0);
5061 break;
5062 case TYPE_SIBCALL:
5063 length = pa_attr_length_call (insn, 1);
5064 break;
5065 case TYPE_DYNCALL:
5066 length = pa_attr_length_indirect_call (insn);
5067 break;
5068 case TYPE_SH_FUNC_ADRS:
5069 length = pa_attr_length_millicode_call (insn) + 20;
5070 break;
5071 default:
5072 gcc_unreachable ();
5073 }
5074 }
5075
5076 /* Block move pattern. */
5077 if (NONJUMP_INSN_P (insn)
5078 && GET_CODE (pat) == PARALLEL
5079 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5080 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5081 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5082 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5083 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5084 length += compute_movmem_length (insn) - 4;
5085 /* Block clear pattern. */
5086 else if (NONJUMP_INSN_P (insn)
5087 && GET_CODE (pat) == PARALLEL
5088 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5089 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5090 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5091 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5092 length += compute_clrmem_length (insn) - 4;
5093 /* Conditional branch with an unfilled delay slot. */
5094 else if (JUMP_P (insn) && ! simplejump_p (insn))
5095 {
5096 /* Adjust a short backwards conditional with an unfilled delay slot. */
5097 if (GET_CODE (pat) == SET
5098 && length == 4
5099 && JUMP_LABEL (insn) != NULL_RTX
5100 && ! forward_branch_p (insn))
5101 length += 4;
5102 else if (GET_CODE (pat) == PARALLEL
5103 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5104 && length == 4)
5105 length += 4;
5106 /* Adjust dbra insn with short backwards conditional branch with
5107 unfilled delay slot -- only for the case where the counter is in a
5108 general register. */
5109 else if (GET_CODE (pat) == PARALLEL
5110 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5111 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5112 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5113 && length == 4
5114 && ! forward_branch_p (insn))
5115 length += 4;
5116 }
5117 return length;
5118 }
5119
5120 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5121
5122 static bool
5123 pa_print_operand_punct_valid_p (unsigned char code)
5124 {
5125 if (code == '@'
5126 || code == '#'
5127 || code == '*'
5128 || code == '^')
5129 return true;
5130
5131 return false;
5132 }
5133
5134 /* Print operand X (an rtx) in assembler syntax to file FILE.
5135 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5136 For `%' followed by punctuation, CODE is the punctuation and X is null. */
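/* For example (derived from the condition tables below): if operand 1
   is (ge (reg) (reg)), "%C1" prints ">=", "%N1" (negated) prints "<",
   and "%S1" (swapped) prints "<=".  */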
5137
5138 void
5139 pa_print_operand (FILE *file, rtx x, int code)
5140 {
5141 switch (code)
5142 {
5143 case '#':
5144 /* Output a 'nop' if there's nothing for the delay slot. */
5145 if (dbr_sequence_length () == 0)
5146 fputs ("\n\tnop", file);
5147 return;
5148 case '*':
5149 /* Output a nullification completer if there's nothing for the
5150 delay slot or nullification is requested. */
5151 if (dbr_sequence_length () == 0
5152 || (final_sequence
5153 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5154 fputs (",n", file);
5155 return;
5156 case 'R':
5157 /* Print out the second register name of a register pair.
5158 I.e., R (6) => 7. */
5159 fputs (reg_names[REGNO (x) + 1], file);
5160 return;
5161 case 'r':
5162 /* A register or zero. */
5163 if (x == const0_rtx
5164 || (x == CONST0_RTX (DFmode))
5165 || (x == CONST0_RTX (SFmode)))
5166 {
5167 fputs ("%r0", file);
5168 return;
5169 }
5170 else
5171 break;
5172 case 'f':
5173 /* A register or zero (floating point). */
5174 if (x == const0_rtx
5175 || (x == CONST0_RTX (DFmode))
5176 || (x == CONST0_RTX (SFmode)))
5177 {
5178 fputs ("%fr0", file);
5179 return;
5180 }
5181 else
5182 break;
5183 case 'A':
5184 {
5185 rtx xoperands[2];
5186
5187 xoperands[0] = XEXP (XEXP (x, 0), 0);
5188 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5189 pa_output_global_address (file, xoperands[1], 0);
5190 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5191 return;
5192 }
5193
5194 case 'C': /* Plain (C)ondition */
5195 case 'X':
5196 switch (GET_CODE (x))
5197 {
5198 case EQ:
5199 fputs ("=", file); break;
5200 case NE:
5201 fputs ("<>", file); break;
5202 case GT:
5203 fputs (">", file); break;
5204 case GE:
5205 fputs (">=", file); break;
5206 case GEU:
5207 fputs (">>=", file); break;
5208 case GTU:
5209 fputs (">>", file); break;
5210 case LT:
5211 fputs ("<", file); break;
5212 case LE:
5213 fputs ("<=", file); break;
5214 case LEU:
5215 fputs ("<<=", file); break;
5216 case LTU:
5217 fputs ("<<", file); break;
5218 default:
5219 gcc_unreachable ();
5220 }
5221 return;
5222 case 'N': /* Condition, (N)egated */
5223 switch (GET_CODE (x))
5224 {
5225 case EQ:
5226 fputs ("<>", file); break;
5227 case NE:
5228 fputs ("=", file); break;
5229 case GT:
5230 fputs ("<=", file); break;
5231 case GE:
5232 fputs ("<", file); break;
5233 case GEU:
5234 fputs ("<<", file); break;
5235 case GTU:
5236 fputs ("<<=", file); break;
5237 case LT:
5238 fputs (">=", file); break;
5239 case LE:
5240 fputs (">", file); break;
5241 case LEU:
5242 fputs (">>", file); break;
5243 case LTU:
5244 fputs (">>=", file); break;
5245 default:
5246 gcc_unreachable ();
5247 }
5248 return;
5249 /* For floating point comparisons. Note that the output
5250 predicates are the complement of the desired mode. The
5251 conditions for GT, GE, LT, LE and LTGT cause an invalid
5252 operation exception if the result is unordered and this
5253 exception is enabled in the floating-point status register. */
5254 case 'Y':
5255 switch (GET_CODE (x))
5256 {
5257 case EQ:
5258 fputs ("!=", file); break;
5259 case NE:
5260 fputs ("=", file); break;
5261 case GT:
5262 fputs ("!>", file); break;
5263 case GE:
5264 fputs ("!>=", file); break;
5265 case LT:
5266 fputs ("!<", file); break;
5267 case LE:
5268 fputs ("!<=", file); break;
5269 case LTGT:
5270 fputs ("!<>", file); break;
5271 case UNLE:
5272 fputs ("!?<=", file); break;
5273 case UNLT:
5274 fputs ("!?<", file); break;
5275 case UNGE:
5276 fputs ("!?>=", file); break;
5277 case UNGT:
5278 fputs ("!?>", file); break;
5279 case UNEQ:
5280 fputs ("!?=", file); break;
5281 case UNORDERED:
5282 fputs ("!?", file); break;
5283 case ORDERED:
5284 fputs ("?", file); break;
5285 default:
5286 gcc_unreachable ();
5287 }
5288 return;
5289 case 'S': /* Condition, operands are (S)wapped. */
5290 switch (GET_CODE (x))
5291 {
5292 case EQ:
5293 fputs ("=", file); break;
5294 case NE:
5295 fputs ("<>", file); break;
5296 case GT:
5297 fputs ("<", file); break;
5298 case GE:
5299 fputs ("<=", file); break;
5300 case GEU:
5301 fputs ("<<=", file); break;
5302 case GTU:
5303 fputs ("<<", file); break;
5304 case LT:
5305 fputs (">", file); break;
5306 case LE:
5307 fputs (">=", file); break;
5308 case LEU:
5309 fputs (">>=", file); break;
5310 case LTU:
5311 fputs (">>", file); break;
5312 default:
5313 gcc_unreachable ();
5314 }
5315 return;
5316 case 'B': /* Condition, (B)oth swapped and negate. */
5317 switch (GET_CODE (x))
5318 {
5319 case EQ:
5320 fputs ("<>", file); break;
5321 case NE:
5322 fputs ("=", file); break;
5323 case GT:
5324 fputs (">=", file); break;
5325 case GE:
5326 fputs (">", file); break;
5327 case GEU:
5328 fputs (">>", file); break;
5329 case GTU:
5330 fputs (">>=", file); break;
5331 case LT:
5332 fputs ("<=", file); break;
5333 case LE:
5334 fputs ("<", file); break;
5335 case LEU:
5336 fputs ("<<", file); break;
5337 case LTU:
5338 fputs ("<<=", file); break;
5339 default:
5340 gcc_unreachable ();
5341 }
5342 return;
5343 case 'k':
5344 gcc_assert (GET_CODE (x) == CONST_INT);
5345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5346 return;
5347 case 'Q':
5348 gcc_assert (GET_CODE (x) == CONST_INT);
5349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5350 return;
5351 case 'L':
5352 gcc_assert (GET_CODE (x) == CONST_INT);
5353 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5354 return;
5355 case 'o':
5356 gcc_assert (GET_CODE (x) == CONST_INT
5357 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5358 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5359 return;
5360 case 'O':
5361 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5362 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5363 return;
5364 case 'p':
5365 gcc_assert (GET_CODE (x) == CONST_INT);
5366 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5367 return;
5368 case 'P':
5369 gcc_assert (GET_CODE (x) == CONST_INT);
5370 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5371 return;
5372 case 'I':
5373 if (GET_CODE (x) == CONST_INT)
5374 fputs ("i", file);
5375 return;
5376 case 'M':
5377 case 'F':
5378 switch (GET_CODE (XEXP (x, 0)))
5379 {
5380 case PRE_DEC:
5381 case PRE_INC:
5382 if (ASSEMBLER_DIALECT == 0)
5383 fputs ("s,mb", file);
5384 else
5385 fputs (",mb", file);
5386 break;
5387 case POST_DEC:
5388 case POST_INC:
5389 if (ASSEMBLER_DIALECT == 0)
5390 fputs ("s,ma", file);
5391 else
5392 fputs (",ma", file);
5393 break;
5394 case PLUS:
5395 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5396 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5397 {
5398 if (ASSEMBLER_DIALECT == 0)
5399 fputs ("x", file);
5400 }
5401 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5402 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5403 {
5404 if (ASSEMBLER_DIALECT == 0)
5405 fputs ("x,s", file);
5406 else
5407 fputs (",s", file);
5408 }
5409 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5410 fputs ("s", file);
5411 break;
5412 default:
5413 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5414 fputs ("s", file);
5415 break;
5416 }
5417 return;
5418 case 'G':
5419 pa_output_global_address (file, x, 0);
5420 return;
5421 case 'H':
5422 pa_output_global_address (file, x, 1);
5423 return;
5424 case 0: /* Don't do anything special */
5425 break;
5426 case 'Z':
5427 {
5428 unsigned op[3];
5429 compute_zdepwi_operands (INTVAL (x), op);
5430 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5431 return;
5432 }
5433 case 'z':
5434 {
5435 unsigned op[3];
5436 compute_zdepdi_operands (INTVAL (x), op);
5437 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5438 return;
5439 }
5440 case 'c':
5441 /* We can get here from a .vtable_inherit due to our
5442 CONSTANT_ADDRESS_P rejecting perfectly good constant
5443 addresses. */
5444 break;
5445 default:
5446 gcc_unreachable ();
5447 }
5448 if (GET_CODE (x) == REG)
5449 {
5450 fputs (reg_names [REGNO (x)], file);
5451 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5452 {
5453 fputs ("R", file);
5454 return;
5455 }
5456 if (FP_REG_P (x)
5457 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5458 && (REGNO (x) & 1) == 0)
5459 fputs ("L", file);
5460 }
5461 else if (GET_CODE (x) == MEM)
5462 {
5463 int size = GET_MODE_SIZE (GET_MODE (x));
5464 rtx base = NULL_RTX;
5465 switch (GET_CODE (XEXP (x, 0)))
5466 {
5467 case PRE_DEC:
5468 case POST_DEC:
5469 base = XEXP (XEXP (x, 0), 0);
5470 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5471 break;
5472 case PRE_INC:
5473 case POST_INC:
5474 base = XEXP (XEXP (x, 0), 0);
5475 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5476 break;
5477 case PLUS:
5478 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5479 fprintf (file, "%s(%s)",
5480 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5481 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5482 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5483 fprintf (file, "%s(%s)",
5484 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5485 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5486 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5487 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5488 {
5489 /* Because the REG_POINTER flag can get lost during reload,
5490 pa_legitimate_address_p canonicalizes the order of the
5491 index and base registers in the combined move patterns. */
5492 rtx base = XEXP (XEXP (x, 0), 1);
5493 rtx index = XEXP (XEXP (x, 0), 0);
5494
5495 fprintf (file, "%s(%s)",
5496 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5497 }
5498 else
5499 output_address (GET_MODE (x), XEXP (x, 0));
5500 break;
5501 default:
5502 output_address (GET_MODE (x), XEXP (x, 0));
5503 break;
5504 }
5505 }
5506 else
5507 output_addr_const (file, x);
5508 }
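
/* A worked example of the bit-field codes above (illustrative value,
   not from any particular insn): for (const_int 5),
     %k prints ~5            -> -6
     %Q prints 64 - (5 & 63) -> 59
     %L prints 32 - (5 & 31) -> 27
     %p prints 63 - (5 & 63) -> 58
     %P prints 31 - (5 & 31) -> 26
   i.e. the complemented shift counts and lengths used when shifts are
   implemented with the deposit/extract instructions.  */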
5509
5510 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5511
5512 void
5513 pa_output_global_address (FILE *file, rtx x, int round_constant)
5514 {
5515
5516 /* Imagine (high (const (plus ...))). */
5517 if (GET_CODE (x) == HIGH)
5518 x = XEXP (x, 0);
5519
5520 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5521 output_addr_const (file, x);
5522 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5523 {
5524 output_addr_const (file, x);
5525 fputs ("-$global$", file);
5526 }
5527 else if (GET_CODE (x) == CONST)
5528 {
5529 const char *sep = "";
5530 int offset = 0; /* assembler wants -$global$ at end */
5531 rtx base = NULL_RTX;
5532
5533 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5534 {
5535 case LABEL_REF:
5536 case SYMBOL_REF:
5537 base = XEXP (XEXP (x, 0), 0);
5538 output_addr_const (file, base);
5539 break;
5540 case CONST_INT:
5541 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5542 break;
5543 default:
5544 gcc_unreachable ();
5545 }
5546
5547 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5548 {
5549 case LABEL_REF:
5550 case SYMBOL_REF:
5551 base = XEXP (XEXP (x, 0), 1);
5552 output_addr_const (file, base);
5553 break;
5554 case CONST_INT:
5555 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5556 break;
5557 default:
5558 gcc_unreachable ();
5559 }
5560
5561 /* How bogus. The compiler is apparently responsible for
5562 rounding the constant if it uses an LR field selector.
5563
5564 The linker and/or assembler seem a better place since
5565 they have to do this kind of thing already.
5566
5567 If we fail to do this, HP's optimizing linker may eliminate
5568 an addil, but not update the ldw/stw/ldo instruction that
5569 uses the result of the addil. */
5570 if (round_constant)
5571 offset = ((offset + 0x1000) & ~0x1fff);
5572
5573 switch (GET_CODE (XEXP (x, 0)))
5574 {
5575 case PLUS:
5576 if (offset < 0)
5577 {
5578 offset = -offset;
5579 sep = "-";
5580 }
5581 else
5582 sep = "+";
5583 break;
5584
5585 case MINUS:
5586 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5587 sep = "-";
5588 break;
5589
5590 default:
5591 gcc_unreachable ();
5592 }
5593
5594 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5595 fputs ("-$global$", file);
5596 if (offset)
5597 fprintf (file, "%s%d", sep, offset);
5598 }
5599 else
5600 output_addr_const (file, x);
5601 }
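
/* A worked example of the LR rounding above (assumed offset, not from
   the source): for symbol+0x2345, ((0x2345 + 0x1000) & ~0x1fff) gives
   0x2000, so the addil covers symbol+0x2000 and the remaining 0x345 is
   left for the R' field of the dependent ldw/stw/ldo, keeping both
   halves consistent for HP's optimizing linker.  */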
5602
5603 /* Output boilerplate text to appear at the beginning of the file.
5604 There are several possible versions. */
5605 #define aputs(x) fputs(x, asm_out_file)
5606 static inline void
5607 pa_file_start_level (void)
5608 {
5609 if (TARGET_64BIT)
5610 aputs ("\t.LEVEL 2.0w\n");
5611 else if (TARGET_PA_20)
5612 aputs ("\t.LEVEL 2.0\n");
5613 else if (TARGET_PA_11)
5614 aputs ("\t.LEVEL 1.1\n");
5615 else
5616 aputs ("\t.LEVEL 1.0\n");
5617 }
5618
5619 static inline void
5620 pa_file_start_space (int sortspace)
5621 {
5622 aputs ("\t.SPACE $PRIVATE$");
5623 if (sortspace)
5624 aputs (",SORT=16");
5625 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5626 if (flag_tm)
5627 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5628 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5629 "\n\t.SPACE $TEXT$");
5630 if (sortspace)
5631 aputs (",SORT=8");
5632 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5633 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5634 }
5635
5636 static inline void
5637 pa_file_start_file (int want_version)
5638 {
5639 if (write_symbols != NO_DEBUG)
5640 {
5641 output_file_directive (asm_out_file, main_input_filename);
5642 if (want_version)
5643 aputs ("\t.version\t\"01.01\"\n");
5644 }
5645 }
5646
5647 static inline void
5648 pa_file_start_mcount (const char *aswhat)
5649 {
5650 if (profile_flag)
5651 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5652 }
5653
5654 static void
5655 pa_elf_file_start (void)
5656 {
5657 pa_file_start_level ();
5658 pa_file_start_mcount ("ENTRY");
5659 pa_file_start_file (0);
5660 }
5661
5662 static void
5663 pa_som_file_start (void)
5664 {
5665 pa_file_start_level ();
5666 pa_file_start_space (0);
5667 aputs ("\t.IMPORT $global$,DATA\n"
5668 "\t.IMPORT $$dyncall,MILLICODE\n");
5669 pa_file_start_mcount ("CODE");
5670 pa_file_start_file (0);
5671 }
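
/* For reference, a 32-bit PA 1.1 SOM file therefore starts roughly as
   follows (sketch assembled from the helpers above, assuming no -pg,
   no -fgnu-tm and no debug info):

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
	.SPACE $TEXT$
	.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */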
5672
5673 static void
5674 pa_linux_file_start (void)
5675 {
5676 pa_file_start_file (1);
5677 pa_file_start_level ();
5678 pa_file_start_mcount ("CODE");
5679 }
5680
5681 static void
5682 pa_hpux64_gas_file_start (void)
5683 {
5684 pa_file_start_level ();
5685 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5686 if (profile_flag)
5687 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5688 #endif
5689 pa_file_start_file (1);
5690 }
5691
5692 static void
5693 pa_hpux64_hpas_file_start (void)
5694 {
5695 pa_file_start_level ();
5696 pa_file_start_space (1);
5697 pa_file_start_mcount ("CODE");
5698 pa_file_start_file (0);
5699 }
5700 #undef aputs
5701
5702 /* Search the deferred plabel list for SYMBOL and return its internal
5703 label. If an entry for SYMBOL is not found, a new entry is created. */
5704
5705 rtx
5706 pa_get_deferred_plabel (rtx symbol)
5707 {
5708 const char *fname = XSTR (symbol, 0);
5709 size_t i;
5710
5711 /* See if we have already put this function on the list of deferred
5712 plabels. This list is generally small, so a linear search is not
5713 too ugly. If it proves too slow, replace it with something faster. */
5714 for (i = 0; i < n_deferred_plabels; i++)
5715 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5716 break;
5717
5718 /* If the deferred plabel list is empty, or this entry was not found
5719 on the list, create a new entry on the list. */
5720 if (deferred_plabels == NULL || i == n_deferred_plabels)
5721 {
5722 tree id;
5723
5724 if (deferred_plabels == 0)
5725 deferred_plabels = ggc_alloc<deferred_plabel> ();
5726 else
5727 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5728 deferred_plabels,
5729 n_deferred_plabels + 1);
5730
5731 i = n_deferred_plabels++;
5732 deferred_plabels[i].internal_label = gen_label_rtx ();
5733 deferred_plabels[i].symbol = symbol;
5734
5735 /* Gross. We have just implicitly taken the address of this
5736 function. Mark it in the same manner as assemble_name. */
5737 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5738 if (id)
5739 mark_referenced (id);
5740 }
5741
5742 return deferred_plabels[i].internal_label;
5743 }
5744
5745 static void
5746 output_deferred_plabels (void)
5747 {
5748 size_t i;
5749
5750 /* If we have some deferred plabels, then we need to switch into the
5751 data or readonly data section, and align it to a 4 byte (8 byte for
5752 the 64-bit runtime) boundary before outputting the deferred plabels. */
5753 if (n_deferred_plabels)
5754 {
5755 switch_to_section (flag_pic ? data_section : readonly_data_section);
5756 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5757 }
5758
5759 /* Now output the deferred plabels. */
5760 for (i = 0; i < n_deferred_plabels; i++)
5761 {
5762 targetm.asm_out.internal_label (asm_out_file, "L",
5763 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5764 assemble_integer (deferred_plabels[i].symbol,
5765 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5766 }
5767 }
5768
5769 /* Initialize optabs to point to emulation routines. */
5770
5771 static void
5772 pa_init_libfuncs (void)
5773 {
5774 if (HPUX_LONG_DOUBLE_LIBRARY)
5775 {
5776 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5777 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5778 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5779 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5780 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5781 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5782 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5783 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5784 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5785
5786 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5787 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5788 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5789 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5790 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5791 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5792 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5793
5794 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5795 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5796 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5797 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5798
5799 set_conv_libfunc (sfix_optab, SImode, TFmode,
5800 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5801 : "_U_Qfcnvfxt_quad_to_sgl");
5802 set_conv_libfunc (sfix_optab, DImode, TFmode,
5803 "_U_Qfcnvfxt_quad_to_dbl");
5804 set_conv_libfunc (ufix_optab, SImode, TFmode,
5805 "_U_Qfcnvfxt_quad_to_usgl");
5806 set_conv_libfunc (ufix_optab, DImode, TFmode,
5807 "_U_Qfcnvfxt_quad_to_udbl");
5808
5809 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5810 "_U_Qfcnvxf_sgl_to_quad");
5811 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5812 "_U_Qfcnvxf_dbl_to_quad");
5813 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5814 "_U_Qfcnvxf_usgl_to_quad");
5815 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5816 "_U_Qfcnvxf_udbl_to_quad");
5817 }
5818
5819 if (TARGET_SYNC_LIBCALL)
5820 init_sync_libfuncs (8);
5821 }
5822
5823 /* HP's millicode routines mean something special to the assembler.
5824 Keep track of which ones we have used. */
5825
5826 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5827 static void import_milli (enum millicodes);
5828 static char imported[(int) end1000];
5829 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5830 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5831 #define MILLI_START 10
5832
5833 static void
5834 import_milli (enum millicodes code)
5835 {
5836 char str[sizeof (import_string)];
5837
5838 if (!imported[(int) code])
5839 {
5840 imported[(int) code] = 1;
5841 strcpy (str, import_string);
5842 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5843 output_asm_insn (str, 0);
5844 }
5845 }
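
/* For example, import_milli (mulI) copies "mulI" over the "...."
   placeholder at offset MILLI_START, so the directive emitted (once
   per translation unit, thanks to the imported[] flag) is

	.IMPORT $$mulI,MILLICODE  */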
5846
5847 /* The register constraints have put the operands and return value in
5848 the proper registers. */
5849
5850 const char *
5851 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5852 {
5853 import_milli (mulI);
5854 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5855 }
5856
5857 /* Emit the rtl for doing a division by a constant. */
5858
5859 /* Do magic division millicodes exist for this value? */
5860 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5861
5862 /* We'll use an array to keep track of the magic millicodes and
5863 whether or not we've used them already. [n][0] is signed, [n][1] is
5864 unsigned. */
5865
5866 static int div_milli[16][2];
5867
5868 int
5869 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5870 {
5871 if (GET_CODE (operands[2]) == CONST_INT
5872 && INTVAL (operands[2]) > 0
5873 && INTVAL (operands[2]) < 16
5874 && pa_magic_milli[INTVAL (operands[2])])
5875 {
5876 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5877
5878 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5879 emit
5880 (gen_rtx_PARALLEL
5881 (VOIDmode,
5882 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5883 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5884 SImode,
5885 gen_rtx_REG (SImode, 26),
5886 operands[2])),
5887 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5888 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5889 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5890 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5891 gen_rtx_CLOBBER (VOIDmode, ret))));
5892 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5893 return 1;
5894 }
5895 return 0;
5896 }
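
/* Example (derived from the table above): a signed divide by 7 has
   pa_magic_milli[7] == 1, so this emits a call to the dedicated
   $$divI_7 millicode routine; a divide by 11 has pa_magic_milli[11] == 0,
   so this function returns 0 and the expander falls back to the
   generic $$divI path handled below.  */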
5897
5898 const char *
5899 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5900 {
5901 int divisor;
5902
5903 /* If the divisor is a constant, try to use one of the special
5904 opcodes. */
5905 if (GET_CODE (operands[0]) == CONST_INT)
5906 {
5907 static char buf[100];
5908 divisor = INTVAL (operands[0]);
5909 if (!div_milli[divisor][unsignedp])
5910 {
5911 div_milli[divisor][unsignedp] = 1;
5912 if (unsignedp)
5913 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5914 else
5915 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5916 }
5917 if (unsignedp)
5918 {
5919 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5920 INTVAL (operands[0]));
5921 return pa_output_millicode_call (insn,
5922 gen_rtx_SYMBOL_REF (SImode, buf));
5923 }
5924 else
5925 {
5926 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5927 INTVAL (operands[0]));
5928 return pa_output_millicode_call (insn,
5929 gen_rtx_SYMBOL_REF (SImode, buf));
5930 }
5931 }
5932 /* Divisor isn't a special constant. */
5933 else
5934 {
5935 if (unsignedp)
5936 {
5937 import_milli (divU);
5938 return pa_output_millicode_call (insn,
5939 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5940 }
5941 else
5942 {
5943 import_milli (divI);
5944 return pa_output_millicode_call (insn,
5945 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5946 }
5947 }
5948 }
5949
5950 /* Output a $$rem millicode to do mod. */
5951
5952 const char *
5953 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5954 {
5955 if (unsignedp)
5956 {
5957 import_milli (remU);
5958 return pa_output_millicode_call (insn,
5959 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5960 }
5961 else
5962 {
5963 import_milli (remI);
5964 return pa_output_millicode_call (insn,
5965 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5966 }
5967 }
5968
5969 void
5970 pa_output_arg_descriptor (rtx_insn *call_insn)
5971 {
5972 const char *arg_regs[4];
5973 machine_mode arg_mode;
5974 rtx link;
5975 int i, output_flag = 0;
5976 int regno;
5977
5978 /* We neither need nor want argument location descriptors for the
5979 64-bit runtime environment or the ELF32 environment. */
5980 if (TARGET_64BIT || TARGET_ELF32)
5981 return;
5982
5983 for (i = 0; i < 4; i++)
5984 arg_regs[i] = 0;
5985
5986 /* Specify explicitly that no argument relocations should take place
5987 if using the portable runtime calling conventions. */
5988 if (TARGET_PORTABLE_RUNTIME)
5989 {
5990 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5991 asm_out_file);
5992 return;
5993 }
5994
5995 gcc_assert (CALL_P (call_insn));
5996 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5997 link; link = XEXP (link, 1))
5998 {
5999 rtx use = XEXP (link, 0);
6000
6001 if (! (GET_CODE (use) == USE
6002 && GET_CODE (XEXP (use, 0)) == REG
6003 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6004 continue;
6005
6006 arg_mode = GET_MODE (XEXP (use, 0));
6007 regno = REGNO (XEXP (use, 0));
6008 if (regno >= 23 && regno <= 26)
6009 {
6010 arg_regs[26 - regno] = "GR";
6011 if (arg_mode == DImode)
6012 arg_regs[25 - regno] = "GR";
6013 }
6014 else if (regno >= 32 && regno <= 39)
6015 {
6016 if (arg_mode == SFmode)
6017 arg_regs[(regno - 32) / 2] = "FR";
6018 else
6019 {
6020 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6021 arg_regs[(regno - 34) / 2] = "FR";
6022 arg_regs[(regno - 34) / 2 + 1] = "FU";
6023 #else
6024 arg_regs[(regno - 34) / 2] = "FU";
6025 arg_regs[(regno - 34) / 2 + 1] = "FR";
6026 #endif
6027 }
6028 }
6029 }
6030 fputs ("\t.CALL ", asm_out_file);
6031 for (i = 0; i < 4; i++)
6032 {
6033 if (arg_regs[i])
6034 {
6035 if (output_flag++)
6036 fputc (',', asm_out_file);
6037 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6038 }
6039 }
6040 fputc ('\n', asm_out_file);
6041 }
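
/* Illustration (assumed call, not from the source): an int argument in
   %r26 sets arg_regs[0] = "GR"; a double in the FP pair at hard regs
   38/39 sets arg_regs[2] = "FR" and arg_regs[3] = "FU" (doubleword
   arguments start on even word boundaries, so ARGW1 stays empty).  The
   descriptor emitted is then

	.CALL ARGW0=GR,ARGW2=FR,ARGW3=FU  */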
6042 \f
6043 /* Inform reload about cases where moving X with a mode MODE to or from
6044 a register in RCLASS requires an extra scratch or immediate register.
6045 Return the class needed for the immediate register. */
6046
6047 static reg_class_t
6048 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6049 machine_mode mode, secondary_reload_info *sri)
6050 {
6051 int regno;
6052 enum reg_class rclass = (enum reg_class) rclass_i;
6053
6054 /* Handle the easy stuff first. */
6055 if (rclass == R1_REGS)
6056 return NO_REGS;
6057
6058 if (REG_P (x))
6059 {
6060 regno = REGNO (x);
6061 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6062 return NO_REGS;
6063 }
6064 else
6065 regno = -1;
6066
6067 /* If we have something like (mem (mem (...))), we can safely assume the
6068 inner MEM will end up in a general register after reloading, so there's
6069 no need for a secondary reload. */
6070 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6071 return NO_REGS;
6072
6073 /* Trying to load a constant into a FP register during PIC code
6074 generation requires %r1 as a scratch register. For float modes,
6075 the only legitimate constant is CONST0_RTX. However, there are
6076 a few patterns that accept constant double operands. */
6077 if (flag_pic
6078 && FP_REG_CLASS_P (rclass)
6079 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6080 {
6081 switch (mode)
6082 {
6083 case E_SImode:
6084 sri->icode = CODE_FOR_reload_insi_r1;
6085 break;
6086
6087 case E_DImode:
6088 sri->icode = CODE_FOR_reload_indi_r1;
6089 break;
6090
6091 case E_SFmode:
6092 sri->icode = CODE_FOR_reload_insf_r1;
6093 break;
6094
6095 case E_DFmode:
6096 sri->icode = CODE_FOR_reload_indf_r1;
6097 break;
6098
6099 default:
6100 gcc_unreachable ();
6101 }
6102 return NO_REGS;
6103 }
6104
6105 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6106 register when we're generating PIC code or when the operand isn't
6107 readonly. */
6108 if (pa_symbolic_expression_p (x))
6109 {
6110 if (GET_CODE (x) == HIGH)
6111 x = XEXP (x, 0);
6112
6113 if (flag_pic || !read_only_operand (x, VOIDmode))
6114 {
6115 switch (mode)
6116 {
6117 case E_SImode:
6118 sri->icode = CODE_FOR_reload_insi_r1;
6119 break;
6120
6121 case E_DImode:
6122 sri->icode = CODE_FOR_reload_indi_r1;
6123 break;
6124
6125 default:
6126 gcc_unreachable ();
6127 }
6128 return NO_REGS;
6129 }
6130 }
6131
6132 /* Profiling showed the PA port spends about 1.3% of its compilation
6133 time in true_regnum from calls inside pa_secondary_reload_class. */
6134 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6135 regno = true_regnum (x);
6136
6137 /* Handle reloads for floating point loads and stores. */
6138 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6139 && FP_REG_CLASS_P (rclass))
6140 {
6141 if (MEM_P (x))
6142 {
6143 x = XEXP (x, 0);
6144
6145 /* We don't need a secondary reload for indexed memory addresses.
6146
6147 When INT14_OK_STRICT is true, it might appear that we could
6148 directly allow register indirect memory addresses. However,
6149 this doesn't work because we don't support SUBREGs in
6150 floating-point register copies and reload doesn't tell us
6151 when it's going to use a SUBREG. */
6152 if (IS_INDEX_ADDR_P (x))
6153 return NO_REGS;
6154 }
6155
6156 /* Request a secondary reload with a general scratch register
6157 for everything else. ??? Could symbolic operands be handled
6158 directly when generating non-pic PA 2.0 code? */
6159 sri->icode = (in_p
6160 ? direct_optab_handler (reload_in_optab, mode)
6161 : direct_optab_handler (reload_out_optab, mode));
6162 return NO_REGS;
6163 }
6164
6165 /* A SAR<->FP register copy requires an intermediate general register
6166 and secondary memory. We need a secondary reload with a general
6167 scratch register for spills. */
6168 if (rclass == SHIFT_REGS)
6169 {
6170 /* Handle spill. */
6171 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6172 {
6173 sri->icode = (in_p
6174 ? direct_optab_handler (reload_in_optab, mode)
6175 : direct_optab_handler (reload_out_optab, mode));
6176 return NO_REGS;
6177 }
6178
6179 /* Handle FP copy. */
6180 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6181 return GENERAL_REGS;
6182 }
6183
6184 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6185 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6186 && FP_REG_CLASS_P (rclass))
6187 return GENERAL_REGS;
6188
6189 return NO_REGS;
6190 }
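
/* For instance, reloading (const_int 42) into an FP register class
   under -fpic in SImode takes the constant branch above: sri->icode is
   set to CODE_FOR_reload_insi_r1, which materializes the constant
   through the %r1 scratch, and NO_REGS is returned since no extra
   register class is needed beyond that scratch.  */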
6191
6192 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6193 is only marked as live on entry by df-scan when it is a fixed
6194 register. It isn't a fixed register in the 64-bit runtime,
6195 so we need to mark it here. */
6196
6197 static void
6198 pa_extra_live_on_entry (bitmap regs)
6199 {
6200 if (TARGET_64BIT)
6201 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6202 }
6203
6204 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6205 to prevent it from being deleted. */
6206
6207 rtx
6208 pa_eh_return_handler_rtx (void)
6209 {
6210 rtx tmp;
6211
6212 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6213 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6214 tmp = gen_rtx_MEM (word_mode, tmp);
6215 MEM_VOLATILE_P (tmp) = 1;
6216 return tmp;
6217 }
6218
6219 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6220 by invisible reference. As a GCC extension, we also pass anything
6221 with a zero or variable size by reference.
6222
6223 The 64-bit runtime does not describe passing any types by invisible
6224 reference. The internals of GCC can't currently handle passing
6225 empty structures, or zero- and variable-length arrays, when they are
6226 not passed entirely on the stack or by reference. Thus, as a GCC
6227 extension, we pass these types by reference. The HP compiler doesn't
6228 support these types, so hopefully there shouldn't be any compatibility
6229 issues. This may have to be revisited when HP releases a C99 compiler
6230 or updates the ABI. */
6231
6232 static bool
6233 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6234 machine_mode mode, const_tree type,
6235 bool named ATTRIBUTE_UNUSED)
6236 {
6237 HOST_WIDE_INT size;
6238
6239 if (type)
6240 size = int_size_in_bytes (type);
6241 else
6242 size = GET_MODE_SIZE (mode);
6243
6244 if (TARGET_64BIT)
6245 return size <= 0;
6246 else
6247 return size <= 0 || size > 8;
6248 }
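
/* Examples (illustrative): a 12-byte struct is passed by reference in
   the 32-bit runtime (size > 8) but by value in the 64-bit runtime; a
   zero-sized struct is passed by reference under both, per the GCC
   extension described above.  */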
6249
6250 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6251
6252 static pad_direction
6253 pa_function_arg_padding (machine_mode mode, const_tree type)
6254 {
6255 if (mode == BLKmode
6256 || (TARGET_64BIT
6257 && type
6258 && (AGGREGATE_TYPE_P (type)
6259 || TREE_CODE (type) == COMPLEX_TYPE
6260 || TREE_CODE (type) == VECTOR_TYPE)))
6261 {
6262 /* Return PAD_NONE if justification is not required. */
6263 if (type
6264 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6265 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6266 return PAD_NONE;
6267
6268 /* The directions set here are ignored when a BLKmode argument larger
6269 than a word is placed in a register. Different code is used for
6270 the stack and registers. This makes it difficult to have a
6271 consistent data representation for both the stack and registers.
6272 For both runtimes, the justification and padding for arguments on
6273 the stack and in registers should be identical. */
6274 if (TARGET_64BIT)
6275 /* The 64-bit runtime specifies left justification for aggregates. */
6276 return PAD_UPWARD;
6277 else
6278 /* The 32-bit runtime architecture specifies right justification.
6279 When the argument is passed on the stack, the argument is padded
6280 with garbage on the left. The HP compiler pads with zeros. */
6281 return PAD_DOWNWARD;
6282 }
6283
6284 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6285 return PAD_DOWNWARD;
6286 else
6287 return PAD_NONE;
6288 }
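
/* Examples (illustrative): a 3-byte BLKmode struct pads downward in
   the 32-bit runtime (right justified in its argument word) but upward
   in the 64-bit runtime; an HImode scalar, being narrower than
   PARM_BOUNDARY, pads downward in both.  */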
6289
6290 \f
6291 /* Do what is necessary for `va_start'. We look at the current function
6292 to determine if stdargs or varargs is used and fill in an initial
6293 va_list. A pointer to this constructor is returned. */
6294
6295 static rtx
6296 hppa_builtin_saveregs (void)
6297 {
6298 rtx offset, dest;
6299 tree fntype = TREE_TYPE (current_function_decl);
6300 int argadj = ((!stdarg_p (fntype))
6301 ? UNITS_PER_WORD : 0);
6302
6303 if (argadj)
6304 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6305 else
6306 offset = crtl->args.arg_offset_rtx;
6307
6308 if (TARGET_64BIT)
6309 {
6310 int i, off;
6311
6312 /* Adjust for varargs/stdarg differences. */
6313 if (argadj)
6314 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6315 else
6316 offset = crtl->args.arg_offset_rtx;
6317
6318 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6319 from the incoming arg pointer and growing to larger addresses. */
6320 for (i = 26, off = -64; i >= 19; i--, off += 8)
6321 emit_move_insn (gen_rtx_MEM (word_mode,
6322 plus_constant (Pmode,
6323 arg_pointer_rtx, off)),
6324 gen_rtx_REG (word_mode, i));
6325
6326 /* The incoming args pointer points just beyond the flushback area;
6327 normally this is not a serious concern. However, when we are doing
6328 varargs/stdargs we want to make the arg pointer point to the start
6329 of the incoming argument area. */
6330 emit_move_insn (virtual_incoming_args_rtx,
6331 plus_constant (Pmode, arg_pointer_rtx, -64));
6332
6333 /* Now return a pointer to the first anonymous argument. */
6334 return copy_to_reg (expand_binop (Pmode, add_optab,
6335 virtual_incoming_args_rtx,
6336 offset, 0, 0, OPTAB_LIB_WIDEN));
6337 }
6338
6339 /* Store general registers on the stack. */
6340 dest = gen_rtx_MEM (BLKmode,
6341 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6342 -16));
6343 set_mem_alias_set (dest, get_varargs_alias_set ());
6344 set_mem_align (dest, BITS_PER_WORD);
6345 move_block_from_reg (23, dest, 4);
6346
6347 /* move_block_from_reg will emit code to store the argument registers
6348 individually as scalar stores.
6349
6350 However, other insns may later load from the same addresses for
6351 a structure load (passing a struct to a varargs routine).
6352
6353 The alias code assumes that such aliasing can never happen, so we
6354 have to keep memory referencing insns from moving up beyond the
6355 last argument register store. So we emit a blockage insn here. */
6356 emit_insn (gen_blockage ());
6357
6358 return copy_to_reg (expand_binop (Pmode, add_optab,
6359 crtl->args.internal_arg_pointer,
6360 offset, 0, 0, OPTAB_LIB_WIDEN));
6361 }
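
/* In the 32-bit path above, move_block_from_reg stores the four
   argument registers into the caller-allocated slots just below the
   internal argument pointer: %r23 at -16, %r24 at -12, %r25 at -8 and
   %r26 at -4 (offsets inferred from the code); the blockage insn then
   keeps later structure loads from moving above these stores.  */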
6362
6363 static void
6364 hppa_va_start (tree valist, rtx nextarg)
6365 {
6366 nextarg = expand_builtin_saveregs ();
6367 std_expand_builtin_va_start (valist, nextarg);
6368 }
6369
6370 static tree
6371 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6372 gimple_seq *post_p)
6373 {
6374 if (TARGET_64BIT)
6375 {
6376 /* Args grow upward. We can use the generic routines. */
6377 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6378 }
6379 else /* !TARGET_64BIT */
6380 {
6381 tree ptr = build_pointer_type (type);
6382 tree valist_type;
6383 tree t, u;
6384 unsigned int size, ofs;
6385 bool indirect;
6386
6387 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6388 if (indirect)
6389 {
6390 type = ptr;
6391 ptr = build_pointer_type (type);
6392 }
6393 size = int_size_in_bytes (type);
6394 valist_type = TREE_TYPE (valist);
6395
6396 /* Args grow down. Not handled by generic routines. */
6397
6398 u = fold_convert (sizetype, size_in_bytes (type));
6399 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6400 t = fold_build_pointer_plus (valist, u);
6401
6402 /* Align to 4 or 8 byte boundary depending on argument size. */
6403
6404 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6405 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6406 t = fold_convert (valist_type, t);
6407
6408 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6409
6410 ofs = (8 - size) % 4;
6411 if (ofs != 0)
6412 t = fold_build_pointer_plus_hwi (t, ofs);
6413
6414 t = fold_convert (ptr, t);
6415 t = build_va_arg_indirect_ref (t);
6416
6417 if (indirect)
6418 t = build_va_arg_indirect_ref (t);
6419
6420 return t;
6421 }
6422 }
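
/* A worked 32-bit example (assumed type): for a 2-byte short, the code
   above computes valist - 2, masks with -4 to round down to a 4-byte
   boundary, then adds ofs = (8 - 2) % 4 = 2, so the halfword is read
   right justified in its argument word, matching PAD_DOWNWARD in
   pa_function_arg_padding.  */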
6423
6424 /* True if MODE is valid for the target. By "valid", we mean able to
6425 be manipulated in non-trivial ways. In particular, this means all
6426 the arithmetic is supported.
6427
6428 Currently, TImode is not valid as the HP 64-bit runtime documentation
6429 doesn't specify the alignment and calling conventions for this type.
6430 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6431 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6432
6433 static bool
6434 pa_scalar_mode_supported_p (scalar_mode mode)
6435 {
6436 int precision = GET_MODE_PRECISION (mode);
6437
6438 switch (GET_MODE_CLASS (mode))
6439 {
6440 case MODE_PARTIAL_INT:
6441 case MODE_INT:
6442 if (precision == CHAR_TYPE_SIZE)
6443 return true;
6444 if (precision == SHORT_TYPE_SIZE)
6445 return true;
6446 if (precision == INT_TYPE_SIZE)
6447 return true;
6448 if (precision == LONG_TYPE_SIZE)
6449 return true;
6450 if (precision == LONG_LONG_TYPE_SIZE)
6451 return true;
6452 return false;
6453
6454 case MODE_FLOAT:
6455 if (precision == FLOAT_TYPE_SIZE)
6456 return true;
6457 if (precision == DOUBLE_TYPE_SIZE)
6458 return true;
6459 if (precision == LONG_DOUBLE_TYPE_SIZE)
6460 return true;
6461 return false;
6462
6463 case MODE_DECIMAL_FLOAT:
6464 return false;
6465
6466 default:
6467 gcc_unreachable ();
6468 }
6469 }
6470
6471 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6472 it branches into the delay slot. Otherwise, return FALSE. */
6473
6474 static bool
6475 branch_to_delay_slot_p (rtx_insn *insn)
6476 {
6477 rtx_insn *jump_insn;
6478
6479 if (dbr_sequence_length ())
6480 return FALSE;
6481
6482 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6483 while (insn)
6484 {
6485 insn = next_active_insn (insn);
6486 if (jump_insn == insn)
6487 return TRUE;
6488
6489 /* We can't rely on the length of asms. So, we return FALSE when
6490 the branch is followed by an asm. */
6491 if (!insn
6492 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6493 || asm_noperands (PATTERN (insn)) >= 0
6494 || get_attr_length (insn) > 0)
6495 break;
6496 }
6497
6498 return FALSE;
6499 }
6500
6501 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6502
6503 This occurs when INSN has an unfilled delay slot and is followed
6504 by an asm. Disaster can occur if the asm is empty and the jump
6505 branches into the delay slot. So, we add a nop in the delay slot
6506 when this occurs. */
6507
6508 static bool
6509 branch_needs_nop_p (rtx_insn *insn)
6510 {
6511 rtx_insn *jump_insn;
6512
6513 if (dbr_sequence_length ())
6514 return FALSE;
6515
6516 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6517 while (insn)
6518 {
6519 insn = next_active_insn (insn);
6520 if (!insn || jump_insn == insn)
6521 return TRUE;
6522
6523 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6524 || asm_noperands (PATTERN (insn)) >= 0)
6525 && get_attr_length (insn) > 0)
6526 break;
6527 }
6528
6529 return FALSE;
6530 }
6531
6532 /* Return TRUE if INSN, a forward jump insn, can use nullification
6533 to skip the following instruction. This avoids an extra cycle due
6534 to a mis-predicted branch when we fall through. */
6535
6536 static bool
6537 use_skip_p (rtx_insn *insn)
6538 {
6539 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6540
6541 while (insn)
6542 {
6543 insn = next_active_insn (insn);
6544
6545 /* We can't rely on the length of asms, so we can't skip asms. */
6546 if (!insn
6547 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6548 || asm_noperands (PATTERN (insn)) >= 0)
6549 break;
6550 if (get_attr_length (insn) == 4
6551 && jump_insn == next_active_insn (insn))
6552 return TRUE;
6553 if (get_attr_length (insn) > 0)
6554 break;
6555 }
6556
6557 return FALSE;
6558 }
6559
6560 /* This routine handles all the normal conditional branch sequences we
6561 might need to generate. It handles compare immediate vs compare
6562 register, nullification of delay slots, varying length branches,
6563 negated branches, and all combinations of the above. It returns the
6564 output appropriate to emit the branch corresponding to all given
6565 parameters. */
6566
6567 const char *
6568 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6569 {
6570 static char buf[100];
6571 bool useskip;
6572 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6573 int length = get_attr_length (insn);
6574 int xdelay;
6575
6576 /* A conditional branch to the following instruction (e.g. the delay slot)
6577 is asking for a disaster. This can happen when not optimizing and
6578 when jump optimization fails.
6579
6580 While it is usually safe to emit nothing, this can fail if the
6581 preceding instruction is a nullified branch with an empty delay
6582 slot and the same branch target as this branch. We could check
6583 for this but jump optimization should eliminate nop jumps. It
6584 is always safe to emit a nop. */
6585 if (branch_to_delay_slot_p (insn))
6586 return "nop";
6587
6588 /* The doubleword form of the cmpib instruction doesn't have the LEU
6589 and GTU conditions while the cmpb instruction does. Since we accept
6590 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6591 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6592 operands[2] = gen_rtx_REG (DImode, 0);
6593 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6594 operands[1] = gen_rtx_REG (DImode, 0);
6595
6596 /* If this is a long branch with its delay slot unfilled, set `nullify'
6597 as it can nullify the delay slot and save a nop. */
6598 if (length == 8 && dbr_sequence_length () == 0)
6599 nullify = 1;
6600
6601 /* If this is a short forward conditional branch which did not get
6602 its delay slot filled, the delay slot can still be nullified. */
6603 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6604 nullify = forward_branch_p (insn);
6605
6606 /* A forward branch over a single nullified insn can be done with a
6607 comclr instruction. This avoids a single cycle penalty due to a
6608 mis-predicted branch if we fall through (branch not taken). */
6609 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6610
6611 switch (length)
6612 {
6613 /* All short conditional branches except backwards with an unfilled
6614 delay slot. */
6615 case 4:
6616 if (useskip)
6617 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6618 else
6619 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6620 if (GET_MODE (operands[1]) == DImode)
6621 strcat (buf, "*");
6622 if (negated)
6623 strcat (buf, "%B3");
6624 else
6625 strcat (buf, "%S3");
6626 if (useskip)
6627 strcat (buf, " %2,%r1,%%r0");
6628 else if (nullify)
6629 {
6630 if (branch_needs_nop_p (insn))
6631 strcat (buf, ",n %2,%r1,%0%#");
6632 else
6633 strcat (buf, ",n %2,%r1,%0");
6634 }
6635 else
6636 strcat (buf, " %2,%r1,%0");
6637 break;
6638
6639 /* All long conditionals. Note a short backward branch with an
6640 unfilled delay slot is treated just like a long backward branch
6641 with an unfilled delay slot. */
6642 case 8:
6643 /* Handle weird backwards branch with a filled delay slot
6644 which is nullified. */
6645 if (dbr_sequence_length () != 0
6646 && ! forward_branch_p (insn)
6647 && nullify)
6648 {
6649 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6650 if (GET_MODE (operands[1]) == DImode)
6651 strcat (buf, "*");
6652 if (negated)
6653 strcat (buf, "%S3");
6654 else
6655 strcat (buf, "%B3");
6656 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6657 }
6658 /* Handle short backwards branch with an unfilled delay slot.
6659 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6660 taken and untaken branches. */
6661 else if (dbr_sequence_length () == 0
6662 && ! forward_branch_p (insn)
6663 && INSN_ADDRESSES_SET_P ()
6664 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6665 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6666 {
6667 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6668 if (GET_MODE (operands[1]) == DImode)
6669 strcat (buf, "*");
6670 if (negated)
6671 strcat (buf, "%B3 %2,%r1,%0%#");
6672 else
6673 strcat (buf, "%S3 %2,%r1,%0%#");
6674 }
6675 else
6676 {
6677 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6678 if (GET_MODE (operands[1]) == DImode)
6679 strcat (buf, "*");
6680 if (negated)
6681 strcat (buf, "%S3");
6682 else
6683 strcat (buf, "%B3");
6684 if (nullify)
6685 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6686 else
6687 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6688 }
6689 break;
6690
6691 default:
6692 /* The reversed conditional branch must branch over one additional
6693 instruction if the delay slot is filled and needs to be extracted
6694 by pa_output_lbranch. If the delay slot is empty or this is a
6695 nullified forward branch, the instruction after the reversed
6696 condition branch must be nullified. */
6697 if (dbr_sequence_length () == 0
6698 || (nullify && forward_branch_p (insn)))
6699 {
6700 nullify = 1;
6701 xdelay = 0;
6702 operands[4] = GEN_INT (length);
6703 }
6704 else
6705 {
6706 xdelay = 1;
6707 operands[4] = GEN_INT (length + 4);
6708 }
6709
6710 /* Create a reversed conditional branch which branches around
6711 the following insns. */
6712 if (GET_MODE (operands[1]) != DImode)
6713 {
6714 if (nullify)
6715 {
6716 if (negated)
6717 strcpy (buf,
6718 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6719 else
6720 strcpy (buf,
6721 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6722 }
6723 else
6724 {
6725 if (negated)
6726 strcpy (buf,
6727 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6728 else
6729 strcpy (buf,
6730 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6731 }
6732 }
6733 else
6734 {
6735 if (nullify)
6736 {
6737 if (negated)
6738 strcpy (buf,
6739 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6740 else
6741 strcpy (buf,
6742 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6743 }
6744 else
6745 {
6746 if (negated)
6747 strcpy (buf,
6748 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6749 else
6750 strcpy (buf,
6751 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6752 }
6753 }
6754
6755 output_asm_insn (buf, operands);
6756 return pa_output_lbranch (operands[0], insn, xdelay);
6757 }
6758 return buf;
6759 }
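
/* A concrete sketch (GAS dialect, assumed register operands and an EQ
   test): the length-4 nullifying case resolves to something like

	cmpb,=,n %r25,%r26,.L5

   while the length-8 fallback uses a clear-on-condition followed by an
   unconditional branch:

	cmpclr,<> %r25,%r26,%r0
	b .L5  */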
6760
6761 /* Output a PIC pc-relative instruction sequence to load the address of
6762 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6763 or a code label. OPERANDS[1] specifies the register to use to load
6764 the program counter. OPERANDS[3] may be used for label generation
6765 The sequence is always three instructions in length. The program
6766 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6767 Register %r1 is clobbered. */
6768
6769 static void
6770 pa_output_pic_pcrel_sequence (rtx *operands)
6771 {
6772 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6773 if (TARGET_PA_20)
6774 {
6775 /* We can use mfia to determine the current program counter. */
6776 if (TARGET_SOM || !TARGET_GAS)
6777 {
6778 operands[3] = gen_label_rtx ();
6779 targetm.asm_out.internal_label (asm_out_file, "L",
6780 CODE_LABEL_NUMBER (operands[3]));
6781 output_asm_insn ("mfia %1", operands);
6782 output_asm_insn ("addil L'%0-%l3,%1", operands);
6783 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6784 }
6785 else
6786 {
6787 output_asm_insn ("mfia %1", operands);
6788 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6789 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6790 }
6791 }
6792 else
6793 {
6794 /* We need to use a branch to determine the current program counter. */
6795 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6796 if (TARGET_SOM || !TARGET_GAS)
6797 {
6798 operands[3] = gen_label_rtx ();
6799 output_asm_insn ("addil L'%0-%l3,%1", operands);
6800 targetm.asm_out.internal_label (asm_out_file, "L",
6801 CODE_LABEL_NUMBER (operands[3]));
6802 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6803 }
6804 else
6805 {
6806 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6807 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6808 }
6809 }
6810 }
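
/* On PA 2.0 with GAS this therefore emits, for a symbol S standing in
   for OPERANDS[0],

	mfia %r1
	addil L'S-$PIC_pcrel$0+12,%r1
	ldo R'S-$PIC_pcrel$0+16(%r1),%r2

   on PA 1.X the mfia is replaced by "bl .+8" ("b,l" for GAS) and the
   offsets shrink to +4/+8, because the branch records a program
   counter eight bytes further on, as noted above.  */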
6811
6812 /* This routine handles output of long unconditional branches that
6813 exceed the maximum range of a simple branch instruction. Since
6814 we don't have a register available for the branch, we save register
6815 %r1 in the frame marker, load the branch destination DEST into %r1,
6816 execute the branch, and restore %r1 in the delay slot of the branch.
6817
6818 Since long branches may have an insn in the delay slot and the
6819 delay slot is used to restore %r1, we in general need to extract
6820 this insn and execute it before the branch. However, to facilitate
6821 use of this function by conditional branches, we also provide an
6822 option to not extract the delay insn so that it will be emitted
6823 after the long branch. So, if there is an insn in the delay slot,
6824 it is extracted if XDELAY is nonzero.
6825
6826 The lengths of the various long-branch sequences are 20, 16 and 24
6827 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6828
6829 const char *
6830 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6831 {
6832 rtx xoperands[4];
6833
6834 xoperands[0] = dest;
6835
6836 /* First, free up the delay slot. */
6837 if (xdelay && dbr_sequence_length () != 0)
6838 {
6839 /* We can't handle a jump in the delay slot. */
6840 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6841
6842 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6843 optimize, 0, NULL);
6844
6845 /* Now delete the delay insn. */
6846 SET_INSN_DELETED (NEXT_INSN (insn));
6847 }
6848
6849 /* Output an insn to save %r1. The runtime documentation doesn't
6850 specify whether the "Clean Up" slot in the caller's frame can
6851 be clobbered by the callee. It isn't copied by HP's builtin
6852 alloca, so this suggests that it can be clobbered if necessary.
6853 The "Static Link" location is copied by HP builtin alloca, so
6854 we avoid using it. Using the cleanup slot might be a problem
6855 if we have to interoperate with languages that pass cleanup
6856 information. However, it should be possible to handle these
6857 situations with GCC's asm feature.
6858
6859 The "Current RP" slot is reserved for the called procedure, so
6860 we try to use it when we don't have a frame of our own. It's
6861 rather unlikely that we won't have a frame when we need to emit
6862 a very long branch.
6863
6864 Really the way to go long term is a register scavenger; go to
6865 the target of the jump and find a register which we can use
6866 as a scratch to hold the value in %r1. Then, we wouldn't have
6867 to free up the delay slot or clobber a slot that may be needed
6868 for other purposes. */
6869 if (TARGET_64BIT)
6870 {
6871 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6872 /* Use the return pointer slot in the frame marker. */
6873 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6874 else
6875 /* Use the slot at -40 in the frame marker since HP builtin
6876 alloca doesn't copy it. */
6877 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6878 }
6879 else
6880 {
6881 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6882 /* Use the return pointer slot in the frame marker. */
6883 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6884 else
6885 /* Use the "Clean Up" slot in the frame marker. In GCC,
6886 the only other use of this location is for copying a
6887 floating point double argument from a floating-point
6888 register to two general registers. The copy is done
6889 as an "atomic" operation when outputting a call, so it
6890 won't interfere with our using the location here. */
6891 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6892 }
6893
6894 if (TARGET_PORTABLE_RUNTIME)
6895 {
6896 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6897 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6898 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6899 }
6900 else if (flag_pic)
6901 {
6902 xoperands[1] = gen_rtx_REG (Pmode, 1);
6903 xoperands[2] = xoperands[1];
6904 pa_output_pic_pcrel_sequence (xoperands);
6905 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6906 }
6907 else
6908 /* Now output a very long branch to the original target. */
6909 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6910
6911 /* Now restore the value of %r1 in the delay slot. */
6912 if (TARGET_64BIT)
6913 {
6914 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6915 return "ldd -16(%%r30),%%r1";
6916 else
6917 return "ldd -40(%%r30),%%r1";
6918 }
6919 else
6920 {
6921 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6922 return "ldw -20(%%r30),%%r1";
6923 else
6924 return "ldw -12(%%r30),%%r1";
6925 }
6926 }
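
/* Putting the pieces together, the 16-byte non-PIC 32-bit sequence
   (frameless case, using the "Current RP" slot) is, schematically:

	stw %r1,-20(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -20(%r30),%r1

   where the final ldw executes in the delay slot of the be and
   restores %r1 ("target" stands for the real destination).  */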
6927
6928 /* This routine handles all the branch-on-bit conditional branch sequences we
6929 might need to generate. It handles nullification of delay slots,
6930 varying length branches, negated branches and all combinations of the
6931 above. It returns the appropriate output template to emit the branch. */
6932
6933 const char *
6934 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6935 {
6936 static char buf[100];
6937 bool useskip;
6938 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6939 int length = get_attr_length (insn);
6940 int xdelay;
6941
6942 /* A conditional branch to the following instruction (e.g. the delay slot) is
6943 asking for a disaster. I do not think this can happen as this pattern
6944 is only used when optimizing; jump optimization should eliminate the
6945 jump. But be prepared just in case. */
6946
6947 if (branch_to_delay_slot_p (insn))
6948 return "nop";
6949
6950 /* If this is a long branch with its delay slot unfilled, set `nullify'
6951 as it can nullify the delay slot and save a nop. */
6952 if (length == 8 && dbr_sequence_length () == 0)
6953 nullify = 1;
6954
6955 /* If this is a short forward conditional branch which did not get
6956 its delay slot filled, the delay slot can still be nullified. */
6957 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6958 nullify = forward_branch_p (insn);
6959
6960 /* A forward branch over a single nullified insn can be done with an
6961 extrs instruction. This avoids a single cycle penalty due to a
6962 mis-predicted branch if we fall through (branch not taken). */
6963 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6964
6965 switch (length)
6966 {
6967
6968 /* All short conditional branches except backwards with an unfilled
6969 delay slot. */
6970 case 4:
6971 if (useskip)
6972 strcpy (buf, "{extrs,|extrw,s,}");
6973 else
6974 strcpy (buf, "bb,");
6975 if (useskip && GET_MODE (operands[0]) == DImode)
6976 strcpy (buf, "extrd,s,*");
6977 else if (GET_MODE (operands[0]) == DImode)
6978 strcpy (buf, "bb,*");
6979 if ((which == 0 && negated)
6980 || (which == 1 && ! negated))
6981 strcat (buf, ">=");
6982 else
6983 strcat (buf, "<");
6984 if (useskip)
6985 strcat (buf, " %0,%1,1,%%r0");
6986 else if (nullify && negated)
6987 {
6988 if (branch_needs_nop_p (insn))
6989 strcat (buf, ",n %0,%1,%3%#");
6990 else
6991 strcat (buf, ",n %0,%1,%3");
6992 }
6993 else if (nullify && ! negated)
6994 {
6995 if (branch_needs_nop_p (insn))
6996 strcat (buf, ",n %0,%1,%2%#");
6997 else
6998 strcat (buf, ",n %0,%1,%2");
6999 }
7000 else if (! nullify && negated)
7001 strcat (buf, " %0,%1,%3");
7002 else if (! nullify && ! negated)
7003 strcat (buf, " %0,%1,%2");
7004 break;
7005
7006 /* All long conditionals. Note a short backward branch with an
7007 unfilled delay slot is treated just like a long backward branch
7008 with an unfilled delay slot. */
7009 case 8:
7010 /* Handle weird backwards branch with a filled delay slot
7011 which is nullified. */
7012 if (dbr_sequence_length () != 0
7013 && ! forward_branch_p (insn)
7014 && nullify)
7015 {
7016 strcpy (buf, "bb,");
7017 if (GET_MODE (operands[0]) == DImode)
7018 strcat (buf, "*");
7019 if ((which == 0 && negated)
7020 || (which == 1 && ! negated))
7021 strcat (buf, "<");
7022 else
7023 strcat (buf, ">=");
7024 if (negated)
7025 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7026 else
7027 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7028 }
7029 /* Handle short backwards branch with an unfilled delay slot.
7030 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7031 taken and untaken branches. */
7032 else if (dbr_sequence_length () == 0
7033 && ! forward_branch_p (insn)
7034 && INSN_ADDRESSES_SET_P ()
7035 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7036 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7037 {
7038 strcpy (buf, "bb,");
7039 if (GET_MODE (operands[0]) == DImode)
7040 strcat (buf, "*");
7041 if ((which == 0 && negated)
7042 || (which == 1 && ! negated))
7043 strcat (buf, ">=");
7044 else
7045 strcat (buf, "<");
7046 if (negated)
7047 strcat (buf, " %0,%1,%3%#");
7048 else
7049 strcat (buf, " %0,%1,%2%#");
7050 }
7051 else
7052 {
7053 if (GET_MODE (operands[0]) == DImode)
7054 strcpy (buf, "extrd,s,*");
7055 else
7056 strcpy (buf, "{extrs,|extrw,s,}");
7057 if ((which == 0 && negated)
7058 || (which == 1 && ! negated))
7059 strcat (buf, "<");
7060 else
7061 strcat (buf, ">=");
7062 if (nullify && negated)
7063 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7064 else if (nullify && ! negated)
7065 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7066 else if (negated)
7067 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7068 else
7069 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7070 }
7071 break;
7072
7073 default:
7074 /* The reversed conditional branch must branch over one additional
7075 instruction if the delay slot is filled and needs to be extracted
7076 by pa_output_lbranch. If the delay slot is empty or this is a
7077 nullified forward branch, the instruction after the reversed
7078 condition branch must be nullified. */
7079 if (dbr_sequence_length () == 0
7080 || (nullify && forward_branch_p (insn)))
7081 {
7082 nullify = 1;
7083 xdelay = 0;
7084 operands[4] = GEN_INT (length);
7085 }
7086 else
7087 {
7088 xdelay = 1;
7089 operands[4] = GEN_INT (length + 4);
7090 }
7091
7092 if (GET_MODE (operands[0]) == DImode)
7093 strcpy (buf, "bb,*");
7094 else
7095 strcpy (buf, "bb,");
7096 if ((which == 0 && negated)
7097 || (which == 1 && !negated))
7098 strcat (buf, "<");
7099 else
7100 strcat (buf, ">=");
7101 if (nullify)
7102 strcat (buf, ",n %0,%1,.+%4");
7103 else
7104 strcat (buf, " %0,%1,.+%4");
7105 output_asm_insn (buf, operands);
7106 return pa_output_lbranch (negated ? operands[3] : operands[2],
7107 insn, xdelay);
7108 }
7109 return buf;
7110 }
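
/* Example (GAS dialect, assumed operands): the word-mode length-4 case
   with WHICH == 0 and NEGATED == 0 resolves to something like

	bb,< %r26,5,.L7

   i.e. branch to %2 when bit 5 of %r26 (numbered from the most
   significant bit) is set.  */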
7111
7112 /* This routine handles all the branch-on-variable-bit conditional branch
7113 sequences we might need to generate. It handles nullification of delay
7114 slots, varying length branches, negated branches and all combinations
7115 of the above. It returns the appropriate output template to emit the
7116 branch. */
7117
7118 const char *
7119 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7120 int which)
7121 {
7122 static char buf[100];
7123 bool useskip;
7124 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7125 int length = get_attr_length (insn);
7126 int xdelay;
7127
7128 /* A conditional branch to the following instruction (e.g. the delay slot) is
7129 asking for a disaster. I do not think this can happen as this pattern
7130 is only used when optimizing; jump optimization should eliminate the
7131 jump. But be prepared just in case. */
7132
7133 if (branch_to_delay_slot_p (insn))
7134 return "nop";
7135
7136 /* If this is a long branch with its delay slot unfilled, set `nullify'
7137 as it can nullify the delay slot and save a nop. */
7138 if (length == 8 && dbr_sequence_length () == 0)
7139 nullify = 1;
7140
7141 /* If this is a short forward conditional branch which did not get
7142 its delay slot filled, the delay slot can still be nullified. */
7143 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7144 nullify = forward_branch_p (insn);
7145
7146 /* A forward branch over a single nullified insn can be done with an
7147 extrs instruction. This avoids a single cycle penalty due to a
7148 mis-predicted branch if we fall through (branch not taken). */
7149 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7150
7151 switch (length)
7152 {
7153
7154 /* All short conditional branches except backwards with an unfilled
7155 delay slot. */
7156 case 4:
7157 if (useskip)
7158 strcpy (buf, "{vextrs,|extrw,s,}");
7159 else
7160 strcpy (buf, "{bvb,|bb,}");
7161 if (useskip && GET_MODE (operands[0]) == DImode)
7162 strcpy (buf, "extrd,s,*");
7163 else if (GET_MODE (operands[0]) == DImode)
7164 strcpy (buf, "bb,*");
7165 if ((which == 0 && negated)
7166 || (which == 1 && ! negated))
7167 strcat (buf, ">=");
7168 else
7169 strcat (buf, "<");
7170 if (useskip)
7171 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7172 else if (nullify && negated)
7173 {
7174 if (branch_needs_nop_p (insn))
7175 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7176 else
7177 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7178 }
7179 else if (nullify && ! negated)
7180 {
7181 if (branch_needs_nop_p (insn))
7182 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7183 else
7184 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7185 }
7186 else if (! nullify && negated)
7187 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7188 else if (! nullify && ! negated)
7189 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7190 break;
7191
7192 /* All long conditionals. Note a short backward branch with an
7193 unfilled delay slot is treated just like a long backward branch
7194 with an unfilled delay slot. */
7195 case 8:
7196 /* Handle weird backwards branch with a filled delay slot
7197 which is nullified. */
7198 if (dbr_sequence_length () != 0
7199 && ! forward_branch_p (insn)
7200 && nullify)
7201 {
7202 strcpy (buf, "{bvb,|bb,}");
7203 if (GET_MODE (operands[0]) == DImode)
7204 strcat (buf, "*");
7205 if ((which == 0 && negated)
7206 || (which == 1 && ! negated))
7207 strcat (buf, "<");
7208 else
7209 strcat (buf, ">=");
7210 if (negated)
7211 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7212 else
7213 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7214 }
7215 /* Handle short backwards branch with an unfilled delay slot.
7216 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7217 taken and untaken branches. */
7218 else if (dbr_sequence_length () == 0
7219 && ! forward_branch_p (insn)
7220 && INSN_ADDRESSES_SET_P ()
7221 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7222 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7223 {
7224 strcpy (buf, "{bvb,|bb,}");
7225 if (GET_MODE (operands[0]) == DImode)
7226 strcat (buf, "*");
7227 if ((which == 0 && negated)
7228 || (which == 1 && ! negated))
7229 strcat (buf, ">=");
7230 else
7231 strcat (buf, "<");
7232 if (negated)
7233 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7234 else
7235 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7236 }
7237 else
7238 {
7239 strcpy (buf, "{vextrs,|extrw,s,}");
7240 if (GET_MODE (operands[0]) == DImode)
7241 strcpy (buf, "extrd,s,*");
7242 if ((which == 0 && negated)
7243 || (which == 1 && ! negated))
7244 strcat (buf, "<");
7245 else
7246 strcat (buf, ">=");
7247 if (nullify && negated)
7248 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7249 else if (nullify && ! negated)
7250 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7251 else if (negated)
7252 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7253 else
7254 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7255 }
7256 break;
7257
7258 default:
7259 /* The reversed conditional branch must branch over one additional
7260 instruction if the delay slot is filled and needs to be extracted
7261 by pa_output_lbranch. If the delay slot is empty or this is a
7262 nullified forward branch, the instruction after the reversed
7263 conditional branch must be nullified. */
7264 if (dbr_sequence_length () == 0
7265 || (nullify && forward_branch_p (insn)))
7266 {
7267 nullify = 1;
7268 xdelay = 0;
7269 operands[4] = GEN_INT (length);
7270 }
7271 else
7272 {
7273 xdelay = 1;
7274 operands[4] = GEN_INT (length + 4);
7275 }
7276
7277 if (GET_MODE (operands[0]) == DImode)
7278 strcpy (buf, "bb,*");
7279 else
7280 strcpy (buf, "{bvb,|bb,}");
7281 if ((which == 0 && negated)
7282 || (which == 1 && !negated))
7283 strcat (buf, "<");
7284 else
7285 strcat (buf, ">=");
7286 if (nullify)
7287 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7288 else
7289 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7290 output_asm_insn (buf, operands);
7291 return pa_output_lbranch (negated ? operands[3] : operands[2],
7292 insn, xdelay);
7293 }
7294 return buf;
7295 }
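
/* A note on the templates above: the "{alt0|alt1}" braces select
   between assembler dialects, with the PA 1.x mnemonic first and the
   PA 2.0 mnemonic second, so "{bvb,|bb,}" emits "bvb," on PA 1.x and
   "bb," on PA 2.0.  As an illustrative (not verbatim) example, the
   length-4 nullified case could assemble on PA 2.0 to

	bb,>=,n %r4,%sar,L$0012

   where %r4 and L$0012 are hypothetical operands.  */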
7296
7297 /* Return the output template for emitting a dbra type insn.
7298
7299 Note it may perform some output operations on its own before
7300 returning the final output string. */
7301 const char *
7302 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7303 {
7304 int length = get_attr_length (insn);
7305
7306 /* A conditional branch to the following instruction (i.e., the delay slot) is
7307 asking for a disaster. Be prepared! */
7308
7309 if (branch_to_delay_slot_p (insn))
7310 {
7311 if (which_alternative == 0)
7312 return "ldo %1(%0),%0";
7313 else if (which_alternative == 1)
7314 {
7315 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7316 output_asm_insn ("ldw -16(%%r30),%4", operands);
7317 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7318 return "{fldws|fldw} -16(%%r30),%0";
7319 }
7320 else
7321 {
7322 output_asm_insn ("ldw %0,%4", operands);
7323 return "ldo %1(%4),%4\n\tstw %4,%0";
7324 }
7325 }
7326
7327 if (which_alternative == 0)
7328 {
7329 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7330 int xdelay;
7331
7332 /* If this is a long branch with its delay slot unfilled, set `nullify'
7333 as it can nullify the delay slot and save a nop. */
7334 if (length == 8 && dbr_sequence_length () == 0)
7335 nullify = 1;
7336
7337 /* If this is a short forward conditional branch which did not get
7338 its delay slot filled, the delay slot can still be nullified. */
7339 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7340 nullify = forward_branch_p (insn);
7341
7342 switch (length)
7343 {
7344 case 4:
7345 if (nullify)
7346 {
7347 if (branch_needs_nop_p (insn))
7348 return "addib,%C2,n %1,%0,%3%#";
7349 else
7350 return "addib,%C2,n %1,%0,%3";
7351 }
7352 else
7353 return "addib,%C2 %1,%0,%3";
7354
7355 case 8:
7356 /* Handle weird backwards branch with a filled delay slot
7357 which is nullified. */
7358 if (dbr_sequence_length () != 0
7359 && ! forward_branch_p (insn)
7360 && nullify)
7361 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7362 /* Handle short backwards branch with an unfilled delay slot.
7363 Using an addb;nop rather than an addi;bl saves 1 cycle for both
7364 taken and untaken branches. */
7365 else if (dbr_sequence_length () == 0
7366 && ! forward_branch_p (insn)
7367 && INSN_ADDRESSES_SET_P ()
7368 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7369 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7370 return "addib,%C2 %1,%0,%3%#";
7371
7372 /* Handle normal cases. */
7373 if (nullify)
7374 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7375 else
7376 return "addi,%N2 %1,%0,%0\n\tb %3";
7377
7378 default:
7379 /* The reversed conditional branch must branch over one additional
7380 instruction if the delay slot is filled and needs to be extracted
7381 by pa_output_lbranch. If the delay slot is empty or this is a
7382 nullified forward branch, the instruction after the reversed
7383 conditional branch must be nullified. */
7384 if (dbr_sequence_length () == 0
7385 || (nullify && forward_branch_p (insn)))
7386 {
7387 nullify = 1;
7388 xdelay = 0;
7389 operands[4] = GEN_INT (length);
7390 }
7391 else
7392 {
7393 xdelay = 1;
7394 operands[4] = GEN_INT (length + 4);
7395 }
7396
7397 if (nullify)
7398 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7399 else
7400 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7401
7402 return pa_output_lbranch (operands[3], insn, xdelay);
7403 }
7404
7405 }
7406 /* Deal with gross reload from FP register case. */
7407 else if (which_alternative == 1)
7408 {
7409 /* Move loop counter from FP register to MEM then into a GR,
7410 increment the GR, store the GR into MEM, and finally reload
7411 the FP register from MEM from within the branch's delay slot. */
7412 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7413 operands);
7414 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7415 if (length == 24)
7416 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7417 else if (length == 28)
7418 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7419 else
7420 {
7421 operands[5] = GEN_INT (length - 16);
7422 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7423 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7424 return pa_output_lbranch (operands[3], insn, 0);
7425 }
7426 }
7427 /* Deal with gross reload from memory case. */
7428 else
7429 {
7430 /* Reload loop counter from memory, the store back to memory
7431 happens in the branch's delay slot. */
7432 output_asm_insn ("ldw %0,%4", operands);
7433 if (length == 12)
7434 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7435 else if (length == 16)
7436 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7437 else
7438 {
7439 operands[5] = GEN_INT (length - 4);
7440 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7441 return pa_output_lbranch (operands[3], insn, 0);
7442 }
7443 }
7444 }
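
/* As an illustrative example, the short case of pa_output_dbra
   ("addib,%C2 %1,%0,%3") could assemble to

	addib,> -1,%r26,L$0002

   decrementing %r26 and branching to L$0002 while the result is
   still positive; the condition, register and label shown are
   hypothetical.  */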
7445
7446 /* Return the output template for emitting a movb type insn.
7447
7448 Note it may perform some output operations on its own before
7449 returning the final output string. */
7450 const char *
7451 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7452 int reverse_comparison)
7453 {
7454 int length = get_attr_length (insn);
7455
7456 /* A conditional branch to the following instruction (i.e., the delay slot) is
7457 asking for a disaster. Be prepared! */
7458
7459 if (branch_to_delay_slot_p (insn))
7460 {
7461 if (which_alternative == 0)
7462 return "copy %1,%0";
7463 else if (which_alternative == 1)
7464 {
7465 output_asm_insn ("stw %1,-16(%%r30)", operands);
7466 return "{fldws|fldw} -16(%%r30),%0";
7467 }
7468 else if (which_alternative == 2)
7469 return "stw %1,%0";
7470 else
7471 return "mtsar %r1";
7472 }
7473
7474 /* Support the second variant. */
7475 if (reverse_comparison)
7476 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7477
7478 if (which_alternative == 0)
7479 {
7480 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7481 int xdelay;
7482
7483 /* If this is a long branch with its delay slot unfilled, set `nullify'
7484 as it can nullify the delay slot and save a nop. */
7485 if (length == 8 && dbr_sequence_length () == 0)
7486 nullify = 1;
7487
7488 /* If this is a short forward conditional branch which did not get
7489 its delay slot filled, the delay slot can still be nullified. */
7490 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7491 nullify = forward_branch_p (insn);
7492
7493 switch (length)
7494 {
7495 case 4:
7496 if (nullify)
7497 {
7498 if (branch_needs_nop_p (insn))
7499 return "movb,%C2,n %1,%0,%3%#";
7500 else
7501 return "movb,%C2,n %1,%0,%3";
7502 }
7503 else
7504 return "movb,%C2 %1,%0,%3";
7505
7506 case 8:
7507 /* Handle weird backwards branch with a filled delay slot
7508 which is nullified. */
7509 if (dbr_sequence_length () != 0
7510 && ! forward_branch_p (insn)
7511 && nullify)
7512 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7513
7514 /* Handle short backwards branch with an unfilled delay slot.
7515 Using a movb;nop rather than or;bl saves 1 cycle for both
7516 taken and untaken branches. */
7517 else if (dbr_sequence_length () == 0
7518 && ! forward_branch_p (insn)
7519 && INSN_ADDRESSES_SET_P ()
7520 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7521 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7522 return "movb,%C2 %1,%0,%3%#";
7523 /* Handle normal cases. */
7524 if (nullify)
7525 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7526 else
7527 return "or,%N2 %1,%%r0,%0\n\tb %3";
7528
7529 default:
7530 /* The reversed conditional branch must branch over one additional
7531 instruction if the delay slot is filled and needs to be extracted
7532 by pa_output_lbranch. If the delay slot is empty or this is a
7533 nullified forward branch, the instruction after the reversed
7534 conditional branch must be nullified. */
7535 if (dbr_sequence_length () == 0
7536 || (nullify && forward_branch_p (insn)))
7537 {
7538 nullify = 1;
7539 xdelay = 0;
7540 operands[4] = GEN_INT (length);
7541 }
7542 else
7543 {
7544 xdelay = 1;
7545 operands[4] = GEN_INT (length + 4);
7546 }
7547
7548 if (nullify)
7549 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7550 else
7551 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7552
7553 return pa_output_lbranch (operands[3], insn, xdelay);
7554 }
7555 }
7556 /* Deal with gross reload for FP destination register case. */
7557 else if (which_alternative == 1)
7558 {
7559 /* Move source register to MEM, perform the branch test, then
7560 finally load the FP register from MEM from within the branch's
7561 delay slot. */
7562 output_asm_insn ("stw %1,-16(%%r30)", operands);
7563 if (length == 12)
7564 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7565 else if (length == 16)
7566 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7567 else
7568 {
7569 operands[4] = GEN_INT (length - 4);
7570 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7571 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7572 return pa_output_lbranch (operands[3], insn, 0);
7573 }
7574 }
7575 /* Deal with gross reload from memory case. */
7576 else if (which_alternative == 2)
7577 {
7578 /* Reload loop counter from memory, the store back to memory
7579 happens in the branch's delay slot. */
7580 if (length == 8)
7581 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7582 else if (length == 12)
7583 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7584 else
7585 {
7586 operands[4] = GEN_INT (length);
7587 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7588 operands);
7589 return pa_output_lbranch (operands[3], insn, 0);
7590 }
7591 }
7592 /* Handle SAR as a destination. */
7593 else
7594 {
7595 if (length == 8)
7596 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7597 else if (length == 12)
7598 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7599 else
7600 {
7601 operands[4] = GEN_INT (length);
7602 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7603 operands);
7604 return pa_output_lbranch (operands[3], insn, 0);
7605 }
7606 }
7607 }
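
/* Illustratively, the short case of pa_output_movb
   ("movb,%C2 %1,%0,%3") could assemble to

	movb,= %r26,%r28,L$0005

   copying %r26 into %r28 and branching to L$0005 when the copied
   value is zero; all operands shown are hypothetical.  */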
7608
7609 /* Copy any FP arguments in INSN into integer registers. */
7610 static void
7611 copy_fp_args (rtx_insn *insn)
7612 {
7613 rtx link;
7614 rtx xoperands[2];
7615
7616 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7617 {
7618 int arg_mode, regno;
7619 rtx use = XEXP (link, 0);
7620
7621 if (! (GET_CODE (use) == USE
7622 && GET_CODE (XEXP (use, 0)) == REG
7623 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7624 continue;
7625
7626 arg_mode = GET_MODE (XEXP (use, 0));
7627 regno = REGNO (XEXP (use, 0));
7628
7629 /* Is it a floating point register? */
7630 if (regno >= 32 && regno <= 39)
7631 {
7632 /* Copy the FP register into an integer register via memory. */
7633 if (arg_mode == SFmode)
7634 {
7635 xoperands[0] = XEXP (use, 0);
7636 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7637 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7638 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7639 }
7640 else
7641 {
7642 xoperands[0] = XEXP (use, 0);
7643 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7644 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7645 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7646 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7647 }
7648 }
7649 }
7650 }
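
/* For example, a hypothetical SFmode argument in %fr4 that must be
   mirrored into %r26 would be copied by the sequence

	fstws %fr4,-16(%sr0,%r30)
	ldw -16(%sr0,%r30),%r26

   in PA 1.x syntax ("fstw" on PA 2.0); the register pairing shown is
   illustrative only.  */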
7651
7652 /* Compute length of the FP argument copy sequence for INSN. */
7653 static int
7654 length_fp_args (rtx_insn *insn)
7655 {
7656 int length = 0;
7657 rtx link;
7658
7659 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7660 {
7661 int arg_mode, regno;
7662 rtx use = XEXP (link, 0);
7663
7664 if (! (GET_CODE (use) == USE
7665 && GET_CODE (XEXP (use, 0)) == REG
7666 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7667 continue;
7668
7669 arg_mode = GET_MODE (XEXP (use, 0));
7670 regno = REGNO (XEXP (use, 0));
7671
7672 /* Is it a floating point register? */
7673 if (regno >= 32 && regno <= 39)
7674 {
7675 if (arg_mode == SFmode)
7676 length += 8;
7677 else
7678 length += 12;
7679 }
7680 }
7681
7682 return length;
7683 }
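
/* Note: the 8- and 12-byte counts above match the sequences emitted
   by copy_fp_args: two 4-byte insns (fstw + ldw) for an SFmode
   argument and three (fstd + two ldw) otherwise.  */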
7684
7685 /* Return the attribute length for the millicode call instruction INSN.
7686 The length must match the code generated by pa_output_millicode_call.
7687 We include the delay slot in the returned length as it is better to
7688 overestimate the length than to underestimate it. */
7689
7690 int
7691 pa_attr_length_millicode_call (rtx_insn *insn)
7692 {
7693 unsigned long distance = -1;
7694 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7695
7696 if (INSN_ADDRESSES_SET_P ())
7697 {
7698 distance = (total + insn_current_reference_address (insn));
7699 if (distance < total)
7700 distance = -1;
7701 }
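  /* Note: TOTAL and DISTANCE are unsigned, so the addition above wraps
     on overflow; the "distance < total" test detects that wrap and
     falls back to -1 (the maximum unsigned value), forcing the
     conservative long-call lengths below.  */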
7702
7703 if (TARGET_64BIT)
7704 {
7705 if (!TARGET_LONG_CALLS && distance < 7600000)
7706 return 8;
7707
7708 return 20;
7709 }
7710 else if (TARGET_PORTABLE_RUNTIME)
7711 return 24;
7712 else
7713 {
7714 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7715 return 8;
7716
7717 if (!flag_pic)
7718 return 12;
7719
7720 return 24;
7721 }
7722 }
7723
7724 /* INSN is a function call.
7725
7726 CALL_DEST is the routine we are calling. */
7727
7728 const char *
7729 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7730 {
7731 int attr_length = get_attr_length (insn);
7732 int seq_length = dbr_sequence_length ();
7733 rtx xoperands[4];
7734
7735 xoperands[0] = call_dest;
7736
7737 /* Handle the common case where we are sure that the branch will
7738 reach the beginning of the $CODE$ subspace. The within-reach
7739 form of the $$sh_func_adrs call has a length of 28. Because it
7740 has an attribute type of sh_func_adrs, it never has a nonzero
7741 sequence length (i.e., the delay slot is never filled). */
7742 if (!TARGET_LONG_CALLS
7743 && (attr_length == 8
7744 || (attr_length == 28
7745 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7746 {
7747 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7748 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7749 }
7750 else
7751 {
7752 if (TARGET_64BIT)
7753 {
7754 /* It might seem that one insn could be saved by accessing
7755 the millicode function using the linkage table. However,
7756 this doesn't work in shared libraries and other dynamically
7757 loaded objects. Using a pc-relative sequence also avoids
7758 problems related to the implicit use of the gp register. */
7759 xoperands[1] = gen_rtx_REG (Pmode, 1);
7760 xoperands[2] = xoperands[1];
7761 pa_output_pic_pcrel_sequence (xoperands);
7762 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7763 }
7764 else if (TARGET_PORTABLE_RUNTIME)
7765 {
7766 /* Pure portable runtime doesn't allow be/ble; we also don't
7767 have PIC support in the assembler/linker, so this sequence
7768 is needed. */
7769
7770 /* Get the address of our target into %r1. */
7771 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7772 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7773
7774 /* Get our return address into %r31. */
7775 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7776 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7777
7778 /* Jump to our target address in %r1. */
7779 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7780 }
7781 else if (!flag_pic)
7782 {
7783 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7784 if (TARGET_PA_20)
7785 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7786 else
7787 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7788 }
7789 else
7790 {
7791 xoperands[1] = gen_rtx_REG (Pmode, 31);
7792 xoperands[2] = gen_rtx_REG (Pmode, 1);
7793 pa_output_pic_pcrel_sequence (xoperands);
7794
7795 /* Adjust return address. */
7796 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7797
7798 /* Jump to our target address in %r1. */
7799 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7800 }
7801 }
7802
7803 if (seq_length == 0)
7804 output_asm_insn ("nop", xoperands);
7805
7806 return "";
7807 }
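
/* As an illustration, when the branch is within reach a 32-bit call
   to a millicode routine such as $$mulI comes out as simply

	bl $$mulI,%r31
	nop

   with the nop emitted only when the delay slot was left unfilled;
   the longer sequences above are used otherwise.  */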
7808
7809 /* Return the attribute length of the call instruction INSN. The SIBCALL
7810 flag indicates whether INSN is a regular call or a sibling call. The
7811 length returned must be longer than the code actually generated by
7812 pa_output_call. Since branch shortening is done before delay branch
7813 sequencing, there is no way to determine whether or not the delay
7814 slot will be filled during branch shortening. Even when the delay
7815 slot is filled, we may have to add a nop if the delay slot contains
7816 a branch that can't reach its target. Thus, we always have to include
7817 the delay slot in the length estimate. This used to be done in
7818 pa_adjust_insn_length but we do it here now as some sequences always
7819 fill the delay slot and we can save four bytes in the estimate for
7820 these sequences. */
7821
7822 int
7823 pa_attr_length_call (rtx_insn *insn, int sibcall)
7824 {
7825 int local_call;
7826 rtx call, call_dest;
7827 tree call_decl;
7828 int length = 0;
7829 rtx pat = PATTERN (insn);
7830 unsigned long distance = -1;
7831
7832 gcc_assert (CALL_P (insn));
7833
7834 if (INSN_ADDRESSES_SET_P ())
7835 {
7836 unsigned long total;
7837
7838 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7839 distance = (total + insn_current_reference_address (insn));
7840 if (distance < total)
7841 distance = -1;
7842 }
7843
7844 gcc_assert (GET_CODE (pat) == PARALLEL);
7845
7846 /* Get the call rtx. */
7847 call = XVECEXP (pat, 0, 0);
7848 if (GET_CODE (call) == SET)
7849 call = SET_SRC (call);
7850
7851 gcc_assert (GET_CODE (call) == CALL);
7852
7853 /* Determine if this is a local call. */
7854 call_dest = XEXP (XEXP (call, 0), 0);
7855 call_decl = SYMBOL_REF_DECL (call_dest);
7856 local_call = call_decl && targetm.binds_local_p (call_decl);
7857
7858 /* pc-relative branch. */
7859 if (!TARGET_LONG_CALLS
7860 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7861 || distance < MAX_PCREL17F_OFFSET))
7862 length += 8;
7863
7864 /* 64-bit plabel sequence. */
7865 else if (TARGET_64BIT && !local_call)
7866 length += sibcall ? 28 : 24;
7867
7868 /* non-pic long absolute branch sequence. */
7869 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7870 length += 12;
7871
7872 /* long pc-relative branch sequence. */
7873 else if (TARGET_LONG_PIC_SDIFF_CALL
7874 || (TARGET_GAS && !TARGET_SOM && local_call))
7875 {
7876 length += 20;
7877
7878 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7879 length += 8;
7880 }
7881
7882 /* 32-bit plabel sequence. */
7883 else
7884 {
7885 length += 32;
7886
7887 if (TARGET_SOM)
7888 length += length_fp_args (insn);
7889
7890 if (flag_pic)
7891 length += 4;
7892
7893 if (!TARGET_PA_20)
7894 {
7895 if (!sibcall)
7896 length += 8;
7897
7898 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7899 length += 8;
7900 }
7901 }
7902
7903 return length;
7904 }
7905
7906 /* INSN is a function call.
7907
7908 CALL_DEST is the routine we are calling. */
7909
7910 const char *
7911 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7912 {
7913 int seq_length = dbr_sequence_length ();
7914 tree call_decl = SYMBOL_REF_DECL (call_dest);
7915 int local_call = call_decl && targetm.binds_local_p (call_decl);
7916 rtx xoperands[4];
7917
7918 xoperands[0] = call_dest;
7919
7920 /* Handle the common case where we're sure that the branch will reach
7921 the beginning of the "$CODE$" subspace. This is the beginning of
7922 the current function if we are in a named section. */
7923 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7924 {
7925 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7926 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7927 }
7928 else
7929 {
7930 if (TARGET_64BIT && !local_call)
7931 {
7932 /* ??? As far as I can tell, the HP linker doesn't support the
7933 long pc-relative sequence described in the 64-bit runtime
7934 architecture. So, we use a slightly longer indirect call. */
7935 xoperands[0] = pa_get_deferred_plabel (call_dest);
7936 xoperands[1] = gen_label_rtx ();
7937
7938 /* If this isn't a sibcall, we put the load of %r27 into the
7939 delay slot. We can't do this in a sibcall as we don't
7940 have a second call-clobbered scratch register available.
7941 We don't need to do anything when generating fast indirect
7942 calls. */
7943 if (seq_length != 0 && !sibcall)
7944 {
7945 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7946 optimize, 0, NULL);
7947
7948 /* Now delete the delay insn. */
7949 SET_INSN_DELETED (NEXT_INSN (insn));
7950 seq_length = 0;
7951 }
7952
7953 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7954 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7955 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7956
7957 if (sibcall)
7958 {
7959 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7960 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7961 output_asm_insn ("bve (%%r1)", xoperands);
7962 }
7963 else
7964 {
7965 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7966 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7967 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7968 seq_length = 1;
7969 }
7970 }
7971 else
7972 {
7973 int indirect_call = 0;
7974
7975 /* Emit a long call. There are several different sequences
7976 of increasing length and complexity. In most cases,
7977 they don't allow an instruction in the delay slot. */
7978 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7979 && !TARGET_LONG_PIC_SDIFF_CALL
7980 && !(TARGET_GAS && !TARGET_SOM && local_call)
7981 && !TARGET_64BIT)
7982 indirect_call = 1;
7983
7984 if (seq_length != 0
7985 && !sibcall
7986 && (!TARGET_PA_20
7987 || indirect_call
7988 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7989 {
7990 /* A non-jump insn in the delay slot. By definition we can
7991 emit this insn before the call (and in fact before argument
7992 relocation). */
7993 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7994 NULL);
7995
7996 /* Now delete the delay insn. */
7997 SET_INSN_DELETED (NEXT_INSN (insn));
7998 seq_length = 0;
7999 }
8000
8001 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8002 {
8003 /* This is the best sequence for making long calls in
8004 non-pic code. Unfortunately, GNU ld doesn't provide
8005 the stub needed for external calls, and GAS's support
8006 for this with the SOM linker is buggy. It is safe
8007 to use this for local calls. */
8008 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8009 if (sibcall)
8010 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8011 else
8012 {
8013 if (TARGET_PA_20)
8014 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8015 xoperands);
8016 else
8017 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8018
8019 output_asm_insn ("copy %%r31,%%r2", xoperands);
8020 seq_length = 1;
8021 }
8022 }
8023 else
8024 {
8025 /* The HP assembler and linker can handle relocations for
8026 the difference of two symbols. The HP assembler
8027 recognizes the sequence as a pc-relative call and
8028 the linker provides stubs when needed. */
8029
8030 /* GAS currently can't generate the relocations that
8031 are needed for the SOM linker under HP-UX using this
8032 sequence. The GNU linker doesn't generate the stubs
8033 that are needed for external calls on TARGET_ELF32
8034 with this sequence. For now, we have to use a longer
8035 plabel sequence when using GAS for non-local calls. */
8036 if (TARGET_LONG_PIC_SDIFF_CALL
8037 || (TARGET_GAS && !TARGET_SOM && local_call))
8038 {
8039 xoperands[1] = gen_rtx_REG (Pmode, 1);
8040 xoperands[2] = xoperands[1];
8041 pa_output_pic_pcrel_sequence (xoperands);
8042 }
8043 else
8044 {
8045 /* Emit a long plabel-based call sequence. This is
8046 essentially an inline implementation of $$dyncall.
8047 We don't actually try to call $$dyncall as this is
8048 as difficult as calling the function itself. */
8049 xoperands[0] = pa_get_deferred_plabel (call_dest);
8050 xoperands[1] = gen_label_rtx ();
8051
8052 /* Since the call is indirect, FP arguments in registers
8053 need to be copied to the general registers. Then, the
8054 argument relocation stub will copy them back. */
8055 if (TARGET_SOM)
8056 copy_fp_args (insn);
8057
8058 if (flag_pic)
8059 {
8060 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8061 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8062 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8063 }
8064 else
8065 {
8066 output_asm_insn ("addil LR'%0-$global$,%%r27",
8067 xoperands);
8068 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8069 xoperands);
8070 }
8071
8072 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8073 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8074 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8075 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8076
8077 if (!sibcall && !TARGET_PA_20)
8078 {
8079 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8080 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8081 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8082 else
8083 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8084 }
8085 }
8086
8087 if (TARGET_PA_20)
8088 {
8089 if (sibcall)
8090 output_asm_insn ("bve (%%r1)", xoperands);
8091 else
8092 {
8093 if (indirect_call)
8094 {
8095 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8096 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8097 seq_length = 1;
8098 }
8099 else
8100 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8101 }
8102 }
8103 else
8104 {
8105 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8106 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8107 xoperands);
8108
8109 if (sibcall)
8110 {
8111 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8112 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8113 else
8114 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8115 }
8116 else
8117 {
8118 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8119 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8120 else
8121 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8122
8123 if (indirect_call)
8124 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8125 else
8126 output_asm_insn ("copy %%r31,%%r2", xoperands);
8127 seq_length = 1;
8128 }
8129 }
8130 }
8131 }
8132 }
8133
8134 if (seq_length == 0)
8135 output_asm_insn ("nop", xoperands);
8136
8137 return "";
8138 }
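
/* To illustrate one of the sequences above: a non-PIC long absolute
   call to a hypothetical function foo on PA 1.x (the
   TARGET_LONG_ABS_CALL path, non-sibcall) emits

	ldil L'foo,%r1
	ble R'foo(%sr4,%r1)
	copy %r31,%r2

   with the copy executing in the ble's delay slot (seq_length is set
   to 1 so that no trailing nop is added).  */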
8139
8140 /* Return the attribute length of the indirect call instruction INSN.
8141 The length must match the code generated by pa_output_indirect_call.
8142 The returned length includes the delay slot. Currently, the delay
8143 slot of an indirect call sequence is not exposed and it is used by
8144 the sequence itself. */
8145
8146 int
8147 pa_attr_length_indirect_call (rtx_insn *insn)
8148 {
8149 unsigned long distance = -1;
8150 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8151
8152 if (INSN_ADDRESSES_SET_P ())
8153 {
8154 distance = (total + insn_current_reference_address (insn));
8155 if (distance < total)
8156 distance = -1;
8157 }
8158
8159 if (TARGET_64BIT)
8160 return 12;
8161
8162 if (TARGET_FAST_INDIRECT_CALLS)
8163 return 8;
8164
8165 if (TARGET_PORTABLE_RUNTIME)
8166 return 16;
8167
8168 /* Inline version of $$dyncall. */
8169 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8170 return 20;
8171
8172 if (!TARGET_LONG_CALLS
8173 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8174 || distance < MAX_PCREL17F_OFFSET))
8175 return 8;
8176
8177 /* Out of reach, can use ble. */
8178 if (!flag_pic)
8179 return 12;
8180
8181 /* Inline version of $$dyncall. */
8182 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8183 return 20;
8184
8185 if (!optimize_size)
8186 return 36;
8187
8188 /* Long PIC pc-relative call. */
8189 return 20;
8190 }
8191
8192 const char *
8193 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8194 {
8195 rtx xoperands[4];
8196 int length;
8197
8198 if (TARGET_64BIT)
8199 {
8200 xoperands[0] = call_dest;
8201 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8202 "bve,l (%%r2),%%r2\n\t"
8203 "ldd 24(%0),%%r27", xoperands);
8204 return "";
8205 }
8206
8207 /* First the special case for kernels, level 0 systems, etc. */
8208 if (TARGET_FAST_INDIRECT_CALLS)
8209 {
8210 pa_output_arg_descriptor (insn);
8211 if (TARGET_PA_20)
8212 return "bve,l,n (%%r22),%%r2\n\tnop";
8213 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8214 }
8215
8216 if (TARGET_PORTABLE_RUNTIME)
8217 {
8218 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8219 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8220 pa_output_arg_descriptor (insn);
8221 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8222 }
8223
8224 /* Maybe emit a fast inline version of $$dyncall. */
8225 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8226 {
8227 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8228 "ldw 2(%%r22),%%r19\n\t"
8229 "ldw -2(%%r22),%%r22", xoperands);
8230 pa_output_arg_descriptor (insn);
8231 if (TARGET_NO_SPACE_REGS)
8232 {
8233 if (TARGET_PA_20)
8234 return "bve,l,n (%%r22),%%r2\n\tnop";
8235 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8236 }
8237 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8238 }
8239
8240 /* Now the normal case -- we can reach $$dyncall directly or
8241 we're sure that we can get there via a long-branch stub.
8242
8243 No need to check target flags as the length uniquely identifies
8244 the remaining cases. */
8245 length = pa_attr_length_indirect_call (insn);
8246 if (length == 8)
8247 {
8248 pa_output_arg_descriptor (insn);
8249
8250 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8251 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8252 variant of the B,L instruction can't be used on the SOM target. */
8253 if (TARGET_PA_20 && !TARGET_SOM)
8254 return "b,l,n $$dyncall,%%r2\n\tnop";
8255 else
8256 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8257 }
8258
8259 /* Long millicode call, but we are not generating PIC or portable runtime
8260 code. */
8261 if (length == 12)
8262 {
8263 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8264 pa_output_arg_descriptor (insn);
8265 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8266 }
8267
8268 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8269 pc-relative call sequence is five instructions. The inline PA 2.0
8270 version of $$dyncall is also five instructions. The PA 1.X versions
8271 are longer but still an overall win. */
8272 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8273 {
8274 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8275 "ldw 2(%%r22),%%r19\n\t"
8276 "ldw -2(%%r22),%%r22", xoperands);
8277 if (TARGET_NO_SPACE_REGS)
8278 {
8279 pa_output_arg_descriptor (insn);
8280 if (TARGET_PA_20)
8281 return "bve,l,n (%%r22),%%r2\n\tnop";
8282 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8283 }
8284 if (TARGET_PA_20)
8285 {
8286 pa_output_arg_descriptor (insn);
8287 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8288 }
8289 output_asm_insn ("bl .+8,%%r2\n\t"
8290 "ldo 16(%%r2),%%r2\n\t"
8291 "ldsid (%%r22),%%r1\n\t"
8292 "mtsp %%r1,%%sr0", xoperands);
8293 pa_output_arg_descriptor (insn);
8294 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8295 }
8296
8297 /* We need a long PIC call to $$dyncall. */
8298 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8299 xoperands[1] = gen_rtx_REG (Pmode, 2);
8300 xoperands[2] = gen_rtx_REG (Pmode, 1);
8301 pa_output_pic_pcrel_sequence (xoperands);
8302 pa_output_arg_descriptor (insn);
8303 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8304 }
8305
8306 /* In HP-UX 8.0's shared library scheme, special relocations are needed
8307 for function labels if they might be passed to a function
8308 in a shared library (because shared libraries don't live in code
8309 space), and special magic is needed to construct their address. */
8310
8311 void
8312 pa_encode_label (rtx sym)
8313 {
8314 const char *str = XSTR (sym, 0);
8315 int len = strlen (str) + 1;
8316 char *newstr, *p;
8317
8318 p = newstr = XALLOCAVEC (char, len + 1);
8319 *p++ = '@';
8320 strcpy (p, str);
8321
8322 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8323 }
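
/* For example, a function symbol named "foo" becomes "@foo" after
   pa_encode_label; pa_strip_name_encoding below removes the '@' (and
   any leading '*') again.  */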
8324
8325 static void
8326 pa_encode_section_info (tree decl, rtx rtl, int first)
8327 {
8328 int old_referenced = 0;
8329
8330 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8331 old_referenced
8332 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8333
8334 default_encode_section_info (decl, rtl, first);
8335
8336 if (first && TEXT_SPACE_P (decl))
8337 {
8338 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8339 if (TREE_CODE (decl) == FUNCTION_DECL)
8340 pa_encode_label (XEXP (rtl, 0));
8341 }
8342 else if (old_referenced)
8343 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8344 }
8345
8346 /* This is sort of the inverse of pa_encode_section_info. */
8347
8348 static const char *
8349 pa_strip_name_encoding (const char *str)
8350 {
8351 str += (*str == '@');
8352 str += (*str == '*');
8353 return str;
8354 }
8355
8356 /* Returns 1 if OP is a function label involved in a simple addition
8357 with a constant. Used to keep certain patterns from matching
8358 during instruction combination. */
8359 int
8360 pa_is_function_label_plus_const (rtx op)
8361 {
8362 /* Strip off any CONST. */
8363 if (GET_CODE (op) == CONST)
8364 op = XEXP (op, 0);
8365
8366 return (GET_CODE (op) == PLUS
8367 && function_label_operand (XEXP (op, 0), VOIDmode)
8368 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8369 }
8370
8371 /* Output assembly code for a thunk to FUNCTION. */
8372
8373 static void
8374 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8375 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8376 tree function)
8377 {
8378 static unsigned int current_thunk_number;
8379 int val_14 = VAL_14_BITS_P (delta);
8380 unsigned int old_last_address = last_address, nbytes = 0;
8381 char label[17];
8382 rtx xoperands[4];
8383
8384 xoperands[0] = XEXP (DECL_RTL (function), 0);
8385 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8386 xoperands[2] = GEN_INT (delta);
8387
8388 final_start_function (emit_barrier (), file, 1);
8389
8390 /* Output the thunk. We know that the function is in the same
8391 translation unit (i.e., the same space) as the thunk, and that
8392 thunks are output after their method. Thus, we don't need an
8393 external branch to reach the function. With SOM and GAS,
8394 functions and thunks are effectively in different sections.
8395 Thus, we can always use an IA-relative branch and the linker
8396 will add a long branch stub if necessary.
8397
8398 However, we have to be careful when generating PIC code on the
8399 SOM port to ensure that the sequence does not transfer to an
8400 import stub for the target function as this could clobber the
8401 return value saved at SP-24. This would also apply to the
8402 32-bit linux port if the multi-space model is implemented. */
8403 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8404 && !(flag_pic && TREE_PUBLIC (function))
8405 && (TARGET_GAS || last_address < 262132))
8406 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8407 && ((targetm_common.have_named_sections
8408 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8409 /* The GNU 64-bit linker has rather poor stub management.
8410 So, we use a long branch from thunks that aren't in
8411 the same section as the target function. */
8412 && ((!TARGET_64BIT
8413 && (DECL_SECTION_NAME (thunk_fndecl)
8414 != DECL_SECTION_NAME (function)))
8415 || ((DECL_SECTION_NAME (thunk_fndecl)
8416 == DECL_SECTION_NAME (function))
8417 && last_address < 262132)))
8418 /* In this case, we need to be able to reach the start of
8419 the stub table even though the function is likely closer
8420 and can be jumped to directly. */
8421 || (targetm_common.have_named_sections
8422 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8423 && DECL_SECTION_NAME (function) == NULL
8424 && total_code_bytes < MAX_PCREL17F_OFFSET)
8425 /* Likewise. */
8426 || (!targetm_common.have_named_sections
8427 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8428 {
8429 if (!val_14)
8430 output_asm_insn ("addil L'%2,%%r26", xoperands);
8431
8432 output_asm_insn ("b %0", xoperands);
8433
8434 if (val_14)
8435 {
8436 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8437 nbytes += 8;
8438 }
8439 else
8440 {
8441 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8442 nbytes += 12;
8443 }
8444 }
8445 else if (TARGET_64BIT)
8446 {
8447 rtx xop[4];
8448
8449 /* We only have one call-clobbered scratch register, so we can't
8450 make use of the delay slot if delta doesn't fit in 14 bits. */
8451 if (!val_14)
8452 {
8453 output_asm_insn ("addil L'%2,%%r26", xoperands);
8454 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8455 }
8456
8457 /* Load function address into %r1. */
8458 xop[0] = xoperands[0];
8459 xop[1] = gen_rtx_REG (Pmode, 1);
8460 xop[2] = xop[1];
8461 pa_output_pic_pcrel_sequence (xop);
8462
8463 if (val_14)
8464 {
8465 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8466 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8467 nbytes += 20;
8468 }
8469 else
8470 {
8471 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8472 nbytes += 24;
8473 }
8474 }
8475 else if (TARGET_PORTABLE_RUNTIME)
8476 {
8477 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8478 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8479
8480 if (!val_14)
8481 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8482
8483 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8484
8485 if (val_14)
8486 {
8487 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8488 nbytes += 16;
8489 }
8490 else
8491 {
8492 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8493 nbytes += 20;
8494 }
8495 }
8496 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8497 {
8498 /* The function is accessible from outside this module. The only
8499 way to avoid an import stub between the thunk and function is to
8500 call the function directly with an indirect sequence similar to
8501 that used by $$dyncall. This is possible because $$dyncall acts
8502 as the import stub in an indirect call. */
8503 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8504 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8505 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8506 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8507 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8508 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8509 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8510 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8511 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8512
8513 if (!val_14)
8514 {
8515 output_asm_insn ("addil L'%2,%%r26", xoperands);
8516 nbytes += 4;
8517 }
8518
8519 if (TARGET_PA_20)
8520 {
8521 output_asm_insn ("bve (%%r22)", xoperands);
8522 nbytes += 36;
8523 }
8524 else if (TARGET_NO_SPACE_REGS)
8525 {
8526 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8527 nbytes += 36;
8528 }
8529 else
8530 {
8531 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8532 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8533 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8534 nbytes += 44;
8535 }
8536
8537 if (val_14)
8538 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8539 else
8540 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8541 }
8542 else if (flag_pic)
8543 {
8544 rtx xop[4];
8545
8546 /* Load function address into %r22. */
8547 xop[0] = xoperands[0];
8548 xop[1] = gen_rtx_REG (Pmode, 1);
8549 xop[2] = gen_rtx_REG (Pmode, 22);
8550 pa_output_pic_pcrel_sequence (xop);
8551
8552 if (!val_14)
8553 output_asm_insn ("addil L'%2,%%r26", xoperands);
8554
8555 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8556
8557 if (val_14)
8558 {
8559 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8560 nbytes += 20;
8561 }
8562 else
8563 {
8564 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8565 nbytes += 24;
8566 }
8567 }
8568 else
8569 {
8570 if (!val_14)
8571 output_asm_insn ("addil L'%2,%%r26", xoperands);
8572
8573 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8574 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8575
8576 if (val_14)
8577 {
8578 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8579 nbytes += 12;
8580 }
8581 else
8582 {
8583 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8584 nbytes += 16;
8585 }
8586 }
8587
8588 final_end_function ();
8589
8590 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8591 {
8592 switch_to_section (data_section);
8593 output_asm_insn (".align 4", xoperands);
8594 ASM_OUTPUT_LABEL (file, label);
8595 output_asm_insn (".word P'%0", xoperands);
8596 }
8597
8598 current_thunk_number++;
8599 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8600 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8601 last_address += nbytes;
8602 if (old_last_address > last_address)
8603 last_address = UINT_MAX;
8604 update_total_code_bytes (nbytes);
8605 }
8606
8607 /* Only direct calls to static functions are allowed to be sibling (tail)
8608 call optimized.
8609
8610 This restriction is necessary because some linker-generated stubs will
8611 store return pointers into rp in some cases, which might clobber a
8612 live value already in rp.
8613
8614 In a sibcall the current function and the target function share stack
8615 space. Thus if the path to the current function and the path to the
8616 target function save a value in rp, they save the value into the
8617 same stack slot, which has undesirable consequences.
8618
8619 Because of the deferred binding nature of shared libraries any function
8620 with external scope could be in a different load module and thus require
8621 rp to be saved when calling that function. So sibcall optimizations
8622 are only safe for static functions.
8623
8624 Note that GCC never needs return value relocations, so we don't have to
8625 worry about static calls with return value relocations (which require
8626 saving rp).
8627
8628 It is safe to perform a sibcall optimization when the target function
8629 will never return. */
8630 static bool
8631 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8632 {
8633 if (TARGET_PORTABLE_RUNTIME)
8634 return false;
8635
8636 /* Sibcalls are not ok because the arg pointer register is not a fixed
8637 register. This prevents the sibcall optimization from occurring. In
8638 addition, there are problems with stub placement using GNU ld. This
8639 is because a normal sibcall branch uses a 17-bit relocation while
8640 a regular call branch uses a 22-bit relocation. As a result, more
8641 care needs to be taken in the placement of long-branch stubs. */
8642 if (TARGET_64BIT)
8643 return false;
8644
8645 /* Sibcalls are only ok within a translation unit. */
8646 return (decl && !TREE_PUBLIC (decl));
8647 }
8648
8649 /* ??? Addition is not commutative on the PA due to the weird implicit
8650 space register selection rules for memory addresses. Therefore, we
8651 don't consider a + b == b + a, as this might be inside a MEM. */
8652 static bool
8653 pa_commutative_p (const_rtx x, int outer_code)
8654 {
8655 return (COMMUTATIVE_P (x)
8656 && (TARGET_NO_SPACE_REGS
8657 || (outer_code != UNKNOWN && outer_code != MEM)
8658 || GET_CODE (x) != PLUS));
8659 }
8660
8661 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8662 use in fmpyadd instructions. */
8663 int
8664 pa_fmpyaddoperands (rtx *operands)
8665 {
8666 machine_mode mode = GET_MODE (operands[0]);
8667
8668 /* Must be a floating point mode. */
8669 if (mode != SFmode && mode != DFmode)
8670 return 0;
8671
8672 /* All modes must be the same. */
8673 if (! (mode == GET_MODE (operands[1])
8674 && mode == GET_MODE (operands[2])
8675 && mode == GET_MODE (operands[3])
8676 && mode == GET_MODE (operands[4])
8677 && mode == GET_MODE (operands[5])))
8678 return 0;
8679
8680 /* All operands must be registers. */
8681 if (! (GET_CODE (operands[1]) == REG
8682 && GET_CODE (operands[2]) == REG
8683 && GET_CODE (operands[3]) == REG
8684 && GET_CODE (operands[4]) == REG
8685 && GET_CODE (operands[5]) == REG))
8686 return 0;
8687
8688 /* Only 2 real operands to the addition. One of the input operands must
8689 be the same as the output operand. */
8690 if (! rtx_equal_p (operands[3], operands[4])
8691 && ! rtx_equal_p (operands[3], operands[5]))
8692 return 0;
8693
8694 /* Inout operand of add cannot conflict with any operands from multiply. */
8695 if (rtx_equal_p (operands[3], operands[0])
8696 || rtx_equal_p (operands[3], operands[1])
8697 || rtx_equal_p (operands[3], operands[2]))
8698 return 0;
8699
8700 /* Multiply cannot feed into the addition operands. */
8701 if (rtx_equal_p (operands[4], operands[0])
8702 || rtx_equal_p (operands[5], operands[0]))
8703 return 0;
8704
8705 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8706 if (mode == SFmode
8707 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8708 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8709 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8710 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8711 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8712 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8713 return 0;
8714
8715 /* Passed. Operands are suitable for fmpyadd. */
8716 return 1;
8717 }
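
/* Schematically, a suitable operand set for pa_fmpyaddoperands has
   the shape

     operands[0] = operands[1] * operands[2]    -- the multiply
     operands[3] = operands[4] + operands[5]    -- the add

   where operands[3] equals operands[4] or operands[5] and the two
   groups do not otherwise overlap, as checked above.  */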
8718
8719 #if !defined(USE_COLLECT2)
8720 static void
8721 pa_asm_out_constructor (rtx symbol, int priority)
8722 {
8723 if (!function_label_operand (symbol, VOIDmode))
8724 pa_encode_label (symbol);
8725
8726 #ifdef CTORS_SECTION_ASM_OP
8727 default_ctor_section_asm_out_constructor (symbol, priority);
8728 #else
8729 # ifdef TARGET_ASM_NAMED_SECTION
8730 default_named_section_asm_out_constructor (symbol, priority);
8731 # else
8732 default_stabs_asm_out_constructor (symbol, priority);
8733 # endif
8734 #endif
8735 }
8736
8737 static void
8738 pa_asm_out_destructor (rtx symbol, int priority)
8739 {
8740 if (!function_label_operand (symbol, VOIDmode))
8741 pa_encode_label (symbol);
8742
8743 #ifdef DTORS_SECTION_ASM_OP
8744 default_dtor_section_asm_out_destructor (symbol, priority);
8745 #else
8746 # ifdef TARGET_ASM_NAMED_SECTION
8747 default_named_section_asm_out_destructor (symbol, priority);
8748 # else
8749 default_stabs_asm_out_destructor (symbol, priority);
8750 # endif
8751 #endif
8752 }
8753 #endif
8754
8755 /* This function places uninitialized global data in the bss section.
8756 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8757 function on the SOM port to prevent uninitialized global data from
8758 being placed in the data section. */
8759
8760 void
8761 pa_asm_output_aligned_bss (FILE *stream,
8762 const char *name,
8763 unsigned HOST_WIDE_INT size,
8764 unsigned int align)
8765 {
8766 switch_to_section (bss_section);
8767 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8768
8769 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8770 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8771 #endif
8772
8773 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8774 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8775 #endif
8776
8777 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8778 ASM_OUTPUT_LABEL (stream, name);
8779 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8780 }
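
/* For a hypothetical 8-byte object "buf" with 8-byte alignment, the
   output of pa_asm_output_aligned_bss would look roughly like

	.align 8
	.align 8
   buf:
	.block 8

   plus .type/.size directives where the assembler provides them.  */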
8781
8782 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8783 that doesn't allow the alignment of global common storage to be directly
8784 specified. The SOM linker aligns common storage based on the rounded
8785 value of the NUM_BYTES parameter in the .comm directive. It's not
8786 possible to use the .align directive as it doesn't affect the alignment
8787 of the label associated with a .comm directive. */
8788
8789 void
8790 pa_asm_output_aligned_common (FILE *stream,
8791 const char *name,
8792 unsigned HOST_WIDE_INT size,
8793 unsigned int align)
8794 {
8795 unsigned int max_common_align;
8796
8797 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8798 if (align > max_common_align)
8799 {
8800 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8801 "for global common data. Using %u",
8802 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8803 align = max_common_align;
8804 }
8805
8806 switch_to_section (bss_section);
8807
8808 assemble_name (stream, name);
8809 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8810 MAX (size, align / BITS_PER_UNIT));
8811 }
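
/* For example, pa_asm_output_aligned_common above emits roughly

   counts	.comm 16

   for a hypothetical 16-byte symbol "counts" with 8-byte alignment,
   leaving the SOM linker to derive the alignment from the rounded
   size.  */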
8812
8813 /* We can't use .comm for local common storage as the SOM linker effectively
8814 treats the symbol as universal and uses the same storage for local symbols
8815 with the same name in different object files. The .block directive
8816 reserves an uninitialized block of storage. However, it's not common
8817 storage. Fortunately, GCC never requests common storage with the same
8818 name in any given translation unit. */
8819
8820 void
8821 pa_asm_output_aligned_local (FILE *stream,
8822 const char *name,
8823 unsigned HOST_WIDE_INT size,
8824 unsigned int align)
8825 {
8826 switch_to_section (bss_section);
8827 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8828
8829 #ifdef LOCAL_ASM_OP
8830 fprintf (stream, "%s", LOCAL_ASM_OP);
8831 assemble_name (stream, name);
8832 fprintf (stream, "\n");
8833 #endif
8834
8835 ASM_OUTPUT_LABEL (stream, name);
8836 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8837 }
8838
8839 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8840 use in fmpysub instructions. */
8841 int
8842 pa_fmpysuboperands (rtx *operands)
8843 {
8844 machine_mode mode = GET_MODE (operands[0]);
8845
8846 /* Must be a floating point mode. */
8847 if (mode != SFmode && mode != DFmode)
8848 return 0;
8849
8850 /* All modes must be the same. */
8851 if (! (mode == GET_MODE (operands[1])
8852 && mode == GET_MODE (operands[2])
8853 && mode == GET_MODE (operands[3])
8854 && mode == GET_MODE (operands[4])
8855 && mode == GET_MODE (operands[5])))
8856 return 0;
8857
8858 /* All operands must be registers. */
8859 if (! (GET_CODE (operands[1]) == REG
8860 && GET_CODE (operands[2]) == REG
8861 && GET_CODE (operands[3]) == REG
8862 && GET_CODE (operands[4]) == REG
8863 && GET_CODE (operands[5]) == REG))
8864 return 0;
8865
8866 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8867 operation, so operands[4] must be the same as operands[3]. */
8868 if (! rtx_equal_p (operands[3], operands[4]))
8869 return 0;
8870
8871 /* Multiply cannot feed into the subtraction. */
8872 if (rtx_equal_p (operands[5], operands[0]))
8873 return 0;
8874
8875 /* Inout operand of sub cannot conflict with any operands from multiply. */
8876 if (rtx_equal_p (operands[3], operands[0])
8877 || rtx_equal_p (operands[3], operands[1])
8878 || rtx_equal_p (operands[3], operands[2]))
8879 return 0;
8880
8881 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8882 if (mode == SFmode
8883 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8884 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8885 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8886 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8887 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8888 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8889 return 0;
8890
8891 /* Passed. Operands are suitable for fmpysub. */
8892 return 1;
8893 }
8894
8895 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8896 constants for a MULT embedded inside a memory address. */
8897 int
8898 pa_mem_shadd_constant_p (int val)
8899 {
8900 if (val == 2 || val == 4 || val == 8)
8901 return 1;
8902 else
8903 return 0;
8904 }
8905
8906 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8907 constants for shadd instructions. */
8908 int
8909 pa_shadd_constant_p (int val)
8910 {
8911 if (val == 1 || val == 2 || val == 3)
8912 return 1;
8913 else
8914 return 0;
8915 }
8916
8917 /* Return TRUE if INSN branches forward. */
8918
8919 static bool
8920 forward_branch_p (rtx_insn *insn)
8921 {
8922 rtx lab = JUMP_LABEL (insn);
8923
8924 /* The INSN must have a jump label. */
8925 gcc_assert (lab != NULL_RTX);
8926
8927 if (INSN_ADDRESSES_SET_P ())
8928 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8929
8930 while (insn)
8931 {
8932 if (insn == lab)
8933 return true;
8934 else
8935 insn = NEXT_INSN (insn);
8936 }
8937
8938 return false;
8939 }
8940
8941 /* Output an unconditional move and branch insn. */
8942
8943 const char *
8944 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8945 {
8946 int length = get_attr_length (insn);
8947
8948 /* These are the cases in which we win. */
8949 if (length == 4)
8950 return "mov%I1b,tr %1,%0,%2";
8951
8952 /* None of the following cases win, but they don't lose either. */
8953 if (length == 8)
8954 {
8955 if (dbr_sequence_length () == 0)
8956 {
8957 /* Nothing in the delay slot, fake it by putting the combined
8958 insn (the copy or add) in the delay slot of a bl. */
8959 if (GET_CODE (operands[1]) == CONST_INT)
8960 return "b %2\n\tldi %1,%0";
8961 else
8962 return "b %2\n\tcopy %1,%0";
8963 }
8964 else
8965 {
8966 /* Something in the delay slot, but we've got a long branch. */
8967 if (GET_CODE (operands[1]) == CONST_INT)
8968 return "ldi %1,%0\n\tb %2";
8969 else
8970 return "copy %1,%0\n\tb %2";
8971 }
8972 }
8973
8974 if (GET_CODE (operands[1]) == CONST_INT)
8975 output_asm_insn ("ldi %1,%0", operands);
8976 else
8977 output_asm_insn ("copy %1,%0", operands);
8978 return pa_output_lbranch (operands[2], insn, 1);
8979 }
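
/* Illustratively, the winning length-4 case above could assemble to

	movb,tr %r26,%r28,L$0007

   where ",tr" is the always-true condition that makes the
   copy-and-branch unconditional (see the pa_combine_instructions
   commentary below); the operands shown are hypothetical.  */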
8980
8981 /* Output an unconditional add and branch insn. */
8982
8983 const char *
8984 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8985 {
8986 int length = get_attr_length (insn);
8987
8988 /* To make life easy we want operand0 to be the shared input/output
8989 operand and operand1 to be the readonly operand. */
8990 if (operands[0] == operands[1])
8991 operands[1] = operands[2];
8992
8993 /* These are the cases in which we win. */
8994 if (length == 4)
8995 return "add%I1b,tr %1,%0,%3";
8996
8997 /* None of the following cases win, but they don't lose either. */
8998 if (length == 8)
8999 {
9000 if (dbr_sequence_length () == 0)
9001 /* Nothing in the delay slot, fake it by putting the combined
9002 insn (the copy or add) in the delay slot of a bl. */
9003 return "b %3\n\tadd%I1 %1,%0,%0";
9004 else
9005 /* Something in the delay slot, but we've got a long branch. */
9006 return "add%I1 %1,%0,%0\n\tb %3";
9007 }
9008
9009 output_asm_insn ("add%I1 %1,%0,%0", operands);
9010 return pa_output_lbranch (operands[3], insn, 1);
9011 }
9012
9013 /* We use this hook to perform a PA specific optimization which is difficult
9014 to do in earlier passes. */
9015
9016 static void
9017 pa_reorg (void)
9018 {
9019 remove_useless_addtr_insns (1);
9020
9021 if (pa_cpu < PROCESSOR_8000)
9022 pa_combine_instructions ();
9023 }
9024
9025 /* The PA has a number of odd instructions which can perform multiple
9026 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9027 it may be profitable to combine two instructions into one instruction
9028 with two outputs. It's not profitable on PA2.0 machines because the
9029 two outputs would take two slots in the reorder buffers.
9030
9031 This routine finds instructions which can be combined and combines
9032 them. We only support some of the potential combinations, and we
9033 only try common ways to find suitable instructions.
9034
9035 * addb can add two registers or a register and a small integer
9036 and jump to a nearby (+-8k) location. Normally the jump to the
9037 nearby location is conditional on the result of the add, but by
9038 using the "true" condition we can make the jump unconditional.
9039 Thus addb can perform two independent operations in one insn.
9040
9041 * movb is similar to addb in that it can perform a reg->reg
9042 or small immediate->reg copy and jump to a nearby (+-8k) location.
9043
9044 * fmpyadd and fmpysub can perform a FP multiply and either an
9045 FP add or FP sub if the operands of the multiply and add/sub are
9046 independent (there are other minor restrictions). Note both
9047 the fmpy and fadd/fsub can in theory move to better spots according
9048 to data dependencies, but for now we require the fmpy stay at a
9049 fixed location.
9050
9051 * Many of the memory operations can perform pre & post updates
9052 of index registers. GCC's pre/post increment/decrement addressing
9053 is far too simple to take advantage of all the possibilities. This
9054 pass may not be suitable since those insns may not be independent.
9055
9056 * comclr can compare two ints or an int and a register, nullify
9057 the following instruction and zero some other register. This
9058 is more difficult to use as it's harder to find an insn which
9059 will generate a comclr than finding something like an unconditional
9060 branch. (conditional moves & long branches create comclr insns).
9061
9062 * Most arithmetic operations can conditionally skip the next
9063 instruction. They can be viewed as "perform this operation
9064 and conditionally jump to this nearby location" (where nearby
9065 is one insn away). These are difficult to use due to the
9066 branch length restrictions. */
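/* A hypothetical sketch of the movb case: on a PA1.1 machine this pass
   can turn

	copy %r4,%r26
	...
	b .L2			; backward branch

   into the single two-output instruction

	movb,tr %r4,%r26,.L2

   where the ",tr" (true) condition makes the branch unconditional (see
   pa_output_parallel_movb above).  */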
9067
9068 static void
9069 pa_combine_instructions (void)
9070 {
9071 rtx_insn *anchor;
9072
9073 /* This can get expensive since the basic algorithm is on the
9074 order of O(n^2) (or worse). Only do it for -O2 or higher
9075 levels of optimization. */
9076 if (optimize < 2)
9077 return;
9078
9079 /* Walk down the list of insns looking for "anchor" insns which
9080 may be combined with "floating" insns. As the name implies,
9081 "anchor" instructions don't move, while "floating" insns may
9082 move around. */
9083 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9084 rtx_insn *new_rtx = make_insn_raw (par);
9085
9086 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9087 {
9088 enum attr_pa_combine_type anchor_attr;
9089 enum attr_pa_combine_type floater_attr;
9090
9091 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9092 Also ignore any special USE and CLOBBER insns. */
9093 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9094 || GET_CODE (PATTERN (anchor)) == USE
9095 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9096 continue;
9097
9098 anchor_attr = get_attr_pa_combine_type (anchor);
9099 /* See if anchor is an insn suitable for combination. */
9100 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9101 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9102 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9103 && ! forward_branch_p (anchor)))
9104 {
9105 rtx_insn *floater;
9106
9107 for (floater = PREV_INSN (anchor);
9108 floater;
9109 floater = PREV_INSN (floater))
9110 {
9111 if (NOTE_P (floater)
9112 || (NONJUMP_INSN_P (floater)
9113 && (GET_CODE (PATTERN (floater)) == USE
9114 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9115 continue;
9116
9117 /* Anything except a regular INSN will stop our search. */
9118 if (! NONJUMP_INSN_P (floater))
9119 {
9120 floater = NULL;
9121 break;
9122 }
9123
9124 /* See if FLOATER is suitable for combination with the
9125 anchor. */
9126 floater_attr = get_attr_pa_combine_type (floater);
9127 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9128 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9129 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9130 && floater_attr == PA_COMBINE_TYPE_FMPY))
9131 {
9132 /* If ANCHOR and FLOATER can be combined, then we're
9133 done with this pass. */
9134 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9135 SET_DEST (PATTERN (floater)),
9136 XEXP (SET_SRC (PATTERN (floater)), 0),
9137 XEXP (SET_SRC (PATTERN (floater)), 1)))
9138 break;
9139 }
9140
9141 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9142 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9143 {
9144 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9145 {
9146 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9147 SET_DEST (PATTERN (floater)),
9148 XEXP (SET_SRC (PATTERN (floater)), 0),
9149 XEXP (SET_SRC (PATTERN (floater)), 1)))
9150 break;
9151 }
9152 else
9153 {
9154 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9155 SET_DEST (PATTERN (floater)),
9156 SET_SRC (PATTERN (floater)),
9157 SET_SRC (PATTERN (floater))))
9158 break;
9159 }
9160 }
9161 }
9162
9163 /* If we didn't find anything on the backwards scan, try forwards. */
9164 if (!floater
9165 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9166 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9167 {
9168 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9169 {
9170 if (NOTE_P (floater)
9171 || (NONJUMP_INSN_P (floater)
9172 && (GET_CODE (PATTERN (floater)) == USE
9173 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9175 continue;
9176
9177 /* Anything except a regular INSN will stop our search. */
9178 if (! NONJUMP_INSN_P (floater))
9179 {
9180 floater = NULL;
9181 break;
9182 }
9183
9184 /* See if FLOATER is suitable for combination with the
9185 anchor. */
9186 floater_attr = get_attr_pa_combine_type (floater);
9187 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9188 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9189 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9190 && floater_attr == PA_COMBINE_TYPE_FMPY))
9191 {
9192 /* If ANCHOR and FLOATER can be combined, then we're
9193 done with this pass. */
9194 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9195 SET_DEST (PATTERN (floater)),
9196 XEXP (SET_SRC (PATTERN (floater)),
9197 0),
9198 XEXP (SET_SRC (PATTERN (floater)),
9199 1)))
9200 break;
9201 }
9202 }
9203 }
9204
9205 /* FLOATER will be nonzero if we found a suitable floating
9206 insn for combination with ANCHOR. */
9207 if (floater
9208 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9209 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9210 {
9211 /* Emit the new instruction and delete the old anchor. */
9212 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9213 copy_rtx (PATTERN (floater)));
9214 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9215 emit_insn_before (temp, anchor);
9216
9217 SET_INSN_DELETED (anchor);
9218
9219 /* Emit a special USE insn for FLOATER, then delete
9220 the floating insn. */
9221 temp = copy_rtx (PATTERN (floater));
9222 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9223 delete_insn (floater);
9224
9225 continue;
9226 }
9227 else if (floater
9228 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9229 {
9230 /* Emit the new_jump instruction and delete the old anchor. */
9231 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9232 copy_rtx (PATTERN (floater)));
9233 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9234 temp = emit_jump_insn_before (temp, anchor);
9235
9236 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9237 SET_INSN_DELETED (anchor);
9238
9239 /* Emit a special USE insn for FLOATER, then delete
9240 the floating insn. */
9241 temp = copy_rtx (PATTERN (floater));
9242 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9243 delete_insn (floater);
9244 continue;
9245 }
9246 }
9247 }
9248 }
9249
9250 static int
9251 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9252 int reversed, rtx dest,
9253 rtx src1, rtx src2)
9254 {
9255 int insn_code_number;
9256 rtx_insn *start, *end;
9257
9258 /* Create a PARALLEL with the patterns of ANCHOR and
9259 FLOATER, try to recognize it, then test constraints
9260 for the resulting pattern.
9261
9262 If the pattern doesn't match or the constraints
9263 aren't met keep searching for a suitable floater
9264 insn. */
9265 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9266 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9267 INSN_CODE (new_rtx) = -1;
9268 insn_code_number = recog_memoized (new_rtx);
9269 basic_block bb = BLOCK_FOR_INSN (anchor);
9270 if (insn_code_number < 0
9271 || (extract_insn (new_rtx),
9272 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9273 return 0;
9274
9275 if (reversed)
9276 {
9277 start = anchor;
9278 end = floater;
9279 }
9280 else
9281 {
9282 start = floater;
9283 end = anchor;
9284 }
9285
9286 /* There are up to three operands to consider: one
9287 output and two inputs.
9288
9289 The output must not be used between FLOATER & ANCHOR
9290 exclusive. The inputs must not be set between
9291 FLOATER and ANCHOR exclusive. */
9292
9293 if (reg_used_between_p (dest, start, end))
9294 return 0;
9295
9296 if (reg_set_between_p (src1, start, end))
9297 return 0;
9298
9299 if (reg_set_between_p (src2, start, end))
9300 return 0;
9301
9302 /* If we get here, then everything is good. */
9303 return 1;
9304 }
9305
9306 /* Return nonzero if references for INSN are delayed.
9307
9308 Millicode insns are actually function calls with some special
9309 constraints on arguments and register usage.
9310
9311 Millicode calls always expect their arguments in the integer argument
9312 registers, and always return their result in %r29 (ret1). They
9313 are expected to clobber their arguments, %r1, %r29, and the return
9314 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9315
9316 This function tells reorg that the references to arguments and
9317 millicode calls do not appear to happen until after the millicode call.
9318 This allows reorg to put insns which set the argument registers into the
9319 delay slot of the millicode call -- thus they act more like traditional
9320 CALL_INSNs.
9321
9322 Note we cannot consider side effects of the insn to be delayed because
9323 the branch and link insn will clobber the return pointer. If we happened
9324 to use the return pointer in the delay slot of the call, then we lose.
9325
9326 get_attr_type will try to recognize the given insn, so make sure to
9327 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9328 in particular. */
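/* For instance (an illustrative sketch of the 32-bit runtime; the exact
   register choices follow the millicode convention described above), a
   signed divide expands to something like

	bl $$divI,%r31
	nop

   with the dividend in %r26, the divisor in %r25 and the quotient
   returned in %r29. Because of this hook, reorg may move an insn that
   sets %r25 or %r26 into the delay slot of the bl.  */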
9329 int
9330 pa_insn_refs_are_delayed (rtx_insn *insn)
9331 {
9332 return ((NONJUMP_INSN_P (insn)
9333 && GET_CODE (PATTERN (insn)) != SEQUENCE
9334 && GET_CODE (PATTERN (insn)) != USE
9335 && GET_CODE (PATTERN (insn)) != CLOBBER
9336 && get_attr_type (insn) == TYPE_MILLI));
9337 }
9338
9339 /* Promote the return value, but not the arguments. */
9340
9341 static machine_mode
9342 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9343 machine_mode mode,
9344 int *punsignedp ATTRIBUTE_UNUSED,
9345 const_tree fntype ATTRIBUTE_UNUSED,
9346 int for_return)
9347 {
9348 if (for_return == 0)
9349 return mode;
9350 return promote_mode (type, mode, punsignedp);
9351 }
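/* An illustrative consequence of the hook above: a function returning
   "short" has its result widened to a full word (for_return != 0),
   while a "short" argument keeps HImode because argument promotion is
   suppressed.  */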
9352
9353 /* On the HP-PA the value is found in register(s) 28(-29), unless
9354 the mode is SF or DF. Then the value is returned in fr4 (32).
9355
9356 This must perform the same promotions as PROMOTE_MODE, else promoting
9357 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9358
9359 Small structures must be returned in a PARALLEL on PA64 in order
9360 to match the HP Compiler ABI. */
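/* An illustrative example (hypothetical layout shown): on PA64 a
   12-byte struct is not a multiple of the word size, so it comes back
   from this function as

	(parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
		       (expr_list (reg:DI 29) (const_int 8))])

   i.e. left justified in %r28-%r29 with undefined pad bits.  */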
9361
9362 static rtx
9363 pa_function_value (const_tree valtype,
9364 const_tree func ATTRIBUTE_UNUSED,
9365 bool outgoing ATTRIBUTE_UNUSED)
9366 {
9367 machine_mode valmode;
9368
9369 if (AGGREGATE_TYPE_P (valtype)
9370 || TREE_CODE (valtype) == COMPLEX_TYPE
9371 || TREE_CODE (valtype) == VECTOR_TYPE)
9372 {
9373 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9374
9375 /* Handle aggregates that fit exactly in a word or double word. */
9376 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9377 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9378
9379 if (TARGET_64BIT)
9380 {
9381 /* Aggregates with a size less than or equal to 128 bits are
9382 returned in GR 28(-29). They are left justified. The pad
9383 bits are undefined. Larger aggregates are returned in
9384 memory. */
9385 rtx loc[2];
9386 int i, offset = 0;
9387 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9388
9389 for (i = 0; i < ub; i++)
9390 {
9391 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9392 gen_rtx_REG (DImode, 28 + i),
9393 GEN_INT (offset));
9394 offset += 8;
9395 }
9396
9397 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9398 }
9399 else if (valsize > UNITS_PER_WORD)
9400 {
9401 /* Aggregates 5 to 8 bytes in size are returned in general
9402 registers r28-r29 in the same manner as other non
9403 floating-point objects. The data is right-justified and
9404 zero-extended to 64 bits. This is opposite to the normal
9405 justification used on big endian targets and requires
9406 special treatment. */
9407 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9408 gen_rtx_REG (DImode, 28), const0_rtx);
9409 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9410 }
9411 }
9412
9413 if ((INTEGRAL_TYPE_P (valtype)
9414 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9415 || POINTER_TYPE_P (valtype))
9416 valmode = word_mode;
9417 else
9418 valmode = TYPE_MODE (valtype);
9419
9420 if (TREE_CODE (valtype) == REAL_TYPE
9421 && !AGGREGATE_TYPE_P (valtype)
9422 && TYPE_MODE (valtype) != TFmode
9423 && !TARGET_SOFT_FLOAT)
9424 return gen_rtx_REG (valmode, 32);
9425
9426 return gen_rtx_REG (valmode, 28);
9427 }
9428
9429 /* Implement the TARGET_LIBCALL_VALUE hook. */
9430
9431 static rtx
9432 pa_libcall_value (machine_mode mode,
9433 const_rtx fun ATTRIBUTE_UNUSED)
9434 {
9435 if (! TARGET_SOFT_FLOAT
9436 && (mode == SFmode || mode == DFmode))
9437 return gen_rtx_REG (mode, 32);
9438 else
9439 return gen_rtx_REG (mode, 28);
9440 }
9441
9442 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9443
9444 static bool
9445 pa_function_value_regno_p (const unsigned int regno)
9446 {
9447 if (regno == 28
9448 || (! TARGET_SOFT_FLOAT && regno == 32))
9449 return true;
9450
9451 return false;
9452 }
9453
9454 /* Update the data in CUM to advance over an argument
9455 of mode MODE and data type TYPE.
9456 (TYPE is null for libcalls where that information may not be available.) */
9457
9458 static void
9459 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9460 const_tree type, bool named ATTRIBUTE_UNUSED)
9461 {
9462 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9463 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9464
9465 cum->nargs_prototype--;
9466 cum->words += (arg_size
9467 + ((cum->words & 01)
9468 && type != NULL_TREE
9469 && arg_size > 1));
9470 }
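/* A worked example of the computation above (32-bit runtime): after one
   "int" argument, cum->words == 1; a following "double" (arg_size == 2)
   advances cum->words by 3 -- one padding word to reach a double-word
   boundary plus the two words of the argument.  */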
9471
9472 /* Return the location of a parameter that is passed in a register or NULL
9473 if the parameter has any component that is passed in memory.
9474
9475 This is new code and will be pushed into the net sources after
9476 further testing.
9477
9478 ??? We might want to restructure this so that it looks more like other
9479 ports. */
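/* An illustrative data point for the 32-bit ABI (not from the original
   sources): the four argument words live in %r26, %r25, %r24 and %r23,
   so for a prototyped direct call the second "int" argument
   (cum->words == 1) is returned from this hook as (reg:SI 25).  */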
9480 static rtx
9481 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9482 const_tree type, bool named ATTRIBUTE_UNUSED)
9483 {
9484 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9485 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9486 int alignment = 0;
9487 int arg_size;
9488 int fpr_reg_base;
9489 int gpr_reg_base;
9490 rtx retval;
9491
9492 if (mode == VOIDmode)
9493 return NULL_RTX;
9494
9495 arg_size = FUNCTION_ARG_SIZE (mode, type);
9496
9497 /* If this arg would be passed partially or totally on the stack, then
9498 this routine should return zero. pa_arg_partial_bytes will
9499 handle arguments which are split between regs and stack slots if
9500 the ABI mandates split arguments. */
9501 if (!TARGET_64BIT)
9502 {
9503 /* The 32-bit ABI does not split arguments. */
9504 if (cum->words + arg_size > max_arg_words)
9505 return NULL_RTX;
9506 }
9507 else
9508 {
9509 if (arg_size > 1)
9510 alignment = cum->words & 1;
9511 if (cum->words + alignment >= max_arg_words)
9512 return NULL_RTX;
9513 }
9514
9515 /* The 32bit ABIs and the 64bit ABIs are rather different,
9516 particularly in their handling of FP registers. We might
9517 be able to cleverly share code between them, but I'm not
9518 going to bother in the hope that splitting them up results
9519 in code that is more easily understood. */
9520
9521 if (TARGET_64BIT)
9522 {
9523 /* Advance the base registers to their current locations.
9524
9525 Remember, gprs grow towards smaller register numbers while
9526 fprs grow to higher register numbers. Also remember that
9527 although FP regs are 32-bit addressable, we pretend that
9528 the registers are 64 bits wide. */
9529 gpr_reg_base = 26 - cum->words;
9530 fpr_reg_base = 32 + cum->words;
9531
9532 /* Arguments wider than one word and small aggregates need special
9533 treatment. */
9534 if (arg_size > 1
9535 || mode == BLKmode
9536 || (type && (AGGREGATE_TYPE_P (type)
9537 || TREE_CODE (type) == COMPLEX_TYPE
9538 || TREE_CODE (type) == VECTOR_TYPE)))
9539 {
9540 /* Double-extended precision (80-bit), quad-precision (128-bit)
9541 and aggregates including complex numbers are aligned on
9542 128-bit boundaries. The first eight 64-bit argument slots
9543 are associated one-to-one, with general registers r26
9544 through r19, and also with floating-point registers fr4
9545 through fr11. Arguments larger than one word are always
9546 passed in general registers.
9547
9548 Using a PARALLEL with a word mode register results in left
9549 justified data on a big-endian target. */
9550
9551 rtx loc[8];
9552 int i, offset = 0, ub = arg_size;
9553
9554 /* Align the base register. */
9555 gpr_reg_base -= alignment;
9556
9557 ub = MIN (ub, max_arg_words - cum->words - alignment);
9558 for (i = 0; i < ub; i++)
9559 {
9560 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9561 gen_rtx_REG (DImode, gpr_reg_base),
9562 GEN_INT (offset));
9563 gpr_reg_base -= 1;
9564 offset += 8;
9565 }
9566
9567 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9568 }
9569 }
9570 else
9571 {
9572 /* If the argument is larger than a word, then we know precisely
9573 which registers we must use. */
9574 if (arg_size > 1)
9575 {
9576 if (cum->words)
9577 {
9578 gpr_reg_base = 23;
9579 fpr_reg_base = 38;
9580 }
9581 else
9582 {
9583 gpr_reg_base = 25;
9584 fpr_reg_base = 34;
9585 }
9586
9587 /* Structures 5 to 8 bytes in size are passed in the general
9588 registers in the same manner as other non floating-point
9589 objects. The data is right-justified and zero-extended
9590 to 64 bits. This is opposite to the normal justification
9591 used on big endian targets and requires special treatment.
9592 We now define BLOCK_REG_PADDING to pad these objects.
9593 Aggregates, complex and vector types are passed in the same
9594 manner as structures. */
9595 if (mode == BLKmode
9596 || (type && (AGGREGATE_TYPE_P (type)
9597 || TREE_CODE (type) == COMPLEX_TYPE
9598 || TREE_CODE (type) == VECTOR_TYPE)))
9599 {
9600 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9601 gen_rtx_REG (DImode, gpr_reg_base),
9602 const0_rtx);
9603 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9604 }
9605 }
9606 else
9607 {
9608 /* We have a single word (32 bits). A simple computation
9609 will get us the register #s we need. */
9610 gpr_reg_base = 26 - cum->words;
9611 fpr_reg_base = 32 + 2 * cum->words;
9612 }
9613 }
9614
9615 /* Determine if the argument needs to be passed in both general and
9616 floating point registers. */
9617 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9618 /* If we are doing soft-float with portable runtime, then there
9619 is no need to worry about FP regs. */
9620 && !TARGET_SOFT_FLOAT
9621 /* The parameter must be some kind of scalar float, else we just
9622 pass it in integer registers. */
9623 && GET_MODE_CLASS (mode) == MODE_FLOAT
9624 /* The target function must not have a prototype. */
9625 && cum->nargs_prototype <= 0
9626 /* libcalls do not need to pass items in both FP and general
9627 registers. */
9628 && type != NULL_TREE
9629 /* All this hair applies to "outgoing" args only. This includes
9630 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9631 && !cum->incoming)
9632 /* Also pass outgoing floating arguments in both registers in indirect
9633 calls with the 32 bit ABI and the HP assembler since there is no
9634 way to specify the argument locations in static functions. */
9635 || (!TARGET_64BIT
9636 && !TARGET_GAS
9637 && !cum->incoming
9638 && cum->indirect
9639 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9640 {
9641 retval
9642 = gen_rtx_PARALLEL
9643 (mode,
9644 gen_rtvec (2,
9645 gen_rtx_EXPR_LIST (VOIDmode,
9646 gen_rtx_REG (mode, fpr_reg_base),
9647 const0_rtx),
9648 gen_rtx_EXPR_LIST (VOIDmode,
9649 gen_rtx_REG (mode, gpr_reg_base),
9650 const0_rtx)));
9651 }
9652 else
9653 {
9654 /* See if we should pass this parameter in a general register. */
9655 if (TARGET_SOFT_FLOAT
9656 /* Indirect calls in the normal 32bit ABI require all arguments
9657 to be passed in general registers. */
9658 || (!TARGET_PORTABLE_RUNTIME
9659 && !TARGET_64BIT
9660 && !TARGET_ELF32
9661 && cum->indirect)
9662 /* If the parameter is not a scalar floating-point parameter,
9663 then it belongs in GPRs. */
9664 || GET_MODE_CLASS (mode) != MODE_FLOAT
9665 /* Structure with single SFmode field belongs in GPR. */
9666 || (type && AGGREGATE_TYPE_P (type)))
9667 retval = gen_rtx_REG (mode, gpr_reg_base);
9668 else
9669 retval = gen_rtx_REG (mode, fpr_reg_base);
9670 }
9671 return retval;
9672 }
9673
9674 /* Arguments larger than one word are double word aligned. */
9675
9676 static unsigned int
9677 pa_function_arg_boundary (machine_mode mode, const_tree type)
9678 {
9679 bool singleword = (type
9680 ? (integer_zerop (TYPE_SIZE (type))
9681 || !TREE_CONSTANT (TYPE_SIZE (type))
9682 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9683 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9684
9685 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9686 }
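/* For example (illustrative): in the 32-bit runtime an "int" argument
   reports PARM_BOUNDARY (one word) here, while a "double" reports
   MAX_PARM_BOUNDARY (a double word).  */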
9687
9688 /* If this arg would be passed totally in registers or totally on the stack,
9689 then this routine should return zero. */
9690
9691 static int
9692 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9693 tree type, bool named ATTRIBUTE_UNUSED)
9694 {
9695 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9696 unsigned int max_arg_words = 8;
9697 unsigned int offset = 0;
9698
9699 if (!TARGET_64BIT)
9700 return 0;
9701
9702 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9703 offset = 1;
9704
9705 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9706 /* Arg fits fully into registers. */
9707 return 0;
9708 else if (cum->words + offset >= max_arg_words)
9709 /* Arg fully on the stack. */
9710 return 0;
9711 else
9712 /* Arg is split. */
9713 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9714 }
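/* A worked example (64-bit runtime): with cum->words == 6, a 32-byte
   aggregate (four words) no longer fits in the two remaining register
   slots, so this returns (8 - 6) * UNITS_PER_WORD = 16 bytes in
   registers with the rest passed on the stack.  */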
9715
9716
9717 /* A get_unnamed_section callback for switching to the text section.
9718
9719 This function is only used with SOM. Because we don't support
9720 named subspaces, we can only create a new subspace or switch back
9721 to the default text subspace. */
9722
9723 static void
9724 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9725 {
9726 gcc_assert (TARGET_SOM);
9727 if (TARGET_GAS)
9728 {
9729 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9730 {
9731 /* We only want to emit a .nsubspa directive once at the
9732 start of the function. */
9733 cfun->machine->in_nsubspa = 1;
9734
9735 /* Create a new subspace for the text. This provides
9736 better stub placement and one-only functions. */
9737 if (cfun->decl
9738 && DECL_ONE_ONLY (cfun->decl)
9739 && !DECL_WEAK (cfun->decl))
9740 {
9741 output_section_asm_op ("\t.SPACE $TEXT$\n"
9742 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9743 "ACCESS=44,SORT=24,COMDAT");
9744 return;
9745 }
9746 }
9747 else
9748 {
9749 /* Either there isn't a current function, or the body of the current
9750 function has been completed. So, we are changing to the
9751 text section to output debugging information. Thus, we
9752 need to forget that we are in the text section so that
9753 varasm.c will call us when text_section is selected again. */
9754 gcc_assert (!cfun || !cfun->machine
9755 || cfun->machine->in_nsubspa == 2);
9756 in_section = NULL;
9757 }
9758 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9759 return;
9760 }
9761 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9762 }
9763
9764 /* A get_unnamed_section callback for switching to comdat data
9765 sections. This function is only used with SOM. */
9766
9767 static void
9768 som_output_comdat_data_section_asm_op (const void *data)
9769 {
9770 in_section = NULL;
9771 output_section_asm_op (data);
9772 }
9773
9774 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9775
9776 static void
9777 pa_som_asm_init_sections (void)
9778 {
9779 text_section
9780 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9781
9782 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9783 is not being generated. */
9784 som_readonly_data_section
9785 = get_unnamed_section (0, output_section_asm_op,
9786 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9787
9788 /* When secondary definitions are not supported, SOM makes readonly
9789 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9790 the comdat flag. */
9791 som_one_only_readonly_data_section
9792 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9793 "\t.SPACE $TEXT$\n"
9794 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9795 "ACCESS=0x2c,SORT=16,COMDAT");
9796
9797
9798 /* When secondary definitions are not supported, SOM makes data one-only
9799 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9800 som_one_only_data_section
9801 = get_unnamed_section (SECTION_WRITE,
9802 som_output_comdat_data_section_asm_op,
9803 "\t.SPACE $PRIVATE$\n"
9804 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9805 "ACCESS=31,SORT=24,COMDAT");
9806
9807 if (flag_tm)
9808 som_tm_clone_table_section
9809 = get_unnamed_section (0, output_section_asm_op,
9810 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9811
9812 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9813 which reference data within the $TEXT$ space (for example constant
9814 strings in the $LIT$ subspace).
9815
9816 The assemblers (GAS and HP as) both have problems with handling
9817 the difference of two symbols which is the other correct way to
9818 reference constant data during PIC code generation.
9819
9820 So, there's no way to reference constant data which is in the
9821 $TEXT$ space during PIC generation. Instead place all constant
9822 data into the $PRIVATE$ subspace (this reduces sharing, but it
9823 works correctly). */
9824 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9825
9826 /* We must not have a reference to an external symbol defined in a
9827 shared library in a readonly section, else the SOM linker will
9828 complain.
9829
9830 So, we force exception information into the data section. */
9831 exception_section = data_section;
9832 }
9833
9834 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9835
9836 static section *
9837 pa_som_tm_clone_table_section (void)
9838 {
9839 return som_tm_clone_table_section;
9840 }
9841
9842 /* On hpux10, the linker will give an error if we have a reference
9843 in the read-only data section to a symbol defined in a shared
9844 library. Therefore, expressions that might require a reloc can
9845 not be placed in the read-only data section. */
9846
9847 static section *
9848 pa_select_section (tree exp, int reloc,
9849 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9850 {
9851 if (TREE_CODE (exp) == VAR_DECL
9852 && TREE_READONLY (exp)
9853 && !TREE_THIS_VOLATILE (exp)
9854 && DECL_INITIAL (exp)
9855 && (DECL_INITIAL (exp) == error_mark_node
9856 || TREE_CONSTANT (DECL_INITIAL (exp)))
9857 && !reloc)
9858 {
9859 if (TARGET_SOM
9860 && DECL_ONE_ONLY (exp)
9861 && !DECL_WEAK (exp))
9862 return som_one_only_readonly_data_section;
9863 else
9864 return readonly_data_section;
9865 }
9866 else if (CONSTANT_CLASS_P (exp) && !reloc)
9867 return readonly_data_section;
9868 else if (TARGET_SOM
9869 && TREE_CODE (exp) == VAR_DECL
9870 && DECL_ONE_ONLY (exp)
9871 && !DECL_WEAK (exp))
9872 return som_one_only_data_section;
9873 else
9874 return data_section;
9875 }
9876
9877 /* Implement pa_reloc_rw_mask. */
9878
9879 static int
9880 pa_reloc_rw_mask (void)
9881 {
9882 /* We force (const (plus (symbol) (const_int))) to memory when the
9883 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9884 handle this construct in read-only memory and we want to avoid
9885 this for ELF. So, we always force an RTX needing relocation to
9886 the data section. */
9887 return 3;
9888 }
9889
9890 static void
9891 pa_globalize_label (FILE *stream, const char *name)
9892 {
9893 /* We only handle DATA objects here; functions are globalized in
9894 ASM_DECLARE_FUNCTION_NAME. */
9895 if (! FUNCTION_NAME_P (name))
9896 {
9897 fputs ("\t.EXPORT ", stream);
9898 assemble_name (stream, name);
9899 fputs (",DATA\n", stream);
9900 }
9901 }
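/* For a data symbol "foo" the hook above emits (illustrative):

	.EXPORT foo,DATA  */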
9902
9903 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9904
9905 static rtx
9906 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9907 int incoming ATTRIBUTE_UNUSED)
9908 {
9909 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9910 }
9911
9912 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9913
9914 bool
9915 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9916 {
9917 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9918 PA64 ABI says that objects larger than 128 bits are returned in memory.
9919 Note, int_size_in_bytes can return -1 if the size of the object is
9920 variable or larger than the maximum value that can be expressed as
9921 a HOST_WIDE_INT. It can also return zero for an empty type. The
9922 simplest way to handle variable and empty types is to pass them in
9923 memory. This avoids problems in defining the boundaries of argument
9924 slots, allocating registers, etc. */
9925 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9926 || int_size_in_bytes (type) <= 0);
9927 }
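/* Illustrative examples: a 12-byte struct is returned in memory by the
   32-bit runtime (larger than 64 bits) but in registers by the PA64
   runtime (not larger than 128 bits); a variable-sized or empty type
   is always returned in memory per the comment above.  */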
9928
9929 /* Structure to hold declaration and name of external symbols that are
9930 emitted by GCC. We generate a vector of these symbols and output them
9931 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9932 This avoids putting out names that are never really used. */
9933
9934 typedef struct GTY(()) extern_symbol
9935 {
9936 tree decl;
9937 const char *name;
9938 } extern_symbol;
9939
9940 /* Define gc'd vector type for extern_symbol. */
9941
9942 /* Vector of extern_symbol entries. */
9943 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9944
9945 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9946 /* Mark DECL (name NAME) as an external reference (assembler output
9947 file FILE). This saves the names to output at the end of the file
9948 if actually referenced. */
9949
9950 void
9951 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9952 {
9953 gcc_assert (file == asm_out_file);
9954 extern_symbol p = {decl, name};
9955 vec_safe_push (extern_symbols, p);
9956 }
9957
9958 /* Output text required at the end of an assembler file.
9959 This includes deferred plabels and .import directives for
9960 all external symbols that were actually referenced. */
9961
9962 static void
9963 pa_hpux_file_end (void)
9964 {
9965 unsigned int i;
9966 extern_symbol *p;
9967
9968 if (!NO_DEFERRED_PROFILE_COUNTERS)
9969 output_deferred_profile_counters ();
9970
9971 output_deferred_plabels ();
9972
9973 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9974 {
9975 tree decl = p->decl;
9976
9977 if (!TREE_ASM_WRITTEN (decl)
9978 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9979 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9980 }
9981
9982 vec_free (extern_symbols);
9983 }
9984 #endif
9985
9986 /* Return true if a change from mode FROM to mode TO for a register
9987 in register class RCLASS is invalid. */
9988
9989 bool
9990 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9991 enum reg_class rclass)
9992 {
9993 if (from == to)
9994 return false;
9995
9996 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9997 return false;
9998
9999 /* Reject changes to/from modes with zero size. */
10000 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10001 return true;
10002
10003 /* Reject changes to/from complex and vector modes. */
10004 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10005 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10006 return true;
10007
10008 /* There is no way to load QImode or HImode values directly from memory
10009 to a FP register. SImode loads to the FP registers are not zero
10010 extended. On the 64-bit target, this conflicts with the definition
10011 of LOAD_EXTEND_OP. Thus, we can't allow changing between modes with
10012 different sizes in the floating-point registers. */
10013 if (MAYBE_FP_REG_CLASS_P (rclass))
10014 return true;
10015
10016 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10017 in specific sets of registers. Thus, we cannot allow changing
10018 to a larger mode when it's larger than a word. */
10019 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10020 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10021 return true;
10022
10023 return false;
10024 }
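/* Illustrative examples of the tests above: an SFmode to DFmode change
   is rejected for FP register classes (the sizes differ), while an
   SImode to HImode change in the general registers is allowed since
   HImode is no wider than a word.  */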
10025
10026 /* Implement TARGET_MODES_TIEABLE_P.
10027
10028 We should return FALSE for QImode and HImode because these modes
10029 are not ok in the floating-point registers. However, this prevents
10030 tying these modes to SImode and DImode in the general registers.
10031 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10032 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
10033 in the floating-point registers. */
10034
10035 static bool
10036 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10037 {
10038 /* Don't tie modes in different classes. */
10039 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10040 return false;
10041
10042 return true;
10043 }
10044
10045 \f
10046 /* Length in units of the trampoline instruction code. */
10047
10048 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10049
10050
10051 /* Output assembler code for a block containing the constant parts
10052 of a trampoline, leaving space for the variable parts.
10053
10054 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10055 and then branches to the specified routine.
10056
10057 This code template is copied from the text segment to a stack
10058 location, patched by pa_trampoline_init to contain valid values,
10059 and then entered as a subroutine.
10060
10061 It is best to keep this as small as possible to avoid having to
10062 flush multiple lines in the cache. */
10063
10064 static void
10065 pa_asm_trampoline_template (FILE *f)
10066 {
10067 if (!TARGET_64BIT)
10068 {
10069 fputs ("\tldw 36(%r22),%r21\n", f);
10070 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10071 if (ASSEMBLER_DIALECT == 0)
10072 fputs ("\tdepi 0,31,2,%r21\n", f);
10073 else
10074 fputs ("\tdepwi 0,31,2,%r21\n", f);
10075 fputs ("\tldw 4(%r21),%r19\n", f);
10076 fputs ("\tldw 0(%r21),%r21\n", f);
10077 if (TARGET_PA_20)
10078 {
10079 fputs ("\tbve (%r21)\n", f);
10080 fputs ("\tldw 40(%r22),%r29\n", f);
10081 fputs ("\t.word 0\n", f);
10082 fputs ("\t.word 0\n", f);
10083 }
10084 else
10085 {
10086 fputs ("\tldsid (%r21),%r1\n", f);
10087 fputs ("\tmtsp %r1,%sr0\n", f);
10088 fputs ("\tbe 0(%sr0,%r21)\n", f);
10089 fputs ("\tldw 40(%r22),%r29\n", f);
10090 }
10091 fputs ("\t.word 0\n", f);
10092 fputs ("\t.word 0\n", f);
10093 fputs ("\t.word 0\n", f);
10094 fputs ("\t.word 0\n", f);
10095 }
10096 else
10097 {
10098 fputs ("\t.dword 0\n", f);
10099 fputs ("\t.dword 0\n", f);
10100 fputs ("\t.dword 0\n", f);
10101 fputs ("\t.dword 0\n", f);
10102 fputs ("\tmfia %r31\n", f);
10103 fputs ("\tldd 24(%r31),%r1\n", f);
10104 fputs ("\tldd 24(%r1),%r27\n", f);
10105 fputs ("\tldd 16(%r1),%r1\n", f);
10106 fputs ("\tbve (%r1)\n", f);
10107 fputs ("\tldd 32(%r31),%r31\n", f);
10108 fputs ("\t.dword 0 ; fptr\n", f);
10109 fputs ("\t.dword 0 ; static link\n", f);
10110 }
10111 }
10112
10113 /* Emit RTL insns to initialize the variable parts of a trampoline.
10114 FNADDR is an RTX for the address of the function's pure code.
10115 CXT is an RTX for the static chain value for the function.
10116
10117 Move the function address to the trampoline template at offset 36.
10118 Move the static chain value to trampoline template at offset 40.
10119 Move the trampoline address to trampoline template at offset 44.
10120 Move r19 to trampoline template at offset 48. The latter two
10121 words create a plabel for the indirect call to the trampoline.
10122
10123 A similar sequence is used for the 64-bit port but the plabel is
10124 at the beginning of the trampoline.
10125
10126 Finally, the cache entries for the trampoline code are flushed.
10127 This is necessary to ensure that the trampoline instruction sequence
10128 is written to memory prior to any attempts at prefetching the code
10129 sequence. */
10130
10131 static void
10132 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10133 {
10134 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10135 rtx start_addr = gen_reg_rtx (Pmode);
10136 rtx end_addr = gen_reg_rtx (Pmode);
10137 rtx line_length = gen_reg_rtx (Pmode);
10138 rtx r_tramp, tmp;
10139
10140 emit_block_move (m_tramp, assemble_trampoline_template (),
10141 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10142 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10143
10144 if (!TARGET_64BIT)
10145 {
10146 tmp = adjust_address (m_tramp, Pmode, 36);
10147 emit_move_insn (tmp, fnaddr);
10148 tmp = adjust_address (m_tramp, Pmode, 40);
10149 emit_move_insn (tmp, chain_value);
10150
10151 /* Create a fat pointer for the trampoline. */
10152 tmp = adjust_address (m_tramp, Pmode, 44);
10153 emit_move_insn (tmp, r_tramp);
10154 tmp = adjust_address (m_tramp, Pmode, 48);
10155 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10156
10157 /* fdc and fic only use registers for the address to flush,
10158 they do not accept integer displacements. We align the
10159 start and end addresses to the beginning of their respective
10160 cache lines to minimize the number of lines flushed. */
10161 emit_insn (gen_andsi3 (start_addr, r_tramp,
10162 GEN_INT (-MIN_CACHELINE_SIZE)));
10163 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10164 TRAMPOLINE_CODE_SIZE-1));
10165 emit_insn (gen_andsi3 (end_addr, tmp,
10166 GEN_INT (-MIN_CACHELINE_SIZE)));
10167 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10168 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10169 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10170 gen_reg_rtx (Pmode),
10171 gen_reg_rtx (Pmode)));
10172 }
10173 else
10174 {
10175 tmp = adjust_address (m_tramp, Pmode, 56);
10176 emit_move_insn (tmp, fnaddr);
10177 tmp = adjust_address (m_tramp, Pmode, 64);
10178 emit_move_insn (tmp, chain_value);
10179
10180 /* Create a fat pointer for the trampoline. */
10181 tmp = adjust_address (m_tramp, Pmode, 16);
10182 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10183 r_tramp, 32)));
10184 tmp = adjust_address (m_tramp, Pmode, 24);
10185 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10186
10187 /* fdc and fic only use registers for the address to flush,
10188 they do not accept integer displacements. We align the
10189 start and end addresses to the beginning of their respective
10190 cache lines to minimize the number of lines flushed. */
10191 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10192 emit_insn (gen_anddi3 (start_addr, tmp,
10193 GEN_INT (-MIN_CACHELINE_SIZE)));
10194 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10195 TRAMPOLINE_CODE_SIZE - 1));
10196 emit_insn (gen_anddi3 (end_addr, tmp,
10197 GEN_INT (-MIN_CACHELINE_SIZE)));
10198 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10199 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10200 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10201 gen_reg_rtx (Pmode),
10202 gen_reg_rtx (Pmode)));
10203 }
10204
10205 #ifdef HAVE_ENABLE_EXECUTE_STACK
10206 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10207 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10208 #endif
10209 }
10210
10211 /* Perform any machine-specific adjustment in the address of the trampoline.
10212 ADDR contains the address that was passed to pa_trampoline_init.
10213 Adjust the trampoline address to point to the plabel at offset 44. */
10214
10215 static rtx
10216 pa_trampoline_adjust_address (rtx addr)
10217 {
10218 if (!TARGET_64BIT)
10219 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10220 return addr;
10221 }
10222
10223 static rtx
10224 pa_delegitimize_address (rtx orig_x)
10225 {
10226 rtx x = delegitimize_mem_from_attrs (orig_x);
10227
10228 if (GET_CODE (x) == LO_SUM
10229 && GET_CODE (XEXP (x, 1)) == UNSPEC
10230 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10231 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10232 return x;
10233 }
10234 \f
10235 static rtx
10236 pa_internal_arg_pointer (void)
10237 {
10238 /* The argument pointer and the hard frame pointer are the same in
10239 the 32-bit runtime, so we don't need a copy. */
10240 if (TARGET_64BIT)
10241 return copy_to_reg (virtual_incoming_args_rtx);
10242 else
10243 return virtual_incoming_args_rtx;
10244 }
10245
10246 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10247 Frame pointer elimination is automatically handled. */
10248
10249 static bool
10250 pa_can_eliminate (const int from, const int to)
10251 {
10252 /* The argument cannot be eliminated in the 64-bit runtime. */
10253 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10254 return false;
10255
10256 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10257 ? ! frame_pointer_needed
10258 : true);
10259 }
10260
10261 /* Define the offset between two registers, FROM to be eliminated and its
10262 replacement TO, at the start of a routine. */
10263 HOST_WIDE_INT
10264 pa_initial_elimination_offset (int from, int to)
10265 {
10266 HOST_WIDE_INT offset;
10267
10268 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10269 && to == STACK_POINTER_REGNUM)
10270 offset = -pa_compute_frame_size (get_frame_size (), 0);
10271 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10272 offset = 0;
10273 else
10274 gcc_unreachable ();
10275
10276 return offset;
10277 }
10278
10279 static void
10280 pa_conditional_register_usage (void)
10281 {
10282 int i;
10283
10284 if (!TARGET_64BIT && !TARGET_PA_11)
10285 {
10286 for (i = 56; i <= FP_REG_LAST; i++)
10287 fixed_regs[i] = call_used_regs[i] = 1;
10288 for (i = 33; i < 56; i += 2)
10289 fixed_regs[i] = call_used_regs[i] = 1;
10290 }
10291 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10292 {
10293 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10294 fixed_regs[i] = call_used_regs[i] = 1;
10295 }
10296 if (flag_pic)
10297 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10298 }
10299
10300 /* Target hook for c_mode_for_suffix. */
10301
10302 static machine_mode
10303 pa_c_mode_for_suffix (char suffix)
10304 {
10305 if (HPUX_LONG_DOUBLE_LIBRARY)
10306 {
10307 if (suffix == 'q')
10308 return TFmode;
10309 }
10310
10311 return VOIDmode;
10312 }
10313
10314 /* Target hook for function_section. */
10315
10316 static section *
10317 pa_function_section (tree decl, enum node_frequency freq,
10318 bool startup, bool exit)
10319 {
10320 /* Put functions in text section if target doesn't have named sections. */
10321 if (!targetm_common.have_named_sections)
10322 return text_section;
10323
10324 /* Force nested functions into the same section as the containing
10325 function. */
10326 if (decl
10327 && DECL_SECTION_NAME (decl) == NULL
10328 && DECL_CONTEXT (decl) != NULL_TREE
10329 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10330 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10331 return function_section (DECL_CONTEXT (decl));
10332
10333 /* Otherwise, use the default function section. */
10334 return default_function_section (decl, freq, startup, exit);
10335 }
10336
10337 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10338
10339 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10340 that need more than three instructions to load prior to reload. This
10341 limit is somewhat arbitrary. It takes three instructions to load a
10342 CONST_INT from memory but two are memory accesses. It may be better
10343 to increase the allowed range for CONST_INTS. We may also be able
10344 to handle CONST_DOUBLES. */
10345
10346 static bool
10347 pa_legitimate_constant_p (machine_mode mode, rtx x)
10348 {
10349 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10350 return false;
10351
10352 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10353 return false;
10354
10355 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10356 legitimate constants. The other variants can't be handled by
10357 the move patterns after reload starts. */
10358 if (tls_referenced_p (x))
10359 return false;
10360
10361 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10362 return false;
10363
10364 if (TARGET_64BIT
10365 && HOST_BITS_PER_WIDE_INT > 32
10366 && GET_CODE (x) == CONST_INT
10367 && !reload_in_progress
10368 && !reload_completed
10369 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10370 && !pa_cint_ok_for_move (UINTVAL (x)))
10371 return false;
10372
10373 if (function_label_operand (x, mode))
10374 return false;
10375
10376 return true;
10377 }
10378
10379 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10380
10381 static unsigned int
10382 pa_section_type_flags (tree decl, const char *name, int reloc)
10383 {
10384 unsigned int flags;
10385
10386 flags = default_section_type_flags (decl, name, reloc);
10387
10388 /* Function labels are placed in the constant pool. This can
10389 cause a section conflict if decls are put in ".data.rel.ro"
10390 or ".data.rel.ro.local" using the __attribute__ construct. */
10391 if (strcmp (name, ".data.rel.ro") == 0
10392 || strcmp (name, ".data.rel.ro.local") == 0)
10393 flags |= SECTION_WRITE | SECTION_RELRO;
10394
10395 return flags;
10396 }
10397
10398 /* pa_legitimate_address_p recognizes an RTL expression that is a
10399 valid memory address for an instruction. The MODE argument is the
10400 machine mode for the MEM expression that wants to use this address.
10401
10402 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10403 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10404 available with floating point loads and stores, and integer loads.
10405 We get better code by allowing indexed addresses in the initial
10406 RTL generation.
10407
10408 The acceptance of indexed addresses as legitimate implies that we
10409 must provide patterns for doing indexed integer stores, or the move
10410 expanders must force the address of an indexed store to a register.
10411 We have adopted the latter approach.
10412
10413 Another function of pa_legitimate_address_p is to ensure that
10414 the base register is a valid pointer for indexed instructions.
10415 On targets that have non-equivalent space registers, we have to
10416 know at the time of assembler output which register in a REG+REG
10417 pair is the base register. The REG_POINTER flag is sometimes lost
10418 in reload and the following passes, so it can't be relied on during
10419 code generation. Thus, we either have to canonicalize the order
10420 of the registers in REG+REG indexed addresses, or treat REG+REG
10421 addresses separately and provide patterns for both permutations.
10422
10423 The latter approach requires several hundred additional lines of
10424 code in pa.md. The downside to canonicalizing is that a PLUS
10425 in the wrong order can't combine to form a scaled indexed
10426 memory operand. As we won't need to canonicalize the operands if
10427 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10428
10429 We initially break out scaled indexed addresses in canonical order
10430 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10431 scaled indexed addresses during RTL generation. However, fold_rtx
10432 has its own opinion on how the operands of a PLUS should be ordered.
10433 If one of the operands is equivalent to a constant, it will make
10434 that operand the second operand. As the base register is likely to
10435 be equivalent to a SYMBOL_REF, we have made it the second operand.
10436
10437 pa_legitimate_address_p accepts REG+REG as legitimate when the
10438 operands are in the order INDEX+BASE on targets with non-equivalent
10439 space registers, and in any order on targets with equivalent space
10440 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10441
10442 We treat a SYMBOL_REF as legitimate if it is part of the current
10443 function's constant-pool, because such addresses can actually be
10444 output as REG+SMALLINT. */
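/* Some illustrative examples (not from the original sources) of
   addresses the code below accepts in the 32-bit port:

	(plus (reg) (const_int 12))		  ; REG+SMALLINT
	(plus (mult (reg) (const_int 4)) (reg))	  ; scaled index, SImode
	(lo_sum (reg) (symbol_ref))		  ; from HIGH/LO_SUM splitting

   whereas something like (plus (reg) (const_int 65536)) is rejected
   and must first be legitimized to leave a small residual
   displacement.  */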
10445
10446 static bool
10447 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10448 {
10449 if ((REG_P (x)
10450 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10451 : REG_OK_FOR_BASE_P (x)))
10452 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10453 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10454 && REG_P (XEXP (x, 0))
10455 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10456 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10457 return true;
10458
10459 if (GET_CODE (x) == PLUS)
10460 {
10461 rtx base, index;
10462
10463 /* For REG+REG, the base register should be in XEXP (x, 1),
10464 so check it first. */
10465 if (REG_P (XEXP (x, 1))
10466 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10467 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10468 base = XEXP (x, 1), index = XEXP (x, 0);
10469 else if (REG_P (XEXP (x, 0))
10470 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10471 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10472 base = XEXP (x, 0), index = XEXP (x, 1);
10473 else
10474 return false;
10475
10476 if (GET_CODE (index) == CONST_INT)
10477 {
10478 if (INT_5_BITS (index))
10479 return true;
10480
10481 /* When INT14_OK_STRICT is false, a secondary reload is needed
10482 to adjust the displacement of SImode and DImode floating point
10483 instructions but this may fail when the register also needs
10484 reloading. So, we return false when STRICT is true. We
10485 also reject long displacements for float mode addresses since
10486 the majority of accesses will use floating point instructions
10487 that don't support 14-bit offsets. */
10488 if (!INT14_OK_STRICT
10489 && (strict || !(reload_in_progress || reload_completed))
10490 && mode != QImode
10491 && mode != HImode)
10492 return false;
10493
10494 return base14_operand (index, mode);
10495 }
10496
10497 if (!TARGET_DISABLE_INDEXING
10498 /* Only accept the "canonical" INDEX+BASE operand order
10499 on targets with non-equivalent space registers. */
10500 && (TARGET_NO_SPACE_REGS
10501 ? REG_P (index)
10502 : (base == XEXP (x, 1) && REG_P (index)
10503 && (reload_completed
10504 || (reload_in_progress && HARD_REGISTER_P (base))
10505 || REG_POINTER (base))
10506 && (reload_completed
10507 || (reload_in_progress && HARD_REGISTER_P (index))
10508 || !REG_POINTER (index))))
10509 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10510 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10511 : REG_OK_FOR_INDEX_P (index))
10512 && borx_reg_operand (base, Pmode)
10513 && borx_reg_operand (index, Pmode))
10514 return true;
10515
10516 if (!TARGET_DISABLE_INDEXING
10517 && GET_CODE (index) == MULT
10518 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10519 && REG_P (XEXP (index, 0))
10520 && GET_MODE (XEXP (index, 0)) == Pmode
10521 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10522 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10523 && GET_CODE (XEXP (index, 1)) == CONST_INT
10524 && INTVAL (XEXP (index, 1))
10525 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10526 && borx_reg_operand (base, Pmode))
10527 return true;
10528
10529 return false;
10530 }
10531
10532 if (GET_CODE (x) == LO_SUM)
10533 {
10534 rtx y = XEXP (x, 0);
10535
10536 if (GET_CODE (y) == SUBREG)
10537 y = SUBREG_REG (y);
10538
10539 if (REG_P (y)
10540 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10541 : REG_OK_FOR_BASE_P (y)))
10542 {
10543 /* Needed for -fPIC */
10544 if (mode == Pmode
10545 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10546 return true;
10547
10548 if (!INT14_OK_STRICT
10549 && (strict || !(reload_in_progress || reload_completed))
10550 && mode != QImode
10551 && mode != HImode)
10552 return false;
10553
10554 if (CONSTANT_P (XEXP (x, 1)))
10555 return true;
10556 }
10557 return false;
10558 }
10559
10560 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10561 return true;
10562
10563 return false;
10564 }
10565
10566 /* Look for machine dependent ways to make the invalid address AD a
10567 valid address.
10568
10569 For the PA, transform:
10570
10571 memory(X + <large int>)
10572
10573 into:
10574
10575 if (<large int> & mask) >= 16
10576 Y = (<large int> & ~mask) + mask + 1 Round up.
10577 else
10578 Y = (<large int> & ~mask) Round down.
10579 Z = X + Y
10580 memory (Z + (<large int> - Y));
10581
10582 This makes reload inheritance and reload_cse work better since Z
10583 can be reused.
10584
10585 There may be more opportunities to improve code with this hook. */
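/* A concrete illustration (hypothetical values): for an SFmode access
   to memory (%r3 + 0x124) when INT14_OK_STRICT is false, the mask is
   0x1f, so Y = 0x120 and the reference becomes
   memory ((%r3 + 0x120) + 4).  The residual displacement 4 fits the
   5-bit offset of the FP load, and the register holding %r3 + 0x120
   can be inherited by nearby reloads.  */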
10586
10587 rtx
10588 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10589 int opnum, int type,
10590 int ind_levels ATTRIBUTE_UNUSED)
10591 {
10592 long offset, newoffset, mask;
10593 rtx new_rtx, temp = NULL_RTX;
10594
10595 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10596 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10597
10598 if (optimize && GET_CODE (ad) == PLUS)
10599 temp = simplify_binary_operation (PLUS, Pmode,
10600 XEXP (ad, 0), XEXP (ad, 1));
10601
10602 new_rtx = temp ? temp : ad;
10603
10604 if (optimize
10605 && GET_CODE (new_rtx) == PLUS
10606 && GET_CODE (XEXP (new_rtx, 0)) == REG
10607 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10608 {
10609 offset = INTVAL (XEXP ((new_rtx), 1));
10610
10611 /* Choose rounding direction. Round up if we are >= halfway. */
10612 if ((offset & mask) >= ((mask + 1) / 2))
10613 newoffset = (offset & ~mask) + mask + 1;
10614 else
10615 newoffset = offset & ~mask;
10616
10617 /* Ensure that long displacements are aligned. */
10618 if (mask == 0x3fff
10619 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10620 || (TARGET_64BIT && (mode) == DImode)))
10621 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10622
10623 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10624 {
10625 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10626 GEN_INT (newoffset));
10627 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10628 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10629 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10630 opnum, (enum reload_type) type);
10631 return ad;
10632 }
10633 }
10634
10635 return NULL_RTX;
10636 }
10637
10638 /* Output address vector. */
10639
10640 void
10641 pa_output_addr_vec (rtx lab, rtx body)
10642 {
10643 int idx, vlen = XVECLEN (body, 0);
10644
10645 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10646 if (TARGET_GAS)
10647 fputs ("\t.begin_brtab\n", asm_out_file);
10648 for (idx = 0; idx < vlen; idx++)
10649 {
10650 ASM_OUTPUT_ADDR_VEC_ELT
10651 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10652 }
10653 if (TARGET_GAS)
10654 fputs ("\t.end_brtab\n", asm_out_file);
10655 }
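/* Roughly, the output for a three-entry table looks like (illustrative;
   the exact label spelling depends on the target macros):

	L$0040:
		.begin_brtab
		.word L$0041
		.word L$0042
		.word L$0043
		.end_brtab  */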
10656
10657 /* Output address difference vector. */
10658
10659 void
10660 pa_output_addr_diff_vec (rtx lab, rtx body)
10661 {
10662 rtx base = XEXP (XEXP (body, 0), 0);
10663 int idx, vlen = XVECLEN (body, 1);
10664
10665 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10666 if (TARGET_GAS)
10667 fputs ("\t.begin_brtab\n", asm_out_file);
10668 for (idx = 0; idx < vlen; idx++)
10669 {
10670 ASM_OUTPUT_ADDR_DIFF_ELT
10671 (asm_out_file,
10672 body,
10673 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10674 CODE_LABEL_NUMBER (base));
10675 }
10676 if (TARGET_GAS)
10677 fputs ("\t.end_brtab\n", asm_out_file);
10678 }
10679
/* This is a helper function for the other atomic operations.  It emits
   a loop containing SEQ that iterates until a compare-and-swap
   operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
   a set of instructions that takes a value from OLD_REG as an input and
   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
   set to the current contents of MEM.  After SEQ, a compare-and-swap will
   attempt to update MEM with NEW_REG.  The function returns true when the
   loop was generated successfully.  */

static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	seq;
	(success, cmp_reg) = compare-and-swap (mem, old_reg, new_reg)
	if (!success)
	  goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
				       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
				       MEMMODEL_RELAXED))
    return false;

  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
			   GET_MODE (success), 1, label,
			   profile_probability::guessed_never ());
  return true;
}

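/* Illustrative sketch (editorial addition, not from the original
   source): a caller implementing an atomic add with this helper might
   build SEQ, which must compute new_reg = old_reg + val, roughly as
   follows, assuming MEM and VAL share MODE:

	rtx old_reg = gen_reg_rtx (mode);
	rtx new_reg = gen_reg_rtx (mode);
	rtx seq;

	start_sequence ();
	new_reg = expand_simple_binop (mode, PLUS, old_reg, val,
				       new_reg, 1, OPTAB_WIDEN);
	seq = get_insns ();
	end_sequence ();

	pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   This closely parallels the static expand_compare_and_swap_loop in
   optabs.c.  */
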
/* This function tries to implement an atomic exchange operation using a
   compare-and-swap loop.  VAL is written to *MEM.  The previous contents of
   *MEM are returned, using TARGET if possible.  No memory model is required
   since a compare-and-swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
	target = gen_reg_rtx (mode);
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
	return target;
    }

  return NULL_RTX;
}

/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
		  machine_mode mode ATTRIBUTE_UNUSED,
		  const_tree type ATTRIBUTE_UNUSED,
		  bool named ATTRIBUTE_UNUSED)
{
  return !TARGET_CALLER_COPIES;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
{
  return PA_HARD_REGNO_NREGS (regno, mode);
}
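
/* For instance (editorial note, not from the original source): under
   the 32-bit ABI, UNITS_PER_WORD is 4, so a DImode or DFmode value
   occupies two hard registers while SImode and SFmode occupy one; the
   PA_HARD_REGNO_NREGS macro in pa.h performs the mode-size over
   word-size calculation.  */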

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return PA_HARD_REGNO_MODE_OK (regno, mode);
}

#include "gt-pa.h"