]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/pa/pa.c
Machine modes for address printing.
[thirdparty/gcc.git] / gcc / config / pa / pa.c
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "df.h"
29 #include "tm_p.h"
30 #include "stringpool.h"
31 #include "optabs.h"
32 #include "regs.h"
33 #include "emit-rtl.h"
34 #include "recog.h"
35 #include "diagnostic-core.h"
36 #include "insn-attr.h"
37 #include "alias.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "calls.h"
42 #include "output.h"
43 #include "except.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "reload.h"
47 #include "common/common-target.h"
48 #include "langhooks.h"
49 #include "cfgrtl.h"
50 #include "opts.h"
51 #include "builtins.h"
52
53 /* This file should be included last. */
54 #include "target-def.h"
55
56 /* Return nonzero if there is a bypass for the output of
57 OUT_INSN and the fp store IN_INSN. */
58 int
59 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
60 {
61 machine_mode store_mode;
62 machine_mode other_mode;
63 rtx set;
64
65 if (recog_memoized (in_insn) < 0
66 || (get_attr_type (in_insn) != TYPE_FPSTORE
67 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
68 || recog_memoized (out_insn) < 0)
69 return 0;
70
71 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
72
73 set = single_set (out_insn);
74 if (!set)
75 return 0;
76
77 other_mode = GET_MODE (SET_SRC (set));
78
79 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
80 }
81
82
83 #ifndef DO_FRAME_NOTES
84 #ifdef INCOMING_RETURN_ADDR_RTX
85 #define DO_FRAME_NOTES 1
86 #else
87 #define DO_FRAME_NOTES 0
88 #endif
89 #endif
90
91 static void pa_option_override (void);
92 static void copy_reg_pointer (rtx, rtx);
93 static void fix_range (const char *);
94 static int hppa_register_move_cost (machine_mode mode, reg_class_t,
95 reg_class_t);
96 static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
97 static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
98 static inline rtx force_mode (machine_mode, rtx);
99 static void pa_reorg (void);
100 static void pa_combine_instructions (void);
101 static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
102 rtx, rtx);
103 static bool forward_branch_p (rtx_insn *);
104 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
105 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
106 static int compute_movmem_length (rtx_insn *);
107 static int compute_clrmem_length (rtx_insn *);
108 static bool pa_assemble_integer (rtx, unsigned int, int);
109 static void remove_useless_addtr_insns (int);
110 static void store_reg (int, HOST_WIDE_INT, int);
111 static void store_reg_modify (int, int, HOST_WIDE_INT);
112 static void load_reg (int, HOST_WIDE_INT, int);
113 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
114 static rtx pa_function_value (const_tree, const_tree, bool);
115 static rtx pa_libcall_value (machine_mode, const_rtx);
116 static bool pa_function_value_regno_p (const unsigned int);
117 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static void update_total_code_bytes (unsigned int);
119 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
120 static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
121 static int pa_adjust_priority (rtx_insn *, int);
122 static int pa_issue_rate (void);
123 static int pa_reloc_rw_mask (void);
124 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
125 static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
126 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
127 ATTRIBUTE_UNUSED;
128 static void pa_encode_section_info (tree, rtx, int);
129 static const char *pa_strip_name_encoding (const char *);
130 static bool pa_function_ok_for_sibcall (tree, tree);
131 static void pa_globalize_label (FILE *, const char *)
132 ATTRIBUTE_UNUSED;
133 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
134 HOST_WIDE_INT, tree);
135 #if !defined(USE_COLLECT2)
136 static void pa_asm_out_constructor (rtx, int);
137 static void pa_asm_out_destructor (rtx, int);
138 #endif
139 static void pa_init_builtins (void);
140 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
141 static rtx hppa_builtin_saveregs (void);
142 static void hppa_va_start (tree, rtx);
143 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
144 static bool pa_scalar_mode_supported_p (machine_mode);
145 static bool pa_commutative_p (const_rtx x, int outer_code);
146 static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
147 static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
148 static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
149 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
150 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
151 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
152 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
153 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
154 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
155 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
156 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
157 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
158 static void output_deferred_plabels (void);
159 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
160 #ifdef ASM_OUTPUT_EXTERNAL_REAL
161 static void pa_hpux_file_end (void);
162 #endif
163 static void pa_init_libfuncs (void);
164 static rtx pa_struct_value_rtx (tree, int);
165 static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
166 const_tree, bool);
167 static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
168 tree, bool);
169 static void pa_function_arg_advance (cumulative_args_t, machine_mode,
170 const_tree, bool);
171 static rtx pa_function_arg (cumulative_args_t, machine_mode,
172 const_tree, bool);
173 static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
174 static struct machine_function * pa_init_machine_status (void);
175 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
176 machine_mode,
177 secondary_reload_info *);
178 static void pa_extra_live_on_entry (bitmap);
179 static machine_mode pa_promote_function_mode (const_tree,
180 machine_mode, int *,
181 const_tree, int);
182
183 static void pa_asm_trampoline_template (FILE *);
184 static void pa_trampoline_init (rtx, tree, rtx);
185 static rtx pa_trampoline_adjust_address (rtx);
186 static rtx pa_delegitimize_address (rtx);
187 static bool pa_print_operand_punct_valid_p (unsigned char);
188 static rtx pa_internal_arg_pointer (void);
189 static bool pa_can_eliminate (const int, const int);
190 static void pa_conditional_register_usage (void);
191 static machine_mode pa_c_mode_for_suffix (char);
192 static section *pa_function_section (tree, enum node_frequency, bool, bool);
193 static bool pa_cannot_force_const_mem (machine_mode, rtx);
194 static bool pa_legitimate_constant_p (machine_mode, rtx);
195 static unsigned int pa_section_type_flags (tree, const char *, int);
196 static bool pa_legitimate_address_p (machine_mode, rtx, bool);
197
198 /* The following extra sections are only used for SOM. */
199 static GTY(()) section *som_readonly_data_section;
200 static GTY(()) section *som_one_only_readonly_data_section;
201 static GTY(()) section *som_one_only_data_section;
202 static GTY(()) section *som_tm_clone_table_section;
203
204 /* Counts for the number of callee-saved general and floating point
205 registers which were saved by the current function's prologue. */
206 static int gr_saved, fr_saved;
207
208 /* Boolean indicating whether the return pointer was saved by the
209 current function's prologue. */
210 static bool rp_saved;
211
212 static rtx find_addr_reg (rtx);
213
214 /* Keep track of the number of bytes we have output in the CODE subspace
215 during this compilation so we'll know when to emit inline long-calls. */
216 unsigned long total_code_bytes;
217
218 /* The last address of the previous function plus the number of bytes in
219 associated thunks that have been output. This is used to determine if
220 a thunk can use an IA-relative branch to reach its target function. */
221 static unsigned int last_address;
222
223 /* Variables to handle plabels that we discover are necessary at assembly
224 output time. They are output after the current function. */
225 struct GTY(()) deferred_plabel
226 {
227 rtx internal_label;
228 rtx symbol;
229 };
230 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
231 deferred_plabels;
232 static size_t n_deferred_plabels = 0;
233 \f
234 /* Initialize the GCC target structure. */
235
236 #undef TARGET_OPTION_OVERRIDE
237 #define TARGET_OPTION_OVERRIDE pa_option_override
238
239 #undef TARGET_ASM_ALIGNED_HI_OP
240 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
241 #undef TARGET_ASM_ALIGNED_SI_OP
242 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
243 #undef TARGET_ASM_ALIGNED_DI_OP
244 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
245 #undef TARGET_ASM_UNALIGNED_HI_OP
246 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
247 #undef TARGET_ASM_UNALIGNED_SI_OP
248 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
249 #undef TARGET_ASM_UNALIGNED_DI_OP
250 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
251 #undef TARGET_ASM_INTEGER
252 #define TARGET_ASM_INTEGER pa_assemble_integer
253
254 #undef TARGET_ASM_FUNCTION_PROLOGUE
255 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
256 #undef TARGET_ASM_FUNCTION_EPILOGUE
257 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
258
259 #undef TARGET_FUNCTION_VALUE
260 #define TARGET_FUNCTION_VALUE pa_function_value
261 #undef TARGET_LIBCALL_VALUE
262 #define TARGET_LIBCALL_VALUE pa_libcall_value
263 #undef TARGET_FUNCTION_VALUE_REGNO_P
264 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
265
266 #undef TARGET_LEGITIMIZE_ADDRESS
267 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
268
269 #undef TARGET_SCHED_ADJUST_COST
270 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
271 #undef TARGET_SCHED_ADJUST_PRIORITY
272 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
273 #undef TARGET_SCHED_ISSUE_RATE
274 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
275
276 #undef TARGET_ENCODE_SECTION_INFO
277 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
278 #undef TARGET_STRIP_NAME_ENCODING
279 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
280
281 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
282 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
283
284 #undef TARGET_COMMUTATIVE_P
285 #define TARGET_COMMUTATIVE_P pa_commutative_p
286
287 #undef TARGET_ASM_OUTPUT_MI_THUNK
288 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
289 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
290 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
291
292 #undef TARGET_ASM_FILE_END
293 #ifdef ASM_OUTPUT_EXTERNAL_REAL
294 #define TARGET_ASM_FILE_END pa_hpux_file_end
295 #else
296 #define TARGET_ASM_FILE_END output_deferred_plabels
297 #endif
298
299 #undef TARGET_ASM_RELOC_RW_MASK
300 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
301
302 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
303 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
304
305 #if !defined(USE_COLLECT2)
306 #undef TARGET_ASM_CONSTRUCTOR
307 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
308 #undef TARGET_ASM_DESTRUCTOR
309 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
310 #endif
311
312 #undef TARGET_INIT_BUILTINS
313 #define TARGET_INIT_BUILTINS pa_init_builtins
314
315 #undef TARGET_EXPAND_BUILTIN
316 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
317
318 #undef TARGET_REGISTER_MOVE_COST
319 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
320 #undef TARGET_RTX_COSTS
321 #define TARGET_RTX_COSTS hppa_rtx_costs
322 #undef TARGET_ADDRESS_COST
323 #define TARGET_ADDRESS_COST hppa_address_cost
324
325 #undef TARGET_MACHINE_DEPENDENT_REORG
326 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
327
328 #undef TARGET_INIT_LIBFUNCS
329 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
330
331 #undef TARGET_PROMOTE_FUNCTION_MODE
332 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
333 #undef TARGET_PROMOTE_PROTOTYPES
334 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
335
336 #undef TARGET_STRUCT_VALUE_RTX
337 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
338 #undef TARGET_RETURN_IN_MEMORY
339 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
340 #undef TARGET_MUST_PASS_IN_STACK
341 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
342 #undef TARGET_PASS_BY_REFERENCE
343 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
344 #undef TARGET_CALLEE_COPIES
345 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
346 #undef TARGET_ARG_PARTIAL_BYTES
347 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
348 #undef TARGET_FUNCTION_ARG
349 #define TARGET_FUNCTION_ARG pa_function_arg
350 #undef TARGET_FUNCTION_ARG_ADVANCE
351 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
352 #undef TARGET_FUNCTION_ARG_BOUNDARY
353 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
354
355 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
356 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
357 #undef TARGET_EXPAND_BUILTIN_VA_START
358 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
359 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
360 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
361
362 #undef TARGET_SCALAR_MODE_SUPPORTED_P
363 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
364
365 #undef TARGET_CANNOT_FORCE_CONST_MEM
366 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
367
368 #undef TARGET_SECONDARY_RELOAD
369 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
370
371 #undef TARGET_EXTRA_LIVE_ON_ENTRY
372 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
373
374 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
375 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
376 #undef TARGET_TRAMPOLINE_INIT
377 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
378 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
379 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
380 #undef TARGET_DELEGITIMIZE_ADDRESS
381 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
382 #undef TARGET_INTERNAL_ARG_POINTER
383 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
384 #undef TARGET_CAN_ELIMINATE
385 #define TARGET_CAN_ELIMINATE pa_can_eliminate
386 #undef TARGET_CONDITIONAL_REGISTER_USAGE
387 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
388 #undef TARGET_C_MODE_FOR_SUFFIX
389 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
390 #undef TARGET_ASM_FUNCTION_SECTION
391 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
392
393 #undef TARGET_LEGITIMATE_CONSTANT_P
394 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
395 #undef TARGET_SECTION_TYPE_FLAGS
396 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
397 #undef TARGET_LEGITIMATE_ADDRESS_P
398 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
399
400 struct gcc_target targetm = TARGET_INITIALIZER;
401 \f
402 /* Parse the -mfixed-range= option string. */
403
404 static void
405 fix_range (const char *const_str)
406 {
407 int i, first, last;
408 char *str, *dash, *comma;
409
410 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
411 REG2 are either register names or register numbers. The effect
412 of this option is to mark the registers in the range from REG1 to
413 REG2 as ``fixed'' so they won't be used by the compiler. This is
414 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
415
416 i = strlen (const_str);
417 str = (char *) alloca (i + 1);
418 memcpy (str, const_str, i + 1);
419
420 while (1)
421 {
422 dash = strchr (str, '-');
423 if (!dash)
424 {
425 warning (0, "value of -mfixed-range must have form REG1-REG2");
426 return;
427 }
428 *dash = '\0';
429
430 comma = strchr (dash + 1, ',');
431 if (comma)
432 *comma = '\0';
433
434 first = decode_reg_name (str);
435 if (first < 0)
436 {
437 warning (0, "unknown register name: %s", str);
438 return;
439 }
440
441 last = decode_reg_name (dash + 1);
442 if (last < 0)
443 {
444 warning (0, "unknown register name: %s", dash + 1);
445 return;
446 }
447
448 *dash = '-';
449
450 if (first > last)
451 {
452 warning (0, "%s-%s is an empty range", str, dash + 1);
453 return;
454 }
455
456 for (i = first; i <= last; ++i)
457 fixed_regs[i] = call_used_regs[i] = 1;
458
459 if (!comma)
460 break;
461
462 *comma = ',';
463 str = comma + 1;
464 }
465
466 /* Check if all floating point registers have been fixed. */
467 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
468 if (!fixed_regs[i])
469 break;
470
471 if (i > FP_REG_LAST)
472 target_flags |= MASK_DISABLE_FPREGS;
473 }
474
475 /* Implement the TARGET_OPTION_OVERRIDE hook. */
476
477 static void
478 pa_option_override (void)
479 {
480 unsigned int i;
481 cl_deferred_option *opt;
482 vec<cl_deferred_option> *v
483 = (vec<cl_deferred_option> *) pa_deferred_options;
484
485 if (v)
486 FOR_EACH_VEC_ELT (*v, i, opt)
487 {
488 switch (opt->opt_index)
489 {
490 case OPT_mfixed_range_:
491 fix_range (opt->arg);
492 break;
493
494 default:
495 gcc_unreachable ();
496 }
497 }
498
499 if (flag_pic && TARGET_PORTABLE_RUNTIME)
500 {
501 warning (0, "PIC code generation is not supported in the portable runtime model");
502 }
503
504 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
505 {
506 warning (0, "PIC code generation is not compatible with fast indirect calls");
507 }
508
509 if (! TARGET_GAS && write_symbols != NO_DEBUG)
510 {
511 warning (0, "-g is only supported when using GAS on this processor,");
512 warning (0, "-g option disabled");
513 write_symbols = NO_DEBUG;
514 }
515
516 /* We only support the "big PIC" model now. And we always generate PIC
517 code when in 64bit mode. */
518 if (flag_pic == 1 || TARGET_64BIT)
519 flag_pic = 2;
520
521 /* Disable -freorder-blocks-and-partition as we don't support hot and
522 cold partitioning. */
523 if (flag_reorder_blocks_and_partition)
524 {
525 inform (input_location,
526 "-freorder-blocks-and-partition does not work "
527 "on this architecture");
528 flag_reorder_blocks_and_partition = 0;
529 flag_reorder_blocks = 1;
530 }
531
532 /* We can't guarantee that .dword is available for 32-bit targets. */
533 if (UNITS_PER_WORD == 4)
534 targetm.asm_out.aligned_op.di = NULL;
535
536 /* The unaligned ops are only available when using GAS. */
537 if (!TARGET_GAS)
538 {
539 targetm.asm_out.unaligned_op.hi = NULL;
540 targetm.asm_out.unaligned_op.si = NULL;
541 targetm.asm_out.unaligned_op.di = NULL;
542 }
543
544 init_machine_status = pa_init_machine_status;
545 }
546
547 enum pa_builtins
548 {
549 PA_BUILTIN_COPYSIGNQ,
550 PA_BUILTIN_FABSQ,
551 PA_BUILTIN_INFQ,
552 PA_BUILTIN_HUGE_VALQ,
553 PA_BUILTIN_max
554 };
555
556 static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
557
558 static void
559 pa_init_builtins (void)
560 {
561 #ifdef DONT_HAVE_FPUTC_UNLOCKED
562 {
563 tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
564 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
565 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
566 }
567 #endif
568 #if TARGET_HPUX_11
569 {
570 tree decl;
571
572 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
573 set_user_assembler_name (decl, "_Isfinite");
574 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
575 set_user_assembler_name (decl, "_Isfinitef");
576 }
577 #endif
578
579 if (HPUX_LONG_DOUBLE_LIBRARY)
580 {
581 tree decl, ftype;
582
583 /* Under HPUX, the __float128 type is a synonym for "long double". */
584 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
585 "__float128");
586
587 /* TFmode support builtins. */
588 ftype = build_function_type_list (long_double_type_node,
589 long_double_type_node,
590 NULL_TREE);
591 decl = add_builtin_function ("__builtin_fabsq", ftype,
592 PA_BUILTIN_FABSQ, BUILT_IN_MD,
593 "_U_Qfabs", NULL_TREE);
594 TREE_READONLY (decl) = 1;
595 pa_builtins[PA_BUILTIN_FABSQ] = decl;
596
597 ftype = build_function_type_list (long_double_type_node,
598 long_double_type_node,
599 long_double_type_node,
600 NULL_TREE);
601 decl = add_builtin_function ("__builtin_copysignq", ftype,
602 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
603 "_U_Qfcopysign", NULL_TREE);
604 TREE_READONLY (decl) = 1;
605 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
606
607 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
608 decl = add_builtin_function ("__builtin_infq", ftype,
609 PA_BUILTIN_INFQ, BUILT_IN_MD,
610 NULL, NULL_TREE);
611 pa_builtins[PA_BUILTIN_INFQ] = decl;
612
613 decl = add_builtin_function ("__builtin_huge_valq", ftype,
614 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
615 NULL, NULL_TREE);
616 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
617 }
618 }
619
620 static rtx
621 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
622 machine_mode mode ATTRIBUTE_UNUSED,
623 int ignore ATTRIBUTE_UNUSED)
624 {
625 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
626 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
627
628 switch (fcode)
629 {
630 case PA_BUILTIN_FABSQ:
631 case PA_BUILTIN_COPYSIGNQ:
632 return expand_call (exp, target, ignore);
633
634 case PA_BUILTIN_INFQ:
635 case PA_BUILTIN_HUGE_VALQ:
636 {
637 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
638 REAL_VALUE_TYPE inf;
639 rtx tmp;
640
641 real_inf (&inf);
642 tmp = const_double_from_real_value (inf, target_mode);
643
644 tmp = validize_mem (force_const_mem (target_mode, tmp));
645
646 if (target == 0)
647 target = gen_reg_rtx (target_mode);
648
649 emit_move_insn (target, tmp);
650 return target;
651 }
652
653 default:
654 gcc_unreachable ();
655 }
656
657 return NULL_RTX;
658 }
659
660 /* Function to init struct machine_function.
661 This will be called, via a pointer variable,
662 from push_function_context. */
663
664 static struct machine_function *
665 pa_init_machine_status (void)
666 {
667 return ggc_cleared_alloc<machine_function> ();
668 }
669
670 /* If FROM is a probable pointer register, mark TO as a probable
671 pointer register with the same pointer alignment as FROM. */
672
673 static void
674 copy_reg_pointer (rtx to, rtx from)
675 {
676 if (REG_POINTER (from))
677 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
678 }
679
680 /* Return 1 if X contains a symbolic expression. We know these
681 expressions will have one of a few well defined forms, so
682 we need only check those forms. */
683 int
684 pa_symbolic_expression_p (rtx x)
685 {
686
687 /* Strip off any HIGH. */
688 if (GET_CODE (x) == HIGH)
689 x = XEXP (x, 0);
690
691 return symbolic_operand (x, VOIDmode);
692 }
693
694 /* Accept any constant that can be moved in one instruction into a
695 general register. */
696 int
697 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
698 {
699 /* OK if ldo, ldil, or zdepi, can be used. */
700 return (VAL_14_BITS_P (ival)
701 || pa_ldil_cint_p (ival)
702 || pa_zdepi_cint_p (ival));
703 }
704 \f
705 /* True iff ldil can be used to load this CONST_INT. The least
706 significant 11 bits of the value must be zero and the value must
707 not change sign when extended from 32 to 64 bits. */
708 int
709 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
710 {
711 unsigned HOST_WIDE_INT x;
712
713 x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
714 return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
715 }
716
717 /* True iff zdepi can be used to generate this CONST_INT.
718 zdepi first sign extends a 5-bit signed number to a given field
719 length, then places this field anywhere in a zero. */
720 int
721 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
722 {
723 unsigned HOST_WIDE_INT lsb_mask, t;
724
725 /* This might not be obvious, but it's at least fast.
726 This function is critical; we don't have the time loops would take. */
727 lsb_mask = x & -x;
728 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
729 /* Return true iff t is a power of two. */
730 return ((t & (t - 1)) == 0);
731 }
732
733 /* True iff depi or extru can be used to compute (reg & mask).
734 Accept bit pattern like these:
735 0....01....1
736 1....10....0
737 1..10..01..1 */
738 int
739 pa_and_mask_p (unsigned HOST_WIDE_INT mask)
740 {
741 mask = ~mask;
742 mask += mask & -mask;
743 return (mask & (mask - 1)) == 0;
744 }
745
746 /* True iff depi can be used to compute (reg | MASK). */
747 int
748 pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
749 {
750 mask += mask & -mask;
751 return (mask & (mask - 1)) == 0;
752 }
753 \f
754 /* Legitimize PIC addresses. If the address is already
755 position-independent, we return ORIG. Newly generated
756 position-independent addresses go to REG. If we need more
757 than one register, we lose. */
758
759 static rtx
760 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
761 {
762 rtx pic_ref = orig;
763
764 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
765
766 /* Labels need special handling. */
767 if (pic_label_operand (orig, mode))
768 {
769 rtx_insn *insn;
770
771 /* We do not want to go through the movXX expanders here since that
772 would create recursion.
773
774 Nor do we really want to call a generator for a named pattern
775 since that requires multiple patterns if we want to support
776 multiple word sizes.
777
778 So instead we just emit the raw set, which avoids the movXX
779 expanders completely. */
780 mark_reg_pointer (reg, BITS_PER_UNIT);
781 insn = emit_insn (gen_rtx_SET (reg, orig));
782
783 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
784 add_reg_note (insn, REG_EQUAL, orig);
785
786 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
787 and update LABEL_NUSES because this is not done automatically. */
788 if (reload_in_progress || reload_completed)
789 {
790 /* Extract LABEL_REF. */
791 if (GET_CODE (orig) == CONST)
792 orig = XEXP (XEXP (orig, 0), 0);
793 /* Extract CODE_LABEL. */
794 orig = XEXP (orig, 0);
795 add_reg_note (insn, REG_LABEL_OPERAND, orig);
796 /* Make sure we have label and not a note. */
797 if (LABEL_P (orig))
798 LABEL_NUSES (orig)++;
799 }
800 crtl->uses_pic_offset_table = 1;
801 return reg;
802 }
803 if (GET_CODE (orig) == SYMBOL_REF)
804 {
805 rtx_insn *insn;
806 rtx tmp_reg;
807
808 gcc_assert (reg);
809
810 /* Before reload, allocate a temporary register for the intermediate
811 result. This allows the sequence to be deleted when the final
812 result is unused and the insns are trivially dead. */
813 tmp_reg = ((reload_in_progress || reload_completed)
814 ? reg : gen_reg_rtx (Pmode));
815
816 if (function_label_operand (orig, VOIDmode))
817 {
818 /* Force function label into memory in word mode. */
819 orig = XEXP (force_const_mem (word_mode, orig), 0);
820 /* Load plabel address from DLT. */
821 emit_move_insn (tmp_reg,
822 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
823 gen_rtx_HIGH (word_mode, orig)));
824 pic_ref
825 = gen_const_mem (Pmode,
826 gen_rtx_LO_SUM (Pmode, tmp_reg,
827 gen_rtx_UNSPEC (Pmode,
828 gen_rtvec (1, orig),
829 UNSPEC_DLTIND14R)));
830 emit_move_insn (reg, pic_ref);
831 /* Now load address of function descriptor. */
832 pic_ref = gen_rtx_MEM (Pmode, reg);
833 }
834 else
835 {
836 /* Load symbol reference from DLT. */
837 emit_move_insn (tmp_reg,
838 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
839 gen_rtx_HIGH (word_mode, orig)));
840 pic_ref
841 = gen_const_mem (Pmode,
842 gen_rtx_LO_SUM (Pmode, tmp_reg,
843 gen_rtx_UNSPEC (Pmode,
844 gen_rtvec (1, orig),
845 UNSPEC_DLTIND14R)));
846 }
847
848 crtl->uses_pic_offset_table = 1;
849 mark_reg_pointer (reg, BITS_PER_UNIT);
850 insn = emit_move_insn (reg, pic_ref);
851
852 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
853 set_unique_reg_note (insn, REG_EQUAL, orig);
854
855 return reg;
856 }
857 else if (GET_CODE (orig) == CONST)
858 {
859 rtx base;
860
861 if (GET_CODE (XEXP (orig, 0)) == PLUS
862 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
863 return orig;
864
865 gcc_assert (reg);
866 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
867
868 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
869 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
870 base == reg ? 0 : reg);
871
872 if (GET_CODE (orig) == CONST_INT)
873 {
874 if (INT_14_BITS (orig))
875 return plus_constant (Pmode, base, INTVAL (orig));
876 orig = force_reg (Pmode, orig);
877 }
878 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
879 /* Likewise, should we set special REG_NOTEs here? */
880 }
881
882 return pic_ref;
883 }
884
885 static GTY(()) rtx gen_tls_tga;
886
887 static rtx
888 gen_tls_get_addr (void)
889 {
890 if (!gen_tls_tga)
891 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
892 return gen_tls_tga;
893 }
894
895 static rtx
896 hppa_tls_call (rtx arg)
897 {
898 rtx ret;
899
900 ret = gen_reg_rtx (Pmode);
901 emit_library_call_value (gen_tls_get_addr (), ret,
902 LCT_CONST, Pmode, 1, arg, Pmode);
903
904 return ret;
905 }
906
907 static rtx
908 legitimize_tls_address (rtx addr)
909 {
910 rtx ret, tmp, t1, t2, tp;
911 rtx_insn *insn;
912
913 /* Currently, we can't handle anything but a SYMBOL_REF. */
914 if (GET_CODE (addr) != SYMBOL_REF)
915 return addr;
916
917 switch (SYMBOL_REF_TLS_MODEL (addr))
918 {
919 case TLS_MODEL_GLOBAL_DYNAMIC:
920 tmp = gen_reg_rtx (Pmode);
921 if (flag_pic)
922 emit_insn (gen_tgd_load_pic (tmp, addr));
923 else
924 emit_insn (gen_tgd_load (tmp, addr));
925 ret = hppa_tls_call (tmp);
926 break;
927
928 case TLS_MODEL_LOCAL_DYNAMIC:
929 ret = gen_reg_rtx (Pmode);
930 tmp = gen_reg_rtx (Pmode);
931 start_sequence ();
932 if (flag_pic)
933 emit_insn (gen_tld_load_pic (tmp, addr));
934 else
935 emit_insn (gen_tld_load (tmp, addr));
936 t1 = hppa_tls_call (tmp);
937 insn = get_insns ();
938 end_sequence ();
939 t2 = gen_reg_rtx (Pmode);
940 emit_libcall_block (insn, t2, t1,
941 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
942 UNSPEC_TLSLDBASE));
943 emit_insn (gen_tld_offset_load (ret, addr, t2));
944 break;
945
946 case TLS_MODEL_INITIAL_EXEC:
947 tp = gen_reg_rtx (Pmode);
948 tmp = gen_reg_rtx (Pmode);
949 ret = gen_reg_rtx (Pmode);
950 emit_insn (gen_tp_load (tp));
951 if (flag_pic)
952 emit_insn (gen_tie_load_pic (tmp, addr));
953 else
954 emit_insn (gen_tie_load (tmp, addr));
955 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
956 break;
957
958 case TLS_MODEL_LOCAL_EXEC:
959 tp = gen_reg_rtx (Pmode);
960 ret = gen_reg_rtx (Pmode);
961 emit_insn (gen_tp_load (tp));
962 emit_insn (gen_tle_load (ret, addr, tp));
963 break;
964
965 default:
966 gcc_unreachable ();
967 }
968
969 return ret;
970 }
971
972 /* Helper for hppa_legitimize_address. Given X, return true if it
973 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
974
975 This respectively represent canonical shift-add rtxs or scaled
976 memory addresses. */
977 static bool
978 mem_shadd_or_shadd_rtx_p (rtx x)
979 {
980 return ((GET_CODE (x) == ASHIFT
981 || GET_CODE (x) == MULT)
982 && GET_CODE (XEXP (x, 1)) == CONST_INT
983 && ((GET_CODE (x) == ASHIFT
984 && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
985 || (GET_CODE (x) == MULT
986 && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
987 }
988
989 /* Try machine-dependent ways of modifying an illegitimate address
990 to be legitimate. If we find one, return the new, valid address.
991 This macro is used in only one place: `memory_address' in explow.c.
992
993 OLDX is the address as it was before break_out_memory_refs was called.
994 In some cases it is useful to look at this to decide what needs to be done.
995
996 It is always safe for this macro to do nothing. It exists to recognize
997 opportunities to optimize the output.
998
999 For the PA, transform:
1000
1001 memory(X + <large int>)
1002
1003 into:
1004
1005 if (<large int> & mask) >= 16
1006 Y = (<large int> & ~mask) + mask + 1 Round up.
1007 else
1008 Y = (<large int> & ~mask) Round down.
1009 Z = X + Y
1010 memory (Z + (<large int> - Y));
1011
1012 This is for CSE to find several similar references, and only use one Z.
1013
1014 X can either be a SYMBOL_REF or REG, but because combine cannot
1015 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1016 D will not fit in 14 bits.
1017
1018 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1019 0x1f as the mask.
1020
1021 MODE_INT references allow displacements which fit in 14 bits, so use
1022 0x3fff as the mask.
1023
1024 This relies on the fact that most mode MODE_FLOAT references will use FP
1025 registers and most mode MODE_INT references will use integer registers.
1026 (In the rare case of an FP register used in an integer MODE, we depend
1027 on secondary reloads to clean things up.)
1028
1029
1030 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1031 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1032 addressing modes to be used).
1033
1034 Note that the addresses passed into hppa_legitimize_address always
1035 come from a MEM, so we only have to match the MULT form on incoming
1036 addresses. But to be future proof we also match the ASHIFT form.
1037
1038 However, this routine always places those shift-add sequences into
1039 registers, so we have to generate the ASHIFT form as our output.
1040
1041 Put X and Z into registers. Then put the entire expression into
1042 a register. */
1043
1044 rtx
1045 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1046 machine_mode mode)
1047 {
1048 rtx orig = x;
1049
1050 /* We need to canonicalize the order of operands in unscaled indexed
1051 addresses since the code that checks if an address is valid doesn't
1052 always try both orders. */
1053 if (!TARGET_NO_SPACE_REGS
1054 && GET_CODE (x) == PLUS
1055 && GET_MODE (x) == Pmode
1056 && REG_P (XEXP (x, 0))
1057 && REG_P (XEXP (x, 1))
1058 && REG_POINTER (XEXP (x, 0))
1059 && !REG_POINTER (XEXP (x, 1)))
1060 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1061
1062 if (tls_referenced_p (x))
1063 return legitimize_tls_address (x);
1064 else if (flag_pic)
1065 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1066
1067 /* Strip off CONST. */
1068 if (GET_CODE (x) == CONST)
1069 x = XEXP (x, 0);
1070
1071 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1072 That should always be safe. */
1073 if (GET_CODE (x) == PLUS
1074 && GET_CODE (XEXP (x, 0)) == REG
1075 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1076 {
1077 rtx reg = force_reg (Pmode, XEXP (x, 1));
1078 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1079 }
1080
1081 /* Note we must reject symbols which represent function addresses
1082 since the assembler/linker can't handle arithmetic on plabels. */
1083 if (GET_CODE (x) == PLUS
1084 && GET_CODE (XEXP (x, 1)) == CONST_INT
1085 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1086 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1087 || GET_CODE (XEXP (x, 0)) == REG))
1088 {
1089 rtx int_part, ptr_reg;
1090 int newoffset;
1091 int offset = INTVAL (XEXP (x, 1));
1092 int mask;
1093
1094 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1095 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
1096
1097 /* Choose which way to round the offset. Round up if we
1098 are >= halfway to the next boundary. */
1099 if ((offset & mask) >= ((mask + 1) / 2))
1100 newoffset = (offset & ~ mask) + mask + 1;
1101 else
1102 newoffset = (offset & ~ mask);
1103
1104 /* If the newoffset will not fit in 14 bits (ldo), then
1105 handling this would take 4 or 5 instructions (2 to load
1106 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1107 add the new offset and the SYMBOL_REF.) Combine can
1108 not handle 4->2 or 5->2 combinations, so do not create
1109 them. */
1110 if (! VAL_14_BITS_P (newoffset)
1111 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1112 {
1113 rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
1114 rtx tmp_reg
1115 = force_reg (Pmode,
1116 gen_rtx_HIGH (Pmode, const_part));
1117 ptr_reg
1118 = force_reg (Pmode,
1119 gen_rtx_LO_SUM (Pmode,
1120 tmp_reg, const_part));
1121 }
1122 else
1123 {
1124 if (! VAL_14_BITS_P (newoffset))
1125 int_part = force_reg (Pmode, GEN_INT (newoffset));
1126 else
1127 int_part = GEN_INT (newoffset);
1128
1129 ptr_reg = force_reg (Pmode,
1130 gen_rtx_PLUS (Pmode,
1131 force_reg (Pmode, XEXP (x, 0)),
1132 int_part));
1133 }
1134 return plus_constant (Pmode, ptr_reg, offset - newoffset);
1135 }
1136
1137 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1138
1139 if (GET_CODE (x) == PLUS
1140 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1141 && (OBJECT_P (XEXP (x, 1))
1142 || GET_CODE (XEXP (x, 1)) == SUBREG)
1143 && GET_CODE (XEXP (x, 1)) != CONST)
1144 {
1145 /* If we were given a MULT, we must fix the constant
1146 as we're going to create the ASHIFT form. */
1147 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1148 if (GET_CODE (XEXP (x, 0)) == MULT)
1149 shift_val = exact_log2 (shift_val);
1150
1151 rtx reg1, reg2;
1152 reg1 = XEXP (x, 1);
1153 if (GET_CODE (reg1) != REG)
1154 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1155
1156 reg2 = XEXP (XEXP (x, 0), 0);
1157 if (GET_CODE (reg2) != REG)
1158 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1159
1160 return force_reg (Pmode,
1161 gen_rtx_PLUS (Pmode,
1162 gen_rtx_ASHIFT (Pmode, reg2,
1163 GEN_INT (shift_val)),
1164 reg1));
1165 }
1166
1167 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1168
1169 Only do so for floating point modes since this is more speculative
1170 and we lose if it's an integer store. */
1171 if (GET_CODE (x) == PLUS
1172 && GET_CODE (XEXP (x, 0)) == PLUS
1173 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
1174 && (mode == SFmode || mode == DFmode))
1175 {
1176 int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
1177
1178 /* If we were given a MULT, we must fix the constant
1179 as we're going to create the ASHIFT form. */
1180 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
1181 shift_val = exact_log2 (shift_val);
1182
1183 /* Try and figure out what to use as a base register. */
1184 rtx reg1, reg2, base, idx;
1185
1186 reg1 = XEXP (XEXP (x, 0), 1);
1187 reg2 = XEXP (x, 1);
1188 base = NULL_RTX;
1189 idx = NULL_RTX;
1190
1191 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1192 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1193 it's a base register below. */
1194 if (GET_CODE (reg1) != REG)
1195 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1196
1197 if (GET_CODE (reg2) != REG)
1198 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1199
1200 /* Figure out what the base and index are. */
1201
1202 if (GET_CODE (reg1) == REG
1203 && REG_POINTER (reg1))
1204 {
1205 base = reg1;
1206 idx = gen_rtx_PLUS (Pmode,
1207 gen_rtx_ASHIFT (Pmode,
1208 XEXP (XEXP (XEXP (x, 0), 0), 0),
1209 GEN_INT (shift_val)),
1210 XEXP (x, 1));
1211 }
1212 else if (GET_CODE (reg2) == REG
1213 && REG_POINTER (reg2))
1214 {
1215 base = reg2;
1216 idx = XEXP (x, 0);
1217 }
1218
1219 if (base == 0)
1220 return orig;
1221
1222 /* If the index adds a large constant, try to scale the
1223 constant so that it can be loaded with only one insn. */
1224 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1225 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1226 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1227 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1228 {
1229 /* Divide the CONST_INT by the scale factor, then add it to A. */
1230 int val = INTVAL (XEXP (idx, 1));
1231 val /= (1 << shift_val);
1232
1233 reg1 = XEXP (XEXP (idx, 0), 0);
1234 if (GET_CODE (reg1) != REG)
1235 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1236
1237 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1238
1239 /* We can now generate a simple scaled indexed address. */
1240 return
1241 force_reg
1242 (Pmode, gen_rtx_PLUS (Pmode,
1243 gen_rtx_ASHIFT (Pmode, reg1,
1244 GEN_INT (shift_val)),
1245 base));
1246 }
1247
1248 /* If B + C is still a valid base register, then add them. */
1249 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1250 && INTVAL (XEXP (idx, 1)) <= 4096
1251 && INTVAL (XEXP (idx, 1)) >= -4096)
1252 {
1253 rtx reg1, reg2;
1254
1255 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1256
1257 reg2 = XEXP (XEXP (idx, 0), 0);
1258 if (GET_CODE (reg2) != CONST_INT)
1259 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1260
1261 return force_reg (Pmode,
1262 gen_rtx_PLUS (Pmode,
1263 gen_rtx_ASHIFT (Pmode, reg2,
1264 GEN_INT (shift_val)),
1265 reg1));
1266 }
1267
1268 /* Get the index into a register, then add the base + index and
1269 return a register holding the result. */
1270
1271 /* First get A into a register. */
1272 reg1 = XEXP (XEXP (idx, 0), 0);
1273 if (GET_CODE (reg1) != REG)
1274 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1275
1276 /* And get B into a register. */
1277 reg2 = XEXP (idx, 1);
1278 if (GET_CODE (reg2) != REG)
1279 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1280
1281 reg1 = force_reg (Pmode,
1282 gen_rtx_PLUS (Pmode,
1283 gen_rtx_ASHIFT (Pmode, reg1,
1284 GEN_INT (shift_val)),
1285 reg2));
1286
1287 /* Add the result to our base register and return. */
1288 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1289
1290 }
1291
1292 /* Uh-oh. We might have an address for x[n-100000]. This needs
1293 special handling to avoid creating an indexed memory address
1294 with x-100000 as the base.
1295
1296 If the constant part is small enough, then it's still safe because
1297 there is a guard page at the beginning and end of the data segment.
1298
1299 Scaled references are common enough that we want to try and rearrange the
1300 terms so that we can use indexing for these addresses too. Only
1301 do the optimization for floatint point modes. */
1302
1303 if (GET_CODE (x) == PLUS
1304 && pa_symbolic_expression_p (XEXP (x, 1)))
1305 {
1306 /* Ugly. We modify things here so that the address offset specified
1307 by the index expression is computed first, then added to x to form
1308 the entire address. */
1309
1310 rtx regx1, regx2, regy1, regy2, y;
1311
1312 /* Strip off any CONST. */
1313 y = XEXP (x, 1);
1314 if (GET_CODE (y) == CONST)
1315 y = XEXP (y, 0);
1316
1317 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1318 {
1319 /* See if this looks like
1320 (plus (mult (reg) (mem_shadd_const))
1321 (const (plus (symbol_ref) (const_int))))
1322
1323 Where const_int is small. In that case the const
1324 expression is a valid pointer for indexing.
1325
1326 If const_int is big, but can be divided evenly by shadd_const
1327 and added to (reg). This allows more scaled indexed addresses. */
1328 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1329 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1330 && GET_CODE (XEXP (y, 1)) == CONST_INT
1331 && INTVAL (XEXP (y, 1)) >= -4096
1332 && INTVAL (XEXP (y, 1)) <= 4095)
1333 {
1334 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1335
1336 /* If we were given a MULT, we must fix the constant
1337 as we're going to create the ASHIFT form. */
1338 if (GET_CODE (XEXP (x, 0)) == MULT)
1339 shift_val = exact_log2 (shift_val);
1340
1341 rtx reg1, reg2;
1342
1343 reg1 = XEXP (x, 1);
1344 if (GET_CODE (reg1) != REG)
1345 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1346
1347 reg2 = XEXP (XEXP (x, 0), 0);
1348 if (GET_CODE (reg2) != REG)
1349 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1350
1351 return
1352 force_reg (Pmode,
1353 gen_rtx_PLUS (Pmode,
1354 gen_rtx_ASHIFT (Pmode,
1355 reg2,
1356 GEN_INT (shift_val)),
1357 reg1));
1358 }
1359 else if ((mode == DFmode || mode == SFmode)
1360 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1361 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1362 && GET_CODE (XEXP (y, 1)) == CONST_INT
1363 && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
1364 {
1365 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1366
1367 /* If we were given a MULT, we must fix the constant
1368 as we're going to create the ASHIFT form. */
1369 if (GET_CODE (XEXP (x, 0)) == MULT)
1370 shift_val = exact_log2 (shift_val);
1371
1372 regx1
1373 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1374 / INTVAL (XEXP (XEXP (x, 0), 1))));
1375 regx2 = XEXP (XEXP (x, 0), 0);
1376 if (GET_CODE (regx2) != REG)
1377 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1378 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1379 regx2, regx1));
1380 return
1381 force_reg (Pmode,
1382 gen_rtx_PLUS (Pmode,
1383 gen_rtx_ASHIFT (Pmode, regx2,
1384 GEN_INT (shift_val)),
1385 force_reg (Pmode, XEXP (y, 0))));
1386 }
1387 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1388 && INTVAL (XEXP (y, 1)) >= -4096
1389 && INTVAL (XEXP (y, 1)) <= 4095)
1390 {
1391 /* This is safe because of the guard page at the
1392 beginning and end of the data space. Just
1393 return the original address. */
1394 return orig;
1395 }
1396 else
1397 {
1398 /* Doesn't look like one we can optimize. */
1399 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1400 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1401 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1402 regx1 = force_reg (Pmode,
1403 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1404 regx1, regy2));
1405 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1406 }
1407 }
1408 }
1409
1410 return orig;
1411 }
1412
1413 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1414
1415 Compute extra cost of moving data between one register class
1416 and another.
1417
1418 Make moves from SAR so expensive they should never happen. We used to
1419 have 0xffff here, but that generates overflow in rare cases.
1420
1421 Copies involving a FP register and a non-FP register are relatively
1422 expensive because they must go through memory.
1423
1424 Other copies are reasonably cheap. */
1425
1426 static int
1427 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
1428 reg_class_t from, reg_class_t to)
1429 {
1430 if (from == SHIFT_REGS)
1431 return 0x100;
1432 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1433 return 18;
1434 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1435 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1436 return 16;
1437 else
1438 return 2;
1439 }
1440
1441 /* For the HPPA, REG and REG+CONST is cost 0
1442 and addresses involving symbolic constants are cost 2.
1443
1444 PIC addresses are very expensive.
1445
1446 It is no coincidence that this has the same structure
1447 as pa_legitimate_address_p. */
1448
1449 static int
1450 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1451 addr_space_t as ATTRIBUTE_UNUSED,
1452 bool speed ATTRIBUTE_UNUSED)
1453 {
1454 switch (GET_CODE (X))
1455 {
1456 case REG:
1457 case PLUS:
1458 case LO_SUM:
1459 return 1;
1460 case HIGH:
1461 return 2;
1462 default:
1463 return 4;
1464 }
1465 }
1466
1467 /* Compute a (partial) cost for rtx X. Return true if the complete
1468 cost has been computed, and false if subexpressions should be
1469 scanned. In either case, *TOTAL contains the cost result. */
1470
1471 static bool
1472 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1473 int opno ATTRIBUTE_UNUSED,
1474 int *total, bool speed ATTRIBUTE_UNUSED)
1475 {
1476 int factor;
1477 int code = GET_CODE (x);
1478
1479 switch (code)
1480 {
1481 case CONST_INT:
1482 if (INTVAL (x) == 0)
1483 *total = 0;
1484 else if (INT_14_BITS (x))
1485 *total = 1;
1486 else
1487 *total = 2;
1488 return true;
1489
1490 case HIGH:
1491 *total = 2;
1492 return true;
1493
1494 case CONST:
1495 case LABEL_REF:
1496 case SYMBOL_REF:
1497 *total = 4;
1498 return true;
1499
1500 case CONST_DOUBLE:
1501 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1502 && outer_code != SET)
1503 *total = 0;
1504 else
1505 *total = 8;
1506 return true;
1507
1508 case MULT:
1509 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1510 {
1511 *total = COSTS_N_INSNS (3);
1512 return true;
1513 }
1514
1515 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1516 factor = GET_MODE_SIZE (mode) / 4;
1517 if (factor == 0)
1518 factor = 1;
1519
1520 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1521 *total = factor * factor * COSTS_N_INSNS (8);
1522 else
1523 *total = factor * factor * COSTS_N_INSNS (20);
1524 return true;
1525
1526 case DIV:
1527 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1528 {
1529 *total = COSTS_N_INSNS (14);
1530 return true;
1531 }
1532 /* FALLTHRU */
1533
1534 case UDIV:
1535 case MOD:
1536 case UMOD:
1537 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1538 factor = GET_MODE_SIZE (mode) / 4;
1539 if (factor == 0)
1540 factor = 1;
1541
1542 *total = factor * factor * COSTS_N_INSNS (60);
1543 return true;
1544
1545 case PLUS: /* this includes shNadd insns */
1546 case MINUS:
1547 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1548 {
1549 *total = COSTS_N_INSNS (3);
1550 return true;
1551 }
1552
1553 /* A size N times larger than UNITS_PER_WORD needs N times as
1554 many insns, taking N times as long. */
1555 factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
1556 if (factor == 0)
1557 factor = 1;
1558 *total = factor * COSTS_N_INSNS (1);
1559 return true;
1560
1561 case ASHIFT:
1562 case ASHIFTRT:
1563 case LSHIFTRT:
1564 *total = COSTS_N_INSNS (1);
1565 return true;
1566
1567 default:
1568 return false;
1569 }
1570 }
1571
1572 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1573 new rtx with the correct mode. */
1574 static inline rtx
1575 force_mode (machine_mode mode, rtx orig)
1576 {
1577 if (mode == GET_MODE (orig))
1578 return orig;
1579
1580 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1581
1582 return gen_rtx_REG (mode, REGNO (orig));
1583 }
1584
1585 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1586
1587 static bool
1588 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1589 {
1590 return tls_referenced_p (x);
1591 }
1592
1593 /* Emit insns to move operands[1] into operands[0].
1594
1595 Return 1 if we have written out everything that needs to be done to
1596 do the move. Otherwise, return 0 and the caller will emit the move
1597 normally.
1598
1599 Note SCRATCH_REG may not be in the proper mode depending on how it
1600 will be used. This routine is responsible for creating a new copy
1601 of SCRATCH_REG in the proper mode. */
1602
1603 int
1604 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1605 {
1606 register rtx operand0 = operands[0];
1607 register rtx operand1 = operands[1];
1608 register rtx tem;
1609
1610 /* We can only handle indexed addresses in the destination operand
1611 of floating point stores. Thus, we need to break out indexed
1612 addresses from the destination operand. */
1613 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1614 {
1615 gcc_assert (can_create_pseudo_p ());
1616
1617 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1618 operand0 = replace_equiv_address (operand0, tem);
1619 }
1620
1621 /* On targets with non-equivalent space registers, break out unscaled
1622 indexed addresses from the source operand before the final CSE.
1623 We have to do this because the REG_POINTER flag is not correctly
1624 carried through various optimization passes and CSE may substitute
1625 a pseudo without the pointer set for one with the pointer set. As
1626 a result, we loose various opportunities to create insns with
1627 unscaled indexed addresses. */
1628 if (!TARGET_NO_SPACE_REGS
1629 && !cse_not_expected
1630 && GET_CODE (operand1) == MEM
1631 && GET_CODE (XEXP (operand1, 0)) == PLUS
1632 && REG_P (XEXP (XEXP (operand1, 0), 0))
1633 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1634 operand1
1635 = replace_equiv_address (operand1,
1636 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1637
1638 if (scratch_reg
1639 && reload_in_progress && GET_CODE (operand0) == REG
1640 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1641 operand0 = reg_equiv_mem (REGNO (operand0));
1642 else if (scratch_reg
1643 && reload_in_progress && GET_CODE (operand0) == SUBREG
1644 && GET_CODE (SUBREG_REG (operand0)) == REG
1645 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1646 {
1647 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1648 the code which tracks sets/uses for delete_output_reload. */
1649 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1650 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1651 SUBREG_BYTE (operand0));
1652 operand0 = alter_subreg (&temp, true);
1653 }
1654
1655 if (scratch_reg
1656 && reload_in_progress && GET_CODE (operand1) == REG
1657 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1658 operand1 = reg_equiv_mem (REGNO (operand1));
1659 else if (scratch_reg
1660 && reload_in_progress && GET_CODE (operand1) == SUBREG
1661 && GET_CODE (SUBREG_REG (operand1)) == REG
1662 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1663 {
1664 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1665 the code which tracks sets/uses for delete_output_reload. */
1666 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1667 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1668 SUBREG_BYTE (operand1));
1669 operand1 = alter_subreg (&temp, true);
1670 }
1671
1672 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1673 && ((tem = find_replacement (&XEXP (operand0, 0)))
1674 != XEXP (operand0, 0)))
1675 operand0 = replace_equiv_address (operand0, tem);
1676
1677 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1678 && ((tem = find_replacement (&XEXP (operand1, 0)))
1679 != XEXP (operand1, 0)))
1680 operand1 = replace_equiv_address (operand1, tem);
1681
1682 /* Handle secondary reloads for loads/stores of FP registers from
1683 REG+D addresses where D does not fit in 5 or 14 bits, including
1684 (subreg (mem (addr))) cases. */
1685 if (scratch_reg
1686 && fp_reg_operand (operand0, mode)
1687 && (MEM_P (operand1)
1688 || (GET_CODE (operand1) == SUBREG
1689 && MEM_P (XEXP (operand1, 0))))
1690 && !floating_point_store_memory_operand (operand1, mode))
1691 {
1692 if (GET_CODE (operand1) == SUBREG)
1693 operand1 = XEXP (operand1, 0);
1694
1695 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1696 it in WORD_MODE regardless of what mode it was originally given
1697 to us. */
1698 scratch_reg = force_mode (word_mode, scratch_reg);
1699
1700 /* D might not fit in 14 bits either; for such cases load D into
1701 scratch reg. */
1702 if (reg_plus_base_memory_operand (operand1, mode)
1703 && !(TARGET_PA_20
1704 && !TARGET_ELF32
1705 && INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))
1706 {
1707 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1708 emit_move_insn (scratch_reg,
1709 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1710 Pmode,
1711 XEXP (XEXP (operand1, 0), 0),
1712 scratch_reg));
1713 }
1714 else
1715 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1716 emit_insn (gen_rtx_SET (operand0,
1717 replace_equiv_address (operand1, scratch_reg)));
1718 return 1;
1719 }
1720 else if (scratch_reg
1721 && fp_reg_operand (operand1, mode)
1722 && (MEM_P (operand0)
1723 || (GET_CODE (operand0) == SUBREG
1724 && MEM_P (XEXP (operand0, 0))))
1725 && !floating_point_store_memory_operand (operand0, mode))
1726 {
1727 if (GET_CODE (operand0) == SUBREG)
1728 operand0 = XEXP (operand0, 0);
1729
1730 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1731 it in WORD_MODE regardless of what mode it was originally given
1732 to us. */
1733 scratch_reg = force_mode (word_mode, scratch_reg);
1734
1735 /* D might not fit in 14 bits either; for such cases load D into
1736 scratch reg. */
1737 if (reg_plus_base_memory_operand (operand0, mode)
1738 && !(TARGET_PA_20
1739 && !TARGET_ELF32
1740 && INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))
1741 {
1742 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1743 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1744 0)),
1745 Pmode,
1746 XEXP (XEXP (operand0, 0),
1747 0),
1748 scratch_reg));
1749 }
1750 else
1751 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1752 emit_insn (gen_rtx_SET (replace_equiv_address (operand0, scratch_reg),
1753 operand1));
1754 return 1;
1755 }
1756 /* Handle secondary reloads for loads of FP registers from constant
1757 expressions by forcing the constant into memory. For the most part,
1758 this is only necessary for SImode and DImode.
1759
1760 Use scratch_reg to hold the address of the memory location. */
1761 else if (scratch_reg
1762 && CONSTANT_P (operand1)
1763 && fp_reg_operand (operand0, mode))
1764 {
1765 rtx const_mem, xoperands[2];
1766
1767 if (operand1 == CONST0_RTX (mode))
1768 {
1769 emit_insn (gen_rtx_SET (operand0, operand1));
1770 return 1;
1771 }
1772
1773 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1774 it in WORD_MODE regardless of what mode it was originally given
1775 to us. */
1776 scratch_reg = force_mode (word_mode, scratch_reg);
1777
1778 /* Force the constant into memory and put the address of the
1779 memory location into scratch_reg. */
1780 const_mem = force_const_mem (mode, operand1);
1781 xoperands[0] = scratch_reg;
1782 xoperands[1] = XEXP (const_mem, 0);
1783 pa_emit_move_sequence (xoperands, Pmode, 0);
1784
1785 /* Now load the destination register. */
1786 emit_insn (gen_rtx_SET (operand0,
1787 replace_equiv_address (const_mem, scratch_reg)));
1788 return 1;
1789 }
1790 /* Handle secondary reloads for SAR. These occur when trying to load
1791 the SAR from memory or a constant. */
1792 else if (scratch_reg
1793 && GET_CODE (operand0) == REG
1794 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1795 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1796 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1797 {
1798 /* D might not fit in 14 bits either; for such cases load D into
1799 scratch reg. */
1800 if (GET_CODE (operand1) == MEM
1801 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1802 {
1803 /* We are reloading the address into the scratch register, so we
1804 want to make sure the scratch register is a full register. */
1805 scratch_reg = force_mode (word_mode, scratch_reg);
1806
1807 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1808 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1809 0)),
1810 Pmode,
1811 XEXP (XEXP (operand1, 0),
1812 0),
1813 scratch_reg));
1814
1815 /* Now we are going to load the scratch register from memory,
1816 we want to load it in the same width as the original MEM,
1817 which must be the same as the width of the ultimate destination,
1818 OPERAND0. */
1819 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1820
1821 emit_move_insn (scratch_reg,
1822 replace_equiv_address (operand1, scratch_reg));
1823 }
1824 else
1825 {
1826 /* We want to load the scratch register using the same mode as
1827 the ultimate destination. */
1828 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1829
1830 emit_move_insn (scratch_reg, operand1);
1831 }
1832
1833 /* And emit the insn to set the ultimate destination. We know that
1834 the scratch register has the same mode as the destination at this
1835 point. */
1836 emit_move_insn (operand0, scratch_reg);
1837 return 1;
1838 }
1839 /* Handle the most common case: storing into a register. */
1840 else if (register_operand (operand0, mode))
1841 {
1842 /* Legitimize TLS symbol references. This happens for references
1843 that aren't a legitimate constant. */
1844 if (PA_SYMBOL_REF_TLS_P (operand1))
1845 operand1 = legitimize_tls_address (operand1);
1846
1847 if (register_operand (operand1, mode)
1848 || (GET_CODE (operand1) == CONST_INT
1849 && pa_cint_ok_for_move (UINTVAL (operand1)))
1850 || (operand1 == CONST0_RTX (mode))
1851 || (GET_CODE (operand1) == HIGH
1852 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1853 /* Only `general_operands' can come here, so MEM is ok. */
1854 || GET_CODE (operand1) == MEM)
1855 {
1856 /* Various sets are created during RTL generation which don't
1857 have the REG_POINTER flag correctly set. After the CSE pass,
1858 instruction recognition can fail if we don't consistently
1859 set this flag when performing register copies. This should
1860 also improve the opportunities for creating insns that use
1861 unscaled indexing. */
1862 if (REG_P (operand0) && REG_P (operand1))
1863 {
1864 if (REG_POINTER (operand1)
1865 && !REG_POINTER (operand0)
1866 && !HARD_REGISTER_P (operand0))
1867 copy_reg_pointer (operand0, operand1);
1868 }
1869
1870 /* When MEMs are broken out, the REG_POINTER flag doesn't
1871 get set. In some cases, we can set the REG_POINTER flag
1872 from the declaration for the MEM. */
1873 if (REG_P (operand0)
1874 && GET_CODE (operand1) == MEM
1875 && !REG_POINTER (operand0))
1876 {
1877 tree decl = MEM_EXPR (operand1);
1878
1879 /* Set the register pointer flag and register alignment
1880 if the declaration for this memory reference is a
1881 pointer type. */
1882 if (decl)
1883 {
1884 tree type;
1885
1886 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1887 tree operand 1. */
1888 if (TREE_CODE (decl) == COMPONENT_REF)
1889 decl = TREE_OPERAND (decl, 1);
1890
1891 type = TREE_TYPE (decl);
1892 type = strip_array_types (type);
1893
1894 if (POINTER_TYPE_P (type))
1895 {
1896 int align;
1897
1898 type = TREE_TYPE (type);
1899 /* Using TYPE_ALIGN_OK is rather conservative as
1900 only the ada frontend actually sets it. */
1901 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1902 : BITS_PER_UNIT);
1903 mark_reg_pointer (operand0, align);
1904 }
1905 }
1906 }
1907
1908 emit_insn (gen_rtx_SET (operand0, operand1));
1909 return 1;
1910 }
1911 }
1912 else if (GET_CODE (operand0) == MEM)
1913 {
1914 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1915 && !(reload_in_progress || reload_completed))
1916 {
1917 rtx temp = gen_reg_rtx (DFmode);
1918
1919 emit_insn (gen_rtx_SET (temp, operand1));
1920 emit_insn (gen_rtx_SET (operand0, temp));
1921 return 1;
1922 }
1923 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1924 {
1925 /* Run this case quickly. */
1926 emit_insn (gen_rtx_SET (operand0, operand1));
1927 return 1;
1928 }
1929 if (! (reload_in_progress || reload_completed))
1930 {
1931 operands[0] = validize_mem (operand0);
1932 operands[1] = operand1 = force_reg (mode, operand1);
1933 }
1934 }
1935
1936 /* Simplify the source if we need to.
1937 Note we do have to handle function labels here, even though we do
1938 not consider them legitimate constants. Loop optimizations can
1939 call the emit_move_xxx with one as a source. */
1940 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1941 || (GET_CODE (operand1) == HIGH
1942 && symbolic_operand (XEXP (operand1, 0), mode))
1943 || function_label_operand (operand1, VOIDmode)
1944 || tls_referenced_p (operand1))
1945 {
1946 int ishighonly = 0;
1947
1948 if (GET_CODE (operand1) == HIGH)
1949 {
1950 ishighonly = 1;
1951 operand1 = XEXP (operand1, 0);
1952 }
1953 if (symbolic_operand (operand1, mode))
1954 {
1955 /* Argh. The assembler and linker can't handle arithmetic
1956 involving plabels.
1957
1958 So we force the plabel into memory, load operand0 from
1959 the memory location, then add in the constant part. */
1960 if ((GET_CODE (operand1) == CONST
1961 && GET_CODE (XEXP (operand1, 0)) == PLUS
1962 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1963 VOIDmode))
1964 || function_label_operand (operand1, VOIDmode))
1965 {
1966 rtx temp, const_part;
1967
1968 /* Figure out what (if any) scratch register to use. */
1969 if (reload_in_progress || reload_completed)
1970 {
1971 scratch_reg = scratch_reg ? scratch_reg : operand0;
1972 /* SCRATCH_REG will hold an address and maybe the actual
1973 data. We want it in WORD_MODE regardless of what mode it
1974 was originally given to us. */
1975 scratch_reg = force_mode (word_mode, scratch_reg);
1976 }
1977 else if (flag_pic)
1978 scratch_reg = gen_reg_rtx (Pmode);
1979
1980 if (GET_CODE (operand1) == CONST)
1981 {
1982 /* Save away the constant part of the expression. */
1983 const_part = XEXP (XEXP (operand1, 0), 1);
1984 gcc_assert (GET_CODE (const_part) == CONST_INT);
1985
1986 /* Force the function label into memory. */
1987 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1988 }
1989 else
1990 {
1991 /* No constant part. */
1992 const_part = NULL_RTX;
1993
1994 /* Force the function label into memory. */
1995 temp = force_const_mem (mode, operand1);
1996 }
1997
1998
1999 /* Get the address of the memory location. PIC-ify it if
2000 necessary. */
2001 temp = XEXP (temp, 0);
2002 if (flag_pic)
2003 temp = legitimize_pic_address (temp, mode, scratch_reg);
2004
2005 /* Put the address of the memory location into our destination
2006 register. */
2007 operands[1] = temp;
2008 pa_emit_move_sequence (operands, mode, scratch_reg);
2009
2010 /* Now load from the memory location into our destination
2011 register. */
2012 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2013 pa_emit_move_sequence (operands, mode, scratch_reg);
2014
2015 /* And add back in the constant part. */
2016 if (const_part != NULL_RTX)
2017 expand_inc (operand0, const_part);
2018
2019 return 1;
2020 }
2021
2022 if (flag_pic)
2023 {
2024 rtx_insn *insn;
2025 rtx temp;
2026
2027 if (reload_in_progress || reload_completed)
2028 {
2029 temp = scratch_reg ? scratch_reg : operand0;
2030 /* TEMP will hold an address and maybe the actual
2031 data. We want it in WORD_MODE regardless of what mode it
2032 was originally given to us. */
2033 temp = force_mode (word_mode, temp);
2034 }
2035 else
2036 temp = gen_reg_rtx (Pmode);
2037
2038 /* Force (const (plus (symbol) (const_int))) to memory
2039 if the const_int will not fit in 14 bits. Although
2040 this requires a relocation, the instruction sequence
2041 needed to load the value is shorter. */
2042 if (GET_CODE (operand1) == CONST
2043 && GET_CODE (XEXP (operand1, 0)) == PLUS
2044 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2045 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2046 {
2047 rtx x, m = force_const_mem (mode, operand1);
2048
2049 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2050 x = replace_equiv_address (m, x);
2051 insn = emit_move_insn (operand0, x);
2052 }
2053 else
2054 {
2055 operands[1] = legitimize_pic_address (operand1, mode, temp);
2056 if (REG_P (operand0) && REG_P (operands[1]))
2057 copy_reg_pointer (operand0, operands[1]);
2058 insn = emit_move_insn (operand0, operands[1]);
2059 }
2060
2061 /* Put a REG_EQUAL note on this insn. */
2062 set_unique_reg_note (insn, REG_EQUAL, operand1);
2063 }
2064 /* On the HPPA, references to data space are supposed to use dp,
2065 register 27, but showing it in the RTL inhibits various cse
2066 and loop optimizations. */
2067 else
2068 {
2069 rtx temp, set;
2070
2071 if (reload_in_progress || reload_completed)
2072 {
2073 temp = scratch_reg ? scratch_reg : operand0;
2074 /* TEMP will hold an address and maybe the actual
2075 data. We want it in WORD_MODE regardless of what mode it
2076 was originally given to us. */
2077 temp = force_mode (word_mode, temp);
2078 }
2079 else
2080 temp = gen_reg_rtx (mode);
2081
2082 /* Loading a SYMBOL_REF into a register makes that register
2083 safe to be used as the base in an indexed address.
2084
2085 Don't mark hard registers though. That loses. */
2086 if (GET_CODE (operand0) == REG
2087 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2088 mark_reg_pointer (operand0, BITS_PER_UNIT);
2089 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2090 mark_reg_pointer (temp, BITS_PER_UNIT);
2091
2092 if (ishighonly)
2093 set = gen_rtx_SET (operand0, temp);
2094 else
2095 set = gen_rtx_SET (operand0,
2096 gen_rtx_LO_SUM (mode, temp, operand1));
2097
2098 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2099 emit_insn (set);
2100
2101 }
2102 return 1;
2103 }
2104 else if (tls_referenced_p (operand1))
2105 {
2106 rtx tmp = operand1;
2107 rtx addend = NULL;
2108
2109 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2110 {
2111 addend = XEXP (XEXP (tmp, 0), 1);
2112 tmp = XEXP (XEXP (tmp, 0), 0);
2113 }
2114
2115 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2116 tmp = legitimize_tls_address (tmp);
2117 if (addend)
2118 {
2119 tmp = gen_rtx_PLUS (mode, tmp, addend);
2120 tmp = force_operand (tmp, operands[0]);
2121 }
2122 operands[1] = tmp;
2123 }
2124 else if (GET_CODE (operand1) != CONST_INT
2125 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2126 {
2127 rtx temp;
2128 rtx_insn *insn;
2129 rtx op1 = operand1;
2130 HOST_WIDE_INT value = 0;
2131 HOST_WIDE_INT insv = 0;
2132 int insert = 0;
2133
2134 if (GET_CODE (operand1) == CONST_INT)
2135 value = INTVAL (operand1);
2136
2137 if (TARGET_64BIT
2138 && GET_CODE (operand1) == CONST_INT
2139 && HOST_BITS_PER_WIDE_INT > 32
2140 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2141 {
2142 HOST_WIDE_INT nval;
2143
2144 /* Extract the low order 32 bits of the value and sign extend.
2145 If the new value is the same as the original value, we can
2146 can use the original value as-is. If the new value is
2147 different, we use it and insert the most-significant 32-bits
2148 of the original value into the final result. */
2149 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2150 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2151 if (value != nval)
2152 {
2153 #if HOST_BITS_PER_WIDE_INT > 32
2154 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2155 #endif
2156 insert = 1;
2157 value = nval;
2158 operand1 = GEN_INT (nval);
2159 }
2160 }
2161
2162 if (reload_in_progress || reload_completed)
2163 temp = scratch_reg ? scratch_reg : operand0;
2164 else
2165 temp = gen_reg_rtx (mode);
2166
2167 /* We don't directly split DImode constants on 32-bit targets
2168 because PLUS uses an 11-bit immediate and the insn sequence
2169 generated is not as efficient as the one using HIGH/LO_SUM. */
2170 if (GET_CODE (operand1) == CONST_INT
2171 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2172 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2173 && !insert)
2174 {
2175 /* Directly break constant into high and low parts. This
2176 provides better optimization opportunities because various
2177 passes recognize constants split with PLUS but not LO_SUM.
2178 We use a 14-bit signed low part except when the addition
2179 of 0x4000 to the high part might change the sign of the
2180 high part. */
2181 HOST_WIDE_INT low = value & 0x3fff;
2182 HOST_WIDE_INT high = value & ~ 0x3fff;
2183
2184 if (low >= 0x2000)
2185 {
2186 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2187 high += 0x2000;
2188 else
2189 high += 0x4000;
2190 }
2191
2192 low = value - high;
2193
2194 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2195 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2196 }
2197 else
2198 {
2199 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2200 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2201 }
2202
2203 insn = emit_move_insn (operands[0], operands[1]);
2204
2205 /* Now insert the most significant 32 bits of the value
2206 into the register. When we don't have a second register
2207 available, it could take up to nine instructions to load
2208 a 64-bit integer constant. Prior to reload, we force
2209 constants that would take more than three instructions
2210 to load to the constant pool. During and after reload,
2211 we have to handle all possible values. */
2212 if (insert)
2213 {
2214 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2215 register and the value to be inserted is outside the
2216 range that can be loaded with three depdi instructions. */
2217 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2218 {
2219 operand1 = GEN_INT (insv);
2220
2221 emit_insn (gen_rtx_SET (temp,
2222 gen_rtx_HIGH (mode, operand1)));
2223 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2224 if (mode == DImode)
2225 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2226 const0_rtx, temp));
2227 else
2228 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2229 const0_rtx, temp));
2230 }
2231 else
2232 {
2233 int len = 5, pos = 27;
2234
2235 /* Insert the bits using the depdi instruction. */
2236 while (pos >= 0)
2237 {
2238 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2239 HOST_WIDE_INT sign = v5 < 0;
2240
2241 /* Left extend the insertion. */
2242 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2243 while (pos > 0 && (insv & 1) == sign)
2244 {
2245 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2246 len += 1;
2247 pos -= 1;
2248 }
2249
2250 if (mode == DImode)
2251 insn = emit_insn (gen_insvdi (operand0,
2252 GEN_INT (len),
2253 GEN_INT (pos),
2254 GEN_INT (v5)));
2255 else
2256 insn = emit_insn (gen_insvsi (operand0,
2257 GEN_INT (len),
2258 GEN_INT (pos),
2259 GEN_INT (v5)));
2260
2261 len = pos > 0 && pos < 5 ? pos : 5;
2262 pos -= len;
2263 }
2264 }
2265 }
2266
2267 set_unique_reg_note (insn, REG_EQUAL, op1);
2268
2269 return 1;
2270 }
2271 }
2272 /* Now have insn-emit do whatever it normally does. */
2273 return 0;
2274 }
2275
2276 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2277 it will need a link/runtime reloc). */
2278
2279 int
2280 pa_reloc_needed (tree exp)
2281 {
2282 int reloc = 0;
2283
2284 switch (TREE_CODE (exp))
2285 {
2286 case ADDR_EXPR:
2287 return 1;
2288
2289 case POINTER_PLUS_EXPR:
2290 case PLUS_EXPR:
2291 case MINUS_EXPR:
2292 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2293 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2294 break;
2295
2296 CASE_CONVERT:
2297 case NON_LVALUE_EXPR:
2298 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2299 break;
2300
2301 case CONSTRUCTOR:
2302 {
2303 tree value;
2304 unsigned HOST_WIDE_INT ix;
2305
2306 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2307 if (value)
2308 reloc |= pa_reloc_needed (value);
2309 }
2310 break;
2311
2312 case ERROR_MARK:
2313 break;
2314
2315 default:
2316 break;
2317 }
2318 return reloc;
2319 }
2320
2321 \f
2322 /* Return the best assembler insn template
2323 for moving operands[1] into operands[0] as a fullword. */
2324 const char *
2325 pa_singlemove_string (rtx *operands)
2326 {
2327 HOST_WIDE_INT intval;
2328
2329 if (GET_CODE (operands[0]) == MEM)
2330 return "stw %r1,%0";
2331 if (GET_CODE (operands[1]) == MEM)
2332 return "ldw %1,%0";
2333 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2334 {
2335 long i;
2336
2337 gcc_assert (GET_MODE (operands[1]) == SFmode);
2338
2339 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2340 bit pattern. */
2341 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2342
2343 operands[1] = GEN_INT (i);
2344 /* Fall through to CONST_INT case. */
2345 }
2346 if (GET_CODE (operands[1]) == CONST_INT)
2347 {
2348 intval = INTVAL (operands[1]);
2349
2350 if (VAL_14_BITS_P (intval))
2351 return "ldi %1,%0";
2352 else if ((intval & 0x7ff) == 0)
2353 return "ldil L'%1,%0";
2354 else if (pa_zdepi_cint_p (intval))
2355 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2356 else
2357 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2358 }
2359 return "copy %1,%0";
2360 }
2361 \f
2362
2363 /* Compute position (in OP[1]) and width (in OP[2])
2364 useful for copying IMM to a register using the zdepi
2365 instructions. Store the immediate value to insert in OP[0]. */
2366 static void
2367 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2368 {
2369 int lsb, len;
2370
2371 /* Find the least significant set bit in IMM. */
2372 for (lsb = 0; lsb < 32; lsb++)
2373 {
2374 if ((imm & 1) != 0)
2375 break;
2376 imm >>= 1;
2377 }
2378
2379 /* Choose variants based on *sign* of the 5-bit field. */
2380 if ((imm & 0x10) == 0)
2381 len = (lsb <= 28) ? 4 : 32 - lsb;
2382 else
2383 {
2384 /* Find the width of the bitstring in IMM. */
2385 for (len = 5; len < 32 - lsb; len++)
2386 {
2387 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2388 break;
2389 }
2390
2391 /* Sign extend IMM as a 5-bit value. */
2392 imm = (imm & 0xf) - 0x10;
2393 }
2394
2395 op[0] = imm;
2396 op[1] = 31 - lsb;
2397 op[2] = len;
2398 }
2399
2400 /* Compute position (in OP[1]) and width (in OP[2])
2401 useful for copying IMM to a register using the depdi,z
2402 instructions. Store the immediate value to insert in OP[0]. */
2403
2404 static void
2405 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2406 {
2407 int lsb, len, maxlen;
2408
2409 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2410
2411 /* Find the least significant set bit in IMM. */
2412 for (lsb = 0; lsb < maxlen; lsb++)
2413 {
2414 if ((imm & 1) != 0)
2415 break;
2416 imm >>= 1;
2417 }
2418
2419 /* Choose variants based on *sign* of the 5-bit field. */
2420 if ((imm & 0x10) == 0)
2421 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2422 else
2423 {
2424 /* Find the width of the bitstring in IMM. */
2425 for (len = 5; len < maxlen - lsb; len++)
2426 {
2427 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2428 break;
2429 }
2430
2431 /* Extend length if host is narrow and IMM is negative. */
2432 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2433 len += 32;
2434
2435 /* Sign extend IMM as a 5-bit value. */
2436 imm = (imm & 0xf) - 0x10;
2437 }
2438
2439 op[0] = imm;
2440 op[1] = 63 - lsb;
2441 op[2] = len;
2442 }
2443
2444 /* Output assembler code to perform a doubleword move insn
2445 with operands OPERANDS. */
2446
2447 const char *
2448 pa_output_move_double (rtx *operands)
2449 {
2450 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2451 rtx latehalf[2];
2452 rtx addreg0 = 0, addreg1 = 0;
2453 int highonly = 0;
2454
2455 /* First classify both operands. */
2456
2457 if (REG_P (operands[0]))
2458 optype0 = REGOP;
2459 else if (offsettable_memref_p (operands[0]))
2460 optype0 = OFFSOP;
2461 else if (GET_CODE (operands[0]) == MEM)
2462 optype0 = MEMOP;
2463 else
2464 optype0 = RNDOP;
2465
2466 if (REG_P (operands[1]))
2467 optype1 = REGOP;
2468 else if (CONSTANT_P (operands[1]))
2469 optype1 = CNSTOP;
2470 else if (offsettable_memref_p (operands[1]))
2471 optype1 = OFFSOP;
2472 else if (GET_CODE (operands[1]) == MEM)
2473 optype1 = MEMOP;
2474 else
2475 optype1 = RNDOP;
2476
2477 /* Check for the cases that the operand constraints are not
2478 supposed to allow to happen. */
2479 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2480
2481 /* Handle copies between general and floating registers. */
2482
2483 if (optype0 == REGOP && optype1 == REGOP
2484 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2485 {
2486 if (FP_REG_P (operands[0]))
2487 {
2488 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2489 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2490 return "{fldds|fldd} -16(%%sp),%0";
2491 }
2492 else
2493 {
2494 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2495 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2496 return "{ldws|ldw} -12(%%sp),%R0";
2497 }
2498 }
2499
2500 /* Handle auto decrementing and incrementing loads and stores
2501 specifically, since the structure of the function doesn't work
2502 for them without major modification. Do it better when we learn
2503 this port about the general inc/dec addressing of PA.
2504 (This was written by tege. Chide him if it doesn't work.) */
2505
2506 if (optype0 == MEMOP)
2507 {
2508 /* We have to output the address syntax ourselves, since print_operand
2509 doesn't deal with the addresses we want to use. Fix this later. */
2510
2511 rtx addr = XEXP (operands[0], 0);
2512 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2513 {
2514 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2515
2516 operands[0] = XEXP (addr, 0);
2517 gcc_assert (GET_CODE (operands[1]) == REG
2518 && GET_CODE (operands[0]) == REG);
2519
2520 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2521
2522 /* No overlap between high target register and address
2523 register. (We do this in a non-obvious way to
2524 save a register file writeback) */
2525 if (GET_CODE (addr) == POST_INC)
2526 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2527 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2528 }
2529 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2530 {
2531 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2532
2533 operands[0] = XEXP (addr, 0);
2534 gcc_assert (GET_CODE (operands[1]) == REG
2535 && GET_CODE (operands[0]) == REG);
2536
2537 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2538 /* No overlap between high target register and address
2539 register. (We do this in a non-obvious way to save a
2540 register file writeback) */
2541 if (GET_CODE (addr) == PRE_INC)
2542 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2543 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2544 }
2545 }
2546 if (optype1 == MEMOP)
2547 {
2548 /* We have to output the address syntax ourselves, since print_operand
2549 doesn't deal with the addresses we want to use. Fix this later. */
2550
2551 rtx addr = XEXP (operands[1], 0);
2552 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2553 {
2554 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2555
2556 operands[1] = XEXP (addr, 0);
2557 gcc_assert (GET_CODE (operands[0]) == REG
2558 && GET_CODE (operands[1]) == REG);
2559
2560 if (!reg_overlap_mentioned_p (high_reg, addr))
2561 {
2562 /* No overlap between high target register and address
2563 register. (We do this in a non-obvious way to
2564 save a register file writeback) */
2565 if (GET_CODE (addr) == POST_INC)
2566 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2567 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2568 }
2569 else
2570 {
2571 /* This is an undefined situation. We should load into the
2572 address register *and* update that register. Probably
2573 we don't need to handle this at all. */
2574 if (GET_CODE (addr) == POST_INC)
2575 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2576 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2577 }
2578 }
2579 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2580 {
2581 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2582
2583 operands[1] = XEXP (addr, 0);
2584 gcc_assert (GET_CODE (operands[0]) == REG
2585 && GET_CODE (operands[1]) == REG);
2586
2587 if (!reg_overlap_mentioned_p (high_reg, addr))
2588 {
2589 /* No overlap between high target register and address
2590 register. (We do this in a non-obvious way to
2591 save a register file writeback) */
2592 if (GET_CODE (addr) == PRE_INC)
2593 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2594 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2595 }
2596 else
2597 {
2598 /* This is an undefined situation. We should load into the
2599 address register *and* update that register. Probably
2600 we don't need to handle this at all. */
2601 if (GET_CODE (addr) == PRE_INC)
2602 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2603 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2604 }
2605 }
2606 else if (GET_CODE (addr) == PLUS
2607 && GET_CODE (XEXP (addr, 0)) == MULT)
2608 {
2609 rtx xoperands[4];
2610
2611 /* Load address into left half of destination register. */
2612 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2613 xoperands[1] = XEXP (addr, 1);
2614 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2615 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2616 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2617 xoperands);
2618 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2619 }
2620 else if (GET_CODE (addr) == PLUS
2621 && REG_P (XEXP (addr, 0))
2622 && REG_P (XEXP (addr, 1)))
2623 {
2624 rtx xoperands[3];
2625
2626 /* Load address into left half of destination register. */
2627 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2628 xoperands[1] = XEXP (addr, 0);
2629 xoperands[2] = XEXP (addr, 1);
2630 output_asm_insn ("{addl|add,l} %1,%2,%0",
2631 xoperands);
2632 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2633 }
2634 }
2635
2636 /* If an operand is an unoffsettable memory ref, find a register
2637 we can increment temporarily to make it refer to the second word. */
2638
2639 if (optype0 == MEMOP)
2640 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2641
2642 if (optype1 == MEMOP)
2643 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2644
2645 /* Ok, we can do one word at a time.
2646 Normally we do the low-numbered word first.
2647
2648 In either case, set up in LATEHALF the operands to use
2649 for the high-numbered word and in some cases alter the
2650 operands in OPERANDS to be suitable for the low-numbered word. */
2651
2652 if (optype0 == REGOP)
2653 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2654 else if (optype0 == OFFSOP)
2655 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2656 else
2657 latehalf[0] = operands[0];
2658
2659 if (optype1 == REGOP)
2660 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2661 else if (optype1 == OFFSOP)
2662 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2663 else if (optype1 == CNSTOP)
2664 {
2665 if (GET_CODE (operands[1]) == HIGH)
2666 {
2667 operands[1] = XEXP (operands[1], 0);
2668 highonly = 1;
2669 }
2670 split_double (operands[1], &operands[1], &latehalf[1]);
2671 }
2672 else
2673 latehalf[1] = operands[1];
2674
2675 /* If the first move would clobber the source of the second one,
2676 do them in the other order.
2677
2678 This can happen in two cases:
2679
2680 mem -> register where the first half of the destination register
2681 is the same register used in the memory's address. Reload
2682 can create such insns.
2683
2684 mem in this case will be either register indirect or register
2685 indirect plus a valid offset.
2686
2687 register -> register move where REGNO(dst) == REGNO(src + 1)
2688 someone (Tim/Tege?) claimed this can happen for parameter loads.
2689
2690 Handle mem -> register case first. */
2691 if (optype0 == REGOP
2692 && (optype1 == MEMOP || optype1 == OFFSOP)
2693 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2694 {
2695 /* Do the late half first. */
2696 if (addreg1)
2697 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2698 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2699
2700 /* Then clobber. */
2701 if (addreg1)
2702 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2703 return pa_singlemove_string (operands);
2704 }
2705
2706 /* Now handle register -> register case. */
2707 if (optype0 == REGOP && optype1 == REGOP
2708 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2709 {
2710 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2711 return pa_singlemove_string (operands);
2712 }
2713
2714 /* Normal case: do the two words, low-numbered first. */
2715
2716 output_asm_insn (pa_singlemove_string (operands), operands);
2717
2718 /* Make any unoffsettable addresses point at high-numbered word. */
2719 if (addreg0)
2720 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2721 if (addreg1)
2722 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2723
2724 /* Do high-numbered word. */
2725 if (highonly)
2726 output_asm_insn ("ldil L'%1,%0", latehalf);
2727 else
2728 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2729
2730 /* Undo the adds we just did. */
2731 if (addreg0)
2732 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2733 if (addreg1)
2734 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2735
2736 return "";
2737 }
2738 \f
2739 const char *
2740 pa_output_fp_move_double (rtx *operands)
2741 {
2742 if (FP_REG_P (operands[0]))
2743 {
2744 if (FP_REG_P (operands[1])
2745 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2746 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2747 else
2748 output_asm_insn ("fldd%F1 %1,%0", operands);
2749 }
2750 else if (FP_REG_P (operands[1]))
2751 {
2752 output_asm_insn ("fstd%F0 %1,%0", operands);
2753 }
2754 else
2755 {
2756 rtx xoperands[2];
2757
2758 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2759
2760 /* This is a pain. You have to be prepared to deal with an
2761 arbitrary address here including pre/post increment/decrement.
2762
2763 so avoid this in the MD. */
2764 gcc_assert (GET_CODE (operands[0]) == REG);
2765
2766 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2767 xoperands[0] = operands[0];
2768 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2769 }
2770 return "";
2771 }
2772 \f
2773 /* Return a REG that occurs in ADDR with coefficient 1.
2774 ADDR can be effectively incremented by incrementing REG. */
2775
2776 static rtx
2777 find_addr_reg (rtx addr)
2778 {
2779 while (GET_CODE (addr) == PLUS)
2780 {
2781 if (GET_CODE (XEXP (addr, 0)) == REG)
2782 addr = XEXP (addr, 0);
2783 else if (GET_CODE (XEXP (addr, 1)) == REG)
2784 addr = XEXP (addr, 1);
2785 else if (CONSTANT_P (XEXP (addr, 0)))
2786 addr = XEXP (addr, 1);
2787 else if (CONSTANT_P (XEXP (addr, 1)))
2788 addr = XEXP (addr, 0);
2789 else
2790 gcc_unreachable ();
2791 }
2792 gcc_assert (GET_CODE (addr) == REG);
2793 return addr;
2794 }
2795
2796 /* Emit code to perform a block move.
2797
2798 OPERANDS[0] is the destination pointer as a REG, clobbered.
2799 OPERANDS[1] is the source pointer as a REG, clobbered.
2800 OPERANDS[2] is a register for temporary storage.
2801 OPERANDS[3] is a register for temporary storage.
2802 OPERANDS[4] is the size as a CONST_INT
2803 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2804 OPERANDS[6] is another temporary register. */
2805
2806 const char *
2807 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2808 {
2809 int align = INTVAL (operands[5]);
2810 unsigned long n_bytes = INTVAL (operands[4]);
2811
2812 /* We can't move more than a word at a time because the PA
2813 has no longer integer move insns. (Could use fp mem ops?) */
2814 if (align > (TARGET_64BIT ? 8 : 4))
2815 align = (TARGET_64BIT ? 8 : 4);
2816
2817 /* Note that we know each loop below will execute at least twice
2818 (else we would have open-coded the copy). */
2819 switch (align)
2820 {
2821 case 8:
2822 /* Pre-adjust the loop counter. */
2823 operands[4] = GEN_INT (n_bytes - 16);
2824 output_asm_insn ("ldi %4,%2", operands);
2825
2826 /* Copying loop. */
2827 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2828 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2829 output_asm_insn ("std,ma %3,8(%0)", operands);
2830 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2831 output_asm_insn ("std,ma %6,8(%0)", operands);
2832
2833 /* Handle the residual. There could be up to 7 bytes of
2834 residual to copy! */
2835 if (n_bytes % 16 != 0)
2836 {
2837 operands[4] = GEN_INT (n_bytes % 8);
2838 if (n_bytes % 16 >= 8)
2839 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2840 if (n_bytes % 8 != 0)
2841 output_asm_insn ("ldd 0(%1),%6", operands);
2842 if (n_bytes % 16 >= 8)
2843 output_asm_insn ("std,ma %3,8(%0)", operands);
2844 if (n_bytes % 8 != 0)
2845 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2846 }
2847 return "";
2848
2849 case 4:
2850 /* Pre-adjust the loop counter. */
2851 operands[4] = GEN_INT (n_bytes - 8);
2852 output_asm_insn ("ldi %4,%2", operands);
2853
2854 /* Copying loop. */
2855 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2856 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2857 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2858 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2859 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2860
2861 /* Handle the residual. There could be up to 7 bytes of
2862 residual to copy! */
2863 if (n_bytes % 8 != 0)
2864 {
2865 operands[4] = GEN_INT (n_bytes % 4);
2866 if (n_bytes % 8 >= 4)
2867 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2868 if (n_bytes % 4 != 0)
2869 output_asm_insn ("ldw 0(%1),%6", operands);
2870 if (n_bytes % 8 >= 4)
2871 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2872 if (n_bytes % 4 != 0)
2873 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2874 }
2875 return "";
2876
2877 case 2:
2878 /* Pre-adjust the loop counter. */
2879 operands[4] = GEN_INT (n_bytes - 4);
2880 output_asm_insn ("ldi %4,%2", operands);
2881
2882 /* Copying loop. */
2883 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2884 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2885 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2886 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2887 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2888
2889 /* Handle the residual. */
2890 if (n_bytes % 4 != 0)
2891 {
2892 if (n_bytes % 4 >= 2)
2893 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2894 if (n_bytes % 2 != 0)
2895 output_asm_insn ("ldb 0(%1),%6", operands);
2896 if (n_bytes % 4 >= 2)
2897 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2898 if (n_bytes % 2 != 0)
2899 output_asm_insn ("stb %6,0(%0)", operands);
2900 }
2901 return "";
2902
2903 case 1:
2904 /* Pre-adjust the loop counter. */
2905 operands[4] = GEN_INT (n_bytes - 2);
2906 output_asm_insn ("ldi %4,%2", operands);
2907
2908 /* Copying loop. */
2909 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2910 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2911 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2912 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2913 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2914
2915 /* Handle the residual. */
2916 if (n_bytes % 2 != 0)
2917 {
2918 output_asm_insn ("ldb 0(%1),%3", operands);
2919 output_asm_insn ("stb %3,0(%0)", operands);
2920 }
2921 return "";
2922
2923 default:
2924 gcc_unreachable ();
2925 }
2926 }
2927
2928 /* Count the number of insns necessary to handle this block move.
2929
2930 Basic structure is the same as emit_block_move, except that we
2931 count insns rather than emit them. */
2932
2933 static int
2934 compute_movmem_length (rtx_insn *insn)
2935 {
2936 rtx pat = PATTERN (insn);
2937 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2938 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2939 unsigned int n_insns = 0;
2940
2941 /* We can't move more than four bytes at a time because the PA
2942 has no longer integer move insns. (Could use fp mem ops?) */
2943 if (align > (TARGET_64BIT ? 8 : 4))
2944 align = (TARGET_64BIT ? 8 : 4);
2945
2946 /* The basic copying loop. */
2947 n_insns = 6;
2948
2949 /* Residuals. */
2950 if (n_bytes % (2 * align) != 0)
2951 {
2952 if ((n_bytes % (2 * align)) >= align)
2953 n_insns += 2;
2954
2955 if ((n_bytes % align) != 0)
2956 n_insns += 2;
2957 }
2958
2959 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2960 return n_insns * 4;
2961 }
2962
2963 /* Emit code to perform a block clear.
2964
2965 OPERANDS[0] is the destination pointer as a REG, clobbered.
2966 OPERANDS[1] is a register for temporary storage.
2967 OPERANDS[2] is the size as a CONST_INT
2968 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2969
2970 const char *
2971 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2972 {
2973 int align = INTVAL (operands[3]);
2974 unsigned long n_bytes = INTVAL (operands[2]);
2975
2976 /* We can't clear more than a word at a time because the PA
2977 has no longer integer move insns. */
2978 if (align > (TARGET_64BIT ? 8 : 4))
2979 align = (TARGET_64BIT ? 8 : 4);
2980
2981 /* Note that we know each loop below will execute at least twice
2982 (else we would have open-coded the copy). */
2983 switch (align)
2984 {
2985 case 8:
2986 /* Pre-adjust the loop counter. */
2987 operands[2] = GEN_INT (n_bytes - 16);
2988 output_asm_insn ("ldi %2,%1", operands);
2989
2990 /* Loop. */
2991 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2992 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2993 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2994
2995 /* Handle the residual. There could be up to 7 bytes of
2996 residual to copy! */
2997 if (n_bytes % 16 != 0)
2998 {
2999 operands[2] = GEN_INT (n_bytes % 8);
3000 if (n_bytes % 16 >= 8)
3001 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3002 if (n_bytes % 8 != 0)
3003 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3004 }
3005 return "";
3006
3007 case 4:
3008 /* Pre-adjust the loop counter. */
3009 operands[2] = GEN_INT (n_bytes - 8);
3010 output_asm_insn ("ldi %2,%1", operands);
3011
3012 /* Loop. */
3013 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3014 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3015 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3016
3017 /* Handle the residual. There could be up to 7 bytes of
3018 residual to copy! */
3019 if (n_bytes % 8 != 0)
3020 {
3021 operands[2] = GEN_INT (n_bytes % 4);
3022 if (n_bytes % 8 >= 4)
3023 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3024 if (n_bytes % 4 != 0)
3025 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3026 }
3027 return "";
3028
3029 case 2:
3030 /* Pre-adjust the loop counter. */
3031 operands[2] = GEN_INT (n_bytes - 4);
3032 output_asm_insn ("ldi %2,%1", operands);
3033
3034 /* Loop. */
3035 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3036 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3037 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3038
3039 /* Handle the residual. */
3040 if (n_bytes % 4 != 0)
3041 {
3042 if (n_bytes % 4 >= 2)
3043 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3044 if (n_bytes % 2 != 0)
3045 output_asm_insn ("stb %%r0,0(%0)", operands);
3046 }
3047 return "";
3048
3049 case 1:
3050 /* Pre-adjust the loop counter. */
3051 operands[2] = GEN_INT (n_bytes - 2);
3052 output_asm_insn ("ldi %2,%1", operands);
3053
3054 /* Loop. */
3055 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3056 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3057 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3058
3059 /* Handle the residual. */
3060 if (n_bytes % 2 != 0)
3061 output_asm_insn ("stb %%r0,0(%0)", operands);
3062
3063 return "";
3064
3065 default:
3066 gcc_unreachable ();
3067 }
3068 }
3069
3070 /* Count the number of insns necessary to handle this block move.
3071
3072 Basic structure is the same as emit_block_move, except that we
3073 count insns rather than emit them. */
3074
3075 static int
3076 compute_clrmem_length (rtx_insn *insn)
3077 {
3078 rtx pat = PATTERN (insn);
3079 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3080 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3081 unsigned int n_insns = 0;
3082
3083 /* We can't clear more than a word at a time because the PA
3084 has no longer integer move insns. */
3085 if (align > (TARGET_64BIT ? 8 : 4))
3086 align = (TARGET_64BIT ? 8 : 4);
3087
3088 /* The basic loop. */
3089 n_insns = 4;
3090
3091 /* Residuals. */
3092 if (n_bytes % (2 * align) != 0)
3093 {
3094 if ((n_bytes % (2 * align)) >= align)
3095 n_insns++;
3096
3097 if ((n_bytes % align) != 0)
3098 n_insns++;
3099 }
3100
3101 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3102 return n_insns * 4;
3103 }
3104 \f
3105
3106 const char *
3107 pa_output_and (rtx *operands)
3108 {
3109 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3110 {
3111 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3112 int ls0, ls1, ms0, p, len;
3113
3114 for (ls0 = 0; ls0 < 32; ls0++)
3115 if ((mask & (1 << ls0)) == 0)
3116 break;
3117
3118 for (ls1 = ls0; ls1 < 32; ls1++)
3119 if ((mask & (1 << ls1)) != 0)
3120 break;
3121
3122 for (ms0 = ls1; ms0 < 32; ms0++)
3123 if ((mask & (1 << ms0)) == 0)
3124 break;
3125
3126 gcc_assert (ms0 == 32);
3127
3128 if (ls1 == 32)
3129 {
3130 len = ls0;
3131
3132 gcc_assert (len);
3133
3134 operands[2] = GEN_INT (len);
3135 return "{extru|extrw,u} %1,31,%2,%0";
3136 }
3137 else
3138 {
3139 /* We could use this `depi' for the case above as well, but `depi'
3140 requires one more register file access than an `extru'. */
3141
3142 p = 31 - ls0;
3143 len = ls1 - ls0;
3144
3145 operands[2] = GEN_INT (p);
3146 operands[3] = GEN_INT (len);
3147 return "{depi|depwi} 0,%2,%3,%0";
3148 }
3149 }
3150 else
3151 return "and %1,%2,%0";
3152 }
3153
3154 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3155 storing the result in operands[0]. */
3156 const char *
3157 pa_output_64bit_and (rtx *operands)
3158 {
3159 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3160 {
3161 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3162 int ls0, ls1, ms0, p, len;
3163
3164 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3165 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3166 break;
3167
3168 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3169 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3170 break;
3171
3172 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3173 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3174 break;
3175
3176 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3177
3178 if (ls1 == HOST_BITS_PER_WIDE_INT)
3179 {
3180 len = ls0;
3181
3182 gcc_assert (len);
3183
3184 operands[2] = GEN_INT (len);
3185 return "extrd,u %1,63,%2,%0";
3186 }
3187 else
3188 {
3189 /* We could use this `depi' for the case above as well, but `depi'
3190 requires one more register file access than an `extru'. */
3191
3192 p = 63 - ls0;
3193 len = ls1 - ls0;
3194
3195 operands[2] = GEN_INT (p);
3196 operands[3] = GEN_INT (len);
3197 return "depdi 0,%2,%3,%0";
3198 }
3199 }
3200 else
3201 return "and %1,%2,%0";
3202 }
3203
3204 const char *
3205 pa_output_ior (rtx *operands)
3206 {
3207 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3208 int bs0, bs1, p, len;
3209
3210 if (INTVAL (operands[2]) == 0)
3211 return "copy %1,%0";
3212
3213 for (bs0 = 0; bs0 < 32; bs0++)
3214 if ((mask & (1 << bs0)) != 0)
3215 break;
3216
3217 for (bs1 = bs0; bs1 < 32; bs1++)
3218 if ((mask & (1 << bs1)) == 0)
3219 break;
3220
3221 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3222
3223 p = 31 - bs0;
3224 len = bs1 - bs0;
3225
3226 operands[2] = GEN_INT (p);
3227 operands[3] = GEN_INT (len);
3228 return "{depi|depwi} -1,%2,%3,%0";
3229 }
3230
3231 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3232 storing the result in operands[0]. */
3233 const char *
3234 pa_output_64bit_ior (rtx *operands)
3235 {
3236 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3237 int bs0, bs1, p, len;
3238
3239 if (INTVAL (operands[2]) == 0)
3240 return "copy %1,%0";
3241
3242 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3243 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3244 break;
3245
3246 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3247 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3248 break;
3249
3250 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3251 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3252
3253 p = 63 - bs0;
3254 len = bs1 - bs0;
3255
3256 operands[2] = GEN_INT (p);
3257 operands[3] = GEN_INT (len);
3258 return "depdi -1,%2,%3,%0";
3259 }
3260 \f
3261 /* Target hook for assembling integer objects. This code handles
3262 aligned SI and DI integers specially since function references
3263 must be preceded by P%. */
3264
3265 static bool
3266 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3267 {
3268 if (size == UNITS_PER_WORD
3269 && aligned_p
3270 && function_label_operand (x, VOIDmode))
3271 {
3272 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3273
3274 /* We don't want an OPD when generating fast indirect calls. */
3275 if (!TARGET_FAST_INDIRECT_CALLS)
3276 fputs ("P%", asm_out_file);
3277
3278 output_addr_const (asm_out_file, x);
3279 fputc ('\n', asm_out_file);
3280 return true;
3281 }
3282 return default_assemble_integer (x, size, aligned_p);
3283 }
3284 \f
3285 /* Output an ascii string. */
3286 void
3287 pa_output_ascii (FILE *file, const char *p, int size)
3288 {
3289 int i;
3290 int chars_output;
3291 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3292
3293 /* The HP assembler can only take strings of 256 characters at one
3294 time. This is a limitation on input line length, *not* the
3295 length of the string. Sigh. Even worse, it seems that the
3296 restriction is in number of input characters (see \xnn &
3297 \whatever). So we have to do this very carefully. */
3298
3299 fputs ("\t.STRING \"", file);
3300
3301 chars_output = 0;
3302 for (i = 0; i < size; i += 4)
3303 {
3304 int co = 0;
3305 int io = 0;
3306 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3307 {
3308 register unsigned int c = (unsigned char) p[i + io];
3309
3310 if (c == '\"' || c == '\\')
3311 partial_output[co++] = '\\';
3312 if (c >= ' ' && c < 0177)
3313 partial_output[co++] = c;
3314 else
3315 {
3316 unsigned int hexd;
3317 partial_output[co++] = '\\';
3318 partial_output[co++] = 'x';
3319 hexd = c / 16 - 0 + '0';
3320 if (hexd > '9')
3321 hexd -= '9' - 'a' + 1;
3322 partial_output[co++] = hexd;
3323 hexd = c % 16 - 0 + '0';
3324 if (hexd > '9')
3325 hexd -= '9' - 'a' + 1;
3326 partial_output[co++] = hexd;
3327 }
3328 }
3329 if (chars_output + co > 243)
3330 {
3331 fputs ("\"\n\t.STRING \"", file);
3332 chars_output = 0;
3333 }
3334 fwrite (partial_output, 1, (size_t) co, file);
3335 chars_output += co;
3336 co = 0;
3337 }
3338 fputs ("\"\n", file);
3339 }
3340
3341 /* Try to rewrite floating point comparisons & branches to avoid
3342 useless add,tr insns.
3343
3344 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3345 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3346 first attempt to remove useless add,tr insns. It is zero
3347 for the second pass as reorg sometimes leaves bogus REG_DEAD
3348 notes lying around.
3349
3350 When CHECK_NOTES is zero we can only eliminate add,tr insns
3351 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3352 instructions. */
3353 static void
3354 remove_useless_addtr_insns (int check_notes)
3355 {
3356 rtx_insn *insn;
3357 static int pass = 0;
3358
3359 /* This is fairly cheap, so always run it when optimizing. */
3360 if (optimize > 0)
3361 {
3362 int fcmp_count = 0;
3363 int fbranch_count = 0;
3364
3365 /* Walk all the insns in this function looking for fcmp & fbranch
3366 instructions. Keep track of how many of each we find. */
3367 for (insn = get_insns (); insn; insn = next_insn (insn))
3368 {
3369 rtx tmp;
3370
3371 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3372 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3373 continue;
3374
3375 tmp = PATTERN (insn);
3376
3377 /* It must be a set. */
3378 if (GET_CODE (tmp) != SET)
3379 continue;
3380
3381 /* If the destination is CCFP, then we've found an fcmp insn. */
3382 tmp = SET_DEST (tmp);
3383 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3384 {
3385 fcmp_count++;
3386 continue;
3387 }
3388
3389 tmp = PATTERN (insn);
3390 /* If this is an fbranch instruction, bump the fbranch counter. */
3391 if (GET_CODE (tmp) == SET
3392 && SET_DEST (tmp) == pc_rtx
3393 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3394 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3395 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3396 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3397 {
3398 fbranch_count++;
3399 continue;
3400 }
3401 }
3402
3403
3404 /* Find all floating point compare + branch insns. If possible,
3405 reverse the comparison & the branch to avoid add,tr insns. */
3406 for (insn = get_insns (); insn; insn = next_insn (insn))
3407 {
3408 rtx tmp;
3409 rtx_insn *next;
3410
3411 /* Ignore anything that isn't an INSN. */
3412 if (! NONJUMP_INSN_P (insn))
3413 continue;
3414
3415 tmp = PATTERN (insn);
3416
3417 /* It must be a set. */
3418 if (GET_CODE (tmp) != SET)
3419 continue;
3420
3421 /* The destination must be CCFP, which is register zero. */
3422 tmp = SET_DEST (tmp);
3423 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3424 continue;
3425
3426 /* INSN should be a set of CCFP.
3427
3428 See if the result of this insn is used in a reversed FP
3429 conditional branch. If so, reverse our condition and
3430 the branch. Doing so avoids useless add,tr insns. */
3431 next = next_insn (insn);
3432 while (next)
3433 {
3434 /* Jumps, calls and labels stop our search. */
3435 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3436 break;
3437
3438 /* As does another fcmp insn. */
3439 if (NONJUMP_INSN_P (next)
3440 && GET_CODE (PATTERN (next)) == SET
3441 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3442 && REGNO (SET_DEST (PATTERN (next))) == 0)
3443 break;
3444
3445 next = next_insn (next);
3446 }
3447
3448 /* Is NEXT_INSN a branch? */
3449 if (next && JUMP_P (next))
3450 {
3451 rtx pattern = PATTERN (next);
3452
3453 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3454 and CCFP dies, then reverse our conditional and the branch
3455 to avoid the add,tr. */
3456 if (GET_CODE (pattern) == SET
3457 && SET_DEST (pattern) == pc_rtx
3458 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3459 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3460 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3461 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3462 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3463 && (fcmp_count == fbranch_count
3464 || (check_notes
3465 && find_regno_note (next, REG_DEAD, 0))))
3466 {
3467 /* Reverse the branch. */
3468 tmp = XEXP (SET_SRC (pattern), 1);
3469 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3470 XEXP (SET_SRC (pattern), 2) = tmp;
3471 INSN_CODE (next) = -1;
3472
3473 /* Reverse our condition. */
3474 tmp = PATTERN (insn);
3475 PUT_CODE (XEXP (tmp, 1),
3476 (reverse_condition_maybe_unordered
3477 (GET_CODE (XEXP (tmp, 1)))));
3478 }
3479 }
3480 }
3481 }
3482
3483 pass = !pass;
3484
3485 }
3486 \f
3487 /* You may have trouble believing this, but this is the 32 bit HP-PA
3488 stack layout. Wow.
3489
3490 Offset Contents
3491
3492 Variable arguments (optional; any number may be allocated)
3493
3494 SP-(4*(N+9)) arg word N
3495 : :
3496 SP-56 arg word 5
3497 SP-52 arg word 4
3498
3499 Fixed arguments (must be allocated; may remain unused)
3500
3501 SP-48 arg word 3
3502 SP-44 arg word 2
3503 SP-40 arg word 1
3504 SP-36 arg word 0
3505
3506 Frame Marker
3507
3508 SP-32 External Data Pointer (DP)
3509 SP-28 External sr4
3510 SP-24 External/stub RP (RP')
3511 SP-20 Current RP
3512 SP-16 Static Link
3513 SP-12 Clean up
3514 SP-8 Calling Stub RP (RP'')
3515 SP-4 Previous SP
3516
3517 Top of Frame
3518
3519 SP-0 Stack Pointer (points to next available address)
3520
3521 */
3522
3523 /* This function saves registers as follows. Registers marked with ' are
3524 this function's registers (as opposed to the previous function's).
3525 If a frame_pointer isn't needed, r4 is saved as a general register;
3526 the space for the frame pointer is still allocated, though, to keep
3527 things simple.
3528
3529
3530 Top of Frame
3531
3532 SP (FP') Previous FP
3533 SP + 4 Alignment filler (sigh)
3534 SP + 8 Space for locals reserved here.
3535 .
3536 .
3537 .
3538 SP + n All call saved register used.
3539 .
3540 .
3541 .
3542 SP + o All call saved fp registers used.
3543 .
3544 .
3545 .
3546 SP + p (SP') points to next available address.
3547
3548 */
3549
3550 /* Global variables set by output_function_prologue(). */
3551 /* Size of frame. Need to know this to emit return insns from
3552 leaf procedures. */
3553 static HOST_WIDE_INT actual_fsize, local_fsize;
3554 static int save_fregs;
3555
3556 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3557 Handle case where DISP > 8k by using the add_high_const patterns.
3558
3559 Note in DISP > 8k case, we will leave the high part of the address
3560 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3561
3562 static void
3563 store_reg (int reg, HOST_WIDE_INT disp, int base)
3564 {
3565 rtx dest, src, basereg;
3566 rtx_insn *insn;
3567
3568 src = gen_rtx_REG (word_mode, reg);
3569 basereg = gen_rtx_REG (Pmode, base);
3570 if (VAL_14_BITS_P (disp))
3571 {
3572 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3573 insn = emit_move_insn (dest, src);
3574 }
3575 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3576 {
3577 rtx delta = GEN_INT (disp);
3578 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3579
3580 emit_move_insn (tmpreg, delta);
3581 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3582 if (DO_FRAME_NOTES)
3583 {
3584 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3585 gen_rtx_SET (tmpreg,
3586 gen_rtx_PLUS (Pmode, basereg, delta)));
3587 RTX_FRAME_RELATED_P (insn) = 1;
3588 }
3589 dest = gen_rtx_MEM (word_mode, tmpreg);
3590 insn = emit_move_insn (dest, src);
3591 }
3592 else
3593 {
3594 rtx delta = GEN_INT (disp);
3595 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3596 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3597
3598 emit_move_insn (tmpreg, high);
3599 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3600 insn = emit_move_insn (dest, src);
3601 if (DO_FRAME_NOTES)
3602 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3603 gen_rtx_SET (gen_rtx_MEM (word_mode,
3604 gen_rtx_PLUS (word_mode,
3605 basereg,
3606 delta)),
3607 src));
3608 }
3609
3610 if (DO_FRAME_NOTES)
3611 RTX_FRAME_RELATED_P (insn) = 1;
3612 }
3613
3614 /* Emit RTL to store REG at the memory location specified by BASE and then
3615 add MOD to BASE. MOD must be <= 8k. */
3616
3617 static void
3618 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3619 {
3620 rtx basereg, srcreg, delta;
3621 rtx_insn *insn;
3622
3623 gcc_assert (VAL_14_BITS_P (mod));
3624
3625 basereg = gen_rtx_REG (Pmode, base);
3626 srcreg = gen_rtx_REG (word_mode, reg);
3627 delta = GEN_INT (mod);
3628
3629 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3630 if (DO_FRAME_NOTES)
3631 {
3632 RTX_FRAME_RELATED_P (insn) = 1;
3633
3634 /* RTX_FRAME_RELATED_P must be set on each frame related set
3635 in a parallel with more than one element. */
3636 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3637 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3638 }
3639 }
3640
3641 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3642 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3643 whether to add a frame note or not.
3644
3645 In the DISP > 8k case, we leave the high part of the address in %r1.
3646 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3647
3648 static void
3649 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3650 {
3651 rtx_insn *insn;
3652
3653 if (VAL_14_BITS_P (disp))
3654 {
3655 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3656 plus_constant (Pmode,
3657 gen_rtx_REG (Pmode, base), disp));
3658 }
3659 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3660 {
3661 rtx basereg = gen_rtx_REG (Pmode, base);
3662 rtx delta = GEN_INT (disp);
3663 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3664
3665 emit_move_insn (tmpreg, delta);
3666 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3667 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3668 if (DO_FRAME_NOTES)
3669 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3670 gen_rtx_SET (tmpreg,
3671 gen_rtx_PLUS (Pmode, basereg, delta)));
3672 }
3673 else
3674 {
3675 rtx basereg = gen_rtx_REG (Pmode, base);
3676 rtx delta = GEN_INT (disp);
3677 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3678
3679 emit_move_insn (tmpreg,
3680 gen_rtx_PLUS (Pmode, basereg,
3681 gen_rtx_HIGH (Pmode, delta)));
3682 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3683 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3684 }
3685
3686 if (DO_FRAME_NOTES && note)
3687 RTX_FRAME_RELATED_P (insn) = 1;
3688 }
3689
3690 HOST_WIDE_INT
3691 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3692 {
3693 int freg_saved = 0;
3694 int i, j;
3695
3696 /* The code in pa_expand_prologue and pa_expand_epilogue must
3697 be consistent with the rounding and size calculation done here.
3698 Change them at the same time. */
3699
3700 /* We do our own stack alignment. First, round the size of the
3701 stack locals up to a word boundary. */
3702 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3703
3704 /* Space for previous frame pointer + filler. If any frame is
3705 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3706 waste some space here for the sake of HP compatibility. The
3707 first slot is only used when the frame pointer is needed. */
3708 if (size || frame_pointer_needed)
3709 size += STARTING_FRAME_OFFSET;
3710
3711 /* If the current function calls __builtin_eh_return, then we need
3712 to allocate stack space for registers that will hold data for
3713 the exception handler. */
3714 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3715 {
3716 unsigned int i;
3717
3718 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3719 continue;
3720 size += i * UNITS_PER_WORD;
3721 }
3722
3723 /* Account for space used by the callee general register saves. */
3724 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3725 if (df_regs_ever_live_p (i))
3726 size += UNITS_PER_WORD;
3727
3728 /* Account for space used by the callee floating point register saves. */
3729 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3730 if (df_regs_ever_live_p (i)
3731 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3732 {
3733 freg_saved = 1;
3734
3735 /* We always save both halves of the FP register, so always
3736 increment the frame size by 8 bytes. */
3737 size += 8;
3738 }
3739
3740 /* If any of the floating registers are saved, account for the
3741 alignment needed for the floating point register save block. */
3742 if (freg_saved)
3743 {
3744 size = (size + 7) & ~7;
3745 if (fregs_live)
3746 *fregs_live = 1;
3747 }
3748
3749 /* The various ABIs include space for the outgoing parameters in the
3750 size of the current function's stack frame. We don't need to align
3751 for the outgoing arguments as their alignment is set by the final
3752 rounding for the frame as a whole. */
3753 size += crtl->outgoing_args_size;
3754
3755 /* Allocate space for the fixed frame marker. This space must be
3756 allocated for any function that makes calls or allocates
3757 stack space. */
3758 if (!crtl->is_leaf || size)
3759 size += TARGET_64BIT ? 48 : 32;
3760
3761 /* Finally, round to the preferred stack boundary. */
3762 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3763 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3764 }
3765
3766 /* Generate the assembly code for function entry. FILE is a stdio
3767 stream to output the code to. SIZE is an int: how many units of
3768 temporary storage to allocate.
3769
3770 Refer to the array `regs_ever_live' to determine which registers to
3771 save; `regs_ever_live[I]' is nonzero if register number I is ever
3772 used in the function. This function is responsible for knowing
3773 which registers should not be saved even if used. */
3774
3775 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3776 of memory. If any fpu reg is used in the function, we allocate
3777 such a block here, at the bottom of the frame, just in case it's needed.
3778
3779 If this function is a leaf procedure, then we may choose not
3780 to do a "save" insn. The decision about whether or not
3781 to do this is made in regclass.c. */
3782
3783 static void
3784 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3785 {
3786 /* The function's label and associated .PROC must never be
3787 separated and must be output *after* any profiling declarations
3788 to avoid changing spaces/subspaces within a procedure. */
3789 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3790 fputs ("\t.PROC\n", file);
3791
3792 /* pa_expand_prologue does the dirty work now. We just need
3793 to output the assembler directives which denote the start
3794 of a function. */
3795 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3796 if (crtl->is_leaf)
3797 fputs (",NO_CALLS", file);
3798 else
3799 fputs (",CALLS", file);
3800 if (rp_saved)
3801 fputs (",SAVE_RP", file);
3802
3803 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3804 at the beginning of the frame and that it is used as the frame
3805 pointer for the frame. We do this because our current frame
3806 layout doesn't conform to that specified in the HP runtime
3807 documentation and we need a way to indicate to programs such as
3808 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3809 isn't used by HP compilers but is supported by the assembler.
3810 However, SAVE_SP is supposed to indicate that the previous stack
3811 pointer has been saved in the frame marker. */
3812 if (frame_pointer_needed)
3813 fputs (",SAVE_SP", file);
3814
3815 /* Pass on information about the number of callee register saves
3816 performed in the prologue.
3817
3818 The compiler is supposed to pass the highest register number
3819 saved, the assembler then has to adjust that number before
3820 entering it into the unwind descriptor (to account for any
3821 caller saved registers with lower register numbers than the
3822 first callee saved register). */
3823 if (gr_saved)
3824 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3825
3826 if (fr_saved)
3827 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3828
3829 fputs ("\n\t.ENTRY\n", file);
3830
3831 remove_useless_addtr_insns (0);
3832 }
3833
3834 void
3835 pa_expand_prologue (void)
3836 {
3837 int merge_sp_adjust_with_store = 0;
3838 HOST_WIDE_INT size = get_frame_size ();
3839 HOST_WIDE_INT offset;
3840 int i;
3841 rtx tmpreg;
3842 rtx_insn *insn;
3843
3844 gr_saved = 0;
3845 fr_saved = 0;
3846 save_fregs = 0;
3847
3848 /* Compute total size for frame pointer, filler, locals and rounding to
3849 the next word boundary. Similar code appears in pa_compute_frame_size
3850 and must be changed in tandem with this code. */
3851 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3852 if (local_fsize || frame_pointer_needed)
3853 local_fsize += STARTING_FRAME_OFFSET;
3854
3855 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3856 if (flag_stack_usage_info)
3857 current_function_static_stack_size = actual_fsize;
3858
3859 /* Compute a few things we will use often. */
3860 tmpreg = gen_rtx_REG (word_mode, 1);
3861
3862 /* Save RP first. The calling conventions manual states RP will
3863 always be stored into the caller's frame at sp - 20 or sp - 16
3864 depending on which ABI is in use. */
3865 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3866 {
3867 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3868 rp_saved = true;
3869 }
3870 else
3871 rp_saved = false;
3872
3873 /* Allocate the local frame and set up the frame pointer if needed. */
3874 if (actual_fsize != 0)
3875 {
3876 if (frame_pointer_needed)
3877 {
3878 /* Copy the old frame pointer temporarily into %r1. Set up the
3879 new stack pointer, then store away the saved old frame pointer
3880 into the stack at sp and at the same time update the stack
3881 pointer by actual_fsize bytes. Two versions, first
3882 handles small (<8k) frames. The second handles large (>=8k)
3883 frames. */
3884 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3885 if (DO_FRAME_NOTES)
3886 RTX_FRAME_RELATED_P (insn) = 1;
3887
3888 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3889 if (DO_FRAME_NOTES)
3890 RTX_FRAME_RELATED_P (insn) = 1;
3891
3892 if (VAL_14_BITS_P (actual_fsize))
3893 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3894 else
3895 {
3896 /* It is incorrect to store the saved frame pointer at *sp,
3897 then increment sp (writes beyond the current stack boundary).
3898
3899 So instead use stwm to store at *sp and post-increment the
3900 stack pointer as an atomic operation. Then increment sp to
3901 finish allocating the new frame. */
3902 HOST_WIDE_INT adjust1 = 8192 - 64;
3903 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3904
3905 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3906 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3907 adjust2, 1);
3908 }
3909
3910 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3911 we need to store the previous stack pointer (frame pointer)
3912 into the frame marker on targets that use the HP unwind
3913 library. This allows the HP unwind library to be used to
3914 unwind GCC frames. However, we are not fully compatible
3915 with the HP library because our frame layout differs from
3916 that specified in the HP runtime specification.
3917
3918 We don't want a frame note on this instruction as the frame
3919 marker moves during dynamic stack allocation.
3920
3921 This instruction also serves as a blockage to prevent
3922 register spills from being scheduled before the stack
3923 pointer is raised. This is necessary as we store
3924 registers using the frame pointer as a base register,
3925 and the frame pointer is set before sp is raised. */
3926 if (TARGET_HPUX_UNWIND_LIBRARY)
3927 {
3928 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3929 GEN_INT (TARGET_64BIT ? -8 : -4));
3930
3931 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3932 hard_frame_pointer_rtx);
3933 }
3934 else
3935 emit_insn (gen_blockage ());
3936 }
3937 /* no frame pointer needed. */
3938 else
3939 {
3940 /* In some cases we can perform the first callee register save
3941 and allocating the stack frame at the same time. If so, just
3942 make a note of it and defer allocating the frame until saving
3943 the callee registers. */
3944 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3945 merge_sp_adjust_with_store = 1;
3946 /* Can not optimize. Adjust the stack frame by actual_fsize
3947 bytes. */
3948 else
3949 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3950 actual_fsize, 1);
3951 }
3952 }
3953
3954 /* Normal register save.
3955
3956 Do not save the frame pointer in the frame_pointer_needed case. It
3957 was done earlier. */
3958 if (frame_pointer_needed)
3959 {
3960 offset = local_fsize;
3961
3962 /* Saving the EH return data registers in the frame is the simplest
3963 way to get the frame unwind information emitted. We put them
3964 just before the general registers. */
3965 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3966 {
3967 unsigned int i, regno;
3968
3969 for (i = 0; ; ++i)
3970 {
3971 regno = EH_RETURN_DATA_REGNO (i);
3972 if (regno == INVALID_REGNUM)
3973 break;
3974
3975 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3976 offset += UNITS_PER_WORD;
3977 }
3978 }
3979
3980 for (i = 18; i >= 4; i--)
3981 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3982 {
3983 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3984 offset += UNITS_PER_WORD;
3985 gr_saved++;
3986 }
3987 /* Account for %r3 which is saved in a special place. */
3988 gr_saved++;
3989 }
3990 /* No frame pointer needed. */
3991 else
3992 {
3993 offset = local_fsize - actual_fsize;
3994
3995 /* Saving the EH return data registers in the frame is the simplest
3996 way to get the frame unwind information emitted. */
3997 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3998 {
3999 unsigned int i, regno;
4000
4001 for (i = 0; ; ++i)
4002 {
4003 regno = EH_RETURN_DATA_REGNO (i);
4004 if (regno == INVALID_REGNUM)
4005 break;
4006
4007 /* If merge_sp_adjust_with_store is nonzero, then we can
4008 optimize the first save. */
4009 if (merge_sp_adjust_with_store)
4010 {
4011 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4012 merge_sp_adjust_with_store = 0;
4013 }
4014 else
4015 store_reg (regno, offset, STACK_POINTER_REGNUM);
4016 offset += UNITS_PER_WORD;
4017 }
4018 }
4019
4020 for (i = 18; i >= 3; i--)
4021 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4022 {
4023 /* If merge_sp_adjust_with_store is nonzero, then we can
4024 optimize the first GR save. */
4025 if (merge_sp_adjust_with_store)
4026 {
4027 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4028 merge_sp_adjust_with_store = 0;
4029 }
4030 else
4031 store_reg (i, offset, STACK_POINTER_REGNUM);
4032 offset += UNITS_PER_WORD;
4033 gr_saved++;
4034 }
4035
4036 /* If we wanted to merge the SP adjustment with a GR save, but we never
4037 did any GR saves, then just emit the adjustment here. */
4038 if (merge_sp_adjust_with_store)
4039 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4040 actual_fsize, 1);
4041 }
4042
4043 /* The hppa calling conventions say that %r19, the pic offset
4044 register, is saved at sp - 32 (in this function's frame)
4045 when generating PIC code. FIXME: What is the correct thing
4046 to do for functions which make no calls and allocate no
4047 frame? Do we need to allocate a frame, or can we just omit
4048 the save? For now we'll just omit the save.
4049
4050 We don't want a note on this insn as the frame marker can
4051 move if there is a dynamic stack allocation. */
4052 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4053 {
4054 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4055
4056 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4057
4058 }
4059
4060 /* Align pointer properly (doubleword boundary). */
4061 offset = (offset + 7) & ~7;
4062
4063 /* Floating point register store. */
4064 if (save_fregs)
4065 {
4066 rtx base;
4067
4068 /* First get the frame or stack pointer to the start of the FP register
4069 save area. */
4070 if (frame_pointer_needed)
4071 {
4072 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4073 base = hard_frame_pointer_rtx;
4074 }
4075 else
4076 {
4077 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4078 base = stack_pointer_rtx;
4079 }
4080
4081 /* Now actually save the FP registers. */
4082 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4083 {
4084 if (df_regs_ever_live_p (i)
4085 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4086 {
4087 rtx addr, reg;
4088 rtx_insn *insn;
4089 addr = gen_rtx_MEM (DFmode,
4090 gen_rtx_POST_INC (word_mode, tmpreg));
4091 reg = gen_rtx_REG (DFmode, i);
4092 insn = emit_move_insn (addr, reg);
4093 if (DO_FRAME_NOTES)
4094 {
4095 RTX_FRAME_RELATED_P (insn) = 1;
4096 if (TARGET_64BIT)
4097 {
4098 rtx mem = gen_rtx_MEM (DFmode,
4099 plus_constant (Pmode, base,
4100 offset));
4101 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4102 gen_rtx_SET (mem, reg));
4103 }
4104 else
4105 {
4106 rtx meml = gen_rtx_MEM (SFmode,
4107 plus_constant (Pmode, base,
4108 offset));
4109 rtx memr = gen_rtx_MEM (SFmode,
4110 plus_constant (Pmode, base,
4111 offset + 4));
4112 rtx regl = gen_rtx_REG (SFmode, i);
4113 rtx regr = gen_rtx_REG (SFmode, i + 1);
4114 rtx setl = gen_rtx_SET (meml, regl);
4115 rtx setr = gen_rtx_SET (memr, regr);
4116 rtvec vec;
4117
4118 RTX_FRAME_RELATED_P (setl) = 1;
4119 RTX_FRAME_RELATED_P (setr) = 1;
4120 vec = gen_rtvec (2, setl, setr);
4121 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4122 gen_rtx_SEQUENCE (VOIDmode, vec));
4123 }
4124 }
4125 offset += GET_MODE_SIZE (DFmode);
4126 fr_saved++;
4127 }
4128 }
4129 }
4130 }
4131
4132 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4133 Handle case where DISP > 8k by using the add_high_const patterns. */
4134
4135 static void
4136 load_reg (int reg, HOST_WIDE_INT disp, int base)
4137 {
4138 rtx dest = gen_rtx_REG (word_mode, reg);
4139 rtx basereg = gen_rtx_REG (Pmode, base);
4140 rtx src;
4141
4142 if (VAL_14_BITS_P (disp))
4143 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4144 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4145 {
4146 rtx delta = GEN_INT (disp);
4147 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4148
4149 emit_move_insn (tmpreg, delta);
4150 if (TARGET_DISABLE_INDEXING)
4151 {
4152 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4153 src = gen_rtx_MEM (word_mode, tmpreg);
4154 }
4155 else
4156 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4157 }
4158 else
4159 {
4160 rtx delta = GEN_INT (disp);
4161 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4162 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4163
4164 emit_move_insn (tmpreg, high);
4165 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4166 }
4167
4168 emit_move_insn (dest, src);
4169 }
4170
4171 /* Update the total code bytes output to the text section. */
4172
4173 static void
4174 update_total_code_bytes (unsigned int nbytes)
4175 {
4176 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4177 && !IN_NAMED_SECTION_P (cfun->decl))
4178 {
4179 unsigned int old_total = total_code_bytes;
4180
4181 total_code_bytes += nbytes;
4182
4183 /* Be prepared to handle overflows. */
4184 if (old_total > total_code_bytes)
4185 total_code_bytes = UINT_MAX;
4186 }
4187 }
4188
4189 /* This function generates the assembly code for function exit.
4190 Args are as for output_function_prologue ().
4191
4192 The function epilogue should not depend on the current stack
4193 pointer! It should use the frame pointer only. This is mandatory
4194 because of alloca; we also take advantage of it to omit stack
4195 adjustments before returning. */
4196
4197 static void
4198 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4199 {
4200 rtx_insn *insn = get_last_insn ();
4201 bool extra_nop;
4202
4203 /* pa_expand_epilogue does the dirty work now. We just need
4204 to output the assembler directives which denote the end
4205 of a function.
4206
4207 To make debuggers happy, emit a nop if the epilogue was completely
4208 eliminated due to a volatile call as the last insn in the
4209 current function. That way the return address (in %r2) will
4210 always point to a valid instruction in the current function. */
4211
4212 /* Get the last real insn. */
4213 if (NOTE_P (insn))
4214 insn = prev_real_insn (insn);
4215
4216 /* If it is a sequence, then look inside. */
4217 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4218 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4219
4220 /* If insn is a CALL_INSN, then it must be a call to a volatile
4221 function (otherwise there would be epilogue insns). */
4222 if (insn && CALL_P (insn))
4223 {
4224 fputs ("\tnop\n", file);
4225 extra_nop = true;
4226 }
4227 else
4228 extra_nop = false;
4229
4230 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4231
4232 if (TARGET_SOM && TARGET_GAS)
4233 {
4234 /* We are done with this subspace except possibly for some additional
4235 debug information. Forget that we are in this subspace to ensure
4236 that the next function is output in its own subspace. */
4237 in_section = NULL;
4238 cfun->machine->in_nsubspa = 2;
4239 }
4240
4241 /* Thunks do their own insn accounting. */
4242 if (cfun->is_thunk)
4243 return;
4244
4245 if (INSN_ADDRESSES_SET_P ())
4246 {
4247 last_address = extra_nop ? 4 : 0;
4248 insn = get_last_nonnote_insn ();
4249 if (insn)
4250 {
4251 last_address += INSN_ADDRESSES (INSN_UID (insn));
4252 if (INSN_P (insn))
4253 last_address += insn_default_length (insn);
4254 }
4255 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4256 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4257 }
4258 else
4259 last_address = UINT_MAX;
4260
4261 /* Finally, update the total number of code bytes output so far. */
4262 update_total_code_bytes (last_address);
4263 }
4264
4265 void
4266 pa_expand_epilogue (void)
4267 {
4268 rtx tmpreg;
4269 HOST_WIDE_INT offset;
4270 HOST_WIDE_INT ret_off = 0;
4271 int i;
4272 int merge_sp_adjust_with_load = 0;
4273
4274 /* We will use this often. */
4275 tmpreg = gen_rtx_REG (word_mode, 1);
4276
4277 /* Try to restore RP early to avoid load/use interlocks when
4278 RP gets used in the return (bv) instruction. This appears to still
4279 be necessary even when we schedule the prologue and epilogue. */
4280 if (rp_saved)
4281 {
4282 ret_off = TARGET_64BIT ? -16 : -20;
4283 if (frame_pointer_needed)
4284 {
4285 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4286 ret_off = 0;
4287 }
4288 else
4289 {
4290 /* No frame pointer, and stack is smaller than 8k. */
4291 if (VAL_14_BITS_P (ret_off - actual_fsize))
4292 {
4293 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4294 ret_off = 0;
4295 }
4296 }
4297 }
4298
4299 /* General register restores. */
4300 if (frame_pointer_needed)
4301 {
4302 offset = local_fsize;
4303
4304 /* If the current function calls __builtin_eh_return, then we need
4305 to restore the saved EH data registers. */
4306 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4307 {
4308 unsigned int i, regno;
4309
4310 for (i = 0; ; ++i)
4311 {
4312 regno = EH_RETURN_DATA_REGNO (i);
4313 if (regno == INVALID_REGNUM)
4314 break;
4315
4316 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4317 offset += UNITS_PER_WORD;
4318 }
4319 }
4320
4321 for (i = 18; i >= 4; i--)
4322 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4323 {
4324 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4325 offset += UNITS_PER_WORD;
4326 }
4327 }
4328 else
4329 {
4330 offset = local_fsize - actual_fsize;
4331
4332 /* If the current function calls __builtin_eh_return, then we need
4333 to restore the saved EH data registers. */
4334 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4335 {
4336 unsigned int i, regno;
4337
4338 for (i = 0; ; ++i)
4339 {
4340 regno = EH_RETURN_DATA_REGNO (i);
4341 if (regno == INVALID_REGNUM)
4342 break;
4343
4344 /* Only for the first load.
4345 merge_sp_adjust_with_load holds the register load
4346 with which we will merge the sp adjustment. */
4347 if (merge_sp_adjust_with_load == 0
4348 && local_fsize == 0
4349 && VAL_14_BITS_P (-actual_fsize))
4350 merge_sp_adjust_with_load = regno;
4351 else
4352 load_reg (regno, offset, STACK_POINTER_REGNUM);
4353 offset += UNITS_PER_WORD;
4354 }
4355 }
4356
4357 for (i = 18; i >= 3; i--)
4358 {
4359 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4360 {
4361 /* Only for the first load.
4362 merge_sp_adjust_with_load holds the register load
4363 with which we will merge the sp adjustment. */
4364 if (merge_sp_adjust_with_load == 0
4365 && local_fsize == 0
4366 && VAL_14_BITS_P (-actual_fsize))
4367 merge_sp_adjust_with_load = i;
4368 else
4369 load_reg (i, offset, STACK_POINTER_REGNUM);
4370 offset += UNITS_PER_WORD;
4371 }
4372 }
4373 }
4374
4375 /* Align pointer properly (doubleword boundary). */
4376 offset = (offset + 7) & ~7;
4377
4378 /* FP register restores. */
4379 if (save_fregs)
4380 {
4381 /* Adjust the register to index off of. */
4382 if (frame_pointer_needed)
4383 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4384 else
4385 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4386
4387 /* Actually do the restores now. */
4388 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4389 if (df_regs_ever_live_p (i)
4390 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4391 {
4392 rtx src = gen_rtx_MEM (DFmode,
4393 gen_rtx_POST_INC (word_mode, tmpreg));
4394 rtx dest = gen_rtx_REG (DFmode, i);
4395 emit_move_insn (dest, src);
4396 }
4397 }
4398
4399 /* Emit a blockage insn here to keep these insns from being moved to
4400 an earlier spot in the epilogue, or into the main instruction stream.
4401
4402 This is necessary as we must not cut the stack back before all the
4403 restores are finished. */
4404 emit_insn (gen_blockage ());
4405
4406 /* Reset stack pointer (and possibly frame pointer). The stack
4407 pointer is initially set to fp + 64 to avoid a race condition. */
4408 if (frame_pointer_needed)
4409 {
4410 rtx delta = GEN_INT (-64);
4411
4412 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4413 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4414 stack_pointer_rtx, delta));
4415 }
4416 /* If we were deferring a callee register restore, do it now. */
4417 else if (merge_sp_adjust_with_load)
4418 {
4419 rtx delta = GEN_INT (-actual_fsize);
4420 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4421
4422 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4423 }
4424 else if (actual_fsize != 0)
4425 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4426 - actual_fsize, 0);
4427
4428 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4429 frame greater than 8k), do so now. */
4430 if (ret_off != 0)
4431 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4432
4433 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4434 {
4435 rtx sa = EH_RETURN_STACKADJ_RTX;
4436
4437 emit_insn (gen_blockage ());
4438 emit_insn (TARGET_64BIT
4439 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4440 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4441 }
4442 }
4443
4444 bool
4445 pa_can_use_return_insn (void)
4446 {
4447 if (!reload_completed)
4448 return false;
4449
4450 if (frame_pointer_needed)
4451 return false;
4452
4453 if (df_regs_ever_live_p (2))
4454 return false;
4455
4456 if (crtl->profile)
4457 return false;
4458
4459 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4460 }
4461
4462 rtx
4463 hppa_pic_save_rtx (void)
4464 {
4465 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4466 }
4467
4468 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4469 #define NO_DEFERRED_PROFILE_COUNTERS 0
4470 #endif
4471
4472
4473 /* Vector of funcdef numbers. */
4474 static vec<int> funcdef_nos;
4475
4476 /* Output deferred profile counters. */
4477 static void
4478 output_deferred_profile_counters (void)
4479 {
4480 unsigned int i;
4481 int align, n;
4482
4483 if (funcdef_nos.is_empty ())
4484 return;
4485
4486 switch_to_section (data_section);
4487 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4488 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4489
4490 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4491 {
4492 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4493 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4494 }
4495
4496 funcdef_nos.release ();
4497 }
4498
4499 void
4500 hppa_profile_hook (int label_no)
4501 {
4502 /* We use SImode for the address of the function in both 32 and
4503 64-bit code to avoid having to provide DImode versions of the
4504 lcla2 and load_offset_label_address insn patterns. */
4505 rtx reg = gen_reg_rtx (SImode);
4506 rtx_code_label *label_rtx = gen_label_rtx ();
4507 rtx begin_label_rtx;
4508 rtx_insn *call_insn;
4509 char begin_label_name[16];
4510
4511 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4512 label_no);
4513 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4514
4515 if (TARGET_64BIT)
4516 emit_move_insn (arg_pointer_rtx,
4517 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4518 GEN_INT (64)));
4519
4520 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4521
4522 /* The address of the function is loaded into %r25 with an instruction-
4523 relative sequence that avoids the use of relocations. The sequence
4524 is split so that the load_offset_label_address instruction can
4525 occupy the delay slot of the call to _mcount. */
4526 if (TARGET_PA_20)
4527 emit_insn (gen_lcla2 (reg, label_rtx));
4528 else
4529 emit_insn (gen_lcla1 (reg, label_rtx));
4530
4531 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4532 reg, begin_label_rtx, label_rtx));
4533
4534 #if !NO_DEFERRED_PROFILE_COUNTERS
4535 {
4536 rtx count_label_rtx, addr, r24;
4537 char count_label_name[16];
4538
4539 funcdef_nos.safe_push (label_no);
4540 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4541 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4542
4543 addr = force_reg (Pmode, count_label_rtx);
4544 r24 = gen_rtx_REG (Pmode, 24);
4545 emit_move_insn (r24, addr);
4546
4547 call_insn =
4548 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4549 gen_rtx_SYMBOL_REF (Pmode,
4550 "_mcount")),
4551 GEN_INT (TARGET_64BIT ? 24 : 12)));
4552
4553 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4554 }
4555 #else
4556
4557 call_insn =
4558 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4559 gen_rtx_SYMBOL_REF (Pmode,
4560 "_mcount")),
4561 GEN_INT (TARGET_64BIT ? 16 : 8)));
4562
4563 #endif
4564
4565 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4566 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4567
4568 /* Indicate the _mcount call cannot throw, nor will it execute a
4569 non-local goto. */
4570 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4571 }
4572
4573 /* Fetch the return address for the frame COUNT steps up from
4574 the current frame, after the prologue. FRAMEADDR is the
4575 frame pointer of the COUNT frame.
4576
4577 We want to ignore any export stub remnants here. To handle this,
4578 we examine the code at the return address, and if it is an export
4579 stub, we return a memory rtx for the stub return address stored
4580 at frame-24.
4581
4582 The value returned is used in two different ways:
4583
4584 1. To find a function's caller.
4585
4586 2. To change the return address for a function.
4587
4588 This function handles most instances of case 1; however, it will
4589 fail if there are two levels of stubs to execute on the return
4590 path. The only way I believe that can happen is if the return value
4591 needs a parameter relocation, which never happens for C code.
4592
4593 This function handles most instances of case 2; however, it will
4594 fail if we did not originally have stub code on the return path
4595 but will need stub code on the new return path. This can happen if
4596 the caller & callee are both in the main program, but the new
4597 return location is in a shared library. */
4598
4599 rtx
4600 pa_return_addr_rtx (int count, rtx frameaddr)
4601 {
4602 rtx label;
4603 rtx rp;
4604 rtx saved_rp;
4605 rtx ins;
4606
4607 /* The instruction stream at the return address of a PA1.X export stub is:
4608
4609 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4610 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4611 0x00011820 | stub+16: mtsp r1,sr0
4612 0xe0400002 | stub+20: be,n 0(sr0,rp)
4613
4614 0xe0400002 must be specified as -532676606 so that it won't be
4615 rejected as an invalid immediate operand on 64-bit hosts.
4616
4617 The instruction stream at the return address of a PA2.0 export stub is:
4618
4619 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4620 0xe840d002 | stub+12: bve,n (rp)
4621 */
4622
4623 HOST_WIDE_INT insns[4];
4624 int i, len;
4625
4626 if (count != 0)
4627 return NULL_RTX;
4628
4629 rp = get_hard_reg_initial_val (Pmode, 2);
4630
4631 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4632 return rp;
4633
4634 /* If there is no export stub then just use the value saved from
4635 the return pointer register. */
4636
4637 saved_rp = gen_reg_rtx (Pmode);
4638 emit_move_insn (saved_rp, rp);
4639
4640 /* Get pointer to the instruction stream. We have to mask out the
4641 privilege level from the two low order bits of the return address
4642 pointer here so that ins will point to the start of the first
4643 instruction that would have been executed if we returned. */
4644 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4645 label = gen_label_rtx ();
4646
4647 if (TARGET_PA_20)
4648 {
4649 insns[0] = 0x4bc23fd1;
4650 insns[1] = -398405630;
4651 len = 2;
4652 }
4653 else
4654 {
4655 insns[0] = 0x4bc23fd1;
4656 insns[1] = 0x004010a1;
4657 insns[2] = 0x00011820;
4658 insns[3] = -532676606;
4659 len = 4;
4660 }
4661
4662 /* Check the instruction stream at the normal return address for the
4663 export stub. If it is an export stub, than our return address is
4664 really in -24[frameaddr]. */
4665
4666 for (i = 0; i < len; i++)
4667 {
4668 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4669 rtx op1 = GEN_INT (insns[i]);
4670 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4671 }
4672
4673 /* Here we know that our return address points to an export
4674 stub. We don't want to return the address of the export stub,
4675 but rather the return address of the export stub. That return
4676 address is stored at -24[frameaddr]. */
4677
4678 emit_move_insn (saved_rp,
4679 gen_rtx_MEM (Pmode,
4680 memory_address (Pmode,
4681 plus_constant (Pmode, frameaddr,
4682 -24))));
4683
4684 emit_label (label);
4685
4686 return saved_rp;
4687 }
4688
4689 void
4690 pa_emit_bcond_fp (rtx operands[])
4691 {
4692 enum rtx_code code = GET_CODE (operands[0]);
4693 rtx operand0 = operands[1];
4694 rtx operand1 = operands[2];
4695 rtx label = operands[3];
4696
4697 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4698 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4699
4700 emit_jump_insn (gen_rtx_SET (pc_rtx,
4701 gen_rtx_IF_THEN_ELSE (VOIDmode,
4702 gen_rtx_fmt_ee (NE,
4703 VOIDmode,
4704 gen_rtx_REG (CCFPmode, 0),
4705 const0_rtx),
4706 gen_rtx_LABEL_REF (VOIDmode, label),
4707 pc_rtx)));
4708
4709 }
4710
4711 /* Adjust the cost of a scheduling dependency. Return the new cost of
4712 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4713
4714 static int
4715 pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4716 {
4717 enum attr_type attr_type;
4718
4719 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4720 true dependencies as they are described with bypasses now. */
4721 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4722 return cost;
4723
4724 if (! recog_memoized (insn))
4725 return 0;
4726
4727 attr_type = get_attr_type (insn);
4728
4729 switch (REG_NOTE_KIND (link))
4730 {
4731 case REG_DEP_ANTI:
4732 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4733 cycles later. */
4734
4735 if (attr_type == TYPE_FPLOAD)
4736 {
4737 rtx pat = PATTERN (insn);
4738 rtx dep_pat = PATTERN (dep_insn);
4739 if (GET_CODE (pat) == PARALLEL)
4740 {
4741 /* This happens for the fldXs,mb patterns. */
4742 pat = XVECEXP (pat, 0, 0);
4743 }
4744 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4745 /* If this happens, we have to extend this to schedule
4746 optimally. Return 0 for now. */
4747 return 0;
4748
4749 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4750 {
4751 if (! recog_memoized (dep_insn))
4752 return 0;
4753 switch (get_attr_type (dep_insn))
4754 {
4755 case TYPE_FPALU:
4756 case TYPE_FPMULSGL:
4757 case TYPE_FPMULDBL:
4758 case TYPE_FPDIVSGL:
4759 case TYPE_FPDIVDBL:
4760 case TYPE_FPSQRTSGL:
4761 case TYPE_FPSQRTDBL:
4762 /* A fpload can't be issued until one cycle before a
4763 preceding arithmetic operation has finished if
4764 the target of the fpload is any of the sources
4765 (or destination) of the arithmetic operation. */
4766 return insn_default_latency (dep_insn) - 1;
4767
4768 default:
4769 return 0;
4770 }
4771 }
4772 }
4773 else if (attr_type == TYPE_FPALU)
4774 {
4775 rtx pat = PATTERN (insn);
4776 rtx dep_pat = PATTERN (dep_insn);
4777 if (GET_CODE (pat) == PARALLEL)
4778 {
4779 /* This happens for the fldXs,mb patterns. */
4780 pat = XVECEXP (pat, 0, 0);
4781 }
4782 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4783 /* If this happens, we have to extend this to schedule
4784 optimally. Return 0 for now. */
4785 return 0;
4786
4787 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4788 {
4789 if (! recog_memoized (dep_insn))
4790 return 0;
4791 switch (get_attr_type (dep_insn))
4792 {
4793 case TYPE_FPDIVSGL:
4794 case TYPE_FPDIVDBL:
4795 case TYPE_FPSQRTSGL:
4796 case TYPE_FPSQRTDBL:
4797 /* An ALU flop can't be issued until two cycles before a
4798 preceding divide or sqrt operation has finished if
4799 the target of the ALU flop is any of the sources
4800 (or destination) of the divide or sqrt operation. */
4801 return insn_default_latency (dep_insn) - 2;
4802
4803 default:
4804 return 0;
4805 }
4806 }
4807 }
4808
4809 /* For other anti dependencies, the cost is 0. */
4810 return 0;
4811
4812 case REG_DEP_OUTPUT:
4813 /* Output dependency; DEP_INSN writes a register that INSN writes some
4814 cycles later. */
4815 if (attr_type == TYPE_FPLOAD)
4816 {
4817 rtx pat = PATTERN (insn);
4818 rtx dep_pat = PATTERN (dep_insn);
4819 if (GET_CODE (pat) == PARALLEL)
4820 {
4821 /* This happens for the fldXs,mb patterns. */
4822 pat = XVECEXP (pat, 0, 0);
4823 }
4824 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4825 /* If this happens, we have to extend this to schedule
4826 optimally. Return 0 for now. */
4827 return 0;
4828
4829 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4830 {
4831 if (! recog_memoized (dep_insn))
4832 return 0;
4833 switch (get_attr_type (dep_insn))
4834 {
4835 case TYPE_FPALU:
4836 case TYPE_FPMULSGL:
4837 case TYPE_FPMULDBL:
4838 case TYPE_FPDIVSGL:
4839 case TYPE_FPDIVDBL:
4840 case TYPE_FPSQRTSGL:
4841 case TYPE_FPSQRTDBL:
4842 /* A fpload can't be issued until one cycle before a
4843 preceding arithmetic operation has finished if
4844 the target of the fpload is the destination of the
4845 arithmetic operation.
4846
4847 Exception: For PA7100LC, PA7200 and PA7300, the cost
4848 is 3 cycles, unless they bundle together. We also
4849 pay the penalty if the second insn is a fpload. */
4850 return insn_default_latency (dep_insn) - 1;
4851
4852 default:
4853 return 0;
4854 }
4855 }
4856 }
4857 else if (attr_type == TYPE_FPALU)
4858 {
4859 rtx pat = PATTERN (insn);
4860 rtx dep_pat = PATTERN (dep_insn);
4861 if (GET_CODE (pat) == PARALLEL)
4862 {
4863 /* This happens for the fldXs,mb patterns. */
4864 pat = XVECEXP (pat, 0, 0);
4865 }
4866 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4867 /* If this happens, we have to extend this to schedule
4868 optimally. Return 0 for now. */
4869 return 0;
4870
4871 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4872 {
4873 if (! recog_memoized (dep_insn))
4874 return 0;
4875 switch (get_attr_type (dep_insn))
4876 {
4877 case TYPE_FPDIVSGL:
4878 case TYPE_FPDIVDBL:
4879 case TYPE_FPSQRTSGL:
4880 case TYPE_FPSQRTDBL:
4881 /* An ALU flop can't be issued until two cycles before a
4882 preceding divide or sqrt operation has finished if
4883 the target of the ALU flop is also the target of
4884 the divide or sqrt operation. */
4885 return insn_default_latency (dep_insn) - 2;
4886
4887 default:
4888 return 0;
4889 }
4890 }
4891 }
4892
4893 /* For other output dependencies, the cost is 0. */
4894 return 0;
4895
4896 default:
4897 gcc_unreachable ();
4898 }
4899 }
4900
4901 /* Adjust scheduling priorities. We use this to try and keep addil
4902 and the next use of %r1 close together. */
4903 static int
4904 pa_adjust_priority (rtx_insn *insn, int priority)
4905 {
4906 rtx set = single_set (insn);
4907 rtx src, dest;
4908 if (set)
4909 {
4910 src = SET_SRC (set);
4911 dest = SET_DEST (set);
4912 if (GET_CODE (src) == LO_SUM
4913 && symbolic_operand (XEXP (src, 1), VOIDmode)
4914 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4915 priority >>= 3;
4916
4917 else if (GET_CODE (src) == MEM
4918 && GET_CODE (XEXP (src, 0)) == LO_SUM
4919 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4920 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4921 priority >>= 1;
4922
4923 else if (GET_CODE (dest) == MEM
4924 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4925 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4926 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4927 priority >>= 3;
4928 }
4929 return priority;
4930 }
4931
4932 /* The 700 can only issue a single insn at a time.
4933 The 7XXX processors can issue two insns at a time.
4934 The 8000 can issue 4 insns at a time. */
4935 static int
4936 pa_issue_rate (void)
4937 {
4938 switch (pa_cpu)
4939 {
4940 case PROCESSOR_700: return 1;
4941 case PROCESSOR_7100: return 2;
4942 case PROCESSOR_7100LC: return 2;
4943 case PROCESSOR_7200: return 2;
4944 case PROCESSOR_7300: return 2;
4945 case PROCESSOR_8000: return 4;
4946
4947 default:
4948 gcc_unreachable ();
4949 }
4950 }
4951
4952
4953
4954 /* Return any length plus adjustment needed by INSN which already has
4955 its length computed as LENGTH. Return LENGTH if no adjustment is
4956 necessary.
4957
4958 Also compute the length of an inline block move here as it is too
4959 complicated to express as a length attribute in pa.md. */
4960 int
4961 pa_adjust_insn_length (rtx_insn *insn, int length)
4962 {
4963 rtx pat = PATTERN (insn);
4964
4965 /* If length is negative or undefined, provide initial length. */
4966 if ((unsigned int) length >= INT_MAX)
4967 {
4968 if (GET_CODE (pat) == SEQUENCE)
4969 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
4970
4971 switch (get_attr_type (insn))
4972 {
4973 case TYPE_MILLI:
4974 length = pa_attr_length_millicode_call (insn);
4975 break;
4976 case TYPE_CALL:
4977 length = pa_attr_length_call (insn, 0);
4978 break;
4979 case TYPE_SIBCALL:
4980 length = pa_attr_length_call (insn, 1);
4981 break;
4982 case TYPE_DYNCALL:
4983 length = pa_attr_length_indirect_call (insn);
4984 break;
4985 case TYPE_SH_FUNC_ADRS:
4986 length = pa_attr_length_millicode_call (insn) + 20;
4987 break;
4988 default:
4989 gcc_unreachable ();
4990 }
4991 }
4992
4993 /* Block move pattern. */
4994 if (NONJUMP_INSN_P (insn)
4995 && GET_CODE (pat) == PARALLEL
4996 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4997 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4998 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4999 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5000 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5001 length += compute_movmem_length (insn) - 4;
5002 /* Block clear pattern. */
5003 else if (NONJUMP_INSN_P (insn)
5004 && GET_CODE (pat) == PARALLEL
5005 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5006 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5007 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5008 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5009 length += compute_clrmem_length (insn) - 4;
5010 /* Conditional branch with an unfilled delay slot. */
5011 else if (JUMP_P (insn) && ! simplejump_p (insn))
5012 {
5013 /* Adjust a short backwards conditional with an unfilled delay slot. */
5014 if (GET_CODE (pat) == SET
5015 && length == 4
5016 && JUMP_LABEL (insn) != NULL_RTX
5017 && ! forward_branch_p (insn))
5018 length += 4;
5019 else if (GET_CODE (pat) == PARALLEL
5020 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5021 && length == 4)
5022 length += 4;
5023 /* Adjust dbra insn with short backwards conditional branch with
5024 unfilled delay slot -- only for case where counter is in a
5025 general register register. */
5026 else if (GET_CODE (pat) == PARALLEL
5027 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5028 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5029 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5030 && length == 4
5031 && ! forward_branch_p (insn))
5032 length += 4;
5033 }
5034 return length;
5035 }
5036
5037 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5038
5039 static bool
5040 pa_print_operand_punct_valid_p (unsigned char code)
5041 {
5042 if (code == '@'
5043 || code == '#'
5044 || code == '*'
5045 || code == '^')
5046 return true;
5047
5048 return false;
5049 }
5050
5051 /* Print operand X (an rtx) in assembler syntax to file FILE.
5052 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5053 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5054
5055 void
5056 pa_print_operand (FILE *file, rtx x, int code)
5057 {
5058 switch (code)
5059 {
5060 case '#':
5061 /* Output a 'nop' if there's nothing for the delay slot. */
5062 if (dbr_sequence_length () == 0)
5063 fputs ("\n\tnop", file);
5064 return;
5065 case '*':
5066 /* Output a nullification completer if there's nothing for the */
5067 /* delay slot or nullification is requested. */
5068 if (dbr_sequence_length () == 0 ||
5069 (final_sequence &&
5070 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5071 fputs (",n", file);
5072 return;
5073 case 'R':
5074 /* Print out the second register name of a register pair.
5075 I.e., R (6) => 7. */
5076 fputs (reg_names[REGNO (x) + 1], file);
5077 return;
5078 case 'r':
5079 /* A register or zero. */
5080 if (x == const0_rtx
5081 || (x == CONST0_RTX (DFmode))
5082 || (x == CONST0_RTX (SFmode)))
5083 {
5084 fputs ("%r0", file);
5085 return;
5086 }
5087 else
5088 break;
5089 case 'f':
5090 /* A register or zero (floating point). */
5091 if (x == const0_rtx
5092 || (x == CONST0_RTX (DFmode))
5093 || (x == CONST0_RTX (SFmode)))
5094 {
5095 fputs ("%fr0", file);
5096 return;
5097 }
5098 else
5099 break;
5100 case 'A':
5101 {
5102 rtx xoperands[2];
5103
5104 xoperands[0] = XEXP (XEXP (x, 0), 0);
5105 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5106 pa_output_global_address (file, xoperands[1], 0);
5107 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5108 return;
5109 }
5110
5111 case 'C': /* Plain (C)ondition */
5112 case 'X':
5113 switch (GET_CODE (x))
5114 {
5115 case EQ:
5116 fputs ("=", file); break;
5117 case NE:
5118 fputs ("<>", file); break;
5119 case GT:
5120 fputs (">", file); break;
5121 case GE:
5122 fputs (">=", file); break;
5123 case GEU:
5124 fputs (">>=", file); break;
5125 case GTU:
5126 fputs (">>", file); break;
5127 case LT:
5128 fputs ("<", file); break;
5129 case LE:
5130 fputs ("<=", file); break;
5131 case LEU:
5132 fputs ("<<=", file); break;
5133 case LTU:
5134 fputs ("<<", file); break;
5135 default:
5136 gcc_unreachable ();
5137 }
5138 return;
5139 case 'N': /* Condition, (N)egated */
5140 switch (GET_CODE (x))
5141 {
5142 case EQ:
5143 fputs ("<>", file); break;
5144 case NE:
5145 fputs ("=", file); break;
5146 case GT:
5147 fputs ("<=", file); break;
5148 case GE:
5149 fputs ("<", file); break;
5150 case GEU:
5151 fputs ("<<", file); break;
5152 case GTU:
5153 fputs ("<<=", file); break;
5154 case LT:
5155 fputs (">=", file); break;
5156 case LE:
5157 fputs (">", file); break;
5158 case LEU:
5159 fputs (">>", file); break;
5160 case LTU:
5161 fputs (">>=", file); break;
5162 default:
5163 gcc_unreachable ();
5164 }
5165 return;
5166 /* For floating point comparisons. Note that the output
5167 predicates are the complement of the desired mode. The
5168 conditions for GT, GE, LT, LE and LTGT cause an invalid
5169 operation exception if the result is unordered and this
5170 exception is enabled in the floating-point status register. */
5171 case 'Y':
5172 switch (GET_CODE (x))
5173 {
5174 case EQ:
5175 fputs ("!=", file); break;
5176 case NE:
5177 fputs ("=", file); break;
5178 case GT:
5179 fputs ("!>", file); break;
5180 case GE:
5181 fputs ("!>=", file); break;
5182 case LT:
5183 fputs ("!<", file); break;
5184 case LE:
5185 fputs ("!<=", file); break;
5186 case LTGT:
5187 fputs ("!<>", file); break;
5188 case UNLE:
5189 fputs ("!?<=", file); break;
5190 case UNLT:
5191 fputs ("!?<", file); break;
5192 case UNGE:
5193 fputs ("!?>=", file); break;
5194 case UNGT:
5195 fputs ("!?>", file); break;
5196 case UNEQ:
5197 fputs ("!?=", file); break;
5198 case UNORDERED:
5199 fputs ("!?", file); break;
5200 case ORDERED:
5201 fputs ("?", file); break;
5202 default:
5203 gcc_unreachable ();
5204 }
5205 return;
5206 case 'S': /* Condition, operands are (S)wapped. */
5207 switch (GET_CODE (x))
5208 {
5209 case EQ:
5210 fputs ("=", file); break;
5211 case NE:
5212 fputs ("<>", file); break;
5213 case GT:
5214 fputs ("<", file); break;
5215 case GE:
5216 fputs ("<=", file); break;
5217 case GEU:
5218 fputs ("<<=", file); break;
5219 case GTU:
5220 fputs ("<<", file); break;
5221 case LT:
5222 fputs (">", file); break;
5223 case LE:
5224 fputs (">=", file); break;
5225 case LEU:
5226 fputs (">>=", file); break;
5227 case LTU:
5228 fputs (">>", file); break;
5229 default:
5230 gcc_unreachable ();
5231 }
5232 return;
5233 case 'B': /* Condition, (B)oth swapped and negate. */
5234 switch (GET_CODE (x))
5235 {
5236 case EQ:
5237 fputs ("<>", file); break;
5238 case NE:
5239 fputs ("=", file); break;
5240 case GT:
5241 fputs (">=", file); break;
5242 case GE:
5243 fputs (">", file); break;
5244 case GEU:
5245 fputs (">>", file); break;
5246 case GTU:
5247 fputs (">>=", file); break;
5248 case LT:
5249 fputs ("<=", file); break;
5250 case LE:
5251 fputs ("<", file); break;
5252 case LEU:
5253 fputs ("<<", file); break;
5254 case LTU:
5255 fputs ("<<=", file); break;
5256 default:
5257 gcc_unreachable ();
5258 }
5259 return;
5260 case 'k':
5261 gcc_assert (GET_CODE (x) == CONST_INT);
5262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5263 return;
5264 case 'Q':
5265 gcc_assert (GET_CODE (x) == CONST_INT);
5266 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5267 return;
5268 case 'L':
5269 gcc_assert (GET_CODE (x) == CONST_INT);
5270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5271 return;
5272 case 'o':
5273 gcc_assert (GET_CODE (x) == CONST_INT
5274 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5275 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5276 return;
5277 case 'O':
5278 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5279 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5280 return;
5281 case 'p':
5282 gcc_assert (GET_CODE (x) == CONST_INT);
5283 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5284 return;
5285 case 'P':
5286 gcc_assert (GET_CODE (x) == CONST_INT);
5287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5288 return;
5289 case 'I':
5290 if (GET_CODE (x) == CONST_INT)
5291 fputs ("i", file);
5292 return;
5293 case 'M':
5294 case 'F':
5295 switch (GET_CODE (XEXP (x, 0)))
5296 {
5297 case PRE_DEC:
5298 case PRE_INC:
5299 if (ASSEMBLER_DIALECT == 0)
5300 fputs ("s,mb", file);
5301 else
5302 fputs (",mb", file);
5303 break;
5304 case POST_DEC:
5305 case POST_INC:
5306 if (ASSEMBLER_DIALECT == 0)
5307 fputs ("s,ma", file);
5308 else
5309 fputs (",ma", file);
5310 break;
5311 case PLUS:
5312 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5313 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5314 {
5315 if (ASSEMBLER_DIALECT == 0)
5316 fputs ("x", file);
5317 }
5318 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5319 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5320 {
5321 if (ASSEMBLER_DIALECT == 0)
5322 fputs ("x,s", file);
5323 else
5324 fputs (",s", file);
5325 }
5326 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5327 fputs ("s", file);
5328 break;
5329 default:
5330 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5331 fputs ("s", file);
5332 break;
5333 }
5334 return;
5335 case 'G':
5336 pa_output_global_address (file, x, 0);
5337 return;
5338 case 'H':
5339 pa_output_global_address (file, x, 1);
5340 return;
5341 case 0: /* Don't do anything special */
5342 break;
5343 case 'Z':
5344 {
5345 unsigned op[3];
5346 compute_zdepwi_operands (INTVAL (x), op);
5347 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5348 return;
5349 }
5350 case 'z':
5351 {
5352 unsigned op[3];
5353 compute_zdepdi_operands (INTVAL (x), op);
5354 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5355 return;
5356 }
5357 case 'c':
5358 /* We can get here from a .vtable_inherit due to our
5359 CONSTANT_ADDRESS_P rejecting perfectly good constant
5360 addresses. */
5361 break;
5362 default:
5363 gcc_unreachable ();
5364 }
5365 if (GET_CODE (x) == REG)
5366 {
5367 fputs (reg_names [REGNO (x)], file);
5368 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5369 {
5370 fputs ("R", file);
5371 return;
5372 }
5373 if (FP_REG_P (x)
5374 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5375 && (REGNO (x) & 1) == 0)
5376 fputs ("L", file);
5377 }
5378 else if (GET_CODE (x) == MEM)
5379 {
5380 int size = GET_MODE_SIZE (GET_MODE (x));
5381 rtx base = NULL_RTX;
5382 switch (GET_CODE (XEXP (x, 0)))
5383 {
5384 case PRE_DEC:
5385 case POST_DEC:
5386 base = XEXP (XEXP (x, 0), 0);
5387 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5388 break;
5389 case PRE_INC:
5390 case POST_INC:
5391 base = XEXP (XEXP (x, 0), 0);
5392 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5393 break;
5394 case PLUS:
5395 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5396 fprintf (file, "%s(%s)",
5397 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5398 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5399 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5400 fprintf (file, "%s(%s)",
5401 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5402 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5403 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5404 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5405 {
5406 /* Because the REG_POINTER flag can get lost during reload,
5407 pa_legitimate_address_p canonicalizes the order of the
5408 index and base registers in the combined move patterns. */
5409 rtx base = XEXP (XEXP (x, 0), 1);
5410 rtx index = XEXP (XEXP (x, 0), 0);
5411
5412 fprintf (file, "%s(%s)",
5413 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5414 }
5415 else
5416 output_address (GET_MODE (x), XEXP (x, 0));
5417 break;
5418 default:
5419 output_address (GET_MODE (x), XEXP (x, 0));
5420 break;
5421 }
5422 }
5423 else
5424 output_addr_const (file, x);
5425 }
5426
5427 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5428
5429 void
5430 pa_output_global_address (FILE *file, rtx x, int round_constant)
5431 {
5432
5433 /* Imagine (high (const (plus ...))). */
5434 if (GET_CODE (x) == HIGH)
5435 x = XEXP (x, 0);
5436
5437 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5438 output_addr_const (file, x);
5439 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5440 {
5441 output_addr_const (file, x);
5442 fputs ("-$global$", file);
5443 }
5444 else if (GET_CODE (x) == CONST)
5445 {
5446 const char *sep = "";
5447 int offset = 0; /* assembler wants -$global$ at end */
5448 rtx base = NULL_RTX;
5449
5450 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5451 {
5452 case LABEL_REF:
5453 case SYMBOL_REF:
5454 base = XEXP (XEXP (x, 0), 0);
5455 output_addr_const (file, base);
5456 break;
5457 case CONST_INT:
5458 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5459 break;
5460 default:
5461 gcc_unreachable ();
5462 }
5463
5464 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5465 {
5466 case LABEL_REF:
5467 case SYMBOL_REF:
5468 base = XEXP (XEXP (x, 0), 1);
5469 output_addr_const (file, base);
5470 break;
5471 case CONST_INT:
5472 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5473 break;
5474 default:
5475 gcc_unreachable ();
5476 }
5477
5478 /* How bogus. The compiler is apparently responsible for
5479 rounding the constant if it uses an LR field selector.
5480
5481 The linker and/or assembler seem a better place since
5482 they have to do this kind of thing already.
5483
5484 If we fail to do this, HP's optimizing linker may eliminate
5485 an addil, but not update the ldw/stw/ldo instruction that
5486 uses the result of the addil. */
5487 if (round_constant)
5488 offset = ((offset + 0x1000) & ~0x1fff);
5489
5490 switch (GET_CODE (XEXP (x, 0)))
5491 {
5492 case PLUS:
5493 if (offset < 0)
5494 {
5495 offset = -offset;
5496 sep = "-";
5497 }
5498 else
5499 sep = "+";
5500 break;
5501
5502 case MINUS:
5503 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5504 sep = "-";
5505 break;
5506
5507 default:
5508 gcc_unreachable ();
5509 }
5510
5511 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5512 fputs ("-$global$", file);
5513 if (offset)
5514 fprintf (file, "%s%d", sep, offset);
5515 }
5516 else
5517 output_addr_const (file, x);
5518 }
5519
5520 /* Output boilerplate text to appear at the beginning of the file.
5521 There are several possible versions. */
5522 #define aputs(x) fputs(x, asm_out_file)
5523 static inline void
5524 pa_file_start_level (void)
5525 {
5526 if (TARGET_64BIT)
5527 aputs ("\t.LEVEL 2.0w\n");
5528 else if (TARGET_PA_20)
5529 aputs ("\t.LEVEL 2.0\n");
5530 else if (TARGET_PA_11)
5531 aputs ("\t.LEVEL 1.1\n");
5532 else
5533 aputs ("\t.LEVEL 1.0\n");
5534 }
5535
5536 static inline void
5537 pa_file_start_space (int sortspace)
5538 {
5539 aputs ("\t.SPACE $PRIVATE$");
5540 if (sortspace)
5541 aputs (",SORT=16");
5542 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5543 if (flag_tm)
5544 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5545 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5546 "\n\t.SPACE $TEXT$");
5547 if (sortspace)
5548 aputs (",SORT=8");
5549 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5550 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5551 }
5552
5553 static inline void
5554 pa_file_start_file (int want_version)
5555 {
5556 if (write_symbols != NO_DEBUG)
5557 {
5558 output_file_directive (asm_out_file, main_input_filename);
5559 if (want_version)
5560 aputs ("\t.version\t\"01.01\"\n");
5561 }
5562 }
5563
5564 static inline void
5565 pa_file_start_mcount (const char *aswhat)
5566 {
5567 if (profile_flag)
5568 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5569 }
5570
5571 static void
5572 pa_elf_file_start (void)
5573 {
5574 pa_file_start_level ();
5575 pa_file_start_mcount ("ENTRY");
5576 pa_file_start_file (0);
5577 }
5578
5579 static void
5580 pa_som_file_start (void)
5581 {
5582 pa_file_start_level ();
5583 pa_file_start_space (0);
5584 aputs ("\t.IMPORT $global$,DATA\n"
5585 "\t.IMPORT $$dyncall,MILLICODE\n");
5586 pa_file_start_mcount ("CODE");
5587 pa_file_start_file (0);
5588 }
5589
5590 static void
5591 pa_linux_file_start (void)
5592 {
5593 pa_file_start_file (1);
5594 pa_file_start_level ();
5595 pa_file_start_mcount ("CODE");
5596 }
5597
5598 static void
5599 pa_hpux64_gas_file_start (void)
5600 {
5601 pa_file_start_level ();
5602 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5603 if (profile_flag)
5604 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5605 #endif
5606 pa_file_start_file (1);
5607 }
5608
5609 static void
5610 pa_hpux64_hpas_file_start (void)
5611 {
5612 pa_file_start_level ();
5613 pa_file_start_space (1);
5614 pa_file_start_mcount ("CODE");
5615 pa_file_start_file (0);
5616 }
5617 #undef aputs
5618
5619 /* Search the deferred plabel list for SYMBOL and return its internal
5620 label. If an entry for SYMBOL is not found, a new entry is created. */
5621
5622 rtx
5623 pa_get_deferred_plabel (rtx symbol)
5624 {
5625 const char *fname = XSTR (symbol, 0);
5626 size_t i;
5627
5628 /* See if we have already put this function on the list of deferred
5629 plabels. This list is generally small, so a liner search is not
5630 too ugly. If it proves too slow replace it with something faster. */
5631 for (i = 0; i < n_deferred_plabels; i++)
5632 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5633 break;
5634
5635 /* If the deferred plabel list is empty, or this entry was not found
5636 on the list, create a new entry on the list. */
5637 if (deferred_plabels == NULL || i == n_deferred_plabels)
5638 {
5639 tree id;
5640
5641 if (deferred_plabels == 0)
5642 deferred_plabels = ggc_alloc<deferred_plabel> ();
5643 else
5644 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5645 deferred_plabels,
5646 n_deferred_plabels + 1);
5647
5648 i = n_deferred_plabels++;
5649 deferred_plabels[i].internal_label = gen_label_rtx ();
5650 deferred_plabels[i].symbol = symbol;
5651
5652 /* Gross. We have just implicitly taken the address of this
5653 function. Mark it in the same manner as assemble_name. */
5654 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5655 if (id)
5656 mark_referenced (id);
5657 }
5658
5659 return deferred_plabels[i].internal_label;
5660 }
5661
5662 static void
5663 output_deferred_plabels (void)
5664 {
5665 size_t i;
5666
5667 /* If we have some deferred plabels, then we need to switch into the
5668 data or readonly data section, and align it to a 4 byte boundary
5669 before outputting the deferred plabels. */
5670 if (n_deferred_plabels)
5671 {
5672 switch_to_section (flag_pic ? data_section : readonly_data_section);
5673 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5674 }
5675
5676 /* Now output the deferred plabels. */
5677 for (i = 0; i < n_deferred_plabels; i++)
5678 {
5679 targetm.asm_out.internal_label (asm_out_file, "L",
5680 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5681 assemble_integer (deferred_plabels[i].symbol,
5682 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5683 }
5684 }
5685
5686 /* Initialize optabs to point to emulation routines. */
5687
5688 static void
5689 pa_init_libfuncs (void)
5690 {
5691 if (HPUX_LONG_DOUBLE_LIBRARY)
5692 {
5693 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5694 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5695 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5696 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5697 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5698 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5699 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5700 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5701 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5702
5703 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5704 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5705 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5706 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5707 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5708 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5709 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5710
5711 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5712 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5713 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5714 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5715
5716 set_conv_libfunc (sfix_optab, SImode, TFmode,
5717 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5718 : "_U_Qfcnvfxt_quad_to_sgl");
5719 set_conv_libfunc (sfix_optab, DImode, TFmode,
5720 "_U_Qfcnvfxt_quad_to_dbl");
5721 set_conv_libfunc (ufix_optab, SImode, TFmode,
5722 "_U_Qfcnvfxt_quad_to_usgl");
5723 set_conv_libfunc (ufix_optab, DImode, TFmode,
5724 "_U_Qfcnvfxt_quad_to_udbl");
5725
5726 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5727 "_U_Qfcnvxf_sgl_to_quad");
5728 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5729 "_U_Qfcnvxf_dbl_to_quad");
5730 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5731 "_U_Qfcnvxf_usgl_to_quad");
5732 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5733 "_U_Qfcnvxf_udbl_to_quad");
5734 }
5735
5736 if (TARGET_SYNC_LIBCALL)
5737 init_sync_libfuncs (8);
5738 }
5739
5740 /* HP's millicode routines mean something special to the assembler.
5741 Keep track of which ones we have used. */
5742
5743 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5744 static void import_milli (enum millicodes);
5745 static char imported[(int) end1000];
5746 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5747 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5748 #define MILLI_START 10
5749
5750 static void
5751 import_milli (enum millicodes code)
5752 {
5753 char str[sizeof (import_string)];
5754
5755 if (!imported[(int) code])
5756 {
5757 imported[(int) code] = 1;
5758 strcpy (str, import_string);
5759 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5760 output_asm_insn (str, 0);
5761 }
5762 }
5763
5764 /* The register constraints have put the operands and return value in
5765 the proper registers. */
5766
5767 const char *
5768 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5769 {
5770 import_milli (mulI);
5771 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5772 }
5773
5774 /* Emit the rtl for doing a division by a constant. */
5775
5776 /* Do magic division millicodes exist for this value? */
5777 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5778
5779 /* We'll use an array to keep track of the magic millicodes and
5780 whether or not we've used them already. [n][0] is signed, [n][1] is
5781 unsigned. */
5782
5783 static int div_milli[16][2];
5784
5785 int
5786 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5787 {
5788 if (GET_CODE (operands[2]) == CONST_INT
5789 && INTVAL (operands[2]) > 0
5790 && INTVAL (operands[2]) < 16
5791 && pa_magic_milli[INTVAL (operands[2])])
5792 {
5793 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5794
5795 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5796 emit
5797 (gen_rtx_PARALLEL
5798 (VOIDmode,
5799 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5800 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5801 SImode,
5802 gen_rtx_REG (SImode, 26),
5803 operands[2])),
5804 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5805 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5806 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5807 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5808 gen_rtx_CLOBBER (VOIDmode, ret))));
5809 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5810 return 1;
5811 }
5812 return 0;
5813 }
5814
5815 const char *
5816 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5817 {
5818 int divisor;
5819
5820 /* If the divisor is a constant, try to use one of the special
5821 opcodes .*/
5822 if (GET_CODE (operands[0]) == CONST_INT)
5823 {
5824 static char buf[100];
5825 divisor = INTVAL (operands[0]);
5826 if (!div_milli[divisor][unsignedp])
5827 {
5828 div_milli[divisor][unsignedp] = 1;
5829 if (unsignedp)
5830 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5831 else
5832 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5833 }
5834 if (unsignedp)
5835 {
5836 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5837 INTVAL (operands[0]));
5838 return pa_output_millicode_call (insn,
5839 gen_rtx_SYMBOL_REF (SImode, buf));
5840 }
5841 else
5842 {
5843 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5844 INTVAL (operands[0]));
5845 return pa_output_millicode_call (insn,
5846 gen_rtx_SYMBOL_REF (SImode, buf));
5847 }
5848 }
5849 /* Divisor isn't a special constant. */
5850 else
5851 {
5852 if (unsignedp)
5853 {
5854 import_milli (divU);
5855 return pa_output_millicode_call (insn,
5856 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5857 }
5858 else
5859 {
5860 import_milli (divI);
5861 return pa_output_millicode_call (insn,
5862 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5863 }
5864 }
5865 }
5866
5867 /* Output a $$rem millicode to do mod. */
5868
5869 const char *
5870 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5871 {
5872 if (unsignedp)
5873 {
5874 import_milli (remU);
5875 return pa_output_millicode_call (insn,
5876 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5877 }
5878 else
5879 {
5880 import_milli (remI);
5881 return pa_output_millicode_call (insn,
5882 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5883 }
5884 }
5885
5886 void
5887 pa_output_arg_descriptor (rtx_insn *call_insn)
5888 {
5889 const char *arg_regs[4];
5890 machine_mode arg_mode;
5891 rtx link;
5892 int i, output_flag = 0;
5893 int regno;
5894
5895 /* We neither need nor want argument location descriptors for the
5896 64bit runtime environment or the ELF32 environment. */
5897 if (TARGET_64BIT || TARGET_ELF32)
5898 return;
5899
5900 for (i = 0; i < 4; i++)
5901 arg_regs[i] = 0;
5902
5903 /* Specify explicitly that no argument relocations should take place
5904 if using the portable runtime calling conventions. */
5905 if (TARGET_PORTABLE_RUNTIME)
5906 {
5907 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5908 asm_out_file);
5909 return;
5910 }
5911
5912 gcc_assert (CALL_P (call_insn));
5913 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5914 link; link = XEXP (link, 1))
5915 {
5916 rtx use = XEXP (link, 0);
5917
5918 if (! (GET_CODE (use) == USE
5919 && GET_CODE (XEXP (use, 0)) == REG
5920 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5921 continue;
5922
5923 arg_mode = GET_MODE (XEXP (use, 0));
5924 regno = REGNO (XEXP (use, 0));
5925 if (regno >= 23 && regno <= 26)
5926 {
5927 arg_regs[26 - regno] = "GR";
5928 if (arg_mode == DImode)
5929 arg_regs[25 - regno] = "GR";
5930 }
5931 else if (regno >= 32 && regno <= 39)
5932 {
5933 if (arg_mode == SFmode)
5934 arg_regs[(regno - 32) / 2] = "FR";
5935 else
5936 {
5937 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5938 arg_regs[(regno - 34) / 2] = "FR";
5939 arg_regs[(regno - 34) / 2 + 1] = "FU";
5940 #else
5941 arg_regs[(regno - 34) / 2] = "FU";
5942 arg_regs[(regno - 34) / 2 + 1] = "FR";
5943 #endif
5944 }
5945 }
5946 }
5947 fputs ("\t.CALL ", asm_out_file);
5948 for (i = 0; i < 4; i++)
5949 {
5950 if (arg_regs[i])
5951 {
5952 if (output_flag++)
5953 fputc (',', asm_out_file);
5954 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5955 }
5956 }
5957 fputc ('\n', asm_out_file);
5958 }
5959 \f
5960 /* Inform reload about cases where moving X with a mode MODE to or from
5961 a register in RCLASS requires an extra scratch or immediate register.
5962 Return the class needed for the immediate register. */
5963
5964 static reg_class_t
5965 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5966 machine_mode mode, secondary_reload_info *sri)
5967 {
5968 int regno;
5969 enum reg_class rclass = (enum reg_class) rclass_i;
5970
5971 /* Handle the easy stuff first. */
5972 if (rclass == R1_REGS)
5973 return NO_REGS;
5974
5975 if (REG_P (x))
5976 {
5977 regno = REGNO (x);
5978 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5979 return NO_REGS;
5980 }
5981 else
5982 regno = -1;
5983
5984 /* If we have something like (mem (mem (...)), we can safely assume the
5985 inner MEM will end up in a general register after reloading, so there's
5986 no need for a secondary reload. */
5987 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5988 return NO_REGS;
5989
5990 /* Trying to load a constant into a FP register during PIC code
5991 generation requires %r1 as a scratch register. For float modes,
5992 the only legitimate constant is CONST0_RTX. However, there are
5993 a few patterns that accept constant double operands. */
5994 if (flag_pic
5995 && FP_REG_CLASS_P (rclass)
5996 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5997 {
5998 switch (mode)
5999 {
6000 case SImode:
6001 sri->icode = CODE_FOR_reload_insi_r1;
6002 break;
6003
6004 case DImode:
6005 sri->icode = CODE_FOR_reload_indi_r1;
6006 break;
6007
6008 case SFmode:
6009 sri->icode = CODE_FOR_reload_insf_r1;
6010 break;
6011
6012 case DFmode:
6013 sri->icode = CODE_FOR_reload_indf_r1;
6014 break;
6015
6016 default:
6017 gcc_unreachable ();
6018 }
6019 return NO_REGS;
6020 }
6021
6022 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6023 register when we're generating PIC code or when the operand isn't
6024 readonly. */
6025 if (pa_symbolic_expression_p (x))
6026 {
6027 if (GET_CODE (x) == HIGH)
6028 x = XEXP (x, 0);
6029
6030 if (flag_pic || !read_only_operand (x, VOIDmode))
6031 {
6032 switch (mode)
6033 {
6034 case SImode:
6035 sri->icode = CODE_FOR_reload_insi_r1;
6036 break;
6037
6038 case DImode:
6039 sri->icode = CODE_FOR_reload_indi_r1;
6040 break;
6041
6042 default:
6043 gcc_unreachable ();
6044 }
6045 return NO_REGS;
6046 }
6047 }
6048
6049 /* Profiling showed the PA port spends about 1.3% of its compilation
6050 time in true_regnum from calls inside pa_secondary_reload_class. */
6051 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6052 regno = true_regnum (x);
6053
6054 /* Handle reloads for floating point loads and stores. */
6055 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6056 && FP_REG_CLASS_P (rclass))
6057 {
6058 if (MEM_P (x))
6059 {
6060 x = XEXP (x, 0);
6061
6062 /* We don't need a secondary reload for indexed memory addresses.
6063
6064 When INT14_OK_STRICT is true, it might appear that we could
6065 directly allow register indirect memory addresses. However,
6066 this doesn't work because we don't support SUBREGs in
6067 floating-point register copies and reload doesn't tell us
6068 when it's going to use a SUBREG. */
6069 if (IS_INDEX_ADDR_P (x))
6070 return NO_REGS;
6071 }
6072
6073 /* Request a secondary reload with a general scratch register
6074 for everything else. ??? Could symbolic operands be handled
6075 directly when generating non-pic PA 2.0 code? */
6076 sri->icode = (in_p
6077 ? direct_optab_handler (reload_in_optab, mode)
6078 : direct_optab_handler (reload_out_optab, mode));
6079 return NO_REGS;
6080 }
6081
6082 /* A SAR<->FP register copy requires an intermediate general register
6083 and secondary memory. We need a secondary reload with a general
6084 scratch register for spills. */
6085 if (rclass == SHIFT_REGS)
6086 {
6087 /* Handle spill. */
6088 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6089 {
6090 sri->icode = (in_p
6091 ? direct_optab_handler (reload_in_optab, mode)
6092 : direct_optab_handler (reload_out_optab, mode));
6093 return NO_REGS;
6094 }
6095
6096 /* Handle FP copy. */
6097 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6098 return GENERAL_REGS;
6099 }
6100
6101 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6102 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6103 && FP_REG_CLASS_P (rclass))
6104 return GENERAL_REGS;
6105
6106 return NO_REGS;
6107 }
6108
6109 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6110 is only marked as live on entry by df-scan when it is a fixed
6111 register. It isn't a fixed register in the 64-bit runtime,
6112 so we need to mark it here. */
6113
6114 static void
6115 pa_extra_live_on_entry (bitmap regs)
6116 {
6117 if (TARGET_64BIT)
6118 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6119 }
6120
6121 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6122 to prevent it from being deleted. */
6123
6124 rtx
6125 pa_eh_return_handler_rtx (void)
6126 {
6127 rtx tmp;
6128
6129 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6130 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6131 tmp = gen_rtx_MEM (word_mode, tmp);
6132 tmp->volatil = 1;
6133 return tmp;
6134 }
6135
6136 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6137 by invisible reference. As a GCC extension, we also pass anything
6138 with a zero or variable size by reference.
6139
6140 The 64-bit runtime does not describe passing any types by invisible
6141 reference. The internals of GCC can't currently handle passing
6142 empty structures, and zero or variable length arrays when they are
6143 not passed entirely on the stack or by reference. Thus, as a GCC
6144 extension, we pass these types by reference. The HP compiler doesn't
6145 support these types, so hopefully there shouldn't be any compatibility
6146 issues. This may have to be revisited when HP releases a C99 compiler
6147 or updates the ABI. */
6148
6149 static bool
6150 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6151 machine_mode mode, const_tree type,
6152 bool named ATTRIBUTE_UNUSED)
6153 {
6154 HOST_WIDE_INT size;
6155
6156 if (type)
6157 size = int_size_in_bytes (type);
6158 else
6159 size = GET_MODE_SIZE (mode);
6160
6161 if (TARGET_64BIT)
6162 return size <= 0;
6163 else
6164 return size <= 0 || size > 8;
6165 }
6166
6167 enum direction
6168 pa_function_arg_padding (machine_mode mode, const_tree type)
6169 {
6170 if (mode == BLKmode
6171 || (TARGET_64BIT
6172 && type
6173 && (AGGREGATE_TYPE_P (type)
6174 || TREE_CODE (type) == COMPLEX_TYPE
6175 || TREE_CODE (type) == VECTOR_TYPE)))
6176 {
6177 /* Return none if justification is not required. */
6178 if (type
6179 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6180 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6181 return none;
6182
6183 /* The directions set here are ignored when a BLKmode argument larger
6184 than a word is placed in a register. Different code is used for
6185 the stack and registers. This makes it difficult to have a
6186 consistent data representation for both the stack and registers.
6187 For both runtimes, the justification and padding for arguments on
6188 the stack and in registers should be identical. */
6189 if (TARGET_64BIT)
6190 /* The 64-bit runtime specifies left justification for aggregates. */
6191 return upward;
6192 else
6193 /* The 32-bit runtime architecture specifies right justification.
6194 When the argument is passed on the stack, the argument is padded
6195 with garbage on the left. The HP compiler pads with zeros. */
6196 return downward;
6197 }
6198
6199 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6200 return downward;
6201 else
6202 return none;
6203 }
6204
6205 \f
6206 /* Do what is necessary for `va_start'. We look at the current function
6207 to determine if stdargs or varargs is used and fill in an initial
6208 va_list. A pointer to this constructor is returned. */
6209
6210 static rtx
6211 hppa_builtin_saveregs (void)
6212 {
6213 rtx offset, dest;
6214 tree fntype = TREE_TYPE (current_function_decl);
6215 int argadj = ((!stdarg_p (fntype))
6216 ? UNITS_PER_WORD : 0);
6217
6218 if (argadj)
6219 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6220 else
6221 offset = crtl->args.arg_offset_rtx;
6222
6223 if (TARGET_64BIT)
6224 {
6225 int i, off;
6226
6227 /* Adjust for varargs/stdarg differences. */
6228 if (argadj)
6229 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6230 else
6231 offset = crtl->args.arg_offset_rtx;
6232
6233 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6234 from the incoming arg pointer and growing to larger addresses. */
6235 for (i = 26, off = -64; i >= 19; i--, off += 8)
6236 emit_move_insn (gen_rtx_MEM (word_mode,
6237 plus_constant (Pmode,
6238 arg_pointer_rtx, off)),
6239 gen_rtx_REG (word_mode, i));
6240
6241 /* The incoming args pointer points just beyond the flushback area;
6242 normally this is not a serious concern. However, when we are doing
6243 varargs/stdargs we want to make the arg pointer point to the start
6244 of the incoming argument area. */
6245 emit_move_insn (virtual_incoming_args_rtx,
6246 plus_constant (Pmode, arg_pointer_rtx, -64));
6247
6248 /* Now return a pointer to the first anonymous argument. */
6249 return copy_to_reg (expand_binop (Pmode, add_optab,
6250 virtual_incoming_args_rtx,
6251 offset, 0, 0, OPTAB_LIB_WIDEN));
6252 }
6253
6254 /* Store general registers on the stack. */
6255 dest = gen_rtx_MEM (BLKmode,
6256 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6257 -16));
6258 set_mem_alias_set (dest, get_varargs_alias_set ());
6259 set_mem_align (dest, BITS_PER_WORD);
6260 move_block_from_reg (23, dest, 4);
6261
6262 /* move_block_from_reg will emit code to store the argument registers
6263 individually as scalar stores.
6264
6265 However, other insns may later load from the same addresses for
6266 a structure load (passing a struct to a varargs routine).
6267
6268 The alias code assumes that such aliasing can never happen, so we
6269 have to keep memory referencing insns from moving up beyond the
6270 last argument register store. So we emit a blockage insn here. */
6271 emit_insn (gen_blockage ());
6272
6273 return copy_to_reg (expand_binop (Pmode, add_optab,
6274 crtl->args.internal_arg_pointer,
6275 offset, 0, 0, OPTAB_LIB_WIDEN));
6276 }
6277
6278 static void
6279 hppa_va_start (tree valist, rtx nextarg)
6280 {
6281 nextarg = expand_builtin_saveregs ();
6282 std_expand_builtin_va_start (valist, nextarg);
6283 }
6284
6285 static tree
6286 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6287 gimple_seq *post_p)
6288 {
6289 if (TARGET_64BIT)
6290 {
6291 /* Args grow upward. We can use the generic routines. */
6292 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6293 }
6294 else /* !TARGET_64BIT */
6295 {
6296 tree ptr = build_pointer_type (type);
6297 tree valist_type;
6298 tree t, u;
6299 unsigned int size, ofs;
6300 bool indirect;
6301
6302 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6303 if (indirect)
6304 {
6305 type = ptr;
6306 ptr = build_pointer_type (type);
6307 }
6308 size = int_size_in_bytes (type);
6309 valist_type = TREE_TYPE (valist);
6310
6311 /* Args grow down. Not handled by generic routines. */
6312
6313 u = fold_convert (sizetype, size_in_bytes (type));
6314 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6315 t = fold_build_pointer_plus (valist, u);
6316
6317 /* Align to 4 or 8 byte boundary depending on argument size. */
6318
6319 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6320 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6321 t = fold_convert (valist_type, t);
6322
6323 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6324
6325 ofs = (8 - size) % 4;
6326 if (ofs != 0)
6327 t = fold_build_pointer_plus_hwi (t, ofs);
6328
6329 t = fold_convert (ptr, t);
6330 t = build_va_arg_indirect_ref (t);
6331
6332 if (indirect)
6333 t = build_va_arg_indirect_ref (t);
6334
6335 return t;
6336 }
6337 }
6338
6339 /* True if MODE is valid for the target. By "valid", we mean able to
6340 be manipulated in non-trivial ways. In particular, this means all
6341 the arithmetic is supported.
6342
6343 Currently, TImode is not valid as the HP 64-bit runtime documentation
6344 doesn't document the alignment and calling conventions for this type.
6345 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6346 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6347
6348 static bool
6349 pa_scalar_mode_supported_p (machine_mode mode)
6350 {
6351 int precision = GET_MODE_PRECISION (mode);
6352
6353 switch (GET_MODE_CLASS (mode))
6354 {
6355 case MODE_PARTIAL_INT:
6356 case MODE_INT:
6357 if (precision == CHAR_TYPE_SIZE)
6358 return true;
6359 if (precision == SHORT_TYPE_SIZE)
6360 return true;
6361 if (precision == INT_TYPE_SIZE)
6362 return true;
6363 if (precision == LONG_TYPE_SIZE)
6364 return true;
6365 if (precision == LONG_LONG_TYPE_SIZE)
6366 return true;
6367 return false;
6368
6369 case MODE_FLOAT:
6370 if (precision == FLOAT_TYPE_SIZE)
6371 return true;
6372 if (precision == DOUBLE_TYPE_SIZE)
6373 return true;
6374 if (precision == LONG_DOUBLE_TYPE_SIZE)
6375 return true;
6376 return false;
6377
6378 case MODE_DECIMAL_FLOAT:
6379 return false;
6380
6381 default:
6382 gcc_unreachable ();
6383 }
6384 }
6385
6386 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6387 it branches into the delay slot. Otherwise, return FALSE. */
6388
6389 static bool
6390 branch_to_delay_slot_p (rtx_insn *insn)
6391 {
6392 rtx_insn *jump_insn;
6393
6394 if (dbr_sequence_length ())
6395 return FALSE;
6396
6397 jump_insn = next_active_insn (JUMP_LABEL (insn));
6398 while (insn)
6399 {
6400 insn = next_active_insn (insn);
6401 if (jump_insn == insn)
6402 return TRUE;
6403
6404 /* We can't rely on the length of asms. So, we return FALSE when
6405 the branch is followed by an asm. */
6406 if (!insn
6407 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6408 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6409 || get_attr_length (insn) > 0)
6410 break;
6411 }
6412
6413 return FALSE;
6414 }
6415
6416 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6417
6418 This occurs when INSN has an unfilled delay slot and is followed
6419 by an asm. Disaster can occur if the asm is empty and the jump
6420 branches into the delay slot. So, we add a nop in the delay slot
6421 when this occurs. */
6422
6423 static bool
6424 branch_needs_nop_p (rtx_insn *insn)
6425 {
6426 rtx_insn *jump_insn;
6427
6428 if (dbr_sequence_length ())
6429 return FALSE;
6430
6431 jump_insn = next_active_insn (JUMP_LABEL (insn));
6432 while (insn)
6433 {
6434 insn = next_active_insn (insn);
6435 if (!insn || jump_insn == insn)
6436 return TRUE;
6437
6438 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6439 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6440 && get_attr_length (insn) > 0)
6441 break;
6442 }
6443
6444 return FALSE;
6445 }
6446
6447 /* Return TRUE if INSN, a forward jump insn, can use nullification
6448 to skip the following instruction. This avoids an extra cycle due
6449 to a mis-predicted branch when we fall through. */
6450
6451 static bool
6452 use_skip_p (rtx_insn *insn)
6453 {
6454 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6455
6456 while (insn)
6457 {
6458 insn = next_active_insn (insn);
6459
6460 /* We can't rely on the length of asms, so we can't skip asms. */
6461 if (!insn
6462 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6463 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6464 break;
6465 if (get_attr_length (insn) == 4
6466 && jump_insn == next_active_insn (insn))
6467 return TRUE;
6468 if (get_attr_length (insn) > 0)
6469 break;
6470 }
6471
6472 return FALSE;
6473 }
6474
6475 /* This routine handles all the normal conditional branch sequences we
6476 might need to generate. It handles compare immediate vs compare
6477 register, nullification of delay slots, varying length branches,
6478 negated branches, and all combinations of the above. It returns the
6479 output appropriate to emit the branch corresponding to all given
6480 parameters. */
6481
6482 const char *
6483 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6484 {
6485 static char buf[100];
6486 bool useskip;
6487 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6488 int length = get_attr_length (insn);
6489 int xdelay;
6490
6491 /* A conditional branch to the following instruction (e.g. the delay slot)
6492 is asking for a disaster. This can happen when not optimizing and
6493 when jump optimization fails.
6494
6495 While it is usually safe to emit nothing, this can fail if the
6496 preceding instruction is a nullified branch with an empty delay
6497 slot and the same branch target as this branch. We could check
6498 for this but jump optimization should eliminate nop jumps. It
6499 is always safe to emit a nop. */
6500 if (branch_to_delay_slot_p (insn))
6501 return "nop";
6502
6503 /* The doubleword form of the cmpib instruction doesn't have the LEU
6504 and GTU conditions while the cmpb instruction does. Since we accept
6505 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6506 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6507 operands[2] = gen_rtx_REG (DImode, 0);
6508 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6509 operands[1] = gen_rtx_REG (DImode, 0);
6510
6511 /* If this is a long branch with its delay slot unfilled, set `nullify'
6512 as it can nullify the delay slot and save a nop. */
6513 if (length == 8 && dbr_sequence_length () == 0)
6514 nullify = 1;
6515
6516 /* If this is a short forward conditional branch which did not get
6517 its delay slot filled, the delay slot can still be nullified. */
6518 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6519 nullify = forward_branch_p (insn);
6520
6521 /* A forward branch over a single nullified insn can be done with a
6522 comclr instruction. This avoids a single cycle penalty due to
6523 mis-predicted branch if we fall through (branch not taken). */
6524 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6525
6526 switch (length)
6527 {
6528 /* All short conditional branches except backwards with an unfilled
6529 delay slot. */
6530 case 4:
6531 if (useskip)
6532 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6533 else
6534 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6535 if (GET_MODE (operands[1]) == DImode)
6536 strcat (buf, "*");
6537 if (negated)
6538 strcat (buf, "%B3");
6539 else
6540 strcat (buf, "%S3");
6541 if (useskip)
6542 strcat (buf, " %2,%r1,%%r0");
6543 else if (nullify)
6544 {
6545 if (branch_needs_nop_p (insn))
6546 strcat (buf, ",n %2,%r1,%0%#");
6547 else
6548 strcat (buf, ",n %2,%r1,%0");
6549 }
6550 else
6551 strcat (buf, " %2,%r1,%0");
6552 break;
6553
6554 /* All long conditionals. Note a short backward branch with an
6555 unfilled delay slot is treated just like a long backward branch
6556 with an unfilled delay slot. */
6557 case 8:
6558 /* Handle weird backwards branch with a filled delay slot
6559 which is nullified. */
6560 if (dbr_sequence_length () != 0
6561 && ! forward_branch_p (insn)
6562 && nullify)
6563 {
6564 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6565 if (GET_MODE (operands[1]) == DImode)
6566 strcat (buf, "*");
6567 if (negated)
6568 strcat (buf, "%S3");
6569 else
6570 strcat (buf, "%B3");
6571 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6572 }
6573 /* Handle short backwards branch with an unfilled delay slot.
6574 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6575 taken and untaken branches. */
6576 else if (dbr_sequence_length () == 0
6577 && ! forward_branch_p (insn)
6578 && INSN_ADDRESSES_SET_P ()
6579 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6580 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6581 {
6582 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6583 if (GET_MODE (operands[1]) == DImode)
6584 strcat (buf, "*");
6585 if (negated)
6586 strcat (buf, "%B3 %2,%r1,%0%#");
6587 else
6588 strcat (buf, "%S3 %2,%r1,%0%#");
6589 }
6590 else
6591 {
6592 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6593 if (GET_MODE (operands[1]) == DImode)
6594 strcat (buf, "*");
6595 if (negated)
6596 strcat (buf, "%S3");
6597 else
6598 strcat (buf, "%B3");
6599 if (nullify)
6600 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6601 else
6602 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6603 }
6604 break;
6605
6606 default:
6607 /* The reversed conditional branch must branch over one additional
6608 instruction if the delay slot is filled and needs to be extracted
6609 by pa_output_lbranch. If the delay slot is empty or this is a
6610 nullified forward branch, the instruction after the reversed
6611 condition branch must be nullified. */
6612 if (dbr_sequence_length () == 0
6613 || (nullify && forward_branch_p (insn)))
6614 {
6615 nullify = 1;
6616 xdelay = 0;
6617 operands[4] = GEN_INT (length);
6618 }
6619 else
6620 {
6621 xdelay = 1;
6622 operands[4] = GEN_INT (length + 4);
6623 }
6624
6625 /* Create a reversed conditional branch which branches around
6626 the following insns. */
6627 if (GET_MODE (operands[1]) != DImode)
6628 {
6629 if (nullify)
6630 {
6631 if (negated)
6632 strcpy (buf,
6633 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6634 else
6635 strcpy (buf,
6636 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6637 }
6638 else
6639 {
6640 if (negated)
6641 strcpy (buf,
6642 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6643 else
6644 strcpy (buf,
6645 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6646 }
6647 }
6648 else
6649 {
6650 if (nullify)
6651 {
6652 if (negated)
6653 strcpy (buf,
6654 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6655 else
6656 strcpy (buf,
6657 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6658 }
6659 else
6660 {
6661 if (negated)
6662 strcpy (buf,
6663 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6664 else
6665 strcpy (buf,
6666 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6667 }
6668 }
6669
6670 output_asm_insn (buf, operands);
6671 return pa_output_lbranch (operands[0], insn, xdelay);
6672 }
6673 return buf;
6674 }
6675
6676 /* This routine handles output of long unconditional branches that
6677 exceed the maximum range of a simple branch instruction. Since
6678 we don't have a register available for the branch, we save register
6679 %r1 in the frame marker, load the branch destination DEST into %r1,
6680 execute the branch, and restore %r1 in the delay slot of the branch.
6681
6682 Since long branches may have an insn in the delay slot and the
6683 delay slot is used to restore %r1, we in general need to extract
6684 this insn and execute it before the branch. However, to facilitate
6685 use of this function by conditional branches, we also provide an
6686 option to not extract the delay insn so that it will be emitted
6687 after the long branch. So, if there is an insn in the delay slot,
6688 it is extracted if XDELAY is nonzero.
6689
6690 The lengths of the various long-branch sequences are 20, 16 and 24
6691 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6692
6693 const char *
6694 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6695 {
6696 rtx xoperands[2];
6697
6698 xoperands[0] = dest;
6699
6700 /* First, free up the delay slot. */
6701 if (xdelay && dbr_sequence_length () != 0)
6702 {
6703 /* We can't handle a jump in the delay slot. */
6704 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6705
6706 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6707 optimize, 0, NULL);
6708
6709 /* Now delete the delay insn. */
6710 SET_INSN_DELETED (NEXT_INSN (insn));
6711 }
6712
6713 /* Output an insn to save %r1. The runtime documentation doesn't
6714 specify whether the "Clean Up" slot in the callers frame can
6715 be clobbered by the callee. It isn't copied by HP's builtin
6716 alloca, so this suggests that it can be clobbered if necessary.
6717 The "Static Link" location is copied by HP builtin alloca, so
6718 we avoid using it. Using the cleanup slot might be a problem
6719 if we have to interoperate with languages that pass cleanup
6720 information. However, it should be possible to handle these
6721 situations with GCC's asm feature.
6722
6723 The "Current RP" slot is reserved for the called procedure, so
6724 we try to use it when we don't have a frame of our own. It's
6725 rather unlikely that we won't have a frame when we need to emit
6726 a very long branch.
6727
6728 Really the way to go long term is a register scavenger; goto
6729 the target of the jump and find a register which we can use
6730 as a scratch to hold the value in %r1. Then, we wouldn't have
6731 to free up the delay slot or clobber a slot that may be needed
6732 for other purposes. */
6733 if (TARGET_64BIT)
6734 {
6735 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6736 /* Use the return pointer slot in the frame marker. */
6737 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6738 else
6739 /* Use the slot at -40 in the frame marker since HP builtin
6740 alloca doesn't copy it. */
6741 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6742 }
6743 else
6744 {
6745 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6746 /* Use the return pointer slot in the frame marker. */
6747 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6748 else
6749 /* Use the "Clean Up" slot in the frame marker. In GCC,
6750 the only other use of this location is for copying a
6751 floating point double argument from a floating-point
6752 register to two general registers. The copy is done
6753 as an "atomic" operation when outputting a call, so it
6754 won't interfere with our using the location here. */
6755 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6756 }
6757
6758 if (TARGET_PORTABLE_RUNTIME)
6759 {
6760 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6761 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6762 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6763 }
6764 else if (flag_pic)
6765 {
6766 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6767 if (TARGET_SOM || !TARGET_GAS)
6768 {
6769 xoperands[1] = gen_label_rtx ();
6770 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6771 targetm.asm_out.internal_label (asm_out_file, "L",
6772 CODE_LABEL_NUMBER (xoperands[1]));
6773 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6774 }
6775 else
6776 {
6777 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6778 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6779 }
6780 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6781 }
6782 else
6783 /* Now output a very long branch to the original target. */
6784 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6785
6786 /* Now restore the value of %r1 in the delay slot. */
6787 if (TARGET_64BIT)
6788 {
6789 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6790 return "ldd -16(%%r30),%%r1";
6791 else
6792 return "ldd -40(%%r30),%%r1";
6793 }
6794 else
6795 {
6796 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6797 return "ldw -20(%%r30),%%r1";
6798 else
6799 return "ldw -12(%%r30),%%r1";
6800 }
6801 }
6802
6803 /* This routine handles all the branch-on-bit conditional branch sequences we
6804 might need to generate. It handles nullification of delay slots,
6805 varying length branches, negated branches and all combinations of the
6806 above. it returns the appropriate output template to emit the branch. */
6807
6808 const char *
6809 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6810 {
6811 static char buf[100];
6812 bool useskip;
6813 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6814 int length = get_attr_length (insn);
6815 int xdelay;
6816
6817 /* A conditional branch to the following instruction (e.g. the delay slot) is
6818 asking for a disaster. I do not think this can happen as this pattern
6819 is only used when optimizing; jump optimization should eliminate the
6820 jump. But be prepared just in case. */
6821
6822 if (branch_to_delay_slot_p (insn))
6823 return "nop";
6824
6825 /* If this is a long branch with its delay slot unfilled, set `nullify'
6826 as it can nullify the delay slot and save a nop. */
6827 if (length == 8 && dbr_sequence_length () == 0)
6828 nullify = 1;
6829
6830 /* If this is a short forward conditional branch which did not get
6831 its delay slot filled, the delay slot can still be nullified. */
6832 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6833 nullify = forward_branch_p (insn);
6834
6835 /* A forward branch over a single nullified insn can be done with a
6836 extrs instruction. This avoids a single cycle penalty due to
6837 mis-predicted branch if we fall through (branch not taken). */
6838 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6839
6840 switch (length)
6841 {
6842
6843 /* All short conditional branches except backwards with an unfilled
6844 delay slot. */
6845 case 4:
6846 if (useskip)
6847 strcpy (buf, "{extrs,|extrw,s,}");
6848 else
6849 strcpy (buf, "bb,");
6850 if (useskip && GET_MODE (operands[0]) == DImode)
6851 strcpy (buf, "extrd,s,*");
6852 else if (GET_MODE (operands[0]) == DImode)
6853 strcpy (buf, "bb,*");
6854 if ((which == 0 && negated)
6855 || (which == 1 && ! negated))
6856 strcat (buf, ">=");
6857 else
6858 strcat (buf, "<");
6859 if (useskip)
6860 strcat (buf, " %0,%1,1,%%r0");
6861 else if (nullify && negated)
6862 {
6863 if (branch_needs_nop_p (insn))
6864 strcat (buf, ",n %0,%1,%3%#");
6865 else
6866 strcat (buf, ",n %0,%1,%3");
6867 }
6868 else if (nullify && ! negated)
6869 {
6870 if (branch_needs_nop_p (insn))
6871 strcat (buf, ",n %0,%1,%2%#");
6872 else
6873 strcat (buf, ",n %0,%1,%2");
6874 }
6875 else if (! nullify && negated)
6876 strcat (buf, " %0,%1,%3");
6877 else if (! nullify && ! negated)
6878 strcat (buf, " %0,%1,%2");
6879 break;
6880
6881 /* All long conditionals. Note a short backward branch with an
6882 unfilled delay slot is treated just like a long backward branch
6883 with an unfilled delay slot. */
6884 case 8:
6885 /* Handle weird backwards branch with a filled delay slot
6886 which is nullified. */
6887 if (dbr_sequence_length () != 0
6888 && ! forward_branch_p (insn)
6889 && nullify)
6890 {
6891 strcpy (buf, "bb,");
6892 if (GET_MODE (operands[0]) == DImode)
6893 strcat (buf, "*");
6894 if ((which == 0 && negated)
6895 || (which == 1 && ! negated))
6896 strcat (buf, "<");
6897 else
6898 strcat (buf, ">=");
6899 if (negated)
6900 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6901 else
6902 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6903 }
6904 /* Handle short backwards branch with an unfilled delay slot.
6905 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6906 taken and untaken branches. */
6907 else if (dbr_sequence_length () == 0
6908 && ! forward_branch_p (insn)
6909 && INSN_ADDRESSES_SET_P ()
6910 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6911 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6912 {
6913 strcpy (buf, "bb,");
6914 if (GET_MODE (operands[0]) == DImode)
6915 strcat (buf, "*");
6916 if ((which == 0 && negated)
6917 || (which == 1 && ! negated))
6918 strcat (buf, ">=");
6919 else
6920 strcat (buf, "<");
6921 if (negated)
6922 strcat (buf, " %0,%1,%3%#");
6923 else
6924 strcat (buf, " %0,%1,%2%#");
6925 }
6926 else
6927 {
6928 if (GET_MODE (operands[0]) == DImode)
6929 strcpy (buf, "extrd,s,*");
6930 else
6931 strcpy (buf, "{extrs,|extrw,s,}");
6932 if ((which == 0 && negated)
6933 || (which == 1 && ! negated))
6934 strcat (buf, "<");
6935 else
6936 strcat (buf, ">=");
6937 if (nullify && negated)
6938 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6939 else if (nullify && ! negated)
6940 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6941 else if (negated)
6942 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6943 else
6944 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6945 }
6946 break;
6947
6948 default:
6949 /* The reversed conditional branch must branch over one additional
6950 instruction if the delay slot is filled and needs to be extracted
6951 by pa_output_lbranch. If the delay slot is empty or this is a
6952 nullified forward branch, the instruction after the reversed
6953 condition branch must be nullified. */
6954 if (dbr_sequence_length () == 0
6955 || (nullify && forward_branch_p (insn)))
6956 {
6957 nullify = 1;
6958 xdelay = 0;
6959 operands[4] = GEN_INT (length);
6960 }
6961 else
6962 {
6963 xdelay = 1;
6964 operands[4] = GEN_INT (length + 4);
6965 }
6966
6967 if (GET_MODE (operands[0]) == DImode)
6968 strcpy (buf, "bb,*");
6969 else
6970 strcpy (buf, "bb,");
6971 if ((which == 0 && negated)
6972 || (which == 1 && !negated))
6973 strcat (buf, "<");
6974 else
6975 strcat (buf, ">=");
6976 if (nullify)
6977 strcat (buf, ",n %0,%1,.+%4");
6978 else
6979 strcat (buf, " %0,%1,.+%4");
6980 output_asm_insn (buf, operands);
6981 return pa_output_lbranch (negated ? operands[3] : operands[2],
6982 insn, xdelay);
6983 }
6984 return buf;
6985 }
6986
6987 /* This routine handles all the branch-on-variable-bit conditional branch
6988 sequences we might need to generate. It handles nullification of delay
6989 slots, varying length branches, negated branches and all combinations
6990 of the above. it returns the appropriate output template to emit the
6991 branch. */
6992
6993 const char *
6994 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
6995 int which)
6996 {
6997 static char buf[100];
6998 bool useskip;
6999 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7000 int length = get_attr_length (insn);
7001 int xdelay;
7002
7003 /* A conditional branch to the following instruction (e.g. the delay slot) is
7004 asking for a disaster. I do not think this can happen as this pattern
7005 is only used when optimizing; jump optimization should eliminate the
7006 jump. But be prepared just in case. */
7007
7008 if (branch_to_delay_slot_p (insn))
7009 return "nop";
7010
7011 /* If this is a long branch with its delay slot unfilled, set `nullify'
7012 as it can nullify the delay slot and save a nop. */
7013 if (length == 8 && dbr_sequence_length () == 0)
7014 nullify = 1;
7015
7016 /* If this is a short forward conditional branch which did not get
7017 its delay slot filled, the delay slot can still be nullified. */
7018 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7019 nullify = forward_branch_p (insn);
7020
7021 /* A forward branch over a single nullified insn can be done with a
7022 extrs instruction. This avoids a single cycle penalty due to
7023 mis-predicted branch if we fall through (branch not taken). */
7024 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7025
7026 switch (length)
7027 {
7028
7029 /* All short conditional branches except backwards with an unfilled
7030 delay slot. */
7031 case 4:
7032 if (useskip)
7033 strcpy (buf, "{vextrs,|extrw,s,}");
7034 else
7035 strcpy (buf, "{bvb,|bb,}");
7036 if (useskip && GET_MODE (operands[0]) == DImode)
7037 strcpy (buf, "extrd,s,*");
7038 else if (GET_MODE (operands[0]) == DImode)
7039 strcpy (buf, "bb,*");
7040 if ((which == 0 && negated)
7041 || (which == 1 && ! negated))
7042 strcat (buf, ">=");
7043 else
7044 strcat (buf, "<");
7045 if (useskip)
7046 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7047 else if (nullify && negated)
7048 {
7049 if (branch_needs_nop_p (insn))
7050 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7051 else
7052 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7053 }
7054 else if (nullify && ! negated)
7055 {
7056 if (branch_needs_nop_p (insn))
7057 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7058 else
7059 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7060 }
7061 else if (! nullify && negated)
7062 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7063 else if (! nullify && ! negated)
7064 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7065 break;
7066
7067 /* All long conditionals. Note a short backward branch with an
7068 unfilled delay slot is treated just like a long backward branch
7069 with an unfilled delay slot. */
7070 case 8:
7071 /* Handle weird backwards branch with a filled delay slot
7072 which is nullified. */
7073 if (dbr_sequence_length () != 0
7074 && ! forward_branch_p (insn)
7075 && nullify)
7076 {
7077 strcpy (buf, "{bvb,|bb,}");
7078 if (GET_MODE (operands[0]) == DImode)
7079 strcat (buf, "*");
7080 if ((which == 0 && negated)
7081 || (which == 1 && ! negated))
7082 strcat (buf, "<");
7083 else
7084 strcat (buf, ">=");
7085 if (negated)
7086 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7087 else
7088 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7089 }
7090 /* Handle short backwards branch with an unfilled delay slot.
7091 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7092 taken and untaken branches. */
7093 else if (dbr_sequence_length () == 0
7094 && ! forward_branch_p (insn)
7095 && INSN_ADDRESSES_SET_P ()
7096 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7097 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7098 {
7099 strcpy (buf, "{bvb,|bb,}");
7100 if (GET_MODE (operands[0]) == DImode)
7101 strcat (buf, "*");
7102 if ((which == 0 && negated)
7103 || (which == 1 && ! negated))
7104 strcat (buf, ">=");
7105 else
7106 strcat (buf, "<");
7107 if (negated)
7108 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7109 else
7110 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7111 }
7112 else
7113 {
7114 strcpy (buf, "{vextrs,|extrw,s,}");
7115 if (GET_MODE (operands[0]) == DImode)
7116 strcpy (buf, "extrd,s,*");
7117 if ((which == 0 && negated)
7118 || (which == 1 && ! negated))
7119 strcat (buf, "<");
7120 else
7121 strcat (buf, ">=");
7122 if (nullify && negated)
7123 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7124 else if (nullify && ! negated)
7125 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7126 else if (negated)
7127 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7128 else
7129 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7130 }
7131 break;
7132
7133 default:
7134 /* The reversed conditional branch must branch over one additional
7135 instruction if the delay slot is filled and needs to be extracted
7136 by pa_output_lbranch. If the delay slot is empty or this is a
7137 nullified forward branch, the instruction after the reversed
7138 condition branch must be nullified. */
7139 if (dbr_sequence_length () == 0
7140 || (nullify && forward_branch_p (insn)))
7141 {
7142 nullify = 1;
7143 xdelay = 0;
7144 operands[4] = GEN_INT (length);
7145 }
7146 else
7147 {
7148 xdelay = 1;
7149 operands[4] = GEN_INT (length + 4);
7150 }
7151
7152 if (GET_MODE (operands[0]) == DImode)
7153 strcpy (buf, "bb,*");
7154 else
7155 strcpy (buf, "{bvb,|bb,}");
7156 if ((which == 0 && negated)
7157 || (which == 1 && !negated))
7158 strcat (buf, "<");
7159 else
7160 strcat (buf, ">=");
7161 if (nullify)
7162 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7163 else
7164 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7165 output_asm_insn (buf, operands);
7166 return pa_output_lbranch (negated ? operands[3] : operands[2],
7167 insn, xdelay);
7168 }
7169 return buf;
7170 }
7171
7172 /* Return the output template for emitting a dbra type insn.
7173
7174 Note it may perform some output operations on its own before
7175 returning the final output string. */
7176 const char *
7177 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7178 {
7179 int length = get_attr_length (insn);
7180
7181 /* A conditional branch to the following instruction (e.g. the delay slot) is
7182 asking for a disaster. Be prepared! */
7183
7184 if (branch_to_delay_slot_p (insn))
7185 {
7186 if (which_alternative == 0)
7187 return "ldo %1(%0),%0";
7188 else if (which_alternative == 1)
7189 {
7190 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7191 output_asm_insn ("ldw -16(%%r30),%4", operands);
7192 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7193 return "{fldws|fldw} -16(%%r30),%0";
7194 }
7195 else
7196 {
7197 output_asm_insn ("ldw %0,%4", operands);
7198 return "ldo %1(%4),%4\n\tstw %4,%0";
7199 }
7200 }
7201
7202 if (which_alternative == 0)
7203 {
7204 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7205 int xdelay;
7206
7207 /* If this is a long branch with its delay slot unfilled, set `nullify'
7208 as it can nullify the delay slot and save a nop. */
7209 if (length == 8 && dbr_sequence_length () == 0)
7210 nullify = 1;
7211
7212 /* If this is a short forward conditional branch which did not get
7213 its delay slot filled, the delay slot can still be nullified. */
7214 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7215 nullify = forward_branch_p (insn);
7216
7217 switch (length)
7218 {
7219 case 4:
7220 if (nullify)
7221 {
7222 if (branch_needs_nop_p (insn))
7223 return "addib,%C2,n %1,%0,%3%#";
7224 else
7225 return "addib,%C2,n %1,%0,%3";
7226 }
7227 else
7228 return "addib,%C2 %1,%0,%3";
7229
7230 case 8:
7231 /* Handle weird backwards branch with a fulled delay slot
7232 which is nullified. */
7233 if (dbr_sequence_length () != 0
7234 && ! forward_branch_p (insn)
7235 && nullify)
7236 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7237 /* Handle short backwards branch with an unfilled delay slot.
7238 Using a addb;nop rather than addi;bl saves 1 cycle for both
7239 taken and untaken branches. */
7240 else if (dbr_sequence_length () == 0
7241 && ! forward_branch_p (insn)
7242 && INSN_ADDRESSES_SET_P ()
7243 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7244 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7245 return "addib,%C2 %1,%0,%3%#";
7246
7247 /* Handle normal cases. */
7248 if (nullify)
7249 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7250 else
7251 return "addi,%N2 %1,%0,%0\n\tb %3";
7252
7253 default:
7254 /* The reversed conditional branch must branch over one additional
7255 instruction if the delay slot is filled and needs to be extracted
7256 by pa_output_lbranch. If the delay slot is empty or this is a
7257 nullified forward branch, the instruction after the reversed
7258 condition branch must be nullified. */
7259 if (dbr_sequence_length () == 0
7260 || (nullify && forward_branch_p (insn)))
7261 {
7262 nullify = 1;
7263 xdelay = 0;
7264 operands[4] = GEN_INT (length);
7265 }
7266 else
7267 {
7268 xdelay = 1;
7269 operands[4] = GEN_INT (length + 4);
7270 }
7271
7272 if (nullify)
7273 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7274 else
7275 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7276
7277 return pa_output_lbranch (operands[3], insn, xdelay);
7278 }
7279
7280 }
7281 /* Deal with gross reload from FP register case. */
7282 else if (which_alternative == 1)
7283 {
7284 /* Move loop counter from FP register to MEM then into a GR,
7285 increment the GR, store the GR into MEM, and finally reload
7286 the FP register from MEM from within the branch's delay slot. */
7287 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7288 operands);
7289 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7290 if (length == 24)
7291 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7292 else if (length == 28)
7293 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7294 else
7295 {
7296 operands[5] = GEN_INT (length - 16);
7297 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7298 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7299 return pa_output_lbranch (operands[3], insn, 0);
7300 }
7301 }
7302 /* Deal with gross reload from memory case. */
7303 else
7304 {
7305 /* Reload loop counter from memory, the store back to memory
7306 happens in the branch's delay slot. */
7307 output_asm_insn ("ldw %0,%4", operands);
7308 if (length == 12)
7309 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7310 else if (length == 16)
7311 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7312 else
7313 {
7314 operands[5] = GEN_INT (length - 4);
7315 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7316 return pa_output_lbranch (operands[3], insn, 0);
7317 }
7318 }
7319 }
7320
7321 /* Return the output template for emitting a movb type insn.
7322
7323 Note it may perform some output operations on its own before
7324 returning the final output string. */
7325 const char *
7326 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7327 int reverse_comparison)
7328 {
7329 int length = get_attr_length (insn);
7330
7331 /* A conditional branch to the following instruction (e.g. the delay slot) is
7332 asking for a disaster. Be prepared! */
7333
7334 if (branch_to_delay_slot_p (insn))
7335 {
7336 if (which_alternative == 0)
7337 return "copy %1,%0";
7338 else if (which_alternative == 1)
7339 {
7340 output_asm_insn ("stw %1,-16(%%r30)", operands);
7341 return "{fldws|fldw} -16(%%r30),%0";
7342 }
7343 else if (which_alternative == 2)
7344 return "stw %1,%0";
7345 else
7346 return "mtsar %r1";
7347 }
7348
7349 /* Support the second variant. */
7350 if (reverse_comparison)
7351 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7352
7353 if (which_alternative == 0)
7354 {
7355 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7356 int xdelay;
7357
7358 /* If this is a long branch with its delay slot unfilled, set `nullify'
7359 as it can nullify the delay slot and save a nop. */
7360 if (length == 8 && dbr_sequence_length () == 0)
7361 nullify = 1;
7362
7363 /* If this is a short forward conditional branch which did not get
7364 its delay slot filled, the delay slot can still be nullified. */
7365 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7366 nullify = forward_branch_p (insn);
7367
7368 switch (length)
7369 {
7370 case 4:
7371 if (nullify)
7372 {
7373 if (branch_needs_nop_p (insn))
7374 return "movb,%C2,n %1,%0,%3%#";
7375 else
7376 return "movb,%C2,n %1,%0,%3";
7377 }
7378 else
7379 return "movb,%C2 %1,%0,%3";
7380
7381 case 8:
7382 /* Handle weird backwards branch with a filled delay slot
7383 which is nullified. */
7384 if (dbr_sequence_length () != 0
7385 && ! forward_branch_p (insn)
7386 && nullify)
7387 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7388
7389 /* Handle short backwards branch with an unfilled delay slot.
7390 Using a movb;nop rather than or;bl saves 1 cycle for both
7391 taken and untaken branches. */
7392 else if (dbr_sequence_length () == 0
7393 && ! forward_branch_p (insn)
7394 && INSN_ADDRESSES_SET_P ()
7395 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7396 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7397 return "movb,%C2 %1,%0,%3%#";
7398 /* Handle normal cases. */
7399 if (nullify)
7400 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7401 else
7402 return "or,%N2 %1,%%r0,%0\n\tb %3";
7403
7404 default:
7405 /* The reversed conditional branch must branch over one additional
7406 instruction if the delay slot is filled and needs to be extracted
7407 by pa_output_lbranch. If the delay slot is empty or this is a
7408 nullified forward branch, the instruction after the reversed
7409 condition branch must be nullified. */
7410 if (dbr_sequence_length () == 0
7411 || (nullify && forward_branch_p (insn)))
7412 {
7413 nullify = 1;
7414 xdelay = 0;
7415 operands[4] = GEN_INT (length);
7416 }
7417 else
7418 {
7419 xdelay = 1;
7420 operands[4] = GEN_INT (length + 4);
7421 }
7422
7423 if (nullify)
7424 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7425 else
7426 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7427
7428 return pa_output_lbranch (operands[3], insn, xdelay);
7429 }
7430 }
7431 /* Deal with gross reload for FP destination register case. */
7432 else if (which_alternative == 1)
7433 {
7434 /* Move source register to MEM, perform the branch test, then
7435 finally load the FP register from MEM from within the branch's
7436 delay slot. */
7437 output_asm_insn ("stw %1,-16(%%r30)", operands);
7438 if (length == 12)
7439 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7440 else if (length == 16)
7441 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7442 else
7443 {
7444 operands[4] = GEN_INT (length - 4);
7445 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7446 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7447 return pa_output_lbranch (operands[3], insn, 0);
7448 }
7449 }
7450 /* Deal with gross reload from memory case. */
7451 else if (which_alternative == 2)
7452 {
7453 /* Reload loop counter from memory, the store back to memory
7454 happens in the branch's delay slot. */
7455 if (length == 8)
7456 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7457 else if (length == 12)
7458 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7459 else
7460 {
7461 operands[4] = GEN_INT (length);
7462 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7463 operands);
7464 return pa_output_lbranch (operands[3], insn, 0);
7465 }
7466 }
7467 /* Handle SAR as a destination. */
7468 else
7469 {
7470 if (length == 8)
7471 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7472 else if (length == 12)
7473 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7474 else
7475 {
7476 operands[4] = GEN_INT (length);
7477 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7478 operands);
7479 return pa_output_lbranch (operands[3], insn, 0);
7480 }
7481 }
7482 }
7483
7484 /* Copy any FP arguments in INSN into integer registers. */
7485 static void
7486 copy_fp_args (rtx_insn *insn)
7487 {
7488 rtx link;
7489 rtx xoperands[2];
7490
7491 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7492 {
7493 int arg_mode, regno;
7494 rtx use = XEXP (link, 0);
7495
7496 if (! (GET_CODE (use) == USE
7497 && GET_CODE (XEXP (use, 0)) == REG
7498 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7499 continue;
7500
7501 arg_mode = GET_MODE (XEXP (use, 0));
7502 regno = REGNO (XEXP (use, 0));
7503
7504 /* Is it a floating point register? */
7505 if (regno >= 32 && regno <= 39)
7506 {
7507 /* Copy the FP register into an integer register via memory. */
7508 if (arg_mode == SFmode)
7509 {
7510 xoperands[0] = XEXP (use, 0);
7511 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7512 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7513 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7514 }
7515 else
7516 {
7517 xoperands[0] = XEXP (use, 0);
7518 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7519 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7520 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7521 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7522 }
7523 }
7524 }
7525 }
7526
7527 /* Compute length of the FP argument copy sequence for INSN. */
7528 static int
7529 length_fp_args (rtx_insn *insn)
7530 {
7531 int length = 0;
7532 rtx link;
7533
7534 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7535 {
7536 int arg_mode, regno;
7537 rtx use = XEXP (link, 0);
7538
7539 if (! (GET_CODE (use) == USE
7540 && GET_CODE (XEXP (use, 0)) == REG
7541 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7542 continue;
7543
7544 arg_mode = GET_MODE (XEXP (use, 0));
7545 regno = REGNO (XEXP (use, 0));
7546
7547 /* Is it a floating point register? */
7548 if (regno >= 32 && regno <= 39)
7549 {
7550 if (arg_mode == SFmode)
7551 length += 8;
7552 else
7553 length += 12;
7554 }
7555 }
7556
7557 return length;
7558 }
7559
7560 /* Return the attribute length for the millicode call instruction INSN.
7561 The length must match the code generated by pa_output_millicode_call.
7562 We include the delay slot in the returned length as it is better to
7563 over estimate the length than to under estimate it. */
7564
7565 int
7566 pa_attr_length_millicode_call (rtx_insn *insn)
7567 {
7568 unsigned long distance = -1;
7569 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7570
7571 if (INSN_ADDRESSES_SET_P ())
7572 {
7573 distance = (total + insn_current_reference_address (insn));
7574 if (distance < total)
7575 distance = -1;
7576 }
7577
7578 if (TARGET_64BIT)
7579 {
7580 if (!TARGET_LONG_CALLS && distance < 7600000)
7581 return 8;
7582
7583 return 20;
7584 }
7585 else if (TARGET_PORTABLE_RUNTIME)
7586 return 24;
7587 else
7588 {
7589 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7590 return 8;
7591
7592 if (!flag_pic)
7593 return 12;
7594
7595 return 24;
7596 }
7597 }
7598
7599 /* INSN is a function call.
7600
7601 CALL_DEST is the routine we are calling. */
7602
7603 const char *
7604 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7605 {
7606 int attr_length = get_attr_length (insn);
7607 int seq_length = dbr_sequence_length ();
7608 rtx xoperands[3];
7609
7610 xoperands[0] = call_dest;
7611 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7612
7613 /* Handle the common case where we are sure that the branch will
7614 reach the beginning of the $CODE$ subspace. The within reach
7615 form of the $$sh_func_adrs call has a length of 28. Because it
7616 has an attribute type of sh_func_adrs, it never has a nonzero
7617 sequence length (i.e., the delay slot is never filled). */
7618 if (!TARGET_LONG_CALLS
7619 && (attr_length == 8
7620 || (attr_length == 28
7621 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7622 {
7623 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7624 }
7625 else
7626 {
7627 if (TARGET_64BIT)
7628 {
7629 /* It might seem that one insn could be saved by accessing
7630 the millicode function using the linkage table. However,
7631 this doesn't work in shared libraries and other dynamically
7632 loaded objects. Using a pc-relative sequence also avoids
7633 problems related to the implicit use of the gp register. */
7634 output_asm_insn ("b,l .+8,%%r1", xoperands);
7635
7636 if (TARGET_GAS)
7637 {
7638 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7639 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7640 }
7641 else
7642 {
7643 xoperands[1] = gen_label_rtx ();
7644 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7645 targetm.asm_out.internal_label (asm_out_file, "L",
7646 CODE_LABEL_NUMBER (xoperands[1]));
7647 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7648 }
7649
7650 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7651 }
7652 else if (TARGET_PORTABLE_RUNTIME)
7653 {
7654 /* Pure portable runtime doesn't allow be/ble; we also don't
7655 have PIC support in the assembler/linker, so this sequence
7656 is needed. */
7657
7658 /* Get the address of our target into %r1. */
7659 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7660 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7661
7662 /* Get our return address into %r31. */
7663 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7664 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7665
7666 /* Jump to our target address in %r1. */
7667 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7668 }
7669 else if (!flag_pic)
7670 {
7671 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7672 if (TARGET_PA_20)
7673 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7674 else
7675 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7676 }
7677 else
7678 {
7679 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7680 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7681
7682 if (TARGET_SOM || !TARGET_GAS)
7683 {
7684 /* The HP assembler can generate relocations for the
7685 difference of two symbols. GAS can do this for a
7686 millicode symbol but not an arbitrary external
7687 symbol when generating SOM output. */
7688 xoperands[1] = gen_label_rtx ();
7689 targetm.asm_out.internal_label (asm_out_file, "L",
7690 CODE_LABEL_NUMBER (xoperands[1]));
7691 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7692 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7693 }
7694 else
7695 {
7696 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7697 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7698 xoperands);
7699 }
7700
7701 /* Jump to our target address in %r1. */
7702 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7703 }
7704 }
7705
7706 if (seq_length == 0)
7707 output_asm_insn ("nop", xoperands);
7708
7709 return "";
7710 }
7711
7712 /* Return the attribute length of the call instruction INSN. The SIBCALL
7713 flag indicates whether INSN is a regular call or a sibling call. The
7714 length returned must be longer than the code actually generated by
7715 pa_output_call. Since branch shortening is done before delay branch
7716 sequencing, there is no way to determine whether or not the delay
7717 slot will be filled during branch shortening. Even when the delay
7718 slot is filled, we may have to add a nop if the delay slot contains
7719 a branch that can't reach its target. Thus, we always have to include
7720 the delay slot in the length estimate. This used to be done in
7721 pa_adjust_insn_length but we do it here now as some sequences always
7722 fill the delay slot and we can save four bytes in the estimate for
7723 these sequences. */
7724
7725 int
7726 pa_attr_length_call (rtx_insn *insn, int sibcall)
7727 {
7728 int local_call;
7729 rtx call, call_dest;
7730 tree call_decl;
7731 int length = 0;
7732 rtx pat = PATTERN (insn);
7733 unsigned long distance = -1;
7734
7735 gcc_assert (CALL_P (insn));
7736
7737 if (INSN_ADDRESSES_SET_P ())
7738 {
7739 unsigned long total;
7740
7741 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7742 distance = (total + insn_current_reference_address (insn));
7743 if (distance < total)
7744 distance = -1;
7745 }
7746
7747 gcc_assert (GET_CODE (pat) == PARALLEL);
7748
7749 /* Get the call rtx. */
7750 call = XVECEXP (pat, 0, 0);
7751 if (GET_CODE (call) == SET)
7752 call = SET_SRC (call);
7753
7754 gcc_assert (GET_CODE (call) == CALL);
7755
7756 /* Determine if this is a local call. */
7757 call_dest = XEXP (XEXP (call, 0), 0);
7758 call_decl = SYMBOL_REF_DECL (call_dest);
7759 local_call = call_decl && targetm.binds_local_p (call_decl);
7760
7761 /* pc-relative branch. */
7762 if (!TARGET_LONG_CALLS
7763 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7764 || distance < MAX_PCREL17F_OFFSET))
7765 length += 8;
7766
7767 /* 64-bit plabel sequence. */
7768 else if (TARGET_64BIT && !local_call)
7769 length += sibcall ? 28 : 24;
7770
7771 /* non-pic long absolute branch sequence. */
7772 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7773 length += 12;
7774
7775 /* long pc-relative branch sequence. */
7776 else if (TARGET_LONG_PIC_SDIFF_CALL
7777 || (TARGET_GAS && !TARGET_SOM
7778 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7779 {
7780 length += 20;
7781
7782 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7783 length += 8;
7784 }
7785
7786 /* 32-bit plabel sequence. */
7787 else
7788 {
7789 length += 32;
7790
7791 if (TARGET_SOM)
7792 length += length_fp_args (insn);
7793
7794 if (flag_pic)
7795 length += 4;
7796
7797 if (!TARGET_PA_20)
7798 {
7799 if (!sibcall)
7800 length += 8;
7801
7802 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7803 length += 8;
7804 }
7805 }
7806
7807 return length;
7808 }
7809
7810 /* INSN is a function call.
7811
7812 CALL_DEST is the routine we are calling. */
7813
7814 const char *
7815 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7816 {
7817 int seq_length = dbr_sequence_length ();
7818 tree call_decl = SYMBOL_REF_DECL (call_dest);
7819 int local_call = call_decl && targetm.binds_local_p (call_decl);
7820 rtx xoperands[2];
7821
7822 xoperands[0] = call_dest;
7823
7824 /* Handle the common case where we're sure that the branch will reach
7825 the beginning of the "$CODE$" subspace. This is the beginning of
7826 the current function if we are in a named section. */
7827 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7828 {
7829 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7830 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7831 }
7832 else
7833 {
7834 if (TARGET_64BIT && !local_call)
7835 {
7836 /* ??? As far as I can tell, the HP linker doesn't support the
7837 long pc-relative sequence described in the 64-bit runtime
7838 architecture. So, we use a slightly longer indirect call. */
7839 xoperands[0] = pa_get_deferred_plabel (call_dest);
7840 xoperands[1] = gen_label_rtx ();
7841
7842 /* If this isn't a sibcall, we put the load of %r27 into the
7843 delay slot. We can't do this in a sibcall as we don't
7844 have a second call-clobbered scratch register available.
7845 We don't need to do anything when generating fast indirect
7846 calls. */
7847 if (seq_length != 0 && !sibcall)
7848 {
7849 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7850 optimize, 0, NULL);
7851
7852 /* Now delete the delay insn. */
7853 SET_INSN_DELETED (NEXT_INSN (insn));
7854 seq_length = 0;
7855 }
7856
7857 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7858 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7859 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7860
7861 if (sibcall)
7862 {
7863 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7864 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7865 output_asm_insn ("bve (%%r1)", xoperands);
7866 }
7867 else
7868 {
7869 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7870 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7871 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7872 seq_length = 1;
7873 }
7874 }
7875 else
7876 {
7877 int indirect_call = 0;
7878
7879 /* Emit a long call. There are several different sequences
7880 of increasing length and complexity. In most cases,
7881 they don't allow an instruction in the delay slot. */
7882 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7883 && !TARGET_LONG_PIC_SDIFF_CALL
7884 && !(TARGET_GAS && !TARGET_SOM
7885 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7886 && !TARGET_64BIT)
7887 indirect_call = 1;
7888
7889 if (seq_length != 0
7890 && !sibcall
7891 && (!TARGET_PA_20
7892 || indirect_call
7893 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7894 {
7895 /* A non-jump insn in the delay slot. By definition we can
7896 emit this insn before the call (and in fact before argument
7897 relocating. */
7898 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7899 NULL);
7900
7901 /* Now delete the delay insn. */
7902 SET_INSN_DELETED (NEXT_INSN (insn));
7903 seq_length = 0;
7904 }
7905
7906 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7907 {
7908 /* This is the best sequence for making long calls in
7909 non-pic code. Unfortunately, GNU ld doesn't provide
7910 the stub needed for external calls, and GAS's support
7911 for this with the SOM linker is buggy. It is safe
7912 to use this for local calls. */
7913 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7914 if (sibcall)
7915 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7916 else
7917 {
7918 if (TARGET_PA_20)
7919 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7920 xoperands);
7921 else
7922 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7923
7924 output_asm_insn ("copy %%r31,%%r2", xoperands);
7925 seq_length = 1;
7926 }
7927 }
7928 else
7929 {
7930 if (TARGET_LONG_PIC_SDIFF_CALL)
7931 {
7932 /* The HP assembler and linker can handle relocations
7933 for the difference of two symbols. The HP assembler
7934 recognizes the sequence as a pc-relative call and
7935 the linker provides stubs when needed. */
7936 xoperands[1] = gen_label_rtx ();
7937 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7938 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7939 targetm.asm_out.internal_label (asm_out_file, "L",
7940 CODE_LABEL_NUMBER (xoperands[1]));
7941 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7942 }
7943 else if (TARGET_GAS && !TARGET_SOM
7944 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7945 {
7946 /* GAS currently can't generate the relocations that
7947 are needed for the SOM linker under HP-UX using this
7948 sequence. The GNU linker doesn't generate the stubs
7949 that are needed for external calls on TARGET_ELF32
7950 with this sequence. For now, we have to use a
7951 longer plabel sequence when using GAS. */
7952 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7953 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7954 xoperands);
7955 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7956 xoperands);
7957 }
7958 else
7959 {
7960 /* Emit a long plabel-based call sequence. This is
7961 essentially an inline implementation of $$dyncall.
7962 We don't actually try to call $$dyncall as this is
7963 as difficult as calling the function itself. */
7964 xoperands[0] = pa_get_deferred_plabel (call_dest);
7965 xoperands[1] = gen_label_rtx ();
7966
7967 /* Since the call is indirect, FP arguments in registers
7968 need to be copied to the general registers. Then, the
7969 argument relocation stub will copy them back. */
7970 if (TARGET_SOM)
7971 copy_fp_args (insn);
7972
7973 if (flag_pic)
7974 {
7975 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7976 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7977 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7978 }
7979 else
7980 {
7981 output_asm_insn ("addil LR'%0-$global$,%%r27",
7982 xoperands);
7983 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7984 xoperands);
7985 }
7986
7987 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7988 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7989 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7990 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7991
7992 if (!sibcall && !TARGET_PA_20)
7993 {
7994 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7995 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7996 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7997 else
7998 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7999 }
8000 }
8001
8002 if (TARGET_PA_20)
8003 {
8004 if (sibcall)
8005 output_asm_insn ("bve (%%r1)", xoperands);
8006 else
8007 {
8008 if (indirect_call)
8009 {
8010 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8011 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8012 seq_length = 1;
8013 }
8014 else
8015 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8016 }
8017 }
8018 else
8019 {
8020 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8021 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8022 xoperands);
8023
8024 if (sibcall)
8025 {
8026 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8027 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8028 else
8029 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8030 }
8031 else
8032 {
8033 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8034 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8035 else
8036 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8037
8038 if (indirect_call)
8039 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8040 else
8041 output_asm_insn ("copy %%r31,%%r2", xoperands);
8042 seq_length = 1;
8043 }
8044 }
8045 }
8046 }
8047 }
8048
8049 if (seq_length == 0)
8050 output_asm_insn ("nop", xoperands);
8051
8052 return "";
8053 }
8054
8055 /* Return the attribute length of the indirect call instruction INSN.
8056 The length must match the code generated by output_indirect call.
8057 The returned length includes the delay slot. Currently, the delay
8058 slot of an indirect call sequence is not exposed and it is used by
8059 the sequence itself. */
8060
8061 int
8062 pa_attr_length_indirect_call (rtx_insn *insn)
8063 {
8064 unsigned long distance = -1;
8065 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8066
8067 if (INSN_ADDRESSES_SET_P ())
8068 {
8069 distance = (total + insn_current_reference_address (insn));
8070 if (distance < total)
8071 distance = -1;
8072 }
8073
8074 if (TARGET_64BIT)
8075 return 12;
8076
8077 if (TARGET_FAST_INDIRECT_CALLS
8078 || (!TARGET_LONG_CALLS
8079 && !TARGET_PORTABLE_RUNTIME
8080 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8081 || distance < MAX_PCREL17F_OFFSET)))
8082 return 8;
8083
8084 if (flag_pic)
8085 return 20;
8086
8087 if (TARGET_PORTABLE_RUNTIME)
8088 return 16;
8089
8090 /* Out of reach, can use ble. */
8091 return 12;
8092 }
8093
8094 const char *
8095 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8096 {
8097 rtx xoperands[1];
8098
8099 if (TARGET_64BIT)
8100 {
8101 xoperands[0] = call_dest;
8102 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8103 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8104 return "";
8105 }
8106
8107 /* First the special case for kernels, level 0 systems, etc. */
8108 if (TARGET_FAST_INDIRECT_CALLS)
8109 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8110
8111 /* Now the normal case -- we can reach $$dyncall directly or
8112 we're sure that we can get there via a long-branch stub.
8113
8114 No need to check target flags as the length uniquely identifies
8115 the remaining cases. */
8116 if (pa_attr_length_indirect_call (insn) == 8)
8117 {
8118 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8119 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8120 variant of the B,L instruction can't be used on the SOM target. */
8121 if (TARGET_PA_20 && !TARGET_SOM)
8122 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8123 else
8124 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8125 }
8126
8127 /* Long millicode call, but we are not generating PIC or portable runtime
8128 code. */
8129 if (pa_attr_length_indirect_call (insn) == 12)
8130 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8131
8132 /* Long millicode call for portable runtime. */
8133 if (pa_attr_length_indirect_call (insn) == 16)
8134 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8135
8136 /* We need a long PIC call to $$dyncall. */
8137 xoperands[0] = NULL_RTX;
8138 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8139 if (TARGET_SOM || !TARGET_GAS)
8140 {
8141 xoperands[0] = gen_label_rtx ();
8142 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8143 targetm.asm_out.internal_label (asm_out_file, "L",
8144 CODE_LABEL_NUMBER (xoperands[0]));
8145 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8146 }
8147 else
8148 {
8149 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8150 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8151 xoperands);
8152 }
8153 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8154 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8155 return "";
8156 }
8157
8158 /* In HPUX 8.0's shared library scheme, special relocations are needed
8159 for function labels if they might be passed to a function
8160 in a shared library (because shared libraries don't live in code
8161 space), and special magic is needed to construct their address. */
8162
8163 void
8164 pa_encode_label (rtx sym)
8165 {
8166 const char *str = XSTR (sym, 0);
8167 int len = strlen (str) + 1;
8168 char *newstr, *p;
8169
8170 p = newstr = XALLOCAVEC (char, len + 1);
8171 *p++ = '@';
8172 strcpy (p, str);
8173
8174 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8175 }
8176
8177 static void
8178 pa_encode_section_info (tree decl, rtx rtl, int first)
8179 {
8180 int old_referenced = 0;
8181
8182 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8183 old_referenced
8184 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8185
8186 default_encode_section_info (decl, rtl, first);
8187
8188 if (first && TEXT_SPACE_P (decl))
8189 {
8190 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8191 if (TREE_CODE (decl) == FUNCTION_DECL)
8192 pa_encode_label (XEXP (rtl, 0));
8193 }
8194 else if (old_referenced)
8195 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8196 }
8197
8198 /* This is sort of inverse to pa_encode_section_info. */
8199
8200 static const char *
8201 pa_strip_name_encoding (const char *str)
8202 {
8203 str += (*str == '@');
8204 str += (*str == '*');
8205 return str;
8206 }
8207
8208 /* Returns 1 if OP is a function label involved in a simple addition
8209 with a constant. Used to keep certain patterns from matching
8210 during instruction combination. */
8211 int
8212 pa_is_function_label_plus_const (rtx op)
8213 {
8214 /* Strip off any CONST. */
8215 if (GET_CODE (op) == CONST)
8216 op = XEXP (op, 0);
8217
8218 return (GET_CODE (op) == PLUS
8219 && function_label_operand (XEXP (op, 0), VOIDmode)
8220 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8221 }
8222
8223 /* Output assembly code for a thunk to FUNCTION. */
8224
8225 static void
8226 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8227 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8228 tree function)
8229 {
8230 static unsigned int current_thunk_number;
8231 int val_14 = VAL_14_BITS_P (delta);
8232 unsigned int old_last_address = last_address, nbytes = 0;
8233 char label[16];
8234 rtx xoperands[4];
8235
8236 xoperands[0] = XEXP (DECL_RTL (function), 0);
8237 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8238 xoperands[2] = GEN_INT (delta);
8239
8240 final_start_function (emit_barrier (), file, 1);
8241
8242 /* Output the thunk. We know that the function is in the same
8243 translation unit (i.e., the same space) as the thunk, and that
8244 thunks are output after their method. Thus, we don't need an
8245 external branch to reach the function. With SOM and GAS,
8246 functions and thunks are effectively in different sections.
8247 Thus, we can always use a IA-relative branch and the linker
8248 will add a long branch stub if necessary.
8249
8250 However, we have to be careful when generating PIC code on the
8251 SOM port to ensure that the sequence does not transfer to an
8252 import stub for the target function as this could clobber the
8253 return value saved at SP-24. This would also apply to the
8254 32-bit linux port if the multi-space model is implemented. */
8255 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8256 && !(flag_pic && TREE_PUBLIC (function))
8257 && (TARGET_GAS || last_address < 262132))
8258 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8259 && ((targetm_common.have_named_sections
8260 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8261 /* The GNU 64-bit linker has rather poor stub management.
8262 So, we use a long branch from thunks that aren't in
8263 the same section as the target function. */
8264 && ((!TARGET_64BIT
8265 && (DECL_SECTION_NAME (thunk_fndecl)
8266 != DECL_SECTION_NAME (function)))
8267 || ((DECL_SECTION_NAME (thunk_fndecl)
8268 == DECL_SECTION_NAME (function))
8269 && last_address < 262132)))
8270 /* In this case, we need to be able to reach the start of
8271 the stub table even though the function is likely closer
8272 and can be jumped to directly. */
8273 || (targetm_common.have_named_sections
8274 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8275 && DECL_SECTION_NAME (function) == NULL
8276 && total_code_bytes < MAX_PCREL17F_OFFSET)
8277 /* Likewise. */
8278 || (!targetm_common.have_named_sections
8279 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8280 {
8281 if (!val_14)
8282 output_asm_insn ("addil L'%2,%%r26", xoperands);
8283
8284 output_asm_insn ("b %0", xoperands);
8285
8286 if (val_14)
8287 {
8288 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8289 nbytes += 8;
8290 }
8291 else
8292 {
8293 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8294 nbytes += 12;
8295 }
8296 }
8297 else if (TARGET_64BIT)
8298 {
8299 /* We only have one call-clobbered scratch register, so we can't
8300 make use of the delay slot if delta doesn't fit in 14 bits. */
8301 if (!val_14)
8302 {
8303 output_asm_insn ("addil L'%2,%%r26", xoperands);
8304 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8305 }
8306
8307 output_asm_insn ("b,l .+8,%%r1", xoperands);
8308
8309 if (TARGET_GAS)
8310 {
8311 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8312 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8313 }
8314 else
8315 {
8316 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8317 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8318 }
8319
8320 if (val_14)
8321 {
8322 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8323 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8324 nbytes += 20;
8325 }
8326 else
8327 {
8328 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8329 nbytes += 24;
8330 }
8331 }
8332 else if (TARGET_PORTABLE_RUNTIME)
8333 {
8334 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8335 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8336
8337 if (!val_14)
8338 output_asm_insn ("addil L'%2,%%r26", xoperands);
8339
8340 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8341
8342 if (val_14)
8343 {
8344 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8345 nbytes += 16;
8346 }
8347 else
8348 {
8349 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8350 nbytes += 20;
8351 }
8352 }
8353 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8354 {
8355 /* The function is accessible from outside this module. The only
8356 way to avoid an import stub between the thunk and function is to
8357 call the function directly with an indirect sequence similar to
8358 that used by $$dyncall. This is possible because $$dyncall acts
8359 as the import stub in an indirect call. */
8360 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8361 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8362 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8363 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8364 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8365 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8366 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8367 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8368 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8369
8370 if (!val_14)
8371 {
8372 output_asm_insn ("addil L'%2,%%r26", xoperands);
8373 nbytes += 4;
8374 }
8375
8376 if (TARGET_PA_20)
8377 {
8378 output_asm_insn ("bve (%%r22)", xoperands);
8379 nbytes += 36;
8380 }
8381 else if (TARGET_NO_SPACE_REGS)
8382 {
8383 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8384 nbytes += 36;
8385 }
8386 else
8387 {
8388 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8389 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8390 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8391 nbytes += 44;
8392 }
8393
8394 if (val_14)
8395 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8396 else
8397 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8398 }
8399 else if (flag_pic)
8400 {
8401 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8402
8403 if (TARGET_SOM || !TARGET_GAS)
8404 {
8405 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8406 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8407 }
8408 else
8409 {
8410 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8411 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8412 }
8413
8414 if (!val_14)
8415 output_asm_insn ("addil L'%2,%%r26", xoperands);
8416
8417 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8418
8419 if (val_14)
8420 {
8421 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8422 nbytes += 20;
8423 }
8424 else
8425 {
8426 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8427 nbytes += 24;
8428 }
8429 }
8430 else
8431 {
8432 if (!val_14)
8433 output_asm_insn ("addil L'%2,%%r26", xoperands);
8434
8435 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8436 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8437
8438 if (val_14)
8439 {
8440 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8441 nbytes += 12;
8442 }
8443 else
8444 {
8445 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8446 nbytes += 16;
8447 }
8448 }
8449
8450 final_end_function ();
8451
8452 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8453 {
8454 switch_to_section (data_section);
8455 output_asm_insn (".align 4", xoperands);
8456 ASM_OUTPUT_LABEL (file, label);
8457 output_asm_insn (".word P'%0", xoperands);
8458 }
8459
8460 current_thunk_number++;
8461 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8462 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8463 last_address += nbytes;
8464 if (old_last_address > last_address)
8465 last_address = UINT_MAX;
8466 update_total_code_bytes (nbytes);
8467 }
8468
8469 /* Only direct calls to static functions are allowed to be sibling (tail)
8470 call optimized.
8471
8472 This restriction is necessary because some linker generated stubs will
8473 store return pointers into rp' in some cases which might clobber a
8474 live value already in rp'.
8475
8476 In a sibcall the current function and the target function share stack
8477 space. Thus if the path to the current function and the path to the
8478 target function save a value in rp', they save the value into the
8479 same stack slot, which has undesirable consequences.
8480
8481 Because of the deferred binding nature of shared libraries any function
8482 with external scope could be in a different load module and thus require
8483 rp' to be saved when calling that function. So sibcall optimizations
8484 can only be safe for static function.
8485
8486 Note that GCC never needs return value relocations, so we don't have to
8487 worry about static calls with return value relocations (which require
8488 saving rp').
8489
8490 It is safe to perform a sibcall optimization when the target function
8491 will never return. */
8492 static bool
8493 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8494 {
8495 if (TARGET_PORTABLE_RUNTIME)
8496 return false;
8497
8498 /* Sibcalls are not ok because the arg pointer register is not a fixed
8499 register. This prevents the sibcall optimization from occurring. In
8500 addition, there are problems with stub placement using GNU ld. This
8501 is because a normal sibcall branch uses a 17-bit relocation while
8502 a regular call branch uses a 22-bit relocation. As a result, more
8503 care needs to be taken in the placement of long-branch stubs. */
8504 if (TARGET_64BIT)
8505 return false;
8506
8507 /* Sibcalls are only ok within a translation unit. */
8508 return (decl && !TREE_PUBLIC (decl));
8509 }
8510
8511 /* ??? Addition is not commutative on the PA due to the weird implicit
8512 space register selection rules for memory addresses. Therefore, we
8513 don't consider a + b == b + a, as this might be inside a MEM. */
8514 static bool
8515 pa_commutative_p (const_rtx x, int outer_code)
8516 {
8517 return (COMMUTATIVE_P (x)
8518 && (TARGET_NO_SPACE_REGS
8519 || (outer_code != UNKNOWN && outer_code != MEM)
8520 || GET_CODE (x) != PLUS));
8521 }
8522
8523 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8524 use in fmpyadd instructions. */
8525 int
8526 pa_fmpyaddoperands (rtx *operands)
8527 {
8528 machine_mode mode = GET_MODE (operands[0]);
8529
8530 /* Must be a floating point mode. */
8531 if (mode != SFmode && mode != DFmode)
8532 return 0;
8533
8534 /* All modes must be the same. */
8535 if (! (mode == GET_MODE (operands[1])
8536 && mode == GET_MODE (operands[2])
8537 && mode == GET_MODE (operands[3])
8538 && mode == GET_MODE (operands[4])
8539 && mode == GET_MODE (operands[5])))
8540 return 0;
8541
8542 /* All operands must be registers. */
8543 if (! (GET_CODE (operands[1]) == REG
8544 && GET_CODE (operands[2]) == REG
8545 && GET_CODE (operands[3]) == REG
8546 && GET_CODE (operands[4]) == REG
8547 && GET_CODE (operands[5]) == REG))
8548 return 0;
8549
8550 /* Only 2 real operands to the addition. One of the input operands must
8551 be the same as the output operand. */
8552 if (! rtx_equal_p (operands[3], operands[4])
8553 && ! rtx_equal_p (operands[3], operands[5]))
8554 return 0;
8555
8556 /* Inout operand of add cannot conflict with any operands from multiply. */
8557 if (rtx_equal_p (operands[3], operands[0])
8558 || rtx_equal_p (operands[3], operands[1])
8559 || rtx_equal_p (operands[3], operands[2]))
8560 return 0;
8561
8562 /* multiply cannot feed into addition operands. */
8563 if (rtx_equal_p (operands[4], operands[0])
8564 || rtx_equal_p (operands[5], operands[0]))
8565 return 0;
8566
8567 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8568 if (mode == SFmode
8569 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8570 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8571 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8572 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8573 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8574 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8575 return 0;
8576
8577 /* Passed. Operands are suitable for fmpyadd. */
8578 return 1;
8579 }
8580
8581 #if !defined(USE_COLLECT2)
8582 static void
8583 pa_asm_out_constructor (rtx symbol, int priority)
8584 {
8585 if (!function_label_operand (symbol, VOIDmode))
8586 pa_encode_label (symbol);
8587
8588 #ifdef CTORS_SECTION_ASM_OP
8589 default_ctor_section_asm_out_constructor (symbol, priority);
8590 #else
8591 # ifdef TARGET_ASM_NAMED_SECTION
8592 default_named_section_asm_out_constructor (symbol, priority);
8593 # else
8594 default_stabs_asm_out_constructor (symbol, priority);
8595 # endif
8596 #endif
8597 }
8598
8599 static void
8600 pa_asm_out_destructor (rtx symbol, int priority)
8601 {
8602 if (!function_label_operand (symbol, VOIDmode))
8603 pa_encode_label (symbol);
8604
8605 #ifdef DTORS_SECTION_ASM_OP
8606 default_dtor_section_asm_out_destructor (symbol, priority);
8607 #else
8608 # ifdef TARGET_ASM_NAMED_SECTION
8609 default_named_section_asm_out_destructor (symbol, priority);
8610 # else
8611 default_stabs_asm_out_destructor (symbol, priority);
8612 # endif
8613 #endif
8614 }
8615 #endif
8616
8617 /* This function places uninitialized global data in the bss section.
8618 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8619 function on the SOM port to prevent uninitialized global data from
8620 being placed in the data section. */
8621
8622 void
8623 pa_asm_output_aligned_bss (FILE *stream,
8624 const char *name,
8625 unsigned HOST_WIDE_INT size,
8626 unsigned int align)
8627 {
8628 switch_to_section (bss_section);
8629 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8630
8631 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8632 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8633 #endif
8634
8635 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8636 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8637 #endif
8638
8639 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8640 ASM_OUTPUT_LABEL (stream, name);
8641 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8642 }
8643
8644 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8645 that doesn't allow the alignment of global common storage to be directly
8646 specified. The SOM linker aligns common storage based on the rounded
8647 value of the NUM_BYTES parameter in the .comm directive. It's not
8648 possible to use the .align directive as it doesn't affect the alignment
8649 of the label associated with a .comm directive. */
8650
8651 void
8652 pa_asm_output_aligned_common (FILE *stream,
8653 const char *name,
8654 unsigned HOST_WIDE_INT size,
8655 unsigned int align)
8656 {
8657 unsigned int max_common_align;
8658
8659 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8660 if (align > max_common_align)
8661 {
8662 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8663 "for global common data. Using %u",
8664 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8665 align = max_common_align;
8666 }
8667
8668 switch_to_section (bss_section);
8669
8670 assemble_name (stream, name);
8671 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8672 MAX (size, align / BITS_PER_UNIT));
8673 }
8674
8675 /* We can't use .comm for local common storage as the SOM linker effectively
8676 treats the symbol as universal and uses the same storage for local symbols
8677 with the same name in different object files. The .block directive
8678 reserves an uninitialized block of storage. However, it's not common
8679 storage. Fortunately, GCC never requests common storage with the same
8680 name in any given translation unit. */
8681
8682 void
8683 pa_asm_output_aligned_local (FILE *stream,
8684 const char *name,
8685 unsigned HOST_WIDE_INT size,
8686 unsigned int align)
8687 {
8688 switch_to_section (bss_section);
8689 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8690
8691 #ifdef LOCAL_ASM_OP
8692 fprintf (stream, "%s", LOCAL_ASM_OP);
8693 assemble_name (stream, name);
8694 fprintf (stream, "\n");
8695 #endif
8696
8697 ASM_OUTPUT_LABEL (stream, name);
8698 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8699 }
8700
8701 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8702 use in fmpysub instructions. */
8703 int
8704 pa_fmpysuboperands (rtx *operands)
8705 {
8706 machine_mode mode = GET_MODE (operands[0]);
8707
8708 /* Must be a floating point mode. */
8709 if (mode != SFmode && mode != DFmode)
8710 return 0;
8711
8712 /* All modes must be the same. */
8713 if (! (mode == GET_MODE (operands[1])
8714 && mode == GET_MODE (operands[2])
8715 && mode == GET_MODE (operands[3])
8716 && mode == GET_MODE (operands[4])
8717 && mode == GET_MODE (operands[5])))
8718 return 0;
8719
8720 /* All operands must be registers. */
8721 if (! (GET_CODE (operands[1]) == REG
8722 && GET_CODE (operands[2]) == REG
8723 && GET_CODE (operands[3]) == REG
8724 && GET_CODE (operands[4]) == REG
8725 && GET_CODE (operands[5]) == REG))
8726 return 0;
8727
8728 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8729 operation, so operands[4] must be the same as operand[3]. */
8730 if (! rtx_equal_p (operands[3], operands[4]))
8731 return 0;
8732
8733 /* multiply cannot feed into subtraction. */
8734 if (rtx_equal_p (operands[5], operands[0]))
8735 return 0;
8736
8737 /* Inout operand of sub cannot conflict with any operands from multiply. */
8738 if (rtx_equal_p (operands[3], operands[0])
8739 || rtx_equal_p (operands[3], operands[1])
8740 || rtx_equal_p (operands[3], operands[2]))
8741 return 0;
8742
8743 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8744 if (mode == SFmode
8745 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8746 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8747 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8748 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8749 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8750 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8751 return 0;
8752
8753 /* Passed. Operands are suitable for fmpysub. */
8754 return 1;
8755 }
8756
8757 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8758 constants for a MULT embedded inside a memory address. */
8759 int
8760 pa_mem_shadd_constant_p (int val)
8761 {
8762 if (val == 2 || val == 4 || val == 8)
8763 return 1;
8764 else
8765 return 0;
8766 }
8767
8768 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8769 constants for shadd instructions. */
8770 int
8771 pa_shadd_constant_p (int val)
8772 {
8773 if (val == 1 || val == 2 || val == 3)
8774 return 1;
8775 else
8776 return 0;
8777 }
8778
8779 /* Return TRUE if INSN branches forward. */
8780
8781 static bool
8782 forward_branch_p (rtx_insn *insn)
8783 {
8784 rtx lab = JUMP_LABEL (insn);
8785
8786 /* The INSN must have a jump label. */
8787 gcc_assert (lab != NULL_RTX);
8788
8789 if (INSN_ADDRESSES_SET_P ())
8790 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8791
8792 while (insn)
8793 {
8794 if (insn == lab)
8795 return true;
8796 else
8797 insn = NEXT_INSN (insn);
8798 }
8799
8800 return false;
8801 }
8802
8803 /* Output an unconditional move and branch insn. */
8804
8805 const char *
8806 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8807 {
8808 int length = get_attr_length (insn);
8809
8810 /* These are the cases in which we win. */
8811 if (length == 4)
8812 return "mov%I1b,tr %1,%0,%2";
8813
8814 /* None of the following cases win, but they don't lose either. */
8815 if (length == 8)
8816 {
8817 if (dbr_sequence_length () == 0)
8818 {
8819 /* Nothing in the delay slot, fake it by putting the combined
8820 insn (the copy or add) in the delay slot of a bl. */
8821 if (GET_CODE (operands[1]) == CONST_INT)
8822 return "b %2\n\tldi %1,%0";
8823 else
8824 return "b %2\n\tcopy %1,%0";
8825 }
8826 else
8827 {
8828 /* Something in the delay slot, but we've got a long branch. */
8829 if (GET_CODE (operands[1]) == CONST_INT)
8830 return "ldi %1,%0\n\tb %2";
8831 else
8832 return "copy %1,%0\n\tb %2";
8833 }
8834 }
8835
8836 if (GET_CODE (operands[1]) == CONST_INT)
8837 output_asm_insn ("ldi %1,%0", operands);
8838 else
8839 output_asm_insn ("copy %1,%0", operands);
8840 return pa_output_lbranch (operands[2], insn, 1);
8841 }
8842
8843 /* Output an unconditional add and branch insn. */
8844
8845 const char *
8846 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8847 {
8848 int length = get_attr_length (insn);
8849
8850 /* To make life easy we want operand0 to be the shared input/output
8851 operand and operand1 to be the readonly operand. */
8852 if (operands[0] == operands[1])
8853 operands[1] = operands[2];
8854
8855 /* These are the cases in which we win. */
8856 if (length == 4)
8857 return "add%I1b,tr %1,%0,%3";
8858
8859 /* None of the following cases win, but they don't lose either. */
8860 if (length == 8)
8861 {
8862 if (dbr_sequence_length () == 0)
8863 /* Nothing in the delay slot, fake it by putting the combined
8864 insn (the copy or add) in the delay slot of a bl. */
8865 return "b %3\n\tadd%I1 %1,%0,%0";
8866 else
8867 /* Something in the delay slot, but we've got a long branch. */
8868 return "add%I1 %1,%0,%0\n\tb %3";
8869 }
8870
8871 output_asm_insn ("add%I1 %1,%0,%0", operands);
8872 return pa_output_lbranch (operands[3], insn, 1);
8873 }
8874
8875 /* We use this hook to perform a PA specific optimization which is difficult
8876 to do in earlier passes. */
8877
8878 static void
8879 pa_reorg (void)
8880 {
8881 remove_useless_addtr_insns (1);
8882
8883 if (pa_cpu < PROCESSOR_8000)
8884 pa_combine_instructions ();
8885 }
8886
8887 /* The PA has a number of odd instructions which can perform multiple
8888 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8889 it may be profitable to combine two instructions into one instruction
8890 with two outputs. It's not profitable PA2.0 machines because the
8891 two outputs would take two slots in the reorder buffers.
8892
8893 This routine finds instructions which can be combined and combines
8894 them. We only support some of the potential combinations, and we
8895 only try common ways to find suitable instructions.
8896
8897 * addb can add two registers or a register and a small integer
8898 and jump to a nearby (+-8k) location. Normally the jump to the
8899 nearby location is conditional on the result of the add, but by
8900 using the "true" condition we can make the jump unconditional.
8901 Thus addb can perform two independent operations in one insn.
8902
8903 * movb is similar to addb in that it can perform a reg->reg
8904 or small immediate->reg copy and jump to a nearby (+-8k location).
8905
8906 * fmpyadd and fmpysub can perform a FP multiply and either an
8907 FP add or FP sub if the operands of the multiply and add/sub are
8908 independent (there are other minor restrictions). Note both
8909 the fmpy and fadd/fsub can in theory move to better spots according
8910 to data dependencies, but for now we require the fmpy stay at a
8911 fixed location.
8912
8913 * Many of the memory operations can perform pre & post updates
8914 of index registers. GCC's pre/post increment/decrement addressing
8915 is far too simple to take advantage of all the possibilities. This
8916 pass may not be suitable since those insns may not be independent.
8917
8918 * comclr can compare two ints or an int and a register, nullify
8919 the following instruction and zero some other register. This
8920 is more difficult to use as it's harder to find an insn which
8921 will generate a comclr than finding something like an unconditional
8922 branch. (conditional moves & long branches create comclr insns).
8923
8924 * Most arithmetic operations can conditionally skip the next
8925 instruction. They can be viewed as "perform this operation
8926 and conditionally jump to this nearby location" (where nearby
8927 is an insns away). These are difficult to use due to the
8928 branch length restrictions. */
8929
8930 static void
8931 pa_combine_instructions (void)
8932 {
8933 rtx_insn *anchor;
8934
8935 /* This can get expensive since the basic algorithm is on the
8936 order of O(n^2) (or worse). Only do it for -O2 or higher
8937 levels of optimization. */
8938 if (optimize < 2)
8939 return;
8940
8941 /* Walk down the list of insns looking for "anchor" insns which
8942 may be combined with "floating" insns. As the name implies,
8943 "anchor" instructions don't move, while "floating" insns may
8944 move around. */
8945 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8946 rtx_insn *new_rtx = make_insn_raw (par);
8947
8948 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8949 {
8950 enum attr_pa_combine_type anchor_attr;
8951 enum attr_pa_combine_type floater_attr;
8952
8953 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8954 Also ignore any special USE insns. */
8955 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
8956 || GET_CODE (PATTERN (anchor)) == USE
8957 || GET_CODE (PATTERN (anchor)) == CLOBBER)
8958 continue;
8959
8960 anchor_attr = get_attr_pa_combine_type (anchor);
8961 /* See if anchor is an insn suitable for combination. */
8962 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8963 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8964 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8965 && ! forward_branch_p (anchor)))
8966 {
8967 rtx_insn *floater;
8968
8969 for (floater = PREV_INSN (anchor);
8970 floater;
8971 floater = PREV_INSN (floater))
8972 {
8973 if (NOTE_P (floater)
8974 || (NONJUMP_INSN_P (floater)
8975 && (GET_CODE (PATTERN (floater)) == USE
8976 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8977 continue;
8978
8979 /* Anything except a regular INSN will stop our search. */
8980 if (! NONJUMP_INSN_P (floater))
8981 {
8982 floater = NULL;
8983 break;
8984 }
8985
8986 /* See if FLOATER is suitable for combination with the
8987 anchor. */
8988 floater_attr = get_attr_pa_combine_type (floater);
8989 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8990 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8991 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8992 && floater_attr == PA_COMBINE_TYPE_FMPY))
8993 {
8994 /* If ANCHOR and FLOATER can be combined, then we're
8995 done with this pass. */
8996 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
8997 SET_DEST (PATTERN (floater)),
8998 XEXP (SET_SRC (PATTERN (floater)), 0),
8999 XEXP (SET_SRC (PATTERN (floater)), 1)))
9000 break;
9001 }
9002
9003 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9004 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9005 {
9006 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9007 {
9008 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9009 SET_DEST (PATTERN (floater)),
9010 XEXP (SET_SRC (PATTERN (floater)), 0),
9011 XEXP (SET_SRC (PATTERN (floater)), 1)))
9012 break;
9013 }
9014 else
9015 {
9016 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9017 SET_DEST (PATTERN (floater)),
9018 SET_SRC (PATTERN (floater)),
9019 SET_SRC (PATTERN (floater))))
9020 break;
9021 }
9022 }
9023 }
9024
9025 /* If we didn't find anything on the backwards scan try forwards. */
9026 if (!floater
9027 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9028 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9029 {
9030 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9031 {
9032 if (NOTE_P (floater)
9033 || (NONJUMP_INSN_P (floater)
9034 && (GET_CODE (PATTERN (floater)) == USE
9035 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9036
9037 continue;
9038
9039 /* Anything except a regular INSN will stop our search. */
9040 if (! NONJUMP_INSN_P (floater))
9041 {
9042 floater = NULL;
9043 break;
9044 }
9045
9046 /* See if FLOATER is suitable for combination with the
9047 anchor. */
9048 floater_attr = get_attr_pa_combine_type (floater);
9049 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9050 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9051 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9052 && floater_attr == PA_COMBINE_TYPE_FMPY))
9053 {
9054 /* If ANCHOR and FLOATER can be combined, then we're
9055 done with this pass. */
9056 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9057 SET_DEST (PATTERN (floater)),
9058 XEXP (SET_SRC (PATTERN (floater)),
9059 0),
9060 XEXP (SET_SRC (PATTERN (floater)),
9061 1)))
9062 break;
9063 }
9064 }
9065 }
9066
9067 /* FLOATER will be nonzero if we found a suitable floating
9068 insn for combination with ANCHOR. */
9069 if (floater
9070 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9071 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9072 {
9073 /* Emit the new instruction and delete the old anchor. */
9074 emit_insn_before (gen_rtx_PARALLEL
9075 (VOIDmode,
9076 gen_rtvec (2, PATTERN (anchor),
9077 PATTERN (floater))),
9078 anchor);
9079
9080 SET_INSN_DELETED (anchor);
9081
9082 /* Emit a special USE insn for FLOATER, then delete
9083 the floating insn. */
9084 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9085 delete_insn (floater);
9086
9087 continue;
9088 }
9089 else if (floater
9090 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9091 {
9092 rtx temp;
9093 /* Emit the new_jump instruction and delete the old anchor. */
9094 temp
9095 = emit_jump_insn_before (gen_rtx_PARALLEL
9096 (VOIDmode,
9097 gen_rtvec (2, PATTERN (anchor),
9098 PATTERN (floater))),
9099 anchor);
9100
9101 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9102 SET_INSN_DELETED (anchor);
9103
9104 /* Emit a special USE insn for FLOATER, then delete
9105 the floating insn. */
9106 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9107 delete_insn (floater);
9108 continue;
9109 }
9110 }
9111 }
9112 }
9113
9114 static int
9115 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9116 int reversed, rtx dest,
9117 rtx src1, rtx src2)
9118 {
9119 int insn_code_number;
9120 rtx_insn *start, *end;
9121
9122 /* Create a PARALLEL with the patterns of ANCHOR and
9123 FLOATER, try to recognize it, then test constraints
9124 for the resulting pattern.
9125
9126 If the pattern doesn't match or the constraints
9127 aren't met keep searching for a suitable floater
9128 insn. */
9129 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9130 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9131 INSN_CODE (new_rtx) = -1;
9132 insn_code_number = recog_memoized (new_rtx);
9133 basic_block bb = BLOCK_FOR_INSN (anchor);
9134 if (insn_code_number < 0
9135 || (extract_insn (new_rtx),
9136 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9137 return 0;
9138
9139 if (reversed)
9140 {
9141 start = anchor;
9142 end = floater;
9143 }
9144 else
9145 {
9146 start = floater;
9147 end = anchor;
9148 }
9149
9150 /* There's up to three operands to consider. One
9151 output and two inputs.
9152
9153 The output must not be used between FLOATER & ANCHOR
9154 exclusive. The inputs must not be set between
9155 FLOATER and ANCHOR exclusive. */
9156
9157 if (reg_used_between_p (dest, start, end))
9158 return 0;
9159
9160 if (reg_set_between_p (src1, start, end))
9161 return 0;
9162
9163 if (reg_set_between_p (src2, start, end))
9164 return 0;
9165
9166 /* If we get here, then everything is good. */
9167 return 1;
9168 }
9169
9170 /* Return nonzero if references for INSN are delayed.
9171
9172 Millicode insns are actually function calls with some special
9173 constraints on arguments and register usage.
9174
9175 Millicode calls always expect their arguments in the integer argument
9176 registers, and always return their result in %r29 (ret1). They
9177 are expected to clobber their arguments, %r1, %r29, and the return
9178 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9179
9180 This function tells reorg that the references to arguments and
9181 millicode calls do not appear to happen until after the millicode call.
9182 This allows reorg to put insns which set the argument registers into the
9183 delay slot of the millicode call -- thus they act more like traditional
9184 CALL_INSNs.
9185
9186 Note we cannot consider side effects of the insn to be delayed because
9187 the branch and link insn will clobber the return pointer. If we happened
9188 to use the return pointer in the delay slot of the call, then we lose.
9189
9190 get_attr_type will try to recognize the given insn, so make sure to
9191 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9192 in particular. */
9193 int
9194 pa_insn_refs_are_delayed (rtx_insn *insn)
9195 {
9196 return ((NONJUMP_INSN_P (insn)
9197 && GET_CODE (PATTERN (insn)) != SEQUENCE
9198 && GET_CODE (PATTERN (insn)) != USE
9199 && GET_CODE (PATTERN (insn)) != CLOBBER
9200 && get_attr_type (insn) == TYPE_MILLI));
9201 }
9202
9203 /* Promote the return value, but not the arguments. */
9204
9205 static machine_mode
9206 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9207 machine_mode mode,
9208 int *punsignedp ATTRIBUTE_UNUSED,
9209 const_tree fntype ATTRIBUTE_UNUSED,
9210 int for_return)
9211 {
9212 if (for_return == 0)
9213 return mode;
9214 return promote_mode (type, mode, punsignedp);
9215 }
9216
9217 /* On the HP-PA the value is found in register(s) 28(-29), unless
9218 the mode is SF or DF. Then the value is returned in fr4 (32).
9219
9220 This must perform the same promotions as PROMOTE_MODE, else promoting
9221 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9222
9223 Small structures must be returned in a PARALLEL on PA64 in order
9224 to match the HP Compiler ABI. */
9225
9226 static rtx
9227 pa_function_value (const_tree valtype,
9228 const_tree func ATTRIBUTE_UNUSED,
9229 bool outgoing ATTRIBUTE_UNUSED)
9230 {
9231 machine_mode valmode;
9232
9233 if (AGGREGATE_TYPE_P (valtype)
9234 || TREE_CODE (valtype) == COMPLEX_TYPE
9235 || TREE_CODE (valtype) == VECTOR_TYPE)
9236 {
9237 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9238
9239 /* Handle aggregates that fit exactly in a word or double word. */
9240 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9241 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9242
9243 if (TARGET_64BIT)
9244 {
9245 /* Aggregates with a size less than or equal to 128 bits are
9246 returned in GR 28(-29). They are left justified. The pad
9247 bits are undefined. Larger aggregates are returned in
9248 memory. */
9249 rtx loc[2];
9250 int i, offset = 0;
9251 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9252
9253 for (i = 0; i < ub; i++)
9254 {
9255 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9256 gen_rtx_REG (DImode, 28 + i),
9257 GEN_INT (offset));
9258 offset += 8;
9259 }
9260
9261 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9262 }
9263 else if (valsize > UNITS_PER_WORD)
9264 {
9265 /* Aggregates 5 to 8 bytes in size are returned in general
9266 registers r28-r29 in the same manner as other non
9267 floating-point objects. The data is right-justified and
9268 zero-extended to 64 bits. This is opposite to the normal
9269 justification used on big endian targets and requires
9270 special treatment. */
9271 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9272 gen_rtx_REG (DImode, 28), const0_rtx);
9273 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9274 }
9275 }
9276
9277 if ((INTEGRAL_TYPE_P (valtype)
9278 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9279 || POINTER_TYPE_P (valtype))
9280 valmode = word_mode;
9281 else
9282 valmode = TYPE_MODE (valtype);
9283
9284 if (TREE_CODE (valtype) == REAL_TYPE
9285 && !AGGREGATE_TYPE_P (valtype)
9286 && TYPE_MODE (valtype) != TFmode
9287 && !TARGET_SOFT_FLOAT)
9288 return gen_rtx_REG (valmode, 32);
9289
9290 return gen_rtx_REG (valmode, 28);
9291 }
9292
9293 /* Implement the TARGET_LIBCALL_VALUE hook. */
9294
9295 static rtx
9296 pa_libcall_value (machine_mode mode,
9297 const_rtx fun ATTRIBUTE_UNUSED)
9298 {
9299 if (! TARGET_SOFT_FLOAT
9300 && (mode == SFmode || mode == DFmode))
9301 return gen_rtx_REG (mode, 32);
9302 else
9303 return gen_rtx_REG (mode, 28);
9304 }
9305
9306 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9307
9308 static bool
9309 pa_function_value_regno_p (const unsigned int regno)
9310 {
9311 if (regno == 28
9312 || (! TARGET_SOFT_FLOAT && regno == 32))
9313 return true;
9314
9315 return false;
9316 }
9317
9318 /* Update the data in CUM to advance over an argument
9319 of mode MODE and data type TYPE.
9320 (TYPE is null for libcalls where that information may not be available.) */
9321
9322 static void
9323 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9324 const_tree type, bool named ATTRIBUTE_UNUSED)
9325 {
9326 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9327 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9328
9329 cum->nargs_prototype--;
9330 cum->words += (arg_size
9331 + ((cum->words & 01)
9332 && type != NULL_TREE
9333 && arg_size > 1));
9334 }
9335
9336 /* Return the location of a parameter that is passed in a register or NULL
9337 if the parameter has any component that is passed in memory.
9338
9339 This is new code and will be pushed to into the net sources after
9340 further testing.
9341
9342 ??? We might want to restructure this so that it looks more like other
9343 ports. */
9344 static rtx
9345 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9346 const_tree type, bool named ATTRIBUTE_UNUSED)
9347 {
9348 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9349 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9350 int alignment = 0;
9351 int arg_size;
9352 int fpr_reg_base;
9353 int gpr_reg_base;
9354 rtx retval;
9355
9356 if (mode == VOIDmode)
9357 return NULL_RTX;
9358
9359 arg_size = FUNCTION_ARG_SIZE (mode, type);
9360
9361 /* If this arg would be passed partially or totally on the stack, then
9362 this routine should return zero. pa_arg_partial_bytes will
9363 handle arguments which are split between regs and stack slots if
9364 the ABI mandates split arguments. */
9365 if (!TARGET_64BIT)
9366 {
9367 /* The 32-bit ABI does not split arguments. */
9368 if (cum->words + arg_size > max_arg_words)
9369 return NULL_RTX;
9370 }
9371 else
9372 {
9373 if (arg_size > 1)
9374 alignment = cum->words & 1;
9375 if (cum->words + alignment >= max_arg_words)
9376 return NULL_RTX;
9377 }
9378
9379 /* The 32bit ABIs and the 64bit ABIs are rather different,
9380 particularly in their handling of FP registers. We might
9381 be able to cleverly share code between them, but I'm not
9382 going to bother in the hope that splitting them up results
9383 in code that is more easily understood. */
9384
9385 if (TARGET_64BIT)
9386 {
9387 /* Advance the base registers to their current locations.
9388
9389 Remember, gprs grow towards smaller register numbers while
9390 fprs grow to higher register numbers. Also remember that
9391 although FP regs are 32-bit addressable, we pretend that
9392 the registers are 64-bits wide. */
9393 gpr_reg_base = 26 - cum->words;
9394 fpr_reg_base = 32 + cum->words;
9395
9396 /* Arguments wider than one word and small aggregates need special
9397 treatment. */
9398 if (arg_size > 1
9399 || mode == BLKmode
9400 || (type && (AGGREGATE_TYPE_P (type)
9401 || TREE_CODE (type) == COMPLEX_TYPE
9402 || TREE_CODE (type) == VECTOR_TYPE)))
9403 {
9404 /* Double-extended precision (80-bit), quad-precision (128-bit)
9405 and aggregates including complex numbers are aligned on
9406 128-bit boundaries. The first eight 64-bit argument slots
9407 are associated one-to-one, with general registers r26
9408 through r19, and also with floating-point registers fr4
9409 through fr11. Arguments larger than one word are always
9410 passed in general registers.
9411
9412 Using a PARALLEL with a word mode register results in left
9413 justified data on a big-endian target. */
9414
9415 rtx loc[8];
9416 int i, offset = 0, ub = arg_size;
9417
9418 /* Align the base register. */
9419 gpr_reg_base -= alignment;
9420
9421 ub = MIN (ub, max_arg_words - cum->words - alignment);
9422 for (i = 0; i < ub; i++)
9423 {
9424 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9425 gen_rtx_REG (DImode, gpr_reg_base),
9426 GEN_INT (offset));
9427 gpr_reg_base -= 1;
9428 offset += 8;
9429 }
9430
9431 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9432 }
9433 }
9434 else
9435 {
9436 /* If the argument is larger than a word, then we know precisely
9437 which registers we must use. */
9438 if (arg_size > 1)
9439 {
9440 if (cum->words)
9441 {
9442 gpr_reg_base = 23;
9443 fpr_reg_base = 38;
9444 }
9445 else
9446 {
9447 gpr_reg_base = 25;
9448 fpr_reg_base = 34;
9449 }
9450
9451 /* Structures 5 to 8 bytes in size are passed in the general
9452 registers in the same manner as other non floating-point
9453 objects. The data is right-justified and zero-extended
9454 to 64 bits. This is opposite to the normal justification
9455 used on big endian targets and requires special treatment.
9456 We now define BLOCK_REG_PADDING to pad these objects.
9457 Aggregates, complex and vector types are passed in the same
9458 manner as structures. */
9459 if (mode == BLKmode
9460 || (type && (AGGREGATE_TYPE_P (type)
9461 || TREE_CODE (type) == COMPLEX_TYPE
9462 || TREE_CODE (type) == VECTOR_TYPE)))
9463 {
9464 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9465 gen_rtx_REG (DImode, gpr_reg_base),
9466 const0_rtx);
9467 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9468 }
9469 }
9470 else
9471 {
9472 /* We have a single word (32 bits). A simple computation
9473 will get us the register #s we need. */
9474 gpr_reg_base = 26 - cum->words;
9475 fpr_reg_base = 32 + 2 * cum->words;
9476 }
9477 }
9478
9479 /* Determine if the argument needs to be passed in both general and
9480 floating point registers. */
9481 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9482 /* If we are doing soft-float with portable runtime, then there
9483 is no need to worry about FP regs. */
9484 && !TARGET_SOFT_FLOAT
9485 /* The parameter must be some kind of scalar float, else we just
9486 pass it in integer registers. */
9487 && GET_MODE_CLASS (mode) == MODE_FLOAT
9488 /* The target function must not have a prototype. */
9489 && cum->nargs_prototype <= 0
9490 /* libcalls do not need to pass items in both FP and general
9491 registers. */
9492 && type != NULL_TREE
9493 /* All this hair applies to "outgoing" args only. This includes
9494 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9495 && !cum->incoming)
9496 /* Also pass outgoing floating arguments in both registers in indirect
9497 calls with the 32 bit ABI and the HP assembler since there is no
9498 way to the specify argument locations in static functions. */
9499 || (!TARGET_64BIT
9500 && !TARGET_GAS
9501 && !cum->incoming
9502 && cum->indirect
9503 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9504 {
9505 retval
9506 = gen_rtx_PARALLEL
9507 (mode,
9508 gen_rtvec (2,
9509 gen_rtx_EXPR_LIST (VOIDmode,
9510 gen_rtx_REG (mode, fpr_reg_base),
9511 const0_rtx),
9512 gen_rtx_EXPR_LIST (VOIDmode,
9513 gen_rtx_REG (mode, gpr_reg_base),
9514 const0_rtx)));
9515 }
9516 else
9517 {
9518 /* See if we should pass this parameter in a general register. */
9519 if (TARGET_SOFT_FLOAT
9520 /* Indirect calls in the normal 32bit ABI require all arguments
9521 to be passed in general registers. */
9522 || (!TARGET_PORTABLE_RUNTIME
9523 && !TARGET_64BIT
9524 && !TARGET_ELF32
9525 && cum->indirect)
9526 /* If the parameter is not a scalar floating-point parameter,
9527 then it belongs in GPRs. */
9528 || GET_MODE_CLASS (mode) != MODE_FLOAT
9529 /* Structure with single SFmode field belongs in GPR. */
9530 || (type && AGGREGATE_TYPE_P (type)))
9531 retval = gen_rtx_REG (mode, gpr_reg_base);
9532 else
9533 retval = gen_rtx_REG (mode, fpr_reg_base);
9534 }
9535 return retval;
9536 }
9537
9538 /* Arguments larger than one word are double word aligned. */
9539
9540 static unsigned int
9541 pa_function_arg_boundary (machine_mode mode, const_tree type)
9542 {
9543 bool singleword = (type
9544 ? (integer_zerop (TYPE_SIZE (type))
9545 || !TREE_CONSTANT (TYPE_SIZE (type))
9546 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9547 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9548
9549 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9550 }
9551
9552 /* If this arg would be passed totally in registers or totally on the stack,
9553 then this routine should return zero. */
9554
9555 static int
9556 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9557 tree type, bool named ATTRIBUTE_UNUSED)
9558 {
9559 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9560 unsigned int max_arg_words = 8;
9561 unsigned int offset = 0;
9562
9563 if (!TARGET_64BIT)
9564 return 0;
9565
9566 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9567 offset = 1;
9568
9569 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9570 /* Arg fits fully into registers. */
9571 return 0;
9572 else if (cum->words + offset >= max_arg_words)
9573 /* Arg fully on the stack. */
9574 return 0;
9575 else
9576 /* Arg is split. */
9577 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9578 }
9579
9580
9581 /* A get_unnamed_section callback for switching to the text section.
9582
9583 This function is only used with SOM. Because we don't support
9584 named subspaces, we can only create a new subspace or switch back
9585 to the default text subspace. */
9586
9587 static void
9588 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9589 {
9590 gcc_assert (TARGET_SOM);
9591 if (TARGET_GAS)
9592 {
9593 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9594 {
9595 /* We only want to emit a .nsubspa directive once at the
9596 start of the function. */
9597 cfun->machine->in_nsubspa = 1;
9598
9599 /* Create a new subspace for the text. This provides
9600 better stub placement and one-only functions. */
9601 if (cfun->decl
9602 && DECL_ONE_ONLY (cfun->decl)
9603 && !DECL_WEAK (cfun->decl))
9604 {
9605 output_section_asm_op ("\t.SPACE $TEXT$\n"
9606 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9607 "ACCESS=44,SORT=24,COMDAT");
9608 return;
9609 }
9610 }
9611 else
9612 {
9613 /* There isn't a current function or the body of the current
9614 function has been completed. So, we are changing to the
9615 text section to output debugging information. Thus, we
9616 need to forget that we are in the text section so that
9617 varasm.c will call us when text_section is selected again. */
9618 gcc_assert (!cfun || !cfun->machine
9619 || cfun->machine->in_nsubspa == 2);
9620 in_section = NULL;
9621 }
9622 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9623 return;
9624 }
9625 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9626 }
9627
9628 /* A get_unnamed_section callback for switching to comdat data
9629 sections. This function is only used with SOM. */
9630
9631 static void
9632 som_output_comdat_data_section_asm_op (const void *data)
9633 {
9634 in_section = NULL;
9635 output_section_asm_op (data);
9636 }
9637
9638 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9639
9640 static void
9641 pa_som_asm_init_sections (void)
9642 {
9643 text_section
9644 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9645
9646 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9647 is not being generated. */
9648 som_readonly_data_section
9649 = get_unnamed_section (0, output_section_asm_op,
9650 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9651
9652 /* When secondary definitions are not supported, SOM makes readonly
9653 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9654 the comdat flag. */
9655 som_one_only_readonly_data_section
9656 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9657 "\t.SPACE $TEXT$\n"
9658 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9659 "ACCESS=0x2c,SORT=16,COMDAT");
9660
9661
9662 /* When secondary definitions are not supported, SOM makes data one-only
9663 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9664 som_one_only_data_section
9665 = get_unnamed_section (SECTION_WRITE,
9666 som_output_comdat_data_section_asm_op,
9667 "\t.SPACE $PRIVATE$\n"
9668 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9669 "ACCESS=31,SORT=24,COMDAT");
9670
9671 if (flag_tm)
9672 som_tm_clone_table_section
9673 = get_unnamed_section (0, output_section_asm_op,
9674 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9675
9676 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9677 which reference data within the $TEXT$ space (for example constant
9678 strings in the $LIT$ subspace).
9679
9680 The assemblers (GAS and HP as) both have problems with handling
9681 the difference of two symbols which is the other correct way to
9682 reference constant data during PIC code generation.
9683
9684 So, there's no way to reference constant data which is in the
9685 $TEXT$ space during PIC generation. Instead place all constant
9686 data into the $PRIVATE$ subspace (this reduces sharing, but it
9687 works correctly). */
9688 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9689
9690 /* We must not have a reference to an external symbol defined in a
9691 shared library in a readonly section, else the SOM linker will
9692 complain.
9693
9694 So, we force exception information into the data section. */
9695 exception_section = data_section;
9696 }
9697
9698 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9699
9700 static section *
9701 pa_som_tm_clone_table_section (void)
9702 {
9703 return som_tm_clone_table_section;
9704 }
9705
9706 /* On hpux10, the linker will give an error if we have a reference
9707 in the read-only data section to a symbol defined in a shared
9708 library. Therefore, expressions that might require a reloc can
9709 not be placed in the read-only data section. */
9710
9711 static section *
9712 pa_select_section (tree exp, int reloc,
9713 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9714 {
9715 if (TREE_CODE (exp) == VAR_DECL
9716 && TREE_READONLY (exp)
9717 && !TREE_THIS_VOLATILE (exp)
9718 && DECL_INITIAL (exp)
9719 && (DECL_INITIAL (exp) == error_mark_node
9720 || TREE_CONSTANT (DECL_INITIAL (exp)))
9721 && !reloc)
9722 {
9723 if (TARGET_SOM
9724 && DECL_ONE_ONLY (exp)
9725 && !DECL_WEAK (exp))
9726 return som_one_only_readonly_data_section;
9727 else
9728 return readonly_data_section;
9729 }
9730 else if (CONSTANT_CLASS_P (exp) && !reloc)
9731 return readonly_data_section;
9732 else if (TARGET_SOM
9733 && TREE_CODE (exp) == VAR_DECL
9734 && DECL_ONE_ONLY (exp)
9735 && !DECL_WEAK (exp))
9736 return som_one_only_data_section;
9737 else
9738 return data_section;
9739 }
9740
9741 /* Implement pa_reloc_rw_mask. */
9742
9743 static int
9744 pa_reloc_rw_mask (void)
9745 {
9746 /* We force (const (plus (symbol) (const_int))) to memory when the
9747 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9748 handle this construct in read-only memory and we want to avoid
9749 this for ELF. So, we always force an RTX needing relocation to
9750 the data section. */
9751 return 3;
9752 }
9753
9754 static void
9755 pa_globalize_label (FILE *stream, const char *name)
9756 {
9757 /* We only handle DATA objects here, functions are globalized in
9758 ASM_DECLARE_FUNCTION_NAME. */
9759 if (! FUNCTION_NAME_P (name))
9760 {
9761 fputs ("\t.EXPORT ", stream);
9762 assemble_name (stream, name);
9763 fputs (",DATA\n", stream);
9764 }
9765 }
9766
9767 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9768
9769 static rtx
9770 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9771 int incoming ATTRIBUTE_UNUSED)
9772 {
9773 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9774 }
9775
9776 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9777
9778 bool
9779 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9780 {
9781 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9782 PA64 ABI says that objects larger than 128 bits are returned in memory.
9783 Note, int_size_in_bytes can return -1 if the size of the object is
9784 variable or larger than the maximum value that can be expressed as
9785 a HOST_WIDE_INT. It can also return zero for an empty type. The
9786 simplest way to handle variable and empty types is to pass them in
9787 memory. This avoids problems in defining the boundaries of argument
9788 slots, allocating registers, etc. */
9789 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9790 || int_size_in_bytes (type) <= 0);
9791 }
9792
9793 /* Structure to hold declaration and name of external symbols that are
9794 emitted by GCC. We generate a vector of these symbols and output them
9795 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9796 This avoids putting out names that are never really used. */
9797
9798 typedef struct GTY(()) extern_symbol
9799 {
9800 tree decl;
9801 const char *name;
9802 } extern_symbol;
9803
9804 /* Define gc'd vector type for extern_symbol. */
9805
9806 /* Vector of extern_symbol pointers. */
9807 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9808
9809 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9810 /* Mark DECL (name NAME) as an external reference (assembler output
9811 file FILE). This saves the names to output at the end of the file
9812 if actually referenced. */
9813
9814 void
9815 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9816 {
9817 gcc_assert (file == asm_out_file);
9818 extern_symbol p = {decl, name};
9819 vec_safe_push (extern_symbols, p);
9820 }
9821
9822 /* Output text required at the end of an assembler file.
9823 This includes deferred plabels and .import directives for
9824 all external symbols that were actually referenced. */
9825
9826 static void
9827 pa_hpux_file_end (void)
9828 {
9829 unsigned int i;
9830 extern_symbol *p;
9831
9832 if (!NO_DEFERRED_PROFILE_COUNTERS)
9833 output_deferred_profile_counters ();
9834
9835 output_deferred_plabels ();
9836
9837 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9838 {
9839 tree decl = p->decl;
9840
9841 if (!TREE_ASM_WRITTEN (decl)
9842 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9843 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9844 }
9845
9846 vec_free (extern_symbols);
9847 }
9848 #endif
9849
9850 /* Return true if a change from mode FROM to mode TO for a register
9851 in register class RCLASS is invalid. */
9852
9853 bool
9854 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9855 enum reg_class rclass)
9856 {
9857 if (from == to)
9858 return false;
9859
9860 /* Reject changes to/from complex and vector modes. */
9861 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9862 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9863 return true;
9864
9865 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9866 return false;
9867
9868 /* There is no way to load QImode or HImode values directly from
9869 memory. SImode loads to the FP registers are not zero extended.
9870 On the 64-bit target, this conflicts with the definition of
9871 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9872 with different sizes in the floating-point registers. */
9873 if (MAYBE_FP_REG_CLASS_P (rclass))
9874 return true;
9875
9876 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9877 in specific sets of registers. Thus, we cannot allow changing
9878 to a larger mode when it's larger than a word. */
9879 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9880 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9881 return true;
9882
9883 return false;
9884 }
9885
9886 /* Returns TRUE if it is a good idea to tie two pseudo registers
9887 when one has mode MODE1 and one has mode MODE2.
9888 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9889 for any hard reg, then this must be FALSE for correct output.
9890
9891 We should return FALSE for QImode and HImode because these modes
9892 are not ok in the floating-point registers. However, this prevents
9893 tieing these modes to SImode and DImode in the general registers.
9894 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9895 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9896 in the floating-point registers. */
9897
9898 bool
9899 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9900 {
9901 /* Don't tie modes in different classes. */
9902 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9903 return false;
9904
9905 return true;
9906 }
9907
9908 \f
9909 /* Length in units of the trampoline instruction code. */
9910
9911 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9912
9913
9914 /* Output assembler code for a block containing the constant parts
9915 of a trampoline, leaving space for the variable parts.\
9916
9917 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9918 and then branches to the specified routine.
9919
9920 This code template is copied from text segment to stack location
9921 and then patched with pa_trampoline_init to contain valid values,
9922 and then entered as a subroutine.
9923
9924 It is best to keep this as small as possible to avoid having to
9925 flush multiple lines in the cache. */
9926
9927 static void
9928 pa_asm_trampoline_template (FILE *f)
9929 {
9930 if (!TARGET_64BIT)
9931 {
9932 fputs ("\tldw 36(%r22),%r21\n", f);
9933 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9934 if (ASSEMBLER_DIALECT == 0)
9935 fputs ("\tdepi 0,31,2,%r21\n", f);
9936 else
9937 fputs ("\tdepwi 0,31,2,%r21\n", f);
9938 fputs ("\tldw 4(%r21),%r19\n", f);
9939 fputs ("\tldw 0(%r21),%r21\n", f);
9940 if (TARGET_PA_20)
9941 {
9942 fputs ("\tbve (%r21)\n", f);
9943 fputs ("\tldw 40(%r22),%r29\n", f);
9944 fputs ("\t.word 0\n", f);
9945 fputs ("\t.word 0\n", f);
9946 }
9947 else
9948 {
9949 fputs ("\tldsid (%r21),%r1\n", f);
9950 fputs ("\tmtsp %r1,%sr0\n", f);
9951 fputs ("\tbe 0(%sr0,%r21)\n", f);
9952 fputs ("\tldw 40(%r22),%r29\n", f);
9953 }
9954 fputs ("\t.word 0\n", f);
9955 fputs ("\t.word 0\n", f);
9956 fputs ("\t.word 0\n", f);
9957 fputs ("\t.word 0\n", f);
9958 }
9959 else
9960 {
9961 fputs ("\t.dword 0\n", f);
9962 fputs ("\t.dword 0\n", f);
9963 fputs ("\t.dword 0\n", f);
9964 fputs ("\t.dword 0\n", f);
9965 fputs ("\tmfia %r31\n", f);
9966 fputs ("\tldd 24(%r31),%r1\n", f);
9967 fputs ("\tldd 24(%r1),%r27\n", f);
9968 fputs ("\tldd 16(%r1),%r1\n", f);
9969 fputs ("\tbve (%r1)\n", f);
9970 fputs ("\tldd 32(%r31),%r31\n", f);
9971 fputs ("\t.dword 0 ; fptr\n", f);
9972 fputs ("\t.dword 0 ; static link\n", f);
9973 }
9974 }
9975
9976 /* Emit RTL insns to initialize the variable parts of a trampoline.
9977 FNADDR is an RTX for the address of the function's pure code.
9978 CXT is an RTX for the static chain value for the function.
9979
9980 Move the function address to the trampoline template at offset 36.
9981 Move the static chain value to trampoline template at offset 40.
9982 Move the trampoline address to trampoline template at offset 44.
9983 Move r19 to trampoline template at offset 48. The latter two
9984 words create a plabel for the indirect call to the trampoline.
9985
9986 A similar sequence is used for the 64-bit port but the plabel is
9987 at the beginning of the trampoline.
9988
9989 Finally, the cache entries for the trampoline code are flushed.
9990 This is necessary to ensure that the trampoline instruction sequence
9991 is written to memory prior to any attempts at prefetching the code
9992 sequence. */
9993
9994 static void
9995 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9996 {
9997 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9998 rtx start_addr = gen_reg_rtx (Pmode);
9999 rtx end_addr = gen_reg_rtx (Pmode);
10000 rtx line_length = gen_reg_rtx (Pmode);
10001 rtx r_tramp, tmp;
10002
10003 emit_block_move (m_tramp, assemble_trampoline_template (),
10004 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10005 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10006
10007 if (!TARGET_64BIT)
10008 {
10009 tmp = adjust_address (m_tramp, Pmode, 36);
10010 emit_move_insn (tmp, fnaddr);
10011 tmp = adjust_address (m_tramp, Pmode, 40);
10012 emit_move_insn (tmp, chain_value);
10013
10014 /* Create a fat pointer for the trampoline. */
10015 tmp = adjust_address (m_tramp, Pmode, 44);
10016 emit_move_insn (tmp, r_tramp);
10017 tmp = adjust_address (m_tramp, Pmode, 48);
10018 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10019
10020 /* fdc and fic only use registers for the address to flush,
10021 they do not accept integer displacements. We align the
10022 start and end addresses to the beginning of their respective
10023 cache lines to minimize the number of lines flushed. */
10024 emit_insn (gen_andsi3 (start_addr, r_tramp,
10025 GEN_INT (-MIN_CACHELINE_SIZE)));
10026 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10027 TRAMPOLINE_CODE_SIZE-1));
10028 emit_insn (gen_andsi3 (end_addr, tmp,
10029 GEN_INT (-MIN_CACHELINE_SIZE)));
10030 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10031 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10032 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10033 gen_reg_rtx (Pmode),
10034 gen_reg_rtx (Pmode)));
10035 }
10036 else
10037 {
10038 tmp = adjust_address (m_tramp, Pmode, 56);
10039 emit_move_insn (tmp, fnaddr);
10040 tmp = adjust_address (m_tramp, Pmode, 64);
10041 emit_move_insn (tmp, chain_value);
10042
10043 /* Create a fat pointer for the trampoline. */
10044 tmp = adjust_address (m_tramp, Pmode, 16);
10045 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10046 r_tramp, 32)));
10047 tmp = adjust_address (m_tramp, Pmode, 24);
10048 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10049
10050 /* fdc and fic only use registers for the address to flush,
10051 they do not accept integer displacements. We align the
10052 start and end addresses to the beginning of their respective
10053 cache lines to minimize the number of lines flushed. */
10054 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10055 emit_insn (gen_anddi3 (start_addr, tmp,
10056 GEN_INT (-MIN_CACHELINE_SIZE)));
10057 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10058 TRAMPOLINE_CODE_SIZE - 1));
10059 emit_insn (gen_anddi3 (end_addr, tmp,
10060 GEN_INT (-MIN_CACHELINE_SIZE)));
10061 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10062 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10063 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10064 gen_reg_rtx (Pmode),
10065 gen_reg_rtx (Pmode)));
10066 }
10067
10068 #ifdef HAVE_ENABLE_EXECUTE_STACK
10069  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10070      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10071 #endif
10072 }
10073
10074 /* Perform any machine-specific adjustment in the address of the trampoline.
10075 ADDR contains the address that was passed to pa_trampoline_init.
10076 Adjust the trampoline address to point to the plabel at offset 44. */
10077
10078 static rtx
10079 pa_trampoline_adjust_address (rtx addr)
10080 {
10081 if (!TARGET_64BIT)
10082 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10083 return addr;
10084 }
10085
10086 static rtx
10087 pa_delegitimize_address (rtx orig_x)
10088 {
10089 rtx x = delegitimize_mem_from_attrs (orig_x);
10090
10091 if (GET_CODE (x) == LO_SUM
10092 && GET_CODE (XEXP (x, 1)) == UNSPEC
10093 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10094 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10095 return x;
10096 }
10097 \f
10098 static rtx
10099 pa_internal_arg_pointer (void)
10100 {
10101 /* The argument pointer and the hard frame pointer are the same in
10102 the 32-bit runtime, so we don't need a copy. */
10103 if (TARGET_64BIT)
10104 return copy_to_reg (virtual_incoming_args_rtx);
10105 else
10106 return virtual_incoming_args_rtx;
10107 }
10108
10109 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10110 Frame pointer elimination is automatically handled. */
10111
10112 static bool
10113 pa_can_eliminate (const int from, const int to)
10114 {
10115 /* The argument cannot be eliminated in the 64-bit runtime. */
10116 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10117 return false;
10118
10119 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10120 ? ! frame_pointer_needed
10121 : true);
10122 }
10123
10124 /* Define the offset between two registers, FROM to be eliminated and its
10125 replacement TO, at the start of a routine. */
10126 HOST_WIDE_INT
10127 pa_initial_elimination_offset (int from, int to)
10128 {
10129 HOST_WIDE_INT offset;
10130
10131 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10132 && to == STACK_POINTER_REGNUM)
10133 offset = -pa_compute_frame_size (get_frame_size (), 0);
10134 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10135 offset = 0;
10136 else
10137 gcc_unreachable ();
10138
10139 return offset;
10140 }
10141
10142 static void
10143 pa_conditional_register_usage (void)
10144 {
10145 int i;
10146
10147 if (!TARGET_64BIT && !TARGET_PA_11)
10148 {
10149 for (i = 56; i <= FP_REG_LAST; i++)
10150 fixed_regs[i] = call_used_regs[i] = 1;
10151 for (i = 33; i < 56; i += 2)
10152 fixed_regs[i] = call_used_regs[i] = 1;
10153 }
10154 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10155 {
10156 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10157 fixed_regs[i] = call_used_regs[i] = 1;
10158 }
10159 if (flag_pic)
10160 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10161 }
10162
10163 /* Target hook for c_mode_for_suffix. */
10164
10165 static machine_mode
10166 pa_c_mode_for_suffix (char suffix)
10167 {
10168 if (HPUX_LONG_DOUBLE_LIBRARY)
10169 {
10170 if (suffix == 'q')
10171 return TFmode;
10172 }
10173
10174 return VOIDmode;
10175 }
10176
10177 /* Target hook for function_section. */
10178
10179 static section *
10180 pa_function_section (tree decl, enum node_frequency freq,
10181 bool startup, bool exit)
10182 {
10183 /* Put functions in text section if target doesn't have named sections. */
10184 if (!targetm_common.have_named_sections)
10185 return text_section;
10186
10187 /* Force nested functions into the same section as the containing
10188 function. */
10189 if (decl
10190 && DECL_SECTION_NAME (decl) == NULL
10191 && DECL_CONTEXT (decl) != NULL_TREE
10192 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10193 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10194 return function_section (DECL_CONTEXT (decl));
10195
10196 /* Otherwise, use the default function section. */
10197 return default_function_section (decl, freq, startup, exit);
10198 }
10199
10200 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10201
10202 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10203 that need more than three instructions to load prior to reload. This
10204 limit is somewhat arbitrary. It takes three instructions to load a
10205 CONST_INT from memory but two are memory accesses. It may be better
10206 to increase the allowed range for CONST_INTS. We may also be able
10207 to handle CONST_DOUBLES. */
10208
10209 static bool
10210 pa_legitimate_constant_p (machine_mode mode, rtx x)
10211 {
10212 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10213 return false;
10214
10215 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10216 return false;
10217
10218 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10219 legitimate constants. The other variants can't be handled by
10220 the move patterns after reload starts. */
10221 if (tls_referenced_p (x))
10222 return false;
10223
10224 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10225 return false;
10226
10227 if (TARGET_64BIT
10228 && HOST_BITS_PER_WIDE_INT > 32
10229 && GET_CODE (x) == CONST_INT
10230 && !reload_in_progress
10231 && !reload_completed
10232 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10233 && !pa_cint_ok_for_move (UINTVAL (x)))
10234 return false;
10235
10236 if (function_label_operand (x, mode))
10237 return false;
10238
10239 return true;
10240 }
10241
10242 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10243
10244 static unsigned int
10245 pa_section_type_flags (tree decl, const char *name, int reloc)
10246 {
10247 unsigned int flags;
10248
10249 flags = default_section_type_flags (decl, name, reloc);
10250
10251 /* Function labels are placed in the constant pool. This can
10252 cause a section conflict if decls are put in ".data.rel.ro"
10253 or ".data.rel.ro.local" using the __attribute__ construct. */
10254 if (strcmp (name, ".data.rel.ro") == 0
10255 || strcmp (name, ".data.rel.ro.local") == 0)
10256 flags |= SECTION_WRITE | SECTION_RELRO;
10257
10258 return flags;
10259 }
10260
10261 /* pa_legitimate_address_p recognizes an RTL expression that is a
10262 valid memory address for an instruction. The MODE argument is the
10263 machine mode for the MEM expression that wants to use this address.
10264
10265 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10266 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10267 available with floating point loads and stores, and integer loads.
10268 We get better code by allowing indexed addresses in the initial
10269 RTL generation.
10270
10271 The acceptance of indexed addresses as legitimate implies that we
10272 must provide patterns for doing indexed integer stores, or the move
10273 expanders must force the address of an indexed store to a register.
10274 We have adopted the latter approach.
10275
10276 Another function of pa_legitimate_address_p is to ensure that
10277 the base register is a valid pointer for indexed instructions.
10278 On targets that have non-equivalent space registers, we have to
10279 know at the time of assembler output which register in a REG+REG
10280 pair is the base register. The REG_POINTER flag is sometimes lost
10281 in reload and the following passes, so it can't be relied on during
10282 code generation. Thus, we either have to canonicalize the order
10283 of the registers in REG+REG indexed addresses, or treat REG+REG
10284 addresses separately and provide patterns for both permutations.
10285
10286 The latter approach requires several hundred additional lines of
10287 code in pa.md. The downside to canonicalizing is that a PLUS
10288 in the wrong order can't combine to form to make a scaled indexed
10289 memory operand. As we won't need to canonicalize the operands if
10290 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10291
10292 We initially break out scaled indexed addresses in canonical order
10293 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10294 scaled indexed addresses during RTL generation. However, fold_rtx
10295 has its own opinion on how the operands of a PLUS should be ordered.
10296 If one of the operands is equivalent to a constant, it will make
10297 that operand the second operand. As the base register is likely to
10298 be equivalent to a SYMBOL_REF, we have made it the second operand.
10299
10300 pa_legitimate_address_p accepts REG+REG as legitimate when the
10301 operands are in the order INDEX+BASE on targets with non-equivalent
10302 space registers, and in any order on targets with equivalent space
10303 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10304
10305 We treat a SYMBOL_REF as legitimate if it is part of the current
10306 function's constant-pool, because such addresses can actually be
10307 output as REG+SMALLINT. */
10308
10309 static bool
10310 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10311 {
10312 if ((REG_P (x)
10313 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10314 : REG_OK_FOR_BASE_P (x)))
10315 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10316 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10317 && REG_P (XEXP (x, 0))
10318 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10319 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10320 return true;
10321
10322 if (GET_CODE (x) == PLUS)
10323 {
10324 rtx base, index;
10325
10326 /* For REG+REG, the base register should be in XEXP (x, 1),
10327 so check it first. */
10328 if (REG_P (XEXP (x, 1))
10329 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10330 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10331 base = XEXP (x, 1), index = XEXP (x, 0);
10332 else if (REG_P (XEXP (x, 0))
10333 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10334 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10335 base = XEXP (x, 0), index = XEXP (x, 1);
10336 else
10337 return false;
10338
10339 if (GET_CODE (index) == CONST_INT)
10340 {
10341 if (INT_5_BITS (index))
10342 return true;
10343
10344 /* When INT14_OK_STRICT is false, a secondary reload is needed
10345 to adjust the displacement of SImode and DImode floating point
10346 instructions but this may fail when the register also needs
10347 reloading. So, we return false when STRICT is true. We
10348 also reject long displacements for float mode addresses since
10349 the majority of accesses will use floating point instructions
10350 that don't support 14-bit offsets. */
10351 if (!INT14_OK_STRICT
10352 && (strict || !(reload_in_progress || reload_completed))
10353 && mode != QImode
10354 && mode != HImode)
10355 return false;
10356
10357 return base14_operand (index, mode);
10358 }
10359
10360 if (!TARGET_DISABLE_INDEXING
10361 /* Only accept the "canonical" INDEX+BASE operand order
10362 on targets with non-equivalent space registers. */
10363 && (TARGET_NO_SPACE_REGS
10364 ? REG_P (index)
10365 : (base == XEXP (x, 1) && REG_P (index)
10366 && (reload_completed
10367 || (reload_in_progress && HARD_REGISTER_P (base))
10368 || REG_POINTER (base))
10369 && (reload_completed
10370 || (reload_in_progress && HARD_REGISTER_P (index))
10371 || !REG_POINTER (index))))
10372 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10373 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10374 : REG_OK_FOR_INDEX_P (index))
10375 && borx_reg_operand (base, Pmode)
10376 && borx_reg_operand (index, Pmode))
10377 return true;
10378
10379 if (!TARGET_DISABLE_INDEXING
10380 && GET_CODE (index) == MULT
10381 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10382 && REG_P (XEXP (index, 0))
10383 && GET_MODE (XEXP (index, 0)) == Pmode
10384 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10385 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10386 && GET_CODE (XEXP (index, 1)) == CONST_INT
10387 && INTVAL (XEXP (index, 1))
10388 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10389 && borx_reg_operand (base, Pmode))
10390 return true;
10391
10392 return false;
10393 }
10394
10395 if (GET_CODE (x) == LO_SUM)
10396 {
10397 rtx y = XEXP (x, 0);
10398
10399 if (GET_CODE (y) == SUBREG)
10400 y = SUBREG_REG (y);
10401
10402 if (REG_P (y)
10403 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10404 : REG_OK_FOR_BASE_P (y)))
10405 {
10406 /* Needed for -fPIC */
10407 if (mode == Pmode
10408 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10409 return true;
10410
10411 if (!INT14_OK_STRICT
10412 && (strict || !(reload_in_progress || reload_completed))
10413 && mode != QImode
10414 && mode != HImode)
10415 return false;
10416
10417 if (CONSTANT_P (XEXP (x, 1)))
10418 return true;
10419 }
10420 return false;
10421 }
10422
10423 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10424 return true;
10425
10426 return false;
10427 }
10428
10429 /* Look for machine dependent ways to make the invalid address AD a
10430 valid address.
10431
10432 For the PA, transform:
10433
10434 memory(X + <large int>)
10435
10436 into:
10437
10438 if (<large int> & mask) >= 16
10439 Y = (<large int> & ~mask) + mask + 1 Round up.
10440 else
10441 Y = (<large int> & ~mask) Round down.
10442 Z = X + Y
10443 memory (Z + (<large int> - Y));
10444
10445 This makes reload inheritance and reload_cse work better since Z
10446 can be reused.
10447
10448 There may be more opportunities to improve code with this hook. */
10449
10450 rtx
10451 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10452 int opnum, int type,
10453 int ind_levels ATTRIBUTE_UNUSED)
10454 {
10455 long offset, newoffset, mask;
10456 rtx new_rtx, temp = NULL_RTX;
10457
10458 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10459 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10460
10461 if (optimize && GET_CODE (ad) == PLUS)
10462 temp = simplify_binary_operation (PLUS, Pmode,
10463 XEXP (ad, 0), XEXP (ad, 1));
10464
10465 new_rtx = temp ? temp : ad;
10466
10467 if (optimize
10468 && GET_CODE (new_rtx) == PLUS
10469 && GET_CODE (XEXP (new_rtx, 0)) == REG
10470 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10471 {
10472 offset = INTVAL (XEXP ((new_rtx), 1));
10473
10474 /* Choose rounding direction. Round up if we are >= halfway. */
10475 if ((offset & mask) >= ((mask + 1) / 2))
10476 newoffset = (offset & ~mask) + mask + 1;
10477 else
10478 newoffset = offset & ~mask;
10479
10480 /* Ensure that long displacements are aligned. */
10481 if (mask == 0x3fff
10482 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10483 || (TARGET_64BIT && (mode) == DImode)))
10484 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10485
10486 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10487 {
10488 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10489 GEN_INT (newoffset));
10490 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10491 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10492 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10493 opnum, (enum reload_type) type);
10494 return ad;
10495 }
10496 }
10497
10498 return NULL_RTX;
10499 }
10500
10501 /* Output address vector. */
10502
10503 void
10504 pa_output_addr_vec (rtx lab, rtx body)
10505 {
10506 int idx, vlen = XVECLEN (body, 0);
10507
10508 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10509 if (TARGET_GAS)
10510 fputs ("\t.begin_brtab\n", asm_out_file);
10511 for (idx = 0; idx < vlen; idx++)
10512 {
10513 ASM_OUTPUT_ADDR_VEC_ELT
10514 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10515 }
10516 if (TARGET_GAS)
10517 fputs ("\t.end_brtab\n", asm_out_file);
10518 }
10519
10520 /* Output address difference vector. */
10521
10522 void
10523 pa_output_addr_diff_vec (rtx lab, rtx body)
10524 {
10525 rtx base = XEXP (XEXP (body, 0), 0);
10526 int idx, vlen = XVECLEN (body, 1);
10527
10528 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10529 if (TARGET_GAS)
10530 fputs ("\t.begin_brtab\n", asm_out_file);
10531 for (idx = 0; idx < vlen; idx++)
10532 {
10533 ASM_OUTPUT_ADDR_DIFF_ELT
10534 (asm_out_file,
10535 body,
10536 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10537 CODE_LABEL_NUMBER (base));
10538 }
10539 if (TARGET_GAS)
10540 fputs ("\t.end_brtab\n", asm_out_file);
10541 }
10542
10543 /* This is a helper function for the other atomic operations. This function
10544 emits a loop that contains SEQ that iterates until a compare-and-swap
10545 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10546 a set of instructions that takes a value from OLD_REG as an input and
10547 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10548 set to the current contents of MEM. After SEQ, a compare-and-swap will
10549 attempt to update MEM with NEW_REG. The function returns true when the
10550 loop was generated successfully. */
10551
10552 static bool
10553 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10554 {
10555 machine_mode mode = GET_MODE (mem);
10556 rtx_code_label *label;
10557 rtx cmp_reg, success, oldval;
10558
10559 /* The loop we want to generate looks like
10560
10561 cmp_reg = mem;
10562 label:
10563 old_reg = cmp_reg;
10564 seq;
10565 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10566 if (success)
10567 goto label;
10568
10569 Note that we only do the plain load from memory once. Subsequent
10570 iterations use the value loaded by the compare-and-swap pattern. */
10571
10572 label = gen_label_rtx ();
10573 cmp_reg = gen_reg_rtx (mode);
10574
10575 emit_move_insn (cmp_reg, mem);
10576 emit_label (label);
10577 emit_move_insn (old_reg, cmp_reg);
10578 if (seq)
10579 emit_insn (seq);
10580
10581 success = NULL_RTX;
10582 oldval = cmp_reg;
10583 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10584 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10585 MEMMODEL_RELAXED))
10586 return false;
10587
10588 if (oldval != cmp_reg)
10589 emit_move_insn (cmp_reg, oldval);
10590
10591 /* Mark this jump predicted not taken. */
10592 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10593 GET_MODE (success), 1, label, 0);
10594 return true;
10595 }
10596
10597 /* This function tries to implement an atomic exchange operation using a
10598 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10599 *MEM are returned, using TARGET if possible. No memory model is required
10600 since a compare_and_swap loop is seq-cst. */
10601
10602 rtx
10603 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10604 {
10605 machine_mode mode = GET_MODE (mem);
10606
10607 if (can_compare_and_swap_p (mode, true))
10608 {
10609 if (!target || !register_operand (target, mode))
10610 target = gen_reg_rtx (mode);
10611 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10612 return target;
10613 }
10614
10615 return NULL_RTX;
10616 }
10617
10618 #include "gt-pa.h"