1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
33 #include "stringpool.h"
41 #include "diagnostic-core.h"
42 #include "insn-attr.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
48 #include "print-tree.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
57 #include "sched-int.h"
59 #include "gimple-fold.h"
60 #include "gimple-iterator.h"
61 #include "gimple-ssa.h"
62 #include "gimple-walk.h"
65 #include "tm-constrs.h"
66 #include "tree-vectorizer.h"
67 #include "target-globals.h"
70 #include "tree-pass.h"
73 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #include "gstab.h" /* for N_SLINE */
78 #include "case-cfn-macros.h"
81 /* This file should be included last. */
82 #include "target-def.h"
84 #ifndef TARGET_NO_PROTOTYPE
85 #define TARGET_NO_PROTOTYPE 0
/* Classic function-like macros: each argument may be evaluated more
   than once, so never pass expressions with side effects
   (e.g. min (i++, j)).  */
88 #define min(A,B) ((A) < (B) ? (A) : (B))
89 #define max(A,B) ((A) > (B) ? (A) : (B))
91 /* Structure used to define the rs6000 stack */
92 typedef struct rs6000_stack
{
93 int reload_completed
; /* stack info won't change from here on */
94 int first_gp_reg_save
; /* first callee saved GP register used */
95 int first_fp_reg_save
; /* first callee saved FP register used */
96 int first_altivec_reg_save
; /* first callee saved AltiVec register used */
97 int lr_save_p
; /* true if the link reg needs to be saved */
98 int cr_save_p
; /* true if the CR reg needs to be saved */
99 unsigned int vrsave_mask
; /* mask of vec registers to save */
100 int push_p
; /* true if we need to allocate stack space */
101 int calls_p
; /* true if the function makes any calls */
102 int world_save_p
; /* true if we're saving *everything*:
103 r13-r31, cr, f14-f31, vrsave, v20-v31 */
104 enum rs6000_abi abi
; /* which ABI to use */
105 int gp_save_offset
; /* offset to save GP regs from initial SP */
106 int fp_save_offset
; /* offset to save FP regs from initial SP */
107 int altivec_save_offset
; /* offset to save AltiVec regs from initial SP */
108 int lr_save_offset
; /* offset to save LR from initial SP */
109 int cr_save_offset
; /* offset to save CR from initial SP */
110 int vrsave_save_offset
; /* offset to save VRSAVE from initial SP */
111 int spe_gp_save_offset
; /* offset to save spe 64-bit gprs */
112 int varargs_save_offset
; /* offset to save the varargs registers */
113 int ehrd_offset
; /* offset to EH return data */
114 int ehcr_offset
; /* offset to EH CR field data */
115 int reg_size
; /* register size (4 or 8) */
116 HOST_WIDE_INT vars_size
; /* variable save area size */
117 int parm_size
; /* outgoing parameter size */
118 int save_size
; /* save area size */
119 int fixed_size
; /* fixed size of stack frame */
120 int gp_size
; /* size of saved GP registers */
121 int fp_size
; /* size of saved FP registers */
122 int altivec_size
; /* size of saved AltiVec registers */
123 int cr_size
; /* size to hold CR if not in fixed area */
124 int vrsave_size
; /* size to hold VRSAVE */
125 int altivec_padding_size
; /* size of altivec alignment padding */
126 int spe_gp_size
; /* size of 64-bit GPR save size for SPE */
127 int spe_padding_size
;
128 HOST_WIDE_INT total_size
; /* total bytes allocated for stack */
129 int spe_64bit_regs_used
;
133 /* A C structure for machine-specific, per-function data.
134 This is added to the cfun structure. */
135 typedef struct GTY(()) machine_function
137 /* Whether the instruction chain has been scanned already. */
138 int spe_insn_chain_scanned_p
;
139 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
140 int ra_needs_full_frame
;
141 /* Flags if __builtin_return_address (0) was used. */
143 /* Cache lr_save_p after expansion of builtin_eh_return. */
145 /* Whether we need to save the TOC to the reserved stack location in the
146 function prologue. */
147 bool save_toc_in_prologue
;
148 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
149 varargs save area. */
150 HOST_WIDE_INT varargs_save_offset
;
151 /* Temporary stack slot to use for SDmode copies. This slot is
152 64-bits wide and is allocated early enough so that the offset
153 does not overflow the 16-bit load/store offset field. */
154 rtx sdmode_stack_slot
;
155 /* Alternative internal arg pointer for -fsplit-stack. */
156 rtx split_stack_arg_pointer
;
157 bool split_stack_argp_used
;
158 /* Flag if r2 setup is needed with ELFv2 ABI. */
159 bool r2_setup_needed
;
160 /* The number of components we use for separate shrink-wrapping. */
162 /* The components already handled by separate shrink-wrapping, which should
163 not be considered by the prologue and epilogue. */
164 bool gpr_is_wrapped_separately
[32];
165 bool fpr_is_wrapped_separately
[32];
166 bool lr_is_wrapped_separately
;
/* File-scope state.  GTY(()) marks a variable as a root for GCC's
   garbage collector so the trees it references are kept alive across
   collections.  */
169 /* Support targetm.vectorize.builtin_mask_for_load. */
170 static GTY(()) tree altivec_builtin_mask_for_load
;
172 /* Set to nonzero once AIX common-mode calls have been defined. */
173 static GTY(()) int common_mode_defined
;
175 /* Label number of label created for -mrelocatable, to call to so we can
176 get the address of the GOT section */
177 static int rs6000_pic_labelno
;
180 /* Counter for labels which are to be placed in .fixup. */
181 int fixuplabelno
= 0;
/* NOTE(review): the comment below appears to describe a declaration
   that is not present here -- verify against the master copy.  */
184 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
187 /* Specify the machine mode that pointers have. After generation of rtl, the
188 compiler makes no further distinction between pointers and any other objects
189 of this machine mode. */
190 scalar_int_mode rs6000_pmode
;
192 /* Width in bits of a pointer. */
193 unsigned rs6000_pointer_size
;
195 #ifdef HAVE_AS_GNU_ATTRIBUTE
196 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
197 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
199 /* Flag whether floating point values have been passed/returned.
200 Note that this doesn't say whether fprs are used, since the
201 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
202 should be set for soft-float values passed in gprs and ieee128
203 values passed in vsx registers. */
204 static bool rs6000_passes_float
;
205 static bool rs6000_passes_long_double
;
206 /* Flag whether vector values have been passed/returned. */
207 static bool rs6000_passes_vector
;
208 /* Flag whether small (<= 8 byte) structures have been returned. */
209 static bool rs6000_returns_struct
;
212 /* Value is TRUE if register/mode pair is acceptable.  Indexed as
   [mode][regno], per the declaration below. */
213 bool rs6000_hard_regno_mode_ok_p
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
215 /* Maximum number of registers needed for a given register class and mode.
   Indexed as [mode][class]. */
216 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
218 /* How many registers are needed for a given register and mode.  Indexed
   as [mode][regno]. */
219 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
221 /* Map register number to register class. */
222 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
/* NOTE(review): no comment in the original; looks like a reentrancy
   guard/depth counter for the rtx-cost debug dumping code -- confirm
   against its uses elsewhere in this file.  */
224 static int dbg_cost_ctrl
;
226 /* Built in types. */
227 tree rs6000_builtin_types
[RS6000_BTI_MAX
];
228 tree rs6000_builtin_decls
[RS6000_BUILTIN_COUNT
];
230 /* Flag to say the TOC is initialized */
231 int toc_initialized
, need_toc_init
;
/* Buffer holding the assembler name of the TOC label.  */
232 char toc_label_name
[10];
234 /* Cached value of rs6000_variable_issue. This is cached in
235 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
236 static short cached_can_issue_more
;
/* Output sections.  Presumably created lazily by the XCOFF/AIX section
   hooks (xcoffout.h is included above) -- the initialization code is
   outside this chunk; verify there.  GTY(()) keeps them alive across
   garbage collections.  */
238 static GTY(()) section
*read_only_data_section
;
239 static GTY(()) section
*private_data_section
;
240 static GTY(()) section
*tls_data_section
;
241 static GTY(()) section
*tls_private_data_section
;
242 static GTY(()) section
*read_only_private_data_section
;
243 static GTY(()) section
*sdata2_section
;
244 static GTY(()) section
*toc_section
;
246 struct builtin_description
248 const HOST_WIDE_INT mask
;
249 const enum insn_code icode
;
250 const char *const name
;
251 const enum rs6000_builtins code
;
254 /* Describe the vector unit used for modes. */
255 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
256 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
258 /* Register classes for various constraints that are based on the target
260 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
262 /* Describe the alignment of a vector. */
263 int rs6000_vector_align
[NUM_MACHINE_MODES
];
265 /* Map selected modes to types for builtins. */
266 static GTY(()) tree builtin_mode_to_type
[MAX_MACHINE_MODE
][2];
268 /* What modes to automatically generate reciprocal divide estimate (fre) and
269 reciprocal sqrt (frsqrte) for. */
270 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
272 /* Masks to determine which reciprocal estimate instructions to generate
274 enum rs6000_recip_mask
{
275 RECIP_SF_DIV
= 0x001, /* Use divide estimate */
276 RECIP_DF_DIV
= 0x002,
277 RECIP_V4SF_DIV
= 0x004,
278 RECIP_V2DF_DIV
= 0x008,
280 RECIP_SF_RSQRT
= 0x010, /* Use reciprocal sqrt estimate. */
281 RECIP_DF_RSQRT
= 0x020,
282 RECIP_V4SF_RSQRT
= 0x040,
283 RECIP_V2DF_RSQRT
= 0x080,
285 /* Various combination of flags for -mrecip=xxx. */
287 RECIP_ALL
= (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
288 | RECIP_V2DF_DIV
| RECIP_SF_RSQRT
| RECIP_DF_RSQRT
289 | RECIP_V4SF_RSQRT
| RECIP_V2DF_RSQRT
),
291 RECIP_HIGH_PRECISION
= RECIP_ALL
,
293 /* On low precision machines like the power5, don't enable double precision
294 reciprocal square root estimate, since it isn't accurate enough. */
295 RECIP_LOW_PRECISION
= (RECIP_ALL
& ~(RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
))
298 /* -mrecip options. */
301 const char *string
; /* option name */
302 unsigned int mask
; /* mask bits to set */
303 } recip_options
[] = {
304 { "all", RECIP_ALL
},
305 { "none", RECIP_NONE
},
306 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
308 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
309 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
310 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
311 | RECIP_V2DF_RSQRT
) },
312 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
313 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
316 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
322 { "power9", PPC_PLATFORM_POWER9
},
323 { "power8", PPC_PLATFORM_POWER8
},
324 { "power7", PPC_PLATFORM_POWER7
},
325 { "power6x", PPC_PLATFORM_POWER6X
},
326 { "power6", PPC_PLATFORM_POWER6
},
327 { "power5+", PPC_PLATFORM_POWER5_PLUS
},
328 { "power5", PPC_PLATFORM_POWER5
},
329 { "ppc970", PPC_PLATFORM_PPC970
},
330 { "power4", PPC_PLATFORM_POWER4
},
331 { "ppca2", PPC_PLATFORM_PPCA2
},
332 { "ppc476", PPC_PLATFORM_PPC476
},
333 { "ppc464", PPC_PLATFORM_PPC464
},
334 { "ppc440", PPC_PLATFORM_PPC440
},
335 { "ppc405", PPC_PLATFORM_PPC405
},
336 { "ppc-cell-be", PPC_PLATFORM_CELL_BE
}
339 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
345 } cpu_supports_info
[] = {
346 /* AT_HWCAP masks. */
347 { "4xxmac", PPC_FEATURE_HAS_4xxMAC
, 0 },
348 { "altivec", PPC_FEATURE_HAS_ALTIVEC
, 0 },
349 { "arch_2_05", PPC_FEATURE_ARCH_2_05
, 0 },
350 { "arch_2_06", PPC_FEATURE_ARCH_2_06
, 0 },
351 { "archpmu", PPC_FEATURE_PERFMON_COMPAT
, 0 },
352 { "booke", PPC_FEATURE_BOOKE
, 0 },
353 { "cellbe", PPC_FEATURE_CELL_BE
, 0 },
354 { "dfp", PPC_FEATURE_HAS_DFP
, 0 },
355 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE
, 0 },
356 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE
, 0 },
357 { "fpu", PPC_FEATURE_HAS_FPU
, 0 },
358 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP
, 0 },
359 { "mmu", PPC_FEATURE_HAS_MMU
, 0 },
360 { "notb", PPC_FEATURE_NO_TB
, 0 },
361 { "pa6t", PPC_FEATURE_PA6T
, 0 },
362 { "power4", PPC_FEATURE_POWER4
, 0 },
363 { "power5", PPC_FEATURE_POWER5
, 0 },
364 { "power5+", PPC_FEATURE_POWER5_PLUS
, 0 },
365 { "power6x", PPC_FEATURE_POWER6_EXT
, 0 },
366 { "ppc32", PPC_FEATURE_32
, 0 },
367 { "ppc601", PPC_FEATURE_601_INSTR
, 0 },
368 { "ppc64", PPC_FEATURE_64
, 0 },
369 { "ppcle", PPC_FEATURE_PPC_LE
, 0 },
370 { "smt", PPC_FEATURE_SMT
, 0 },
371 { "spe", PPC_FEATURE_HAS_SPE
, 0 },
372 { "true_le", PPC_FEATURE_TRUE_LE
, 0 },
373 { "ucache", PPC_FEATURE_UNIFIED_CACHE
, 0 },
374 { "vsx", PPC_FEATURE_HAS_VSX
, 0 },
376 /* AT_HWCAP2 masks. */
377 { "arch_2_07", PPC_FEATURE2_ARCH_2_07
, 1 },
378 { "dscr", PPC_FEATURE2_HAS_DSCR
, 1 },
379 { "ebb", PPC_FEATURE2_HAS_EBB
, 1 },
380 { "htm", PPC_FEATURE2_HAS_HTM
, 1 },
381 { "htm-nosc", PPC_FEATURE2_HTM_NOSC
, 1 },
382 { "isel", PPC_FEATURE2_HAS_ISEL
, 1 },
383 { "tar", PPC_FEATURE2_HAS_TAR
, 1 },
384 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO
, 1 },
385 { "arch_3_00", PPC_FEATURE2_ARCH_3_00
, 1 },
386 { "ieee128", PPC_FEATURE2_HAS_IEEE128
, 1 }
389 /* Newer LIBCs explicitly export this symbol to declare that they provide
390 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
391 reference to this symbol whenever we expand a CPU builtin, so that
392 we never link against an old LIBC. */
393 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
395 /* True if we have expanded a CPU builtin. */
398 /* Pointer to function (in powerpcspe-c.c) that can define or undefine target
399 macros that have changed. Languages that don't support the preprocessor
400 don't link in powerpcspe-c.c, so we can't call it directly. */
401 void (*rs6000_target_modify_macros_ptr
) (bool, HOST_WIDE_INT
, HOST_WIDE_INT
);
403 /* Simplify register classes into simpler classifications. We assume
404 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
405 check for standard register classes (gpr/floating/altivec/vsx) and
406 floating/vector classes (float/altivec/vsx). */
408 enum rs6000_reg_type
{
421 /* Map register class to register type. */
422 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
424 /* First/last register type for the 'normal' register types (i.e. general
425 purpose, floating point, altivec, and VSX registers). */
426 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
428 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
431 /* Register classes we care about in secondary reload or go if legitimate
432 address. We only need to worry about GPR, FPR, and Altivec registers here,
433 along with an ANY field that is the OR of the 3 register classes. */
435 enum rs6000_reload_reg_type
{
436 RELOAD_REG_GPR
, /* General purpose registers. */
437 RELOAD_REG_FPR
, /* Traditional floating point regs. */
438 RELOAD_REG_VMX
, /* Altivec (VMX) registers. */
439 RELOAD_REG_ANY
, /* OR of GPR, FPR, Altivec masks. */
443 /* For setting up register classes, loop through the 3 register classes mapping
444 into real registers, and skip the ANY class, which is just an OR of the
446 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
447 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
449 /* Map reload register type to a register in the register class. */
450 struct reload_reg_map_type
{
451 const char *name
; /* Register class name. */
452 int reg
; /* Register in the register class. */
455 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
456 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
457 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
458 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
459 { "Any", -1 }, /* RELOAD_REG_ANY. */
462 /* Mask bits for each register class, indexed per mode. Historically the
463 compiler has been more restrictive about which types can do PRE_MODIFY instead of
464 PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
/* Bit mask describing which addressing forms are valid for a given
   (mode, reload register class) pair; stored in the addr_mask field of
   struct rs6000_reg_addr below.  */
465 typedef unsigned char addr_mask_type
;
467 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
468 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
469 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
470 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
471 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
472 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
473 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
474 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
476 /* Register type masks based on the type, of valid addressing modes. */
477 struct rs6000_reg_addr
{
478 enum insn_code reload_load
; /* INSN to reload for loading. */
479 enum insn_code reload_store
; /* INSN to reload for storing. */
480 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
481 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
482 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
483 enum insn_code fusion_gpr_ld
; /* INSN for fusing gpr ADDIS/loads. */
484 /* INSNs for fusing addi with loads
485 or stores for each reg. class. */
486 enum insn_code fusion_addi_ld
[(int)N_RELOAD_REG
];
487 enum insn_code fusion_addi_st
[(int)N_RELOAD_REG
];
488 /* INSNs for fusing addis with loads
489 or stores for each reg. class. */
490 enum insn_code fusion_addis_ld
[(int)N_RELOAD_REG
];
491 enum insn_code fusion_addis_st
[(int)N_RELOAD_REG
];
492 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
493 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
494 bool fused_toc
; /* Mode supports TOC fusion. */
/* Per-mode table of reload/fusion insn codes and valid addressing masks
   (see struct rs6000_reg_addr above), indexed by machine mode.  */
497 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
499 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
501 mode_supports_pre_incdec_p (machine_mode mode
)
503 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
507 /* Helper function to say whether a mode supports PRE_MODIFY. */
509 mode_supports_pre_modify_p (machine_mode mode
)
511 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
515 /* Given that there exists at least one variable that is set (produced)
516 by OUT_INSN and read (consumed) by IN_INSN, return true iff
517 IN_INSN represents one or more memory store operations and none of
518 the variables set by OUT_INSN is used by IN_INSN as the address of a
519 store operation. If either IN_INSN or OUT_INSN does not represent
520 a "single" RTL SET expression (as loosely defined by the
521 implementation of the single_set function) or a PARALLEL with only
522 SETs, CLOBBERs, and USEs inside, this function returns false.
524 This rs6000-specific version of store_data_bypass_p checks for
525 certain conditions that result in assertion failures (and internal
526 compiler errors) in the generic store_data_bypass_p function and
527 returns false rather than calling store_data_bypass_p if one of the
528 problematic conditions is detected. */
531 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
538 in_set
= single_set (in_insn
);
541 if (MEM_P (SET_DEST (in_set
)))
543 out_set
= single_set (out_insn
);
546 out_pat
= PATTERN (out_insn
);
547 if (GET_CODE (out_pat
) == PARALLEL
)
549 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
551 out_exp
= XVECEXP (out_pat
, 0, i
);
552 if ((GET_CODE (out_exp
) == CLOBBER
)
553 || (GET_CODE (out_exp
) == USE
))
555 else if (GET_CODE (out_exp
) != SET
)
564 in_pat
= PATTERN (in_insn
);
565 if (GET_CODE (in_pat
) != PARALLEL
)
568 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
570 in_exp
= XVECEXP (in_pat
, 0, i
);
571 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
573 else if (GET_CODE (in_exp
) != SET
)
576 if (MEM_P (SET_DEST (in_exp
)))
578 out_set
= single_set (out_insn
);
581 out_pat
= PATTERN (out_insn
);
582 if (GET_CODE (out_pat
) != PARALLEL
)
584 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
586 out_exp
= XVECEXP (out_pat
, 0, j
);
587 if ((GET_CODE (out_exp
) == CLOBBER
)
588 || (GET_CODE (out_exp
) == USE
))
590 else if (GET_CODE (out_exp
) != SET
)
597 return store_data_bypass_p (out_insn
, in_insn
);
600 /* Return true if we have D-form addressing in altivec registers. */
602 mode_supports_vmx_dform (machine_mode mode
)
604 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
607 /* Return true if we have D-form addressing in VSX registers. This addressing
608 is more limited than normal d-form addressing in that the offset must be
609 aligned on a 16-byte boundary. */
611 mode_supports_vsx_dform_quad (machine_mode mode
)
613 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
618 /* Target cpu costs. */
620 struct processor_costs
{
621 const int mulsi
; /* cost of SImode multiplication. */
622 const int mulsi_const
; /* cost of SImode multiplication by constant. */
623 const int mulsi_const9
; /* cost of SImode mult by short constant. */
624 const int muldi
; /* cost of DImode multiplication. */
625 const int divsi
; /* cost of SImode division. */
626 const int divdi
; /* cost of DImode division. */
627 const int fp
; /* cost of simple SFmode and DFmode insns. */
628 const int dmul
; /* cost of DFmode multiplication (and fmadd). */
629 const int sdiv
; /* cost of SFmode division (fdivs). */
630 const int ddiv
; /* cost of DFmode division (fdiv). */
631 const int cache_line_size
; /* cache line size in bytes. */
632 const int l1_cache_size
; /* size of l1 cache, in kilobytes. */
633 const int l2_cache_size
; /* size of l2 cache, in kilobytes. */
634 const int simultaneous_prefetches
; /* number of parallel prefetch
636 const int sfdf_convert
; /* cost of SF->DF conversion. */
639 const struct processor_costs
*rs6000_cost
;
641 /* Processor costs (relative to an add) */
643 /* Instruction size costs on 32bit processors. */
645 struct processor_costs size32_cost
= {
646 COSTS_N_INSNS (1), /* mulsi */
647 COSTS_N_INSNS (1), /* mulsi_const */
648 COSTS_N_INSNS (1), /* mulsi_const9 */
649 COSTS_N_INSNS (1), /* muldi */
650 COSTS_N_INSNS (1), /* divsi */
651 COSTS_N_INSNS (1), /* divdi */
652 COSTS_N_INSNS (1), /* fp */
653 COSTS_N_INSNS (1), /* dmul */
654 COSTS_N_INSNS (1), /* sdiv */
655 COSTS_N_INSNS (1), /* ddiv */
656 32, /* cache line size */
660 0, /* SF->DF convert */
663 /* Instruction size costs on 64bit processors. */
665 struct processor_costs size64_cost
= {
666 COSTS_N_INSNS (1), /* mulsi */
667 COSTS_N_INSNS (1), /* mulsi_const */
668 COSTS_N_INSNS (1), /* mulsi_const9 */
669 COSTS_N_INSNS (1), /* muldi */
670 COSTS_N_INSNS (1), /* divsi */
671 COSTS_N_INSNS (1), /* divdi */
672 COSTS_N_INSNS (1), /* fp */
673 COSTS_N_INSNS (1), /* dmul */
674 COSTS_N_INSNS (1), /* sdiv */
675 COSTS_N_INSNS (1), /* ddiv */
676 128, /* cache line size */
680 0, /* SF->DF convert */
683 /* Instruction costs on RS64A processors. */
685 struct processor_costs rs64a_cost
= {
686 COSTS_N_INSNS (20), /* mulsi */
687 COSTS_N_INSNS (12), /* mulsi_const */
688 COSTS_N_INSNS (8), /* mulsi_const9 */
689 COSTS_N_INSNS (34), /* muldi */
690 COSTS_N_INSNS (65), /* divsi */
691 COSTS_N_INSNS (67), /* divdi */
692 COSTS_N_INSNS (4), /* fp */
693 COSTS_N_INSNS (4), /* dmul */
694 COSTS_N_INSNS (31), /* sdiv */
695 COSTS_N_INSNS (31), /* ddiv */
696 128, /* cache line size */
700 0, /* SF->DF convert */
703 /* Instruction costs on MPCCORE processors. */
705 struct processor_costs mpccore_cost
= {
706 COSTS_N_INSNS (2), /* mulsi */
707 COSTS_N_INSNS (2), /* mulsi_const */
708 COSTS_N_INSNS (2), /* mulsi_const9 */
709 COSTS_N_INSNS (2), /* muldi */
710 COSTS_N_INSNS (6), /* divsi */
711 COSTS_N_INSNS (6), /* divdi */
712 COSTS_N_INSNS (4), /* fp */
713 COSTS_N_INSNS (5), /* dmul */
714 COSTS_N_INSNS (10), /* sdiv */
715 COSTS_N_INSNS (17), /* ddiv */
716 32, /* cache line size */
720 0, /* SF->DF convert */
723 /* Instruction costs on PPC403 processors. */
725 struct processor_costs ppc403_cost
= {
726 COSTS_N_INSNS (4), /* mulsi */
727 COSTS_N_INSNS (4), /* mulsi_const */
728 COSTS_N_INSNS (4), /* mulsi_const9 */
729 COSTS_N_INSNS (4), /* muldi */
730 COSTS_N_INSNS (33), /* divsi */
731 COSTS_N_INSNS (33), /* divdi */
732 COSTS_N_INSNS (11), /* fp */
733 COSTS_N_INSNS (11), /* dmul */
734 COSTS_N_INSNS (11), /* sdiv */
735 COSTS_N_INSNS (11), /* ddiv */
736 32, /* cache line size */
740 0, /* SF->DF convert */
743 /* Instruction costs on PPC405 processors. */
745 struct processor_costs ppc405_cost
= {
746 COSTS_N_INSNS (5), /* mulsi */
747 COSTS_N_INSNS (4), /* mulsi_const */
748 COSTS_N_INSNS (3), /* mulsi_const9 */
749 COSTS_N_INSNS (5), /* muldi */
750 COSTS_N_INSNS (35), /* divsi */
751 COSTS_N_INSNS (35), /* divdi */
752 COSTS_N_INSNS (11), /* fp */
753 COSTS_N_INSNS (11), /* dmul */
754 COSTS_N_INSNS (11), /* sdiv */
755 COSTS_N_INSNS (11), /* ddiv */
756 32, /* cache line size */
760 0, /* SF->DF convert */
763 /* Instruction costs on PPC440 processors. */
765 struct processor_costs ppc440_cost
= {
766 COSTS_N_INSNS (3), /* mulsi */
767 COSTS_N_INSNS (2), /* mulsi_const */
768 COSTS_N_INSNS (2), /* mulsi_const9 */
769 COSTS_N_INSNS (3), /* muldi */
770 COSTS_N_INSNS (34), /* divsi */
771 COSTS_N_INSNS (34), /* divdi */
772 COSTS_N_INSNS (5), /* fp */
773 COSTS_N_INSNS (5), /* dmul */
774 COSTS_N_INSNS (19), /* sdiv */
775 COSTS_N_INSNS (33), /* ddiv */
776 32, /* cache line size */
780 0, /* SF->DF convert */
783 /* Instruction costs on PPC476 processors. */
785 struct processor_costs ppc476_cost
= {
786 COSTS_N_INSNS (4), /* mulsi */
787 COSTS_N_INSNS (4), /* mulsi_const */
788 COSTS_N_INSNS (4), /* mulsi_const9 */
789 COSTS_N_INSNS (4), /* muldi */
790 COSTS_N_INSNS (11), /* divsi */
791 COSTS_N_INSNS (11), /* divdi */
792 COSTS_N_INSNS (6), /* fp */
793 COSTS_N_INSNS (6), /* dmul */
794 COSTS_N_INSNS (19), /* sdiv */
795 COSTS_N_INSNS (33), /* ddiv */
796 32, /* l1 cache line size */
800 0, /* SF->DF convert */
803 /* Instruction costs on PPC601 processors. */
805 struct processor_costs ppc601_cost
= {
806 COSTS_N_INSNS (5), /* mulsi */
807 COSTS_N_INSNS (5), /* mulsi_const */
808 COSTS_N_INSNS (5), /* mulsi_const9 */
809 COSTS_N_INSNS (5), /* muldi */
810 COSTS_N_INSNS (36), /* divsi */
811 COSTS_N_INSNS (36), /* divdi */
812 COSTS_N_INSNS (4), /* fp */
813 COSTS_N_INSNS (5), /* dmul */
814 COSTS_N_INSNS (17), /* sdiv */
815 COSTS_N_INSNS (31), /* ddiv */
816 32, /* cache line size */
820 0, /* SF->DF convert */
823 /* Instruction costs on PPC603 processors. */
825 struct processor_costs ppc603_cost
= {
826 COSTS_N_INSNS (5), /* mulsi */
827 COSTS_N_INSNS (3), /* mulsi_const */
828 COSTS_N_INSNS (2), /* mulsi_const9 */
829 COSTS_N_INSNS (5), /* muldi */
830 COSTS_N_INSNS (37), /* divsi */
831 COSTS_N_INSNS (37), /* divdi */
832 COSTS_N_INSNS (3), /* fp */
833 COSTS_N_INSNS (4), /* dmul */
834 COSTS_N_INSNS (18), /* sdiv */
835 COSTS_N_INSNS (33), /* ddiv */
836 32, /* cache line size */
840 0, /* SF->DF convert */
843 /* Instruction costs on PPC604 processors. */
845 struct processor_costs ppc604_cost
= {
846 COSTS_N_INSNS (4), /* mulsi */
847 COSTS_N_INSNS (4), /* mulsi_const */
848 COSTS_N_INSNS (4), /* mulsi_const9 */
849 COSTS_N_INSNS (4), /* muldi */
850 COSTS_N_INSNS (20), /* divsi */
851 COSTS_N_INSNS (20), /* divdi */
852 COSTS_N_INSNS (3), /* fp */
853 COSTS_N_INSNS (3), /* dmul */
854 COSTS_N_INSNS (18), /* sdiv */
855 COSTS_N_INSNS (32), /* ddiv */
856 32, /* cache line size */
860 0, /* SF->DF convert */
863 /* Instruction costs on PPC604e processors. */
865 struct processor_costs ppc604e_cost
= {
866 COSTS_N_INSNS (2), /* mulsi */
867 COSTS_N_INSNS (2), /* mulsi_const */
868 COSTS_N_INSNS (2), /* mulsi_const9 */
869 COSTS_N_INSNS (2), /* muldi */
870 COSTS_N_INSNS (20), /* divsi */
871 COSTS_N_INSNS (20), /* divdi */
872 COSTS_N_INSNS (3), /* fp */
873 COSTS_N_INSNS (3), /* dmul */
874 COSTS_N_INSNS (18), /* sdiv */
875 COSTS_N_INSNS (32), /* ddiv */
876 32, /* cache line size */
880 0, /* SF->DF convert */
883 /* Instruction costs on PPC620 processors. */
885 struct processor_costs ppc620_cost
= {
886 COSTS_N_INSNS (5), /* mulsi */
887 COSTS_N_INSNS (4), /* mulsi_const */
888 COSTS_N_INSNS (3), /* mulsi_const9 */
889 COSTS_N_INSNS (7), /* muldi */
890 COSTS_N_INSNS (21), /* divsi */
891 COSTS_N_INSNS (37), /* divdi */
892 COSTS_N_INSNS (3), /* fp */
893 COSTS_N_INSNS (3), /* dmul */
894 COSTS_N_INSNS (18), /* sdiv */
895 COSTS_N_INSNS (32), /* ddiv */
896 128, /* cache line size */
900 0, /* SF->DF convert */
903 /* Instruction costs on PPC630 processors. */
905 struct processor_costs ppc630_cost
= {
906 COSTS_N_INSNS (5), /* mulsi */
907 COSTS_N_INSNS (4), /* mulsi_const */
908 COSTS_N_INSNS (3), /* mulsi_const9 */
909 COSTS_N_INSNS (7), /* muldi */
910 COSTS_N_INSNS (21), /* divsi */
911 COSTS_N_INSNS (37), /* divdi */
912 COSTS_N_INSNS (3), /* fp */
913 COSTS_N_INSNS (3), /* dmul */
914 COSTS_N_INSNS (17), /* sdiv */
915 COSTS_N_INSNS (21), /* ddiv */
916 128, /* cache line size */
920 0, /* SF->DF convert */
923 /* Instruction costs on Cell processor. */
924 /* COSTS_N_INSNS (1) ~ one add. */
926 struct processor_costs ppccell_cost
= {
927 COSTS_N_INSNS (9/2)+2, /* mulsi */
928 COSTS_N_INSNS (6/2), /* mulsi_const */
929 COSTS_N_INSNS (6/2), /* mulsi_const9 */
930 COSTS_N_INSNS (15/2)+2, /* muldi */
931 COSTS_N_INSNS (38/2), /* divsi */
932 COSTS_N_INSNS (70/2), /* divdi */
933 COSTS_N_INSNS (10/2), /* fp */
934 COSTS_N_INSNS (10/2), /* dmul */
935 COSTS_N_INSNS (74/2), /* sdiv */
936 COSTS_N_INSNS (74/2), /* ddiv */
937 128, /* cache line size */
941 0, /* SF->DF convert */
944 /* Instruction costs on PPC750 and PPC7400 processors. */
946 struct processor_costs ppc750_cost
= {
947 COSTS_N_INSNS (5), /* mulsi */
948 COSTS_N_INSNS (3), /* mulsi_const */
949 COSTS_N_INSNS (2), /* mulsi_const9 */
950 COSTS_N_INSNS (5), /* muldi */
951 COSTS_N_INSNS (17), /* divsi */
952 COSTS_N_INSNS (17), /* divdi */
953 COSTS_N_INSNS (3), /* fp */
954 COSTS_N_INSNS (3), /* dmul */
955 COSTS_N_INSNS (17), /* sdiv */
956 COSTS_N_INSNS (31), /* ddiv */
957 32, /* cache line size */
961 0, /* SF->DF convert */
964 /* Instruction costs on PPC7450 processors. */
966 struct processor_costs ppc7450_cost
= {
967 COSTS_N_INSNS (4), /* mulsi */
968 COSTS_N_INSNS (3), /* mulsi_const */
969 COSTS_N_INSNS (3), /* mulsi_const9 */
970 COSTS_N_INSNS (4), /* muldi */
971 COSTS_N_INSNS (23), /* divsi */
972 COSTS_N_INSNS (23), /* divdi */
973 COSTS_N_INSNS (5), /* fp */
974 COSTS_N_INSNS (5), /* dmul */
975 COSTS_N_INSNS (21), /* sdiv */
976 COSTS_N_INSNS (35), /* ddiv */
977 32, /* cache line size */
981 0, /* SF->DF convert */
984 /* Instruction costs on PPC8540 processors. */
986 struct processor_costs ppc8540_cost
= {
987 COSTS_N_INSNS (4), /* mulsi */
988 COSTS_N_INSNS (4), /* mulsi_const */
989 COSTS_N_INSNS (4), /* mulsi_const9 */
990 COSTS_N_INSNS (4), /* muldi */
991 COSTS_N_INSNS (19), /* divsi */
992 COSTS_N_INSNS (19), /* divdi */
993 COSTS_N_INSNS (4), /* fp */
994 COSTS_N_INSNS (4), /* dmul */
995 COSTS_N_INSNS (29), /* sdiv */
996 COSTS_N_INSNS (29), /* ddiv */
997 32, /* cache line size */
1000 1, /* prefetch streams /*/
1001 0, /* SF->DF convert */
1004 /* Instruction costs on E300C2 and E300C3 cores. */
1006 struct processor_costs ppce300c2c3_cost
= {
1007 COSTS_N_INSNS (4), /* mulsi */
1008 COSTS_N_INSNS (4), /* mulsi_const */
1009 COSTS_N_INSNS (4), /* mulsi_const9 */
1010 COSTS_N_INSNS (4), /* muldi */
1011 COSTS_N_INSNS (19), /* divsi */
1012 COSTS_N_INSNS (19), /* divdi */
1013 COSTS_N_INSNS (3), /* fp */
1014 COSTS_N_INSNS (4), /* dmul */
1015 COSTS_N_INSNS (18), /* sdiv */
1016 COSTS_N_INSNS (33), /* ddiv */
1020 1, /* prefetch streams /*/
1021 0, /* SF->DF convert */
1024 /* Instruction costs on PPCE500MC processors. */
1026 struct processor_costs ppce500mc_cost
= {
1027 COSTS_N_INSNS (4), /* mulsi */
1028 COSTS_N_INSNS (4), /* mulsi_const */
1029 COSTS_N_INSNS (4), /* mulsi_const9 */
1030 COSTS_N_INSNS (4), /* muldi */
1031 COSTS_N_INSNS (14), /* divsi */
1032 COSTS_N_INSNS (14), /* divdi */
1033 COSTS_N_INSNS (8), /* fp */
1034 COSTS_N_INSNS (10), /* dmul */
1035 COSTS_N_INSNS (36), /* sdiv */
1036 COSTS_N_INSNS (66), /* ddiv */
1037 64, /* cache line size */
1040 1, /* prefetch streams */
1041 0, /* SF->DF convert */
1044 /* Instruction costs on PPCE500MC64 processors. */
1046 struct processor_costs ppce500mc64_cost
= {
1047 COSTS_N_INSNS (4), /* mulsi */
1048 COSTS_N_INSNS (4), /* mulsi_const */
1049 COSTS_N_INSNS (4), /* mulsi_const9 */
1050 COSTS_N_INSNS (4), /* muldi */
1051 COSTS_N_INSNS (14), /* divsi */
1052 COSTS_N_INSNS (14), /* divdi */
1053 COSTS_N_INSNS (4), /* fp */
1054 COSTS_N_INSNS (10), /* dmul */
1055 COSTS_N_INSNS (36), /* sdiv */
1056 COSTS_N_INSNS (66), /* ddiv */
1057 64, /* cache line size */
1060 1, /* prefetch streams */
1061 0, /* SF->DF convert */
1064 /* Instruction costs on PPCE5500 processors. */
1066 struct processor_costs ppce5500_cost
= {
1067 COSTS_N_INSNS (5), /* mulsi */
1068 COSTS_N_INSNS (5), /* mulsi_const */
1069 COSTS_N_INSNS (4), /* mulsi_const9 */
1070 COSTS_N_INSNS (5), /* muldi */
1071 COSTS_N_INSNS (14), /* divsi */
1072 COSTS_N_INSNS (14), /* divdi */
1073 COSTS_N_INSNS (7), /* fp */
1074 COSTS_N_INSNS (10), /* dmul */
1075 COSTS_N_INSNS (36), /* sdiv */
1076 COSTS_N_INSNS (66), /* ddiv */
1077 64, /* cache line size */
1080 1, /* prefetch streams */
1081 0, /* SF->DF convert */
1084 /* Instruction costs on PPCE6500 processors. */
1086 struct processor_costs ppce6500_cost
= {
1087 COSTS_N_INSNS (5), /* mulsi */
1088 COSTS_N_INSNS (5), /* mulsi_const */
1089 COSTS_N_INSNS (4), /* mulsi_const9 */
1090 COSTS_N_INSNS (5), /* muldi */
1091 COSTS_N_INSNS (14), /* divsi */
1092 COSTS_N_INSNS (14), /* divdi */
1093 COSTS_N_INSNS (7), /* fp */
1094 COSTS_N_INSNS (10), /* dmul */
1095 COSTS_N_INSNS (36), /* sdiv */
1096 COSTS_N_INSNS (66), /* ddiv */
1097 64, /* cache line size */
1100 1, /* prefetch streams */
1101 0, /* SF->DF convert */
1104 /* Instruction costs on AppliedMicro Titan processors. */
1106 struct processor_costs titan_cost
= {
1107 COSTS_N_INSNS (5), /* mulsi */
1108 COSTS_N_INSNS (5), /* mulsi_const */
1109 COSTS_N_INSNS (5), /* mulsi_const9 */
1110 COSTS_N_INSNS (5), /* muldi */
1111 COSTS_N_INSNS (18), /* divsi */
1112 COSTS_N_INSNS (18), /* divdi */
1113 COSTS_N_INSNS (10), /* fp */
1114 COSTS_N_INSNS (10), /* dmul */
1115 COSTS_N_INSNS (46), /* sdiv */
1116 COSTS_N_INSNS (72), /* ddiv */
1117 32, /* cache line size */
1120 1, /* prefetch streams */
1121 0, /* SF->DF convert */
1124 /* Instruction costs on POWER4 and POWER5 processors. */
1126 struct processor_costs power4_cost
= {
1127 COSTS_N_INSNS (3), /* mulsi */
1128 COSTS_N_INSNS (2), /* mulsi_const */
1129 COSTS_N_INSNS (2), /* mulsi_const9 */
1130 COSTS_N_INSNS (4), /* muldi */
1131 COSTS_N_INSNS (18), /* divsi */
1132 COSTS_N_INSNS (34), /* divdi */
1133 COSTS_N_INSNS (3), /* fp */
1134 COSTS_N_INSNS (3), /* dmul */
1135 COSTS_N_INSNS (17), /* sdiv */
1136 COSTS_N_INSNS (17), /* ddiv */
1137 128, /* cache line size */
1139 1024, /* l2 cache */
1140 8, /* prefetch streams */
1141 0, /* SF->DF convert */
1144 /* Instruction costs on POWER6 processors. */
1146 struct processor_costs power6_cost
= {
1147 COSTS_N_INSNS (8), /* mulsi */
1148 COSTS_N_INSNS (8), /* mulsi_const */
1149 COSTS_N_INSNS (8), /* mulsi_const9 */
1150 COSTS_N_INSNS (8), /* muldi */
1151 COSTS_N_INSNS (22), /* divsi */
1152 COSTS_N_INSNS (28), /* divdi */
1153 COSTS_N_INSNS (3), /* fp */
1154 COSTS_N_INSNS (3), /* dmul */
1155 COSTS_N_INSNS (13), /* sdiv */
1156 COSTS_N_INSNS (16), /* ddiv */
1157 128, /* cache line size */
1159 2048, /* l2 cache */
1160 16, /* prefetch streams */
1161 0, /* SF->DF convert */
1164 /* Instruction costs on POWER7 processors. */
1166 struct processor_costs power7_cost
= {
1167 COSTS_N_INSNS (2), /* mulsi */
1168 COSTS_N_INSNS (2), /* mulsi_const */
1169 COSTS_N_INSNS (2), /* mulsi_const9 */
1170 COSTS_N_INSNS (2), /* muldi */
1171 COSTS_N_INSNS (18), /* divsi */
1172 COSTS_N_INSNS (34), /* divdi */
1173 COSTS_N_INSNS (3), /* fp */
1174 COSTS_N_INSNS (3), /* dmul */
1175 COSTS_N_INSNS (13), /* sdiv */
1176 COSTS_N_INSNS (16), /* ddiv */
1177 128, /* cache line size */
1180 12, /* prefetch streams */
1181 COSTS_N_INSNS (3), /* SF->DF convert */
1184 /* Instruction costs on POWER8 processors. */
1186 struct processor_costs power8_cost
= {
1187 COSTS_N_INSNS (3), /* mulsi */
1188 COSTS_N_INSNS (3), /* mulsi_const */
1189 COSTS_N_INSNS (3), /* mulsi_const9 */
1190 COSTS_N_INSNS (3), /* muldi */
1191 COSTS_N_INSNS (19), /* divsi */
1192 COSTS_N_INSNS (35), /* divdi */
1193 COSTS_N_INSNS (3), /* fp */
1194 COSTS_N_INSNS (3), /* dmul */
1195 COSTS_N_INSNS (14), /* sdiv */
1196 COSTS_N_INSNS (17), /* ddiv */
1197 128, /* cache line size */
1200 12, /* prefetch streams */
1201 COSTS_N_INSNS (3), /* SF->DF convert */
1204 /* Instruction costs on POWER9 processors. */
1206 struct processor_costs power9_cost
= {
1207 COSTS_N_INSNS (3), /* mulsi */
1208 COSTS_N_INSNS (3), /* mulsi_const */
1209 COSTS_N_INSNS (3), /* mulsi_const9 */
1210 COSTS_N_INSNS (3), /* muldi */
1211 COSTS_N_INSNS (8), /* divsi */
1212 COSTS_N_INSNS (12), /* divdi */
1213 COSTS_N_INSNS (3), /* fp */
1214 COSTS_N_INSNS (3), /* dmul */
1215 COSTS_N_INSNS (13), /* sdiv */
1216 COSTS_N_INSNS (18), /* ddiv */
1217 128, /* cache line size */
1220 8, /* prefetch streams */
1221 COSTS_N_INSNS (3), /* SF->DF convert */
1224 /* Instruction costs on POWER A2 processors. */
1226 struct processor_costs ppca2_cost
= {
1227 COSTS_N_INSNS (16), /* mulsi */
1228 COSTS_N_INSNS (16), /* mulsi_const */
1229 COSTS_N_INSNS (16), /* mulsi_const9 */
1230 COSTS_N_INSNS (16), /* muldi */
1231 COSTS_N_INSNS (22), /* divsi */
1232 COSTS_N_INSNS (28), /* divdi */
1233 COSTS_N_INSNS (3), /* fp */
1234 COSTS_N_INSNS (3), /* dmul */
1235 COSTS_N_INSNS (59), /* sdiv */
1236 COSTS_N_INSNS (72), /* ddiv */
1239 2048, /* l2 cache */
1240 16, /* prefetch streams */
1241 0, /* SF->DF convert */
1245 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1246 #undef RS6000_BUILTIN_0
1247 #undef RS6000_BUILTIN_1
1248 #undef RS6000_BUILTIN_2
1249 #undef RS6000_BUILTIN_3
1250 #undef RS6000_BUILTIN_A
1251 #undef RS6000_BUILTIN_D
1252 #undef RS6000_BUILTIN_E
1253 #undef RS6000_BUILTIN_H
1254 #undef RS6000_BUILTIN_P
1255 #undef RS6000_BUILTIN_Q
1256 #undef RS6000_BUILTIN_S
1257 #undef RS6000_BUILTIN_X
1259 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1260 { NAME, ICODE, MASK, ATTR },
1262 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1263 { NAME, ICODE, MASK, ATTR },
1265 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1266 { NAME, ICODE, MASK, ATTR },
1268 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1269 { NAME, ICODE, MASK, ATTR },
1271 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1272 { NAME, ICODE, MASK, ATTR },
1274 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1275 { NAME, ICODE, MASK, ATTR },
1277 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
1278 { NAME, ICODE, MASK, ATTR },
1280 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1281 { NAME, ICODE, MASK, ATTR },
1283 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1284 { NAME, ICODE, MASK, ATTR },
1286 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
1287 { NAME, ICODE, MASK, ATTR },
1289 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
1290 { NAME, ICODE, MASK, ATTR },
1292 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1293 { NAME, ICODE, MASK, ATTR },
1295 struct rs6000_builtin_info_type
{
1297 const enum insn_code icode
;
1298 const HOST_WIDE_INT mask
;
1299 const unsigned attr
;
1302 static const struct rs6000_builtin_info_type rs6000_builtin_info
[] =
1304 #include "powerpcspe-builtin.def"
1307 #undef RS6000_BUILTIN_0
1308 #undef RS6000_BUILTIN_1
1309 #undef RS6000_BUILTIN_2
1310 #undef RS6000_BUILTIN_3
1311 #undef RS6000_BUILTIN_A
1312 #undef RS6000_BUILTIN_D
1313 #undef RS6000_BUILTIN_E
1314 #undef RS6000_BUILTIN_H
1315 #undef RS6000_BUILTIN_P
1316 #undef RS6000_BUILTIN_Q
1317 #undef RS6000_BUILTIN_S
1318 #undef RS6000_BUILTIN_X
1320 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1321 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1324 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool);
1325 static bool spe_func_has_64bit_regs_p (void);
1326 static struct machine_function
* rs6000_init_machine_status (void);
1327 static int rs6000_ra_ever_killed (void);
1328 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1329 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1330 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1331 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1332 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1333 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1334 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1335 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1337 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1339 static bool is_microcoded_insn (rtx_insn
*);
1340 static bool is_nonpipeline_insn (rtx_insn
*);
1341 static bool is_cracked_insn (rtx_insn
*);
1342 static bool is_load_insn (rtx
, rtx
*);
1343 static bool is_store_insn (rtx
, rtx
*);
1344 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1345 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1346 static bool insn_must_be_first_in_group (rtx_insn
*);
1347 static bool insn_must_be_last_in_group (rtx_insn
*);
1348 static void altivec_init_builtins (void);
1349 static tree
builtin_function_type (machine_mode
, machine_mode
,
1350 machine_mode
, machine_mode
,
1351 enum rs6000_builtins
, const char *name
);
1352 static void rs6000_common_init_builtins (void);
1353 static void paired_init_builtins (void);
1354 static rtx
paired_expand_predicate_builtin (enum insn_code
, tree
, rtx
);
1355 static void spe_init_builtins (void);
1356 static void htm_init_builtins (void);
1357 static rtx
spe_expand_predicate_builtin (enum insn_code
, tree
, rtx
);
1358 static rtx
spe_expand_evsel_builtin (enum insn_code
, tree
, rtx
);
1359 static int rs6000_emit_int_cmove (rtx
, rtx
, rtx
, rtx
);
1360 static rs6000_stack_t
*rs6000_stack_info (void);
1361 static void is_altivec_return_reg (rtx
, void *);
1362 int easy_vector_constant (rtx
, machine_mode
);
1363 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1364 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1365 static rtx
rs6000_darwin64_record_arg (CUMULATIVE_ARGS
*, const_tree
,
1368 static void macho_branch_islands (void);
1370 static rtx
rs6000_legitimize_reload_address (rtx
, machine_mode
, int, int,
1372 static rtx
rs6000_debug_legitimize_reload_address (rtx
, machine_mode
, int,
1374 static bool rs6000_mode_dependent_address (const_rtx
);
1375 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1376 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1378 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1381 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1382 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1384 static bool rs6000_secondary_memory_needed (enum reg_class
, enum reg_class
,
1386 static bool rs6000_debug_secondary_memory_needed (enum reg_class
,
1389 static bool rs6000_cannot_change_mode_class (machine_mode
,
1392 static bool rs6000_debug_cannot_change_mode_class (machine_mode
,
1395 static bool rs6000_save_toc_in_prologue_p (void);
1396 static rtx
rs6000_internal_arg_pointer (void);
1398 rtx (*rs6000_legitimize_reload_address_ptr
) (rtx
, machine_mode
, int, int,
1400 = rs6000_legitimize_reload_address
;
1402 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1403 = rs6000_mode_dependent_address
;
1405 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1407 = rs6000_secondary_reload_class
;
1409 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1410 = rs6000_preferred_reload_class
;
1412 bool (*rs6000_secondary_memory_needed_ptr
) (enum reg_class
, enum reg_class
,
1414 = rs6000_secondary_memory_needed
;
1416 bool (*rs6000_cannot_change_mode_class_ptr
) (machine_mode
,
1419 = rs6000_cannot_change_mode_class
;
1421 const int INSN_NOT_AVAILABLE
= -1;
1423 static void rs6000_print_isa_options (FILE *, int, const char *,
1425 static void rs6000_print_builtin_options (FILE *, int, const char *,
1427 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1429 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1430 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1431 enum rs6000_reg_type
,
1433 secondary_reload_info
*,
1435 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1436 static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused
));
1437 static tree
rs6000_fold_builtin (tree
, int, tree
*, bool);
1439 /* Hash table stuff for keeping track of TOC entries. */
1441 struct GTY((for_user
)) toc_hash_struct
1443 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1444 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1446 machine_mode key_mode
;
1450 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1452 static hashval_t
hash (toc_hash_struct
*);
1453 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1456 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
1458 /* Hash table to keep track of the argument types for builtin functions. */
1460 struct GTY((for_user
)) builtin_hash_struct
1463 machine_mode mode
[4]; /* return value + 3 arguments. */
1464 unsigned char uns_p
[4]; /* and whether the types are unsigned. */
1467 struct builtin_hasher
: ggc_ptr_hash
<builtin_hash_struct
>
1469 static hashval_t
hash (builtin_hash_struct
*);
1470 static bool equal (builtin_hash_struct
*, builtin_hash_struct
*);
1473 static GTY (()) hash_table
<builtin_hasher
> *builtin_hash_table
;
1476 /* Default register names. */
1477 char rs6000_reg_names
[][8] =
1479 "0", "1", "2", "3", "4", "5", "6", "7",
1480 "8", "9", "10", "11", "12", "13", "14", "15",
1481 "16", "17", "18", "19", "20", "21", "22", "23",
1482 "24", "25", "26", "27", "28", "29", "30", "31",
1483 "0", "1", "2", "3", "4", "5", "6", "7",
1484 "8", "9", "10", "11", "12", "13", "14", "15",
1485 "16", "17", "18", "19", "20", "21", "22", "23",
1486 "24", "25", "26", "27", "28", "29", "30", "31",
1487 "mq", "lr", "ctr","ap",
1488 "0", "1", "2", "3", "4", "5", "6", "7",
1490 /* AltiVec registers. */
1491 "0", "1", "2", "3", "4", "5", "6", "7",
1492 "8", "9", "10", "11", "12", "13", "14", "15",
1493 "16", "17", "18", "19", "20", "21", "22", "23",
1494 "24", "25", "26", "27", "28", "29", "30", "31",
1496 /* SPE registers. */
1497 "spe_acc", "spefscr",
1498 /* Soft frame pointer. */
1500 /* HTM SPR registers. */
1501 "tfhar", "tfiar", "texasr",
1502 /* SPE High registers. */
1503 "0", "1", "2", "3", "4", "5", "6", "7",
1504 "8", "9", "10", "11", "12", "13", "14", "15",
1505 "16", "17", "18", "19", "20", "21", "22", "23",
1506 "24", "25", "26", "27", "28", "29", "30", "31"
1509 #ifdef TARGET_REGNAMES
1510 static const char alt_reg_names
[][8] =
1512 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1513 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1514 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1515 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1516 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1517 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1518 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1519 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1520 "mq", "lr", "ctr", "ap",
1521 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1523 /* AltiVec registers. */
1524 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1525 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1526 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1527 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1529 /* SPE registers. */
1530 "spe_acc", "spefscr",
1531 /* Soft frame pointer. */
1533 /* HTM SPR registers. */
1534 "tfhar", "tfiar", "texasr",
1535 /* SPE High registers. */
1536 "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
1537 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1538 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1539 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1543 /* Table of valid machine attributes. */
1545 static const struct attribute_spec rs6000_attribute_table
[] =
1547 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1548 affects_type_identity } */
1549 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute
,
1551 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute
,
1553 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute
,
1555 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute
,
1557 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute
,
1559 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1560 SUBTARGET_ATTRIBUTE_TABLE
,
1562 { NULL
, 0, 0, false, false, false, NULL
, false }
1565 #ifndef TARGET_PROFILE_KERNEL
1566 #define TARGET_PROFILE_KERNEL 0
1569 /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
1570 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
1572 /* Initialize the GCC target structure. */
1573 #undef TARGET_ATTRIBUTE_TABLE
1574 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1575 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1576 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1577 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1578 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1580 #undef TARGET_ASM_ALIGNED_DI_OP
1581 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1583 /* Default unaligned ops are only provided for ELF. Find the ops needed
1584 for non-ELF systems. */
1585 #ifndef OBJECT_FORMAT_ELF
1587 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1589 #undef TARGET_ASM_UNALIGNED_HI_OP
1590 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1591 #undef TARGET_ASM_UNALIGNED_SI_OP
1592 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1593 #undef TARGET_ASM_UNALIGNED_DI_OP
1594 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1597 #undef TARGET_ASM_UNALIGNED_HI_OP
1598 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1599 #undef TARGET_ASM_UNALIGNED_SI_OP
1600 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1601 #undef TARGET_ASM_UNALIGNED_DI_OP
1602 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1603 #undef TARGET_ASM_ALIGNED_DI_OP
1604 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1608 /* This hook deals with fixups for relocatable code and DI-mode objects
1610 #undef TARGET_ASM_INTEGER
1611 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1613 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1614 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1615 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1618 #undef TARGET_SET_UP_BY_PROLOGUE
1619 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1621 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1622 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1623 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1624 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1625 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1626 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1627 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1628 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1629 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1630 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1631 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1632 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1634 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1635 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1637 #undef TARGET_INTERNAL_ARG_POINTER
1638 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1640 #undef TARGET_HAVE_TLS
1641 #define TARGET_HAVE_TLS HAVE_AS_TLS
1643 #undef TARGET_CANNOT_FORCE_CONST_MEM
1644 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1646 #undef TARGET_DELEGITIMIZE_ADDRESS
1647 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1649 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1650 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1652 #undef TARGET_LEGITIMATE_COMBINED_INSN
1653 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1655 #undef TARGET_ASM_FUNCTION_PROLOGUE
1656 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1657 #undef TARGET_ASM_FUNCTION_EPILOGUE
1658 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1660 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1661 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1663 #undef TARGET_LEGITIMIZE_ADDRESS
1664 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1666 #undef TARGET_SCHED_VARIABLE_ISSUE
1667 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1669 #undef TARGET_SCHED_ISSUE_RATE
1670 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1671 #undef TARGET_SCHED_ADJUST_COST
1672 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1673 #undef TARGET_SCHED_ADJUST_PRIORITY
1674 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1675 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1676 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1677 #undef TARGET_SCHED_INIT
1678 #define TARGET_SCHED_INIT rs6000_sched_init
1679 #undef TARGET_SCHED_FINISH
1680 #define TARGET_SCHED_FINISH rs6000_sched_finish
1681 #undef TARGET_SCHED_REORDER
1682 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1683 #undef TARGET_SCHED_REORDER2
1684 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1686 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1687 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1689 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1690 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1692 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1693 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1694 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1695 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1696 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1697 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1698 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1699 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1701 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1702 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1704 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1705 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1706 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1707 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1708 rs6000_builtin_support_vector_misalignment
1709 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1710 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1711 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1712 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1713 rs6000_builtin_vectorization_cost
1714 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1715 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1716 rs6000_preferred_simd_mode
1717 #undef TARGET_VECTORIZE_INIT_COST
1718 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1719 #undef TARGET_VECTORIZE_ADD_STMT_COST
1720 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1721 #undef TARGET_VECTORIZE_FINISH_COST
1722 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1723 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1724 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1726 #undef TARGET_INIT_BUILTINS
1727 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1728 #undef TARGET_BUILTIN_DECL
1729 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1731 #undef TARGET_FOLD_BUILTIN
1732 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1733 #undef TARGET_GIMPLE_FOLD_BUILTIN
1734 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1736 #undef TARGET_EXPAND_BUILTIN
1737 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1739 #undef TARGET_MANGLE_TYPE
1740 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1742 #undef TARGET_INIT_LIBFUNCS
1743 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1746 #undef TARGET_BINDS_LOCAL_P
1747 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1750 #undef TARGET_MS_BITFIELD_LAYOUT_P
1751 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1753 #undef TARGET_ASM_OUTPUT_MI_THUNK
1754 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1756 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1757 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1759 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1760 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1762 #undef TARGET_REGISTER_MOVE_COST
1763 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1764 #undef TARGET_MEMORY_MOVE_COST
1765 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1766 #undef TARGET_CANNOT_COPY_INSN_P
1767 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1768 #undef TARGET_RTX_COSTS
1769 #define TARGET_RTX_COSTS rs6000_rtx_costs
1770 #undef TARGET_ADDRESS_COST
1771 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1773 #undef TARGET_DWARF_REGISTER_SPAN
1774 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1776 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1777 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1779 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1780 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1782 #undef TARGET_PROMOTE_FUNCTION_MODE
1783 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1785 #undef TARGET_RETURN_IN_MEMORY
1786 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1788 #undef TARGET_RETURN_IN_MSB
1789 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1791 #undef TARGET_SETUP_INCOMING_VARARGS
1792 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1794 /* Always strict argument naming on rs6000. */
1795 #undef TARGET_STRICT_ARGUMENT_NAMING
1796 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1797 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1798 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1799 #undef TARGET_SPLIT_COMPLEX_ARG
1800 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1801 #undef TARGET_MUST_PASS_IN_STACK
1802 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1803 #undef TARGET_PASS_BY_REFERENCE
1804 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1805 #undef TARGET_ARG_PARTIAL_BYTES
1806 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1807 #undef TARGET_FUNCTION_ARG_ADVANCE
1808 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1809 #undef TARGET_FUNCTION_ARG
1810 #define TARGET_FUNCTION_ARG rs6000_function_arg
1811 #undef TARGET_FUNCTION_ARG_BOUNDARY
1812 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1814 #undef TARGET_BUILD_BUILTIN_VA_LIST
1815 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1817 #undef TARGET_EXPAND_BUILTIN_VA_START
1818 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1820 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1821 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1823 #undef TARGET_EH_RETURN_FILTER_MODE
1824 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1826 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1827 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1829 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1830 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1832 #undef TARGET_FLOATN_MODE
1833 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1835 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1836 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1838 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1839 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1841 #undef TARGET_MD_ASM_ADJUST
1842 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1844 #undef TARGET_OPTION_OVERRIDE
1845 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1847 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1848 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1849 rs6000_builtin_vectorized_function
1851 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1852 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1853 rs6000_builtin_md_vectorized_function
1855 #undef TARGET_STACK_PROTECT_GUARD
1856 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1859 #undef TARGET_STACK_PROTECT_FAIL
1860 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1864 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1865 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1868 /* Use a 32-bit anchor range. This leads to sequences like:
1870 addis tmp,anchor,high
1873 where tmp itself acts as an anchor, and can be shared between
1874 accesses to the same 64k page. */
1875 #undef TARGET_MIN_ANCHOR_OFFSET
1876 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1877 #undef TARGET_MAX_ANCHOR_OFFSET
1878 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1879 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1880 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1881 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1882 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1884 #undef TARGET_BUILTIN_RECIPROCAL
1885 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1887 #undef TARGET_EXPAND_TO_RTL_HOOK
1888 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1890 #undef TARGET_INSTANTIATE_DECLS
1891 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1893 #undef TARGET_SECONDARY_RELOAD
1894 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1896 #undef TARGET_LEGITIMATE_ADDRESS_P
1897 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1899 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1900 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1903 #define TARGET_LRA_P rs6000_lra_p
1905 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1906 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1908 #undef TARGET_CAN_ELIMINATE
1909 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1911 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1912 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1914 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1915 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1917 #undef TARGET_TRAMPOLINE_INIT
1918 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1920 #undef TARGET_FUNCTION_VALUE
1921 #define TARGET_FUNCTION_VALUE rs6000_function_value
1923 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1924 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1926 #undef TARGET_OPTION_SAVE
1927 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1929 #undef TARGET_OPTION_RESTORE
1930 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1932 #undef TARGET_OPTION_PRINT
1933 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1935 #undef TARGET_CAN_INLINE_P
1936 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1938 #undef TARGET_SET_CURRENT_FUNCTION
1939 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1941 #undef TARGET_LEGITIMATE_CONSTANT_P
1942 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1944 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1945 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1947 #undef TARGET_CAN_USE_DOLOOP_P
1948 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1950 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1951 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1953 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1954 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1955 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1956 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1957 #undef TARGET_UNWIND_WORD_MODE
1958 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1960 #undef TARGET_OFFLOAD_OPTIONS
1961 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1963 #undef TARGET_C_MODE_FOR_SUFFIX
1964 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1966 #undef TARGET_INVALID_BINARY_OP
1967 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1969 #undef TARGET_OPTAB_SUPPORTED_P
1970 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1972 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1973 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1976 /* Processor table. */
1979 const char *const name
; /* Canonical processor name. */
1980 const enum processor_type processor
; /* Processor type enum value. */
1981 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1984 static struct rs6000_ptt
const processor_target_table
[] =
1986 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1987 #include "powerpcspe-cpus.def"
1991 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1995 rs6000_cpu_name_lookup (const char *name
)
2001 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
2002 if (! strcmp (name
, processor_target_table
[i
].name
))
2010 /* Return number of consecutive hard regs needed starting at reg REGNO
2011 to hold something of mode MODE.
2012 This is ordinarily the length in words of a value of mode MODE
2013 but can be less for certain modes in special long registers.
2015 For the SPE, GPRs are 64 bits but only 32 bits are visible in
2016 scalar instructions. The upper 32 bits are only available to the
2019 POWER and PowerPC GPRs hold 32 bits worth;
2020 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
2023 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
2025 unsigned HOST_WIDE_INT reg_size
;
2027 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2028 128-bit floating point that can go in vector registers, which has VSX
2029 memory addressing. */
2030 if (FP_REGNO_P (regno
))
2031 reg_size
= (VECTOR_MEM_VSX_P (mode
) || FLOAT128_VECTOR_P (mode
)
2032 ? UNITS_PER_VSX_WORD
2033 : UNITS_PER_FP_WORD
);
2035 else if (SPE_SIMD_REGNO_P (regno
) && TARGET_SPE
&& SPE_VECTOR_MODE (mode
))
2036 reg_size
= UNITS_PER_SPE_WORD
;
2038 else if (ALTIVEC_REGNO_P (regno
))
2039 reg_size
= UNITS_PER_ALTIVEC_WORD
;
2041 /* The value returned for SCmode in the E500 double case is 2 for
2042 ABI compatibility; storing an SCmode value in a single register
2043 would require function_arg and rs6000_spe_function_arg to handle
2044 SCmode so as to pass the value correctly in a pair of
2046 else if (TARGET_E500_DOUBLE
&& FLOAT_MODE_P (mode
) && mode
!= SCmode
2047 && !DECIMAL_FLOAT_MODE_P (mode
) && SPE_SIMD_REGNO_P (regno
))
2048 reg_size
= UNITS_PER_FP_WORD
;
2051 reg_size
= UNITS_PER_WORD
;
2053 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
2056 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2059 rs6000_hard_regno_mode_ok (int regno
, machine_mode mode
)
2061 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
2063 if (COMPLEX_MODE_P (mode
))
2064 mode
= GET_MODE_INNER (mode
);
2066 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2067 register combinations, and use PTImode where we need to deal with quad
2068 word memory operations. Don't allow quad words in the argument or frame
2069 pointer registers, just registers 0..31. */
2070 if (mode
== PTImode
)
2071 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
2072 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
2073 && ((regno
& 1) == 0));
2075 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2076 implementations. Don't allow an item to be split between a FP register
2077 and an Altivec register. Allow TImode in all VSX registers if the user
2079 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
2080 && (VECTOR_MEM_VSX_P (mode
)
2081 || FLOAT128_VECTOR_P (mode
)
2082 || reg_addr
[mode
].scalar_in_vmx_p
2083 || (TARGET_VSX_TIMODE
&& mode
== TImode
)
2084 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
2086 if (FP_REGNO_P (regno
))
2087 return FP_REGNO_P (last_regno
);
2089 if (ALTIVEC_REGNO_P (regno
))
2091 if (GET_MODE_SIZE (mode
) != 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
2094 return ALTIVEC_REGNO_P (last_regno
);
2098 /* The GPRs can hold any mode, but values bigger than one register
2099 cannot go past R31. */
2100 if (INT_REGNO_P (regno
))
2101 return INT_REGNO_P (last_regno
);
2103 /* The float registers (except for VSX vector modes) can only hold floating
2104 modes and DImode. */
2105 if (FP_REGNO_P (regno
))
2107 if (FLOAT128_VECTOR_P (mode
))
2110 if (SCALAR_FLOAT_MODE_P (mode
)
2111 && (mode
!= TDmode
|| (regno
% 2) == 0)
2112 && FP_REGNO_P (last_regno
))
2115 if (GET_MODE_CLASS (mode
) == MODE_INT
)
2117 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
2120 if (TARGET_VSX_SMALL_INTEGER
)
2125 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
2130 if (PAIRED_SIMD_REGNO_P (regno
) && TARGET_PAIRED_FLOAT
2131 && PAIRED_VECTOR_MODE (mode
))
2137 /* The CR register can only hold CC modes. */
2138 if (CR_REGNO_P (regno
))
2139 return GET_MODE_CLASS (mode
) == MODE_CC
;
2141 if (CA_REGNO_P (regno
))
2142 return mode
== Pmode
|| mode
== SImode
;
2144 /* AltiVec only in AldyVec registers. */
2145 if (ALTIVEC_REGNO_P (regno
))
2146 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
2147 || mode
== V1TImode
);
2149 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2150 if (SPE_SIMD_REGNO_P (regno
) && TARGET_SPE
&& SPE_VECTOR_MODE (mode
))
2153 /* We cannot put non-VSX TImode or PTImode anywhere except general register
2154 and it must be able to fit within the register set. */
2156 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
2159 /* Print interesting facts about registers. */
2161 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2165 for (r
= first_regno
; r
<= last_regno
; ++r
)
2167 const char *comma
= "";
2170 if (first_regno
== last_regno
)
2171 fprintf (stderr
, "%s:\t", reg_name
);
2173 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2176 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2177 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2181 fprintf (stderr
, ",\n\t");
2186 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2187 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2188 rs6000_hard_regno_nregs
[m
][r
]);
2190 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2195 if (call_used_regs
[r
])
2199 fprintf (stderr
, ",\n\t");
2204 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2212 fprintf (stderr
, ",\n\t");
2217 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2223 fprintf (stderr
, ",\n\t");
2227 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2228 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2233 fprintf (stderr
, ",\n\t");
2237 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2242 rs6000_debug_vector_unit (enum rs6000_vector v
)
2248 case VECTOR_NONE
: ret
= "none"; break;
2249 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2250 case VECTOR_VSX
: ret
= "vsx"; break;
2251 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2252 case VECTOR_PAIRED
: ret
= "paired"; break;
2253 case VECTOR_SPE
: ret
= "spe"; break;
2254 case VECTOR_OTHER
: ret
= "other"; break;
2255 default: ret
= "unknown"; break;
2261 /* Inner function printing just the address mask for a particular reload
2263 DEBUG_FUNCTION
char *
2264 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2269 if ((mask
& RELOAD_REG_VALID
) != 0)
2271 else if (keep_spaces
)
2274 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2276 else if (keep_spaces
)
2279 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2281 else if (keep_spaces
)
2284 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2286 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2288 else if (keep_spaces
)
2291 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2293 else if (keep_spaces
)
2296 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2298 else if (keep_spaces
)
2301 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2303 else if (keep_spaces
)
2311 /* Print the address masks in a human readble fashion. */
2313 rs6000_debug_print_mode (ssize_t m
)
2319 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2320 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2321 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2322 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2324 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2325 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2326 fprintf (stderr
, " Reload=%c%c",
2327 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2328 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2330 spaces
+= sizeof (" Reload=sl") - 1;
2332 if (reg_addr
[m
].scalar_in_vmx_p
)
2334 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2338 spaces
+= sizeof (" Upper=y") - 1;
2340 fuse_extra_p
= ((reg_addr
[m
].fusion_gpr_ld
!= CODE_FOR_nothing
)
2341 || reg_addr
[m
].fused_toc
);
2344 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2346 if (rc
!= RELOAD_REG_ANY
)
2348 if (reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
2349 || reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
2350 || reg_addr
[m
].fusion_addi_st
[rc
] != CODE_FOR_nothing
2351 || reg_addr
[m
].fusion_addis_ld
[rc
] != CODE_FOR_nothing
2352 || reg_addr
[m
].fusion_addis_st
[rc
] != CODE_FOR_nothing
)
2354 fuse_extra_p
= true;
2363 fprintf (stderr
, "%*s Fuse:", spaces
, "");
2366 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2368 if (rc
!= RELOAD_REG_ANY
)
2372 if (reg_addr
[m
].fusion_addis_ld
[rc
] != CODE_FOR_nothing
)
2374 else if (reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
)
2379 if (reg_addr
[m
].fusion_addis_st
[rc
] != CODE_FOR_nothing
)
2381 else if (reg_addr
[m
].fusion_addi_st
[rc
] != CODE_FOR_nothing
)
2386 if (load
== '-' && store
== '-')
2390 fprintf (stderr
, "%*s%c=%c%c", (spaces
+ 1), "",
2391 reload_reg_map
[rc
].name
[0], load
, store
);
2397 if (reg_addr
[m
].fusion_gpr_ld
!= CODE_FOR_nothing
)
2399 fprintf (stderr
, "%*sP8gpr", (spaces
+ 1), "");
2403 spaces
+= sizeof (" P8gpr") - 1;
2405 if (reg_addr
[m
].fused_toc
)
2407 fprintf (stderr
, "%*sToc", (spaces
+ 1), "");
2411 spaces
+= sizeof (" Toc") - 1;
2414 spaces
+= sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2416 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2417 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2419 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2421 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2422 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2425 fputs ("\n", stderr
);
2428 #define DEBUG_FMT_ID "%-32s= "
2429 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2430 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2431 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2433 /* Print various interesting information with -mdebug=reg. */
2435 rs6000_debug_reg_global (void)
2437 static const char *const tf
[2] = { "false", "true" };
2438 const char *nl
= (const char *)0;
2441 char costly_num
[20];
2443 char flags_buffer
[40];
2444 const char *costly_str
;
2445 const char *nop_str
;
2446 const char *trace_str
;
2447 const char *abi_str
;
2448 const char *cmodel_str
;
2449 struct cl_target_option cl_opts
;
2451 /* Modes we want tieable information on. */
2452 static const machine_mode print_tieable_modes
[] = {
2490 /* Virtual regs we are interested in. */
2491 const static struct {
2492 int regno
; /* register number. */
2493 const char *name
; /* register name. */
2494 } virtual_regs
[] = {
2495 { STACK_POINTER_REGNUM
, "stack pointer:" },
2496 { TOC_REGNUM
, "toc: " },
2497 { STATIC_CHAIN_REGNUM
, "static chain: " },
2498 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2499 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2500 { ARG_POINTER_REGNUM
, "arg pointer: " },
2501 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2502 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2503 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2504 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2505 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2506 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2507 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2508 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2509 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2510 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2513 fputs ("\nHard register information:\n", stderr
);
2514 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2515 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2516 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2519 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2520 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2521 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2522 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2523 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2524 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2525 rs6000_debug_reg_print (SPE_ACC_REGNO
, SPE_ACC_REGNO
, "spe_a");
2526 rs6000_debug_reg_print (SPEFSCR_REGNO
, SPEFSCR_REGNO
, "spe_f");
2528 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2529 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2530 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2534 "d reg_class = %s\n"
2535 "f reg_class = %s\n"
2536 "v reg_class = %s\n"
2537 "wa reg_class = %s\n"
2538 "wb reg_class = %s\n"
2539 "wd reg_class = %s\n"
2540 "we reg_class = %s\n"
2541 "wf reg_class = %s\n"
2542 "wg reg_class = %s\n"
2543 "wh reg_class = %s\n"
2544 "wi reg_class = %s\n"
2545 "wj reg_class = %s\n"
2546 "wk reg_class = %s\n"
2547 "wl reg_class = %s\n"
2548 "wm reg_class = %s\n"
2549 "wo reg_class = %s\n"
2550 "wp reg_class = %s\n"
2551 "wq reg_class = %s\n"
2552 "wr reg_class = %s\n"
2553 "ws reg_class = %s\n"
2554 "wt reg_class = %s\n"
2555 "wu reg_class = %s\n"
2556 "wv reg_class = %s\n"
2557 "ww reg_class = %s\n"
2558 "wx reg_class = %s\n"
2559 "wy reg_class = %s\n"
2560 "wz reg_class = %s\n"
2561 "wA reg_class = %s\n"
2562 "wH reg_class = %s\n"
2563 "wI reg_class = %s\n"
2564 "wJ reg_class = %s\n"
2565 "wK reg_class = %s\n"
2567 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2568 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_f
]],
2569 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2570 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2571 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wb
]],
2572 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wd
]],
2573 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2574 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wf
]],
2575 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wg
]],
2576 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wh
]],
2577 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wi
]],
2578 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wj
]],
2579 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wk
]],
2580 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wl
]],
2581 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wm
]],
2582 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wo
]],
2583 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wp
]],
2584 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wq
]],
2585 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2586 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_ws
]],
2587 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wt
]],
2588 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wu
]],
2589 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wv
]],
2590 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_ww
]],
2591 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2592 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wy
]],
2593 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wz
]],
2594 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]],
2595 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wH
]],
2596 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wI
]],
2597 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wJ
]],
2598 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wK
]]);
2601 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2602 rs6000_debug_print_mode (m
);
2604 fputs ("\n", stderr
);
2606 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2608 machine_mode mode1
= print_tieable_modes
[m1
];
2609 bool first_time
= true;
2611 nl
= (const char *)0;
2612 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2614 machine_mode mode2
= print_tieable_modes
[m2
];
2615 if (mode1
!= mode2
&& MODES_TIEABLE_P (mode1
, mode2
))
2619 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2624 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2629 fputs ("\n", stderr
);
2635 if (rs6000_recip_control
)
2637 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2639 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2640 if (rs6000_recip_bits
[m
])
2643 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2645 (RS6000_RECIP_AUTO_RE_P (m
)
2647 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2648 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2650 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2653 fputs ("\n", stderr
);
2656 if (rs6000_cpu_index
>= 0)
2658 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2660 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2662 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2663 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2666 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2668 if (rs6000_tune_index
>= 0)
2670 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2672 = processor_target_table
[rs6000_tune_index
].target_enable
;
2674 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2675 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2678 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2680 cl_target_option_save (&cl_opts
, &global_options
);
2681 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2684 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2685 rs6000_isa_flags_explicit
);
2687 rs6000_print_builtin_options (stderr
, 0, "rs6000_builtin_mask",
2688 rs6000_builtin_mask
);
2690 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2692 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2693 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2695 switch (rs6000_sched_costly_dep
)
2697 case max_dep_latency
:
2698 costly_str
= "max_dep_latency";
2702 costly_str
= "no_dep_costly";
2705 case all_deps_costly
:
2706 costly_str
= "all_deps_costly";
2709 case true_store_to_load_dep_costly
:
2710 costly_str
= "true_store_to_load_dep_costly";
2713 case store_to_load_dep_costly
:
2714 costly_str
= "store_to_load_dep_costly";
2718 costly_str
= costly_num
;
2719 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2723 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2725 switch (rs6000_sched_insert_nops
)
2727 case sched_finish_regroup_exact
:
2728 nop_str
= "sched_finish_regroup_exact";
2731 case sched_finish_pad_groups
:
2732 nop_str
= "sched_finish_pad_groups";
2735 case sched_finish_none
:
2736 nop_str
= "sched_finish_none";
2741 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2745 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2747 switch (rs6000_sdata
)
2754 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2758 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2762 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2767 switch (rs6000_traceback
)
2769 case traceback_default
: trace_str
= "default"; break;
2770 case traceback_none
: trace_str
= "none"; break;
2771 case traceback_part
: trace_str
= "part"; break;
2772 case traceback_full
: trace_str
= "full"; break;
2773 default: trace_str
= "unknown"; break;
2776 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2778 switch (rs6000_current_cmodel
)
2780 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2781 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2782 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2783 default: cmodel_str
= "unknown"; break;
2786 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2788 switch (rs6000_current_abi
)
2790 case ABI_NONE
: abi_str
= "none"; break;
2791 case ABI_AIX
: abi_str
= "aix"; break;
2792 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2793 case ABI_V4
: abi_str
= "V4"; break;
2794 case ABI_DARWIN
: abi_str
= "darwin"; break;
2795 default: abi_str
= "unknown"; break;
2798 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2800 if (rs6000_altivec_abi
)
2801 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2804 fprintf (stderr
, DEBUG_FMT_S
, "spe_abi", "true");
2806 if (rs6000_darwin64_abi
)
2807 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2809 if (rs6000_float_gprs
)
2810 fprintf (stderr
, DEBUG_FMT_S
, "float_gprs", "true");
2812 fprintf (stderr
, DEBUG_FMT_S
, "fprs",
2813 (TARGET_FPRS
? "true" : "false"));
2815 fprintf (stderr
, DEBUG_FMT_S
, "single_float",
2816 (TARGET_SINGLE_FLOAT
? "true" : "false"));
2818 fprintf (stderr
, DEBUG_FMT_S
, "double_float",
2819 (TARGET_DOUBLE_FLOAT
? "true" : "false"));
2821 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2822 (TARGET_SOFT_FLOAT
? "true" : "false"));
2824 fprintf (stderr
, DEBUG_FMT_S
, "e500_single",
2825 (TARGET_E500_SINGLE
? "true" : "false"));
2827 fprintf (stderr
, DEBUG_FMT_S
, "e500_double",
2828 (TARGET_E500_DOUBLE
? "true" : "false"));
2830 if (TARGET_LINK_STACK
)
2831 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2833 fprintf (stderr
, DEBUG_FMT_S
, "lra", TARGET_LRA
? "true" : "false");
2835 if (TARGET_P8_FUSION
)
2839 strcpy (options
, (TARGET_P9_FUSION
) ? "power9" : "power8");
2840 if (TARGET_TOC_FUSION
)
2841 strcat (options
, ", toc");
2843 if (TARGET_P8_FUSION_SIGN
)
2844 strcat (options
, ", sign");
2846 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2849 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2850 TARGET_SECURE_PLT
? "secure" : "bss");
2851 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2852 aix_struct_return
? "aix" : "sysv");
2853 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2854 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2855 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2856 tf
[!!rs6000_align_branch_targets
]);
2857 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2858 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2859 rs6000_long_double_type_size
);
2860 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2861 (int)rs6000_sched_restricted_insns_priority
);
2862 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2864 fprintf (stderr
, DEBUG_FMT_D
, "Number of rs6000 builtins",
2865 (int)RS6000_BUILTIN_COUNT
);
2867 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2868 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2871 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2872 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2874 if (TARGET_DIRECT_MOVE_128
)
2875 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2876 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2880 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2881 legitimate address support to figure out the appropriate addressing to
2885 rs6000_setup_reg_addr_masks (void)
2887 ssize_t rc
, reg
, m
, nregs
;
2888 addr_mask_type any_addr_mask
, addr_mask
;
2890 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2892 machine_mode m2
= (machine_mode
) m
;
2893 bool complex_p
= false;
2894 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2897 if (COMPLEX_MODE_P (m2
))
2900 m2
= GET_MODE_INNER (m2
);
2903 msize
= GET_MODE_SIZE (m2
);
2905 /* SDmode is special in that we want to access it only via REG+REG
2906 addressing on power7 and above, since we want to use the LFIWZX and
2907 STFIWZX instructions to load it. */
2908 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2911 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2914 reg
= reload_reg_map
[rc
].reg
;
2916 /* Can mode values go in the GPR/FPR/Altivec registers? */
2917 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2919 bool small_int_vsx_p
= (small_int_p
2920 && (rc
== RELOAD_REG_FPR
2921 || rc
== RELOAD_REG_VMX
));
2923 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2924 addr_mask
|= RELOAD_REG_VALID
;
2926 /* Indicate if the mode takes more than 1 physical register. If
2927 it takes a single register, indicate it can do REG+REG
2928 addressing. Small integers in VSX registers can only do
2929 REG+REG addressing. */
2930 if (small_int_vsx_p
)
2931 addr_mask
|= RELOAD_REG_INDEXED
;
2932 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2933 addr_mask
|= RELOAD_REG_MULTIPLE
;
2935 addr_mask
|= RELOAD_REG_INDEXED
;
2937 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2938 addressing. Restrict addressing on SPE for 64-bit types
2939 because of the SUBREG hackery used to address 64-bit floats in
2940 '32-bit' GPRs. If we allow scalars into Altivec registers,
2941 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2944 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2946 && !VECTOR_MODE_P (m2
)
2947 && !FLOAT128_VECTOR_P (m2
)
2950 && (m2
!= DFmode
|| !TARGET_UPPER_REGS_DF
)
2951 && (m2
!= SFmode
|| !TARGET_UPPER_REGS_SF
)
2952 && !(TARGET_E500_DOUBLE
&& msize
== 8))
2954 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2956 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2957 we don't allow PRE_MODIFY for some multi-register
2962 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2966 if (TARGET_POWERPC64
)
2967 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2973 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2979 /* GPR and FPR registers can do REG+OFFSET addressing, except
2980 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2981 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2982 if ((addr_mask
!= 0) && !indexed_only_p
2984 && (rc
== RELOAD_REG_GPR
2985 || ((msize
== 8 || m2
== SFmode
)
2986 && (rc
== RELOAD_REG_FPR
2987 || (rc
== RELOAD_REG_VMX
2988 && TARGET_P9_DFORM_SCALAR
)))))
2989 addr_mask
|= RELOAD_REG_OFFSET
;
2991 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2992 instructions are enabled. The offset for 128-bit VSX registers is
2993 only 12-bits. While GPRs can handle the full offset range, VSX
2994 registers can only handle the restricted range. */
2995 else if ((addr_mask
!= 0) && !indexed_only_p
2996 && msize
== 16 && TARGET_P9_DFORM_VECTOR
2997 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2998 || (m2
== TImode
&& TARGET_VSX_TIMODE
)))
3000 addr_mask
|= RELOAD_REG_OFFSET
;
3001 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
3002 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
3005 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3006 addressing on 128-bit types. */
3007 if (rc
== RELOAD_REG_VMX
&& msize
== 16
3008 && (addr_mask
& RELOAD_REG_VALID
) != 0)
3009 addr_mask
|= RELOAD_REG_AND_M16
;
3011 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
3012 any_addr_mask
|= addr_mask
;
3015 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
3020 /* Initialize the various global tables that are based on register size. */
3022 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
3028 /* Precalculate REGNO_REG_CLASS. */
3029 rs6000_regno_regclass
[0] = GENERAL_REGS
;
3030 for (r
= 1; r
< 32; ++r
)
3031 rs6000_regno_regclass
[r
] = BASE_REGS
;
3033 for (r
= 32; r
< 64; ++r
)
3034 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
3036 for (r
= 64; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3037 rs6000_regno_regclass
[r
] = NO_REGS
;
3039 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
3040 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
3042 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
3043 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
3044 rs6000_regno_regclass
[r
] = CR_REGS
;
3046 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
3047 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
3048 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
3049 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
3050 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
3051 rs6000_regno_regclass
[SPE_ACC_REGNO
] = SPE_ACC_REGS
;
3052 rs6000_regno_regclass
[SPEFSCR_REGNO
] = SPEFSCR_REGS
;
3053 rs6000_regno_regclass
[TFHAR_REGNO
] = SPR_REGS
;
3054 rs6000_regno_regclass
[TFIAR_REGNO
] = SPR_REGS
;
3055 rs6000_regno_regclass
[TEXASR_REGNO
] = SPR_REGS
;
3056 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
3057 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
3059 /* Precalculate register class to simpler reload register class. We don't
3060 need all of the register classes that are combinations of different
3061 classes, just the simple ones that have constraint letters. */
3062 for (c
= 0; c
< N_REG_CLASSES
; c
++)
3063 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
3065 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
3066 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
3067 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
3068 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
3069 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
3070 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
3071 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
3072 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
3073 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
3074 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
3075 reg_class_to_reg_type
[(int)SPE_ACC_REGS
] = SPE_ACC_TYPE
;
3076 reg_class_to_reg_type
[(int)SPEFSCR_REGS
] = SPEFSCR_REG_TYPE
;
3080 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
3081 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
3085 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
3086 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
3089 /* Precalculate the valid memory formats as well as the vector information,
3090 this must be set up before the rs6000_hard_regno_nregs_internal calls
3092 gcc_assert ((int)VECTOR_NONE
== 0);
3093 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
3094 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_unit
));
3096 gcc_assert ((int)CODE_FOR_nothing
== 0);
3097 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
3099 gcc_assert ((int)NO_REGS
== 0);
3100 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
3102 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
3103 believes it can use native alignment or still uses 128-bit alignment. */
3104 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
3115 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3116 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3117 if (TARGET_FLOAT128_TYPE
)
3119 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
3120 rs6000_vector_align
[KFmode
] = 128;
3122 if (FLOAT128_IEEE_P (TFmode
))
3124 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
3125 rs6000_vector_align
[TFmode
] = 128;
3129 /* V2DF mode, VSX only. */
3132 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
3133 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
3134 rs6000_vector_align
[V2DFmode
] = align64
;
3137 /* V4SF mode, either VSX or Altivec. */
3140 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
3141 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
3142 rs6000_vector_align
[V4SFmode
] = align32
;
3144 else if (TARGET_ALTIVEC
)
3146 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
3147 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
3148 rs6000_vector_align
[V4SFmode
] = align32
;
3151 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3155 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
3156 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
3157 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
3158 rs6000_vector_align
[V4SImode
] = align32
;
3159 rs6000_vector_align
[V8HImode
] = align32
;
3160 rs6000_vector_align
[V16QImode
] = align32
;
3164 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
3165 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
3166 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
3170 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
3171 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
3172 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
3176 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3177 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3180 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
3181 rs6000_vector_unit
[V2DImode
]
3182 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
3183 rs6000_vector_align
[V2DImode
] = align64
;
3185 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
3186 rs6000_vector_unit
[V1TImode
]
3187 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
3188 rs6000_vector_align
[V1TImode
] = 128;
3191 /* DFmode, see if we want to use the VSX unit. Memory is handled
3192 differently, so don't set rs6000_vector_mem. */
3193 if (TARGET_VSX
&& TARGET_VSX_SCALAR_DOUBLE
)
3195 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
3196 rs6000_vector_align
[DFmode
] = 64;
3199 /* SFmode, see if we want to use the VSX unit. */
3200 if (TARGET_P8_VECTOR
&& TARGET_VSX_SCALAR_FLOAT
)
3202 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
3203 rs6000_vector_align
[SFmode
] = 32;
3206 /* Allow TImode in VSX register and set the VSX memory macros. */
3207 if (TARGET_VSX
&& TARGET_VSX_TIMODE
)
3209 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
3210 rs6000_vector_align
[TImode
] = align64
;
3213 /* TODO add SPE and paired floating point vector support. */
3215 /* Register class constraints for the constraints that depend on compile
3216 switches. When the VSX code was added, different constraints were added
3217 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3218 of the VSX registers are used. The register classes for scalar floating
3219 point types is set, based on whether we allow that type into the upper
3220 (Altivec) registers. GCC has register classes to target the Altivec
3221 registers for load/store operations, to select using a VSX memory
3222 operation instead of the traditional floating point operation. The
3225 d - Register class to use with traditional DFmode instructions.
3226 f - Register class to use with traditional SFmode instructions.
3227 v - Altivec register.
3228 wa - Any VSX register.
3229 wc - Reserved to represent individual CR bits (used in LLVM).
3230 wd - Preferred register class for V2DFmode.
3231 wf - Preferred register class for V4SFmode.
3232 wg - Float register for power6x move insns.
3233 wh - FP register for direct move instructions.
3234 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3235 wj - FP or VSX register to hold 64-bit integers for direct moves.
3236 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3237 wl - Float register if we can do 32-bit signed int loads.
3238 wm - VSX register for ISA 2.07 direct move operations.
3239 wn - always NO_REGS.
3240 wr - GPR if 64-bit mode is permitted.
3241 ws - Register class to do ISA 2.06 DF operations.
3242 wt - VSX register for TImode in VSX registers.
3243 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3244 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3245 ww - Register class to do SF conversions in with VSX operations.
3246 wx - Float register if we can do 32-bit int stores.
3247 wy - Register class to do ISA 2.07 SF operations.
3248 wz - Float register if we can do 32-bit unsigned int loads.
3249 wH - Altivec register if SImode is allowed in VSX registers.
3250 wI - VSX register if SImode is allowed in VSX registers.
3251 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3252 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3254 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
3255 rs6000_constraints
[RS6000_CONSTRAINT_f
] = FLOAT_REGS
; /* SFmode */
3257 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
3258 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
; /* DFmode */
3262 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
3263 rs6000_constraints
[RS6000_CONSTRAINT_wd
] = VSX_REGS
; /* V2DFmode */
3264 rs6000_constraints
[RS6000_CONSTRAINT_wf
] = VSX_REGS
; /* V4SFmode */
3266 if (TARGET_VSX_TIMODE
)
3267 rs6000_constraints
[RS6000_CONSTRAINT_wt
] = VSX_REGS
; /* TImode */
3269 if (TARGET_UPPER_REGS_DF
) /* DFmode */
3271 rs6000_constraints
[RS6000_CONSTRAINT_ws
] = VSX_REGS
;
3272 rs6000_constraints
[RS6000_CONSTRAINT_wv
] = ALTIVEC_REGS
;
3275 rs6000_constraints
[RS6000_CONSTRAINT_ws
] = FLOAT_REGS
;
3277 if (TARGET_UPPER_REGS_DI
) /* DImode */
3278 rs6000_constraints
[RS6000_CONSTRAINT_wi
] = VSX_REGS
;
3280 rs6000_constraints
[RS6000_CONSTRAINT_wi
] = FLOAT_REGS
;
3283 /* Add conditional constraints based on various options, to allow us to
3284 collapse multiple insn patterns. */
3286 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
3288 if (TARGET_MFPGPR
) /* DFmode */
3289 rs6000_constraints
[RS6000_CONSTRAINT_wg
] = FLOAT_REGS
;
3292 rs6000_constraints
[RS6000_CONSTRAINT_wl
] = FLOAT_REGS
; /* DImode */
3294 if (TARGET_DIRECT_MOVE
)
3296 rs6000_constraints
[RS6000_CONSTRAINT_wh
] = FLOAT_REGS
;
3297 rs6000_constraints
[RS6000_CONSTRAINT_wj
] /* DImode */
3298 = rs6000_constraints
[RS6000_CONSTRAINT_wi
];
3299 rs6000_constraints
[RS6000_CONSTRAINT_wk
] /* DFmode */
3300 = rs6000_constraints
[RS6000_CONSTRAINT_ws
];
3301 rs6000_constraints
[RS6000_CONSTRAINT_wm
] = VSX_REGS
;
3304 if (TARGET_POWERPC64
)
3306 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
3307 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
3310 if (TARGET_P8_VECTOR
&& TARGET_UPPER_REGS_SF
) /* SFmode */
3312 rs6000_constraints
[RS6000_CONSTRAINT_wu
] = ALTIVEC_REGS
;
3313 rs6000_constraints
[RS6000_CONSTRAINT_wy
] = VSX_REGS
;
3314 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = VSX_REGS
;
3316 else if (TARGET_P8_VECTOR
)
3318 rs6000_constraints
[RS6000_CONSTRAINT_wy
] = FLOAT_REGS
;
3319 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = FLOAT_REGS
;
3321 else if (TARGET_VSX
)
3322 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = FLOAT_REGS
;
3325 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
3328 rs6000_constraints
[RS6000_CONSTRAINT_wz
] = FLOAT_REGS
; /* DImode */
3330 if (TARGET_FLOAT128_TYPE
)
3332 rs6000_constraints
[RS6000_CONSTRAINT_wq
] = VSX_REGS
; /* KFmode */
3333 if (FLOAT128_IEEE_P (TFmode
))
3334 rs6000_constraints
[RS6000_CONSTRAINT_wp
] = VSX_REGS
; /* TFmode */
3337 /* Support for new D-form instructions. */
3338 if (TARGET_P9_DFORM_SCALAR
)
3339 rs6000_constraints
[RS6000_CONSTRAINT_wb
] = ALTIVEC_REGS
;
3341 /* Support for ISA 3.0 (power9) vectors. */
3342 if (TARGET_P9_VECTOR
)
3343 rs6000_constraints
[RS6000_CONSTRAINT_wo
] = VSX_REGS
;
3345 /* Support for new direct moves (ISA 3.0 + 64bit). */
3346 if (TARGET_DIRECT_MOVE_128
)
3347 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
3349 /* Support small integers in VSX registers. */
3350 if (TARGET_VSX_SMALL_INTEGER
)
3352 rs6000_constraints
[RS6000_CONSTRAINT_wH
] = ALTIVEC_REGS
;
3353 rs6000_constraints
[RS6000_CONSTRAINT_wI
] = FLOAT_REGS
;
3354 if (TARGET_P9_VECTOR
)
3356 rs6000_constraints
[RS6000_CONSTRAINT_wJ
] = FLOAT_REGS
;
3357 rs6000_constraints
[RS6000_CONSTRAINT_wK
] = ALTIVEC_REGS
;
3361 /* Set up the reload helper and direct move functions. */
3362 if (TARGET_VSX
|| TARGET_ALTIVEC
)
3366 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
3367 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
3368 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
3369 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
3370 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
3371 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
3372 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
3373 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
3374 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
3375 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
3376 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
3377 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3378 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3379 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3380 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3381 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3382 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3383 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3384 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3385 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3387 if (FLOAT128_VECTOR_P (KFmode
))
3389 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3390 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3393 if (FLOAT128_VECTOR_P (TFmode
))
3395 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3396 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3399 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3401 if (TARGET_NO_SDMODE_STACK
)
3403 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3404 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3407 if (TARGET_VSX_TIMODE
)
3409 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3410 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3413 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3415 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3416 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3417 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3418 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3419 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3420 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3421 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3422 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3423 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3425 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3426 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3427 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3428 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3429 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3430 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3431 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3432 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3433 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3435 if (FLOAT128_VECTOR_P (KFmode
))
3437 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3438 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3441 if (FLOAT128_VECTOR_P (TFmode
))
3443 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3444 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3450 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3451 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3452 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3453 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3454 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3455 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3456 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3457 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3458 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3459 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3460 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3461 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3462 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3463 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3464 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3465 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3466 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3467 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3468 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3469 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3471 if (FLOAT128_VECTOR_P (KFmode
))
3473 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3474 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3477 if (FLOAT128_IEEE_P (TFmode
))
3479 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3480 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3483 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3485 if (TARGET_NO_SDMODE_STACK
)
3487 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3488 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3491 if (TARGET_VSX_TIMODE
)
3493 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3494 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3497 if (TARGET_DIRECT_MOVE
)
3499 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3500 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3501 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3505 if (TARGET_UPPER_REGS_DF
)
3506 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3508 if (TARGET_UPPER_REGS_DI
)
3509 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3511 if (TARGET_UPPER_REGS_SF
)
3512 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3514 if (TARGET_VSX_SMALL_INTEGER
)
3516 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3517 if (TARGET_P9_VECTOR
)
3519 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3520 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3525 /* Setup the fusion operations. */
3526 if (TARGET_P8_FUSION
)
3528 reg_addr
[QImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_qi
;
3529 reg_addr
[HImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_hi
;
3530 reg_addr
[SImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_si
;
3532 reg_addr
[DImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_di
;
3535 if (TARGET_P9_FUSION
)
3538 enum machine_mode mode
; /* mode of the fused type. */
3539 enum machine_mode pmode
; /* pointer mode. */
3540 enum rs6000_reload_reg_type rtype
; /* register type. */
3541 enum insn_code load
; /* load insn. */
3542 enum insn_code store
; /* store insn. */
3545 static const struct fuse_insns addis_insns
[] = {
3546 { E_SFmode
, E_DImode
, RELOAD_REG_FPR
,
3547 CODE_FOR_fusion_vsx_di_sf_load
,
3548 CODE_FOR_fusion_vsx_di_sf_store
},
3550 { E_SFmode
, E_SImode
, RELOAD_REG_FPR
,
3551 CODE_FOR_fusion_vsx_si_sf_load
,
3552 CODE_FOR_fusion_vsx_si_sf_store
},
3554 { E_DFmode
, E_DImode
, RELOAD_REG_FPR
,
3555 CODE_FOR_fusion_vsx_di_df_load
,
3556 CODE_FOR_fusion_vsx_di_df_store
},
3558 { E_DFmode
, E_SImode
, RELOAD_REG_FPR
,
3559 CODE_FOR_fusion_vsx_si_df_load
,
3560 CODE_FOR_fusion_vsx_si_df_store
},
3562 { E_DImode
, E_DImode
, RELOAD_REG_FPR
,
3563 CODE_FOR_fusion_vsx_di_di_load
,
3564 CODE_FOR_fusion_vsx_di_di_store
},
3566 { E_DImode
, E_SImode
, RELOAD_REG_FPR
,
3567 CODE_FOR_fusion_vsx_si_di_load
,
3568 CODE_FOR_fusion_vsx_si_di_store
},
3570 { E_QImode
, E_DImode
, RELOAD_REG_GPR
,
3571 CODE_FOR_fusion_gpr_di_qi_load
,
3572 CODE_FOR_fusion_gpr_di_qi_store
},
3574 { E_QImode
, E_SImode
, RELOAD_REG_GPR
,
3575 CODE_FOR_fusion_gpr_si_qi_load
,
3576 CODE_FOR_fusion_gpr_si_qi_store
},
3578 { E_HImode
, E_DImode
, RELOAD_REG_GPR
,
3579 CODE_FOR_fusion_gpr_di_hi_load
,
3580 CODE_FOR_fusion_gpr_di_hi_store
},
3582 { E_HImode
, E_SImode
, RELOAD_REG_GPR
,
3583 CODE_FOR_fusion_gpr_si_hi_load
,
3584 CODE_FOR_fusion_gpr_si_hi_store
},
3586 { E_SImode
, E_DImode
, RELOAD_REG_GPR
,
3587 CODE_FOR_fusion_gpr_di_si_load
,
3588 CODE_FOR_fusion_gpr_di_si_store
},
3590 { E_SImode
, E_SImode
, RELOAD_REG_GPR
,
3591 CODE_FOR_fusion_gpr_si_si_load
,
3592 CODE_FOR_fusion_gpr_si_si_store
},
3594 { E_SFmode
, E_DImode
, RELOAD_REG_GPR
,
3595 CODE_FOR_fusion_gpr_di_sf_load
,
3596 CODE_FOR_fusion_gpr_di_sf_store
},
3598 { E_SFmode
, E_SImode
, RELOAD_REG_GPR
,
3599 CODE_FOR_fusion_gpr_si_sf_load
,
3600 CODE_FOR_fusion_gpr_si_sf_store
},
3602 { E_DImode
, E_DImode
, RELOAD_REG_GPR
,
3603 CODE_FOR_fusion_gpr_di_di_load
,
3604 CODE_FOR_fusion_gpr_di_di_store
},
3606 { E_DFmode
, E_DImode
, RELOAD_REG_GPR
,
3607 CODE_FOR_fusion_gpr_di_df_load
,
3608 CODE_FOR_fusion_gpr_di_df_store
},
3611 machine_mode cur_pmode
= Pmode
;
3614 for (i
= 0; i
< ARRAY_SIZE (addis_insns
); i
++)
3616 machine_mode xmode
= addis_insns
[i
].mode
;
3617 enum rs6000_reload_reg_type rtype
= addis_insns
[i
].rtype
;
3619 if (addis_insns
[i
].pmode
!= cur_pmode
)
3622 if (rtype
== RELOAD_REG_FPR
3623 && (!TARGET_HARD_FLOAT
|| !TARGET_FPRS
))
3626 reg_addr
[xmode
].fusion_addis_ld
[rtype
] = addis_insns
[i
].load
;
3627 reg_addr
[xmode
].fusion_addis_st
[rtype
] = addis_insns
[i
].store
;
3629 if (rtype
== RELOAD_REG_FPR
&& TARGET_P9_DFORM_SCALAR
)
3631 reg_addr
[xmode
].fusion_addis_ld
[RELOAD_REG_VMX
]
3632 = addis_insns
[i
].load
;
3633 reg_addr
[xmode
].fusion_addis_st
[RELOAD_REG_VMX
]
3634 = addis_insns
[i
].store
;
3639 /* Note which types we support fusing TOC setup plus memory insn. We only do
3640 fused TOCs for medium/large code models. */
3641 if (TARGET_P8_FUSION
&& TARGET_TOC_FUSION
&& TARGET_POWERPC64
3642 && (TARGET_CMODEL
!= CMODEL_SMALL
))
3644 reg_addr
[QImode
].fused_toc
= true;
3645 reg_addr
[HImode
].fused_toc
= true;
3646 reg_addr
[SImode
].fused_toc
= true;
3647 reg_addr
[DImode
].fused_toc
= true;
3648 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
3650 if (TARGET_SINGLE_FLOAT
)
3651 reg_addr
[SFmode
].fused_toc
= true;
3652 if (TARGET_DOUBLE_FLOAT
)
3653 reg_addr
[DFmode
].fused_toc
= true;
3657 /* Precalculate HARD_REGNO_NREGS. */
3658 for (r
= 0; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3659 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3660 rs6000_hard_regno_nregs
[m
][r
]
3661 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
)m
);
3663 /* Precalculate HARD_REGNO_MODE_OK. */
3664 for (r
= 0; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3665 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3666 if (rs6000_hard_regno_mode_ok (r
, (machine_mode
)m
))
3667 rs6000_hard_regno_mode_ok_p
[m
][r
] = true;
3669 /* Precalculate CLASS_MAX_NREGS sizes. */
3670 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3674 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3675 reg_size
= UNITS_PER_VSX_WORD
;
3677 else if (c
== ALTIVEC_REGS
)
3678 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3680 else if (c
== FLOAT_REGS
)
3681 reg_size
= UNITS_PER_FP_WORD
;
3684 reg_size
= UNITS_PER_WORD
;
3686 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3688 machine_mode m2
= (machine_mode
)m
;
3689 int reg_size2
= reg_size
;
3691 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3693 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3694 reg_size2
= UNITS_PER_FP_WORD
;
3696 rs6000_class_max_nregs
[m
][c
]
3697 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3701 if (TARGET_E500_DOUBLE
)
3702 rs6000_class_max_nregs
[DFmode
][GENERAL_REGS
] = 1;
3704 /* Calculate which modes to automatically generate code to use a the
3705 reciprocal divide and square root instructions. In the future, possibly
3706 automatically generate the instructions even if the user did not specify
3707 -mrecip. The older machines double precision reciprocal sqrt estimate is
3708 not accurate enough. */
3709 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3711 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3713 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3714 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3715 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3716 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3717 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3719 if (TARGET_FRSQRTES
)
3720 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3722 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3723 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3724 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3725 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3726 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3728 if (rs6000_recip_control
)
3730 if (!flag_finite_math_only
)
3731 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3732 if (flag_trapping_math
)
3733 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3734 if (!flag_reciprocal_math
)
3735 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3736 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3738 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3739 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3740 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3742 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3743 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3744 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3746 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3747 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3748 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3750 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3751 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3752 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3754 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3755 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3756 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3758 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3759 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3760 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3762 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3763 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3764 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3766 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3767 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3768 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3772 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3773 legitimate address support to figure out the appropriate addressing to
3775 rs6000_setup_reg_addr_masks ();
3777 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3779 if (TARGET_DEBUG_REG
)
3780 rs6000_debug_reg_global ();
3782 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3784 "SImode variable mult cost = %d\n"
3785 "SImode constant mult cost = %d\n"
3786 "SImode short constant mult cost = %d\n"
3787 "DImode multipliciation cost = %d\n"
3788 "SImode division cost = %d\n"
3789 "DImode division cost = %d\n"
3790 "Simple fp operation cost = %d\n"
3791 "DFmode multiplication cost = %d\n"
3792 "SFmode division cost = %d\n"
3793 "DFmode division cost = %d\n"
3794 "cache line size = %d\n"
3795 "l1 cache size = %d\n"
3796 "l2 cache size = %d\n"
3797 "simultaneous prefetches = %d\n"
3800 rs6000_cost
->mulsi_const
,
3801 rs6000_cost
->mulsi_const9
,
3809 rs6000_cost
->cache_line_size
,
3810 rs6000_cost
->l1_cache_size
,
3811 rs6000_cost
->l2_cache_size
,
3812 rs6000_cost
->simultaneous_prefetches
);
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  Adjusts the ISA flags
   and ABI state for Darwin targets after generic option processing.
   NOTE(review): this block was reconstructed from a line-mangled extraction;
   several elided condition legs (marked below) were restored from upstream
   GCC and should be confirmed against the original file.  */
static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  /* Darwin64 uses one-byte bool.  */
  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)  /* NOTE(review): second leg elided in extraction; restored — confirm.  */
      darwin_one_byte_bool = 1;

  /* -m64 only makes sense on a PowerPC64 architecture; force it on and
     warn the user rather than erroring out.  */
  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "-m64 requires PowerPC64 architecture, enabling");
    }

  /* Kernel code uses long calls and soft float.
     NOTE(review): the guard condition was elided in the extraction;
     restored as flag_mkernel per upstream — confirm.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;
      rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  NOTE(review): the TARGET_64BIT leg was elided in the
     extraction; restored per upstream — confirm.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  NOTE(review): the leading
     !flag_mkernel/!flag_apple_kext legs were elided; restored — confirm.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! global_options_set.x_rs6000_cpu_index)
    {
      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
    }
}
3864 /* If not otherwise specified by a target, make 'long double' equivalent to
3867 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3868 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3871 /* Return the builtin mask of the various options used that could affect which
3872 builtins were used. In the past we used target_flags, but we've run out of
3873 bits, and some options like SPE and PAIRED are no longer in
3877 rs6000_builtin_mask_calculate (void)
3879 return (((TARGET_ALTIVEC
) ? RS6000_BTM_ALTIVEC
: 0)
3880 | ((TARGET_CMPB
) ? RS6000_BTM_CMPB
: 0)
3881 | ((TARGET_VSX
) ? RS6000_BTM_VSX
: 0)
3882 | ((TARGET_SPE
) ? RS6000_BTM_SPE
: 0)
3883 | ((TARGET_PAIRED_FLOAT
) ? RS6000_BTM_PAIRED
: 0)
3884 | ((TARGET_FRE
) ? RS6000_BTM_FRE
: 0)
3885 | ((TARGET_FRES
) ? RS6000_BTM_FRES
: 0)
3886 | ((TARGET_FRSQRTE
) ? RS6000_BTM_FRSQRTE
: 0)
3887 | ((TARGET_FRSQRTES
) ? RS6000_BTM_FRSQRTES
: 0)
3888 | ((TARGET_POPCNTD
) ? RS6000_BTM_POPCNTD
: 0)
3889 | ((rs6000_cpu
== PROCESSOR_CELL
) ? RS6000_BTM_CELL
: 0)
3890 | ((TARGET_P8_VECTOR
) ? RS6000_BTM_P8_VECTOR
: 0)
3891 | ((TARGET_P9_VECTOR
) ? RS6000_BTM_P9_VECTOR
: 0)
3892 | ((TARGET_P9_MISC
) ? RS6000_BTM_P9_MISC
: 0)
3893 | ((TARGET_MODULO
) ? RS6000_BTM_MODULO
: 0)
3894 | ((TARGET_64BIT
) ? RS6000_BTM_64BIT
: 0)
3895 | ((TARGET_CRYPTO
) ? RS6000_BTM_CRYPTO
: 0)
3896 | ((TARGET_HTM
) ? RS6000_BTM_HTM
: 0)
3897 | ((TARGET_DFP
) ? RS6000_BTM_DFP
: 0)
3898 | ((TARGET_HARD_FLOAT
) ? RS6000_BTM_HARD_FLOAT
: 0)
3899 | ((TARGET_LONG_DOUBLE_128
) ? RS6000_BTM_LDBL128
: 0)
3900 | ((TARGET_FLOAT128_TYPE
) ? RS6000_BTM_FLOAT128
: 0));
/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
   to clobber the XER[CA] bit because clobbering that bit without telling
   the compiler worked just fine with versions of GCC before GCC 5, and
   breaking a lot of older code in ways that are hard to track down is
   not such a great idea.  */
static rtx_insn *
rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
		      vec<const char *> &/*constraints*/,
		      vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
  /* Add the carry register to both the clobber rtx list seen by the asm
     and the hard-reg set the caller tracks.  */
  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
  /* NOTE(review): no extra insns are emitted before/after the asm, so the
     hook returns NULL.  The return statement was elided in this mangled
     extraction and restored from upstream GCC — confirm.  */
  return NULL;
}
3919 /* Override command line options.
3921 Combine build-specific configuration information with options
3922 specified on the command line to set various state variables which
3923 influence code generation, optimization, and expansion of built-in
3924 functions. Assure that command-line configuration preferences are
3925 compatible with each other and with the build configuration; issue
3926 warnings while adjusting configuration or error messages while
3927 rejecting configuration.
3929 Upon entry to this function:
3931 This function is called once at the beginning of
3932 compilation, and then again at the start and end of compiling
3933 each section of code that has a different configuration, as
3934 indicated, for example, by adding the
3936 __attribute__((__target__("cpu=power9")))
3938 qualifier to a function definition or, for example, by bracketing
3941 #pragma GCC target("altivec")
3945 #pragma GCC reset_options
3947 directives. Parameter global_init_p is true for the initial
3948 invocation, which initializes global variables, and false for all
3949 subsequent invocations.
3952 Various global state information is assumed to be valid. This
3953 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3954 default CPU specified at build configure time, TARGET_DEFAULT,
3955 representing the default set of option flags for the default
3956 target, and global_options_set.x_rs6000_isa_flags, representing
3957 which options were requested on the command line.
3959 Upon return from this function:
3961 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3962 was set by name on the command line. Additionally, if certain
3963 attributes are automatically enabled or disabled by this function
3964 in order to assure compatibility between options and
3965 configuration, the flags associated with those attributes are
3966 also set. By setting these "explicit bits", we avoid the risk
3967 that other code might accidentally overwrite these particular
3968 attributes with "default values".
3970 The various bits of rs6000_isa_flags are set to indicate the
3971 target options that have been selected for the most current
3972 compilation efforts. This has the effect of also turning on the
3973 associated TARGET_XXX values since these are macros which are
3974 generally defined to test the corresponding bit of the
3975 rs6000_isa_flags variable.
3977 The variable rs6000_builtin_mask is set to represent the target
3978 options for the most current compilation efforts, consistent with
3979 the current contents of rs6000_isa_flags. This variable controls
3980 expansion of built-in functions.
3982 Various other global variables and fields of global structures
3983 (over 50 in all) are initialized to reflect the desired options
3984 for the most current compilation efforts. */
3987 rs6000_option_override_internal (bool global_init_p
)
3990 bool have_cpu
= false;
3992 /* The default cpu requested at configure time, if any. */
3993 const char *implicit_cpu
= OPTION_TARGET_CPU_DEFAULT
;
3995 HOST_WIDE_INT set_masks
;
3996 HOST_WIDE_INT ignore_masks
;
3999 struct cl_target_option
*main_target_opt
4000 = ((global_init_p
|| target_option_default_node
== NULL
)
4001 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
4003 /* Print defaults. */
4004 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
4005 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
4007 /* Remember the explicit arguments. */
4009 rs6000_isa_flags_explicit
= global_options_set
.x_rs6000_isa_flags
;
4011 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4012 library functions, so warn about it. The flag may be useful for
4013 performance studies from time to time though, so don't disable it
4015 if (global_options_set
.x_rs6000_alignment_flags
4016 && rs6000_alignment_flags
== MASK_ALIGN_POWER
4017 && DEFAULT_ABI
== ABI_DARWIN
4019 warning (0, "-malign-power is not supported for 64-bit Darwin;"
4020 " it is incompatible with the installed C and C++ libraries");
4022 /* Numerous experiment shows that IRA based loop pressure
4023 calculation works better for RTL loop invariant motion on targets
4024 with enough (>= 32) registers. It is an expensive optimization.
4025 So it is on only for peak performance. */
4026 if (optimize
>= 3 && global_init_p
4027 && !global_options_set
.x_flag_ira_loop_pressure
)
4028 flag_ira_loop_pressure
= 1;
4030 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4031 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4032 options were already specified. */
4033 if (flag_sanitize
& SANITIZE_USER_ADDRESS
4034 && !global_options_set
.x_flag_asynchronous_unwind_tables
)
4035 flag_asynchronous_unwind_tables
= 1;
4037 /* Set the pointer size. */
4040 rs6000_pmode
= DImode
;
4041 rs6000_pointer_size
= 64;
4045 rs6000_pmode
= SImode
;
4046 rs6000_pointer_size
= 32;
4049 /* Some OSs don't support saving the high part of 64-bit registers on context
4050 switch. Other OSs don't support saving Altivec registers. On those OSs,
4051 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4052 if the user wants either, the user must explicitly specify them and we
4053 won't interfere with the user's specification. */
4055 set_masks
= POWERPC_MASKS
;
4056 #ifdef OS_MISSING_POWERPC64
4057 if (OS_MISSING_POWERPC64
)
4058 set_masks
&= ~OPTION_MASK_POWERPC64
;
4060 #ifdef OS_MISSING_ALTIVEC
4061 if (OS_MISSING_ALTIVEC
)
4062 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
4063 | OTHER_VSX_VECTOR_MASKS
);
4066 /* Don't override by the processor default if given explicitly. */
4067 set_masks
&= ~rs6000_isa_flags_explicit
;
4069 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4070 the cpu in a target attribute or pragma, but did not specify a tuning
4071 option, use the cpu for the tuning option rather than the option specified
4072 with -mtune on the command line. Process a '--with-cpu' configuration
4073 request as an implicit --cpu. */
4074 if (rs6000_cpu_index
>= 0)
4076 cpu_index
= rs6000_cpu_index
;
4079 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
4081 rs6000_cpu_index
= cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
4084 else if (implicit_cpu
)
4086 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (implicit_cpu
);
4091 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4092 const char *default_cpu
= ((!TARGET_POWERPC64
)
4094 : ((BYTES_BIG_ENDIAN
)
4098 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
4102 gcc_assert (cpu_index
>= 0);
4106 #ifndef HAVE_AS_POWER9
4107 if (processor_target_table
[rs6000_cpu_index
].processor
4108 == PROCESSOR_POWER9
)
4111 warning (0, "will not generate power9 instructions because "
4112 "assembler lacks power9 support");
4115 #ifndef HAVE_AS_POWER8
4116 if (processor_target_table
[rs6000_cpu_index
].processor
4117 == PROCESSOR_POWER8
)
4120 warning (0, "will not generate power8 instructions because "
4121 "assembler lacks power8 support");
4124 #ifndef HAVE_AS_POPCNTD
4125 if (processor_target_table
[rs6000_cpu_index
].processor
4126 == PROCESSOR_POWER7
)
4129 warning (0, "will not generate power7 instructions because "
4130 "assembler lacks power7 support");
4134 if (processor_target_table
[rs6000_cpu_index
].processor
4135 == PROCESSOR_POWER6
)
4138 warning (0, "will not generate power6 instructions because "
4139 "assembler lacks power6 support");
4142 #ifndef HAVE_AS_POPCNTB
4143 if (processor_target_table
[rs6000_cpu_index
].processor
4144 == PROCESSOR_POWER5
)
4147 warning (0, "will not generate power5 instructions because "
4148 "assembler lacks power5 support");
4154 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4155 const char *default_cpu
= (!TARGET_POWERPC64
4161 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
4165 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4166 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4167 with those from the cpu, except for options that were explicitly set. If
4168 we don't have a cpu, do not override the target bits set in
4172 rs6000_isa_flags
&= ~set_masks
;
4173 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
4178 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4179 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4180 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4181 to using rs6000_isa_flags, we need to do the initialization here.
4183 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4184 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4185 HOST_WIDE_INT flags
= ((TARGET_DEFAULT
) ? TARGET_DEFAULT
4186 : processor_target_table
[cpu_index
].target_enable
);
4187 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
4190 if (rs6000_tune_index
>= 0)
4191 tune_index
= rs6000_tune_index
;
4193 rs6000_tune_index
= tune_index
= cpu_index
;
4197 enum processor_type tune_proc
4198 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
4201 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
4202 if (processor_target_table
[i
].processor
== tune_proc
)
4204 rs6000_tune_index
= tune_index
= i
;
4209 gcc_assert (tune_index
>= 0);
4210 rs6000_cpu
= processor_target_table
[tune_index
].processor
;
4212 /* Pick defaults for SPE related control flags. Do this early to make sure
4213 that the TARGET_ macros are representative ASAP. */
4215 int spe_capable_cpu
=
4216 (rs6000_cpu
== PROCESSOR_PPC8540
4217 || rs6000_cpu
== PROCESSOR_PPC8548
);
4219 if (!global_options_set
.x_rs6000_spe_abi
)
4220 rs6000_spe_abi
= spe_capable_cpu
;
4222 if (!global_options_set
.x_rs6000_spe
)
4223 rs6000_spe
= spe_capable_cpu
;
4225 if (!global_options_set
.x_rs6000_float_gprs
)
4227 (rs6000_cpu
== PROCESSOR_PPC8540
? 1
4228 : rs6000_cpu
== PROCESSOR_PPC8548
? 2
4232 if (global_options_set
.x_rs6000_spe_abi
4235 error ("not configured for SPE ABI");
4237 if (global_options_set
.x_rs6000_spe
4240 error ("not configured for SPE instruction set");
4242 if (main_target_opt
!= NULL
4243 && ((main_target_opt
->x_rs6000_spe_abi
!= rs6000_spe_abi
)
4244 || (main_target_opt
->x_rs6000_spe
!= rs6000_spe
)
4245 || (main_target_opt
->x_rs6000_float_gprs
!= rs6000_float_gprs
)))
4246 error ("target attribute or pragma changes SPE ABI");
4248 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
4249 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
4250 || rs6000_cpu
== PROCESSOR_PPCE5500
)
4253 error ("AltiVec not supported in this target");
4255 error ("SPE not supported in this target");
4257 if (rs6000_cpu
== PROCESSOR_PPCE6500
)
4260 error ("SPE not supported in this target");
4263 /* Disable Cell microcode if we are optimizing for the Cell
4264 and not optimizing for size. */
4265 if (rs6000_gen_cell_microcode
== -1)
4266 rs6000_gen_cell_microcode
= !(rs6000_cpu
== PROCESSOR_CELL
4269 /* If we are optimizing big endian systems for space and it's OK to
4270 use instructions that would be microcoded on the Cell, use the
4271 load/store multiple and string instructions. */
4272 if (BYTES_BIG_ENDIAN
&& optimize_size
&& rs6000_gen_cell_microcode
)
4273 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& (OPTION_MASK_MULTIPLE
4274 | OPTION_MASK_STRING
);
4276 /* Don't allow -mmultiple or -mstring on little endian systems
4277 unless the cpu is a 750, because the hardware doesn't support the
4278 instructions used in little endian mode, and causes an alignment
4279 trap. The 750 does not cause an alignment trap (except when the
4280 target is unaligned). */
4282 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
)
4284 if (TARGET_MULTIPLE
)
4286 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
4287 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
4288 warning (0, "-mmultiple is not supported on little endian systems");
4293 rs6000_isa_flags
&= ~OPTION_MASK_STRING
;
4294 if ((rs6000_isa_flags_explicit
& OPTION_MASK_STRING
) != 0)
4295 warning (0, "-mstring is not supported on little endian systems");
4299 /* If little-endian, default to -mstrict-align on older processors.
4300 Testing for htm matches power8 and later. */
4301 if (!BYTES_BIG_ENDIAN
4302 && !(processor_target_table
[tune_index
].target_enable
& OPTION_MASK_HTM
))
4303 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
4305 /* -maltivec={le,be} implies -maltivec. */
4306 if (rs6000_altivec_element_order
!= 0)
4307 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
4309 /* Disallow -maltivec=le in big endian mode for now. This is not
4310 known to be useful for anyone. */
4311 if (BYTES_BIG_ENDIAN
&& rs6000_altivec_element_order
== 1)
4313 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4314 rs6000_altivec_element_order
= 0;
4317 /* Add some warnings for VSX. */
4320 const char *msg
= NULL
;
4321 if (!TARGET_HARD_FLOAT
|| !TARGET_FPRS
4322 || !TARGET_SINGLE_FLOAT
|| !TARGET_DOUBLE_FLOAT
)
4324 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4325 msg
= N_("-mvsx requires hardware floating point");
4328 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
4329 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4332 else if (TARGET_PAIRED_FLOAT
)
4333 msg
= N_("-mvsx and -mpaired are incompatible");
4334 else if (TARGET_AVOID_XFORM
> 0)
4335 msg
= N_("-mvsx needs indexed addressing");
4336 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
4337 & OPTION_MASK_ALTIVEC
))
4339 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4340 msg
= N_("-mvsx and -mno-altivec are incompatible");
4342 msg
= N_("-mno-altivec disables vsx");
4348 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
4349 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4353 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4354 the -mcpu setting to enable options that conflict. */
4355 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
4356 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
4357 | OPTION_MASK_ALTIVEC
4358 | OPTION_MASK_VSX
)) != 0)
4359 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
4360 | OPTION_MASK_DIRECT_MOVE
)
4361 & ~rs6000_isa_flags_explicit
);
4363 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4364 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
4366 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4367 off all of the options that depend on those flags. */
4368 ignore_masks
= rs6000_disable_incompatible_switches ();
4370 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4371 unless the user explicitly used the -mno-<option> to disable the code. */
4372 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_DFORM_SCALAR
4373 || TARGET_P9_DFORM_VECTOR
|| TARGET_P9_DFORM_BOTH
> 0)
4374 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
4375 else if (TARGET_P9_MINMAX
)
4379 if (cpu_index
== PROCESSOR_POWER9
)
4381 /* legacy behavior: allow -mcpu-power9 with certain
4382 capabilities explicitly disabled. */
4383 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
4384 /* However, reject this automatic fix if certain
4385 capabilities required for TARGET_P9_MINMAX support
4386 have been explicitly disabled. */
4387 if (((OPTION_MASK_VSX
| OPTION_MASK_UPPER_REGS_SF
4388 | OPTION_MASK_UPPER_REGS_DF
) & rs6000_isa_flags
)
4389 != (OPTION_MASK_VSX
| OPTION_MASK_UPPER_REGS_SF
4390 | OPTION_MASK_UPPER_REGS_DF
))
4391 error ("-mpower9-minmax incompatible with explicitly disabled options");
4394 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4395 "<xxx> less than power9");
4397 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
4398 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
4399 & rs6000_isa_flags_explicit
))
4400 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4401 were explicitly cleared. */
4402 error ("-mpower9-minmax incompatible with explicitly disabled options");
4404 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
4406 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
4407 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
4408 else if (TARGET_VSX
)
4409 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
4410 else if (TARGET_POPCNTD
)
4411 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
4412 else if (TARGET_DFP
)
4413 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
4414 else if (TARGET_CMPB
)
4415 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
4416 else if (TARGET_FPRND
)
4417 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
4418 else if (TARGET_POPCNTB
)
4419 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
4420 else if (TARGET_ALTIVEC
)
4421 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
4423 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
4425 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
4426 error ("-mcrypto requires -maltivec");
4427 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
4430 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
4432 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
4433 error ("-mdirect-move requires -mvsx");
4434 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
4437 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
4439 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4440 error ("-mpower8-vector requires -maltivec");
4441 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4444 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
4446 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4447 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
4448 error ("-mpower8-vector requires -mvsx");
4449 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
4451 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4452 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4453 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4457 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4459 rs6000_isa_flags
|= OPTION_MASK_VSX
;
4460 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4464 if (TARGET_VSX_TIMODE
&& !TARGET_VSX
)
4466 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
)
4467 error ("-mvsx-timode requires -mvsx");
4468 rs6000_isa_flags
&= ~OPTION_MASK_VSX_TIMODE
;
4471 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
4473 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
4474 error ("-mhard-dfp requires -mhard-float");
4475 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
4478 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4479 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4480 set the individual option. */
4481 if (TARGET_UPPER_REGS
> 0)
4484 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
))
4486 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_DF
;
4487 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DF
;
4490 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
))
4492 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_DI
;
4493 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DI
;
4495 if (TARGET_P8_VECTOR
4496 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
))
4498 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_SF
;
4499 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_SF
;
4502 else if (TARGET_UPPER_REGS
== 0)
4505 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
))
4507 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DF
;
4508 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DF
;
4511 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
))
4513 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DI
;
4514 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DI
;
4516 if (TARGET_P8_VECTOR
4517 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
))
4519 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_SF
;
4520 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_SF
;
4524 if (TARGET_UPPER_REGS_DF
&& !TARGET_VSX
)
4526 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
)
4527 error ("-mupper-regs-df requires -mvsx");
4528 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DF
;
4531 if (TARGET_UPPER_REGS_DI
&& !TARGET_VSX
)
4533 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
)
4534 error ("-mupper-regs-di requires -mvsx");
4535 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DI
;
4538 if (TARGET_UPPER_REGS_SF
&& !TARGET_P8_VECTOR
)
4540 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
)
4541 error ("-mupper-regs-sf requires -mpower8-vector");
4542 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_SF
;
4545 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
4546 silently turn off quad memory mode. */
4547 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
4549 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4550 warning (0, N_("-mquad-memory requires 64-bit mode"));
4552 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
4553 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4555 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
4556 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
4559 /* Non-atomic quad memory load/store are disabled for little endian, since
4560 the words are reversed, but atomic operations can still be done by
4561 swapping the words. */
4562 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
4564 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4565 warning (0, N_("-mquad-memory is not available in little endian mode"));
4567 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
4570 /* Assume if the user asked for normal quad memory instructions, they want
4571 the atomic versions as well, unless they explicity told us not to use quad
4572 word atomic instructions. */
4573 if (TARGET_QUAD_MEMORY
4574 && !TARGET_QUAD_MEMORY_ATOMIC
4575 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4576 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4578 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4579 generating power8 instructions. */
4580 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4581 rs6000_isa_flags
|= (processor_target_table
[tune_index
].target_enable
4582 & OPTION_MASK_P8_FUSION
);
4584 /* Setting additional fusion flags turns on base fusion. */
4585 if (!TARGET_P8_FUSION
&& (TARGET_P8_FUSION_SIGN
|| TARGET_TOC_FUSION
))
4587 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4589 if (TARGET_P8_FUSION_SIGN
)
4590 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4592 if (TARGET_TOC_FUSION
)
4593 error ("-mtoc-fusion requires -mpower8-fusion");
4595 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4598 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4601 /* Power9 fusion is a superset over power8 fusion. */
4602 if (TARGET_P9_FUSION
&& !TARGET_P8_FUSION
)
4604 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4606 /* We prefer to not mention undocumented options in
4607 error messages. However, if users have managed to select
4608 power9-fusion without selecting power8-fusion, they
4609 already know about undocumented flags. */
4610 error ("-mpower9-fusion requires -mpower8-fusion");
4611 rs6000_isa_flags
&= ~OPTION_MASK_P9_FUSION
;
4614 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4617 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4618 generating power9 instructions. */
4619 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_FUSION
))
4620 rs6000_isa_flags
|= (processor_target_table
[tune_index
].target_enable
4621 & OPTION_MASK_P9_FUSION
);
4623 /* Power8 does not fuse sign extended loads with the addis. If we are
4624 optimizing at high levels for speed, convert a sign extended load into a
4625 zero extending load, and an explicit sign extension. */
4626 if (TARGET_P8_FUSION
4627 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4628 && optimize_function_for_speed_p (cfun
)
4630 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4632 /* TOC fusion requires 64-bit and medium/large code model. */
4633 if (TARGET_TOC_FUSION
&& !TARGET_POWERPC64
)
4635 rs6000_isa_flags
&= ~OPTION_MASK_TOC_FUSION
;
4636 if ((rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
) != 0)
4637 warning (0, N_("-mtoc-fusion requires 64-bit"));
4640 if (TARGET_TOC_FUSION
&& (TARGET_CMODEL
== CMODEL_SMALL
))
4642 rs6000_isa_flags
&= ~OPTION_MASK_TOC_FUSION
;
4643 if ((rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
) != 0)
4644 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4647 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4649 if (TARGET_P8_FUSION
&& !TARGET_TOC_FUSION
&& TARGET_POWERPC64
4650 && (TARGET_CMODEL
!= CMODEL_SMALL
)
4651 && !(rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
))
4652 rs6000_isa_flags
|= OPTION_MASK_TOC_FUSION
;
4654 /* ISA 3.0 vector instructions include ISA 2.07. */
4655 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4657 /* We prefer to not mention undocumented options in
4658 error messages. However, if users have managed to select
4659 power9-vector without selecting power8-vector, they
4660 already know about undocumented flags. */
4661 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4662 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4663 error ("-mpower9-vector requires -mpower8-vector");
4664 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4666 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4667 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4668 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4672 /* OPTION_MASK_P9_VECTOR is explicit and
4673 OPTION_MASK_P8_VECTOR is not explicit. */
4674 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4675 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4679 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4680 -mpower9-dform-vector. */
4681 if (TARGET_P9_DFORM_BOTH
> 0)
4683 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
))
4684 rs6000_isa_flags
|= OPTION_MASK_P9_DFORM_VECTOR
;
4686 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
))
4687 rs6000_isa_flags
|= OPTION_MASK_P9_DFORM_SCALAR
;
4689 else if (TARGET_P9_DFORM_BOTH
== 0)
4691 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
))
4692 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_VECTOR
;
4694 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
))
4695 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4698 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4699 if ((TARGET_P9_DFORM_SCALAR
|| TARGET_P9_DFORM_VECTOR
) && !TARGET_P9_VECTOR
)
4701 /* We prefer to not mention undocumented options in
4702 error messages. However, if users have managed to select
4703 power9-dform without selecting power9-vector, they
4704 already know about undocumented flags. */
4705 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
)
4706 && (rs6000_isa_flags_explicit
& (OPTION_MASK_P9_DFORM_SCALAR
4707 | OPTION_MASK_P9_DFORM_VECTOR
)))
4708 error ("-mpower9-dform requires -mpower9-vector");
4709 else if (rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
)
4712 ~(OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4713 rs6000_isa_flags_explicit
|=
4714 (OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4718 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4719 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DORM_VECTOR
4721 rs6000_isa_flags
|= OPTION_MASK_P9_VECTOR
;
4722 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4726 if ((TARGET_P9_DFORM_SCALAR
|| TARGET_P9_DFORM_VECTOR
)
4727 && !TARGET_DIRECT_MOVE
)
4729 /* We prefer to not mention undocumented options in
4730 error messages. However, if users have managed to select
4731 power9-dform without selecting direct-move, they
4732 already know about undocumented flags. */
4733 if ((rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
4734 && ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
) ||
4735 (rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
) ||
4736 (TARGET_P9_DFORM_BOTH
== 1)))
4737 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4738 " require -mdirect-move");
4739 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
) == 0)
4741 rs6000_isa_flags
|= OPTION_MASK_DIRECT_MOVE
;
4742 rs6000_isa_flags_explicit
|= OPTION_MASK_DIRECT_MOVE
;
4747 ~(OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4748 rs6000_isa_flags_explicit
|=
4749 (OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4753 if (TARGET_P9_DFORM_SCALAR
&& !TARGET_UPPER_REGS_DF
)
4755 /* We prefer to not mention undocumented options in
4756 error messages. However, if users have managed to select
4757 power9-dform without selecting upper-regs-df, they
4758 already know about undocumented flags. */
4759 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
)
4760 error ("-mpower9-dform requires -mupper-regs-df");
4761 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4764 if (TARGET_P9_DFORM_SCALAR
&& !TARGET_UPPER_REGS_SF
)
4766 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
)
4767 error ("-mpower9-dform requires -mupper-regs-sf");
4768 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4771 /* Enable LRA by default. */
4772 if ((rs6000_isa_flags_explicit
& OPTION_MASK_LRA
) == 0)
4773 rs6000_isa_flags
|= OPTION_MASK_LRA
;
4775 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4776 but do show up with -mno-lra. Given -mlra will become the default once
4777 PR 69847 is fixed, turn off the options with problems by default if
4778 -mno-lra was used, and warn if the user explicitly asked for the option.
4780 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4781 Enable -mvsx-timode by default if LRA and VSX. */
4784 if (TARGET_VSX_TIMODE
)
4786 if ((rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
) != 0)
4787 warning (0, "-mvsx-timode might need -mlra");
4790 rs6000_isa_flags
&= ~OPTION_MASK_VSX_TIMODE
;
4796 if (TARGET_VSX
&& !TARGET_VSX_TIMODE
4797 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
) == 0)
4798 rs6000_isa_flags
|= OPTION_MASK_VSX_TIMODE
;
4801 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4802 support. If we only have ISA 2.06 support, and the user did not specify
4803 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4804 but we don't enable the full vectorization support */
4805 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4806 TARGET_ALLOW_MOVMISALIGN
= 1;
4808 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4810 if (TARGET_ALLOW_MOVMISALIGN
> 0
4811 && global_options_set
.x_TARGET_ALLOW_MOVMISALIGN
)
4812 error ("-mallow-movmisalign requires -mvsx");
4814 TARGET_ALLOW_MOVMISALIGN
= 0;
4817 /* Determine when unaligned vector accesses are permitted, and when
4818 they are preferred over masked Altivec loads. Note that if
4819 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4820 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4822 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4826 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4827 error ("-mefficient-unaligned-vsx requires -mvsx");
4829 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4832 else if (!TARGET_ALLOW_MOVMISALIGN
)
4834 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4835 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4837 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4841 /* Check whether we should allow small integers into VSX registers. We
4842 require direct move to prevent the register allocator from having to move
4843 variables through memory to do moves. SImode can be used on ISA 2.07,
4844 while HImode and QImode require ISA 3.0. */
4845 if (TARGET_VSX_SMALL_INTEGER
4846 && (!TARGET_DIRECT_MOVE
|| !TARGET_P8_VECTOR
|| !TARGET_UPPER_REGS_DI
))
4848 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_SMALL_INTEGER
)
4849 error ("-mvsx-small-integer requires -mpower8-vector, "
4850 "-mupper-regs-di, and -mdirect-move");
4852 rs6000_isa_flags
&= ~OPTION_MASK_VSX_SMALL_INTEGER
;
4855 /* Set long double size before the IEEE 128-bit tests. */
4856 if (!global_options_set
.x_rs6000_long_double_type_size
)
4858 if (main_target_opt
!= NULL
4859 && (main_target_opt
->x_rs6000_long_double_type_size
4860 != RS6000_DEFAULT_LONG_DOUBLE_SIZE
))
4861 error ("target attribute or pragma changes long double size");
4863 rs6000_long_double_type_size
= RS6000_DEFAULT_LONG_DOUBLE_SIZE
;
4866 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4867 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4868 pick up this default. */
4869 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4870 if (!global_options_set
.x_rs6000_ieeequad
)
4871 rs6000_ieeequad
= 1;
4874 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4875 sytems, but don't enable the __float128 keyword. */
4876 if (TARGET_VSX
&& TARGET_LONG_DOUBLE_128
4877 && (TARGET_FLOAT128_ENABLE_TYPE
|| TARGET_IEEEQUAD
)
4878 && ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_TYPE
) == 0))
4879 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_TYPE
;
4881 /* IEEE 128-bit floating point requires VSX support. */
4884 if (TARGET_FLOAT128_KEYWORD
)
4886 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4887 error ("-mfloat128 requires VSX support");
4889 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4890 | OPTION_MASK_FLOAT128_KEYWORD
4891 | OPTION_MASK_FLOAT128_HW
);
4894 else if (TARGET_FLOAT128_TYPE
)
4896 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_TYPE
) != 0)
4897 error ("-mfloat128-type requires VSX support");
4899 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4900 | OPTION_MASK_FLOAT128_KEYWORD
4901 | OPTION_MASK_FLOAT128_HW
);
4905 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4906 128-bit floating point support to be enabled. */
4907 if (!TARGET_FLOAT128_TYPE
)
4909 if (TARGET_FLOAT128_KEYWORD
)
4911 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4913 error ("-mfloat128 requires -mfloat128-type");
4914 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4915 | OPTION_MASK_FLOAT128_KEYWORD
4916 | OPTION_MASK_FLOAT128_HW
);
4919 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_TYPE
;
4922 if (TARGET_FLOAT128_HW
)
4924 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4926 error ("-mfloat128-hardware requires -mfloat128-type");
4927 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4930 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4931 | OPTION_MASK_FLOAT128_KEYWORD
4932 | OPTION_MASK_FLOAT128_HW
);
4936 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4937 -mfloat128-hardware by default. However, don't enable the __float128
4938 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4939 -mfloat128 option as well if it was not already set. */
4940 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
4941 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4942 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4943 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4945 if (TARGET_FLOAT128_HW
4946 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4948 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4949 error ("-mfloat128-hardware requires full ISA 3.0 support");
4951 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4954 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4956 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4957 error ("-mfloat128-hardware requires -m64");
4959 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4962 if (TARGET_FLOAT128_HW
&& !TARGET_FLOAT128_KEYWORD
4963 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0
4964 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4965 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
4967 /* Print the options after updating the defaults. */
4968 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4969 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4971 /* E500mc does "better" if we inline more aggressively. Respect the
4972 user's opinion, though. */
4973 if (rs6000_block_move_inline_limit
== 0
4974 && (rs6000_cpu
== PROCESSOR_PPCE500MC
4975 || rs6000_cpu
== PROCESSOR_PPCE500MC64
4976 || rs6000_cpu
== PROCESSOR_PPCE5500
4977 || rs6000_cpu
== PROCESSOR_PPCE6500
))
4978 rs6000_block_move_inline_limit
= 128;
4980 /* store_one_arg depends on expand_block_move to handle at least the
4981 size of reg_parm_stack_space. */
4982 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4983 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4987 /* If the appropriate debug option is enabled, replace the target hooks
4988 with debug versions that call the real version and then prints
4989 debugging information. */
4990 if (TARGET_DEBUG_COST
)
4992 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4993 targetm
.address_cost
= rs6000_debug_address_cost
;
4994 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4997 if (TARGET_DEBUG_ADDR
)
4999 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
5000 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
5001 rs6000_secondary_reload_class_ptr
5002 = rs6000_debug_secondary_reload_class
;
5003 rs6000_secondary_memory_needed_ptr
5004 = rs6000_debug_secondary_memory_needed
;
5005 rs6000_cannot_change_mode_class_ptr
5006 = rs6000_debug_cannot_change_mode_class
;
5007 rs6000_preferred_reload_class_ptr
5008 = rs6000_debug_preferred_reload_class
;
5009 rs6000_legitimize_reload_address_ptr
5010 = rs6000_debug_legitimize_reload_address
;
5011 rs6000_mode_dependent_address_ptr
5012 = rs6000_debug_mode_dependent_address
;
5015 if (rs6000_veclibabi_name
)
5017 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
5018 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
5021 error ("unknown vectorization library ABI type (%s) for "
5022 "-mveclibabi= switch", rs6000_veclibabi_name
);
5028 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
5029 target attribute or pragma which automatically enables both options,
5030 unless the altivec ABI was set. This is set by default for 64-bit, but
5032 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
5033 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
5034 | OPTION_MASK_FLOAT128_TYPE
5035 | OPTION_MASK_FLOAT128_KEYWORD
)
5036 & ~rs6000_isa_flags_explicit
);
5038 /* Enable Altivec ABI for AIX -maltivec. */
5039 if (TARGET_XCOFF
&& (TARGET_ALTIVEC
|| TARGET_VSX
))
5041 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
5042 error ("target attribute or pragma changes AltiVec ABI");
5044 rs6000_altivec_abi
= 1;
5047 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
5048 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
5049 be explicitly overridden in either case. */
5052 if (!global_options_set
.x_rs6000_altivec_abi
5053 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
5055 if (main_target_opt
!= NULL
&&
5056 !main_target_opt
->x_rs6000_altivec_abi
)
5057 error ("target attribute or pragma changes AltiVec ABI");
5059 rs6000_altivec_abi
= 1;
5063 /* Set the Darwin64 ABI as default for 64-bit Darwin.
5064 So far, the only darwin64 targets are also MACH-O. */
5066 && DEFAULT_ABI
== ABI_DARWIN
5069 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
5070 error ("target attribute or pragma changes darwin64 ABI");
5073 rs6000_darwin64_abi
= 1;
5074 /* Default to natural alignment, for better performance. */
5075 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
5079 /* Place FP constants in the constant pool instead of TOC
5080 if section anchors enabled. */
5081 if (flag_section_anchors
5082 && !global_options_set
.x_TARGET_NO_FP_IN_TOC
)
5083 TARGET_NO_FP_IN_TOC
= 1;
5085 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
5086 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
5088 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5089 SUBTARGET_OVERRIDE_OPTIONS
;
5091 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5092 SUBSUBTARGET_OVERRIDE_OPTIONS
;
5094 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
5095 SUB3TARGET_OVERRIDE_OPTIONS
;
5098 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
5099 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
5101 /* For the E500 family of cores, reset the single/double FP flags to let us
5102 check that they remain constant across attributes or pragmas. Also,
5103 clear a possible request for string instructions, not supported and which
5104 we might have silently queried above for -Os.
5106 For other families, clear ISEL in case it was set implicitly.
5111 case PROCESSOR_PPC8540
:
5112 case PROCESSOR_PPC8548
:
5113 case PROCESSOR_PPCE500MC
:
5114 case PROCESSOR_PPCE500MC64
:
5115 case PROCESSOR_PPCE5500
:
5116 case PROCESSOR_PPCE6500
:
5118 rs6000_single_float
= TARGET_E500_SINGLE
|| TARGET_E500_DOUBLE
;
5119 rs6000_double_float
= TARGET_E500_DOUBLE
;
5121 rs6000_isa_flags
&= ~OPTION_MASK_STRING
;
5127 if (have_cpu
&& !(rs6000_isa_flags_explicit
& OPTION_MASK_ISEL
))
5128 rs6000_isa_flags
&= ~OPTION_MASK_ISEL
;
5133 if (main_target_opt
)
5135 if (main_target_opt
->x_rs6000_single_float
!= rs6000_single_float
)
5136 error ("target attribute or pragma changes single precision floating "
5138 if (main_target_opt
->x_rs6000_double_float
!= rs6000_double_float
)
5139 error ("target attribute or pragma changes double precision floating "
5143 /* Detect invalid option combinations with E500. */
5146 rs6000_always_hint
= (rs6000_cpu
!= PROCESSOR_POWER4
5147 && rs6000_cpu
!= PROCESSOR_POWER5
5148 && rs6000_cpu
!= PROCESSOR_POWER6
5149 && rs6000_cpu
!= PROCESSOR_POWER7
5150 && rs6000_cpu
!= PROCESSOR_POWER8
5151 && rs6000_cpu
!= PROCESSOR_POWER9
5152 && rs6000_cpu
!= PROCESSOR_PPCA2
5153 && rs6000_cpu
!= PROCESSOR_CELL
5154 && rs6000_cpu
!= PROCESSOR_PPC476
);
5155 rs6000_sched_groups
= (rs6000_cpu
== PROCESSOR_POWER4
5156 || rs6000_cpu
== PROCESSOR_POWER5
5157 || rs6000_cpu
== PROCESSOR_POWER7
5158 || rs6000_cpu
== PROCESSOR_POWER8
);
5159 rs6000_align_branch_targets
= (rs6000_cpu
== PROCESSOR_POWER4
5160 || rs6000_cpu
== PROCESSOR_POWER5
5161 || rs6000_cpu
== PROCESSOR_POWER6
5162 || rs6000_cpu
== PROCESSOR_POWER7
5163 || rs6000_cpu
== PROCESSOR_POWER8
5164 || rs6000_cpu
== PROCESSOR_POWER9
5165 || rs6000_cpu
== PROCESSOR_PPCE500MC
5166 || rs6000_cpu
== PROCESSOR_PPCE500MC64
5167 || rs6000_cpu
== PROCESSOR_PPCE5500
5168 || rs6000_cpu
== PROCESSOR_PPCE6500
);
5170 /* Allow debug switches to override the above settings. These are set to -1
5171 in powerpcspe.opt to indicate the user hasn't directly set the switch. */
5172 if (TARGET_ALWAYS_HINT
>= 0)
5173 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
5175 if (TARGET_SCHED_GROUPS
>= 0)
5176 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
5178 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
5179 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
5181 rs6000_sched_restricted_insns_priority
5182 = (rs6000_sched_groups
? 1 : 0);
5184 /* Handle -msched-costly-dep option. */
5185 rs6000_sched_costly_dep
5186 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
5188 if (rs6000_sched_costly_dep_str
)
5190 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
5191 rs6000_sched_costly_dep
= no_dep_costly
;
5192 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
5193 rs6000_sched_costly_dep
= all_deps_costly
;
5194 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
5195 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
5196 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
5197 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
5199 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
5200 atoi (rs6000_sched_costly_dep_str
));
5203 /* Handle -minsert-sched-nops option. */
5204 rs6000_sched_insert_nops
5205 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
5207 if (rs6000_sched_insert_nops_str
)
5209 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
5210 rs6000_sched_insert_nops
= sched_finish_none
;
5211 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
5212 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
5213 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
5214 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
5216 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
5217 atoi (rs6000_sched_insert_nops_str
));
5220 /* Handle stack protector */
5221 if (!global_options_set
.x_rs6000_stack_protector_guard
)
5222 #ifdef TARGET_THREAD_SSP_OFFSET
5223 rs6000_stack_protector_guard
= SSP_TLS
;
5225 rs6000_stack_protector_guard
= SSP_GLOBAL
;
5228 #ifdef TARGET_THREAD_SSP_OFFSET
5229 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
5230 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
5233 if (global_options_set
.x_rs6000_stack_protector_guard_offset_str
)
5236 const char *str
= rs6000_stack_protector_guard_offset_str
;
5239 long offset
= strtol (str
, &endp
, 0);
5240 if (!*str
|| *endp
|| errno
)
5241 error ("%qs is not a valid number "
5242 "in -mstack-protector-guard-offset=", str
);
5244 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
5245 || (TARGET_64BIT
&& (offset
& 3)))
5246 error ("%qs is not a valid offset "
5247 "in -mstack-protector-guard-offset=", str
);
5249 rs6000_stack_protector_guard_offset
= offset
;
5252 if (global_options_set
.x_rs6000_stack_protector_guard_reg_str
)
5254 const char *str
= rs6000_stack_protector_guard_reg_str
;
5255 int reg
= decode_reg_name (str
);
5257 if (!IN_RANGE (reg
, 1, 31))
5258 error ("%qs is not a valid base register "
5259 "in -mstack-protector-guard-reg=", str
);
5261 rs6000_stack_protector_guard_reg
= reg
;
5264 if (rs6000_stack_protector_guard
== SSP_TLS
5265 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
5266 error ("-mstack-protector-guard=tls needs a valid base register");
5270 #ifdef TARGET_REGNAMES
5271 /* If the user desires alternate register names, copy in the
5272 alternate names now. */
5273 if (TARGET_REGNAMES
)
5274 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
5277 /* Set aix_struct_return last, after the ABI is determined.
5278 If -maix-struct-return or -msvr4-struct-return was explicitly
5279 used, don't override with the ABI default. */
5280 if (!global_options_set
.x_aix_struct_return
)
5281 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
5284 /* IBM XL compiler defaults to unsigned bitfields. */
5285 if (TARGET_XL_COMPAT
)
5286 flag_signed_bitfields
= 0;
5289 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
5290 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
5292 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
5294 /* We can only guarantee the availability of DI pseudo-ops when
5295 assembling for 64-bit targets. */
5298 targetm
.asm_out
.aligned_op
.di
= NULL
;
5299 targetm
.asm_out
.unaligned_op
.di
= NULL
;
5303 /* Set branch target alignment, if not optimizing for size. */
5306 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
5307 aligned 8byte to avoid misprediction by the branch predictor. */
5308 if (rs6000_cpu
== PROCESSOR_TITAN
5309 || rs6000_cpu
== PROCESSOR_CELL
)
5311 if (align_functions
<= 0)
5312 align_functions
= 8;
5313 if (align_jumps
<= 0)
5315 if (align_loops
<= 0)
5318 if (rs6000_align_branch_targets
)
5320 if (align_functions
<= 0)
5321 align_functions
= 16;
5322 if (align_jumps
<= 0)
5324 if (align_loops
<= 0)
5326 can_override_loop_align
= 1;
5330 if (align_jumps_max_skip
<= 0)
5331 align_jumps_max_skip
= 15;
5332 if (align_loops_max_skip
<= 0)
5333 align_loops_max_skip
= 15;
5336 /* Arrange to save and restore machine status around nested functions. */
5337 init_machine_status
= rs6000_init_machine_status
;
5339 /* We should always be splitting complex arguments, but we can't break
5340 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5341 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
5342 targetm
.calls
.split_complex_arg
= NULL
;
5344 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5345 if (DEFAULT_ABI
== ABI_AIX
)
5346 targetm
.calls
.custom_function_descriptors
= 0;
5349 /* Initialize rs6000_cost with the appropriate target costs. */
5351 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
5355 case PROCESSOR_RS64A
:
5356 rs6000_cost
= &rs64a_cost
;
5359 case PROCESSOR_MPCCORE
:
5360 rs6000_cost
= &mpccore_cost
;
5363 case PROCESSOR_PPC403
:
5364 rs6000_cost
= &ppc403_cost
;
5367 case PROCESSOR_PPC405
:
5368 rs6000_cost
= &ppc405_cost
;
5371 case PROCESSOR_PPC440
:
5372 rs6000_cost
= &ppc440_cost
;
5375 case PROCESSOR_PPC476
:
5376 rs6000_cost
= &ppc476_cost
;
5379 case PROCESSOR_PPC601
:
5380 rs6000_cost
= &ppc601_cost
;
5383 case PROCESSOR_PPC603
:
5384 rs6000_cost
= &ppc603_cost
;
5387 case PROCESSOR_PPC604
:
5388 rs6000_cost
= &ppc604_cost
;
5391 case PROCESSOR_PPC604e
:
5392 rs6000_cost
= &ppc604e_cost
;
5395 case PROCESSOR_PPC620
:
5396 rs6000_cost
= &ppc620_cost
;
5399 case PROCESSOR_PPC630
:
5400 rs6000_cost
= &ppc630_cost
;
5403 case PROCESSOR_CELL
:
5404 rs6000_cost
= &ppccell_cost
;
5407 case PROCESSOR_PPC750
:
5408 case PROCESSOR_PPC7400
:
5409 rs6000_cost
= &ppc750_cost
;
5412 case PROCESSOR_PPC7450
:
5413 rs6000_cost
= &ppc7450_cost
;
5416 case PROCESSOR_PPC8540
:
5417 case PROCESSOR_PPC8548
:
5418 rs6000_cost
= &ppc8540_cost
;
5421 case PROCESSOR_PPCE300C2
:
5422 case PROCESSOR_PPCE300C3
:
5423 rs6000_cost
= &ppce300c2c3_cost
;
5426 case PROCESSOR_PPCE500MC
:
5427 rs6000_cost
= &ppce500mc_cost
;
5430 case PROCESSOR_PPCE500MC64
:
5431 rs6000_cost
= &ppce500mc64_cost
;
5434 case PROCESSOR_PPCE5500
:
5435 rs6000_cost
= &ppce5500_cost
;
5438 case PROCESSOR_PPCE6500
:
5439 rs6000_cost
= &ppce6500_cost
;
5442 case PROCESSOR_TITAN
:
5443 rs6000_cost
= &titan_cost
;
5446 case PROCESSOR_POWER4
:
5447 case PROCESSOR_POWER5
:
5448 rs6000_cost
= &power4_cost
;
5451 case PROCESSOR_POWER6
:
5452 rs6000_cost
= &power6_cost
;
5455 case PROCESSOR_POWER7
:
5456 rs6000_cost
= &power7_cost
;
5459 case PROCESSOR_POWER8
:
5460 rs6000_cost
= &power8_cost
;
5463 case PROCESSOR_POWER9
:
5464 rs6000_cost
= &power9_cost
;
5467 case PROCESSOR_PPCA2
:
5468 rs6000_cost
= &ppca2_cost
;
5477 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
5478 rs6000_cost
->simultaneous_prefetches
,
5479 global_options
.x_param_values
,
5480 global_options_set
.x_param_values
);
5481 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, rs6000_cost
->l1_cache_size
,
5482 global_options
.x_param_values
,
5483 global_options_set
.x_param_values
);
5484 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
5485 rs6000_cost
->cache_line_size
,
5486 global_options
.x_param_values
,
5487 global_options_set
.x_param_values
);
5488 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, rs6000_cost
->l2_cache_size
,
5489 global_options
.x_param_values
,
5490 global_options_set
.x_param_values
);
5492 /* Increase loop peeling limits based on performance analysis. */
5493 maybe_set_param_value (PARAM_MAX_PEELED_INSNS
, 400,
5494 global_options
.x_param_values
,
5495 global_options_set
.x_param_values
);
5496 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS
, 400,
5497 global_options
.x_param_values
,
5498 global_options_set
.x_param_values
);
5500 /* Use the 'model' -fsched-pressure algorithm by default. */
5501 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
,
5502 SCHED_PRESSURE_MODEL
,
5503 global_options
.x_param_values
,
5504 global_options_set
.x_param_values
);
5506 /* If using typedef char *va_list, signal that
5507 __builtin_va_start (&ap, 0) can be optimized to
5508 ap = __builtin_next_arg (0). */
5509 if (DEFAULT_ABI
!= ABI_V4
)
5510 targetm
.expand_builtin_va_start
= NULL
;
5513 /* Set up single/double float flags.
5514 If TARGET_HARD_FLOAT is set, but neither single or double is set,
5515 then set both flags. */
5516 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
5517 && rs6000_single_float
== 0 && rs6000_double_float
== 0)
5518 rs6000_single_float
= rs6000_double_float
= 1;
5520 /* If not explicitly specified via option, decide whether to generate indexed
5521 load/store instructions. A value of -1 indicates that the
5522 initial value of this variable has not been overwritten. During
5523 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5524 if (TARGET_AVOID_XFORM
== -1)
5525 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5526 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5527 need indexed accesses and the type used is the scalar type of the element
5528 being loaded or stored. */
5529 TARGET_AVOID_XFORM
= (rs6000_cpu
== PROCESSOR_POWER6
&& TARGET_CMPB
5530 && !TARGET_ALTIVEC
);
5532 /* Set the -mrecip options. */
5533 if (rs6000_recip_name
)
5535 char *p
= ASTRDUP (rs6000_recip_name
);
5537 unsigned int mask
, i
;
5540 while ((q
= strtok (p
, ",")) != NULL
)
5551 if (!strcmp (q
, "default"))
5552 mask
= ((TARGET_RECIP_PRECISION
)
5553 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
5556 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
5557 if (!strcmp (q
, recip_options
[i
].string
))
5559 mask
= recip_options
[i
].mask
;
5563 if (i
== ARRAY_SIZE (recip_options
))
5565 error ("unknown option for -mrecip=%s", q
);
5573 rs6000_recip_control
&= ~mask
;
5575 rs6000_recip_control
|= mask
;
5579 /* Set the builtin mask of the various options used that could affect which
5580 builtins were used. In the past we used target_flags, but we've run out
5581 of bits, and some options like SPE and PAIRED are no longer in
5583 rs6000_builtin_mask
= rs6000_builtin_mask_calculate ();
5584 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
5585 rs6000_print_builtin_options (stderr
, 0, "builtin mask",
5586 rs6000_builtin_mask
);
5588 /* Initialize all of the registers. */
5589 rs6000_init_hard_regno_mode_ok (global_init_p
);
5591 /* Save the initial options in case the user does function specific options */
5593 target_option_default_node
= target_option_current_node
5594 = build_target_option_node (&global_options
);
5596 /* If not explicitly specified via option, decide whether to generate the
5597 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5598 if (TARGET_LINK_STACK
== -1)
5599 SET_TARGET_LINK_STACK (rs6000_cpu
== PROCESSOR_PPC476
&& flag_pic
);
5604 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5605 define the target cpu type. */
5608 rs6000_option_override (void)
5610 (void) rs6000_option_override_internal (true);
5614 /* Implement targetm.vectorize.builtin_mask_for_load. */
5616 rs6000_builtin_mask_for_load (void)
5618 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5619 if ((TARGET_ALTIVEC
&& !TARGET_VSX
)
5620 || (TARGET_VSX
&& !TARGET_EFFICIENT_UNALIGNED_VSX
))
5621 return altivec_builtin_mask_for_load
;
5626 /* Implement LOOP_ALIGN. */
5628 rs6000_loop_align (rtx label
)
5633 /* Don't override loop alignment if -falign-loops was specified. */
5634 if (!can_override_loop_align
)
5635 return align_loops_log
;
5637 bb
= BLOCK_FOR_INSN (label
);
5638 ninsns
= num_loop_insns(bb
->loop_father
);
5640 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5641 if (ninsns
> 4 && ninsns
<= 8
5642 && (rs6000_cpu
== PROCESSOR_POWER4
5643 || rs6000_cpu
== PROCESSOR_POWER5
5644 || rs6000_cpu
== PROCESSOR_POWER6
5645 || rs6000_cpu
== PROCESSOR_POWER7
5646 || rs6000_cpu
== PROCESSOR_POWER8
5647 || rs6000_cpu
== PROCESSOR_POWER9
))
5650 return align_loops_log
;
5653 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5655 rs6000_loop_align_max_skip (rtx_insn
*label
)
5657 return (1 << rs6000_loop_align (label
)) - 1;
/* targetm.vectorize.vector_alignment_reachable implementation.
   NOTE(review): the extraction that produced this file dropped several
   original lines (5663-5671, 5673-5684, 5686-5689): the function's opening
   brace, the bodies/returns of both alignment-flag branches, and the final
   return are missing below.  Restore them from the upstream rs6000 back end
   before attempting to compile.  */
5660 /* Return true iff, data reference of TYPE can reach vector alignment (16)
5661 after applying N number of iterations. This routine does not determine
5662 how may iterations are required to reach desired alignment. */
5665 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
/* Natural-alignment case; its branch body was lost in extraction.  */
5672 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
/* -malign-power case; its branch body was lost in extraction.  */
5675 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
5685 /* Assuming that all other types are naturally aligned. CHECKME! */
5690 /* Return true if the vector misalignment factor is supported by the
5693 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
5700 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5703 /* Return if movmisalign pattern is not supported for this mode. */
5704 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
5707 if (misalignment
== -1)
5709 /* Misalignment factor is unknown at compile time but we know
5710 it's word aligned. */
5711 if (rs6000_vector_alignment_reachable (type
, is_packed
))
5713 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
5715 if (element_size
== 64 || element_size
== 32)
5722 /* VSX supports word-aligned vector. */
5723 if (misalignment
% 4 == 0)
5729 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5731 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5732 tree vectype
, int misalign
)
5737 switch (type_of_cost
)
5747 case cond_branch_not_taken
:
5756 case vec_promote_demote
:
5762 case cond_branch_taken
:
5765 case unaligned_load
:
5766 if (TARGET_P9_VECTOR
)
5769 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5772 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5774 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5776 /* Double word aligned. */
5784 /* Double word aligned. */
5788 /* Unknown misalignment. */
5801 /* Misaligned loads are not supported. */
5806 case unaligned_store
:
5807 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5810 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5812 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5814 /* Double word aligned. */
5822 /* Double word aligned. */
5826 /* Unknown misalignment. */
5839 /* Misaligned stores are not supported. */
5845 /* This is a rough approximation assuming non-constant elements
5846 constructed into a vector via element insertion. FIXME:
5847 vec_construct is not granular enough for uniformly good
5848 decisions. If the initialization is a splat, this is
5849 cheaper than we estimate. Improve this someday. */
5850 elem_type
= TREE_TYPE (vectype
);
5851 /* 32-bit vectors loaded into registers are stored as double
5852 precision, so we need 2 permutes, 2 converts, and 1 merge
5853 to construct a vector of short floats from them. */
5854 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5855 && TYPE_PRECISION (elem_type
) == 32)
5857 /* On POWER9, integer vector types are built up in GPRs and then
5858 use a direct move (2 cycles). For POWER8 this is even worse,
5859 as we need two direct moves and a merge, and the direct moves
5861 else if (INTEGRAL_TYPE_P (elem_type
))
5863 if (TARGET_P9_VECTOR
)
5864 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5866 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 11;
5869 /* V2DFmode doesn't need a direct move. */
/* targetm.vectorize.preferred_simd_mode: choose the vector mode the
   vectorizer should prefer for scalar MODE.
   NOTE(review): the extraction dropped original lines 5878-5888 and
   5890-5914 plus the continuation of 5915, so the return type, the
   per-mode selection under Altivec/VSX, and the function's returns are
   missing below.  Restore from the upstream rs6000 back end.  */
5877 /* Implement targetm.vectorize.preferred_simd_mode. */
5880 rs6000_preferred_simd_mode (machine_mode mode
)
/* Altivec/VSX targets pick 16-byte vector modes (body lost in extraction).  */
5889 if (TARGET_ALTIVEC
|| TARGET_VSX
)
/* Paired-float fallback; this condition is truncated by the extraction.  */
5915 if (TARGET_PAIRED_FLOAT
/* Per-loop bookkeeping for the vectorizer cost model: the loop being
   costed (NULL for basic-block vectorization) and the accumulated costs
   indexed by vect_prologue / vect_body / vect_epilogue.  */
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;
  unsigned cost[3];
} rs6000_cost_data;
5927 /* Test for likely overcommitment of vector hardware resources. If a
5928 loop iteration is relatively large, and too large a percentage of
5929 instructions in the loop are vectorized, the cost model may not
5930 adequately reflect delays from unavailable vector resources.
5931 Penalize the loop body cost for this case. */
5934 rs6000_density_test (rs6000_cost_data
*data
)
5936 const int DENSITY_PCT_THRESHOLD
= 85;
5937 const int DENSITY_SIZE_THRESHOLD
= 70;
5938 const int DENSITY_PENALTY
= 10;
5939 struct loop
*loop
= data
->loop_info
;
5940 basic_block
*bbs
= get_loop_body (loop
);
5941 int nbbs
= loop
->num_nodes
;
5942 int vec_cost
= data
->cost
[vect_body
], not_vec_cost
= 0;
5945 for (i
= 0; i
< nbbs
; i
++)
5947 basic_block bb
= bbs
[i
];
5948 gimple_stmt_iterator gsi
;
5950 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5952 gimple
*stmt
= gsi_stmt (gsi
);
5953 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5955 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5956 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5962 density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5964 if (density_pct
> DENSITY_PCT_THRESHOLD
5965 && vec_cost
+ not_vec_cost
> DENSITY_SIZE_THRESHOLD
)
5967 data
->cost
[vect_body
] = vec_cost
* (100 + DENSITY_PENALTY
) / 100;
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_NOTE
, vect_location
,
5970 "density %d%%, cost %d exceeds threshold, penalizing "
5971 "loop body cost by %d%%", density_pct
,
5972 vec_cost
+ not_vec_cost
, DENSITY_PENALTY
);
5976 /* Implement targetm.vectorize.init_cost. */
5978 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5979 instruction is needed by the vectorization. */
5980 static bool rs6000_vect_nonmem
;
5983 rs6000_init_cost (struct loop
*loop_info
)
5985 rs6000_cost_data
*data
= XNEW (struct _rs6000_cost_data
);
5986 data
->loop_info
= loop_info
;
5987 data
->cost
[vect_prologue
] = 0;
5988 data
->cost
[vect_body
] = 0;
5989 data
->cost
[vect_epilogue
] = 0;
5990 rs6000_vect_nonmem
= false;
5994 /* Implement targetm.vectorize.add_stmt_cost. */
5997 rs6000_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
5998 struct _stmt_vec_info
*stmt_info
, int misalign
,
5999 enum vect_cost_model_location where
)
6001 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
6002 unsigned retval
= 0;
6004 if (flag_vect_cost_model
)
6006 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6007 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
6009 /* Statements in an inner loop relative to the loop being
6010 vectorized are weighted more heavily. The value here is
6011 arbitrary and could potentially be improved with analysis. */
6012 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6013 count
*= 50; /* FIXME. */
6015 retval
= (unsigned) (count
* stmt_cost
);
6016 cost_data
->cost
[where
] += retval
;
6018 /* Check whether we're doing something other than just a copy loop.
6019 Not all such loops may be profitably vectorized; see
6020 rs6000_finish_cost. */
6021 if ((kind
== vec_to_scalar
|| kind
== vec_perm
6022 || kind
== vec_promote_demote
|| kind
== vec_construct
6023 || kind
== scalar_to_vec
)
6024 || (where
== vect_body
&& kind
== vector_stmt
))
6025 rs6000_vect_nonmem
= true;
6031 /* Implement targetm.vectorize.finish_cost. */
6034 rs6000_finish_cost (void *data
, unsigned *prologue_cost
,
6035 unsigned *body_cost
, unsigned *epilogue_cost
)
6037 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
6039 if (cost_data
->loop_info
)
6040 rs6000_density_test (cost_data
);
6042 /* Don't vectorize minimum-vectorization-factor, simple copy loops
6043 that require versioning for any reason. The vectorization is at
6044 best a wash inside the loop, and the versioning checks make
6045 profitability highly unlikely and potentially quite harmful. */
6046 if (cost_data
->loop_info
)
6048 loop_vec_info vec_info
= loop_vec_info_for_loop (cost_data
->loop_info
);
6049 if (!rs6000_vect_nonmem
6050 && LOOP_VINFO_VECT_FACTOR (vec_info
) == 2
6051 && LOOP_REQUIRES_VERSIONING (vec_info
))
6052 cost_data
->cost
[vect_body
] += 10000;
6055 *prologue_cost
= cost_data
->cost
[vect_prologue
];
6056 *body_cost
= cost_data
->cost
[vect_body
];
6057 *epilogue_cost
= cost_data
->cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data: release the record
   allocated by rs6000_init_cost.  DATA may be any pointer previously
   returned by that hook.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
/* Maps a scalar math builtin FN onto the IBM MASS vector library entry
   ("<name>d2" for V2DF, "<name>f4" for V4SF) and manufactures an extern
   function decl for it.
   NOTE(review): the extraction dropped many original lines from this
   function — notably 6070-6075 (return type/signature tail/opening brace
   and locals such as `n`, `in_n`, `n_args`, `bname`, `name`),
   6088-6089 (the early return), 6095-6129 (the entire switch over
   CASE_CFN_* math functions that sets n_args and validates FN),
   6139-6149, 6152-6158 (name-buffer setup), 6162-6164 and 6172-6175
   (final return of new_fndecl).  Restore from the upstream rs6000 back
   end before compiling.  */
6068 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
6069 library with vectorized intrinsics. */
6072 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
6076 const char *suffix
= NULL
;
6077 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
6080 machine_mode el_mode
, in_mode
;
6083 /* Libmass is suitable for unsafe math only as it does not correctly support
6084 parts of IEEE with the required precision such as denormals. Only support
6085 it if we have VSX to use the simd d2 or f4 functions.
6086 XXX: Add variable length support. */
6087 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
/* Modes and lane counts of the requested output/input vector types.  */
6090 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6091 n
= TYPE_VECTOR_SUBPARTS (type_out
);
6092 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6093 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
/* Only same-mode, same-lane-count mappings are supported (continuation
   of this condition was lost in extraction).  */
6094 if (el_mode
!= in_mode
/* Pick the scalar builtin and MASS suffix by element mode.  */
6130 if (el_mode
== DFmode
&& n
== 2)
6132 bdecl
= mathfn_built_in (double_type_node
, fn
);
6133 suffix
= "d2"; /* pow -> powd2 */
6135 else if (el_mode
== SFmode
&& n
== 4)
6137 bdecl
= mathfn_built_in (float_type_node
, fn
);
6138 suffix
= "4"; /* powf -> powf4 */
6150 gcc_assert (suffix
!= NULL
);
6151 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
/* Strip the "__builtin_" prefix and append the MASS suffix.  */
6155 strcpy (name
, bname
+ sizeof ("__builtin_") - 1);
6156 strcat (name
, suffix
);
/* Build the vector function type: unary or binary per n_args.  */
6159 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
6160 else if (n_args
== 2)
6161 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
6165 /* Build a function declaration for the vectorized function. */
6166 new_fndecl
= build_decl (BUILTINS_LOCATION
,
6167 FUNCTION_DECL
, get_identifier (name
), fntype
);
6168 TREE_PUBLIC (new_fndecl
) = 1;
6169 DECL_EXTERNAL (new_fndecl
) = 1;
6170 DECL_IS_NOVOPS (new_fndecl
) = 1;
6171 TREE_READONLY (new_fndecl
) = 1;
6176 /* Returns a function decl for a vectorized version of the builtin function
6177 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6178 if it is not available. */
6181 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
6184 machine_mode in_mode
, out_mode
;
6187 if (TARGET_DEBUG_BUILTIN
)
6188 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6189 combined_fn_name (combined_fn (fn
)),
6190 GET_MODE_NAME (TYPE_MODE (type_out
)),
6191 GET_MODE_NAME (TYPE_MODE (type_in
)));
6193 if (TREE_CODE (type_out
) != VECTOR_TYPE
6194 || TREE_CODE (type_in
) != VECTOR_TYPE
6195 || !TARGET_VECTORIZE_BUILTINS
)
6198 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6199 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
6200 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6201 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
6206 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6207 && out_mode
== DFmode
&& out_n
== 2
6208 && in_mode
== DFmode
&& in_n
== 2)
6209 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNDP
];
6210 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6211 && out_mode
== SFmode
&& out_n
== 4
6212 && in_mode
== SFmode
&& in_n
== 4)
6213 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNSP
];
6214 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6215 && out_mode
== SFmode
&& out_n
== 4
6216 && in_mode
== SFmode
&& in_n
== 4)
6217 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_COPYSIGN_V4SF
];
6220 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6221 && out_mode
== DFmode
&& out_n
== 2
6222 && in_mode
== DFmode
&& in_n
== 2)
6223 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIP
];
6224 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6225 && out_mode
== SFmode
&& out_n
== 4
6226 && in_mode
== SFmode
&& in_n
== 4)
6227 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIP
];
6228 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6229 && out_mode
== SFmode
&& out_n
== 4
6230 && in_mode
== SFmode
&& in_n
== 4)
6231 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIP
];
6234 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6235 && out_mode
== DFmode
&& out_n
== 2
6236 && in_mode
== DFmode
&& in_n
== 2)
6237 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIM
];
6238 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6239 && out_mode
== SFmode
&& out_n
== 4
6240 && in_mode
== SFmode
&& in_n
== 4)
6241 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIM
];
6242 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6243 && out_mode
== SFmode
&& out_n
== 4
6244 && in_mode
== SFmode
&& in_n
== 4)
6245 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIM
];
6248 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6249 && out_mode
== DFmode
&& out_n
== 2
6250 && in_mode
== DFmode
&& in_n
== 2)
6251 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDDP
];
6252 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6253 && out_mode
== SFmode
&& out_n
== 4
6254 && in_mode
== SFmode
&& in_n
== 4)
6255 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDSP
];
6256 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6257 && out_mode
== SFmode
&& out_n
== 4
6258 && in_mode
== SFmode
&& in_n
== 4)
6259 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VMADDFP
];
6262 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6263 && out_mode
== DFmode
&& out_n
== 2
6264 && in_mode
== DFmode
&& in_n
== 2)
6265 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIZ
];
6266 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6267 && out_mode
== SFmode
&& out_n
== 4
6268 && in_mode
== SFmode
&& in_n
== 4)
6269 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIZ
];
6270 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6271 && out_mode
== SFmode
&& out_n
== 4
6272 && in_mode
== SFmode
&& in_n
== 4)
6273 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIZ
];
6276 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6277 && flag_unsafe_math_optimizations
6278 && out_mode
== DFmode
&& out_n
== 2
6279 && in_mode
== DFmode
&& in_n
== 2)
6280 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPI
];
6281 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6282 && flag_unsafe_math_optimizations
6283 && out_mode
== SFmode
&& out_n
== 4
6284 && in_mode
== SFmode
&& in_n
== 4)
6285 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPI
];
6288 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6289 && !flag_trapping_math
6290 && out_mode
== DFmode
&& out_n
== 2
6291 && in_mode
== DFmode
&& in_n
== 2)
6292 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIC
];
6293 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6294 && !flag_trapping_math
6295 && out_mode
== SFmode
&& out_n
== 4
6296 && in_mode
== SFmode
&& in_n
== 4)
6297 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIC
];
6303 /* Generate calls to libmass if appropriate. */
6304 if (rs6000_veclib_handler
)
6305 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
6310 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6313 rs6000_builtin_md_vectorized_function (tree fndecl
, tree type_out
,
6316 machine_mode in_mode
, out_mode
;
6319 if (TARGET_DEBUG_BUILTIN
)
6320 fprintf (stderr
, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6321 IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
6322 GET_MODE_NAME (TYPE_MODE (type_out
)),
6323 GET_MODE_NAME (TYPE_MODE (type_in
)));
6325 if (TREE_CODE (type_out
) != VECTOR_TYPE
6326 || TREE_CODE (type_in
) != VECTOR_TYPE
6327 || !TARGET_VECTORIZE_BUILTINS
)
6330 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6331 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
6332 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6333 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
6335 enum rs6000_builtins fn
6336 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
6339 case RS6000_BUILTIN_RSQRTF
:
6340 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
6341 && out_mode
== SFmode
&& out_n
== 4
6342 && in_mode
== SFmode
&& in_n
== 4)
6343 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRSQRTFP
];
6345 case RS6000_BUILTIN_RSQRT
:
6346 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6347 && out_mode
== DFmode
&& out_n
== 2
6348 && in_mode
== DFmode
&& in_n
== 2)
6349 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
6351 case RS6000_BUILTIN_RECIPF
:
6352 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
6353 && out_mode
== SFmode
&& out_n
== 4
6354 && in_mode
== SFmode
&& in_n
== 4)
6355 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRECIPFP
];
6357 case RS6000_BUILTIN_RECIP
:
6358 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6359 && out_mode
== DFmode
&& out_n
== 2
6360 && in_mode
== DFmode
&& in_n
== 2)
6361 return rs6000_builtin_decls
[VSX_BUILTIN_RECIP_V2DF
];
6369 /* Default CPU string for rs6000*_file_start functions. */
6370 static const char *rs6000_default_cpu
;
6372 /* Do anything needed at the start of the asm file. */
6375 rs6000_file_start (void)
6378 const char *start
= buffer
;
6379 FILE *file
= asm_out_file
;
6381 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
6383 default_file_start ();
6385 if (flag_verbose_asm
)
6387 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
6389 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
6391 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
6395 if (global_options_set
.x_rs6000_cpu_index
)
6397 fprintf (file
, "%s -mcpu=%s", start
,
6398 processor_target_table
[rs6000_cpu_index
].name
);
6402 if (global_options_set
.x_rs6000_tune_index
)
6404 fprintf (file
, "%s -mtune=%s", start
,
6405 processor_target_table
[rs6000_tune_index
].name
);
6409 if (PPC405_ERRATUM77
)
6411 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
6415 #ifdef USING_ELFOS_H
6416 switch (rs6000_sdata
)
6418 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
6419 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
6420 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
6421 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
6424 if (rs6000_sdata
&& g_switch_value
)
6426 fprintf (file
, "%s -G %d", start
,
6436 #ifdef USING_ELFOS_H
6437 if (!(rs6000_default_cpu
&& rs6000_default_cpu
[0])
6438 && !global_options_set
.x_rs6000_cpu_index
)
6440 fputs ("\t.machine ", asm_out_file
);
6441 if ((rs6000_isa_flags
& OPTION_MASK_MODULO
) != 0)
6442 fputs ("power9\n", asm_out_file
);
6443 else if ((rs6000_isa_flags
& OPTION_MASK_DIRECT_MOVE
) != 0)
6444 fputs ("power8\n", asm_out_file
);
6445 else if ((rs6000_isa_flags
& OPTION_MASK_POPCNTD
) != 0)
6446 fputs ("power7\n", asm_out_file
);
6447 else if ((rs6000_isa_flags
& OPTION_MASK_CMPB
) != 0)
6448 fputs ("power6\n", asm_out_file
);
6449 else if ((rs6000_isa_flags
& OPTION_MASK_POPCNTB
) != 0)
6450 fputs ("power5\n", asm_out_file
);
6451 else if ((rs6000_isa_flags
& OPTION_MASK_MFCRF
) != 0)
6452 fputs ("power4\n", asm_out_file
);
6453 else if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) != 0)
6454 fputs ("ppc64\n", asm_out_file
);
6456 fputs ("ppc\n", asm_out_file
);
6460 if (DEFAULT_ABI
== ABI_ELFv2
)
6461 fprintf (file
, "\t.abiversion 2\n");
6465 /* Return nonzero if this function is known to have a null epilogue. */
6468 direct_return (void)
6470 if (reload_completed
)
6472 rs6000_stack_t
*info
= rs6000_stack_info ();
6474 if (info
->first_gp_reg_save
== 32
6475 && info
->first_fp_reg_save
== 64
6476 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
6477 && ! info
->lr_save_p
6478 && ! info
->cr_save_p
6479 && info
->vrsave_size
== 0
6487 /* Return the number of instructions it takes to form a constant in an
6488 integer register. */
6491 num_insns_constant_wide (HOST_WIDE_INT value
)
6493 /* signed constant loadable with addi */
6494 if (((unsigned HOST_WIDE_INT
) value
+ 0x8000) < 0x10000)
6497 /* constant loadable with addis */
6498 else if ((value
& 0xffff) == 0
6499 && (value
>> 31 == -1 || value
>> 31 == 0))
6502 else if (TARGET_POWERPC64
)
6504 HOST_WIDE_INT low
= ((value
& 0xffffffff) ^ 0x80000000) - 0x80000000;
6505 HOST_WIDE_INT high
= value
>> 31;
6507 if (high
== 0 || high
== -1)
6513 return num_insns_constant_wide (high
) + 1;
6515 return num_insns_constant_wide (low
) + 1;
6517 return (num_insns_constant_wide (high
)
6518 + num_insns_constant_wide (low
) + 1);
6526 num_insns_constant (rtx op
, machine_mode mode
)
6528 HOST_WIDE_INT low
, high
;
6530 switch (GET_CODE (op
))
6533 if ((INTVAL (op
) >> 31) != 0 && (INTVAL (op
) >> 31) != -1
6534 && rs6000_is_valid_and_mask (op
, mode
))
6537 return num_insns_constant_wide (INTVAL (op
));
6539 case CONST_WIDE_INT
:
6542 int ins
= CONST_WIDE_INT_NUNITS (op
) - 1;
6543 for (i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6544 ins
+= num_insns_constant_wide (CONST_WIDE_INT_ELT (op
, i
));
6549 if (mode
== SFmode
|| mode
== SDmode
)
6553 if (DECIMAL_FLOAT_MODE_P (mode
))
6554 REAL_VALUE_TO_TARGET_DECIMAL32
6555 (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6557 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6558 return num_insns_constant_wide ((HOST_WIDE_INT
) l
);
6562 if (DECIMAL_FLOAT_MODE_P (mode
))
6563 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6565 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6566 high
= l
[WORDS_BIG_ENDIAN
== 0];
6567 low
= l
[WORDS_BIG_ENDIAN
!= 0];
6570 return (num_insns_constant_wide (low
)
6571 + num_insns_constant_wide (high
));
6574 if ((high
== 0 && low
>= 0)
6575 || (high
== -1 && low
< 0))
6576 return num_insns_constant_wide (low
);
6578 else if (rs6000_is_valid_and_mask (op
, mode
))
6582 return num_insns_constant_wide (high
) + 1;
6585 return (num_insns_constant_wide (high
)
6586 + num_insns_constant_wide (low
) + 1);
6594 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6595 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6596 corresponding element of the vector, but for V4SFmode and V2SFmode,
6597 the corresponding "float" is interpreted as an SImode integer. */
6600 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6604 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6605 gcc_assert (GET_MODE (op
) != V2DImode
6606 && GET_MODE (op
) != V2DFmode
);
6608 tmp
= CONST_VECTOR_ELT (op
, elt
);
6609 if (GET_MODE (op
) == V4SFmode
6610 || GET_MODE (op
) == V2SFmode
)
6611 tmp
= gen_lowpart (SImode
, tmp
);
6612 return INTVAL (tmp
);
6615 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6616 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6617 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6618 all items are set to the same value and contain COPIES replicas of the
6619 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6620 operand and the others are set to the value of the operand's msb. */
6623 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6625 machine_mode mode
= GET_MODE (op
);
6626 machine_mode inner
= GET_MODE_INNER (mode
);
6634 HOST_WIDE_INT splat_val
;
6635 HOST_WIDE_INT msb_val
;
6637 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6640 nunits
= GET_MODE_NUNITS (mode
);
6641 bitsize
= GET_MODE_BITSIZE (inner
);
6642 mask
= GET_MODE_MASK (inner
);
6644 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6646 msb_val
= val
>= 0 ? 0 : -1;
6648 /* Construct the value to be splatted, if possible. If not, return 0. */
6649 for (i
= 2; i
<= copies
; i
*= 2)
6651 HOST_WIDE_INT small_val
;
6653 small_val
= splat_val
>> bitsize
;
6655 if (splat_val
!= ((HOST_WIDE_INT
)
6656 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6657 | (small_val
& mask
)))
6659 splat_val
= small_val
;
6662 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6663 if (EASY_VECTOR_15 (splat_val
))
6666 /* Also check if we can splat, and then add the result to itself. Do so if
6667 the value is positive, of if the splat instruction is using OP's mode;
6668 for splat_val < 0, the splat and the add should use the same mode. */
6669 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6670 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6673 /* Also check if are loading up the most significant bit which can be done by
6674 loading up -1 and shifting the value left by -1. */
6675 else if (EASY_VECTOR_MSB (splat_val
, inner
))
6681 /* Check if VAL is present in every STEP-th element, and the
6682 other elements are filled with its most significant bit. */
6683 for (i
= 1; i
< nunits
; ++i
)
6685 HOST_WIDE_INT desired_val
;
6686 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6687 if ((i
& (step
- 1)) == 0)
6690 desired_val
= msb_val
;
6692 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6699 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6700 instruction, filling in the bottom elements with 0 or -1.
6702 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6703 for the number of zeroes to shift in, or negative for the number of 0xff
6706 OP is a CONST_VECTOR. */
6709 vspltis_shifted (rtx op
)
6711 machine_mode mode
= GET_MODE (op
);
6712 machine_mode inner
= GET_MODE_INNER (mode
);
6720 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6723 /* We need to create pseudo registers to do the shift, so don't recognize
6724 shift vector constants after reload. */
6725 if (!can_create_pseudo_p ())
6728 nunits
= GET_MODE_NUNITS (mode
);
6729 mask
= GET_MODE_MASK (inner
);
6731 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6733 /* Check if the value can really be the operand of a vspltis[bhw]. */
6734 if (EASY_VECTOR_15 (val
))
6737 /* Also check if we are loading up the most significant bit which can be done
6738 by loading up -1 and shifting the value left by -1. */
6739 else if (EASY_VECTOR_MSB (val
, inner
))
6745 /* Check if VAL is present in every STEP-th element until we find elements
6746 that are 0 or all 1 bits. */
6747 for (i
= 1; i
< nunits
; ++i
)
6749 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6750 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6752 /* If the value isn't the splat value, check for the remaining elements
6758 for (j
= i
+1; j
< nunits
; ++j
)
6760 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6761 if (const_vector_elt_as_int (op
, elt2
) != 0)
6765 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6768 else if ((elt_val
& mask
) == mask
)
6770 for (j
= i
+1; j
< nunits
; ++j
)
6772 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6773 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6777 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6785 /* If all elements are equal, we don't need to do VLSDOI. */
6790 /* Return true if OP is of the given MODE and can be synthesized
6791 with a vspltisb, vspltish or vspltisw. */
6794 easy_altivec_constant (rtx op
, machine_mode mode
)
6796 unsigned step
, copies
;
6798 if (mode
== VOIDmode
)
6799 mode
= GET_MODE (op
);
6800 else if (mode
!= GET_MODE (op
))
6803 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6805 if (mode
== V2DFmode
)
6806 return zero_constant (op
, mode
);
6808 else if (mode
== V2DImode
)
6810 if (GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
6811 || GET_CODE (CONST_VECTOR_ELT (op
, 1)) != CONST_INT
)
6814 if (zero_constant (op
, mode
))
6817 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6818 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6824 /* V1TImode is a special container for TImode. Ignore for now. */
6825 else if (mode
== V1TImode
)
6828 /* Start with a vspltisw. */
6829 step
= GET_MODE_NUNITS (mode
) / 4;
6832 if (vspltis_constant (op
, step
, copies
))
6835 /* Then try with a vspltish. */
6841 if (vspltis_constant (op
, step
, copies
))
6844 /* And finally a vspltisb. */
6850 if (vspltis_constant (op
, step
, copies
))
6853 if (vspltis_shifted (op
) != 0)
6859 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6860 result is OP. Abort if it is not possible. */
6863 gen_easy_altivec_constant (rtx op
)
6865 machine_mode mode
= GET_MODE (op
);
6866 int nunits
= GET_MODE_NUNITS (mode
);
6867 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6868 unsigned step
= nunits
/ 4;
6869 unsigned copies
= 1;
6871 /* Start with a vspltisw. */
6872 if (vspltis_constant (op
, step
, copies
))
6873 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6875 /* Then try with a vspltish. */
6881 if (vspltis_constant (op
, step
, copies
))
6882 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6884 /* And finally a vspltisb. */
6890 if (vspltis_constant (op
, step
, copies
))
6891 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6896 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6897 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6899 Return the number of instructions needed (1 or 2) into the address pointed
6902 Return the constant that is being split via CONSTANT_PTR. */
6905 xxspltib_constant_p (rtx op
,
6910 size_t nunits
= GET_MODE_NUNITS (mode
);
6912 HOST_WIDE_INT value
;
6915 /* Set the returned values to out of bound values. */
6916 *num_insns_ptr
= -1;
6917 *constant_ptr
= 256;
6919 if (!TARGET_P9_VECTOR
)
6922 if (mode
== VOIDmode
)
6923 mode
= GET_MODE (op
);
6925 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6928 /* Handle (vec_duplicate <constant>). */
6929 if (GET_CODE (op
) == VEC_DUPLICATE
)
6931 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6932 && mode
!= V2DImode
)
6935 element
= XEXP (op
, 0);
6936 if (!CONST_INT_P (element
))
6939 value
= INTVAL (element
);
6940 if (!IN_RANGE (value
, -128, 127))
6944 /* Handle (const_vector [...]). */
6945 else if (GET_CODE (op
) == CONST_VECTOR
)
6947 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6948 && mode
!= V2DImode
)
6951 element
= CONST_VECTOR_ELT (op
, 0);
6952 if (!CONST_INT_P (element
))
6955 value
= INTVAL (element
);
6956 if (!IN_RANGE (value
, -128, 127))
6959 for (i
= 1; i
< nunits
; i
++)
6961 element
= CONST_VECTOR_ELT (op
, i
);
6962 if (!CONST_INT_P (element
))
6965 if (value
!= INTVAL (element
))
6970 /* Handle integer constants being loaded into the upper part of the VSX
6971 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6972 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6973 else if (CONST_INT_P (op
))
6975 if (!SCALAR_INT_MODE_P (mode
))
6978 value
= INTVAL (op
);
6979 if (!IN_RANGE (value
, -128, 127))
6982 if (!IN_RANGE (value
, -1, 0))
6984 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6987 if (EASY_VECTOR_15 (value
))
6995 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6996 sign extend. Special case 0/-1 to allow getting any VSX register instead
6997 of an Altivec register. */
6998 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6999 && EASY_VECTOR_15 (value
))
7002 /* Return # of instructions and the constant byte for XXSPLTIB. */
7003 if (mode
== V16QImode
)
7006 else if (IN_RANGE (value
, -1, 0))
7012 *constant_ptr
= (int) value
;
7017 output_vec_const_move (rtx
*operands
)
7019 int cst
, cst2
, shift
;
7025 mode
= GET_MODE (dest
);
7029 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
7030 int xxspltib_value
= 256;
7033 if (zero_constant (vec
, mode
))
7035 if (TARGET_P9_VECTOR
)
7036 return "xxspltib %x0,0";
7038 else if (dest_vmx_p
)
7039 return "vspltisw %0,0";
7042 return "xxlxor %x0,%x0,%x0";
7045 if (all_ones_constant (vec
, mode
))
7047 if (TARGET_P9_VECTOR
)
7048 return "xxspltib %x0,255";
7050 else if (dest_vmx_p
)
7051 return "vspltisw %0,-1";
7053 else if (TARGET_P8_VECTOR
)
7054 return "xxlorc %x0,%x0,%x0";
7060 if (TARGET_P9_VECTOR
7061 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
7065 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
7066 return "xxspltib %x0,%2";
7077 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
7078 if (zero_constant (vec
, mode
))
7079 return "vspltisw %0,0";
7081 if (all_ones_constant (vec
, mode
))
7082 return "vspltisw %0,-1";
7084 /* Do we need to construct a value using VSLDOI? */
7085 shift
= vspltis_shifted (vec
);
7089 splat_vec
= gen_easy_altivec_constant (vec
);
7090 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
7091 operands
[1] = XEXP (splat_vec
, 0);
7092 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
7095 switch (GET_MODE (splat_vec
))
7098 return "vspltisw %0,%1";
7101 return "vspltish %0,%1";
7104 return "vspltisb %0,%1";
7111 gcc_assert (TARGET_SPE
);
7113 /* Vector constant 0 is handled as a splitter of V2SI, and in the
7114 pattern of V1DI, V4HI, and V2SF.
7116 FIXME: We should probably return # and add post reload
7117 splitters for these, but this way is so easy ;-). */
7118 cst
= INTVAL (CONST_VECTOR_ELT (vec
, 0));
7119 cst2
= INTVAL (CONST_VECTOR_ELT (vec
, 1));
7120 operands
[1] = CONST_VECTOR_ELT (vec
, 0);
7121 operands
[2] = CONST_VECTOR_ELT (vec
, 1);
7123 return "li %0,%1\n\tevmergelo %0,%0,%0";
7124 else if (WORDS_BIG_ENDIAN
)
7125 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
7127 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
7130 /* Initialize TARGET of vector PAIRED to VALS. */
7133 paired_expand_vector_init (rtx target
, rtx vals
)
7135 machine_mode mode
= GET_MODE (target
);
7136 int n_elts
= GET_MODE_NUNITS (mode
);
7138 rtx x
, new_rtx
, tmp
, constant_op
, op1
, op2
;
7141 for (i
= 0; i
< n_elts
; ++i
)
7143 x
= XVECEXP (vals
, 0, i
);
7144 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
7149 /* Load from constant pool. */
7150 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
7156 /* The vector is initialized only with non-constants. */
7157 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, XVECEXP (vals
, 0, 0),
7158 XVECEXP (vals
, 0, 1));
7160 emit_move_insn (target
, new_rtx
);
7164 /* One field is non-constant and the other one is a constant. Load the
7165 constant from the constant pool and use ps_merge instruction to
7166 construct the whole vector. */
7167 op1
= XVECEXP (vals
, 0, 0);
7168 op2
= XVECEXP (vals
, 0, 1);
7170 constant_op
= (CONSTANT_P (op1
)) ? op1
: op2
;
7172 tmp
= gen_reg_rtx (GET_MODE (constant_op
));
7173 emit_move_insn (tmp
, constant_op
);
7175 if (CONSTANT_P (op1
))
7176 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, tmp
, op2
);
7178 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, op1
, tmp
);
7180 emit_move_insn (target
, new_rtx
);
7184 paired_expand_vector_move (rtx operands
[])
7186 rtx op0
= operands
[0], op1
= operands
[1];
7188 emit_move_insn (op0
, op1
);
7191 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
7192 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
7193 operands for the relation operation COND. This is a recursive
7197 paired_emit_vector_compare (enum rtx_code rcode
,
7198 rtx dest
, rtx op0
, rtx op1
,
7199 rtx cc_op0
, rtx cc_op1
)
7201 rtx tmp
= gen_reg_rtx (V2SFmode
);
7204 gcc_assert (TARGET_PAIRED_FLOAT
);
7205 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
7211 paired_emit_vector_compare (GE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7215 emit_insn (gen_subv2sf3 (tmp
, cc_op0
, cc_op1
));
7216 emit_insn (gen_selv2sf4 (dest
, tmp
, op0
, op1
, CONST0_RTX (SFmode
)));
7220 paired_emit_vector_compare (GE
, dest
, op0
, op1
, cc_op1
, cc_op0
);
7223 paired_emit_vector_compare (LE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7226 tmp1
= gen_reg_rtx (V2SFmode
);
7227 max
= gen_reg_rtx (V2SFmode
);
7228 min
= gen_reg_rtx (V2SFmode
);
7229 gen_reg_rtx (V2SFmode
);
7231 emit_insn (gen_subv2sf3 (tmp
, cc_op0
, cc_op1
));
7232 emit_insn (gen_selv2sf4
7233 (max
, tmp
, cc_op0
, cc_op1
, CONST0_RTX (SFmode
)));
7234 emit_insn (gen_subv2sf3 (tmp
, cc_op1
, cc_op0
));
7235 emit_insn (gen_selv2sf4
7236 (min
, tmp
, cc_op0
, cc_op1
, CONST0_RTX (SFmode
)));
7237 emit_insn (gen_subv2sf3 (tmp1
, min
, max
));
7238 emit_insn (gen_selv2sf4 (dest
, tmp1
, op0
, op1
, CONST0_RTX (SFmode
)));
7241 paired_emit_vector_compare (EQ
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7244 paired_emit_vector_compare (LE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7247 paired_emit_vector_compare (LT
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7250 paired_emit_vector_compare (GE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7253 paired_emit_vector_compare (GT
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7262 /* Emit vector conditional expression.
7263 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7264 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7267 paired_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
7268 rtx cond
, rtx cc_op0
, rtx cc_op1
)
7270 enum rtx_code rcode
= GET_CODE (cond
);
7272 if (!TARGET_PAIRED_FLOAT
)
7275 paired_emit_vector_compare (rcode
, dest
, op1
, op2
, cc_op0
, cc_op1
);
7280 /* Initialize vector TARGET to VALS. */
7283 rs6000_expand_vector_init (rtx target
, rtx vals
)
7285 machine_mode mode
= GET_MODE (target
);
7286 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7287 int n_elts
= GET_MODE_NUNITS (mode
);
7288 int n_var
= 0, one_var
= -1;
7289 bool all_same
= true, all_const_zero
= true;
7293 for (i
= 0; i
< n_elts
; ++i
)
7295 x
= XVECEXP (vals
, 0, i
);
7296 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
7297 ++n_var
, one_var
= i
;
7298 else if (x
!= CONST0_RTX (inner_mode
))
7299 all_const_zero
= false;
7301 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
7307 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
7308 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
7309 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
7311 /* Zero register. */
7312 emit_move_insn (target
, CONST0_RTX (mode
));
7315 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
7317 /* Splat immediate. */
7318 emit_insn (gen_rtx_SET (target
, const_vec
));
7323 /* Load from constant pool. */
7324 emit_move_insn (target
, const_vec
);
7329 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7330 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
7334 size_t num_elements
= all_same
? 1 : 2;
7335 for (i
= 0; i
< num_elements
; i
++)
7337 op
[i
] = XVECEXP (vals
, 0, i
);
7338 /* Just in case there is a SUBREG with a smaller mode, do a
7340 if (GET_MODE (op
[i
]) != inner_mode
)
7342 rtx tmp
= gen_reg_rtx (inner_mode
);
7343 convert_move (tmp
, op
[i
], 0);
7346 /* Allow load with splat double word. */
7347 else if (MEM_P (op
[i
]))
7350 op
[i
] = force_reg (inner_mode
, op
[i
]);
7352 else if (!REG_P (op
[i
]))
7353 op
[i
] = force_reg (inner_mode
, op
[i
]);
7358 if (mode
== V2DFmode
)
7359 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
7361 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
7365 if (mode
== V2DFmode
)
7366 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
7368 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
7373 /* Special case initializing vector int if we are on 64-bit systems with
7374 direct move or we have the ISA 3.0 instructions. */
7375 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
7376 && TARGET_DIRECT_MOVE_64BIT
)
7380 rtx element0
= XVECEXP (vals
, 0, 0);
7381 if (MEM_P (element0
))
7382 element0
= rs6000_address_for_fpconvert (element0
);
7384 element0
= force_reg (SImode
, element0
);
7386 if (TARGET_P9_VECTOR
)
7387 emit_insn (gen_vsx_splat_v4si (target
, element0
));
7390 rtx tmp
= gen_reg_rtx (DImode
);
7391 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
7392 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
7401 for (i
= 0; i
< 4; i
++)
7403 elements
[i
] = XVECEXP (vals
, 0, i
);
7404 if (!CONST_INT_P (elements
[i
]) && !REG_P (elements
[i
]))
7405 elements
[i
] = copy_to_mode_reg (SImode
, elements
[i
]);
7408 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
7409 elements
[2], elements
[3]));
7414 /* With single precision floating point on VSX, know that internally single
7415 precision is actually represented as a double, and either make 2 V2DF
7416 vectors, and convert these vectors to single precision, or do one
7417 conversion, and splat the result to the other elements. */
7418 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
7422 rtx element0
= XVECEXP (vals
, 0, 0);
7424 if (TARGET_P9_VECTOR
)
7426 if (MEM_P (element0
))
7427 element0
= rs6000_address_for_fpconvert (element0
);
7429 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
7434 rtx freg
= gen_reg_rtx (V4SFmode
);
7435 rtx sreg
= force_reg (SFmode
, element0
);
7436 rtx cvt
= (TARGET_XSCVDPSPN
7437 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
7438 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
7441 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
7447 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
7448 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
7449 rtx flt_even
= gen_reg_rtx (V4SFmode
);
7450 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
7451 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
7452 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
7453 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
7454 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
7456 /* Use VMRGEW if we can instead of doing a permute. */
7457 if (TARGET_P8_VECTOR
)
7459 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op2
));
7460 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op1
, op3
));
7461 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7462 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7463 if (BYTES_BIG_ENDIAN
)
7464 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_even
, flt_odd
));
7466 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_odd
, flt_even
));
7470 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
7471 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
7472 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7473 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7474 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
7480 /* Special case initializing vector short/char that are splats if we are on
7481 64-bit systems with direct move. */
7482 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
7483 && (mode
== V16QImode
|| mode
== V8HImode
))
7485 rtx op0
= XVECEXP (vals
, 0, 0);
7486 rtx di_tmp
= gen_reg_rtx (DImode
);
7489 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
7491 if (mode
== V16QImode
)
7493 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
7494 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
7498 if (mode
== V8HImode
)
7500 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7501 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7506 /* Store value to stack temp. Load vector element. Splat. However, splat
7507 of 64-bit items is not supported on Altivec. */
7508 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7510 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7511 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7512 XVECEXP (vals
, 0, 0));
7513 x
= gen_rtx_UNSPEC (VOIDmode
,
7514 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7515 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7517 gen_rtx_SET (target
, mem
),
7519 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7520 gen_rtx_PARALLEL (VOIDmode
,
7521 gen_rtvec (1, const0_rtx
)));
7522 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7526 /* One field is non-constant. Load constant then overwrite
7530 rtx copy
= copy_rtx (vals
);
7532 /* Load constant part of vector, substitute neighboring value for
7534 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7535 rs6000_expand_vector_init (target
, copy
);
7537 /* Insert variable. */
7538 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
), one_var
);
7542 /* Construct the vector in memory one field at a time
7543 and load the whole vector. */
7544 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7545 for (i
= 0; i
< n_elts
; i
++)
7546 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7547 i
* GET_MODE_SIZE (inner_mode
)),
7548 XVECEXP (vals
, 0, i
));
7549 emit_move_insn (target
, mem
);
7552 /* Set field ELT of TARGET to VAL. */
7555 rs6000_expand_vector_set (rtx target
, rtx val
, int elt
)
7557 machine_mode mode
= GET_MODE (target
);
7558 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7559 rtx reg
= gen_reg_rtx (mode
);
7561 int width
= GET_MODE_SIZE (inner_mode
);
7564 val
= force_reg (GET_MODE (val
), val
);
7566 if (VECTOR_MEM_VSX_P (mode
))
7568 rtx insn
= NULL_RTX
;
7569 rtx elt_rtx
= GEN_INT (elt
);
7571 if (mode
== V2DFmode
)
7572 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7574 else if (mode
== V2DImode
)
7575 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7577 else if (TARGET_P9_VECTOR
&& TARGET_VSX_SMALL_INTEGER
7578 && TARGET_UPPER_REGS_DI
&& TARGET_POWERPC64
)
7580 if (mode
== V4SImode
)
7581 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7582 else if (mode
== V8HImode
)
7583 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7584 else if (mode
== V16QImode
)
7585 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7595 /* Simplify setting single element vectors like V1TImode. */
7596 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
) && elt
== 0)
7598 emit_move_insn (target
, gen_lowpart (mode
, val
));
7602 /* Load single variable value. */
7603 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7604 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7605 x
= gen_rtx_UNSPEC (VOIDmode
,
7606 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7607 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7609 gen_rtx_SET (reg
, mem
),
7612 /* Linear sequence. */
7613 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7614 for (i
= 0; i
< 16; ++i
)
7615 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7617 /* Set permute mask to insert element into target. */
7618 for (i
= 0; i
< width
; ++i
)
7619 XVECEXP (mask
, 0, elt
*width
+ i
)
7620 = GEN_INT (i
+ 0x10);
7621 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7623 if (BYTES_BIG_ENDIAN
)
7624 x
= gen_rtx_UNSPEC (mode
,
7625 gen_rtvec (3, target
, reg
,
7626 force_reg (V16QImode
, x
)),
7630 if (TARGET_P9_VECTOR
)
7631 x
= gen_rtx_UNSPEC (mode
,
7632 gen_rtvec (3, target
, reg
,
7633 force_reg (V16QImode
, x
)),
7637 /* Invert selector. We prefer to generate VNAND on P8 so
7638 that future fusion opportunities can kick in, but must
7639 generate VNOR elsewhere. */
7640 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7641 rtx iorx
= (TARGET_P8_VECTOR
7642 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7643 : gen_rtx_AND (V16QImode
, notx
, notx
));
7644 rtx tmp
= gen_reg_rtx (V16QImode
);
7645 emit_insn (gen_rtx_SET (tmp
, iorx
));
7647 /* Permute with operands reversed and adjusted selector. */
7648 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7653 emit_insn (gen_rtx_SET (target
, x
));
7656 /* Extract field ELT from VEC into TARGET. */
7659 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7661 machine_mode mode
= GET_MODE (vec
);
7662 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7665 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7672 gcc_assert (INTVAL (elt
) == 0 && inner_mode
== TImode
);
7673 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7676 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7679 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7682 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7685 if (TARGET_DIRECT_MOVE_64BIT
)
7687 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7693 if (TARGET_DIRECT_MOVE_64BIT
)
7695 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7701 if (TARGET_DIRECT_MOVE_64BIT
)
7703 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7709 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7710 && TARGET_DIRECT_MOVE_64BIT
)
7712 if (GET_MODE (elt
) != DImode
)
7714 rtx tmp
= gen_reg_rtx (DImode
);
7715 convert_move (tmp
, elt
, 0);
7718 else if (!REG_P (elt
))
7719 elt
= force_reg (DImode
, elt
);
7724 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7728 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7732 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7736 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7740 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7744 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7752 gcc_assert (CONST_INT_P (elt
));
7754 /* Allocate mode-sized buffer. */
7755 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7757 emit_move_insn (mem
, vec
);
7759 /* Add offset to field within buffer matching vector element. */
7760 mem
= adjust_address_nv (mem
, inner_mode
,
7761 INTVAL (elt
) * GET_MODE_SIZE (inner_mode
));
7763 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7766 /* Helper function to return the register number of a RTX. */
7768 regno_or_subregno (rtx op
)
7772 else if (SUBREG_P (op
))
7773 return subreg_regno (op
);
7778 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7779 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7780 temporary (BASE_TMP) to fixup the address. Return the new memory address
7781 that is valid for reads or writes to a given register (SCALAR_REG). */
7784 rs6000_adjust_vec_address (rtx scalar_reg
,
7788 machine_mode scalar_mode
)
7790 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7791 rtx addr
= XEXP (mem
, 0);
7796 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7797 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7799 /* Calculate what we need to add to the address to get the element
7801 if (CONST_INT_P (element
))
7802 element_offset
= GEN_INT (INTVAL (element
) * scalar_size
);
7805 int byte_shift
= exact_log2 (scalar_size
);
7806 gcc_assert (byte_shift
>= 0);
7808 if (byte_shift
== 0)
7809 element_offset
= element
;
7813 if (TARGET_POWERPC64
)
7814 emit_insn (gen_ashldi3 (base_tmp
, element
, GEN_INT (byte_shift
)));
7816 emit_insn (gen_ashlsi3 (base_tmp
, element
, GEN_INT (byte_shift
)));
7818 element_offset
= base_tmp
;
7822 /* Create the new address pointing to the element within the vector. If we
7823 are adding 0, we don't have to change the address. */
7824 if (element_offset
== const0_rtx
)
7827 /* A simple indirect address can be converted into a reg + offset
7829 else if (REG_P (addr
) || SUBREG_P (addr
))
7830 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7832 /* Optimize D-FORM addresses with constant offset with a constant element, to
7833 include the element offset in the address directly. */
7834 else if (GET_CODE (addr
) == PLUS
)
7836 rtx op0
= XEXP (addr
, 0);
7837 rtx op1
= XEXP (addr
, 1);
7840 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7841 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7843 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7844 rtx offset_rtx
= GEN_INT (offset
);
7846 if (IN_RANGE (offset
, -32768, 32767)
7847 && (scalar_size
< 8 || (offset
& 0x3) == 0))
7848 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7851 emit_move_insn (base_tmp
, offset_rtx
);
7852 new_addr
= gen_rtx_PLUS (Pmode
, op0
, base_tmp
);
7857 bool op1_reg_p
= (REG_P (op1
) || SUBREG_P (op1
));
7858 bool ele_reg_p
= (REG_P (element_offset
) || SUBREG_P (element_offset
));
7860 /* Note, ADDI requires the register being added to be a base
7861 register. If the register was R0, load it up into the temporary
7864 && (ele_reg_p
|| reg_or_subregno (op1
) != FIRST_GPR_REGNO
))
7866 insn
= gen_add3_insn (base_tmp
, op1
, element_offset
);
7867 gcc_assert (insn
!= NULL_RTX
);
7872 && reg_or_subregno (element_offset
) != FIRST_GPR_REGNO
)
7874 insn
= gen_add3_insn (base_tmp
, element_offset
, op1
);
7875 gcc_assert (insn
!= NULL_RTX
);
7881 emit_move_insn (base_tmp
, op1
);
7882 emit_insn (gen_add2_insn (base_tmp
, element_offset
));
7885 new_addr
= gen_rtx_PLUS (Pmode
, op0
, base_tmp
);
7891 emit_move_insn (base_tmp
, addr
);
7892 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7895 /* If we have a PLUS, we need to see whether the particular register class
7896 allows for D-FORM or X-FORM addressing. */
7897 if (GET_CODE (new_addr
) == PLUS
)
7899 rtx op1
= XEXP (new_addr
, 1);
7900 addr_mask_type addr_mask
;
7901 int scalar_regno
= regno_or_subregno (scalar_reg
);
7903 gcc_assert (scalar_regno
< FIRST_PSEUDO_REGISTER
);
7904 if (INT_REGNO_P (scalar_regno
))
7905 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_GPR
];
7907 else if (FP_REGNO_P (scalar_regno
))
7908 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_FPR
];
7910 else if (ALTIVEC_REGNO_P (scalar_regno
))
7911 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_VMX
];
7916 if (REG_P (op1
) || SUBREG_P (op1
))
7917 valid_addr_p
= (addr_mask
& RELOAD_REG_INDEXED
) != 0;
7919 valid_addr_p
= (addr_mask
& RELOAD_REG_OFFSET
) != 0;
7922 else if (REG_P (new_addr
) || SUBREG_P (new_addr
))
7923 valid_addr_p
= true;
7926 valid_addr_p
= false;
7930 emit_move_insn (base_tmp
, new_addr
);
7931 new_addr
= base_tmp
;
7934 return change_address (mem
, scalar_mode
, new_addr
);
7937 /* Split a variable vec_extract operation into the component instructions. */
7940 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7943 machine_mode mode
= GET_MODE (src
);
7944 machine_mode scalar_mode
= GET_MODE (dest
);
7945 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7946 int byte_shift
= exact_log2 (scalar_size
);
7948 gcc_assert (byte_shift
>= 0);
7950 /* If we are given a memory address, optimize to load just the element. We
7951 don't have to adjust the vector element number on little endian
7955 gcc_assert (REG_P (tmp_gpr
));
7956 emit_move_insn (dest
, rs6000_adjust_vec_address (dest
, src
, element
,
7957 tmp_gpr
, scalar_mode
));
7961 else if (REG_P (src
) || SUBREG_P (src
))
7963 int bit_shift
= byte_shift
+ 3;
7965 int dest_regno
= regno_or_subregno (dest
);
7966 int src_regno
= regno_or_subregno (src
);
7967 int element_regno
= regno_or_subregno (element
);
7969 gcc_assert (REG_P (tmp_gpr
));
7971 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7972 a general purpose register. */
7973 if (TARGET_P9_VECTOR
7974 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
7975 && INT_REGNO_P (dest_regno
)
7976 && ALTIVEC_REGNO_P (src_regno
)
7977 && INT_REGNO_P (element_regno
))
7979 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
7980 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
7982 if (mode
== V16QImode
)
7983 emit_insn (VECTOR_ELT_ORDER_BIG
7984 ? gen_vextublx (dest_si
, element_si
, src
)
7985 : gen_vextubrx (dest_si
, element_si
, src
));
7987 else if (mode
== V8HImode
)
7989 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7990 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
7991 emit_insn (VECTOR_ELT_ORDER_BIG
7992 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
7993 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
7999 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
8000 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
8001 emit_insn (VECTOR_ELT_ORDER_BIG
8002 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
8003 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
8010 gcc_assert (REG_P (tmp_altivec
));
8012 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8013 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8014 will shift the element into the upper position (adding 3 to convert a
8015 byte shift into a bit shift). */
8016 if (scalar_size
== 8)
8018 if (!VECTOR_ELT_ORDER_BIG
)
8020 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
8026 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8028 emit_insn (gen_rtx_SET (tmp_gpr
,
8029 gen_rtx_AND (DImode
,
8030 gen_rtx_ASHIFT (DImode
,
8037 if (!VECTOR_ELT_ORDER_BIG
)
8039 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
8041 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
8042 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
8048 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
8051 /* Get the value into the lower byte of the Altivec register where VSLO
8053 if (TARGET_P9_VECTOR
)
8054 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
8055 else if (can_create_pseudo_p ())
8056 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
8059 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8060 emit_move_insn (tmp_di
, tmp_gpr
);
8061 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
8064 /* Do the VSLO to get the value into the final location. */
8068 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
8072 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
8077 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8078 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
8079 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8080 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8083 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
8091 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8092 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8093 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
8094 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8096 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
8097 emit_insn (gen_ashrdi3 (tmp_gpr_di
, tmp_gpr_di
,
8098 GEN_INT (64 - (8 * scalar_size
))));
8112 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
8113 two SImode values. */
8116 rs6000_split_v4si_init_di_reg (rtx dest
, rtx si1
, rtx si2
, rtx tmp
)
8118 const unsigned HOST_WIDE_INT mask_32bit
= HOST_WIDE_INT_C (0xffffffff);
8120 if (CONST_INT_P (si1
) && CONST_INT_P (si2
))
8122 unsigned HOST_WIDE_INT const1
= (UINTVAL (si1
) & mask_32bit
) << 32;
8123 unsigned HOST_WIDE_INT const2
= UINTVAL (si2
) & mask_32bit
;
8125 emit_move_insn (dest
, GEN_INT (const1
| const2
));
8129 /* Put si1 into upper 32-bits of dest. */
8130 if (CONST_INT_P (si1
))
8131 emit_move_insn (dest
, GEN_INT ((UINTVAL (si1
) & mask_32bit
) << 32));
8134 /* Generate RLDIC. */
8135 rtx si1_di
= gen_rtx_REG (DImode
, regno_or_subregno (si1
));
8136 rtx shift_rtx
= gen_rtx_ASHIFT (DImode
, si1_di
, GEN_INT (32));
8137 rtx mask_rtx
= GEN_INT (mask_32bit
<< 32);
8138 rtx and_rtx
= gen_rtx_AND (DImode
, shift_rtx
, mask_rtx
);
8139 gcc_assert (!reg_overlap_mentioned_p (dest
, si1
));
8140 emit_insn (gen_rtx_SET (dest
, and_rtx
));
8143 /* Put si2 into the temporary. */
8144 gcc_assert (!reg_overlap_mentioned_p (dest
, tmp
));
8145 if (CONST_INT_P (si2
))
8146 emit_move_insn (tmp
, GEN_INT (UINTVAL (si2
) & mask_32bit
));
8148 emit_insn (gen_zero_extendsidi2 (tmp
, si2
));
8150 /* Combine the two parts. */
8151 emit_insn (gen_iordi3 (dest
, dest
, tmp
));
8155 /* Split a V4SI initialization. */
8158 rs6000_split_v4si_init (rtx operands
[])
8160 rtx dest
= operands
[0];
8162 /* Destination is a GPR, build up the two DImode parts in place. */
8163 if (REG_P (dest
) || SUBREG_P (dest
))
8165 int d_regno
= regno_or_subregno (dest
);
8166 rtx scalar1
= operands
[1];
8167 rtx scalar2
= operands
[2];
8168 rtx scalar3
= operands
[3];
8169 rtx scalar4
= operands
[4];
8170 rtx tmp1
= operands
[5];
8171 rtx tmp2
= operands
[6];
8173 /* Even though we only need one temporary (plus the destination, which
8174 has an early clobber constraint, try to use two temporaries, one for
8175 each double word created. That way the 2nd insn scheduling pass can
8176 rearrange things so the two parts are done in parallel. */
8177 if (BYTES_BIG_ENDIAN
)
8179 rtx di_lo
= gen_rtx_REG (DImode
, d_regno
);
8180 rtx di_hi
= gen_rtx_REG (DImode
, d_regno
+ 1);
8181 rs6000_split_v4si_init_di_reg (di_lo
, scalar1
, scalar2
, tmp1
);
8182 rs6000_split_v4si_init_di_reg (di_hi
, scalar3
, scalar4
, tmp2
);
8186 rtx di_lo
= gen_rtx_REG (DImode
, d_regno
+ 1);
8187 rtx di_hi
= gen_rtx_REG (DImode
, d_regno
);
8188 gcc_assert (!VECTOR_ELT_ORDER_BIG
);
8189 rs6000_split_v4si_init_di_reg (di_lo
, scalar4
, scalar3
, tmp1
);
8190 rs6000_split_v4si_init_di_reg (di_hi
, scalar2
, scalar1
, tmp2
);
8199 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
8202 invalid_e500_subreg (rtx op
, machine_mode mode
)
8204 if (TARGET_E500_DOUBLE
)
8206 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8207 subreg:TI and reg:TF. Decimal float modes are like integer
8208 modes (only low part of each register used) for this
8210 if (GET_CODE (op
) == SUBREG
8211 && (mode
== SImode
|| mode
== DImode
|| mode
== TImode
8212 || mode
== DDmode
|| mode
== TDmode
|| mode
== PTImode
)
8213 && REG_P (SUBREG_REG (op
))
8214 && (GET_MODE (SUBREG_REG (op
)) == DFmode
8215 || GET_MODE (SUBREG_REG (op
)) == TFmode
8216 || GET_MODE (SUBREG_REG (op
)) == IFmode
8217 || GET_MODE (SUBREG_REG (op
)) == KFmode
))
8220 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8222 if (GET_CODE (op
) == SUBREG
8223 && (mode
== DFmode
|| mode
== TFmode
|| mode
== IFmode
8225 && REG_P (SUBREG_REG (op
))
8226 && (GET_MODE (SUBREG_REG (op
)) == DImode
8227 || GET_MODE (SUBREG_REG (op
)) == TImode
8228 || GET_MODE (SUBREG_REG (op
)) == PTImode
8229 || GET_MODE (SUBREG_REG (op
)) == DDmode
8230 || GET_MODE (SUBREG_REG (op
)) == TDmode
))
8235 && GET_CODE (op
) == SUBREG
8237 && REG_P (SUBREG_REG (op
))
8238 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op
))))
8244 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8245 selects whether the alignment is abi mandated, optional, or
8246 both abi and optional alignment. */
8249 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
8251 if (how
!= align_opt
)
8253 if (TREE_CODE (type
) == VECTOR_TYPE
)
8255 if ((TARGET_SPE
&& SPE_VECTOR_MODE (TYPE_MODE (type
)))
8256 || (TARGET_PAIRED_FLOAT
&& PAIRED_VECTOR_MODE (TYPE_MODE (type
))))
8261 else if (align
< 128)
8264 else if (TARGET_E500_DOUBLE
8265 && TREE_CODE (type
) == REAL_TYPE
8266 && TYPE_MODE (type
) == DFmode
)
8273 if (how
!= align_abi
)
8275 if (TREE_CODE (type
) == ARRAY_TYPE
8276 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
8278 if (align
< BITS_PER_WORD
)
8279 align
= BITS_PER_WORD
;
8286 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8289 rs6000_special_adjust_field_align_p (tree type
, unsigned int computed
)
8291 if (TARGET_ALTIVEC
&& TREE_CODE (type
) == VECTOR_TYPE
)
8293 if (computed
!= 128)
8296 if (!warned
&& warn_psabi
)
8299 inform (input_location
,
8300 "the layout of aggregates containing vectors with"
8301 " %d-byte alignment has changed in GCC 5",
8302 computed
/ BITS_PER_UNIT
);
8305 /* In current GCC there is no special case. */
8312 /* AIX increases natural record alignment to doubleword if the first
8313 field is an FP double while the FP fields remain word aligned. */
8316 rs6000_special_round_type_align (tree type
, unsigned int computed
,
8317 unsigned int specified
)
8319 unsigned int align
= MAX (computed
, specified
);
8320 tree field
= TYPE_FIELDS (type
);
8322 /* Skip all non field decls */
8323 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
8324 field
= DECL_CHAIN (field
);
8326 if (field
!= NULL
&& field
!= type
)
8328 type
= TREE_TYPE (field
);
8329 while (TREE_CODE (type
) == ARRAY_TYPE
)
8330 type
= TREE_TYPE (type
);
8332 if (type
!= error_mark_node
&& TYPE_MODE (type
) == DFmode
)
8333 align
= MAX (align
, 64);
8339 /* Darwin increases record alignment to the natural alignment of
8343 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
8344 unsigned int specified
)
8346 unsigned int align
= MAX (computed
, specified
);
8348 if (TYPE_PACKED (type
))
8351 /* Find the first field, looking down into aggregates. */
8353 tree field
= TYPE_FIELDS (type
);
8354 /* Skip all non field decls */
8355 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
8356 field
= DECL_CHAIN (field
);
8359 /* A packed field does not contribute any extra alignment. */
8360 if (DECL_PACKED (field
))
8362 type
= TREE_TYPE (field
);
8363 while (TREE_CODE (type
) == ARRAY_TYPE
)
8364 type
= TREE_TYPE (type
);
8365 } while (AGGREGATE_TYPE_P (type
));
8367 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
)
8368 align
= MAX (align
, TYPE_ALIGN (type
));
8373 /* Return 1 for an operand in small memory on V.4/eabi. */
8376 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
8377 machine_mode mode ATTRIBUTE_UNUSED
)
8382 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
8385 if (DEFAULT_ABI
!= ABI_V4
)
8388 /* Vector and float memory instructions have a limited offset on the
8389 SPE, so using a vector or float variable directly as an operand is
8392 && (SPE_VECTOR_MODE (mode
) || FLOAT_MODE_P (mode
)))
8395 if (GET_CODE (op
) == SYMBOL_REF
)
8398 else if (GET_CODE (op
) != CONST
8399 || GET_CODE (XEXP (op
, 0)) != PLUS
8400 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
8401 || GET_CODE (XEXP (XEXP (op
, 0), 1)) != CONST_INT
)
8406 rtx sum
= XEXP (op
, 0);
8407 HOST_WIDE_INT summand
;
8409 /* We have to be careful here, because it is the referenced address
8410 that must be 32k from _SDA_BASE_, not just the symbol. */
8411 summand
= INTVAL (XEXP (sum
, 1));
8412 if (summand
< 0 || summand
> g_switch_value
)
8415 sym_ref
= XEXP (sum
, 0);
8418 return SYMBOL_REF_SMALL_P (sym_ref
);
8424 /* Return true if either operand is a general purpose register. */
8427 gpr_or_gpr_p (rtx op0
, rtx op1
)
8429 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
8430 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
8433 /* Return true if this is a move direct operation between GPR registers and
8434 floating point/VSX registers. */
8437 direct_move_p (rtx op0
, rtx op1
)
8441 if (!REG_P (op0
) || !REG_P (op1
))
8444 if (!TARGET_DIRECT_MOVE
&& !TARGET_MFPGPR
)
8447 regno0
= REGNO (op0
);
8448 regno1
= REGNO (op1
);
8449 if (regno0
>= FIRST_PSEUDO_REGISTER
|| regno1
>= FIRST_PSEUDO_REGISTER
)
8452 if (INT_REGNO_P (regno0
))
8453 return (TARGET_DIRECT_MOVE
) ? VSX_REGNO_P (regno1
) : FP_REGNO_P (regno1
);
8455 else if (INT_REGNO_P (regno1
))
8457 if (TARGET_MFPGPR
&& FP_REGNO_P (regno0
))
8460 else if (TARGET_DIRECT_MOVE
&& VSX_REGNO_P (regno0
))
8467 /* Return true if the OFFSET is valid for the quad address instructions that
8468 use d-form (register + offset) addressing. */
8471 quad_address_offset_p (HOST_WIDE_INT offset
)
8473 return (IN_RANGE (offset
, -32768, 32767) && ((offset
) & 0xf) == 0);
8476 /* Return true if the ADDR is an acceptable address for a quad memory
8477 operation of mode MODE (either LQ/STQ for general purpose registers, or
8478 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8479 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8480 3.0 LXV/STXV instruction. */
8483 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
8487 if (GET_MODE_SIZE (mode
) != 16)
8490 if (legitimate_indirect_address_p (addr
, strict
))
8493 if (VECTOR_MODE_P (mode
) && !mode_supports_vsx_dform_quad (mode
))
8496 if (GET_CODE (addr
) != PLUS
)
8499 op0
= XEXP (addr
, 0);
8500 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
8503 op1
= XEXP (addr
, 1);
8504 if (!CONST_INT_P (op1
))
8507 return quad_address_offset_p (INTVAL (op1
));
8510 /* Return true if this is a load or store quad operation. This function does
8511 not handle the atomic quad memory instructions. */
8514 quad_load_store_p (rtx op0
, rtx op1
)
8518 if (!TARGET_QUAD_MEMORY
)
8521 else if (REG_P (op0
) && MEM_P (op1
))
8522 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
8523 && quad_memory_operand (op1
, GET_MODE (op1
))
8524 && !reg_overlap_mentioned_p (op0
, op1
));
8526 else if (MEM_P (op0
) && REG_P (op1
))
8527 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
8528 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
8533 if (TARGET_DEBUG_ADDR
)
8535 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
8536 ret
? "true" : "false");
8537 debug_rtx (gen_rtx_SET (op0
, op1
));
8543 /* Given an address, return a constant offset term if one exists. */
8546 address_offset (rtx op
)
8548 if (GET_CODE (op
) == PRE_INC
8549 || GET_CODE (op
) == PRE_DEC
)
8551 else if (GET_CODE (op
) == PRE_MODIFY
8552 || GET_CODE (op
) == LO_SUM
)
8555 if (GET_CODE (op
) == CONST
)
8558 if (GET_CODE (op
) == PLUS
)
8561 if (CONST_INT_P (op
))
8567 /* Return true if the MEM operand is a memory operand suitable for use
8568 with a (full width, possibly multiple) gpr load/store. On
8569 powerpc64 this means the offset must be divisible by 4.
8570 Implements 'Y' constraint.
8572 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8573 a constraint function we know the operand has satisfied a suitable
8574 memory predicate. Also accept some odd rtl generated by reload
8575 (see rs6000_legitimize_reload_address for various forms). It is
8576 important that reload rtl be accepted by appropriate constraints
8577 but not by the operand predicate.
8579 Offsetting a lo_sum should not be allowed, except where we know by
8580 alignment that a 32k boundary is not crossed, but see the ???
8581 comment in rs6000_legitimize_reload_address. Note that by
8582 "offsetting" here we mean a further offset to access parts of the
8583 MEM. It's fine to have a lo_sum where the inner address is offset
8584 from a sym, since the same sym+offset will appear in the high part
8585 of the address calculation. */
8588 mem_operand_gpr (rtx op
, machine_mode mode
)
8590 unsigned HOST_WIDE_INT offset
;
8592 rtx addr
= XEXP (op
, 0);
8594 op
= address_offset (addr
);
8598 offset
= INTVAL (op
);
8599 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8602 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8606 if (GET_CODE (addr
) == LO_SUM
)
8607 /* For lo_sum addresses, we must allow any offset except one that
8608 causes a wrap, so test only the low 16 bits. */
8609 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8611 return offset
+ 0x8000 < 0x10000u
- extra
;
8614 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8615 enforce an offset divisible by 4 even for 32-bit. */
8618 mem_operand_ds_form (rtx op
, machine_mode mode
)
8620 unsigned HOST_WIDE_INT offset
;
8622 rtx addr
= XEXP (op
, 0);
8624 if (!offsettable_address_p (false, mode
, addr
))
8627 op
= address_offset (addr
);
8631 offset
= INTVAL (op
);
8632 if ((offset
& 3) != 0)
8635 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8639 if (GET_CODE (addr
) == LO_SUM
)
8640 /* For lo_sum addresses, we must allow any offset except one that
8641 causes a wrap, so test only the low 16 bits. */
8642 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8644 return offset
+ 0x8000 < 0x10000u
- extra
;
8647 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8650 reg_offset_addressing_ok_p (machine_mode mode
)
8664 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8665 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8666 a vector mode, if we want to use the VSX registers to move it around,
8667 we need to restrict ourselves to reg+reg addressing. Similarly for
8668 IEEE 128-bit floating point that is passed in a single vector
8670 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8671 return mode_supports_vsx_dform_quad (mode
);
8678 /* Paired vector modes. Only reg+reg addressing is valid. */
8679 if (TARGET_PAIRED_FLOAT
)
8684 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8685 addressing for the LFIWZX and STFIWX instructions. */
8686 if (TARGET_NO_SDMODE_STACK
)
8698 virtual_stack_registers_memory_p (rtx op
)
8702 if (GET_CODE (op
) == REG
)
8703 regnum
= REGNO (op
);
8705 else if (GET_CODE (op
) == PLUS
8706 && GET_CODE (XEXP (op
, 0)) == REG
8707 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
8708 regnum
= REGNO (XEXP (op
, 0));
8713 return (regnum
>= FIRST_VIRTUAL_REGISTER
8714 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8717 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8718 is known to not straddle a 32k boundary. This function is used
8719 to determine whether -mcmodel=medium code can use TOC pointer
8720 relative addressing for OP. This means the alignment of the TOC
8721 pointer must also be taken into account, and unfortunately that is
8724 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8725 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8729 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8733 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8735 if (GET_CODE (op
) != SYMBOL_REF
)
8738 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8740 if (mode_supports_vsx_dform_quad (mode
))
8743 dsize
= GET_MODE_SIZE (mode
);
8744 decl
= SYMBOL_REF_DECL (op
);
8750 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8751 replacing memory addresses with an anchor plus offset. We
8752 could find the decl by rummaging around in the block->objects
8753 VEC for the given offset but that seems like too much work. */
8754 dalign
= BITS_PER_UNIT
;
8755 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8756 && SYMBOL_REF_ANCHOR_P (op
)
8757 && SYMBOL_REF_BLOCK (op
) != NULL
)
8759 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8761 dalign
= block
->alignment
;
8762 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8764 else if (CONSTANT_POOL_ADDRESS_P (op
))
8766 /* It would be nice to have get_pool_align().. */
8767 machine_mode cmode
= get_pool_mode (op
);
8769 dalign
= GET_MODE_ALIGNMENT (cmode
);
8772 else if (DECL_P (decl
))
8774 dalign
= DECL_ALIGN (decl
);
8778 /* Allow BLKmode when the entire object is known to not
8779 cross a 32k boundary. */
8780 if (!DECL_SIZE_UNIT (decl
))
8783 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8786 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8790 dalign
/= BITS_PER_UNIT
;
8791 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8792 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8793 return dalign
>= dsize
;
8799 /* Find how many bits of the alignment we know for this access. */
8800 dalign
/= BITS_PER_UNIT
;
8801 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8802 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8804 lsb
= offset
& -offset
;
8808 return dalign
>= dsize
;
8812 constant_pool_expr_p (rtx op
)
8816 split_const (op
, &base
, &offset
);
8817 return (GET_CODE (base
) == SYMBOL_REF
8818 && CONSTANT_POOL_ADDRESS_P (base
)
8819 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8822 static const_rtx tocrel_base
, tocrel_offset
;
8824 /* Return true if OP is a toc pointer relative address (the output
8825 of create_TOC_reference). If STRICT, do not match non-split
8826 -mcmodel=large/medium toc pointer relative addresses. */
8829 toc_relative_expr_p (const_rtx op
, bool strict
)
8834 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8836 /* When strict ensure we have everything tidy. */
8838 && !(GET_CODE (op
) == LO_SUM
8839 && REG_P (XEXP (op
, 0))
8840 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8843 /* When not strict, allow non-split TOC addresses and also allow
8844 (lo_sum (high ..)) TOC addresses created during reload. */
8845 if (GET_CODE (op
) == LO_SUM
)
8850 tocrel_offset
= const0_rtx
;
8851 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8853 tocrel_base
= XEXP (op
, 0);
8854 tocrel_offset
= XEXP (op
, 1);
8857 return (GET_CODE (tocrel_base
) == UNSPEC
8858 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
);
8861 /* Return true if X is a constant pool address, and also for cmodel=medium
8862 if X is a toc-relative address known to be offsettable within MODE. */
8865 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
8868 return (toc_relative_expr_p (x
, strict
)
8869 && (TARGET_CMODEL
!= CMODEL_MEDIUM
8870 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
8872 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
8873 INTVAL (tocrel_offset
), mode
)));
8877 legitimate_small_data_p (machine_mode mode
, rtx x
)
8879 return (DEFAULT_ABI
== ABI_V4
8880 && !flag_pic
&& !TARGET_TOC
8881 && (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
)
8882 && small_data_operand (x
, mode
));
8885 /* SPE offset addressing is limited to 5-bits worth of double words. */
8886 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
8889 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8890 bool strict
, bool worst_case
)
8892 unsigned HOST_WIDE_INT offset
;
8895 if (GET_CODE (x
) != PLUS
)
8897 if (!REG_P (XEXP (x
, 0)))
8899 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8901 if (mode_supports_vsx_dform_quad (mode
))
8902 return quad_address_p (x
, mode
, strict
);
8903 if (!reg_offset_addressing_ok_p (mode
))
8904 return virtual_stack_registers_memory_p (x
);
8905 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8907 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
8910 offset
= INTVAL (XEXP (x
, 1));
8918 /* SPE vector modes. */
8919 return SPE_CONST_OFFSET_OK (offset
);
8924 /* On e500v2, we may have:
8926 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8928 Which gets addressed with evldd instructions. */
8929 if (TARGET_E500_DOUBLE
)
8930 return SPE_CONST_OFFSET_OK (offset
);
8932 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8934 if (VECTOR_MEM_VSX_P (mode
))
8939 if (!TARGET_POWERPC64
)
8941 else if (offset
& 3)
8951 if (TARGET_E500_DOUBLE
)
8952 return (SPE_CONST_OFFSET_OK (offset
)
8953 && SPE_CONST_OFFSET_OK (offset
+ 8));
8958 if (!TARGET_POWERPC64
)
8960 else if (offset
& 3)
8969 return offset
< 0x10000 - extra
;
8973 legitimate_indexed_address_p (rtx x
, int strict
)
8977 if (GET_CODE (x
) != PLUS
)
8983 /* Recognize the rtl generated by reload which we know will later be
8984 replaced with proper base and index regs. */
8986 && reload_in_progress
8987 && (REG_P (op0
) || GET_CODE (op0
) == PLUS
)
8991 return (REG_P (op0
) && REG_P (op1
)
8992 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
8993 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
8994 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
8995 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
8999 avoiding_indexed_address_p (machine_mode mode
)
9001 /* Avoid indexed addressing for modes that have non-indexed
9002 load/store instruction forms. */
9003 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
9007 legitimate_indirect_address_p (rtx x
, int strict
)
9009 return GET_CODE (x
) == REG
&& INT_REG_OK_FOR_BASE_P (x
, strict
);
9013 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
9015 if (!TARGET_MACHO
|| !flag_pic
9016 || mode
!= SImode
|| GET_CODE (x
) != MEM
)
9020 if (GET_CODE (x
) != LO_SUM
)
9022 if (GET_CODE (XEXP (x
, 0)) != REG
)
9024 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
9028 return CONSTANT_P (x
);
9032 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
9034 if (GET_CODE (x
) != LO_SUM
)
9036 if (GET_CODE (XEXP (x
, 0)) != REG
)
9038 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
9040 /* quad word addresses are restricted, and we can't use LO_SUM. */
9041 if (mode_supports_vsx_dform_quad (mode
))
9043 /* Restrict addressing for DI because of our SUBREG hackery. */
9044 if (TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
9048 if (TARGET_ELF
|| TARGET_MACHO
)
9052 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
9054 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9055 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9056 recognizes some LO_SUM addresses as valid although this
9057 function says opposite. In most cases, LRA through different
9058 transformations can generate correct code for address reloads.
9059 It can not manage only some LO_SUM cases. So we need to add
9060 code analogous to one in rs6000_legitimize_reload_address for
9061 LOW_SUM here saying that some addresses are still valid. */
9062 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
9063 && small_toc_ref (x
, VOIDmode
));
9064 if (TARGET_TOC
&& ! large_toc_ok
)
9066 if (GET_MODE_NUNITS (mode
) != 1)
9068 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9069 && !(/* ??? Assume floating point reg based on mode? */
9070 TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
9071 && (mode
== DFmode
|| mode
== DDmode
)))
9074 return CONSTANT_P (x
) || large_toc_ok
;
9081 /* Try machine-dependent ways of modifying an illegitimate address
9082 to be legitimate. If we find one, return the new, valid address.
9083 This is used from only one place: `memory_address' in explow.c.
9085 OLDX is the address as it was before break_out_memory_refs was
9086 called. In some cases it is useful to look at this to decide what
9089 It is always safe for this function to do nothing. It exists to
9090 recognize opportunities to optimize the output.
9092 On RS/6000, first check for the sum of a register with a constant
9093 integer that is out of range. If so, generate code to add the
9094 constant with the low-order 16 bits masked to the register and force
9095 this result into another register (this can be done with `cau').
9096 Then generate an address of REG+(CONST&0xffff), allowing for the
9097 possibility of bit 16 being a one.
9099 Then check for the sum of a register and something not constant, try to
9100 load the other things into a register and return the sum. */
9103 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
9108 if (!reg_offset_addressing_ok_p (mode
)
9109 || mode_supports_vsx_dform_quad (mode
))
9111 if (virtual_stack_registers_memory_p (x
))
9114 /* In theory we should not be seeing addresses of the form reg+0,
9115 but just in case it is generated, optimize it away. */
9116 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9117 return force_reg (Pmode
, XEXP (x
, 0));
9119 /* For TImode with load/store quad, restrict addresses to just a single
9120 pointer, so it works with both GPRs and VSX registers. */
9121 /* Make sure both operands are registers. */
9122 else if (GET_CODE (x
) == PLUS
9123 && (mode
!= TImode
|| !TARGET_VSX_TIMODE
))
9124 return gen_rtx_PLUS (Pmode
,
9125 force_reg (Pmode
, XEXP (x
, 0)),
9126 force_reg (Pmode
, XEXP (x
, 1)));
9128 return force_reg (Pmode
, x
);
9130 if (GET_CODE (x
) == SYMBOL_REF
)
9132 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9134 return rs6000_legitimize_tls_address (x
, model
);
9146 /* As in legitimate_offset_address_p we do not assume
9147 worst-case. The mode here is just a hint as to the registers
9148 used. A TImode is usually in gprs, but may actually be in
9149 fprs. Leave worst-case scenario for reload to handle via
9150 insn constraints. PTImode is only GPRs. */
9157 if (GET_CODE (x
) == PLUS
9158 && GET_CODE (XEXP (x
, 0)) == REG
9159 && GET_CODE (XEXP (x
, 1)) == CONST_INT
9160 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9162 && !(SPE_VECTOR_MODE (mode
)
9163 || (TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)))
9165 HOST_WIDE_INT high_int
, low_int
;
9167 low_int
= ((INTVAL (XEXP (x
, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9168 if (low_int
>= 0x8000 - extra
)
9170 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9171 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9172 GEN_INT (high_int
)), 0);
9173 return plus_constant (Pmode
, sum
, low_int
);
9175 else if (GET_CODE (x
) == PLUS
9176 && GET_CODE (XEXP (x
, 0)) == REG
9177 && GET_CODE (XEXP (x
, 1)) != CONST_INT
9178 && GET_MODE_NUNITS (mode
) == 1
9179 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9180 || (/* ??? Assume floating point reg based on mode? */
9181 (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
9182 && (mode
== DFmode
|| mode
== DDmode
)))
9183 && !avoiding_indexed_address_p (mode
))
9185 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9186 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9188 else if (SPE_VECTOR_MODE (mode
)
9189 || (TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
))
9193 /* We accept [reg + reg] and [reg + OFFSET]. */
9195 if (GET_CODE (x
) == PLUS
)
9197 rtx op1
= XEXP (x
, 0);
9198 rtx op2
= XEXP (x
, 1);
9201 op1
= force_reg (Pmode
, op1
);
9203 if (GET_CODE (op2
) != REG
9204 && (GET_CODE (op2
) != CONST_INT
9205 || !SPE_CONST_OFFSET_OK (INTVAL (op2
))
9206 || (GET_MODE_SIZE (mode
) > 8
9207 && !SPE_CONST_OFFSET_OK (INTVAL (op2
) + 8))))
9208 op2
= force_reg (Pmode
, op2
);
9210 /* We can't always do [reg + reg] for these, because [reg +
9211 reg + offset] is not a legitimate addressing mode. */
9212 y
= gen_rtx_PLUS (Pmode
, op1
, op2
);
9214 if ((GET_MODE_SIZE (mode
) > 8 || mode
== DDmode
) && REG_P (op2
))
9215 return force_reg (Pmode
, y
);
9220 return force_reg (Pmode
, x
);
9222 else if ((TARGET_ELF
9224 || !MACHO_DYNAMIC_NO_PIC_P
9230 && GET_CODE (x
) != CONST_INT
9231 && GET_CODE (x
) != CONST_WIDE_INT
9232 && GET_CODE (x
) != CONST_DOUBLE
9234 && GET_MODE_NUNITS (mode
) == 1
9235 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9236 || (/* ??? Assume floating point reg based on mode? */
9237 (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
9238 && (mode
== DFmode
|| mode
== DDmode
))))
9240 rtx reg
= gen_reg_rtx (Pmode
);
9242 emit_insn (gen_elf_high (reg
, x
));
9244 emit_insn (gen_macho_high (reg
, x
));
9245 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9248 && GET_CODE (x
) == SYMBOL_REF
9249 && constant_pool_expr_p (x
)
9250 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9251 return create_TOC_reference (x
, NULL_RTX
);
9256 /* Debug version of rs6000_legitimize_address. */
9258 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9264 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9265 insns
= get_insns ();
9271 "\nrs6000_legitimize_address: mode %s, old code %s, "
9272 "new code %s, modified\n",
9273 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9274 GET_RTX_NAME (GET_CODE (ret
)));
9276 fprintf (stderr
, "Original address:\n");
9279 fprintf (stderr
, "oldx:\n");
9282 fprintf (stderr
, "New address:\n");
9287 fprintf (stderr
, "Insns added:\n");
9288 debug_rtx_list (insns
, 20);
9294 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9295 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9306 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9307 We need to emit DTP-relative relocations. */
9309 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9311 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9316 fputs ("\t.long\t", file
);
9319 fputs (DOUBLE_INT_ASM_OP
, file
);
9324 output_addr_const (file
, x
);
9326 fputs ("@dtprel+0x8000", file
);
9327 else if (TARGET_XCOFF
&& GET_CODE (x
) == SYMBOL_REF
)
9329 switch (SYMBOL_REF_TLS_MODEL (x
))
9333 case TLS_MODEL_LOCAL_EXEC
:
9334 fputs ("@le", file
);
9336 case TLS_MODEL_INITIAL_EXEC
:
9337 fputs ("@ie", file
);
9339 case TLS_MODEL_GLOBAL_DYNAMIC
:
9340 case TLS_MODEL_LOCAL_DYNAMIC
:
9349 /* Return true if X is a symbol that refers to real (rather than emulated)
9353 rs6000_real_tls_symbol_ref_p (rtx x
)
9355 return (GET_CODE (x
) == SYMBOL_REF
9356 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9359 /* In the name of slightly smaller debug output, and to cater to
9360 general assembler lossage, recognize various UNSPEC sequences
9361 and turn them back into a direct symbol reference. */
9364 rs6000_delegitimize_address (rtx orig_x
)
9368 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9374 if (TARGET_CMODEL
!= CMODEL_SMALL
9375 && GET_CODE (y
) == LO_SUM
)
9379 if (GET_CODE (y
) == PLUS
9380 && GET_MODE (y
) == Pmode
9381 && CONST_INT_P (XEXP (y
, 1)))
9383 offset
= XEXP (y
, 1);
9387 if (GET_CODE (y
) == UNSPEC
9388 && XINT (y
, 1) == UNSPEC_TOCREL
)
9390 y
= XVECEXP (y
, 0, 0);
9393 /* Do not associate thread-local symbols with the original
9394 constant pool symbol. */
9396 && GET_CODE (y
) == SYMBOL_REF
9397 && CONSTANT_POOL_ADDRESS_P (y
)
9398 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9402 if (offset
!= NULL_RTX
)
9403 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9404 if (!MEM_P (orig_x
))
9407 return replace_equiv_address_nv (orig_x
, y
);
9411 && GET_CODE (orig_x
) == LO_SUM
9412 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9414 y
= XEXP (XEXP (orig_x
, 1), 0);
9415 if (GET_CODE (y
) == UNSPEC
9416 && XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9417 return XVECEXP (y
, 0, 0);
9423 /* Return true if X shouldn't be emitted into the debug info.
9424 The linker doesn't like .toc section references from
9425 .debug_* sections, so reject .toc section symbols. */
9428 rs6000_const_not_ok_for_debug_p (rtx x
)
9430 if (GET_CODE (x
) == SYMBOL_REF
9431 && CONSTANT_POOL_ADDRESS_P (x
))
9433 rtx c
= get_pool_constant (x
);
9434 machine_mode cmode
= get_pool_mode (x
);
9435 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9443 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9446 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9448 int icode
= INSN_CODE (insn
);
9450 /* Reject creating doloop insns. Combine should not be allowed
9451 to create these for a number of reasons:
9452 1) In a nested loop, if combine creates one of these in an
9453 outer loop and the register allocator happens to allocate ctr
9454 to the outer loop insn, then the inner loop can't use ctr.
9455 Inner loops ought to be more highly optimized.
9456 2) Combine often wants to create one of these from what was
9457 originally a three insn sequence, first combining the three
9458 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9459 allocated ctr, the splitter takes use back to the three insn
9460 sequence. It's better to stop combine at the two insn
9462 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9463 insns, the register allocator sometimes uses floating point
9464 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9465 jump insn and output reloads are not implemented for jumps,
9466 the ctrsi/ctrdi splitters need to handle all possible cases.
9467 That's a pain, and it gets to be seriously difficult when a
9468 splitter that runs after reload needs memory to transfer from
9469 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9470 for the difficult case. It's better to not create problems
9471 in the first place. */
9472 if (icode
!= CODE_FOR_nothing
9473 && (icode
== CODE_FOR_ctrsi_internal1
9474 || icode
== CODE_FOR_ctrdi_internal1
9475 || icode
== CODE_FOR_ctrsi_internal2
9476 || icode
== CODE_FOR_ctrdi_internal2
9477 || icode
== CODE_FOR_ctrsi_internal3
9478 || icode
== CODE_FOR_ctrdi_internal3
9479 || icode
== CODE_FOR_ctrsi_internal4
9480 || icode
== CODE_FOR_ctrdi_internal4
))
9486 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9488 static GTY(()) rtx rs6000_tls_symbol
;
9490 rs6000_tls_get_addr (void)
9492 if (!rs6000_tls_symbol
)
9493 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9495 return rs6000_tls_symbol
;
9498 /* Construct the SYMBOL_REF for TLS GOT references. */
9500 static GTY(()) rtx rs6000_got_symbol
;
9502 rs6000_got_sym (void)
9504 if (!rs6000_got_symbol
)
9506 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9507 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9508 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9511 return rs6000_got_symbol
;
9514 /* AIX Thread-Local Address support. */
9517 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9519 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
, tlsaddr
;
9523 name
= XSTR (addr
, 0);
9524 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9525 or the symbol will be in TLS private data section. */
9526 if (name
[strlen (name
) - 1] != ']'
9527 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr
))
9528 || bss_initializer_p (SYMBOL_REF_DECL (addr
))))
9530 tlsname
= XALLOCAVEC (char, strlen (name
) + 4);
9531 strcpy (tlsname
, name
);
9533 bss_initializer_p (SYMBOL_REF_DECL (addr
)) ? "[UL]" : "[TL]");
9534 tlsaddr
= copy_rtx (addr
);
9535 XSTR (tlsaddr
, 0) = ggc_strdup (tlsname
);
9540 /* Place addr into TOC constant pool. */
9541 sym
= force_const_mem (GET_MODE (tlsaddr
), tlsaddr
);
9543 /* Output the TOC entry and create the MEM referencing the value. */
9544 if (constant_pool_expr_p (XEXP (sym
, 0))
9545 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9547 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9548 mem
= gen_const_mem (Pmode
, tocref
);
9549 set_mem_alias_set (mem
, get_TOC_alias_set ());
9554 /* Use global-dynamic for local-dynamic. */
9555 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9556 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9558 /* Create new TOC reference for @m symbol. */
9559 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9560 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9561 strcpy (tlsname
, "*LCM");
9562 strcat (tlsname
, name
+ 3);
9563 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9564 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9565 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9566 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9567 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9569 rtx modreg
= gen_reg_rtx (Pmode
);
9570 emit_insn (gen_rtx_SET (modreg
, modmem
));
9572 tmpreg
= gen_reg_rtx (Pmode
);
9573 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9575 dest
= gen_reg_rtx (Pmode
);
9577 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9579 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9582 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9583 else if (TARGET_32BIT
)
9585 tlsreg
= gen_reg_rtx (SImode
);
9586 emit_insn (gen_tls_get_tpointer (tlsreg
));
9589 tlsreg
= gen_rtx_REG (DImode
, 13);
9591 /* Load the TOC value into temporary register. */
9592 tmpreg
= gen_reg_rtx (Pmode
);
9593 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9594 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9595 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9597 /* Add TOC symbol value to TLS pointer. */
9598 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9603 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9604 this (thread-local) address. */
9607 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9612 return rs6000_legitimize_tls_address_aix (addr
, model
);
9614 dest
= gen_reg_rtx (Pmode
);
9615 if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 16)
9621 tlsreg
= gen_rtx_REG (Pmode
, 13);
9622 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9626 tlsreg
= gen_rtx_REG (Pmode
, 2);
9627 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9631 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9635 tmp
= gen_reg_rtx (Pmode
);
9638 tlsreg
= gen_rtx_REG (Pmode
, 13);
9639 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9643 tlsreg
= gen_rtx_REG (Pmode
, 2);
9644 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9648 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9650 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9655 rtx r3
, got
, tga
, tmp1
, tmp2
, call_insn
;
9657 /* We currently use relocations like @got@tlsgd for tls, which
9658 means the linker will handle allocation of tls entries, placing
9659 them in the .got section. So use a pointer to the .got section,
9660 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9661 or to secondary GOT sections used by 32-bit -fPIC. */
9663 got
= gen_rtx_REG (Pmode
, 2);
9667 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9670 rtx gsym
= rs6000_got_sym ();
9671 got
= gen_reg_rtx (Pmode
);
9673 rs6000_emit_move (got
, gsym
, Pmode
);
9678 tmp1
= gen_reg_rtx (Pmode
);
9679 tmp2
= gen_reg_rtx (Pmode
);
9680 mem
= gen_const_mem (Pmode
, tmp1
);
9681 lab
= gen_label_rtx ();
9682 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9683 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9684 if (TARGET_LINK_STACK
)
9685 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9686 emit_move_insn (tmp2
, mem
);
9687 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9688 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9693 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9695 tga
= rs6000_tls_get_addr ();
9696 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
,
9697 1, const0_rtx
, Pmode
);
9699 r3
= gen_rtx_REG (Pmode
, 3);
9700 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9703 insn
= gen_tls_gd_aix64 (r3
, got
, addr
, tga
, const0_rtx
);
9705 insn
= gen_tls_gd_aix32 (r3
, got
, addr
, tga
, const0_rtx
);
9707 else if (DEFAULT_ABI
== ABI_V4
)
9708 insn
= gen_tls_gd_sysvsi (r3
, got
, addr
, tga
, const0_rtx
);
9711 call_insn
= last_call_insn ();
9712 PATTERN (call_insn
) = insn
;
9713 if (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
9714 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
),
9715 pic_offset_table_rtx
);
9717 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9719 tga
= rs6000_tls_get_addr ();
9720 tmp1
= gen_reg_rtx (Pmode
);
9721 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
,
9722 1, const0_rtx
, Pmode
);
9724 r3
= gen_rtx_REG (Pmode
, 3);
9725 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9728 insn
= gen_tls_ld_aix64 (r3
, got
, tga
, const0_rtx
);
9730 insn
= gen_tls_ld_aix32 (r3
, got
, tga
, const0_rtx
);
9732 else if (DEFAULT_ABI
== ABI_V4
)
9733 insn
= gen_tls_ld_sysvsi (r3
, got
, tga
, const0_rtx
);
9736 call_insn
= last_call_insn ();
9737 PATTERN (call_insn
) = insn
;
9738 if (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
9739 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
),
9740 pic_offset_table_rtx
);
9742 if (rs6000_tls_size
== 16)
9745 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9747 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9749 else if (rs6000_tls_size
== 32)
9751 tmp2
= gen_reg_rtx (Pmode
);
9753 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9755 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9758 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9760 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9764 tmp2
= gen_reg_rtx (Pmode
);
9766 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9768 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9770 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9776 /* IE, or 64-bit offset LE. */
9777 tmp2
= gen_reg_rtx (Pmode
);
9779 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9781 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9784 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9786 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9794 /* Only create the global variable for the stack protect guard if we are using
9795 the global flavor of that guard. */
9797 rs6000_init_stack_protect_guard (void)
9799 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9800 return default_stack_protect_guard ();
9805 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9808 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9810 if (GET_CODE (x
) == HIGH
9811 && GET_CODE (XEXP (x
, 0)) == UNSPEC
)
9814 /* A TLS symbol in the TOC cannot contain a sum. */
9815 if (GET_CODE (x
) == CONST
9816 && GET_CODE (XEXP (x
, 0)) == PLUS
9817 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
9818 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9821 /* Do not place an ELF TLS symbol in the constant pool. */
9822 return TARGET_ELF
&& tls_referenced_p (x
);
9825 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9826 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9827 can be addressed relative to the toc pointer. */
9830 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9832 return ((constant_pool_expr_p (sym
)
9833 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9834 get_pool_mode (sym
)))
9835 || (TARGET_CMODEL
== CMODEL_MEDIUM
9836 && SYMBOL_REF_LOCAL_P (sym
)
9837 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9840 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9841 replace the input X, or the original X if no replacement is called for.
9842 The output parameter *WIN is 1 if the calling macro should goto WIN,
9845 For RS/6000, we wish to handle large displacements off a base
9846 register by splitting the addend across an addiu/addis and the mem insn.
9847 This cuts number of extra insns needed from 3 to 1.
9849 On Darwin, we use this to generate code for floating point constants.
9850 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9851 The Darwin code is inside #if TARGET_MACHO because only then are the
9852 machopic_* functions defined. */
9854 rs6000_legitimize_reload_address (rtx x
, machine_mode mode
,
9855 int opnum
, int type
,
9856 int ind_levels ATTRIBUTE_UNUSED
, int *win
)
9858 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9859 bool quad_offset_p
= mode_supports_vsx_dform_quad (mode
);
9861 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9862 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9865 && ((mode
== DFmode
&& recog_data
.operand_mode
[0] == V2DFmode
)
9866 || (mode
== DImode
&& recog_data
.operand_mode
[0] == V2DImode
)
9867 || (mode
== SFmode
&& recog_data
.operand_mode
[0] == V4SFmode
9868 && TARGET_P9_VECTOR
)
9869 || (mode
== SImode
&& recog_data
.operand_mode
[0] == V4SImode
9870 && TARGET_P9_VECTOR
)))
9871 reg_offset_p
= false;
9873 /* We must recognize output that we have already generated ourselves. */
9874 if (GET_CODE (x
) == PLUS
9875 && GET_CODE (XEXP (x
, 0)) == PLUS
9876 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
9877 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
9878 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
9880 if (TARGET_DEBUG_ADDR
)
9882 fprintf (stderr
, "\nlegitimize_reload_address push_reload #1:\n");
9885 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9886 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
9887 opnum
, (enum reload_type
) type
);
9892 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9893 if (GET_CODE (x
) == LO_SUM
9894 && GET_CODE (XEXP (x
, 0)) == HIGH
)
9896 if (TARGET_DEBUG_ADDR
)
9898 fprintf (stderr
, "\nlegitimize_reload_address push_reload #2:\n");
9901 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9902 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9903 opnum
, (enum reload_type
) type
);
9909 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
9910 && GET_CODE (x
) == LO_SUM
9911 && GET_CODE (XEXP (x
, 0)) == PLUS
9912 && XEXP (XEXP (x
, 0), 0) == pic_offset_table_rtx
9913 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == HIGH
9914 && XEXP (XEXP (XEXP (x
, 0), 1), 0) == XEXP (x
, 1)
9915 && machopic_operand_p (XEXP (x
, 1)))
9917 /* Result of previous invocation of this function on Darwin
9918 floating point constant. */
9919 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9920 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9921 opnum
, (enum reload_type
) type
);
9927 if (TARGET_CMODEL
!= CMODEL_SMALL
9930 && small_toc_ref (x
, VOIDmode
))
9932 rtx hi
= gen_rtx_HIGH (Pmode
, copy_rtx (x
));
9933 x
= gen_rtx_LO_SUM (Pmode
, hi
, x
);
9934 if (TARGET_DEBUG_ADDR
)
9936 fprintf (stderr
, "\nlegitimize_reload_address push_reload #3:\n");
9939 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9940 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9941 opnum
, (enum reload_type
) type
);
9946 if (GET_CODE (x
) == PLUS
9947 && REG_P (XEXP (x
, 0))
9948 && REGNO (XEXP (x
, 0)) < FIRST_PSEUDO_REGISTER
9949 && INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 1)
9950 && CONST_INT_P (XEXP (x
, 1))
9952 && !SPE_VECTOR_MODE (mode
)
9953 && !(TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
9954 && (quad_offset_p
|| !VECTOR_MODE_P (mode
) || VECTOR_MEM_NONE_P (mode
)))
9956 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
9957 HOST_WIDE_INT low
= ((val
& 0xffff) ^ 0x8000) - 0x8000;
9959 = (((val
- low
) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9961 /* Check for 32-bit overflow or quad addresses with one of the
9962 four least significant bits set. */
9963 if (high
+ low
!= val
9964 || (quad_offset_p
&& (low
& 0xf)))
9970 /* Reload the high part into a base reg; leave the low part
9971 in the mem directly. */
9973 x
= gen_rtx_PLUS (GET_MODE (x
),
9974 gen_rtx_PLUS (GET_MODE (x
), XEXP (x
, 0),
9978 if (TARGET_DEBUG_ADDR
)
9980 fprintf (stderr
, "\nlegitimize_reload_address push_reload #4:\n");
9983 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9984 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
9985 opnum
, (enum reload_type
) type
);
9990 if (GET_CODE (x
) == SYMBOL_REF
9993 && (!VECTOR_MODE_P (mode
) || VECTOR_MEM_NONE_P (mode
))
9994 && !SPE_VECTOR_MODE (mode
)
9996 && DEFAULT_ABI
== ABI_DARWIN
9997 && (flag_pic
|| MACHO_DYNAMIC_NO_PIC_P
)
9998 && machopic_symbol_defined_p (x
)
10000 && DEFAULT_ABI
== ABI_V4
10003 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
10004 The same goes for DImode without 64-bit gprs and DFmode and DDmode
10006 ??? Assume floating point reg based on mode? This assumption is
10007 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
10008 where reload ends up doing a DFmode load of a constant from
10009 mem using two gprs. Unfortunately, at this point reload
10010 hasn't yet selected regs so poking around in reload data
10011 won't help and even if we could figure out the regs reliably,
10012 we'd still want to allow this transformation when the mem is
10013 naturally aligned. Since we say the address is good here, we
10014 can't disable offsets from LO_SUMs in mem_operand_gpr.
10015 FIXME: Allow offset from lo_sum for other modes too, when
10016 mem is sufficiently aligned.
10018 Also disallow this if the type can go in VMX/Altivec registers, since
10019 those registers do not have d-form (reg+offset) address modes. */
10020 && !reg_addr
[mode
].scalar_in_vmx_p
10025 && (mode
!= TImode
|| !TARGET_VSX_TIMODE
)
10027 && (mode
!= DImode
|| TARGET_POWERPC64
)
10028 && ((mode
!= DFmode
&& mode
!= DDmode
) || TARGET_POWERPC64
10029 || (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)))
10034 rtx offset
= machopic_gen_offset (x
);
10035 x
= gen_rtx_LO_SUM (GET_MODE (x
),
10036 gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
10037 gen_rtx_HIGH (Pmode
, offset
)), offset
);
10041 x
= gen_rtx_LO_SUM (GET_MODE (x
),
10042 gen_rtx_HIGH (Pmode
, x
), x
);
10044 if (TARGET_DEBUG_ADDR
)
10046 fprintf (stderr
, "\nlegitimize_reload_address push_reload #5:\n");
10049 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
10050 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10051 opnum
, (enum reload_type
) type
);
10056 /* Reload an offset address wrapped by an AND that represents the
10057 masking of the lower bits. Strip the outer AND and let reload
10058 convert the offset address into an indirect address. For VSX,
10059 force reload to create the address with an AND in a separate
10060 register, because we can't guarantee an altivec register will
10062 if (VECTOR_MEM_ALTIVEC_P (mode
)
10063 && GET_CODE (x
) == AND
10064 && GET_CODE (XEXP (x
, 0)) == PLUS
10065 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
10066 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
10067 && GET_CODE (XEXP (x
, 1)) == CONST_INT
10068 && INTVAL (XEXP (x
, 1)) == -16)
10078 && GET_CODE (x
) == SYMBOL_REF
10079 && use_toc_relative_ref (x
, mode
))
10081 x
= create_TOC_reference (x
, NULL_RTX
);
10082 if (TARGET_CMODEL
!= CMODEL_SMALL
)
10084 if (TARGET_DEBUG_ADDR
)
10086 fprintf (stderr
, "\nlegitimize_reload_address push_reload #6:\n");
10089 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
10090 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10091 opnum
, (enum reload_type
) type
);
10100 /* Debug version of rs6000_legitimize_reload_address. */
10102 rs6000_debug_legitimize_reload_address (rtx x
, machine_mode mode
,
10103 int opnum
, int type
,
10104 int ind_levels
, int *win
)
10106 rtx ret
= rs6000_legitimize_reload_address (x
, mode
, opnum
, type
,
10109 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
10110 "type = %d, ind_levels = %d, win = %d, original addr:\n",
10111 GET_MODE_NAME (mode
), opnum
, type
, ind_levels
, *win
);
10115 fprintf (stderr
, "Same address returned\n");
10117 fprintf (stderr
, "NULL returned\n");
10120 fprintf (stderr
, "New address:\n");
10127 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
10128 that is a valid memory address for an instruction.
10129 The MODE argument is the machine mode for the MEM expression
10130 that wants to use this address.
10132 On the RS/6000, there are four valid address: a SYMBOL_REF that
10133 refers to a constant pool entry of an address (or the sum of it
10134 plus a constant), a short (16-bit signed) constant plus a register,
10135 the sum of two registers, or a register indirect, possibly with an
10136 auto-increment. For DFmode, DDmode and DImode with a constant plus
10137 register, we must ensure that both words are addressable or PowerPC64
10138 with offset word aligned.
10140 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10141 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10142 because adjacent memory cells are accessed by adding word-sized offsets
10143 during assembly output. */
10145 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
10147 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
10148 bool quad_offset_p
= mode_supports_vsx_dform_quad (mode
);
10150 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
10151 if (VECTOR_MEM_ALTIVEC_P (mode
)
10152 && GET_CODE (x
) == AND
10153 && GET_CODE (XEXP (x
, 1)) == CONST_INT
10154 && INTVAL (XEXP (x
, 1)) == -16)
10157 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
10159 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
10162 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
10163 && mode_supports_pre_incdec_p (mode
)
10164 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
10166 /* Handle restricted vector d-form offsets in ISA 3.0. */
10169 if (quad_address_p (x
, mode
, reg_ok_strict
))
10172 else if (virtual_stack_registers_memory_p (x
))
10175 else if (reg_offset_p
)
10177 if (legitimate_small_data_p (mode
, x
))
10179 if (legitimate_constant_pool_address_p (x
, mode
,
10180 reg_ok_strict
|| lra_in_progress
))
10182 if (reg_addr
[mode
].fused_toc
&& GET_CODE (x
) == UNSPEC
10183 && XINT (x
, 1) == UNSPEC_FUSION_ADDIS
)
10187 /* For TImode, if we have TImode in VSX registers, only allow register
10188 indirect addresses. This will allow the values to go in either GPRs
10189 or VSX registers without reloading. The vector types would tend to
10190 go into VSX registers, so we allow REG+REG, while TImode seems
10191 somewhat split, in that some uses are GPR based, and some VSX based. */
10192 /* FIXME: We could loosen this by changing the following to
10193 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10194 but currently we cannot allow REG+REG addressing for TImode. See
10195 PR72827 for complete details on how this ends up hoodwinking DSE. */
10196 if (mode
== TImode
&& TARGET_VSX_TIMODE
)
10198 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10199 if (! reg_ok_strict
10201 && GET_CODE (x
) == PLUS
10202 && GET_CODE (XEXP (x
, 0)) == REG
10203 && (XEXP (x
, 0) == virtual_stack_vars_rtx
10204 || XEXP (x
, 0) == arg_pointer_rtx
)
10205 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
10207 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
10209 if (!FLOAT128_2REG_P (mode
)
10210 && ((TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
10211 || TARGET_POWERPC64
10212 || (mode
!= DFmode
&& mode
!= DDmode
)
10213 || (TARGET_E500_DOUBLE
&& mode
!= DDmode
))
10214 && (TARGET_POWERPC64
|| mode
!= DImode
)
10215 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
10217 && !avoiding_indexed_address_p (mode
)
10218 && legitimate_indexed_address_p (x
, reg_ok_strict
))
10220 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
10221 && mode_supports_pre_modify_p (mode
)
10222 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
10223 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
10224 reg_ok_strict
, false)
10225 || (!avoiding_indexed_address_p (mode
)
10226 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
10227 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
10229 if (reg_offset_p
&& !quad_offset_p
10230 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
10235 /* Debug version of rs6000_legitimate_address_p. */
10237 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
10238 bool reg_ok_strict
)
10240 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
10242 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10243 "strict = %d, reload = %s, code = %s\n",
10244 ret
? "true" : "false",
10245 GET_MODE_NAME (mode
),
10249 : (reload_in_progress
? "progress" : "before")),
10250 GET_RTX_NAME (GET_CODE (x
)));
10256 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10259 rs6000_mode_dependent_address_p (const_rtx addr
,
10260 addr_space_t as ATTRIBUTE_UNUSED
)
10262 return rs6000_mode_dependent_address_ptr (addr
);
10265 /* Go to LABEL if ADDR (a legitimate address expression)
10266 has an effect that depends on the machine mode it is used for.
10268 On the RS/6000 this is true of all integral offsets (since AltiVec
10269 and VSX modes don't allow them) or is a pre-increment or decrement.
10271 ??? Except that due to conceptual problems in offsettable_address_p
10272 we can't really report the problems of integral offsets. So leave
10273 this assuming that the adjustable offset must be valid for the
10274 sub-words of a TFmode operand, which is what we had before. */
10277 rs6000_mode_dependent_address (const_rtx addr
)
10279 switch (GET_CODE (addr
))
10282 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10283 is considered a legitimate address before reload, so there
10284 are no offset restrictions in that case. Note that this
10285 condition is safe in strict mode because any address involving
10286 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10287 been rejected as illegitimate. */
10288 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
10289 && XEXP (addr
, 0) != arg_pointer_rtx
10290 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
)
10292 unsigned HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
10293 return val
+ 0x8000 >= 0x10000 - (TARGET_POWERPC64
? 8 : 12);
10298 /* Anything in the constant pool is sufficiently aligned that
10299 all bytes have the same high part address. */
10300 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
10302 /* Auto-increment cases are now treated generically in recog.c. */
10304 return TARGET_UPDATE
;
10306 /* AND is only allowed in Altivec loads. */
10317 /* Debug version of rs6000_mode_dependent_address. */
10319 rs6000_debug_mode_dependent_address (const_rtx addr
)
10321 bool ret
= rs6000_mode_dependent_address (addr
);
10323 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
10324 ret
? "true" : "false");
10330 /* Implement FIND_BASE_TERM. */
10333 rs6000_find_base_term (rtx op
)
10338 if (GET_CODE (base
) == CONST
)
10339 base
= XEXP (base
, 0);
10340 if (GET_CODE (base
) == PLUS
)
10341 base
= XEXP (base
, 0);
10342 if (GET_CODE (base
) == UNSPEC
)
10343 switch (XINT (base
, 1))
10345 case UNSPEC_TOCREL
:
10346 case UNSPEC_MACHOPIC_OFFSET
:
10347 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10348 for aliasing purposes. */
10349 return XVECEXP (base
, 0, 0);
10355 /* More elaborate version of recog's offsettable_memref_p predicate
10356 that works around the ??? note of rs6000_mode_dependent_address.
10357 In particular it accepts
10359 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10361 in 32-bit mode, that the recog predicate rejects. */
10364 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
)
10371 /* First mimic offsettable_memref_p. */
10372 if (offsettable_address_p (true, GET_MODE (op
), XEXP (op
, 0)))
10375 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10376 the latter predicate knows nothing about the mode of the memory
10377 reference and, therefore, assumes that it is the largest supported
10378 mode (TFmode). As a consequence, legitimate offsettable memory
10379 references are rejected. rs6000_legitimate_offset_address_p contains
10380 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10381 at least with a little bit of help here given that we know the
10382 actual registers used. */
10383 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10384 || GET_MODE_SIZE (reg_mode
) == 4);
10385 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10389 /* Determine the reassociation width to be used in reassociate_bb.
10390 This takes into account how many parallel operations we
10391 can actually do of a given type, and also the latency.
10393 int add/sub 6/cycle
10395 vect add/sub/mul 2/cycle
10396 fp add/sub/mul 2/cycle
10401 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
10404 switch (rs6000_cpu
)
10406 case PROCESSOR_POWER8
:
10407 case PROCESSOR_POWER9
:
10408 if (DECIMAL_FLOAT_MODE_P (mode
))
10410 if (VECTOR_MODE_P (mode
))
10412 if (INTEGRAL_MODE_P (mode
))
10413 return opc
== MULT_EXPR
? 4 : 6;
10414 if (FLOAT_MODE_P (mode
))
10423 /* Change register usage conditional on target flags. */
10425 rs6000_conditional_register_usage (void)
10429 if (TARGET_DEBUG_TARGET
)
10430 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10432 /* Set MQ register fixed (already call_used) so that it will not be
10434 fixed_regs
[64] = 1;
10436 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10438 fixed_regs
[13] = call_used_regs
[13]
10439 = call_really_used_regs
[13] = 1;
10441 /* Conditionally disable FPRs. */
10442 if (TARGET_SOFT_FLOAT
|| !TARGET_FPRS
)
10443 for (i
= 32; i
< 64; i
++)
10444 fixed_regs
[i
] = call_used_regs
[i
]
10445 = call_really_used_regs
[i
] = 1;
10447 /* The TOC register is not killed across calls in a way that is
10448 visible to the compiler. */
10449 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10450 call_really_used_regs
[2] = 0;
10452 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10453 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10455 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10456 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10457 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10458 = call_really_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10460 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10461 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10462 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10463 = call_really_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10465 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10466 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10467 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10471 global_regs
[SPEFSCR_REGNO
] = 1;
10472 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10473 registers in prologues and epilogues. We no longer use r14
10474 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10475 pool for link-compatibility with older versions of GCC. Once
10476 "old" code has died out, we can return r14 to the allocation
10479 = call_used_regs
[14]
10480 = call_really_used_regs
[14] = 1;
10483 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10485 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10486 fixed_regs
[i
] = call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10487 call_really_used_regs
[VRSAVE_REGNO
] = 1;
10490 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10491 global_regs
[VSCR_REGNO
] = 1;
10493 if (TARGET_ALTIVEC_ABI
)
10495 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10496 call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10498 /* AIX reserves VR20:31 in non-extended ABI mode. */
10500 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10501 fixed_regs
[i
] = call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10506 /* Output insns to set DEST equal to the constant SOURCE as a series of
10507 lis, ori and shl instructions and return TRUE. */
10510 rs6000_emit_set_const (rtx dest
, rtx source
)
10512 machine_mode mode
= GET_MODE (dest
);
10517 gcc_checking_assert (CONST_INT_P (source
));
10518 c
= INTVAL (source
);
10523 emit_insn (gen_rtx_SET (dest
, source
));
10527 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10529 emit_insn (gen_rtx_SET (copy_rtx (temp
),
10530 GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10531 emit_insn (gen_rtx_SET (dest
,
10532 gen_rtx_IOR (SImode
, copy_rtx (temp
),
10533 GEN_INT (c
& 0xffff))));
10537 if (!TARGET_POWERPC64
)
10541 hi
= operand_subword_force (copy_rtx (dest
), WORDS_BIG_ENDIAN
== 0,
10543 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0,
10545 emit_move_insn (hi
, GEN_INT (c
>> 32));
10546 c
= ((c
& 0xffffffff) ^ 0x80000000) - 0x80000000;
10547 emit_move_insn (lo
, GEN_INT (c
));
10550 rs6000_emit_set_long_const (dest
, c
);
10554 gcc_unreachable ();
10557 insn
= get_last_insn ();
10558 set
= single_set (insn
);
10559 if (! CONSTANT_P (SET_SRC (set
)))
10560 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
10565 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10566 Output insns to set DEST equal to the constant C as a series of
10567 lis, ori and shl instructions. */
10570 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
10573 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10583 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10584 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
10585 emit_move_insn (dest
, GEN_INT ((ud1
^ 0x8000) - 0x8000));
10587 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10588 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
10590 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10592 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10593 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10595 emit_move_insn (dest
,
10596 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10599 else if (ud3
== 0 && ud4
== 0)
10601 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10603 gcc_assert (ud2
& 0x8000);
10604 emit_move_insn (copy_rtx (temp
),
10605 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10607 emit_move_insn (copy_rtx (temp
),
10608 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10610 emit_move_insn (dest
,
10611 gen_rtx_ZERO_EXTEND (DImode
,
10612 gen_lowpart (SImode
,
10613 copy_rtx (temp
))));
10615 else if ((ud4
== 0xffff && (ud3
& 0x8000))
10616 || (ud4
== 0 && ! (ud3
& 0x8000)))
10618 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10620 emit_move_insn (copy_rtx (temp
),
10621 GEN_INT (((ud3
<< 16) ^ 0x80000000) - 0x80000000));
10623 emit_move_insn (copy_rtx (temp
),
10624 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10626 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10627 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10630 emit_move_insn (dest
,
10631 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10636 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10638 emit_move_insn (copy_rtx (temp
),
10639 GEN_INT (((ud4
<< 16) ^ 0x80000000) - 0x80000000));
10641 emit_move_insn (copy_rtx (temp
),
10642 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10645 emit_move_insn (ud2
!= 0 || ud1
!= 0 ? copy_rtx (temp
) : dest
,
10646 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10649 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10650 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10651 GEN_INT (ud2
<< 16)));
10653 emit_move_insn (dest
,
10654 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10659 /* Helper for the following. Get rid of [r+r] memory refs
10660 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10663 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10665 if (reload_in_progress
)
10668 if (GET_CODE (operands
[0]) == MEM
10669 && GET_CODE (XEXP (operands
[0], 0)) != REG
10670 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10671 GET_MODE (operands
[0]), false))
10673 = replace_equiv_address (operands
[0],
10674 copy_addr_to_reg (XEXP (operands
[0], 0)));
10676 if (GET_CODE (operands
[1]) == MEM
10677 && GET_CODE (XEXP (operands
[1], 0)) != REG
10678 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10679 GET_MODE (operands
[1]), false))
10681 = replace_equiv_address (operands
[1],
10682 copy_addr_to_reg (XEXP (operands
[1], 0)));
10685 /* Generate a vector of constants to permute MODE for a little-endian
10686 storage operation by swapping the two halves of a vector. */
10688 rs6000_const_vec (machine_mode mode
)
10716 v
= rtvec_alloc (subparts
);
10718 for (i
= 0; i
< subparts
/ 2; ++i
)
10719 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10720 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10721 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
10726 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10727 for a VSX load or store operation. */
10729 rs6000_gen_le_vsx_permute (rtx source
, machine_mode mode
)
10731 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10732 128-bit integers if they are allowed in VSX registers. */
10733 if (FLOAT128_VECTOR_P (mode
) || mode
== TImode
|| mode
== V1TImode
)
10734 return gen_rtx_ROTATE (mode
, source
, GEN_INT (64));
10737 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10738 return gen_rtx_VEC_SELECT (mode
, source
, par
);
10742 /* Emit a little-endian load from vector memory location SOURCE to VSX
10743 register DEST in mode MODE. The load is done with two permuting
10744 insn's that represent an lxvd2x and xxpermdi. */
10746 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
10748 rtx tmp
, permute_mem
, permute_reg
;
10750 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10752 if (mode
== TImode
|| mode
== V1TImode
)
10755 dest
= gen_lowpart (V2DImode
, dest
);
10756 source
= adjust_address (source
, V2DImode
, 0);
10759 tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10760 permute_mem
= rs6000_gen_le_vsx_permute (source
, mode
);
10761 permute_reg
= rs6000_gen_le_vsx_permute (tmp
, mode
);
10762 emit_insn (gen_rtx_SET (tmp
, permute_mem
));
10763 emit_insn (gen_rtx_SET (dest
, permute_reg
));
10766 /* Emit a little-endian store to vector memory location DEST from VSX
10767 register SOURCE in mode MODE. The store is done with two permuting
10768 insn's that represent an xxpermdi and an stxvd2x. */
10770 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10772 rtx tmp
, permute_src
, permute_tmp
;
10774 /* This should never be called during or after reload, because it does
10775 not re-permute the source register. It is intended only for use
10777 gcc_assert (!reload_in_progress
&& !lra_in_progress
&& !reload_completed
);
10779 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10781 if (mode
== TImode
|| mode
== V1TImode
)
10784 dest
= adjust_address (dest
, V2DImode
, 0);
10785 source
= gen_lowpart (V2DImode
, source
);
10788 tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source
) : source
;
10789 permute_src
= rs6000_gen_le_vsx_permute (source
, mode
);
10790 permute_tmp
= rs6000_gen_le_vsx_permute (tmp
, mode
);
10791 emit_insn (gen_rtx_SET (tmp
, permute_src
));
10792 emit_insn (gen_rtx_SET (dest
, permute_tmp
));
10795 /* Emit a sequence representing a little-endian VSX load or store,
10796 moving data from SOURCE to DEST in mode MODE. This is done
10797 separately from rs6000_emit_move to ensure it is called only
10798 during expand. LE VSX loads and stores introduced later are
10799 handled with a split. The expand-time RTL generation allows
10800 us to optimize away redundant pairs of register-permutes. */
10802 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10804 gcc_assert (!BYTES_BIG_ENDIAN
10805 && VECTOR_MEM_VSX_P (mode
)
10806 && !TARGET_P9_VECTOR
10807 && !gpr_or_gpr_p (dest
, source
)
10808 && (MEM_P (source
) ^ MEM_P (dest
)));
10810 if (MEM_P (source
))
10812 gcc_assert (REG_P (dest
) || GET_CODE (dest
) == SUBREG
);
10813 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10817 if (!REG_P (source
))
10818 source
= force_reg (mode
, source
);
10819 rs6000_emit_le_vsx_store (dest
, source
, mode
);
10823 /* Return whether a SFmode or SImode move can be done without converting one
10824 mode to another. This arrises when we have:
10826 (SUBREG:SF (REG:SI ...))
10827 (SUBREG:SI (REG:SF ...))
10829 and one of the values is in a floating point/vector register, where SFmode
10830 scalars are stored in DFmode format. */
10833 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10835 if (TARGET_ALLOW_SF_SUBREG
)
10838 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10841 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10844 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10845 if (SUBREG_P (dest
))
10847 rtx dest_subreg
= SUBREG_REG (dest
);
10848 rtx src_subreg
= SUBREG_REG (src
);
10849 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10856 /* Helper function to change moves with:
10858 (SUBREG:SF (REG:SI)) and
10859 (SUBREG:SI (REG:SF))
10861 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10862 values are stored as DFmode values in the VSX registers. We need to convert
10863 the bits before we can use a direct move or operate on the bits in the
10864 vector register as an integer type.
10866 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10869 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10871 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_in_progress
&& !reload_completed
10872 && !lra_in_progress
10873 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10874 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10876 rtx inner_source
= SUBREG_REG (source
);
10877 machine_mode inner_mode
= GET_MODE (inner_source
);
10879 if (mode
== SImode
&& inner_mode
== SFmode
)
10881 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10885 if (mode
== SFmode
&& inner_mode
== SImode
)
10887 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10895 /* Emit a move from SOURCE to DEST in mode MODE. */
10897 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10900 operands
[0] = dest
;
10901 operands
[1] = source
;
10903 if (TARGET_DEBUG_ADDR
)
10906 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10907 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10908 GET_MODE_NAME (mode
),
10909 reload_in_progress
,
10911 can_create_pseudo_p ());
10913 fprintf (stderr
, "source:\n");
10914 debug_rtx (source
);
10917 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
10918 if (CONST_WIDE_INT_P (operands
[1])
10919 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10921 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10922 gcc_unreachable ();
10925 /* See if we need to special case SImode/SFmode SUBREG moves. */
10926 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10927 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10930 /* Check if GCC is setting up a block move that will end up using FP
10931 registers as temporaries. We must make sure this is acceptable. */
10932 if (GET_CODE (operands
[0]) == MEM
10933 && GET_CODE (operands
[1]) == MEM
10935 && (SLOW_UNALIGNED_ACCESS (DImode
, MEM_ALIGN (operands
[0]))
10936 || SLOW_UNALIGNED_ACCESS (DImode
, MEM_ALIGN (operands
[1])))
10937 && ! (SLOW_UNALIGNED_ACCESS (SImode
, (MEM_ALIGN (operands
[0]) > 32
10938 ? 32 : MEM_ALIGN (operands
[0])))
10939 || SLOW_UNALIGNED_ACCESS (SImode
, (MEM_ALIGN (operands
[1]) > 32
10941 : MEM_ALIGN (operands
[1]))))
10942 && ! MEM_VOLATILE_P (operands
[0])
10943 && ! MEM_VOLATILE_P (operands
[1]))
10945 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10946 adjust_address (operands
[1], SImode
, 0));
10947 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10948 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10952 if (can_create_pseudo_p () && GET_CODE (operands
[0]) == MEM
10953 && !gpc_reg_operand (operands
[1], mode
))
10954 operands
[1] = force_reg (mode
, operands
[1]);
10956 /* Recognize the case where operand[1] is a reference to thread-local
10957 data and load its address to a register. */
10958 if (tls_referenced_p (operands
[1]))
10960 enum tls_model model
;
10961 rtx tmp
= operands
[1];
10964 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10966 addend
= XEXP (XEXP (tmp
, 0), 1);
10967 tmp
= XEXP (XEXP (tmp
, 0), 0);
10970 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
10971 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10972 gcc_assert (model
!= 0);
10974 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10977 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10978 tmp
= force_operand (tmp
, operands
[0]);
10983 /* Handle the case where reload calls us with an invalid address. */
10984 if (reload_in_progress
&& mode
== Pmode
10985 && (! general_operand (operands
[1], mode
)
10986 || ! nonimmediate_operand (operands
[0], mode
)))
10989 /* 128-bit constant floating-point values on Darwin should really be loaded
10990 as two parts. However, this premature splitting is a problem when DFmode
10991 values can go into Altivec registers. */
10992 if (FLOAT128_IBM_P (mode
) && !reg_addr
[DFmode
].scalar_in_vmx_p
10993 && GET_CODE (operands
[1]) == CONST_DOUBLE
)
10995 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10996 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10998 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10999 GET_MODE_SIZE (DFmode
)),
11000 simplify_gen_subreg (DFmode
, operands
[1], mode
,
11001 GET_MODE_SIZE (DFmode
)),
11006 if (reload_in_progress
&& cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
)
11007 cfun
->machine
->sdmode_stack_slot
=
11008 eliminate_regs (cfun
->machine
->sdmode_stack_slot
, VOIDmode
, NULL_RTX
);
11011 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11012 p1:SD) if p1 is not of floating point class and p0 is spilled as
11013 we can have no analogous movsd_store for this. */
11014 if (lra_in_progress
&& mode
== DDmode
11015 && REG_P (operands
[0]) && REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
11016 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11017 && GET_CODE (operands
[1]) == SUBREG
&& REG_P (SUBREG_REG (operands
[1]))
11018 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
11021 int regno
= REGNO (SUBREG_REG (operands
[1]));
11023 if (regno
>= FIRST_PSEUDO_REGISTER
)
11025 cl
= reg_preferred_class (regno
);
11026 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
11028 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11031 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
11032 operands
[1] = SUBREG_REG (operands
[1]);
11035 if (lra_in_progress
11037 && REG_P (operands
[0]) && REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
11038 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11039 && (REG_P (operands
[1])
11040 || (GET_CODE (operands
[1]) == SUBREG
11041 && REG_P (SUBREG_REG (operands
[1])))))
11043 int regno
= REGNO (GET_CODE (operands
[1]) == SUBREG
11044 ? SUBREG_REG (operands
[1]) : operands
[1]);
11047 if (regno
>= FIRST_PSEUDO_REGISTER
)
11049 cl
= reg_preferred_class (regno
);
11050 gcc_assert (cl
!= NO_REGS
);
11051 regno
= ira_class_hard_regs
[cl
][0];
11053 if (FP_REGNO_P (regno
))
11055 if (GET_MODE (operands
[0]) != DDmode
)
11056 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
11057 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
11059 else if (INT_REGNO_P (regno
))
11060 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11065 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11066 p:DD)) if p0 is not of floating point class and p1 is spilled as
11067 we can have no analogous movsd_load for this. */
11068 if (lra_in_progress
&& mode
== DDmode
11069 && GET_CODE (operands
[0]) == SUBREG
&& REG_P (SUBREG_REG (operands
[0]))
11070 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
11071 && REG_P (operands
[1]) && REGNO (operands
[1]) >= FIRST_PSEUDO_REGISTER
11072 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11075 int regno
= REGNO (SUBREG_REG (operands
[0]));
11077 if (regno
>= FIRST_PSEUDO_REGISTER
)
11079 cl
= reg_preferred_class (regno
);
11080 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
11082 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11085 operands
[0] = SUBREG_REG (operands
[0]);
11086 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
11089 if (lra_in_progress
11091 && (REG_P (operands
[0])
11092 || (GET_CODE (operands
[0]) == SUBREG
11093 && REG_P (SUBREG_REG (operands
[0]))))
11094 && REG_P (operands
[1]) && REGNO (operands
[1]) >= FIRST_PSEUDO_REGISTER
11095 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11097 int regno
= REGNO (GET_CODE (operands
[0]) == SUBREG
11098 ? SUBREG_REG (operands
[0]) : operands
[0]);
11101 if (regno
>= FIRST_PSEUDO_REGISTER
)
11103 cl
= reg_preferred_class (regno
);
11104 gcc_assert (cl
!= NO_REGS
);
11105 regno
= ira_class_hard_regs
[cl
][0];
11107 if (FP_REGNO_P (regno
))
11109 if (GET_MODE (operands
[1]) != DDmode
)
11110 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
11111 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
11113 else if (INT_REGNO_P (regno
))
11114 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11120 if (reload_in_progress
11122 && cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
11123 && MEM_P (operands
[0])
11124 && rtx_equal_p (operands
[0], cfun
->machine
->sdmode_stack_slot
)
11125 && REG_P (operands
[1]))
11127 if (FP_REGNO_P (REGNO (operands
[1])))
11129 rtx mem
= adjust_address_nv (operands
[0], DDmode
, 0);
11130 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11131 emit_insn (gen_movsd_store (mem
, operands
[1]));
11133 else if (INT_REGNO_P (REGNO (operands
[1])))
11135 rtx mem
= operands
[0];
11136 if (BYTES_BIG_ENDIAN
)
11137 mem
= adjust_address_nv (mem
, mode
, 4);
11138 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11139 emit_insn (gen_movsd_hardfloat (mem
, operands
[1]));
11145 if (reload_in_progress
11147 && REG_P (operands
[0])
11148 && MEM_P (operands
[1])
11149 && cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
11150 && rtx_equal_p (operands
[1], cfun
->machine
->sdmode_stack_slot
))
11152 if (FP_REGNO_P (REGNO (operands
[0])))
11154 rtx mem
= adjust_address_nv (operands
[1], DDmode
, 0);
11155 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11156 emit_insn (gen_movsd_load (operands
[0], mem
));
11158 else if (INT_REGNO_P (REGNO (operands
[0])))
11160 rtx mem
= operands
[1];
11161 if (BYTES_BIG_ENDIAN
)
11162 mem
= adjust_address_nv (mem
, mode
, 4);
11163 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11164 emit_insn (gen_movsd_hardfloat (operands
[0], mem
));
11171 /* FIXME: In the long term, this switch statement should go away
11172 and be replaced by a sequence of tests based on things like
11178 if (CONSTANT_P (operands
[1])
11179 && GET_CODE (operands
[1]) != CONST_INT
)
11180 operands
[1] = force_const_mem (mode
, operands
[1]);
11187 if (FLOAT128_2REG_P (mode
))
11188 rs6000_eliminate_indexed_memrefs (operands
);
11195 if (CONSTANT_P (operands
[1])
11196 && ! easy_fp_constant (operands
[1], mode
))
11197 operands
[1] = force_const_mem (mode
, operands
[1]);
11211 if (CONSTANT_P (operands
[1])
11212 && !easy_vector_constant (operands
[1], mode
))
11213 operands
[1] = force_const_mem (mode
, operands
[1]);
11218 /* Use default pattern for address of ELF small data */
11221 && DEFAULT_ABI
== ABI_V4
11222 && (GET_CODE (operands
[1]) == SYMBOL_REF
11223 || GET_CODE (operands
[1]) == CONST
)
11224 && small_data_operand (operands
[1], mode
))
11226 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11230 if (DEFAULT_ABI
== ABI_V4
11231 && mode
== Pmode
&& mode
== SImode
11232 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11234 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11238 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11242 && CONSTANT_P (operands
[1])
11243 && GET_CODE (operands
[1]) != HIGH
11244 && GET_CODE (operands
[1]) != CONST_INT
)
11246 rtx target
= (!can_create_pseudo_p ()
11248 : gen_reg_rtx (mode
));
11250 /* If this is a function address on -mcall-aixdesc,
11251 convert it to the address of the descriptor. */
11252 if (DEFAULT_ABI
== ABI_AIX
11253 && GET_CODE (operands
[1]) == SYMBOL_REF
11254 && XSTR (operands
[1], 0)[0] == '.')
11256 const char *name
= XSTR (operands
[1], 0);
11258 while (*name
== '.')
11260 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11261 CONSTANT_POOL_ADDRESS_P (new_ref
)
11262 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11263 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11264 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11265 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11266 operands
[1] = new_ref
;
11269 if (DEFAULT_ABI
== ABI_DARWIN
)
11272 if (MACHO_DYNAMIC_NO_PIC_P
)
11274 /* Take care of any required data indirection. */
11275 operands
[1] = rs6000_machopic_legitimize_pic_address (
11276 operands
[1], mode
, operands
[0]);
11277 if (operands
[0] != operands
[1])
11278 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11282 emit_insn (gen_macho_high (target
, operands
[1]));
11283 emit_insn (gen_macho_low (operands
[0], target
, operands
[1]));
11287 emit_insn (gen_elf_high (target
, operands
[1]));
11288 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11292 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11293 and we have put it in the TOC, we just need to make a TOC-relative
11294 reference to it. */
11296 && GET_CODE (operands
[1]) == SYMBOL_REF
11297 && use_toc_relative_ref (operands
[1], mode
))
11298 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11299 else if (mode
== Pmode
11300 && CONSTANT_P (operands
[1])
11301 && GET_CODE (operands
[1]) != HIGH
11302 && ((GET_CODE (operands
[1]) != CONST_INT
11303 && ! easy_fp_constant (operands
[1], mode
))
11304 || (GET_CODE (operands
[1]) == CONST_INT
11305 && (num_insns_constant (operands
[1], mode
)
11306 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11307 || (GET_CODE (operands
[0]) == REG
11308 && FP_REGNO_P (REGNO (operands
[0]))))
11309 && !toc_relative_expr_p (operands
[1], false)
11310 && (TARGET_CMODEL
== CMODEL_SMALL
11311 || can_create_pseudo_p ()
11312 || (REG_P (operands
[0])
11313 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11317 /* Darwin uses a special PIC legitimizer. */
11318 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11321 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11323 if (operands
[0] != operands
[1])
11324 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11329 /* If we are to limit the number of things we put in the TOC and
11330 this is a symbol plus a constant we can add in one insn,
11331 just put the symbol in the TOC and add the constant. Don't do
11332 this if reload is in progress. */
11333 if (GET_CODE (operands
[1]) == CONST
11334 && TARGET_NO_SUM_IN_TOC
&& ! reload_in_progress
11335 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11336 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11337 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11338 || GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == SYMBOL_REF
)
11339 && ! side_effects_p (operands
[0]))
11342 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11343 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11345 sym
= force_reg (mode
, sym
);
11346 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11350 operands
[1] = force_const_mem (mode
, operands
[1]);
11353 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
11354 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11356 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11358 operands
[1] = gen_const_mem (mode
, tocref
);
11359 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11365 if (!VECTOR_MEM_VSX_P (TImode
))
11366 rs6000_eliminate_indexed_memrefs (operands
);
11370 rs6000_eliminate_indexed_memrefs (operands
);
11374 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11377 /* Above, we may have called force_const_mem which may have returned
11378 an invalid address. If we can, fix this up; otherwise, reload will
11379 have to deal with it. */
11380 if (GET_CODE (operands
[1]) == MEM
&& ! reload_in_progress
)
11381 operands
[1] = validize_mem (operands
[1]);
11384 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11387 /* Return true if a structure, union or array containing FIELD should be
11388 accessed using `BLKMODE'.
11390 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11391 entire thing in a DI and use subregs to access the internals.
11392 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11393 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11394 best thing to do is set structs to BLKmode and avoid Severe Tire
11397 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11398 fit into 1, whereas DI still needs two. */
11401 rs6000_member_type_forces_blk (const_tree field
, machine_mode mode
)
11403 return ((TARGET_SPE
&& TREE_CODE (TREE_TYPE (field
)) == VECTOR_TYPE
)
11404 || (TARGET_E500_DOUBLE
&& mode
== DFmode
));
11407 /* Nonzero if we can use a floating-point register to pass this arg. */
11408 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11409 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11410 && (CUM)->fregno <= FP_ARG_MAX_REG \
11411 && TARGET_HARD_FLOAT && TARGET_FPRS)
11413 /* Nonzero if we can use an AltiVec register to pass this arg. */
11414 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11415 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11416 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11417 && TARGET_ALTIVEC_ABI \
11420 /* Walk down the type tree of TYPE counting consecutive base elements.
11421 If *MODEP is VOIDmode, then set it to the first valid floating point
11422 or vector type. If a non-floating point or vector type is found, or
11423 if a floating point or vector type that doesn't match a non-VOIDmode
11424 *MODEP is found, then return -1, otherwise return the count in the
11428 rs6000_aggregate_candidate (const_tree type
, machine_mode
*modep
)
11431 HOST_WIDE_INT size
;
11433 switch (TREE_CODE (type
))
11436 mode
= TYPE_MODE (type
);
11437 if (!SCALAR_FLOAT_MODE_P (mode
))
11440 if (*modep
== VOIDmode
)
11443 if (*modep
== mode
)
11449 mode
= TYPE_MODE (TREE_TYPE (type
));
11450 if (!SCALAR_FLOAT_MODE_P (mode
))
11453 if (*modep
== VOIDmode
)
11456 if (*modep
== mode
)
11462 if (!TARGET_ALTIVEC_ABI
|| !TARGET_ALTIVEC
)
11465 /* Use V4SImode as representative of all 128-bit vector types. */
11466 size
= int_size_in_bytes (type
);
11476 if (*modep
== VOIDmode
)
11479 /* Vector modes are considered to be opaque: two vectors are
11480 equivalent for the purposes of being homogeneous aggregates
11481 if they are the same size. */
11482 if (*modep
== mode
)
11490 tree index
= TYPE_DOMAIN (type
);
11492 /* Can't handle incomplete types nor sizes that are not
11494 if (!COMPLETE_TYPE_P (type
)
11495 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11498 count
= rs6000_aggregate_candidate (TREE_TYPE (type
), modep
);
11501 || !TYPE_MAX_VALUE (index
)
11502 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
11503 || !TYPE_MIN_VALUE (index
)
11504 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
11508 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
11509 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
11511 /* There must be no padding. */
11512 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11524 /* Can't handle incomplete types nor sizes that are not
11526 if (!COMPLETE_TYPE_P (type
)
11527 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11530 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11532 if (TREE_CODE (field
) != FIELD_DECL
)
11535 sub_count
= rs6000_aggregate_candidate (TREE_TYPE (field
), modep
);
11538 count
+= sub_count
;
11541 /* There must be no padding. */
11542 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11549 case QUAL_UNION_TYPE
:
11551 /* These aren't very interesting except in a degenerate case. */
11556 /* Can't handle incomplete types nor sizes that are not
11558 if (!COMPLETE_TYPE_P (type
)
11559 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11562 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11564 if (TREE_CODE (field
) != FIELD_DECL
)
11567 sub_count
= rs6000_aggregate_candidate (TREE_TYPE (field
), modep
);
11570 count
= count
> sub_count
? count
: sub_count
;
11573 /* There must be no padding. */
11574 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11587 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11588 float or vector aggregate that shall be passed in FP/vector registers
11589 according to the ELFv2 ABI, return the homogeneous element mode in
11590 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11592 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11595 rs6000_discover_homogeneous_aggregate (machine_mode mode
, const_tree type
,
11596 machine_mode
*elt_mode
,
11599 /* Note that we do not accept complex types at the top level as
11600 homogeneous aggregates; these types are handled via the
11601 targetm.calls.split_complex_arg mechanism. Complex types
11602 can be elements of homogeneous aggregates, however. */
11603 if (DEFAULT_ABI
== ABI_ELFv2
&& type
&& AGGREGATE_TYPE_P (type
))
11605 machine_mode field_mode
= VOIDmode
;
11606 int field_count
= rs6000_aggregate_candidate (type
, &field_mode
);
11608 if (field_count
> 0)
11610 int n_regs
= (SCALAR_FLOAT_MODE_P (field_mode
) ?
11611 (GET_MODE_SIZE (field_mode
) + 7) >> 3 : 1);
11613 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11614 up to AGGR_ARG_NUM_REG registers. */
11615 if (field_count
* n_regs
<= AGGR_ARG_NUM_REG
)
11618 *elt_mode
= field_mode
;
11620 *n_elts
= field_count
;
11633 /* Return a nonzero value to say to return the function value in
11634 memory, just as large structures are always returned. TYPE will be
11635 the data type of the value, and FNTYPE will be the type of the
11636 function doing the returning, or @code{NULL} for libcalls.
11638 The AIX ABI for the RS/6000 specifies that all structures are
11639 returned in memory. The Darwin ABI does the same.
11641 For the Darwin 64 Bit ABI, a function result can be returned in
11642 registers or in memory, depending on the size of the return data
11643 type. If it is returned in registers, the value occupies the same
11644 registers as it would if it were the first and only function
11645 argument. Otherwise, the function places its result in memory at
11646 the location pointed to by GPR3.
11648 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11649 but a draft put them in memory, and GCC used to implement the draft
11650 instead of the final standard. Therefore, aix_struct_return
11651 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11652 compatibility can change DRAFT_V4_STRUCT_RET to override the
11653 default, and -m switches get the final word. See
11654 rs6000_option_override_internal for more details.
11656 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11657 long double support is enabled. These values are returned in memory.
11659 int_size_in_bytes returns -1 for variable size objects, which go in
11660 memory always. The cast to unsigned makes -1 > 8. */
11663 rs6000_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
11665 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11667 && rs6000_darwin64_abi
11668 && TREE_CODE (type
) == RECORD_TYPE
11669 && int_size_in_bytes (type
) > 0)
11671 CUMULATIVE_ARGS valcum
;
11675 valcum
.fregno
= FP_ARG_MIN_REG
;
11676 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
11677 /* Do a trial code generation as if this were going to be passed
11678 as an argument; if any part goes in memory, we return NULL. */
11679 valret
= rs6000_darwin64_record_arg (&valcum
, type
, true, true);
11682 /* Otherwise fall through to more conventional ABI rules. */
11685 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
11686 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type
), type
,
11690 /* The ELFv2 ABI returns aggregates up to 16B in registers */
11691 if (DEFAULT_ABI
== ABI_ELFv2
&& AGGREGATE_TYPE_P (type
)
11692 && (unsigned HOST_WIDE_INT
) int_size_in_bytes (type
) <= 16)
11695 if (AGGREGATE_TYPE_P (type
)
11696 && (aix_struct_return
11697 || (unsigned HOST_WIDE_INT
) int_size_in_bytes (type
) > 8))
11700 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11701 modes only exist for GCC vector types if -maltivec. */
11702 if (TARGET_32BIT
&& !TARGET_ALTIVEC_ABI
11703 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type
)))
11706 /* Return synthetic vectors in memory. */
11707 if (TREE_CODE (type
) == VECTOR_TYPE
11708 && int_size_in_bytes (type
) > (TARGET_ALTIVEC_ABI
? 16 : 8))
11710 static bool warned_for_return_big_vectors
= false;
11711 if (!warned_for_return_big_vectors
)
11713 warning (OPT_Wpsabi
, "GCC vector returned by reference: "
11714 "non-standard ABI extension with no compatibility guarantee");
11715 warned_for_return_big_vectors
= true;
11720 if (DEFAULT_ABI
== ABI_V4
&& TARGET_IEEEQUAD
11721 && FLOAT128_IEEE_P (TYPE_MODE (type
)))
11727 /* Specify whether values returned in registers should be at the most
11728 significant end of a register. We want aggregates returned by
11729 value to match the way aggregates are passed to functions. */
11732 rs6000_return_in_msb (const_tree valtype
)
11734 return (DEFAULT_ABI
== ABI_ELFv2
11735 && BYTES_BIG_ENDIAN
11736 && AGGREGATE_TYPE_P (valtype
)
11737 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype
), valtype
) == upward
);
11740 #ifdef HAVE_AS_GNU_ATTRIBUTE
11741 /* Return TRUE if a call to function FNDECL may be one that
11742 potentially affects the function calling ABI of the object file. */
11745 call_ABI_of_interest (tree fndecl
)
11747 if (rs6000_gnu_attr
&& symtab
->state
== EXPANSION
)
11749 struct cgraph_node
*c_node
;
11751 /* Libcalls are always interesting. */
11752 if (fndecl
== NULL_TREE
)
11755 /* Any call to an external function is interesting. */
11756 if (DECL_EXTERNAL (fndecl
))
11759 /* Interesting functions that we are emitting in this object file. */
11760 c_node
= cgraph_node::get (fndecl
);
11761 c_node
= c_node
->ultimate_alias_target ();
11762 return !c_node
->only_called_directly_p ();
11768 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11769 for a call to a function whose data type is FNTYPE.
11770 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11772 For incoming args we set the number of arguments in the prototype large
11773 so we never return a PARALLEL. */
11776 init_cumulative_args (CUMULATIVE_ARGS
*cum
, tree fntype
,
11777 rtx libname ATTRIBUTE_UNUSED
, int incoming
,
11778 int libcall
, int n_named_args
,
11779 tree fndecl ATTRIBUTE_UNUSED
,
11780 machine_mode return_mode ATTRIBUTE_UNUSED
)
11782 static CUMULATIVE_ARGS zero_cumulative
;
11784 *cum
= zero_cumulative
;
11786 cum
->fregno
= FP_ARG_MIN_REG
;
11787 cum
->vregno
= ALTIVEC_ARG_MIN_REG
;
11788 cum
->prototype
= (fntype
&& prototype_p (fntype
));
11789 cum
->call_cookie
= ((DEFAULT_ABI
== ABI_V4
&& libcall
)
11790 ? CALL_LIBCALL
: CALL_NORMAL
);
11791 cum
->sysv_gregno
= GP_ARG_MIN_REG
;
11792 cum
->stdarg
= stdarg_p (fntype
);
11793 cum
->libcall
= libcall
;
11795 cum
->nargs_prototype
= 0;
11796 if (incoming
|| cum
->prototype
)
11797 cum
->nargs_prototype
= n_named_args
;
11799 /* Check for a longcall attribute. */
11800 if ((!fntype
&& rs6000_default_long_calls
)
11802 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype
))
11803 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype
))))
11804 cum
->call_cookie
|= CALL_LONG
;
11806 if (TARGET_DEBUG_ARG
)
11808 fprintf (stderr
, "\ninit_cumulative_args:");
11811 tree ret_type
= TREE_TYPE (fntype
);
11812 fprintf (stderr
, " ret code = %s,",
11813 get_tree_code_name (TREE_CODE (ret_type
)));
11816 if (cum
->call_cookie
& CALL_LONG
)
11817 fprintf (stderr
, " longcall,");
11819 fprintf (stderr
, " proto = %d, nargs = %d\n",
11820 cum
->prototype
, cum
->nargs_prototype
);
11823 #ifdef HAVE_AS_GNU_ATTRIBUTE
11824 if (TARGET_ELF
&& (TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
))
11826 cum
->escapes
= call_ABI_of_interest (fndecl
);
11833 return_type
= TREE_TYPE (fntype
);
11834 return_mode
= TYPE_MODE (return_type
);
11837 return_type
= lang_hooks
.types
.type_for_mode (return_mode
, 0);
11839 if (return_type
!= NULL
)
11841 if (TREE_CODE (return_type
) == RECORD_TYPE
11842 && TYPE_TRANSPARENT_AGGR (return_type
))
11844 return_type
= TREE_TYPE (first_field (return_type
));
11845 return_mode
= TYPE_MODE (return_type
);
11847 if (AGGREGATE_TYPE_P (return_type
)
11848 && ((unsigned HOST_WIDE_INT
) int_size_in_bytes (return_type
)
11850 rs6000_returns_struct
= true;
11852 if (SCALAR_FLOAT_MODE_P (return_mode
))
11854 rs6000_passes_float
= true;
11855 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
)
11856 && (FLOAT128_IBM_P (return_mode
)
11857 || FLOAT128_IEEE_P (return_mode
)
11858 || (return_type
!= NULL
11859 && (TYPE_MAIN_VARIANT (return_type
)
11860 == long_double_type_node
))))
11861 rs6000_passes_long_double
= true;
11863 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode
)
11864 || SPE_VECTOR_MODE (return_mode
))
11865 rs6000_passes_vector
= true;
11872 && TARGET_ALTIVEC_ABI
11873 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype
))))
11875 error ("cannot return value in vector register because"
11876 " altivec instructions are disabled, use -maltivec"
11877 " to enable them");
11881 /* The mode the ABI uses for a word. This is not the same as word_mode
11882 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11884 static scalar_int_mode
11885 rs6000_abi_word_mode (void)
11887 return TARGET_32BIT
? SImode
: DImode
;
11890 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11892 rs6000_offload_options (void)
11895 return xstrdup ("-foffload-abi=lp64");
11897 return xstrdup ("-foffload-abi=ilp32");
11900 /* On rs6000, function arguments are promoted, as are function return
11903 static machine_mode
11904 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
11906 int *punsignedp ATTRIBUTE_UNUSED
,
11909 PROMOTE_MODE (mode
, *punsignedp
, type
);
11914 /* Return true if TYPE must be passed on the stack and not in registers. */
11917 rs6000_must_pass_in_stack (machine_mode mode
, const_tree type
)
11919 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
|| TARGET_64BIT
)
11920 return must_pass_in_stack_var_size (mode
, type
);
11922 return must_pass_in_stack_var_size_or_pad (mode
, type
);
11926 is_complex_IBM_long_double (machine_mode mode
)
11928 return mode
== ICmode
|| (!TARGET_IEEEQUAD
&& mode
== TCmode
);
11931 /* Whether ABI_V4 passes MODE args to a function in floating point
11935 abi_v4_pass_in_fpr (machine_mode mode
)
11937 if (!TARGET_FPRS
|| !TARGET_HARD_FLOAT
)
11939 if (TARGET_SINGLE_FLOAT
&& mode
== SFmode
)
11941 if (TARGET_DOUBLE_FLOAT
&& mode
== DFmode
)
11943 /* ABI_V4 passes complex IBM long double in 8 gprs.
11944 Stupid, but we can't change the ABI now. */
11945 if (is_complex_IBM_long_double (mode
))
11947 if (FLOAT128_2REG_P (mode
))
11949 if (DECIMAL_FLOAT_MODE_P (mode
))
11954 /* If defined, a C expression which determines whether, and in which
11955 direction, to pad out an argument with extra space. The value
11956 should be of type `enum direction': either `upward' to pad above
11957 the argument, `downward' to pad below, or `none' to inhibit
11960 For the AIX ABI structs are always stored left shifted in their
11964 function_arg_padding (machine_mode mode
, const_tree type
)
11966 #ifndef AGGREGATE_PADDING_FIXED
11967 #define AGGREGATE_PADDING_FIXED 0
11969 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11970 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11973 if (!AGGREGATE_PADDING_FIXED
)
11975 /* GCC used to pass structures of the same size as integer types as
11976 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11977 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11978 passed padded downward, except that -mstrict-align further
11979 muddied the water in that multi-component structures of 2 and 4
11980 bytes in size were passed padded upward.
11982 The following arranges for best compatibility with previous
11983 versions of gcc, but removes the -mstrict-align dependency. */
11984 if (BYTES_BIG_ENDIAN
)
11986 HOST_WIDE_INT size
= 0;
11988 if (mode
== BLKmode
)
11990 if (type
&& TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
11991 size
= int_size_in_bytes (type
);
11994 size
= GET_MODE_SIZE (mode
);
11996 if (size
== 1 || size
== 2 || size
== 4)
12002 if (AGGREGATES_PAD_UPWARD_ALWAYS
)
12004 if (type
!= 0 && AGGREGATE_TYPE_P (type
))
12008 /* Fall back to the default. */
12009 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
12012 /* If defined, a C expression that gives the alignment boundary, in bits,
12013 of an argument with the specified mode and type. If it is not defined,
12014 PARM_BOUNDARY is used for all arguments.
12016 V.4 wants long longs and doubles to be double word aligned. Just
12017 testing the mode size is a boneheaded way to do this as it means
12018 that other types such as complex int are also double word aligned.
12019 However, we're stuck with this because changing the ABI might break
12020 existing library interfaces.
12022 Doubleword align SPE vectors.
12023 Quadword align Altivec/VSX vectors.
12024 Quadword align large synthetic vector types. */
12026 static unsigned int
12027 rs6000_function_arg_boundary (machine_mode mode
, const_tree type
)
12029 machine_mode elt_mode
;
12032 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12034 if (DEFAULT_ABI
== ABI_V4
12035 && (GET_MODE_SIZE (mode
) == 8
12036 || (TARGET_HARD_FLOAT
12038 && !is_complex_IBM_long_double (mode
)
12039 && FLOAT128_2REG_P (mode
))))
12041 else if (FLOAT128_VECTOR_P (mode
))
12043 else if (SPE_VECTOR_MODE (mode
)
12044 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12045 && int_size_in_bytes (type
) >= 8
12046 && int_size_in_bytes (type
) < 16))
12048 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode
)
12049 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12050 && int_size_in_bytes (type
) >= 16))
12053 /* Aggregate types that need > 8 byte alignment are quadword-aligned
12054 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
12055 -mcompat-align-parm is used. */
12056 if (((DEFAULT_ABI
== ABI_AIX
&& !rs6000_compat_align_parm
)
12057 || DEFAULT_ABI
== ABI_ELFv2
)
12058 && type
&& TYPE_ALIGN (type
) > 64)
12060 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
12061 or homogeneous float/vector aggregates here. We already handled
12062 vector aggregates above, but still need to check for float here. */
12063 bool aggregate_p
= (AGGREGATE_TYPE_P (type
)
12064 && !SCALAR_FLOAT_MODE_P (elt_mode
));
12066 /* We used to check for BLKmode instead of the above aggregate type
12067 check. Warn when this results in any difference to the ABI. */
12068 if (aggregate_p
!= (mode
== BLKmode
))
12070 static bool warned
;
12071 if (!warned
&& warn_psabi
)
12074 inform (input_location
,
12075 "the ABI of passing aggregates with %d-byte alignment"
12076 " has changed in GCC 5",
12077 (int) TYPE_ALIGN (type
) / BITS_PER_UNIT
);
12085 /* Similar for the Darwin64 ABI. Note that for historical reasons we
12086 implement the "aggregate type" check as a BLKmode check here; this
12087 means certain aggregate types are in fact not aligned. */
12088 if (TARGET_MACHO
&& rs6000_darwin64_abi
12090 && type
&& TYPE_ALIGN (type
) > 64)
12093 return PARM_BOUNDARY
;
12096 /* The offset in words to the start of the parameter save area. */
12098 static unsigned int
12099 rs6000_parm_offset (void)
12101 return (DEFAULT_ABI
== ABI_V4
? 2
12102 : DEFAULT_ABI
== ABI_ELFv2
? 4
12106 /* For a function parm of MODE and TYPE, return the starting word in
12107 the parameter area. NWORDS of the parameter area are already used. */
12109 static unsigned int
12110 rs6000_parm_start (machine_mode mode
, const_tree type
,
12111 unsigned int nwords
)
12113 unsigned int align
;
12115 align
= rs6000_function_arg_boundary (mode
, type
) / PARM_BOUNDARY
- 1;
12116 return nwords
+ (-(rs6000_parm_offset () + nwords
) & align
);
12119 /* Compute the size (in words) of a function argument. */
12121 static unsigned long
12122 rs6000_arg_size (machine_mode mode
, const_tree type
)
12124 unsigned long size
;
12126 if (mode
!= BLKmode
)
12127 size
= GET_MODE_SIZE (mode
);
12129 size
= int_size_in_bytes (type
);
12132 return (size
+ 3) >> 2;
12134 return (size
+ 7) >> 3;
12137 /* Use this to flush pending int fields. */
12140 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS
*cum
,
12141 HOST_WIDE_INT bitpos
, int final
)
12143 unsigned int startbit
, endbit
;
12144 int intregs
, intoffset
;
12147 /* Handle the situations where a float is taking up the first half
12148 of the GPR, and the other half is empty (typically due to
12149 alignment restrictions). We can detect this by a 8-byte-aligned
12150 int field, or by seeing that this is the final flush for this
12151 argument. Count the word and continue on. */
12152 if (cum
->floats_in_gpr
== 1
12153 && (cum
->intoffset
% 64 == 0
12154 || (cum
->intoffset
== -1 && final
)))
12157 cum
->floats_in_gpr
= 0;
12160 if (cum
->intoffset
== -1)
12163 intoffset
= cum
->intoffset
;
12164 cum
->intoffset
= -1;
12165 cum
->floats_in_gpr
= 0;
12167 if (intoffset
% BITS_PER_WORD
!= 0)
12169 mode
= mode_for_size (BITS_PER_WORD
- intoffset
% BITS_PER_WORD
,
12171 if (mode
== BLKmode
)
12173 /* We couldn't find an appropriate mode, which happens,
12174 e.g., in packed structs when there are 3 bytes to load.
12175 Back intoffset back to the beginning of the word in this
12177 intoffset
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12181 startbit
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12182 endbit
= ROUND_UP (bitpos
, BITS_PER_WORD
);
12183 intregs
= (endbit
- startbit
) / BITS_PER_WORD
;
12184 cum
->words
+= intregs
;
12185 /* words should be unsigned. */
12186 if ((unsigned)cum
->words
< (endbit
/BITS_PER_WORD
))
12188 int pad
= (endbit
/BITS_PER_WORD
) - cum
->words
;
12193 /* The darwin64 ABI calls for us to recurse down through structs,
12194 looking for elements passed in registers. Unfortunately, we have
12195 to track int register count here also because of misalignments
12196 in powerpc alignment mode. */
12199 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS
*cum
,
12201 HOST_WIDE_INT startbitpos
)
12205 for (f
= TYPE_FIELDS (type
); f
; f
= DECL_CHAIN (f
))
12206 if (TREE_CODE (f
) == FIELD_DECL
)
12208 HOST_WIDE_INT bitpos
= startbitpos
;
12209 tree ftype
= TREE_TYPE (f
);
12211 if (ftype
== error_mark_node
)
12213 mode
= TYPE_MODE (ftype
);
12215 if (DECL_SIZE (f
) != 0
12216 && tree_fits_uhwi_p (bit_position (f
)))
12217 bitpos
+= int_bit_position (f
);
12219 /* ??? FIXME: else assume zero offset. */
12221 if (TREE_CODE (ftype
) == RECORD_TYPE
)
12222 rs6000_darwin64_record_arg_advance_recurse (cum
, ftype
, bitpos
);
12223 else if (USE_FP_FOR_ARG_P (cum
, mode
))
12225 unsigned n_fpregs
= (GET_MODE_SIZE (mode
) + 7) >> 3;
12226 rs6000_darwin64_record_arg_advance_flush (cum
, bitpos
, 0);
12227 cum
->fregno
+= n_fpregs
;
12228 /* Single-precision floats present a special problem for
12229 us, because they are smaller than an 8-byte GPR, and so
12230 the structure-packing rules combined with the standard
12231 varargs behavior mean that we want to pack float/float
12232 and float/int combinations into a single register's
12233 space. This is complicated by the arg advance flushing,
12234 which works on arbitrarily large groups of int-type
12236 if (mode
== SFmode
)
12238 if (cum
->floats_in_gpr
== 1)
12240 /* Two floats in a word; count the word and reset
12241 the float count. */
12243 cum
->floats_in_gpr
= 0;
12245 else if (bitpos
% 64 == 0)
12247 /* A float at the beginning of an 8-byte word;
12248 count it and put off adjusting cum->words until
12249 we see if a arg advance flush is going to do it
12251 cum
->floats_in_gpr
++;
12255 /* The float is at the end of a word, preceded
12256 by integer fields, so the arg advance flush
12257 just above has already set cum->words and
12258 everything is taken care of. */
12262 cum
->words
+= n_fpregs
;
12264 else if (USE_ALTIVEC_FOR_ARG_P (cum
, mode
, 1))
12266 rs6000_darwin64_record_arg_advance_flush (cum
, bitpos
, 0);
12270 else if (cum
->intoffset
== -1)
12271 cum
->intoffset
= bitpos
;
12275 /* Check for an item that needs to be considered specially under the darwin 64
12276 bit ABI. These are record types where the mode is BLK or the structure is
12277 8 bytes in size. */
12279 rs6000_darwin64_struct_check_p (machine_mode mode
, const_tree type
)
12281 return rs6000_darwin64_abi
12282 && ((mode
== BLKmode
12283 && TREE_CODE (type
) == RECORD_TYPE
12284 && int_size_in_bytes (type
) > 0)
12285 || (type
&& TREE_CODE (type
) == RECORD_TYPE
12286 && int_size_in_bytes (type
) == 8)) ? 1 : 0;
12289 /* Update the data in CUM to advance over an argument
12290 of mode MODE and data type TYPE.
12291 (TYPE is null for libcalls where that information may not be available.)
12293 Note that for args passed by reference, function_arg will be called
12294 with MODE and TYPE set to that of the pointer to the arg, not the arg
12298 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
12299 const_tree type
, bool named
, int depth
)
12301 machine_mode elt_mode
;
12304 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12306 /* Only tick off an argument if we're not recursing. */
12308 cum
->nargs_prototype
--;
12310 #ifdef HAVE_AS_GNU_ATTRIBUTE
12311 if (TARGET_ELF
&& (TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
12314 if (SCALAR_FLOAT_MODE_P (mode
))
12316 rs6000_passes_float
= true;
12317 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
)
12318 && (FLOAT128_IBM_P (mode
)
12319 || FLOAT128_IEEE_P (mode
)
12321 && TYPE_MAIN_VARIANT (type
) == long_double_type_node
)))
12322 rs6000_passes_long_double
= true;
12324 if ((named
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
12325 || (SPE_VECTOR_MODE (mode
)
12327 && cum
->sysv_gregno
<= GP_ARG_MAX_REG
))
12328 rs6000_passes_vector
= true;
12332 if (TARGET_ALTIVEC_ABI
12333 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode
)
12334 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12335 && int_size_in_bytes (type
) == 16)))
12337 bool stack
= false;
12339 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
12341 cum
->vregno
+= n_elts
;
12343 if (!TARGET_ALTIVEC
)
12344 error ("cannot pass argument in vector register because"
12345 " altivec instructions are disabled, use -maltivec"
12346 " to enable them");
12348 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12349 even if it is going to be passed in a vector register.
12350 Darwin does the same for variable-argument functions. */
12351 if (((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
12353 || (cum
->stdarg
&& DEFAULT_ABI
!= ABI_V4
))
12363 /* Vector parameters must be 16-byte aligned. In 32-bit
12364 mode this means we need to take into account the offset
12365 to the parameter save area. In 64-bit mode, they just
12366 have to start on an even word, since the parameter save
12367 area is 16-byte aligned. */
12369 align
= -(rs6000_parm_offset () + cum
->words
) & 3;
12371 align
= cum
->words
& 1;
12372 cum
->words
+= align
+ rs6000_arg_size (mode
, type
);
12374 if (TARGET_DEBUG_ARG
)
12376 fprintf (stderr
, "function_adv: words = %2d, align=%d, ",
12377 cum
->words
, align
);
12378 fprintf (stderr
, "nargs = %4d, proto = %d, mode = %4s\n",
12379 cum
->nargs_prototype
, cum
->prototype
,
12380 GET_MODE_NAME (mode
));
12384 else if (TARGET_SPE_ABI
&& TARGET_SPE
&& SPE_VECTOR_MODE (mode
)
12386 && cum
->sysv_gregno
<= GP_ARG_MAX_REG
)
12387 cum
->sysv_gregno
++;
12389 else if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
12391 int size
= int_size_in_bytes (type
);
12392 /* Variable sized types have size == -1 and are
12393 treated as if consisting entirely of ints.
12394 Pad to 16 byte boundary if needed. */
12395 if (TYPE_ALIGN (type
) >= 2 * BITS_PER_WORD
12396 && (cum
->words
% 2) != 0)
12398 /* For varargs, we can just go up by the size of the struct. */
12400 cum
->words
+= (size
+ 7) / 8;
12403 /* It is tempting to say int register count just goes up by
12404 sizeof(type)/8, but this is wrong in a case such as
12405 { int; double; int; } [powerpc alignment]. We have to
12406 grovel through the fields for these too. */
12407 cum
->intoffset
= 0;
12408 cum
->floats_in_gpr
= 0;
12409 rs6000_darwin64_record_arg_advance_recurse (cum
, type
, 0);
12410 rs6000_darwin64_record_arg_advance_flush (cum
,
12411 size
* BITS_PER_UNIT
, 1);
12413 if (TARGET_DEBUG_ARG
)
12415 fprintf (stderr
, "function_adv: words = %2d, align=%d, size=%d",
12416 cum
->words
, TYPE_ALIGN (type
), size
);
12418 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12419 cum
->nargs_prototype
, cum
->prototype
,
12420 GET_MODE_NAME (mode
));
12423 else if (DEFAULT_ABI
== ABI_V4
)
12425 if (abi_v4_pass_in_fpr (mode
))
12427 /* _Decimal128 must use an even/odd register pair. This assumes
12428 that the register number is odd when fregno is odd. */
12429 if (mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12432 if (cum
->fregno
+ (FLOAT128_2REG_P (mode
) ? 1 : 0)
12433 <= FP_ARG_V4_MAX_REG
)
12434 cum
->fregno
+= (GET_MODE_SIZE (mode
) + 7) >> 3;
12437 cum
->fregno
= FP_ARG_V4_MAX_REG
+ 1;
12438 if (mode
== DFmode
|| FLOAT128_IBM_P (mode
)
12439 || mode
== DDmode
|| mode
== TDmode
)
12440 cum
->words
+= cum
->words
& 1;
12441 cum
->words
+= rs6000_arg_size (mode
, type
);
12446 int n_words
= rs6000_arg_size (mode
, type
);
12447 int gregno
= cum
->sysv_gregno
;
12449 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12450 (r7,r8) or (r9,r10). As does any other 2 word item such
12451 as complex int due to a historical mistake. */
12453 gregno
+= (1 - gregno
) & 1;
12455 /* Multi-reg args are not split between registers and stack. */
12456 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
12458 /* Long long and SPE vectors are aligned on the stack.
12459 So are other 2 word items such as complex int due to
12460 a historical mistake. */
12462 cum
->words
+= cum
->words
& 1;
12463 cum
->words
+= n_words
;
12466 /* Note: continuing to accumulate gregno past when we've started
12467 spilling to the stack indicates the fact that we've started
12468 spilling to the stack to expand_builtin_saveregs. */
12469 cum
->sysv_gregno
= gregno
+ n_words
;
12472 if (TARGET_DEBUG_ARG
)
12474 fprintf (stderr
, "function_adv: words = %2d, fregno = %2d, ",
12475 cum
->words
, cum
->fregno
);
12476 fprintf (stderr
, "gregno = %2d, nargs = %4d, proto = %d, ",
12477 cum
->sysv_gregno
, cum
->nargs_prototype
, cum
->prototype
);
12478 fprintf (stderr
, "mode = %4s, named = %d\n",
12479 GET_MODE_NAME (mode
), named
);
12484 int n_words
= rs6000_arg_size (mode
, type
);
12485 int start_words
= cum
->words
;
12486 int align_words
= rs6000_parm_start (mode
, type
, start_words
);
12488 cum
->words
= align_words
+ n_words
;
12490 if (SCALAR_FLOAT_MODE_P (elt_mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
12492 /* _Decimal128 must be passed in an even/odd float register pair.
12493 This assumes that the register number is odd when fregno is
12495 if (elt_mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12497 cum
->fregno
+= n_elts
* ((GET_MODE_SIZE (elt_mode
) + 7) >> 3);
12500 if (TARGET_DEBUG_ARG
)
12502 fprintf (stderr
, "function_adv: words = %2d, fregno = %2d, ",
12503 cum
->words
, cum
->fregno
);
12504 fprintf (stderr
, "nargs = %4d, proto = %d, mode = %4s, ",
12505 cum
->nargs_prototype
, cum
->prototype
, GET_MODE_NAME (mode
));
12506 fprintf (stderr
, "named = %d, align = %d, depth = %d\n",
12507 named
, align_words
- start_words
, depth
);
12513 rs6000_function_arg_advance (cumulative_args_t cum
, machine_mode mode
,
12514 const_tree type
, bool named
)
12516 rs6000_function_arg_advance_1 (get_cumulative_args (cum
), mode
, type
, named
,
12521 spe_build_register_parallel (machine_mode mode
, int gregno
)
12523 rtx r1
, r3
, r5
, r7
;
12528 r1
= gen_rtx_REG (DImode
, gregno
);
12529 r1
= gen_rtx_EXPR_LIST (VOIDmode
, r1
, const0_rtx
);
12530 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, r1
));
12534 r1
= gen_rtx_REG (DImode
, gregno
);
12535 r1
= gen_rtx_EXPR_LIST (VOIDmode
, r1
, const0_rtx
);
12536 r3
= gen_rtx_REG (DImode
, gregno
+ 2);
12537 r3
= gen_rtx_EXPR_LIST (VOIDmode
, r3
, GEN_INT (8));
12538 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r3
));
12541 r1
= gen_rtx_REG (DImode
, gregno
);
12542 r1
= gen_rtx_EXPR_LIST (VOIDmode
, r1
, const0_rtx
);
12543 r3
= gen_rtx_REG (DImode
, gregno
+ 2);
12544 r3
= gen_rtx_EXPR_LIST (VOIDmode
, r3
, GEN_INT (8));
12545 r5
= gen_rtx_REG (DImode
, gregno
+ 4);
12546 r5
= gen_rtx_EXPR_LIST (VOIDmode
, r5
, GEN_INT (16));
12547 r7
= gen_rtx_REG (DImode
, gregno
+ 6);
12548 r7
= gen_rtx_EXPR_LIST (VOIDmode
, r7
, GEN_INT (24));
12549 return gen_rtx_PARALLEL (mode
, gen_rtvec (4, r1
, r3
, r5
, r7
));
12552 gcc_unreachable ();
12556 /* Determine where to put a SIMD argument on the SPE. */
12558 rs6000_spe_function_arg (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
12561 int gregno
= cum
->sysv_gregno
;
12563 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12564 are passed and returned in a pair of GPRs for ABI compatibility. */
12565 if (TARGET_E500_DOUBLE
&& (mode
== DFmode
|| mode
== TFmode
12566 || mode
== DCmode
|| mode
== TCmode
))
12568 int n_words
= rs6000_arg_size (mode
, type
);
12570 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12571 if (mode
== DFmode
)
12572 gregno
+= (1 - gregno
) & 1;
12574 /* Multi-reg args are not split between registers and stack. */
12575 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
12578 return spe_build_register_parallel (mode
, gregno
);
12582 int n_words
= rs6000_arg_size (mode
, type
);
12584 /* SPE vectors are put in odd registers. */
12585 if (n_words
== 2 && (gregno
& 1) == 0)
12588 if (gregno
+ n_words
- 1 <= GP_ARG_MAX_REG
)
12591 machine_mode m
= SImode
;
12593 r1
= gen_rtx_REG (m
, gregno
);
12594 r1
= gen_rtx_EXPR_LIST (m
, r1
, const0_rtx
);
12595 r2
= gen_rtx_REG (m
, gregno
+ 1);
12596 r2
= gen_rtx_EXPR_LIST (m
, r2
, GEN_INT (4));
12597 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
12604 if (gregno
<= GP_ARG_MAX_REG
)
12605 return gen_rtx_REG (mode
, gregno
);
12611 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12612 structure between cum->intoffset and bitpos to integer registers. */
12615 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS
*cum
,
12616 HOST_WIDE_INT bitpos
, rtx rvec
[], int *k
)
12619 unsigned int regno
;
12620 unsigned int startbit
, endbit
;
12621 int this_regno
, intregs
, intoffset
;
12624 if (cum
->intoffset
== -1)
12627 intoffset
= cum
->intoffset
;
12628 cum
->intoffset
= -1;
12630 /* If this is the trailing part of a word, try to only load that
12631 much into the register. Otherwise load the whole register. Note
12632 that in the latter case we may pick up unwanted bits. It's not a
12633 problem at the moment but may wish to revisit. */
12635 if (intoffset
% BITS_PER_WORD
!= 0)
12637 mode
= mode_for_size (BITS_PER_WORD
- intoffset
% BITS_PER_WORD
,
12639 if (mode
== BLKmode
)
12641 /* We couldn't find an appropriate mode, which happens,
12642 e.g., in packed structs when there are 3 bytes to load.
12643 Back intoffset back to the beginning of the word in this
12645 intoffset
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12652 startbit
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12653 endbit
= ROUND_UP (bitpos
, BITS_PER_WORD
);
12654 intregs
= (endbit
- startbit
) / BITS_PER_WORD
;
12655 this_regno
= cum
->words
+ intoffset
/ BITS_PER_WORD
;
12657 if (intregs
> 0 && intregs
> GP_ARG_NUM_REG
- this_regno
)
12658 cum
->use_stack
= 1;
12660 intregs
= MIN (intregs
, GP_ARG_NUM_REG
- this_regno
);
12664 intoffset
/= BITS_PER_UNIT
;
12667 regno
= GP_ARG_MIN_REG
+ this_regno
;
12668 reg
= gen_rtx_REG (mode
, regno
);
12670 gen_rtx_EXPR_LIST (VOIDmode
, reg
, GEN_INT (intoffset
));
12673 intoffset
= (intoffset
| (UNITS_PER_WORD
-1)) + 1;
12677 while (intregs
> 0);
12680 /* Recursive workhorse for the following. */
12683 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS
*cum
, const_tree type
,
12684 HOST_WIDE_INT startbitpos
, rtx rvec
[],
12689 for (f
= TYPE_FIELDS (type
); f
; f
= DECL_CHAIN (f
))
12690 if (TREE_CODE (f
) == FIELD_DECL
)
12692 HOST_WIDE_INT bitpos
= startbitpos
;
12693 tree ftype
= TREE_TYPE (f
);
12695 if (ftype
== error_mark_node
)
12697 mode
= TYPE_MODE (ftype
);
12699 if (DECL_SIZE (f
) != 0
12700 && tree_fits_uhwi_p (bit_position (f
)))
12701 bitpos
+= int_bit_position (f
);
12703 /* ??? FIXME: else assume zero offset. */
12705 if (TREE_CODE (ftype
) == RECORD_TYPE
)
12706 rs6000_darwin64_record_arg_recurse (cum
, ftype
, bitpos
, rvec
, k
);
12707 else if (cum
->named
&& USE_FP_FOR_ARG_P (cum
, mode
))
12709 unsigned n_fpreg
= (GET_MODE_SIZE (mode
) + 7) >> 3;
12713 case E_SCmode
: mode
= SFmode
; break;
12714 case E_DCmode
: mode
= DFmode
; break;
12715 case E_TCmode
: mode
= TFmode
; break;
12719 rs6000_darwin64_record_arg_flush (cum
, bitpos
, rvec
, k
);
12720 if (cum
->fregno
+ n_fpreg
> FP_ARG_MAX_REG
+ 1)
12722 gcc_assert (cum
->fregno
== FP_ARG_MAX_REG
12723 && (mode
== TFmode
|| mode
== TDmode
));
12724 /* Long double or _Decimal128 split over regs and memory. */
12725 mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
: DFmode
;
12729 = gen_rtx_EXPR_LIST (VOIDmode
,
12730 gen_rtx_REG (mode
, cum
->fregno
++),
12731 GEN_INT (bitpos
/ BITS_PER_UNIT
));
12732 if (FLOAT128_2REG_P (mode
))
12735 else if (cum
->named
&& USE_ALTIVEC_FOR_ARG_P (cum
, mode
, 1))
12737 rs6000_darwin64_record_arg_flush (cum
, bitpos
, rvec
, k
);
12739 = gen_rtx_EXPR_LIST (VOIDmode
,
12740 gen_rtx_REG (mode
, cum
->vregno
++),
12741 GEN_INT (bitpos
/ BITS_PER_UNIT
));
12743 else if (cum
->intoffset
== -1)
12744 cum
->intoffset
= bitpos
;
12748 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12749 the register(s) to be used for each field and subfield of a struct
12750 being passed by value, along with the offset of where the
12751 register's value may be found in the block. FP fields go in FP
12752 register, vector fields go in vector registers, and everything
12753 else goes in int registers, packed as in memory.
12755 This code is also used for function return values. RETVAL indicates
12756 whether this is the case.
12758 Much of this is taken from the SPARC V9 port, which has a similar
12759 calling convention. */
12762 rs6000_darwin64_record_arg (CUMULATIVE_ARGS
*orig_cum
, const_tree type
,
12763 bool named
, bool retval
)
12765 rtx rvec
[FIRST_PSEUDO_REGISTER
];
12766 int k
= 1, kbase
= 1;
12767 HOST_WIDE_INT typesize
= int_size_in_bytes (type
);
12768 /* This is a copy; modifications are not visible to our caller. */
12769 CUMULATIVE_ARGS copy_cum
= *orig_cum
;
12770 CUMULATIVE_ARGS
*cum
= ©_cum
;
12772 /* Pad to 16 byte boundary if needed. */
12773 if (!retval
&& TYPE_ALIGN (type
) >= 2 * BITS_PER_WORD
12774 && (cum
->words
% 2) != 0)
12777 cum
->intoffset
= 0;
12778 cum
->use_stack
= 0;
12779 cum
->named
= named
;
12781 /* Put entries into rvec[] for individual FP and vector fields, and
12782 for the chunks of memory that go in int regs. Note we start at
12783 element 1; 0 is reserved for an indication of using memory, and
12784 may or may not be filled in below. */
12785 rs6000_darwin64_record_arg_recurse (cum
, type
, /* startbit pos= */ 0, rvec
, &k
);
12786 rs6000_darwin64_record_arg_flush (cum
, typesize
* BITS_PER_UNIT
, rvec
, &k
);
12788 /* If any part of the struct went on the stack put all of it there.
12789 This hack is because the generic code for
12790 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12791 parts of the struct are not at the beginning. */
12792 if (cum
->use_stack
)
12795 return NULL_RTX
; /* doesn't go in registers at all */
12797 rvec
[0] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12799 if (k
> 1 || cum
->use_stack
)
12800 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (k
- kbase
, &rvec
[kbase
]));
12805 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12808 rs6000_mixed_function_arg (machine_mode mode
, const_tree type
,
12813 rtx rvec
[GP_ARG_NUM_REG
+ 1];
12815 if (align_words
>= GP_ARG_NUM_REG
)
12818 n_units
= rs6000_arg_size (mode
, type
);
12820 /* Optimize the simple case where the arg fits in one gpr, except in
12821 the case of BLKmode due to assign_parms assuming that registers are
12822 BITS_PER_WORD wide. */
12824 || (n_units
== 1 && mode
!= BLKmode
))
12825 return gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
12828 if (align_words
+ n_units
> GP_ARG_NUM_REG
)
12829 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12830 using a magic NULL_RTX component.
12831 This is not strictly correct. Only some of the arg belongs in
12832 memory, not all of it. However, the normal scheme using
12833 function_arg_partial_nregs can result in unusual subregs, eg.
12834 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12835 store the whole arg to memory is often more efficient than code
12836 to store pieces, and we know that space is available in the right
12837 place for the whole arg. */
12838 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12843 rtx r
= gen_rtx_REG (SImode
, GP_ARG_MIN_REG
+ align_words
);
12844 rtx off
= GEN_INT (i
++ * 4);
12845 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12847 while (++align_words
< GP_ARG_NUM_REG
&& --n_units
!= 0);
12849 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (k
, rvec
));
12852 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12853 but must also be copied into the parameter save area starting at
12854 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12855 to the GPRs and/or memory. Return the number of elements used. */
12858 rs6000_psave_function_arg (machine_mode mode
, const_tree type
,
12859 int align_words
, rtx
*rvec
)
12863 if (align_words
< GP_ARG_NUM_REG
)
12865 int n_words
= rs6000_arg_size (mode
, type
);
12867 if (align_words
+ n_words
> GP_ARG_NUM_REG
12869 || (TARGET_32BIT
&& TARGET_POWERPC64
))
12871 /* If this is partially on the stack, then we only
12872 include the portion actually in registers here. */
12873 machine_mode rmode
= TARGET_32BIT
? SImode
: DImode
;
12876 if (align_words
+ n_words
> GP_ARG_NUM_REG
)
12878 /* Not all of the arg fits in gprs. Say that it goes in memory
12879 too, using a magic NULL_RTX component. Also see comment in
12880 rs6000_mixed_function_arg for why the normal
12881 function_arg_partial_nregs scheme doesn't work in this case. */
12882 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12887 rtx r
= gen_rtx_REG (rmode
, GP_ARG_MIN_REG
+ align_words
);
12888 rtx off
= GEN_INT (i
++ * GET_MODE_SIZE (rmode
));
12889 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12891 while (++align_words
< GP_ARG_NUM_REG
&& --n_words
!= 0);
12895 /* The whole arg fits in gprs. */
12896 rtx r
= gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
12897 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, const0_rtx
);
12902 /* It's entirely in memory. */
12903 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12909 /* RVEC is a vector of K components of an argument of mode MODE.
12910 Construct the final function_arg return value from it. */
12913 rs6000_finish_function_arg (machine_mode mode
, rtx
*rvec
, int k
)
12915 gcc_assert (k
>= 1);
12917 /* Avoid returning a PARALLEL in the trivial cases. */
12920 if (XEXP (rvec
[0], 0) == NULL_RTX
)
12923 if (GET_MODE (XEXP (rvec
[0], 0)) == mode
)
12924 return XEXP (rvec
[0], 0);
12927 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (k
, rvec
));
12930 /* Determine where to put an argument to a function.
12931 Value is zero to push the argument on the stack,
12932 or a hard register in which to store the argument.
12934 MODE is the argument's machine mode.
12935 TYPE is the data type of the argument (as a tree).
12936 This is null for libcalls where that information may
12938 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12939 the preceding args and about the function being called. It is
12940 not modified in this routine.
12941 NAMED is nonzero if this argument is a named parameter
12942 (otherwise it is an extra parameter matching an ellipsis).
12944 On RS/6000 the first eight words of non-FP are normally in registers
12945 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12946 Under V.4, the first 8 FP args are in registers.
12948 If this is floating-point and no prototype is specified, we use
12949 both an FP and integer register (or possibly FP reg and stack). Library
12950 functions (when CALL_LIBCALL is set) always have the proper types for args,
12951 so we can pass the FP value just in one register. emit_library_function
12952 doesn't support PARALLEL anyway.
12954 Note that for args passed by reference, function_arg will be called
12955 with MODE and TYPE set to that of the pointer to the arg, not the arg
12959 rs6000_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
12960 const_tree type
, bool named
)
12962 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12963 enum rs6000_abi abi
= DEFAULT_ABI
;
12964 machine_mode elt_mode
;
12967 /* Return a marker to indicate whether CR1 needs to set or clear the
12968 bit that V.4 uses to say fp args were passed in registers.
12969 Assume that we don't need the marker for software floating point,
12970 or compiler generated library calls. */
12971 if (mode
== VOIDmode
)
12974 && (cum
->call_cookie
& CALL_LIBCALL
) == 0
12976 || (cum
->nargs_prototype
< 0
12977 && (cum
->prototype
|| TARGET_NO_PROTOTYPE
))))
12979 /* For the SPE, we need to crxor CR6 always. */
12980 if (TARGET_SPE_ABI
)
12981 return GEN_INT (cum
->call_cookie
| CALL_V4_SET_FP_ARGS
);
12982 else if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
12983 return GEN_INT (cum
->call_cookie
12984 | ((cum
->fregno
== FP_ARG_MIN_REG
)
12985 ? CALL_V4_SET_FP_ARGS
12986 : CALL_V4_CLEAR_FP_ARGS
));
12989 return GEN_INT (cum
->call_cookie
& ~CALL_LIBCALL
);
12992 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12994 if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
12996 rtx rslt
= rs6000_darwin64_record_arg (cum
, type
, named
, /*retval= */false);
12997 if (rslt
!= NULL_RTX
)
12999 /* Else fall through to usual handling. */
13002 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
13004 rtx rvec
[GP_ARG_NUM_REG
+ AGGR_ARG_NUM_REG
+ 1];
13008 /* Do we also need to pass this argument in the parameter save area?
13009 Library support functions for IEEE 128-bit are assumed to not need the
13010 value passed both in GPRs and in vector registers. */
13011 if (TARGET_64BIT
&& !cum
->prototype
13012 && (!cum
->libcall
|| !FLOAT128_VECTOR_P (elt_mode
)))
13014 int align_words
= ROUND_UP (cum
->words
, 2);
13015 k
= rs6000_psave_function_arg (mode
, type
, align_words
, rvec
);
13018 /* Describe where this argument goes in the vector registers. */
13019 for (i
= 0; i
< n_elts
&& cum
->vregno
+ i
<= ALTIVEC_ARG_MAX_REG
; i
++)
13021 r
= gen_rtx_REG (elt_mode
, cum
->vregno
+ i
);
13022 off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
13023 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
13026 return rs6000_finish_function_arg (mode
, rvec
, k
);
13028 else if (TARGET_ALTIVEC_ABI
13029 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
13030 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
13031 && int_size_in_bytes (type
) == 16)))
13033 if (named
|| abi
== ABI_V4
)
13037 /* Vector parameters to varargs functions under AIX or Darwin
13038 get passed in memory and possibly also in GPRs. */
13039 int align
, align_words
, n_words
;
13040 machine_mode part_mode
;
13042 /* Vector parameters must be 16-byte aligned. In 32-bit
13043 mode this means we need to take into account the offset
13044 to the parameter save area. In 64-bit mode, they just
13045 have to start on an even word, since the parameter save
13046 area is 16-byte aligned. */
13048 align
= -(rs6000_parm_offset () + cum
->words
) & 3;
13050 align
= cum
->words
& 1;
13051 align_words
= cum
->words
+ align
;
13053 /* Out of registers? Memory, then. */
13054 if (align_words
>= GP_ARG_NUM_REG
)
13057 if (TARGET_32BIT
&& TARGET_POWERPC64
)
13058 return rs6000_mixed_function_arg (mode
, type
, align_words
);
13060 /* The vector value goes in GPRs. Only the part of the
13061 value in GPRs is reported here. */
13063 n_words
= rs6000_arg_size (mode
, type
);
13064 if (align_words
+ n_words
> GP_ARG_NUM_REG
)
13065 /* Fortunately, there are only two possibilities, the value
13066 is either wholly in GPRs or half in GPRs and half not. */
13067 part_mode
= DImode
;
13069 return gen_rtx_REG (part_mode
, GP_ARG_MIN_REG
+ align_words
);
13072 else if (TARGET_SPE_ABI
&& TARGET_SPE
13073 && (SPE_VECTOR_MODE (mode
)
13074 || (TARGET_E500_DOUBLE
&& (mode
== DFmode
13077 || mode
== TCmode
))))
13078 return rs6000_spe_function_arg (cum
, mode
, type
);
13080 else if (abi
== ABI_V4
)
13082 if (abi_v4_pass_in_fpr (mode
))
13084 /* _Decimal128 must use an even/odd register pair. This assumes
13085 that the register number is odd when fregno is odd. */
13086 if (mode
== TDmode
&& (cum
->fregno
% 2) == 1)
13089 if (cum
->fregno
+ (FLOAT128_2REG_P (mode
) ? 1 : 0)
13090 <= FP_ARG_V4_MAX_REG
)
13091 return gen_rtx_REG (mode
, cum
->fregno
);
13097 int n_words
= rs6000_arg_size (mode
, type
);
13098 int gregno
= cum
->sysv_gregno
;
13100 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
13101 (r7,r8) or (r9,r10). As does any other 2 word item such
13102 as complex int due to a historical mistake. */
13104 gregno
+= (1 - gregno
) & 1;
13106 /* Multi-reg args are not split between registers and stack. */
13107 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
13110 if (TARGET_32BIT
&& TARGET_POWERPC64
)
13111 return rs6000_mixed_function_arg (mode
, type
,
13112 gregno
- GP_ARG_MIN_REG
);
13113 return gen_rtx_REG (mode
, gregno
);
13118 int align_words
= rs6000_parm_start (mode
, type
, cum
->words
);
13120 /* _Decimal128 must be passed in an even/odd float register pair.
13121 This assumes that the register number is odd when fregno is odd. */
13122 if (elt_mode
== TDmode
&& (cum
->fregno
% 2) == 1)
13125 if (USE_FP_FOR_ARG_P (cum
, elt_mode
))
13127 rtx rvec
[GP_ARG_NUM_REG
+ AGGR_ARG_NUM_REG
+ 1];
13130 unsigned long n_fpreg
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
13133 /* Do we also need to pass this argument in the parameter
13135 if (type
&& (cum
->nargs_prototype
<= 0
13136 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
13137 && TARGET_XL_COMPAT
13138 && align_words
>= GP_ARG_NUM_REG
)))
13139 k
= rs6000_psave_function_arg (mode
, type
, align_words
, rvec
);
13141 /* Describe where this argument goes in the fprs. */
13142 for (i
= 0; i
< n_elts
13143 && cum
->fregno
+ i
* n_fpreg
<= FP_ARG_MAX_REG
; i
++)
13145 /* Check if the argument is split over registers and memory.
13146 This can only ever happen for long double or _Decimal128;
13147 complex types are handled via split_complex_arg. */
13148 machine_mode fmode
= elt_mode
;
13149 if (cum
->fregno
+ (i
+ 1) * n_fpreg
> FP_ARG_MAX_REG
+ 1)
13151 gcc_assert (FLOAT128_2REG_P (fmode
));
13152 fmode
= DECIMAL_FLOAT_MODE_P (fmode
) ? DDmode
: DFmode
;
13155 r
= gen_rtx_REG (fmode
, cum
->fregno
+ i
* n_fpreg
);
13156 off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
13157 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
13160 /* If there were not enough FPRs to hold the argument, the rest
13161 usually goes into memory. However, if the current position
13162 is still within the register parameter area, a portion may
13163 actually have to go into GPRs.
13165 Note that it may happen that the portion of the argument
13166 passed in the first "half" of the first GPR was already
13167 passed in the last FPR as well.
13169 For unnamed arguments, we already set up GPRs to cover the
13170 whole argument in rs6000_psave_function_arg, so there is
13171 nothing further to do at this point. */
13172 fpr_words
= (i
* GET_MODE_SIZE (elt_mode
)) / (TARGET_32BIT
? 4 : 8);
13173 if (i
< n_elts
&& align_words
+ fpr_words
< GP_ARG_NUM_REG
13174 && cum
->nargs_prototype
> 0)
13176 static bool warned
;
13178 machine_mode rmode
= TARGET_32BIT
? SImode
: DImode
;
13179 int n_words
= rs6000_arg_size (mode
, type
);
13181 align_words
+= fpr_words
;
13182 n_words
-= fpr_words
;
13186 r
= gen_rtx_REG (rmode
, GP_ARG_MIN_REG
+ align_words
);
13187 off
= GEN_INT (fpr_words
++ * GET_MODE_SIZE (rmode
));
13188 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
13190 while (++align_words
< GP_ARG_NUM_REG
&& --n_words
!= 0);
13192 if (!warned
&& warn_psabi
)
13195 inform (input_location
,
13196 "the ABI of passing homogeneous float aggregates"
13197 " has changed in GCC 5");
13201 return rs6000_finish_function_arg (mode
, rvec
, k
);
13203 else if (align_words
< GP_ARG_NUM_REG
)
13205 if (TARGET_32BIT
&& TARGET_POWERPC64
)
13206 return rs6000_mixed_function_arg (mode
, type
, align_words
);
13208 return gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
13215 /* For an arg passed partly in registers and partly in memory, this is
13216 the number of bytes passed in registers. For args passed entirely in
13217 registers or entirely in memory, zero. When an arg is described by a
13218 PARALLEL, perhaps using more than one register type, this function
13219 returns the number of bytes used by the first element of the PARALLEL. */
13222 rs6000_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
13223 tree type
, bool named
)
13225 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
13226 bool passed_in_gprs
= true;
13229 machine_mode elt_mode
;
13232 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
13234 if (DEFAULT_ABI
== ABI_V4
)
13237 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
13239 /* If we are passing this arg in the fixed parameter save area (gprs or
13240 memory) as well as VRs, we do not use the partial bytes mechanism;
13241 instead, rs6000_function_arg will return a PARALLEL including a memory
13242 element as necessary. Library support functions for IEEE 128-bit are
13243 assumed to not need the value passed both in GPRs and in vector
13245 if (TARGET_64BIT
&& !cum
->prototype
13246 && (!cum
->libcall
|| !FLOAT128_VECTOR_P (elt_mode
)))
13249 /* Otherwise, we pass in VRs only. Check for partial copies. */
13250 passed_in_gprs
= false;
13251 if (cum
->vregno
+ n_elts
> ALTIVEC_ARG_MAX_REG
+ 1)
13252 ret
= (ALTIVEC_ARG_MAX_REG
+ 1 - cum
->vregno
) * 16;
13255 /* In this complicated case we just disable the partial_nregs code. */
13256 if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
13259 align_words
= rs6000_parm_start (mode
, type
, cum
->words
);
13261 if (USE_FP_FOR_ARG_P (cum
, elt_mode
))
13263 unsigned long n_fpreg
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
13265 /* If we are passing this arg in the fixed parameter save area
13266 (gprs or memory) as well as FPRs, we do not use the partial
13267 bytes mechanism; instead, rs6000_function_arg will return a
13268 PARALLEL including a memory element as necessary. */
13270 && (cum
->nargs_prototype
<= 0
13271 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
13272 && TARGET_XL_COMPAT
13273 && align_words
>= GP_ARG_NUM_REG
)))
13276 /* Otherwise, we pass in FPRs only. Check for partial copies. */
13277 passed_in_gprs
= false;
13278 if (cum
->fregno
+ n_elts
* n_fpreg
> FP_ARG_MAX_REG
+ 1)
13280 /* Compute number of bytes / words passed in FPRs. If there
13281 is still space available in the register parameter area
13282 *after* that amount, a part of the argument will be passed
13283 in GPRs. In that case, the total amount passed in any
13284 registers is equal to the amount that would have been passed
13285 in GPRs if everything were passed there, so we fall back to
13286 the GPR code below to compute the appropriate value. */
13287 int fpr
= ((FP_ARG_MAX_REG
+ 1 - cum
->fregno
)
13288 * MIN (8, GET_MODE_SIZE (elt_mode
)));
13289 int fpr_words
= fpr
/ (TARGET_32BIT
? 4 : 8);
13291 if (align_words
+ fpr_words
< GP_ARG_NUM_REG
)
13292 passed_in_gprs
= true;
13299 && align_words
< GP_ARG_NUM_REG
13300 && GP_ARG_NUM_REG
< align_words
+ rs6000_arg_size (mode
, type
))
13301 ret
= (GP_ARG_NUM_REG
- align_words
) * (TARGET_32BIT
? 4 : 8);
13303 if (ret
!= 0 && TARGET_DEBUG_ARG
)
13304 fprintf (stderr
, "rs6000_arg_partial_bytes: %d\n", ret
);
13309 /* A C expression that indicates when an argument must be passed by
13310 reference. If nonzero for an argument, a copy of that argument is
13311 made in memory and a pointer to the argument is passed instead of
13312 the argument itself. The pointer is passed in whatever way is
13313 appropriate for passing a pointer to that type.
13315 Under V.4, aggregates and long double are passed by reference.
13317 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
13318 reference unless the AltiVec vector extension ABI is in force.
13320 As an extension to all ABIs, variable sized types are passed by
13324 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
13325 machine_mode mode
, const_tree type
,
13326 bool named ATTRIBUTE_UNUSED
)
13331 if (DEFAULT_ABI
== ABI_V4
&& TARGET_IEEEQUAD
13332 && FLOAT128_IEEE_P (TYPE_MODE (type
)))
13334 if (TARGET_DEBUG_ARG
)
13335 fprintf (stderr
, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
13339 if (DEFAULT_ABI
== ABI_V4
&& AGGREGATE_TYPE_P (type
))
13341 if (TARGET_DEBUG_ARG
)
13342 fprintf (stderr
, "function_arg_pass_by_reference: V4 aggregate\n");
13346 if (int_size_in_bytes (type
) < 0)
13348 if (TARGET_DEBUG_ARG
)
13349 fprintf (stderr
, "function_arg_pass_by_reference: variable size\n");
13353 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
13354 modes only exist for GCC vector types if -maltivec. */
13355 if (TARGET_32BIT
&& !TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
13357 if (TARGET_DEBUG_ARG
)
13358 fprintf (stderr
, "function_arg_pass_by_reference: AltiVec\n");
13362 /* Pass synthetic vectors in memory. */
13363 if (TREE_CODE (type
) == VECTOR_TYPE
13364 && int_size_in_bytes (type
) > (TARGET_ALTIVEC_ABI
? 16 : 8))
13366 static bool warned_for_pass_big_vectors
= false;
13367 if (TARGET_DEBUG_ARG
)
13368 fprintf (stderr
, "function_arg_pass_by_reference: synthetic vector\n");
13369 if (!warned_for_pass_big_vectors
)
13371 warning (OPT_Wpsabi
, "GCC vector passed by reference: "
13372 "non-standard ABI extension with no compatibility guarantee");
13373 warned_for_pass_big_vectors
= true;
13381 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13382 already processes. Return true if the parameter must be passed
13383 (fully or partially) on the stack. */
13386 rs6000_parm_needs_stack (cumulative_args_t args_so_far
, tree type
)
13392 /* Catch errors. */
13393 if (type
== NULL
|| type
== error_mark_node
)
13396 /* Handle types with no storage requirement. */
13397 if (TYPE_MODE (type
) == VOIDmode
)
13400 /* Handle complex types. */
13401 if (TREE_CODE (type
) == COMPLEX_TYPE
)
13402 return (rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (type
))
13403 || rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (type
)));
13405 /* Handle transparent aggregates. */
13406 if ((TREE_CODE (type
) == UNION_TYPE
|| TREE_CODE (type
) == RECORD_TYPE
)
13407 && TYPE_TRANSPARENT_AGGR (type
))
13408 type
= TREE_TYPE (first_field (type
));
13410 /* See if this arg was passed by invisible reference. */
13411 if (pass_by_reference (get_cumulative_args (args_so_far
),
13412 TYPE_MODE (type
), type
, true))
13413 type
= build_pointer_type (type
);
13415 /* Find mode as it is passed by the ABI. */
13416 unsignedp
= TYPE_UNSIGNED (type
);
13417 mode
= promote_mode (type
, TYPE_MODE (type
), &unsignedp
);
13419 /* If we must pass in stack, we need a stack. */
13420 if (rs6000_must_pass_in_stack (mode
, type
))
13423 /* If there is no incoming register, we need a stack. */
13424 entry_parm
= rs6000_function_arg (args_so_far
, mode
, type
, true);
13425 if (entry_parm
== NULL
)
13428 /* Likewise if we need to pass both in registers and on the stack. */
13429 if (GET_CODE (entry_parm
) == PARALLEL
13430 && XEXP (XVECEXP (entry_parm
, 0, 0), 0) == NULL_RTX
)
13433 /* Also true if we're partially in registers and partially not. */
13434 if (rs6000_arg_partial_bytes (args_so_far
, mode
, type
, true) != 0)
13437 /* Update info on where next arg arrives in registers. */
13438 rs6000_function_arg_advance (args_so_far
, mode
, type
, true);
13442 /* Return true if FUN has no prototype, has a variable argument
13443 list, or passes any parameter in memory. */
13446 rs6000_function_parms_need_stack (tree fun
, bool incoming
)
13448 tree fntype
, result
;
13449 CUMULATIVE_ARGS args_so_far_v
;
13450 cumulative_args_t args_so_far
;
13453 /* Must be a libcall, all of which only use reg parms. */
13458 fntype
= TREE_TYPE (fun
);
13460 /* Varargs functions need the parameter save area. */
13461 if ((!incoming
&& !prototype_p (fntype
)) || stdarg_p (fntype
))
13464 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v
, fntype
, NULL_RTX
);
13465 args_so_far
= pack_cumulative_args (&args_so_far_v
);
13467 /* When incoming, we will have been passed the function decl.
13468 It is necessary to use the decl to handle K&R style functions,
13469 where TYPE_ARG_TYPES may not be available. */
13472 gcc_assert (DECL_P (fun
));
13473 result
= DECL_RESULT (fun
);
13476 result
= TREE_TYPE (fntype
);
13478 if (result
&& aggregate_value_p (result
, fntype
))
13480 if (!TYPE_P (result
))
13481 result
= TREE_TYPE (result
);
13482 result
= build_pointer_type (result
);
13483 rs6000_parm_needs_stack (args_so_far
, result
);
13490 for (parm
= DECL_ARGUMENTS (fun
);
13491 parm
&& parm
!= void_list_node
;
13492 parm
= TREE_CHAIN (parm
))
13493 if (rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (parm
)))
13498 function_args_iterator args_iter
;
13501 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
13502 if (rs6000_parm_needs_stack (args_so_far
, arg_type
))
13509 /* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is
13510 usually a constant depending on the ABI. However, in the ELFv2 ABI
13511 the register parameter area is optional when calling a function that
13512 has a prototype is scope, has no variable argument list, and passes
13513 all parameters in registers. */
13516 rs6000_reg_parm_stack_space (tree fun
, bool incoming
)
13518 int reg_parm_stack_space
;
13520 switch (DEFAULT_ABI
)
13523 reg_parm_stack_space
= 0;
13528 reg_parm_stack_space
= TARGET_64BIT
? 64 : 32;
13532 /* ??? Recomputing this every time is a bit expensive. Is there
13533 a place to cache this information? */
13534 if (rs6000_function_parms_need_stack (fun
, incoming
))
13535 reg_parm_stack_space
= TARGET_64BIT
? 64 : 32;
13537 reg_parm_stack_space
= 0;
13541 return reg_parm_stack_space
;
13545 rs6000_move_block_from_reg (int regno
, rtx x
, int nregs
)
13548 machine_mode reg_mode
= TARGET_32BIT
? SImode
: DImode
;
13553 for (i
= 0; i
< nregs
; i
++)
13555 rtx tem
= adjust_address_nv (x
, reg_mode
, i
* GET_MODE_SIZE (reg_mode
));
13556 if (reload_completed
)
13558 if (! strict_memory_address_p (reg_mode
, XEXP (tem
, 0)))
13561 tem
= simplify_gen_subreg (reg_mode
, x
, BLKmode
,
13562 i
* GET_MODE_SIZE (reg_mode
));
13565 tem
= replace_equiv_address (tem
, XEXP (tem
, 0));
13569 emit_move_insn (tem
, gen_rtx_REG (reg_mode
, regno
+ i
));
13573 /* Perform any needed actions needed for a function that is receiving a
13574 variable number of arguments.
13578 MODE and TYPE are the mode and type of the current parameter.
13580 PRETEND_SIZE is a variable that should be set to the amount of stack
13581 that must be pushed by the prolog to pretend that our caller pushed
13584 Normally, this macro will push all remaining incoming registers on the
13585 stack and set PRETEND_SIZE to the length of the registers pushed. */
13588 setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
13589 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
13592 CUMULATIVE_ARGS next_cum
;
13593 int reg_size
= TARGET_32BIT
? 4 : 8;
13594 rtx save_area
= NULL_RTX
, mem
;
13595 int first_reg_offset
;
13596 alias_set_type set
;
13598 /* Skip the last named argument. */
13599 next_cum
= *get_cumulative_args (cum
);
13600 rs6000_function_arg_advance_1 (&next_cum
, mode
, type
, true, 0);
13602 if (DEFAULT_ABI
== ABI_V4
)
13604 first_reg_offset
= next_cum
.sysv_gregno
- GP_ARG_MIN_REG
;
13608 int gpr_reg_num
= 0, gpr_size
= 0, fpr_size
= 0;
13609 HOST_WIDE_INT offset
= 0;
13611 /* Try to optimize the size of the varargs save area.
13612 The ABI requires that ap.reg_save_area is doubleword
13613 aligned, but we don't need to allocate space for all
13614 the bytes, only those to which we actually will save
13616 if (cfun
->va_list_gpr_size
&& first_reg_offset
< GP_ARG_NUM_REG
)
13617 gpr_reg_num
= GP_ARG_NUM_REG
- first_reg_offset
;
13618 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
13619 && next_cum
.fregno
<= FP_ARG_V4_MAX_REG
13620 && cfun
->va_list_fpr_size
)
13623 fpr_size
= (next_cum
.fregno
- FP_ARG_MIN_REG
)
13624 * UNITS_PER_FP_WORD
;
13625 if (cfun
->va_list_fpr_size
13626 < FP_ARG_V4_MAX_REG
+ 1 - next_cum
.fregno
)
13627 fpr_size
+= cfun
->va_list_fpr_size
* UNITS_PER_FP_WORD
;
13629 fpr_size
+= (FP_ARG_V4_MAX_REG
+ 1 - next_cum
.fregno
)
13630 * UNITS_PER_FP_WORD
;
13634 offset
= -((first_reg_offset
* reg_size
) & ~7);
13635 if (!fpr_size
&& gpr_reg_num
> cfun
->va_list_gpr_size
)
13637 gpr_reg_num
= cfun
->va_list_gpr_size
;
13638 if (reg_size
== 4 && (first_reg_offset
& 1))
13641 gpr_size
= (gpr_reg_num
* reg_size
+ 7) & ~7;
13644 offset
= - (int) (next_cum
.fregno
- FP_ARG_MIN_REG
)
13645 * UNITS_PER_FP_WORD
13646 - (int) (GP_ARG_NUM_REG
* reg_size
);
13648 if (gpr_size
+ fpr_size
)
13651 = assign_stack_local (BLKmode
, gpr_size
+ fpr_size
, 64);
13652 gcc_assert (GET_CODE (reg_save_area
) == MEM
);
13653 reg_save_area
= XEXP (reg_save_area
, 0);
13654 if (GET_CODE (reg_save_area
) == PLUS
)
13656 gcc_assert (XEXP (reg_save_area
, 0)
13657 == virtual_stack_vars_rtx
);
13658 gcc_assert (GET_CODE (XEXP (reg_save_area
, 1)) == CONST_INT
);
13659 offset
+= INTVAL (XEXP (reg_save_area
, 1));
13662 gcc_assert (reg_save_area
== virtual_stack_vars_rtx
);
13665 cfun
->machine
->varargs_save_offset
= offset
;
13666 save_area
= plus_constant (Pmode
, virtual_stack_vars_rtx
, offset
);
13671 first_reg_offset
= next_cum
.words
;
13672 save_area
= crtl
->args
.internal_arg_pointer
;
13674 if (targetm
.calls
.must_pass_in_stack (mode
, type
))
13675 first_reg_offset
+= rs6000_arg_size (TYPE_MODE (type
), type
);
13678 set
= get_varargs_alias_set ();
13679 if (! no_rtl
&& first_reg_offset
< GP_ARG_NUM_REG
13680 && cfun
->va_list_gpr_size
)
13682 int n_gpr
, nregs
= GP_ARG_NUM_REG
- first_reg_offset
;
13684 if (va_list_gpr_counter_field
)
13685 /* V4 va_list_gpr_size counts number of registers needed. */
13686 n_gpr
= cfun
->va_list_gpr_size
;
13688 /* char * va_list instead counts number of bytes needed. */
13689 n_gpr
= (cfun
->va_list_gpr_size
+ reg_size
- 1) / reg_size
;
13694 mem
= gen_rtx_MEM (BLKmode
,
13695 plus_constant (Pmode
, save_area
,
13696 first_reg_offset
* reg_size
));
13697 MEM_NOTRAP_P (mem
) = 1;
13698 set_mem_alias_set (mem
, set
);
13699 set_mem_align (mem
, BITS_PER_WORD
);
13701 rs6000_move_block_from_reg (GP_ARG_MIN_REG
+ first_reg_offset
, mem
,
13705 /* Save FP registers if needed. */
13706 if (DEFAULT_ABI
== ABI_V4
13707 && TARGET_HARD_FLOAT
&& TARGET_FPRS
13709 && next_cum
.fregno
<= FP_ARG_V4_MAX_REG
13710 && cfun
->va_list_fpr_size
)
13712 int fregno
= next_cum
.fregno
, nregs
;
13713 rtx cr1
= gen_rtx_REG (CCmode
, CR1_REGNO
);
13714 rtx lab
= gen_label_rtx ();
13715 int off
= (GP_ARG_NUM_REG
* reg_size
) + ((fregno
- FP_ARG_MIN_REG
)
13716 * UNITS_PER_FP_WORD
);
13719 (gen_rtx_SET (pc_rtx
,
13720 gen_rtx_IF_THEN_ELSE (VOIDmode
,
13721 gen_rtx_NE (VOIDmode
, cr1
,
13723 gen_rtx_LABEL_REF (VOIDmode
, lab
),
13727 fregno
<= FP_ARG_V4_MAX_REG
&& nregs
< cfun
->va_list_fpr_size
;
13728 fregno
++, off
+= UNITS_PER_FP_WORD
, nregs
++)
13730 mem
= gen_rtx_MEM ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13732 plus_constant (Pmode
, save_area
, off
));
13733 MEM_NOTRAP_P (mem
) = 1;
13734 set_mem_alias_set (mem
, set
);
13735 set_mem_align (mem
, GET_MODE_ALIGNMENT (
13736 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13737 ? DFmode
: SFmode
));
13738 emit_move_insn (mem
, gen_rtx_REG (
13739 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13740 ? DFmode
: SFmode
, fregno
));
13747 /* Create the va_list data type. */
13750 rs6000_build_builtin_va_list (void)
13752 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
, record
, type_decl
;
13754 /* For AIX, prefer 'char *' because that's what the system
13755 header files like. */
13756 if (DEFAULT_ABI
!= ABI_V4
)
13757 return build_pointer_type (char_type_node
);
13759 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
13760 type_decl
= build_decl (BUILTINS_LOCATION
, TYPE_DECL
,
13761 get_identifier ("__va_list_tag"), record
);
13763 f_gpr
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("gpr"),
13764 unsigned_char_type_node
);
13765 f_fpr
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("fpr"),
13766 unsigned_char_type_node
);
13767 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13768 every user file. */
13769 f_res
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13770 get_identifier ("reserved"), short_unsigned_type_node
);
13771 f_ovf
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13772 get_identifier ("overflow_arg_area"),
13774 f_sav
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13775 get_identifier ("reg_save_area"),
13778 va_list_gpr_counter_field
= f_gpr
;
13779 va_list_fpr_counter_field
= f_fpr
;
13781 DECL_FIELD_CONTEXT (f_gpr
) = record
;
13782 DECL_FIELD_CONTEXT (f_fpr
) = record
;
13783 DECL_FIELD_CONTEXT (f_res
) = record
;
13784 DECL_FIELD_CONTEXT (f_ovf
) = record
;
13785 DECL_FIELD_CONTEXT (f_sav
) = record
;
13787 TYPE_STUB_DECL (record
) = type_decl
;
13788 TYPE_NAME (record
) = type_decl
;
13789 TYPE_FIELDS (record
) = f_gpr
;
13790 DECL_CHAIN (f_gpr
) = f_fpr
;
13791 DECL_CHAIN (f_fpr
) = f_res
;
13792 DECL_CHAIN (f_res
) = f_ovf
;
13793 DECL_CHAIN (f_ovf
) = f_sav
;
13795 layout_type (record
);
13797 /* The correct type is an array type of one element. */
13798 return build_array_type (record
, build_index_type (size_zero_node
));
13801 /* Implement va_start. */
13804 rs6000_va_start (tree valist
, rtx nextarg
)
13806 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
13807 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
;
13808 tree gpr
, fpr
, ovf
, sav
, t
;
13810 /* Only SVR4 needs something special. */
13811 if (DEFAULT_ABI
!= ABI_V4
)
13813 std_expand_builtin_va_start (valist
, nextarg
);
13817 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
13818 f_fpr
= DECL_CHAIN (f_gpr
);
13819 f_res
= DECL_CHAIN (f_fpr
);
13820 f_ovf
= DECL_CHAIN (f_res
);
13821 f_sav
= DECL_CHAIN (f_ovf
);
13823 valist
= build_simple_mem_ref (valist
);
13824 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
13825 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
13827 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
13829 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
13832 /* Count number of gp and fp argument registers used. */
13833 words
= crtl
->args
.info
.words
;
13834 n_gpr
= MIN (crtl
->args
.info
.sysv_gregno
- GP_ARG_MIN_REG
,
13836 n_fpr
= MIN (crtl
->args
.info
.fregno
- FP_ARG_MIN_REG
,
13839 if (TARGET_DEBUG_ARG
)
13840 fprintf (stderr
, "va_start: words = " HOST_WIDE_INT_PRINT_DEC
", n_gpr = "
13841 HOST_WIDE_INT_PRINT_DEC
", n_fpr = " HOST_WIDE_INT_PRINT_DEC
"\n",
13842 words
, n_gpr
, n_fpr
);
13844 if (cfun
->va_list_gpr_size
)
13846 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
13847 build_int_cst (NULL_TREE
, n_gpr
));
13848 TREE_SIDE_EFFECTS (t
) = 1;
13849 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13852 if (cfun
->va_list_fpr_size
)
13854 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
13855 build_int_cst (NULL_TREE
, n_fpr
));
13856 TREE_SIDE_EFFECTS (t
) = 1;
13857 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13859 #ifdef HAVE_AS_GNU_ATTRIBUTE
13860 if (call_ABI_of_interest (cfun
->decl
))
13861 rs6000_passes_float
= true;
13865 /* Find the overflow area. */
13866 t
= make_tree (TREE_TYPE (ovf
), crtl
->args
.internal_arg_pointer
);
13868 t
= fold_build_pointer_plus_hwi (t
, words
* MIN_UNITS_PER_WORD
);
13869 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
13870 TREE_SIDE_EFFECTS (t
) = 1;
13871 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13873 /* If there were no va_arg invocations, don't set up the register
13875 if (!cfun
->va_list_gpr_size
13876 && !cfun
->va_list_fpr_size
13877 && n_gpr
< GP_ARG_NUM_REG
13878 && n_fpr
< FP_ARG_V4_MAX_REG
)
13881 /* Find the register save area. */
13882 t
= make_tree (TREE_TYPE (sav
), virtual_stack_vars_rtx
);
13883 if (cfun
->machine
->varargs_save_offset
)
13884 t
= fold_build_pointer_plus_hwi (t
, cfun
->machine
->varargs_save_offset
);
13885 t
= build2 (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
13886 TREE_SIDE_EFFECTS (t
) = 1;
13887 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13890 /* Implement va_arg. */
13893 rs6000_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
13894 gimple_seq
*post_p
)
13896 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
;
13897 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
13898 int size
, rsize
, n_reg
, sav_ofs
, sav_scale
;
13899 tree lab_false
, lab_over
, addr
;
13901 tree ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
13905 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
13907 t
= rs6000_gimplify_va_arg (valist
, ptrtype
, pre_p
, post_p
);
13908 return build_va_arg_indirect_ref (t
);
13911 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13912 earlier version of gcc, with the property that it always applied alignment
13913 adjustments to the va-args (even for zero-sized types). The cheapest way
13914 to deal with this is to replicate the effect of the part of
13915 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13917 We don't need to check for pass-by-reference because of the test above.
13918 We can return a simplifed answer, since we know there's no offset to add. */
13921 && rs6000_darwin64_abi
)
13922 || DEFAULT_ABI
== ABI_ELFv2
13923 || (DEFAULT_ABI
== ABI_AIX
&& !rs6000_compat_align_parm
))
13924 && integer_zerop (TYPE_SIZE (type
)))
13926 unsigned HOST_WIDE_INT align
, boundary
;
13927 tree valist_tmp
= get_initialized_tmp_var (valist
, pre_p
, NULL
);
13928 align
= PARM_BOUNDARY
/ BITS_PER_UNIT
;
13929 boundary
= rs6000_function_arg_boundary (TYPE_MODE (type
), type
);
13930 if (boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
13931 boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
13932 boundary
/= BITS_PER_UNIT
;
13933 if (boundary
> align
)
13936 /* This updates arg ptr by the amount that would be necessary
13937 to align the zero-sized (but not zero-alignment) item. */
13938 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13939 fold_build_pointer_plus_hwi (valist_tmp
, boundary
- 1));
13940 gimplify_and_add (t
, pre_p
);
13942 t
= fold_convert (sizetype
, valist_tmp
);
13943 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13944 fold_convert (TREE_TYPE (valist
),
13945 fold_build2 (BIT_AND_EXPR
, sizetype
, t
,
13946 size_int (-boundary
))));
13947 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
13948 gimplify_and_add (t
, pre_p
);
13950 /* Since it is zero-sized there's no increment for the item itself. */
13951 valist_tmp
= fold_convert (build_pointer_type (type
), valist_tmp
);
13952 return build_va_arg_indirect_ref (valist_tmp
);
13955 if (DEFAULT_ABI
!= ABI_V4
)
13957 if (targetm
.calls
.split_complex_arg
&& TREE_CODE (type
) == COMPLEX_TYPE
)
13959 tree elem_type
= TREE_TYPE (type
);
13960 machine_mode elem_mode
= TYPE_MODE (elem_type
);
13961 int elem_size
= GET_MODE_SIZE (elem_mode
);
13963 if (elem_size
< UNITS_PER_WORD
)
13965 tree real_part
, imag_part
;
13966 gimple_seq post
= NULL
;
13968 real_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
13970 /* Copy the value into a temporary, lest the formal temporary
13971 be reused out from under us. */
13972 real_part
= get_initialized_tmp_var (real_part
, pre_p
, &post
);
13973 gimple_seq_add_seq (pre_p
, post
);
13975 imag_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
13978 return build2 (COMPLEX_EXPR
, type
, real_part
, imag_part
);
13982 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
13985 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
13986 f_fpr
= DECL_CHAIN (f_gpr
);
13987 f_res
= DECL_CHAIN (f_fpr
);
13988 f_ovf
= DECL_CHAIN (f_res
);
13989 f_sav
= DECL_CHAIN (f_ovf
);
13991 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
13992 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
13994 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
13996 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
13999 size
= int_size_in_bytes (type
);
14000 rsize
= (size
+ 3) / 4;
14001 int pad
= 4 * rsize
- size
;
14004 machine_mode mode
= TYPE_MODE (type
);
14005 if (abi_v4_pass_in_fpr (mode
))
14007 /* FP args go in FP registers, if present. */
14009 n_reg
= (size
+ 7) / 8;
14010 sav_ofs
= ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? 8 : 4) * 4;
14011 sav_scale
= ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? 8 : 4);
14012 if (mode
!= SFmode
&& mode
!= SDmode
)
14017 /* Otherwise into GP registers. */
14026 /* Pull the value out of the saved registers.... */
14029 addr
= create_tmp_var (ptr_type_node
, "addr");
14031 /* AltiVec vectors never go in registers when -mabi=altivec. */
14032 if (TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
14036 lab_false
= create_artificial_label (input_location
);
14037 lab_over
= create_artificial_label (input_location
);
14039 /* Long long and SPE vectors are aligned in the registers.
14040 As are any other 2 gpr item such as complex int due to a
14041 historical mistake. */
14043 if (n_reg
== 2 && reg
== gpr
)
14046 u
= build2 (BIT_AND_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
14047 build_int_cst (TREE_TYPE (reg
), n_reg
- 1));
14048 u
= build2 (POSTINCREMENT_EXPR
, TREE_TYPE (reg
),
14049 unshare_expr (reg
), u
);
14051 /* _Decimal128 is passed in even/odd fpr pairs; the stored
14052 reg number is 0 for f1, so we want to make it odd. */
14053 else if (reg
== fpr
&& mode
== TDmode
)
14055 t
= build2 (BIT_IOR_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
14056 build_int_cst (TREE_TYPE (reg
), 1));
14057 u
= build2 (MODIFY_EXPR
, void_type_node
, unshare_expr (reg
), t
);
14060 t
= fold_convert (TREE_TYPE (reg
), size_int (8 - n_reg
+ 1));
14061 t
= build2 (GE_EXPR
, boolean_type_node
, u
, t
);
14062 u
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
14063 t
= build3 (COND_EXPR
, void_type_node
, t
, u
, NULL_TREE
);
14064 gimplify_and_add (t
, pre_p
);
14068 t
= fold_build_pointer_plus_hwi (sav
, sav_ofs
);
14070 u
= build2 (POSTINCREMENT_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
14071 build_int_cst (TREE_TYPE (reg
), n_reg
));
14072 u
= fold_convert (sizetype
, u
);
14073 u
= build2 (MULT_EXPR
, sizetype
, u
, size_int (sav_scale
));
14074 t
= fold_build_pointer_plus (t
, u
);
14076 /* _Decimal32 varargs are located in the second word of the 64-bit
14077 FP register for 32-bit binaries. */
14079 && TARGET_HARD_FLOAT
&& TARGET_FPRS
14081 t
= fold_build_pointer_plus_hwi (t
, size
);
14083 /* Args are passed right-aligned. */
14084 if (BYTES_BIG_ENDIAN
)
14085 t
= fold_build_pointer_plus_hwi (t
, pad
);
14087 gimplify_assign (addr
, t
, pre_p
);
14089 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
14091 stmt
= gimple_build_label (lab_false
);
14092 gimple_seq_add_stmt (pre_p
, stmt
);
14094 if ((n_reg
== 2 && !regalign
) || n_reg
> 2)
14096 /* Ensure that we don't find any more args in regs.
14097 Alignment has taken care of for special cases. */
14098 gimplify_assign (reg
, build_int_cst (TREE_TYPE (reg
), 8), pre_p
);
14102 /* ... otherwise out of the overflow area. */
14104 /* Care for on-stack alignment if needed. */
14108 t
= fold_build_pointer_plus_hwi (t
, align
- 1);
14109 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
14110 build_int_cst (TREE_TYPE (t
), -align
));
14113 /* Args are passed right-aligned. */
14114 if (BYTES_BIG_ENDIAN
)
14115 t
= fold_build_pointer_plus_hwi (t
, pad
);
14117 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
14119 gimplify_assign (unshare_expr (addr
), t
, pre_p
);
14121 t
= fold_build_pointer_plus_hwi (t
, size
);
14122 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
14126 stmt
= gimple_build_label (lab_over
);
14127 gimple_seq_add_stmt (pre_p
, stmt
);
14130 if (STRICT_ALIGNMENT
14131 && (TYPE_ALIGN (type
)
14132 > (unsigned) BITS_PER_UNIT
* (align
< 4 ? 4 : align
)))
14134 /* The value (of type complex double, for example) may not be
14135 aligned in memory in the saved registers, so copy via a
14136 temporary. (This is the same code as used for SPARC.) */
14137 tree tmp
= create_tmp_var (type
, "va_arg_tmp");
14138 tree dest_addr
= build_fold_addr_expr (tmp
);
14140 tree copy
= build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
14141 3, dest_addr
, addr
, size_int (rsize
* 4));
14143 gimplify_and_add (copy
, pre_p
);
14147 addr
= fold_convert (ptrtype
, addr
);
14148 return build_va_arg_indirect_ref (addr
);
14154 def_builtin (const char *name
, tree type
, enum rs6000_builtins code
)
14157 unsigned classify
= rs6000_builtin_info
[(int)code
].attr
;
14158 const char *attr_string
= "";
14160 gcc_assert (name
!= NULL
);
14161 gcc_assert (IN_RANGE ((int)code
, 0, (int)RS6000_BUILTIN_COUNT
));
14163 if (rs6000_builtin_decls
[(int)code
])
14164 fatal_error (input_location
,
14165 "internal error: builtin function %s already processed", name
);
14167 rs6000_builtin_decls
[(int)code
] = t
=
14168 add_builtin_function (name
, type
, (int)code
, BUILT_IN_MD
, NULL
, NULL_TREE
);
14170 /* Set any special attributes. */
14171 if ((classify
& RS6000_BTC_CONST
) != 0)
14173 /* const function, function only depends on the inputs. */
14174 TREE_READONLY (t
) = 1;
14175 TREE_NOTHROW (t
) = 1;
14176 attr_string
= ", const";
14178 else if ((classify
& RS6000_BTC_PURE
) != 0)
14180 /* pure function, function can read global memory, but does not set any
14182 DECL_PURE_P (t
) = 1;
14183 TREE_NOTHROW (t
) = 1;
14184 attr_string
= ", pure";
14186 else if ((classify
& RS6000_BTC_FP
) != 0)
14188 /* Function is a math function. If rounding mode is on, then treat the
14189 function as not reading global memory, but it can have arbitrary side
14190 effects. If it is off, then assume the function is a const function.
14191 This mimics the ATTR_MATHFN_FPROUNDING attribute in
14192 builtin-attribute.def that is used for the math functions. */
14193 TREE_NOTHROW (t
) = 1;
14194 if (flag_rounding_math
)
14196 DECL_PURE_P (t
) = 1;
14197 DECL_IS_NOVOPS (t
) = 1;
14198 attr_string
= ", fp, pure";
14202 TREE_READONLY (t
) = 1;
14203 attr_string
= ", fp, const";
14206 else if ((classify
& RS6000_BTC_ATTR_MASK
) != 0)
14207 gcc_unreachable ();
14209 if (TARGET_DEBUG_BUILTIN
)
14210 fprintf (stderr
, "rs6000_builtin, code = %4d, %s%s\n",
14211 (int)code
, name
, attr_string
);
14214 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
14216 #undef RS6000_BUILTIN_0
14217 #undef RS6000_BUILTIN_1
14218 #undef RS6000_BUILTIN_2
14219 #undef RS6000_BUILTIN_3
14220 #undef RS6000_BUILTIN_A
14221 #undef RS6000_BUILTIN_D
14222 #undef RS6000_BUILTIN_E
14223 #undef RS6000_BUILTIN_H
14224 #undef RS6000_BUILTIN_P
14225 #undef RS6000_BUILTIN_Q
14226 #undef RS6000_BUILTIN_S
14227 #undef RS6000_BUILTIN_X
14229 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14230 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14231 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14232 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14233 { MASK, ICODE, NAME, ENUM },
14235 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14236 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14237 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14238 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14239 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14240 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14241 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14242 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14244 static const struct builtin_description bdesc_3arg
[] =
14246 #include "powerpcspe-builtin.def"
14249 /* DST operations: void foo (void *, const int, const char). */
14251 #undef RS6000_BUILTIN_0
14252 #undef RS6000_BUILTIN_1
14253 #undef RS6000_BUILTIN_2
14254 #undef RS6000_BUILTIN_3
14255 #undef RS6000_BUILTIN_A
14256 #undef RS6000_BUILTIN_D
14257 #undef RS6000_BUILTIN_E
14258 #undef RS6000_BUILTIN_H
14259 #undef RS6000_BUILTIN_P
14260 #undef RS6000_BUILTIN_Q
14261 #undef RS6000_BUILTIN_S
14262 #undef RS6000_BUILTIN_X
14264 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14265 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14266 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14267 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14268 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14269 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14270 { MASK, ICODE, NAME, ENUM },
14272 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14273 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14274 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14275 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14276 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14277 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14279 static const struct builtin_description bdesc_dst
[] =
14281 #include "powerpcspe-builtin.def"
14284 /* Simple binary operations: VECc = foo (VECa, VECb). */
14286 #undef RS6000_BUILTIN_0
14287 #undef RS6000_BUILTIN_1
14288 #undef RS6000_BUILTIN_2
14289 #undef RS6000_BUILTIN_3
14290 #undef RS6000_BUILTIN_A
14291 #undef RS6000_BUILTIN_D
14292 #undef RS6000_BUILTIN_E
14293 #undef RS6000_BUILTIN_H
14294 #undef RS6000_BUILTIN_P
14295 #undef RS6000_BUILTIN_Q
14296 #undef RS6000_BUILTIN_S
14297 #undef RS6000_BUILTIN_X
14299 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14300 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14301 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14302 { MASK, ICODE, NAME, ENUM },
14304 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14305 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14306 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14307 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14308 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14309 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14310 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14311 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14312 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14314 static const struct builtin_description bdesc_2arg
[] =
14316 #include "powerpcspe-builtin.def"
14319 #undef RS6000_BUILTIN_0
14320 #undef RS6000_BUILTIN_1
14321 #undef RS6000_BUILTIN_2
14322 #undef RS6000_BUILTIN_3
14323 #undef RS6000_BUILTIN_A
14324 #undef RS6000_BUILTIN_D
14325 #undef RS6000_BUILTIN_E
14326 #undef RS6000_BUILTIN_H
14327 #undef RS6000_BUILTIN_P
14328 #undef RS6000_BUILTIN_Q
14329 #undef RS6000_BUILTIN_S
14330 #undef RS6000_BUILTIN_X
14332 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14333 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14334 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14335 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14336 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14337 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14338 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14339 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14340 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14341 { MASK, ICODE, NAME, ENUM },
14343 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14344 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14345 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14347 /* AltiVec predicates. */
14349 static const struct builtin_description bdesc_altivec_preds
[] =
14351 #include "powerpcspe-builtin.def"
14354 /* SPE predicates. */
14355 #undef RS6000_BUILTIN_0
14356 #undef RS6000_BUILTIN_1
14357 #undef RS6000_BUILTIN_2
14358 #undef RS6000_BUILTIN_3
14359 #undef RS6000_BUILTIN_A
14360 #undef RS6000_BUILTIN_D
14361 #undef RS6000_BUILTIN_E
14362 #undef RS6000_BUILTIN_H
14363 #undef RS6000_BUILTIN_P
14364 #undef RS6000_BUILTIN_Q
14365 #undef RS6000_BUILTIN_S
14366 #undef RS6000_BUILTIN_X
14368 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14369 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14370 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14371 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14372 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14373 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14374 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14375 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14376 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14377 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14378 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14379 { MASK, ICODE, NAME, ENUM },
14381 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14383 static const struct builtin_description bdesc_spe_predicates
[] =
14385 #include "powerpcspe-builtin.def"
14388 /* SPE evsel predicates. */
14389 #undef RS6000_BUILTIN_0
14390 #undef RS6000_BUILTIN_1
14391 #undef RS6000_BUILTIN_2
14392 #undef RS6000_BUILTIN_3
14393 #undef RS6000_BUILTIN_A
14394 #undef RS6000_BUILTIN_D
14395 #undef RS6000_BUILTIN_E
14396 #undef RS6000_BUILTIN_H
14397 #undef RS6000_BUILTIN_P
14398 #undef RS6000_BUILTIN_Q
14399 #undef RS6000_BUILTIN_S
14400 #undef RS6000_BUILTIN_X
14402 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14403 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14404 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14405 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14406 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14407 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14408 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14409 { MASK, ICODE, NAME, ENUM },
14411 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14412 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14413 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14414 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14415 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14417 static const struct builtin_description bdesc_spe_evsel
[] =
14419 #include "powerpcspe-builtin.def"
14422 /* PAIRED predicates. */
14423 #undef RS6000_BUILTIN_0
14424 #undef RS6000_BUILTIN_1
14425 #undef RS6000_BUILTIN_2
14426 #undef RS6000_BUILTIN_3
14427 #undef RS6000_BUILTIN_A
14428 #undef RS6000_BUILTIN_D
14429 #undef RS6000_BUILTIN_E
14430 #undef RS6000_BUILTIN_H
14431 #undef RS6000_BUILTIN_P
14432 #undef RS6000_BUILTIN_Q
14433 #undef RS6000_BUILTIN_S
14434 #undef RS6000_BUILTIN_X
14436 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14437 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14438 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14439 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14440 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14441 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14442 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14443 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14444 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14445 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14446 { MASK, ICODE, NAME, ENUM },
14448 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14449 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14451 static const struct builtin_description bdesc_paired_preds
[] =
14453 #include "powerpcspe-builtin.def"
14456 /* ABS* operations. */
/* Table of ABS builtins: only RS6000_BUILTIN_A expands to a record, so
   the re-include of powerpcspe-builtin.def selects the "A" entries.
   NOTE(review): surrounding table braces appear lost in extraction.  */
14458 #undef RS6000_BUILTIN_0
14459 #undef RS6000_BUILTIN_1
14460 #undef RS6000_BUILTIN_2
14461 #undef RS6000_BUILTIN_3
14462 #undef RS6000_BUILTIN_A
14463 #undef RS6000_BUILTIN_D
14464 #undef RS6000_BUILTIN_E
14465 #undef RS6000_BUILTIN_H
14466 #undef RS6000_BUILTIN_P
14467 #undef RS6000_BUILTIN_Q
14468 #undef RS6000_BUILTIN_S
14469 #undef RS6000_BUILTIN_X
14471 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14472 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14473 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14474 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14475 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14476 { MASK, ICODE, NAME, ENUM },
14478 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14479 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14480 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14481 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14482 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14483 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14484 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14486 static const struct builtin_description bdesc_abs
[] =
14488 #include "powerpcspe-builtin.def"
14491 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
/* Table of one-argument builtins: only RS6000_BUILTIN_1 expands to a
   record, so the re-include selects the unary entries.
   NOTE(review): the tail of the header comment above and the table
   braces are missing in this chunk -- dropped during extraction.  */
14494 #undef RS6000_BUILTIN_0
14495 #undef RS6000_BUILTIN_1
14496 #undef RS6000_BUILTIN_2
14497 #undef RS6000_BUILTIN_3
14498 #undef RS6000_BUILTIN_A
14499 #undef RS6000_BUILTIN_D
14500 #undef RS6000_BUILTIN_E
14501 #undef RS6000_BUILTIN_H
14502 #undef RS6000_BUILTIN_P
14503 #undef RS6000_BUILTIN_Q
14504 #undef RS6000_BUILTIN_S
14505 #undef RS6000_BUILTIN_X
14507 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14508 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14509 { MASK, ICODE, NAME, ENUM },
14511 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14512 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14513 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14514 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14515 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14516 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14517 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14518 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14519 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14520 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14522 static const struct builtin_description bdesc_1arg
[] =
14524 #include "powerpcspe-builtin.def"
14527 /* Simple no-argument operations: result = __builtin_darn_32 () */
/* Table of zero-argument builtins: only RS6000_BUILTIN_0 expands to a
   record, so the re-include selects the nullary entries.
   NOTE(review): table braces missing in this chunk (extraction loss).  */
14529 #undef RS6000_BUILTIN_0
14530 #undef RS6000_BUILTIN_1
14531 #undef RS6000_BUILTIN_2
14532 #undef RS6000_BUILTIN_3
14533 #undef RS6000_BUILTIN_A
14534 #undef RS6000_BUILTIN_D
14535 #undef RS6000_BUILTIN_E
14536 #undef RS6000_BUILTIN_H
14537 #undef RS6000_BUILTIN_P
14538 #undef RS6000_BUILTIN_Q
14539 #undef RS6000_BUILTIN_S
14540 #undef RS6000_BUILTIN_X
14542 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14543 { MASK, ICODE, NAME, ENUM },
14545 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14546 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14547 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14548 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14549 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14550 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14551 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14552 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14553 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14554 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14555 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14557 static const struct builtin_description bdesc_0arg
[] =
14559 #include "powerpcspe-builtin.def"
14562 /* HTM builtins. */
/* Table of Hardware Transactional Memory builtins: only RS6000_BUILTIN_H
   expands to a record, so the re-include selects the "H" entries.
   Consumed by htm_expand_builtin below.
   NOTE(review): table braces missing in this chunk (extraction loss).  */
14563 #undef RS6000_BUILTIN_0
14564 #undef RS6000_BUILTIN_1
14565 #undef RS6000_BUILTIN_2
14566 #undef RS6000_BUILTIN_3
14567 #undef RS6000_BUILTIN_A
14568 #undef RS6000_BUILTIN_D
14569 #undef RS6000_BUILTIN_E
14570 #undef RS6000_BUILTIN_H
14571 #undef RS6000_BUILTIN_P
14572 #undef RS6000_BUILTIN_Q
14573 #undef RS6000_BUILTIN_S
14574 #undef RS6000_BUILTIN_X
14576 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14577 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14578 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14579 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14580 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14581 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14582 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14583 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14584 { MASK, ICODE, NAME, ENUM },
14586 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14587 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14588 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14589 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14591 static const struct builtin_description bdesc_htm
[] =
14593 #include "powerpcspe-builtin.def"
14596 #undef RS6000_BUILTIN_0
14597 #undef RS6000_BUILTIN_1
14598 #undef RS6000_BUILTIN_2
14599 #undef RS6000_BUILTIN_3
14600 #undef RS6000_BUILTIN_A
14601 #undef RS6000_BUILTIN_D
14602 #undef RS6000_BUILTIN_E
14603 #undef RS6000_BUILTIN_H
14604 #undef RS6000_BUILTIN_P
14605 #undef RS6000_BUILTIN_Q
14606 #undef RS6000_BUILTIN_S
14608 /* Return true if a builtin function is overloaded. */
/* Tests the RS6000_BTC_OVERLOADED bit of rs6000_builtin_info[fncode].attr.
   NOTE(review): this chunk is a lossy extraction -- the function's return
   type line and braces were dropped; only the body expression survives.
   Verify against the original file.  */
14610 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode
)
14612 return (rs6000_builtin_info
[(int)fncode
].attr
& RS6000_BTC_OVERLOADED
) != 0;
/* Return the name string recorded for builtin FNCODE in
   rs6000_builtin_info.  NOTE(review): return-type line and braces were
   dropped by extraction.  */
14616 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode
)
14618 return rs6000_builtin_info
[(int)fncode
].name
;
14621 /* Expand an expression EXP that calls a builtin without arguments. */
14623 rs6000_expand_zeroop_builtin (enum insn_code icode
, rtx target
)
14626 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14628 if (icode
== CODE_FOR_nothing
)
14629 /* Builtin not supported on this processor. */
14633 || GET_MODE (target
) != tmode
14634 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14635 target
= gen_reg_rtx (tmode
);
14637 pat
= GEN_FCN (icode
) (target
);
14647 rs6000_expand_mtfsf_builtin (enum insn_code icode
, tree exp
)
14650 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14651 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14652 rtx op0
= expand_normal (arg0
);
14653 rtx op1
= expand_normal (arg1
);
14654 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
14655 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
14657 if (icode
== CODE_FOR_nothing
)
14658 /* Builtin not supported on this processor. */
14661 /* If we got invalid arguments bail out before generating bad rtl. */
14662 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14665 if (GET_CODE (op0
) != CONST_INT
14666 || INTVAL (op0
) > 255
14667 || INTVAL (op0
) < 0)
14669 error ("argument 1 must be an 8-bit field value");
14673 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
14674 op0
= copy_to_mode_reg (mode0
, op0
);
14676 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
14677 op1
= copy_to_mode_reg (mode1
, op1
);
14679 pat
= GEN_FCN (icode
) (op0
, op1
);
14688 rs6000_expand_unop_builtin (enum insn_code icode
, tree exp
, rtx target
)
14691 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14692 rtx op0
= expand_normal (arg0
);
14693 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14694 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14696 if (icode
== CODE_FOR_nothing
)
14697 /* Builtin not supported on this processor. */
14700 /* If we got invalid arguments bail out before generating bad rtl. */
14701 if (arg0
== error_mark_node
)
14704 if (icode
== CODE_FOR_altivec_vspltisb
14705 || icode
== CODE_FOR_altivec_vspltish
14706 || icode
== CODE_FOR_altivec_vspltisw
14707 || icode
== CODE_FOR_spe_evsplatfi
14708 || icode
== CODE_FOR_spe_evsplati
)
14710 /* Only allow 5-bit *signed* literals. */
14711 if (GET_CODE (op0
) != CONST_INT
14712 || INTVAL (op0
) > 15
14713 || INTVAL (op0
) < -16)
14715 error ("argument 1 must be a 5-bit signed literal");
14716 return CONST0_RTX (tmode
);
14721 || GET_MODE (target
) != tmode
14722 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14723 target
= gen_reg_rtx (tmode
);
14725 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14726 op0
= copy_to_mode_reg (mode0
, op0
);
14728 pat
= GEN_FCN (icode
) (target
, op0
);
14737 altivec_expand_abs_builtin (enum insn_code icode
, tree exp
, rtx target
)
14739 rtx pat
, scratch1
, scratch2
;
14740 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14741 rtx op0
= expand_normal (arg0
);
14742 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14743 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14745 /* If we have invalid arguments, bail out before generating bad rtl. */
14746 if (arg0
== error_mark_node
)
14750 || GET_MODE (target
) != tmode
14751 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14752 target
= gen_reg_rtx (tmode
);
14754 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14755 op0
= copy_to_mode_reg (mode0
, op0
);
14757 scratch1
= gen_reg_rtx (mode0
);
14758 scratch2
= gen_reg_rtx (mode0
);
14760 pat
= GEN_FCN (icode
) (target
, op0
, scratch1
, scratch2
);
14769 rs6000_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
14772 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14773 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14774 rtx op0
= expand_normal (arg0
);
14775 rtx op1
= expand_normal (arg1
);
14776 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14777 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14778 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
14780 if (icode
== CODE_FOR_nothing
)
14781 /* Builtin not supported on this processor. */
14784 /* If we got invalid arguments bail out before generating bad rtl. */
14785 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14788 if (icode
== CODE_FOR_altivec_vcfux
14789 || icode
== CODE_FOR_altivec_vcfsx
14790 || icode
== CODE_FOR_altivec_vctsxs
14791 || icode
== CODE_FOR_altivec_vctuxs
14792 || icode
== CODE_FOR_altivec_vspltb
14793 || icode
== CODE_FOR_altivec_vsplth
14794 || icode
== CODE_FOR_altivec_vspltw
14795 || icode
== CODE_FOR_spe_evaddiw
14796 || icode
== CODE_FOR_spe_evldd
14797 || icode
== CODE_FOR_spe_evldh
14798 || icode
== CODE_FOR_spe_evldw
14799 || icode
== CODE_FOR_spe_evlhhesplat
14800 || icode
== CODE_FOR_spe_evlhhossplat
14801 || icode
== CODE_FOR_spe_evlhhousplat
14802 || icode
== CODE_FOR_spe_evlwhe
14803 || icode
== CODE_FOR_spe_evlwhos
14804 || icode
== CODE_FOR_spe_evlwhou
14805 || icode
== CODE_FOR_spe_evlwhsplat
14806 || icode
== CODE_FOR_spe_evlwwsplat
14807 || icode
== CODE_FOR_spe_evrlwi
14808 || icode
== CODE_FOR_spe_evslwi
14809 || icode
== CODE_FOR_spe_evsrwis
14810 || icode
== CODE_FOR_spe_evsubifw
14811 || icode
== CODE_FOR_spe_evsrwiu
)
14813 /* Only allow 5-bit unsigned literals. */
14815 if (TREE_CODE (arg1
) != INTEGER_CST
14816 || TREE_INT_CST_LOW (arg1
) & ~0x1f)
14818 error ("argument 2 must be a 5-bit unsigned literal");
14819 return CONST0_RTX (tmode
);
14822 else if (icode
== CODE_FOR_dfptstsfi_eq_dd
14823 || icode
== CODE_FOR_dfptstsfi_lt_dd
14824 || icode
== CODE_FOR_dfptstsfi_gt_dd
14825 || icode
== CODE_FOR_dfptstsfi_unordered_dd
14826 || icode
== CODE_FOR_dfptstsfi_eq_td
14827 || icode
== CODE_FOR_dfptstsfi_lt_td
14828 || icode
== CODE_FOR_dfptstsfi_gt_td
14829 || icode
== CODE_FOR_dfptstsfi_unordered_td
)
14831 /* Only allow 6-bit unsigned literals. */
14833 if (TREE_CODE (arg0
) != INTEGER_CST
14834 || !IN_RANGE (TREE_INT_CST_LOW (arg0
), 0, 63))
14836 error ("argument 1 must be a 6-bit unsigned literal");
14837 return CONST0_RTX (tmode
);
14840 else if (icode
== CODE_FOR_xststdcdp
14841 || icode
== CODE_FOR_xststdcsp
14842 || icode
== CODE_FOR_xvtstdcdp
14843 || icode
== CODE_FOR_xvtstdcsp
)
14845 /* Only allow 7-bit unsigned literals. */
14847 if (TREE_CODE (arg1
) != INTEGER_CST
14848 || !IN_RANGE (TREE_INT_CST_LOW (arg1
), 0, 127))
14850 error ("argument 2 must be a 7-bit unsigned literal");
14851 return CONST0_RTX (tmode
);
14856 || GET_MODE (target
) != tmode
14857 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14858 target
= gen_reg_rtx (tmode
);
14860 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14861 op0
= copy_to_mode_reg (mode0
, op0
);
14862 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14863 op1
= copy_to_mode_reg (mode1
, op1
);
14865 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14874 altivec_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
14877 tree cr6_form
= CALL_EXPR_ARG (exp
, 0);
14878 tree arg0
= CALL_EXPR_ARG (exp
, 1);
14879 tree arg1
= CALL_EXPR_ARG (exp
, 2);
14880 rtx op0
= expand_normal (arg0
);
14881 rtx op1
= expand_normal (arg1
);
14882 machine_mode tmode
= SImode
;
14883 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14884 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
14887 if (TREE_CODE (cr6_form
) != INTEGER_CST
)
14889 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14893 cr6_form_int
= TREE_INT_CST_LOW (cr6_form
);
14895 gcc_assert (mode0
== mode1
);
14897 /* If we have invalid arguments, bail out before generating bad rtl. */
14898 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14902 || GET_MODE (target
) != tmode
14903 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14904 target
= gen_reg_rtx (tmode
);
14906 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14907 op0
= copy_to_mode_reg (mode0
, op0
);
14908 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14909 op1
= copy_to_mode_reg (mode1
, op1
);
14911 /* Note that for many of the relevant operations (e.g. cmpne or
14912 cmpeq) with float or double operands, it makes more sense for the
14913 mode of the allocated scratch register to select a vector of
14914 integer. But the choice to copy the mode of operand 0 was made
14915 long ago and there are no plans to change it. */
14916 scratch
= gen_reg_rtx (mode0
);
14918 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
14923 /* The vec_any* and vec_all* predicates use the same opcodes for two
14924 different operations, but the bits in CR6 will be different
14925 depending on what information we want. So we have to play tricks
14926 with CR6 to get the right bits out.
14928 If you think this is disgusting, look at the specs for the
14929 AltiVec predicates. */
14931 switch (cr6_form_int
)
14934 emit_insn (gen_cr6_test_for_zero (target
));
14937 emit_insn (gen_cr6_test_for_zero_reverse (target
));
14940 emit_insn (gen_cr6_test_for_lt (target
));
14943 emit_insn (gen_cr6_test_for_lt_reverse (target
));
14946 error ("argument 1 of __builtin_altivec_predicate is out of range");
14954 paired_expand_lv_builtin (enum insn_code icode
, tree exp
, rtx target
)
14957 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14958 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14959 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14960 machine_mode mode0
= Pmode
;
14961 machine_mode mode1
= Pmode
;
14962 rtx op0
= expand_normal (arg0
);
14963 rtx op1
= expand_normal (arg1
);
14965 if (icode
== CODE_FOR_nothing
)
14966 /* Builtin not supported on this processor. */
14969 /* If we got invalid arguments bail out before generating bad rtl. */
14970 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14974 || GET_MODE (target
) != tmode
14975 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14976 target
= gen_reg_rtx (tmode
);
14978 op1
= copy_to_mode_reg (mode1
, op1
);
14980 if (op0
== const0_rtx
)
14982 addr
= gen_rtx_MEM (tmode
, op1
);
14986 op0
= copy_to_mode_reg (mode0
, op0
);
14987 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op0
, op1
));
14990 pat
= GEN_FCN (icode
) (target
, addr
);
14999 /* Return a constant vector for use as a little-endian permute control vector
15000 to reverse the order of elements of the given vector mode. */
/* Builds one of four byte-permute patterns (for 2-, 4-, 8- or 16-byte
   elements) and returns it as a V16QImode register via force_reg.
   NOTE(review): the switch statement that selects swap2/swap4/swap8 by
   mode, and the declaration of `perm`, were dropped by extraction --
   only the swap16 arm and the default gcc_unreachable survive here.  */
15002 swap_selector_for_mode (machine_mode mode
)
15004 /* These are little endian vectors, so their elements are reversed
15005 from what you would normally expect for a permute control vector. */
15006 unsigned int swap2
[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
15007 unsigned int swap4
[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
15008 unsigned int swap8
[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
15009 unsigned int swap16
[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
15010 unsigned int *swaparray
, i
;
15027 swaparray
= swap16
;
15030 gcc_unreachable ();
15033 for (i
= 0; i
< 16; ++i
)
15034 perm
[i
] = GEN_INT (swaparray
[i
]);
15036 return force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
)));
15039 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
15040 with -maltivec=be specified. Issue the load followed by an element-
15041 reversing permute. */
/* Loads into TMP, then emits a VPERM of TMP with the selector from
   swap_selector_for_mode to element-reverse the result into OP0.
   NOTE(review): the emit of `par` (the load PARALLEL) between the
   gcc_assert and the final SET appears to have been dropped by
   extraction; verify against the original file.  */
15043 altivec_expand_lvx_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
15045 rtx tmp
= gen_reg_rtx (mode
);
15046 rtx load
= gen_rtx_SET (tmp
, op1
);
15047 rtx lvx
= gen_rtx_UNSPEC (mode
, gen_rtvec (1, const0_rtx
), unspec
);
15048 rtx par
= gen_rtx_PARALLEL (mode
, gen_rtvec (2, load
, lvx
));
15049 rtx sel
= swap_selector_for_mode (mode
);
15050 rtx vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, tmp
, tmp
, sel
), UNSPEC_VPERM
);
15052 gcc_assert (REG_P (op0
));
15054 emit_insn (gen_rtx_SET (op0
, vperm
));
15057 /* Generate code for a "stvxl" built-in for a little endian target with
15058 -maltivec=be specified. Issue the store preceded by an element-reversing
/* Element-reverses OP1 through a VPERM into TMP, then stores via the
   PARALLEL (store + UNSPEC).  NOTE(review): extraction dropped the end
   of the header comment, the declaration of `vperm`, and the final
   emit of `par`; verify against the original file.  */
15061 altivec_expand_stvx_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
15063 rtx tmp
= gen_reg_rtx (mode
);
15064 rtx store
= gen_rtx_SET (op0
, tmp
);
15065 rtx stvx
= gen_rtx_UNSPEC (mode
, gen_rtvec (1, const0_rtx
), unspec
);
15066 rtx par
= gen_rtx_PARALLEL (mode
, gen_rtvec (2, store
, stvx
));
15067 rtx sel
= swap_selector_for_mode (mode
);
15070 gcc_assert (REG_P (op1
));
15071 vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op1
, sel
), UNSPEC_VPERM
);
15072 emit_insn (gen_rtx_SET (tmp
, vperm
));
15076 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
15077 specified. Issue the store preceded by an element-reversing permute. */
/* Element-reverses OP1 via VPERM into TMP, then emits the store of the
   inner-mode UNSPEC into OP0.  NOTE(review): the declaration of `vperm`
   was dropped by extraction.  */
15079 altivec_expand_stvex_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
15081 machine_mode inner_mode
= GET_MODE_INNER (mode
);
15082 rtx tmp
= gen_reg_rtx (mode
);
15083 rtx stvx
= gen_rtx_UNSPEC (inner_mode
, gen_rtvec (1, tmp
), unspec
);
15084 rtx sel
= swap_selector_for_mode (mode
);
15087 gcc_assert (REG_P (op1
));
15088 vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op1
, sel
), UNSPEC_VPERM
);
15089 emit_insn (gen_rtx_SET (tmp
, vperm
));
15090 emit_insn (gen_rtx_SET (op0
, stvx
));
15094 altivec_expand_lv_builtin (enum insn_code icode
, tree exp
, rtx target
, bool blk
)
15097 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15098 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15099 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15100 machine_mode mode0
= Pmode
;
15101 machine_mode mode1
= Pmode
;
15102 rtx op0
= expand_normal (arg0
);
15103 rtx op1
= expand_normal (arg1
);
15105 if (icode
== CODE_FOR_nothing
)
15106 /* Builtin not supported on this processor. */
15109 /* If we got invalid arguments bail out before generating bad rtl. */
15110 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
15114 || GET_MODE (target
) != tmode
15115 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15116 target
= gen_reg_rtx (tmode
);
15118 op1
= copy_to_mode_reg (mode1
, op1
);
15120 /* For LVX, express the RTL accurately by ANDing the address with -16.
15121 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
15122 so the raw address is fine. */
15123 if (icode
== CODE_FOR_altivec_lvx_v2df_2op
15124 || icode
== CODE_FOR_altivec_lvx_v2di_2op
15125 || icode
== CODE_FOR_altivec_lvx_v4sf_2op
15126 || icode
== CODE_FOR_altivec_lvx_v4si_2op
15127 || icode
== CODE_FOR_altivec_lvx_v8hi_2op
15128 || icode
== CODE_FOR_altivec_lvx_v16qi_2op
)
15131 if (op0
== const0_rtx
)
15135 op0
= copy_to_mode_reg (mode0
, op0
);
15136 rawaddr
= gen_rtx_PLUS (Pmode
, op1
, op0
);
15138 addr
= gen_rtx_AND (Pmode
, rawaddr
, gen_rtx_CONST_INT (Pmode
, -16));
15139 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
, addr
);
15141 /* For -maltivec=be, emit the load and follow it up with a
15142 permute to swap the elements. */
15143 if (!BYTES_BIG_ENDIAN
&& VECTOR_ELT_ORDER_BIG
)
15145 rtx temp
= gen_reg_rtx (tmode
);
15146 emit_insn (gen_rtx_SET (temp
, addr
));
15148 rtx sel
= swap_selector_for_mode (tmode
);
15149 rtx vperm
= gen_rtx_UNSPEC (tmode
, gen_rtvec (3, temp
, temp
, sel
),
15151 emit_insn (gen_rtx_SET (target
, vperm
));
15154 emit_insn (gen_rtx_SET (target
, addr
));
15158 if (op0
== const0_rtx
)
15159 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
, op1
);
15162 op0
= copy_to_mode_reg (mode0
, op0
);
15163 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
,
15164 gen_rtx_PLUS (Pmode
, op1
, op0
));
15167 pat
= GEN_FCN (icode
) (target
, addr
);
15177 spe_expand_stv_builtin (enum insn_code icode
, tree exp
)
15179 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15180 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15181 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15182 rtx op0
= expand_normal (arg0
);
15183 rtx op1
= expand_normal (arg1
);
15184 rtx op2
= expand_normal (arg2
);
15186 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
15187 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
15188 machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
15190 /* Invalid arguments. Bail before doing anything stoopid! */
15191 if (arg0
== error_mark_node
15192 || arg1
== error_mark_node
15193 || arg2
== error_mark_node
)
15196 if (! (*insn_data
[icode
].operand
[2].predicate
) (op0
, mode2
))
15197 op0
= copy_to_mode_reg (mode2
, op0
);
15198 if (! (*insn_data
[icode
].operand
[0].predicate
) (op1
, mode0
))
15199 op1
= copy_to_mode_reg (mode0
, op1
);
15200 if (! (*insn_data
[icode
].operand
[1].predicate
) (op2
, mode1
))
15201 op2
= copy_to_mode_reg (mode1
, op2
);
15203 pat
= GEN_FCN (icode
) (op1
, op2
, op0
);
15210 paired_expand_stv_builtin (enum insn_code icode
, tree exp
)
15212 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15213 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15214 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15215 rtx op0
= expand_normal (arg0
);
15216 rtx op1
= expand_normal (arg1
);
15217 rtx op2
= expand_normal (arg2
);
15219 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15220 machine_mode mode1
= Pmode
;
15221 machine_mode mode2
= Pmode
;
15223 /* Invalid arguments. Bail before doing anything stoopid! */
15224 if (arg0
== error_mark_node
15225 || arg1
== error_mark_node
15226 || arg2
== error_mark_node
)
15229 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, tmode
))
15230 op0
= copy_to_mode_reg (tmode
, op0
);
15232 op2
= copy_to_mode_reg (mode2
, op2
);
15234 if (op1
== const0_rtx
)
15236 addr
= gen_rtx_MEM (tmode
, op2
);
15240 op1
= copy_to_mode_reg (mode1
, op1
);
15241 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op1
, op2
));
15244 pat
= GEN_FCN (icode
) (addr
, op0
);
15251 altivec_expand_stxvl_builtin (enum insn_code icode
, tree exp
)
15254 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15255 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15256 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15257 rtx op0
= expand_normal (arg0
);
15258 rtx op1
= expand_normal (arg1
);
15259 rtx op2
= expand_normal (arg2
);
15260 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
15261 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
15262 machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
15264 if (icode
== CODE_FOR_nothing
)
15265 /* Builtin not supported on this processor. */
15268 /* If we got invalid arguments bail out before generating bad rtl. */
15269 if (arg0
== error_mark_node
15270 || arg1
== error_mark_node
15271 || arg2
== error_mark_node
)
15274 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15275 op0
= copy_to_mode_reg (mode0
, op0
);
15276 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15277 op1
= copy_to_mode_reg (mode1
, op1
);
15278 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
15279 op2
= copy_to_mode_reg (mode2
, op2
);
15281 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
15289 altivec_expand_stv_builtin (enum insn_code icode
, tree exp
)
15291 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15292 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15293 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15294 rtx op0
= expand_normal (arg0
);
15295 rtx op1
= expand_normal (arg1
);
15296 rtx op2
= expand_normal (arg2
);
15297 rtx pat
, addr
, rawaddr
;
15298 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15299 machine_mode smode
= insn_data
[icode
].operand
[1].mode
;
15300 machine_mode mode1
= Pmode
;
15301 machine_mode mode2
= Pmode
;
15303 /* Invalid arguments. Bail before doing anything stoopid! */
15304 if (arg0
== error_mark_node
15305 || arg1
== error_mark_node
15306 || arg2
== error_mark_node
)
15309 op2
= copy_to_mode_reg (mode2
, op2
);
15311 /* For STVX, express the RTL accurately by ANDing the address with -16.
15312 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15313 so the raw address is fine. */
15314 if (icode
== CODE_FOR_altivec_stvx_v2df_2op
15315 || icode
== CODE_FOR_altivec_stvx_v2di_2op
15316 || icode
== CODE_FOR_altivec_stvx_v4sf_2op
15317 || icode
== CODE_FOR_altivec_stvx_v4si_2op
15318 || icode
== CODE_FOR_altivec_stvx_v8hi_2op
15319 || icode
== CODE_FOR_altivec_stvx_v16qi_2op
)
15321 if (op1
== const0_rtx
)
15325 op1
= copy_to_mode_reg (mode1
, op1
);
15326 rawaddr
= gen_rtx_PLUS (Pmode
, op2
, op1
);
15329 addr
= gen_rtx_AND (Pmode
, rawaddr
, gen_rtx_CONST_INT (Pmode
, -16));
15330 addr
= gen_rtx_MEM (tmode
, addr
);
15332 op0
= copy_to_mode_reg (tmode
, op0
);
15334 /* For -maltivec=be, emit a permute to swap the elements, followed
15336 if (!BYTES_BIG_ENDIAN
&& VECTOR_ELT_ORDER_BIG
)
15338 rtx temp
= gen_reg_rtx (tmode
);
15339 rtx sel
= swap_selector_for_mode (tmode
);
15340 rtx vperm
= gen_rtx_UNSPEC (tmode
, gen_rtvec (3, op0
, op0
, sel
),
15342 emit_insn (gen_rtx_SET (temp
, vperm
));
15343 emit_insn (gen_rtx_SET (addr
, temp
));
15346 emit_insn (gen_rtx_SET (addr
, op0
));
15350 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, smode
))
15351 op0
= copy_to_mode_reg (smode
, op0
);
15353 if (op1
== const0_rtx
)
15354 addr
= gen_rtx_MEM (tmode
, op2
);
15357 op1
= copy_to_mode_reg (mode1
, op1
);
15358 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op2
, op1
));
15361 pat
= GEN_FCN (icode
) (addr
, op0
);
15369 /* Return the appropriate SPR number associated with the given builtin. */
/* Maps HTM get/set builtins to their special-purpose-register numbers;
   asserts the code is a TEXASRU builtin when no earlier branch matched.
   NOTE(review): the `return TFHAR_SPR;` / `return TFIAR_SPR;` /
   `return TEXASR_SPR;` bodies of the first three branches were dropped
   by extraction -- only the final TEXASRU_SPR return survives.  */
15370 static inline HOST_WIDE_INT
15371 htm_spr_num (enum rs6000_builtins code
)
15373 if (code
== HTM_BUILTIN_GET_TFHAR
15374 || code
== HTM_BUILTIN_SET_TFHAR
)
15376 else if (code
== HTM_BUILTIN_GET_TFIAR
15377 || code
== HTM_BUILTIN_SET_TFIAR
)
15379 else if (code
== HTM_BUILTIN_GET_TEXASR
15380 || code
== HTM_BUILTIN_SET_TEXASR
)
15382 gcc_assert (code
== HTM_BUILTIN_GET_TEXASRU
15383 || code
== HTM_BUILTIN_SET_TEXASRU
);
15384 return TEXASRU_SPR
;
15387 /* Return the appropriate SPR regno associated with the given builtin. */
/* Maps HTM get/set builtins to the pseudo register numbers used for the
   TFHAR / TFIAR / TEXASR(U) SPRs; both TEXASR and TEXASRU share
   TEXASR_REGNO.  NOTE(review): function braces were dropped by
   extraction.  */
15388 static inline HOST_WIDE_INT
15389 htm_spr_regno (enum rs6000_builtins code
)
15391 if (code
== HTM_BUILTIN_GET_TFHAR
15392 || code
== HTM_BUILTIN_SET_TFHAR
)
15393 return TFHAR_REGNO
;
15394 else if (code
== HTM_BUILTIN_GET_TFIAR
15395 || code
== HTM_BUILTIN_SET_TFIAR
)
15396 return TFIAR_REGNO
;
15397 gcc_assert (code
== HTM_BUILTIN_GET_TEXASR
15398 || code
== HTM_BUILTIN_SET_TEXASR
15399 || code
== HTM_BUILTIN_GET_TEXASRU
15400 || code
== HTM_BUILTIN_SET_TEXASRU
);
15401 return TEXASR_REGNO
;
15404 /* Return the correct ICODE value depending on whether we are
15405 setting or reading the HTM SPRs. */
/* Picks mfspr (read) vs. mtspr (write) patterns, in DImode on 64-bit
   targets and SImode otherwise.  NOTE(review): the `if (nonvoid)` guard
   that selects between the two returns was dropped by extraction --
   the first return presumably applies when NONVOID is true.  */
15406 static inline enum insn_code
15407 rs6000_htm_spr_icode (bool nonvoid
)
15410 return (TARGET_POWERPC64
) ? CODE_FOR_htm_mfspr_di
: CODE_FOR_htm_mfspr_si
;
15412 return (TARGET_POWERPC64
) ? CODE_FOR_htm_mtspr_di
: CODE_FOR_htm_mtspr_si
;
15415 /* Expand the HTM builtin in EXP and store the result in TARGET.
15416 Store true in *EXPANDEDP if we found a builtin to expand. */
15418 htm_expand_builtin (tree exp
, rtx target
, bool * expandedp
)
15420 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15421 bool nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
15422 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
15423 const struct builtin_description
*d
;
15428 if (!TARGET_POWERPC64
15429 && (fcode
== HTM_BUILTIN_TABORTDC
15430 || fcode
== HTM_BUILTIN_TABORTDCI
))
15432 size_t uns_fcode
= (size_t)fcode
;
15433 const char *name
= rs6000_builtin_info
[uns_fcode
].name
;
15434 error ("builtin %s is only valid in 64-bit mode", name
);
15438 /* Expand the HTM builtins. */
15440 for (i
= 0; i
< ARRAY_SIZE (bdesc_htm
); i
++, d
++)
15441 if (d
->code
== fcode
)
15443 rtx op
[MAX_HTM_OPERANDS
], pat
;
15446 call_expr_arg_iterator iter
;
15447 unsigned attr
= rs6000_builtin_info
[fcode
].attr
;
15448 enum insn_code icode
= d
->icode
;
15449 const struct insn_operand_data
*insn_op
;
15450 bool uses_spr
= (attr
& RS6000_BTC_SPR
);
15454 icode
= rs6000_htm_spr_icode (nonvoid
);
15455 insn_op
= &insn_data
[icode
].operand
[0];
15459 machine_mode tmode
= (uses_spr
) ? insn_op
->mode
: E_SImode
;
15461 || GET_MODE (target
) != tmode
15462 || (uses_spr
&& !(*insn_op
->predicate
) (target
, tmode
)))
15463 target
= gen_reg_rtx (tmode
);
15465 op
[nopnds
++] = target
;
15468 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
15470 if (arg
== error_mark_node
|| nopnds
>= MAX_HTM_OPERANDS
)
15473 insn_op
= &insn_data
[icode
].operand
[nopnds
];
15475 op
[nopnds
] = expand_normal (arg
);
15477 if (!(*insn_op
->predicate
) (op
[nopnds
], insn_op
->mode
))
15479 if (!strcmp (insn_op
->constraint
, "n"))
15481 int arg_num
= (nonvoid
) ? nopnds
: nopnds
+ 1;
15482 if (!CONST_INT_P (op
[nopnds
]))
15483 error ("argument %d must be an unsigned literal", arg_num
);
15485 error ("argument %d is an unsigned literal that is "
15486 "out of range", arg_num
);
15489 op
[nopnds
] = copy_to_mode_reg (insn_op
->mode
, op
[nopnds
]);
15495 /* Handle the builtins for extended mnemonics. These accept
15496 no arguments, but map to builtins that take arguments. */
15499 case HTM_BUILTIN_TENDALL
: /* Alias for: tend. 1 */
15500 case HTM_BUILTIN_TRESUME
: /* Alias for: tsr. 1 */
15501 op
[nopnds
++] = GEN_INT (1);
15503 attr
|= RS6000_BTC_UNARY
;
15505 case HTM_BUILTIN_TSUSPEND
: /* Alias for: tsr. 0 */
15506 op
[nopnds
++] = GEN_INT (0);
15508 attr
|= RS6000_BTC_UNARY
;
15514 /* If this builtin accesses SPRs, then pass in the appropriate
15515 SPR number and SPR regno as the last two operands. */
15518 machine_mode mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
15519 op
[nopnds
++] = gen_rtx_CONST_INT (mode
, htm_spr_num (fcode
));
15520 op
[nopnds
++] = gen_rtx_REG (mode
, htm_spr_regno (fcode
));
15522 /* If this builtin accesses a CR, then pass in a scratch
15523 CR as the last operand. */
15524 else if (attr
& RS6000_BTC_CR
)
15525 { cr
= gen_reg_rtx (CCmode
);
15531 int expected_nopnds
= 0;
15532 if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_UNARY
)
15533 expected_nopnds
= 1;
15534 else if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_BINARY
)
15535 expected_nopnds
= 2;
15536 else if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_TERNARY
)
15537 expected_nopnds
= 3;
15538 if (!(attr
& RS6000_BTC_VOID
))
15539 expected_nopnds
+= 1;
15541 expected_nopnds
+= 2;
15543 gcc_assert (nopnds
== expected_nopnds
15544 && nopnds
<= MAX_HTM_OPERANDS
);
15550 pat
= GEN_FCN (icode
) (op
[0]);
15553 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
15556 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
15559 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
15562 gcc_unreachable ();
15568 if (attr
& RS6000_BTC_CR
)
15570 if (fcode
== HTM_BUILTIN_TBEGIN
)
15572 /* Emit code to set TARGET to true or false depending on
15573 whether the tbegin. instruction successfully or failed
15574 to start a transaction. We do this by placing the 1's
15575 complement of CR's EQ bit into TARGET. */
15576 rtx scratch
= gen_reg_rtx (SImode
);
15577 emit_insn (gen_rtx_SET (scratch
,
15578 gen_rtx_EQ (SImode
, cr
,
15580 emit_insn (gen_rtx_SET (target
,
15581 gen_rtx_XOR (SImode
, scratch
,
15586 /* Emit code to copy the 4-bit condition register field
15587 CR into the least significant end of register TARGET. */
15588 rtx scratch1
= gen_reg_rtx (SImode
);
15589 rtx scratch2
= gen_reg_rtx (SImode
);
15590 rtx subreg
= simplify_gen_subreg (CCmode
, scratch1
, SImode
, 0);
15591 emit_insn (gen_movcc (subreg
, cr
));
15592 emit_insn (gen_lshrsi3 (scratch2
, scratch1
, GEN_INT (28)));
15593 emit_insn (gen_andsi3 (target
, scratch2
, GEN_INT (0xf)));
15602 *expandedp
= false;
15606 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15609 cpu_expand_builtin (enum rs6000_builtins fcode
, tree exp ATTRIBUTE_UNUSED
,
15612 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15613 if (fcode
== RS6000_BUILTIN_CPU_INIT
)
15616 if (target
== 0 || GET_MODE (target
) != SImode
)
15617 target
= gen_reg_rtx (SImode
);
15619 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15620 tree arg
= TREE_OPERAND (CALL_EXPR_ARG (exp
, 0), 0);
15621 if (TREE_CODE (arg
) != STRING_CST
)
15623 error ("builtin %s only accepts a string argument",
15624 rs6000_builtin_info
[(size_t) fcode
].name
);
15628 if (fcode
== RS6000_BUILTIN_CPU_IS
)
15630 const char *cpu
= TREE_STRING_POINTER (arg
);
15631 rtx cpuid
= NULL_RTX
;
15632 for (size_t i
= 0; i
< ARRAY_SIZE (cpu_is_info
); i
++)
15633 if (strcmp (cpu
, cpu_is_info
[i
].cpu
) == 0)
15635 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15636 cpuid
= GEN_INT (cpu_is_info
[i
].cpuid
+ _DL_FIRST_PLATFORM
);
15639 if (cpuid
== NULL_RTX
)
15641 /* Invalid CPU argument. */
15642 error ("cpu %s is an invalid argument to builtin %s",
15643 cpu
, rs6000_builtin_info
[(size_t) fcode
].name
);
15647 rtx platform
= gen_reg_rtx (SImode
);
15648 rtx tcbmem
= gen_const_mem (SImode
,
15649 gen_rtx_PLUS (Pmode
,
15650 gen_rtx_REG (Pmode
, TLS_REGNUM
),
15651 GEN_INT (TCB_PLATFORM_OFFSET
)));
15652 emit_move_insn (platform
, tcbmem
);
15653 emit_insn (gen_eqsi3 (target
, platform
, cpuid
));
15655 else if (fcode
== RS6000_BUILTIN_CPU_SUPPORTS
)
15657 const char *hwcap
= TREE_STRING_POINTER (arg
);
15658 rtx mask
= NULL_RTX
;
15660 for (size_t i
= 0; i
< ARRAY_SIZE (cpu_supports_info
); i
++)
15661 if (strcmp (hwcap
, cpu_supports_info
[i
].hwcap
) == 0)
15663 mask
= GEN_INT (cpu_supports_info
[i
].mask
);
15664 hwcap_offset
= TCB_HWCAP_OFFSET (cpu_supports_info
[i
].id
);
15667 if (mask
== NULL_RTX
)
15669 /* Invalid HWCAP argument. */
15670 error ("hwcap %s is an invalid argument to builtin %s",
15671 hwcap
, rs6000_builtin_info
[(size_t) fcode
].name
);
15675 rtx tcb_hwcap
= gen_reg_rtx (SImode
);
15676 rtx tcbmem
= gen_const_mem (SImode
,
15677 gen_rtx_PLUS (Pmode
,
15678 gen_rtx_REG (Pmode
, TLS_REGNUM
),
15679 GEN_INT (hwcap_offset
)));
15680 emit_move_insn (tcb_hwcap
, tcbmem
);
15681 rtx scratch1
= gen_reg_rtx (SImode
);
15682 emit_insn (gen_rtx_SET (scratch1
, gen_rtx_AND (SImode
, tcb_hwcap
, mask
)));
15683 rtx scratch2
= gen_reg_rtx (SImode
);
15684 emit_insn (gen_eqsi3 (scratch2
, scratch1
, const0_rtx
));
15685 emit_insn (gen_rtx_SET (target
, gen_rtx_XOR (SImode
, scratch2
, const1_rtx
)));
15688 /* Record that we have expanded a CPU builtin, so that we can later
15689 emit a reference to the special symbol exported by LIBC to ensure we
15690 do not link against an old LIBC that doesn't support this feature. */
15691 cpu_builtin_p
= true;
15694 /* For old LIBCs, always return FALSE. */
15695 emit_move_insn (target
, GEN_INT (0));
15696 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15702 rs6000_expand_ternop_builtin (enum insn_code icode
, tree exp
, rtx target
)
15705 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15706 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15707 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15708 rtx op0
= expand_normal (arg0
);
15709 rtx op1
= expand_normal (arg1
);
15710 rtx op2
= expand_normal (arg2
);
15711 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15712 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15713 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
15714 machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
15716 if (icode
== CODE_FOR_nothing
)
15717 /* Builtin not supported on this processor. */
15720 /* If we got invalid arguments bail out before generating bad rtl. */
15721 if (arg0
== error_mark_node
15722 || arg1
== error_mark_node
15723 || arg2
== error_mark_node
)
15726 /* Check and prepare argument depending on the instruction code.
15728 Note that a switch statement instead of the sequence of tests
15729 would be incorrect as many of the CODE_FOR values could be
15730 CODE_FOR_nothing and that would yield multiple alternatives
15731 with identical values. We'd never reach here at runtime in
15733 if (icode
== CODE_FOR_altivec_vsldoi_v4sf
15734 || icode
== CODE_FOR_altivec_vsldoi_v2df
15735 || icode
== CODE_FOR_altivec_vsldoi_v4si
15736 || icode
== CODE_FOR_altivec_vsldoi_v8hi
15737 || icode
== CODE_FOR_altivec_vsldoi_v16qi
)
15739 /* Only allow 4-bit unsigned literals. */
15741 if (TREE_CODE (arg2
) != INTEGER_CST
15742 || TREE_INT_CST_LOW (arg2
) & ~0xf)
15744 error ("argument 3 must be a 4-bit unsigned literal");
15745 return CONST0_RTX (tmode
);
15748 else if (icode
== CODE_FOR_vsx_xxpermdi_v2df
15749 || icode
== CODE_FOR_vsx_xxpermdi_v2di
15750 || icode
== CODE_FOR_vsx_xxpermdi_v2df_be
15751 || icode
== CODE_FOR_vsx_xxpermdi_v2di_be
15752 || icode
== CODE_FOR_vsx_xxpermdi_v1ti
15753 || icode
== CODE_FOR_vsx_xxpermdi_v4sf
15754 || icode
== CODE_FOR_vsx_xxpermdi_v4si
15755 || icode
== CODE_FOR_vsx_xxpermdi_v8hi
15756 || icode
== CODE_FOR_vsx_xxpermdi_v16qi
15757 || icode
== CODE_FOR_vsx_xxsldwi_v16qi
15758 || icode
== CODE_FOR_vsx_xxsldwi_v8hi
15759 || icode
== CODE_FOR_vsx_xxsldwi_v4si
15760 || icode
== CODE_FOR_vsx_xxsldwi_v4sf
15761 || icode
== CODE_FOR_vsx_xxsldwi_v2di
15762 || icode
== CODE_FOR_vsx_xxsldwi_v2df
)
15764 /* Only allow 2-bit unsigned literals. */
15766 if (TREE_CODE (arg2
) != INTEGER_CST
15767 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15769 error ("argument 3 must be a 2-bit unsigned literal");
15770 return CONST0_RTX (tmode
);
15773 else if (icode
== CODE_FOR_vsx_set_v2df
15774 || icode
== CODE_FOR_vsx_set_v2di
15775 || icode
== CODE_FOR_bcdadd
15776 || icode
== CODE_FOR_bcdadd_lt
15777 || icode
== CODE_FOR_bcdadd_eq
15778 || icode
== CODE_FOR_bcdadd_gt
15779 || icode
== CODE_FOR_bcdsub
15780 || icode
== CODE_FOR_bcdsub_lt
15781 || icode
== CODE_FOR_bcdsub_eq
15782 || icode
== CODE_FOR_bcdsub_gt
)
15784 /* Only allow 1-bit unsigned literals. */
15786 if (TREE_CODE (arg2
) != INTEGER_CST
15787 || TREE_INT_CST_LOW (arg2
) & ~0x1)
15789 error ("argument 3 must be a 1-bit unsigned literal");
15790 return CONST0_RTX (tmode
);
15793 else if (icode
== CODE_FOR_dfp_ddedpd_dd
15794 || icode
== CODE_FOR_dfp_ddedpd_td
)
15796 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15798 if (TREE_CODE (arg0
) != INTEGER_CST
15799 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15801 error ("argument 1 must be 0 or 2");
15802 return CONST0_RTX (tmode
);
15805 else if (icode
== CODE_FOR_dfp_denbcd_dd
15806 || icode
== CODE_FOR_dfp_denbcd_td
)
15808 /* Only allow 1-bit unsigned literals. */
15810 if (TREE_CODE (arg0
) != INTEGER_CST
15811 || TREE_INT_CST_LOW (arg0
) & ~0x1)
15813 error ("argument 1 must be a 1-bit unsigned literal");
15814 return CONST0_RTX (tmode
);
15817 else if (icode
== CODE_FOR_dfp_dscli_dd
15818 || icode
== CODE_FOR_dfp_dscli_td
15819 || icode
== CODE_FOR_dfp_dscri_dd
15820 || icode
== CODE_FOR_dfp_dscri_td
)
15822 /* Only allow 6-bit unsigned literals. */
15824 if (TREE_CODE (arg1
) != INTEGER_CST
15825 || TREE_INT_CST_LOW (arg1
) & ~0x3f)
15827 error ("argument 2 must be a 6-bit unsigned literal");
15828 return CONST0_RTX (tmode
);
15831 else if (icode
== CODE_FOR_crypto_vshasigmaw
15832 || icode
== CODE_FOR_crypto_vshasigmad
)
15834 /* Check whether the 2nd and 3rd arguments are integer constants and in
15835 range and prepare arguments. */
15837 if (TREE_CODE (arg1
) != INTEGER_CST
|| wi::geu_p (arg1
, 2))
15839 error ("argument 2 must be 0 or 1");
15840 return CONST0_RTX (tmode
);
15844 if (TREE_CODE (arg2
) != INTEGER_CST
|| wi::geu_p (arg2
, 16))
15846 error ("argument 3 must be in the range 0..15");
15847 return CONST0_RTX (tmode
);
15852 || GET_MODE (target
) != tmode
15853 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15854 target
= gen_reg_rtx (tmode
);
15856 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15857 op0
= copy_to_mode_reg (mode0
, op0
);
15858 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15859 op1
= copy_to_mode_reg (mode1
, op1
);
15860 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
15861 op2
= copy_to_mode_reg (mode2
, op2
);
15863 if (TARGET_PAIRED_FLOAT
&& icode
== CODE_FOR_selv2sf4
)
15864 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, CONST0_RTX (SFmode
));
15866 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
15874 /* Expand the lvx builtins. */
15876 altivec_expand_ld_builtin (tree exp
, rtx target
, bool *expandedp
)
15878 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15879 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15881 machine_mode tmode
, mode0
;
15883 enum insn_code icode
;
15887 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi
:
15888 icode
= CODE_FOR_vector_altivec_load_v16qi
;
15890 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi
:
15891 icode
= CODE_FOR_vector_altivec_load_v8hi
;
15893 case ALTIVEC_BUILTIN_LD_INTERNAL_4si
:
15894 icode
= CODE_FOR_vector_altivec_load_v4si
;
15896 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf
:
15897 icode
= CODE_FOR_vector_altivec_load_v4sf
;
15899 case ALTIVEC_BUILTIN_LD_INTERNAL_2df
:
15900 icode
= CODE_FOR_vector_altivec_load_v2df
;
15902 case ALTIVEC_BUILTIN_LD_INTERNAL_2di
:
15903 icode
= CODE_FOR_vector_altivec_load_v2di
;
15905 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti
:
15906 icode
= CODE_FOR_vector_altivec_load_v1ti
;
15909 *expandedp
= false;
15915 arg0
= CALL_EXPR_ARG (exp
, 0);
15916 op0
= expand_normal (arg0
);
15917 tmode
= insn_data
[icode
].operand
[0].mode
;
15918 mode0
= insn_data
[icode
].operand
[1].mode
;
15921 || GET_MODE (target
) != tmode
15922 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15923 target
= gen_reg_rtx (tmode
);
15925 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15926 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15928 pat
= GEN_FCN (icode
) (target
, op0
);
15935 /* Expand the stvx builtins. */
15937 altivec_expand_st_builtin (tree exp
, rtx target ATTRIBUTE_UNUSED
,
15940 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15941 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15943 machine_mode mode0
, mode1
;
15945 enum insn_code icode
;
15949 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi
:
15950 icode
= CODE_FOR_vector_altivec_store_v16qi
;
15952 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi
:
15953 icode
= CODE_FOR_vector_altivec_store_v8hi
;
15955 case ALTIVEC_BUILTIN_ST_INTERNAL_4si
:
15956 icode
= CODE_FOR_vector_altivec_store_v4si
;
15958 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf
:
15959 icode
= CODE_FOR_vector_altivec_store_v4sf
;
15961 case ALTIVEC_BUILTIN_ST_INTERNAL_2df
:
15962 icode
= CODE_FOR_vector_altivec_store_v2df
;
15964 case ALTIVEC_BUILTIN_ST_INTERNAL_2di
:
15965 icode
= CODE_FOR_vector_altivec_store_v2di
;
15967 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti
:
15968 icode
= CODE_FOR_vector_altivec_store_v1ti
;
15971 *expandedp
= false;
15975 arg0
= CALL_EXPR_ARG (exp
, 0);
15976 arg1
= CALL_EXPR_ARG (exp
, 1);
15977 op0
= expand_normal (arg0
);
15978 op1
= expand_normal (arg1
);
15979 mode0
= insn_data
[icode
].operand
[0].mode
;
15980 mode1
= insn_data
[icode
].operand
[1].mode
;
15982 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
15983 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15984 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
15985 op1
= copy_to_mode_reg (mode1
, op1
);
15987 pat
= GEN_FCN (icode
) (op0
, op1
);
15995 /* Expand the dst builtins. */
15997 altivec_expand_dst_builtin (tree exp
, rtx target ATTRIBUTE_UNUSED
,
16000 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16001 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16002 tree arg0
, arg1
, arg2
;
16003 machine_mode mode0
, mode1
;
16004 rtx pat
, op0
, op1
, op2
;
16005 const struct builtin_description
*d
;
16008 *expandedp
= false;
16010 /* Handle DST variants. */
16012 for (i
= 0; i
< ARRAY_SIZE (bdesc_dst
); i
++, d
++)
16013 if (d
->code
== fcode
)
16015 arg0
= CALL_EXPR_ARG (exp
, 0);
16016 arg1
= CALL_EXPR_ARG (exp
, 1);
16017 arg2
= CALL_EXPR_ARG (exp
, 2);
16018 op0
= expand_normal (arg0
);
16019 op1
= expand_normal (arg1
);
16020 op2
= expand_normal (arg2
);
16021 mode0
= insn_data
[d
->icode
].operand
[0].mode
;
16022 mode1
= insn_data
[d
->icode
].operand
[1].mode
;
16024 /* Invalid arguments, bail out before generating bad rtl. */
16025 if (arg0
== error_mark_node
16026 || arg1
== error_mark_node
16027 || arg2
== error_mark_node
)
16032 if (TREE_CODE (arg2
) != INTEGER_CST
16033 || TREE_INT_CST_LOW (arg2
) & ~0x3)
16035 error ("argument to %qs must be a 2-bit unsigned literal", d
->name
);
16039 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
16040 op0
= copy_to_mode_reg (Pmode
, op0
);
16041 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
16042 op1
= copy_to_mode_reg (mode1
, op1
);
16044 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
16054 /* Expand vec_init builtin. */
16056 altivec_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
16058 machine_mode tmode
= TYPE_MODE (type
);
16059 machine_mode inner_mode
= GET_MODE_INNER (tmode
);
16060 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
16062 gcc_assert (VECTOR_MODE_P (tmode
));
16063 gcc_assert (n_elt
== call_expr_nargs (exp
));
16065 if (!target
|| !register_operand (target
, tmode
))
16066 target
= gen_reg_rtx (tmode
);
16068 /* If we have a vector compromised of a single element, such as V1TImode, do
16069 the initialization directly. */
16070 if (n_elt
== 1 && GET_MODE_SIZE (tmode
) == GET_MODE_SIZE (inner_mode
))
16072 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, 0));
16073 emit_move_insn (target
, gen_lowpart (tmode
, x
));
16077 rtvec v
= rtvec_alloc (n_elt
);
16079 for (i
= 0; i
< n_elt
; ++i
)
16081 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
16082 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
16085 rs6000_expand_vector_init (target
, gen_rtx_PARALLEL (tmode
, v
));
16091 /* Return the integer constant in ARG. Constrain it to be in the range
16092 of the subparts of VEC_TYPE; issue an error if not. */
16095 get_element_number (tree vec_type
, tree arg
)
16097 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
16099 if (!tree_fits_uhwi_p (arg
)
16100 || (elt
= tree_to_uhwi (arg
), elt
> max
))
16102 error ("selector must be an integer constant in the range 0..%wi", max
);
16109 /* Expand vec_set builtin. */
16111 altivec_expand_vec_set_builtin (tree exp
)
16113 machine_mode tmode
, mode1
;
16114 tree arg0
, arg1
, arg2
;
16118 arg0
= CALL_EXPR_ARG (exp
, 0);
16119 arg1
= CALL_EXPR_ARG (exp
, 1);
16120 arg2
= CALL_EXPR_ARG (exp
, 2);
16122 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
16123 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16124 gcc_assert (VECTOR_MODE_P (tmode
));
16126 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
16127 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
16128 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
16130 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
16131 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
16133 op0
= force_reg (tmode
, op0
);
16134 op1
= force_reg (mode1
, op1
);
16136 rs6000_expand_vector_set (op0
, op1
, elt
);
16141 /* Expand vec_ext builtin. */
16143 altivec_expand_vec_ext_builtin (tree exp
, rtx target
)
16145 machine_mode tmode
, mode0
;
16150 arg0
= CALL_EXPR_ARG (exp
, 0);
16151 arg1
= CALL_EXPR_ARG (exp
, 1);
16153 op0
= expand_normal (arg0
);
16154 op1
= expand_normal (arg1
);
16156 /* Call get_element_number to validate arg1 if it is a constant. */
16157 if (TREE_CODE (arg1
) == INTEGER_CST
)
16158 (void) get_element_number (TREE_TYPE (arg0
), arg1
);
16160 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16161 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
16162 gcc_assert (VECTOR_MODE_P (mode0
));
16164 op0
= force_reg (mode0
, op0
);
16166 if (optimize
|| !target
|| !register_operand (target
, tmode
))
16167 target
= gen_reg_rtx (tmode
);
16169 rs6000_expand_vector_extract (target
, op0
, op1
);
16174 /* Expand the builtin in EXP and store the result in TARGET. Store
16175 true in *EXPANDEDP if we found a builtin to expand. */
16177 altivec_expand_builtin (tree exp
, rtx target
, bool *expandedp
)
16179 const struct builtin_description
*d
;
16181 enum insn_code icode
;
16182 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16183 tree arg0
, arg1
, arg2
;
16185 machine_mode tmode
, mode0
;
16186 enum rs6000_builtins fcode
16187 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16189 if (rs6000_overloaded_builtin_p (fcode
))
16192 error ("unresolved overload for Altivec builtin %qF", fndecl
);
16194 /* Given it is invalid, just generate a normal call. */
16195 return expand_call (exp
, target
, false);
16198 target
= altivec_expand_ld_builtin (exp
, target
, expandedp
);
16202 target
= altivec_expand_st_builtin (exp
, target
, expandedp
);
16206 target
= altivec_expand_dst_builtin (exp
, target
, expandedp
);
16214 case ALTIVEC_BUILTIN_STVX_V2DF
:
16215 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op
, exp
);
16216 case ALTIVEC_BUILTIN_STVX_V2DI
:
16217 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op
, exp
);
16218 case ALTIVEC_BUILTIN_STVX_V4SF
:
16219 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op
, exp
);
16220 case ALTIVEC_BUILTIN_STVX
:
16221 case ALTIVEC_BUILTIN_STVX_V4SI
:
16222 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op
, exp
);
16223 case ALTIVEC_BUILTIN_STVX_V8HI
:
16224 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op
, exp
);
16225 case ALTIVEC_BUILTIN_STVX_V16QI
:
16226 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op
, exp
);
16227 case ALTIVEC_BUILTIN_STVEBX
:
16228 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx
, exp
);
16229 case ALTIVEC_BUILTIN_STVEHX
:
16230 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx
, exp
);
16231 case ALTIVEC_BUILTIN_STVEWX
:
16232 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx
, exp
);
16233 case ALTIVEC_BUILTIN_STVXL_V2DF
:
16234 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df
, exp
);
16235 case ALTIVEC_BUILTIN_STVXL_V2DI
:
16236 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di
, exp
);
16237 case ALTIVEC_BUILTIN_STVXL_V4SF
:
16238 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf
, exp
);
16239 case ALTIVEC_BUILTIN_STVXL
:
16240 case ALTIVEC_BUILTIN_STVXL_V4SI
:
16241 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si
, exp
);
16242 case ALTIVEC_BUILTIN_STVXL_V8HI
:
16243 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi
, exp
);
16244 case ALTIVEC_BUILTIN_STVXL_V16QI
:
16245 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi
, exp
);
16247 case ALTIVEC_BUILTIN_STVLX
:
16248 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx
, exp
);
16249 case ALTIVEC_BUILTIN_STVLXL
:
16250 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl
, exp
);
16251 case ALTIVEC_BUILTIN_STVRX
:
16252 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx
, exp
);
16253 case ALTIVEC_BUILTIN_STVRXL
:
16254 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl
, exp
);
16256 case P9V_BUILTIN_STXVL
:
16257 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl
, exp
);
16259 case VSX_BUILTIN_STXVD2X_V1TI
:
16260 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti
, exp
);
16261 case VSX_BUILTIN_STXVD2X_V2DF
:
16262 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df
, exp
);
16263 case VSX_BUILTIN_STXVD2X_V2DI
:
16264 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di
, exp
);
16265 case VSX_BUILTIN_STXVW4X_V4SF
:
16266 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf
, exp
);
16267 case VSX_BUILTIN_STXVW4X_V4SI
:
16268 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si
, exp
);
16269 case VSX_BUILTIN_STXVW4X_V8HI
:
16270 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi
, exp
);
16271 case VSX_BUILTIN_STXVW4X_V16QI
:
16272 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi
, exp
);
16274 /* For the following on big endian, it's ok to use any appropriate
16275 unaligned-supporting store, so use a generic expander. For
16276 little-endian, the exact element-reversing instruction must
16278 case VSX_BUILTIN_ST_ELEMREV_V2DF
:
16280 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v2df
16281 : CODE_FOR_vsx_st_elemrev_v2df
);
16282 return altivec_expand_stv_builtin (code
, exp
);
16284 case VSX_BUILTIN_ST_ELEMREV_V2DI
:
16286 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v2di
16287 : CODE_FOR_vsx_st_elemrev_v2di
);
16288 return altivec_expand_stv_builtin (code
, exp
);
16290 case VSX_BUILTIN_ST_ELEMREV_V4SF
:
16292 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v4sf
16293 : CODE_FOR_vsx_st_elemrev_v4sf
);
16294 return altivec_expand_stv_builtin (code
, exp
);
16296 case VSX_BUILTIN_ST_ELEMREV_V4SI
:
16298 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v4si
16299 : CODE_FOR_vsx_st_elemrev_v4si
);
16300 return altivec_expand_stv_builtin (code
, exp
);
16302 case VSX_BUILTIN_ST_ELEMREV_V8HI
:
16304 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v8hi
16305 : CODE_FOR_vsx_st_elemrev_v8hi
);
16306 return altivec_expand_stv_builtin (code
, exp
);
16308 case VSX_BUILTIN_ST_ELEMREV_V16QI
:
16310 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v16qi
16311 : CODE_FOR_vsx_st_elemrev_v16qi
);
16312 return altivec_expand_stv_builtin (code
, exp
);
16315 case ALTIVEC_BUILTIN_MFVSCR
:
16316 icode
= CODE_FOR_altivec_mfvscr
;
16317 tmode
= insn_data
[icode
].operand
[0].mode
;
16320 || GET_MODE (target
) != tmode
16321 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16322 target
= gen_reg_rtx (tmode
);
16324 pat
= GEN_FCN (icode
) (target
);
16330 case ALTIVEC_BUILTIN_MTVSCR
:
16331 icode
= CODE_FOR_altivec_mtvscr
;
16332 arg0
= CALL_EXPR_ARG (exp
, 0);
16333 op0
= expand_normal (arg0
);
16334 mode0
= insn_data
[icode
].operand
[0].mode
;
16336 /* If we got invalid arguments bail out before generating bad rtl. */
16337 if (arg0
== error_mark_node
)
16340 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16341 op0
= copy_to_mode_reg (mode0
, op0
);
16343 pat
= GEN_FCN (icode
) (op0
);
16348 case ALTIVEC_BUILTIN_DSSALL
:
16349 emit_insn (gen_altivec_dssall ());
16352 case ALTIVEC_BUILTIN_DSS
:
16353 icode
= CODE_FOR_altivec_dss
;
16354 arg0
= CALL_EXPR_ARG (exp
, 0);
16356 op0
= expand_normal (arg0
);
16357 mode0
= insn_data
[icode
].operand
[0].mode
;
16359 /* If we got invalid arguments bail out before generating bad rtl. */
16360 if (arg0
== error_mark_node
)
16363 if (TREE_CODE (arg0
) != INTEGER_CST
16364 || TREE_INT_CST_LOW (arg0
) & ~0x3)
16366 error ("argument to dss must be a 2-bit unsigned literal");
16370 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16371 op0
= copy_to_mode_reg (mode0
, op0
);
16373 emit_insn (gen_altivec_dss (op0
));
16376 case ALTIVEC_BUILTIN_VEC_INIT_V4SI
:
16377 case ALTIVEC_BUILTIN_VEC_INIT_V8HI
:
16378 case ALTIVEC_BUILTIN_VEC_INIT_V16QI
:
16379 case ALTIVEC_BUILTIN_VEC_INIT_V4SF
:
16380 case VSX_BUILTIN_VEC_INIT_V2DF
:
16381 case VSX_BUILTIN_VEC_INIT_V2DI
:
16382 case VSX_BUILTIN_VEC_INIT_V1TI
:
16383 return altivec_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
16385 case ALTIVEC_BUILTIN_VEC_SET_V4SI
:
16386 case ALTIVEC_BUILTIN_VEC_SET_V8HI
:
16387 case ALTIVEC_BUILTIN_VEC_SET_V16QI
:
16388 case ALTIVEC_BUILTIN_VEC_SET_V4SF
:
16389 case VSX_BUILTIN_VEC_SET_V2DF
:
16390 case VSX_BUILTIN_VEC_SET_V2DI
:
16391 case VSX_BUILTIN_VEC_SET_V1TI
:
16392 return altivec_expand_vec_set_builtin (exp
);
16394 case ALTIVEC_BUILTIN_VEC_EXT_V4SI
:
16395 case ALTIVEC_BUILTIN_VEC_EXT_V8HI
:
16396 case ALTIVEC_BUILTIN_VEC_EXT_V16QI
:
16397 case ALTIVEC_BUILTIN_VEC_EXT_V4SF
:
16398 case VSX_BUILTIN_VEC_EXT_V2DF
:
16399 case VSX_BUILTIN_VEC_EXT_V2DI
:
16400 case VSX_BUILTIN_VEC_EXT_V1TI
:
16401 return altivec_expand_vec_ext_builtin (exp
, target
);
16403 case P9V_BUILTIN_VEXTRACT4B
:
16404 case P9V_BUILTIN_VEC_VEXTRACT4B
:
16405 arg1
= CALL_EXPR_ARG (exp
, 1);
16408 /* Generate a normal call if it is invalid. */
16409 if (arg1
== error_mark_node
)
16410 return expand_call (exp
, target
, false);
16412 if (TREE_CODE (arg1
) != INTEGER_CST
|| TREE_INT_CST_LOW (arg1
) > 12)
16414 error ("second argument to vec_vextract4b must be 0..12");
16415 return expand_call (exp
, target
, false);
16419 case P9V_BUILTIN_VINSERT4B
:
16420 case P9V_BUILTIN_VINSERT4B_DI
:
16421 case P9V_BUILTIN_VEC_VINSERT4B
:
16422 arg2
= CALL_EXPR_ARG (exp
, 2);
16425 /* Generate a normal call if it is invalid. */
16426 if (arg2
== error_mark_node
)
16427 return expand_call (exp
, target
, false);
16429 if (TREE_CODE (arg2
) != INTEGER_CST
|| TREE_INT_CST_LOW (arg2
) > 12)
16431 error ("third argument to vec_vinsert4b must be 0..12");
16432 return expand_call (exp
, target
, false);
16438 /* Fall through. */
16441 /* Expand abs* operations. */
16443 for (i
= 0; i
< ARRAY_SIZE (bdesc_abs
); i
++, d
++)
16444 if (d
->code
== fcode
)
16445 return altivec_expand_abs_builtin (d
->icode
, exp
, target
);
16447 /* Expand the AltiVec predicates. */
16448 d
= bdesc_altivec_preds
;
16449 for (i
= 0; i
< ARRAY_SIZE (bdesc_altivec_preds
); i
++, d
++)
16450 if (d
->code
== fcode
)
16451 return altivec_expand_predicate_builtin (d
->icode
, exp
, target
);
16453 /* LV* are funky. We initialized them differently. */
16456 case ALTIVEC_BUILTIN_LVSL
:
16457 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl
,
16458 exp
, target
, false);
16459 case ALTIVEC_BUILTIN_LVSR
:
16460 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr
,
16461 exp
, target
, false);
16462 case ALTIVEC_BUILTIN_LVEBX
:
16463 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx
,
16464 exp
, target
, false);
16465 case ALTIVEC_BUILTIN_LVEHX
:
16466 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx
,
16467 exp
, target
, false);
16468 case ALTIVEC_BUILTIN_LVEWX
:
16469 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx
,
16470 exp
, target
, false);
16471 case ALTIVEC_BUILTIN_LVXL_V2DF
:
16472 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df
,
16473 exp
, target
, false);
16474 case ALTIVEC_BUILTIN_LVXL_V2DI
:
16475 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di
,
16476 exp
, target
, false);
16477 case ALTIVEC_BUILTIN_LVXL_V4SF
:
16478 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf
,
16479 exp
, target
, false);
16480 case ALTIVEC_BUILTIN_LVXL
:
16481 case ALTIVEC_BUILTIN_LVXL_V4SI
:
16482 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si
,
16483 exp
, target
, false);
16484 case ALTIVEC_BUILTIN_LVXL_V8HI
:
16485 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi
,
16486 exp
, target
, false);
16487 case ALTIVEC_BUILTIN_LVXL_V16QI
:
16488 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi
,
16489 exp
, target
, false);
16490 case ALTIVEC_BUILTIN_LVX_V2DF
:
16491 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op
,
16492 exp
, target
, false);
16493 case ALTIVEC_BUILTIN_LVX_V2DI
:
16494 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op
,
16495 exp
, target
, false);
16496 case ALTIVEC_BUILTIN_LVX_V4SF
:
16497 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op
,
16498 exp
, target
, false);
16499 case ALTIVEC_BUILTIN_LVX
:
16500 case ALTIVEC_BUILTIN_LVX_V4SI
:
16501 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op
,
16502 exp
, target
, false);
16503 case ALTIVEC_BUILTIN_LVX_V8HI
:
16504 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op
,
16505 exp
, target
, false);
16506 case ALTIVEC_BUILTIN_LVX_V16QI
:
16507 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op
,
16508 exp
, target
, false);
16509 case ALTIVEC_BUILTIN_LVLX
:
16510 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx
,
16511 exp
, target
, true);
16512 case ALTIVEC_BUILTIN_LVLXL
:
16513 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl
,
16514 exp
, target
, true);
16515 case ALTIVEC_BUILTIN_LVRX
:
16516 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx
,
16517 exp
, target
, true);
16518 case ALTIVEC_BUILTIN_LVRXL
:
16519 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl
,
16520 exp
, target
, true);
16521 case VSX_BUILTIN_LXVD2X_V1TI
:
16522 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti
,
16523 exp
, target
, false);
16524 case VSX_BUILTIN_LXVD2X_V2DF
:
16525 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df
,
16526 exp
, target
, false);
16527 case VSX_BUILTIN_LXVD2X_V2DI
:
16528 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di
,
16529 exp
, target
, false);
16530 case VSX_BUILTIN_LXVW4X_V4SF
:
16531 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf
,
16532 exp
, target
, false);
16533 case VSX_BUILTIN_LXVW4X_V4SI
:
16534 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si
,
16535 exp
, target
, false);
16536 case VSX_BUILTIN_LXVW4X_V8HI
:
16537 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi
,
16538 exp
, target
, false);
16539 case VSX_BUILTIN_LXVW4X_V16QI
:
16540 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi
,
16541 exp
, target
, false);
16542 /* For the following on big endian, it's ok to use any appropriate
16543 unaligned-supporting load, so use a generic expander. For
16544 little-endian, the exact element-reversing instruction must
16546 case VSX_BUILTIN_LD_ELEMREV_V2DF
:
16548 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v2df
16549 : CODE_FOR_vsx_ld_elemrev_v2df
);
16550 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16552 case VSX_BUILTIN_LD_ELEMREV_V2DI
:
16554 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v2di
16555 : CODE_FOR_vsx_ld_elemrev_v2di
);
16556 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16558 case VSX_BUILTIN_LD_ELEMREV_V4SF
:
16560 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v4sf
16561 : CODE_FOR_vsx_ld_elemrev_v4sf
);
16562 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16564 case VSX_BUILTIN_LD_ELEMREV_V4SI
:
16566 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v4si
16567 : CODE_FOR_vsx_ld_elemrev_v4si
);
16568 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16570 case VSX_BUILTIN_LD_ELEMREV_V8HI
:
16572 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v8hi
16573 : CODE_FOR_vsx_ld_elemrev_v8hi
);
16574 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16576 case VSX_BUILTIN_LD_ELEMREV_V16QI
:
16578 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v16qi
16579 : CODE_FOR_vsx_ld_elemrev_v16qi
);
16580 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16585 /* Fall through. */
16588 *expandedp
= false;
16592 /* Expand the builtin in EXP and store the result in TARGET. Store
16593 true in *EXPANDEDP if we found a builtin to expand. */
16595 paired_expand_builtin (tree exp
, rtx target
, bool * expandedp
)
16597 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16598 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16599 const struct builtin_description
*d
;
16606 case PAIRED_BUILTIN_STX
:
16607 return paired_expand_stv_builtin (CODE_FOR_paired_stx
, exp
);
16608 case PAIRED_BUILTIN_LX
:
16609 return paired_expand_lv_builtin (CODE_FOR_paired_lx
, exp
, target
);
16612 /* Fall through. */
16615 /* Expand the paired predicates. */
16616 d
= bdesc_paired_preds
;
16617 for (i
= 0; i
< ARRAY_SIZE (bdesc_paired_preds
); i
++, d
++)
16618 if (d
->code
== fcode
)
16619 return paired_expand_predicate_builtin (d
->icode
, exp
, target
);
16621 *expandedp
= false;
16625 /* Binops that need to be initialized manually, but can be expanded
16626 automagically by rs6000_expand_binop_builtin. */
16627 static const struct builtin_description bdesc_2arg_spe
[] =
16629 { RS6000_BTM_SPE
, CODE_FOR_spe_evlddx
, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX
},
16630 { RS6000_BTM_SPE
, CODE_FOR_spe_evldwx
, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX
},
16631 { RS6000_BTM_SPE
, CODE_FOR_spe_evldhx
, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX
},
16632 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhex
, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX
},
16633 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhoux
, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX
},
16634 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhosx
, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX
},
16635 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwwsplatx
, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX
},
16636 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhsplatx
, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX
},
16637 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhesplatx
, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX
},
16638 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhousplatx
, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX
},
16639 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhossplatx
, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX
},
16640 { RS6000_BTM_SPE
, CODE_FOR_spe_evldd
, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD
},
16641 { RS6000_BTM_SPE
, CODE_FOR_spe_evldw
, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW
},
16642 { RS6000_BTM_SPE
, CODE_FOR_spe_evldh
, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH
},
16643 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhe
, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE
},
16644 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhou
, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU
},
16645 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhos
, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS
},
16646 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwwsplat
, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT
},
16647 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhsplat
, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT
},
16648 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhesplat
, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT
},
16649 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhousplat
, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT
},
16650 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhossplat
, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT
}
16653 /* Expand the builtin in EXP and store the result in TARGET. Store
16654 true in *EXPANDEDP if we found a builtin to expand.
16656 This expands the SPE builtins that are not simple unary and binary
16659 spe_expand_builtin (tree exp
, rtx target
, bool *expandedp
)
16661 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16663 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16664 enum insn_code icode
;
16665 machine_mode tmode
, mode0
;
16667 const struct builtin_description
*d
;
16672 /* Syntax check for a 5-bit unsigned immediate. */
16675 case SPE_BUILTIN_EVSTDD
:
16676 case SPE_BUILTIN_EVSTDH
:
16677 case SPE_BUILTIN_EVSTDW
:
16678 case SPE_BUILTIN_EVSTWHE
:
16679 case SPE_BUILTIN_EVSTWHO
:
16680 case SPE_BUILTIN_EVSTWWE
:
16681 case SPE_BUILTIN_EVSTWWO
:
16682 arg1
= CALL_EXPR_ARG (exp
, 2);
16683 if (TREE_CODE (arg1
) != INTEGER_CST
16684 || TREE_INT_CST_LOW (arg1
) & ~0x1f)
16686 error ("argument 2 must be a 5-bit unsigned literal");
16694 /* The evsplat*i instructions are not quite generic. */
16697 case SPE_BUILTIN_EVSPLATFI
:
16698 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi
,
16700 case SPE_BUILTIN_EVSPLATI
:
16701 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati
,
16707 d
= bdesc_2arg_spe
;
16708 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg_spe
); ++i
, ++d
)
16709 if (d
->code
== fcode
)
16710 return rs6000_expand_binop_builtin (d
->icode
, exp
, target
);
16712 d
= bdesc_spe_predicates
;
16713 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_predicates
); ++i
, ++d
)
16714 if (d
->code
== fcode
)
16715 return spe_expand_predicate_builtin (d
->icode
, exp
, target
);
16717 d
= bdesc_spe_evsel
;
16718 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_evsel
); ++i
, ++d
)
16719 if (d
->code
== fcode
)
16720 return spe_expand_evsel_builtin (d
->icode
, exp
, target
);
16724 case SPE_BUILTIN_EVSTDDX
:
16725 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx
, exp
);
16726 case SPE_BUILTIN_EVSTDHX
:
16727 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx
, exp
);
16728 case SPE_BUILTIN_EVSTDWX
:
16729 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx
, exp
);
16730 case SPE_BUILTIN_EVSTWHEX
:
16731 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex
, exp
);
16732 case SPE_BUILTIN_EVSTWHOX
:
16733 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox
, exp
);
16734 case SPE_BUILTIN_EVSTWWEX
:
16735 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex
, exp
);
16736 case SPE_BUILTIN_EVSTWWOX
:
16737 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox
, exp
);
16738 case SPE_BUILTIN_EVSTDD
:
16739 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd
, exp
);
16740 case SPE_BUILTIN_EVSTDH
:
16741 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh
, exp
);
16742 case SPE_BUILTIN_EVSTDW
:
16743 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw
, exp
);
16744 case SPE_BUILTIN_EVSTWHE
:
16745 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe
, exp
);
16746 case SPE_BUILTIN_EVSTWHO
:
16747 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho
, exp
);
16748 case SPE_BUILTIN_EVSTWWE
:
16749 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe
, exp
);
16750 case SPE_BUILTIN_EVSTWWO
:
16751 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo
, exp
);
16752 case SPE_BUILTIN_MFSPEFSCR
:
16753 icode
= CODE_FOR_spe_mfspefscr
;
16754 tmode
= insn_data
[icode
].operand
[0].mode
;
16757 || GET_MODE (target
) != tmode
16758 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16759 target
= gen_reg_rtx (tmode
);
16761 pat
= GEN_FCN (icode
) (target
);
16766 case SPE_BUILTIN_MTSPEFSCR
:
16767 icode
= CODE_FOR_spe_mtspefscr
;
16768 arg0
= CALL_EXPR_ARG (exp
, 0);
16769 op0
= expand_normal (arg0
);
16770 mode0
= insn_data
[icode
].operand
[0].mode
;
16772 if (arg0
== error_mark_node
)
16775 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16776 op0
= copy_to_mode_reg (mode0
, op0
);
16778 pat
= GEN_FCN (icode
) (op0
);
16786 *expandedp
= false;
16791 paired_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
16793 rtx pat
, scratch
, tmp
;
16794 tree form
= CALL_EXPR_ARG (exp
, 0);
16795 tree arg0
= CALL_EXPR_ARG (exp
, 1);
16796 tree arg1
= CALL_EXPR_ARG (exp
, 2);
16797 rtx op0
= expand_normal (arg0
);
16798 rtx op1
= expand_normal (arg1
);
16799 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16800 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16802 enum rtx_code code
;
16804 if (TREE_CODE (form
) != INTEGER_CST
)
16806 error ("argument 1 of __builtin_paired_predicate must be a constant");
16810 form_int
= TREE_INT_CST_LOW (form
);
16812 gcc_assert (mode0
== mode1
);
16814 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
16818 || GET_MODE (target
) != SImode
16819 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, SImode
))
16820 target
= gen_reg_rtx (SImode
);
16821 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16822 op0
= copy_to_mode_reg (mode0
, op0
);
16823 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16824 op1
= copy_to_mode_reg (mode1
, op1
);
16826 scratch
= gen_reg_rtx (CCFPmode
);
16828 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
16850 emit_insn (gen_move_from_CR_ov_bit (target
, scratch
));
16853 error ("argument 1 of __builtin_paired_predicate is out of range");
16857 tmp
= gen_rtx_fmt_ee (code
, SImode
, scratch
, const0_rtx
);
16858 emit_move_insn (target
, tmp
);
16863 spe_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
16865 rtx pat
, scratch
, tmp
;
16866 tree form
= CALL_EXPR_ARG (exp
, 0);
16867 tree arg0
= CALL_EXPR_ARG (exp
, 1);
16868 tree arg1
= CALL_EXPR_ARG (exp
, 2);
16869 rtx op0
= expand_normal (arg0
);
16870 rtx op1
= expand_normal (arg1
);
16871 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16872 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16874 enum rtx_code code
;
16876 if (TREE_CODE (form
) != INTEGER_CST
)
16878 error ("argument 1 of __builtin_spe_predicate must be a constant");
16882 form_int
= TREE_INT_CST_LOW (form
);
16884 gcc_assert (mode0
== mode1
);
16886 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
16890 || GET_MODE (target
) != SImode
16891 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, SImode
))
16892 target
= gen_reg_rtx (SImode
);
16894 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16895 op0
= copy_to_mode_reg (mode0
, op0
);
16896 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16897 op1
= copy_to_mode_reg (mode1
, op1
);
16899 scratch
= gen_reg_rtx (CCmode
);
16901 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
16906 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16907 _lower_. We use one compare, but look in different bits of the
16908 CR for each variant.
16910 There are 2 elements in each SPE simd type (upper/lower). The CR
16911 bits are set as follows:
16913 BIT0 | BIT 1 | BIT 2 | BIT 3
16914 U | L | (U | L) | (U & L)
16916 So, for an "all" relationship, BIT 3 would be set.
16917 For an "any" relationship, BIT 2 would be set. Etc.
16919 Following traditional nomenclature, these bits map to:
16921 BIT0 | BIT 1 | BIT 2 | BIT 3
16924 Later, we will generate rtl to look in the LT/EQ/EQ/OV bits.
16929 /* All variant. OV bit. */
16931 /* We need to get to the OV bit, which is the ORDERED bit. We
16932 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16933 that's ugly and will make validate_condition_mode die.
16934 So let's just use another pattern. */
16935 emit_insn (gen_move_from_CR_ov_bit (target
, scratch
));
16937 /* Any variant. EQ bit. */
16941 /* Upper variant. LT bit. */
16945 /* Lower variant. GT bit. */
16950 error ("argument 1 of __builtin_spe_predicate is out of range");
16954 tmp
= gen_rtx_fmt_ee (code
, SImode
, scratch
, const0_rtx
);
16955 emit_move_insn (target
, tmp
);
16960 /* The evsel builtins look like this:
16962 e = __builtin_spe_evsel_OP (a, b, c, d);
16964 and work like this:
16966 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16967 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
16971 spe_expand_evsel_builtin (enum insn_code icode
, tree exp
, rtx target
)
16974 tree arg0
= CALL_EXPR_ARG (exp
, 0);
16975 tree arg1
= CALL_EXPR_ARG (exp
, 1);
16976 tree arg2
= CALL_EXPR_ARG (exp
, 2);
16977 tree arg3
= CALL_EXPR_ARG (exp
, 3);
16978 rtx op0
= expand_normal (arg0
);
16979 rtx op1
= expand_normal (arg1
);
16980 rtx op2
= expand_normal (arg2
);
16981 rtx op3
= expand_normal (arg3
);
16982 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16983 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16985 gcc_assert (mode0
== mode1
);
16987 if (arg0
== error_mark_node
|| arg1
== error_mark_node
16988 || arg2
== error_mark_node
|| arg3
== error_mark_node
)
16992 || GET_MODE (target
) != mode0
16993 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, mode0
))
16994 target
= gen_reg_rtx (mode0
);
16996 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16997 op0
= copy_to_mode_reg (mode0
, op0
);
16998 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
16999 op1
= copy_to_mode_reg (mode0
, op1
);
17000 if (! (*insn_data
[icode
].operand
[1].predicate
) (op2
, mode1
))
17001 op2
= copy_to_mode_reg (mode0
, op2
);
17002 if (! (*insn_data
[icode
].operand
[1].predicate
) (op3
, mode1
))
17003 op3
= copy_to_mode_reg (mode0
, op3
);
17005 /* Generate the compare. */
17006 scratch
= gen_reg_rtx (CCmode
);
17007 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
17012 if (mode0
== V2SImode
)
17013 emit_insn (gen_spe_evsel (target
, op2
, op3
, scratch
));
17015 emit_insn (gen_spe_evsel_fs (target
, op2
, op3
, scratch
));
17020 /* Raise an error message for a builtin function that is called without the
17021 appropriate target options being set. */
17024 rs6000_invalid_builtin (enum rs6000_builtins fncode
)
17026 size_t uns_fncode
= (size_t)fncode
;
17027 const char *name
= rs6000_builtin_info
[uns_fncode
].name
;
17028 HOST_WIDE_INT fnmask
= rs6000_builtin_info
[uns_fncode
].mask
;
17030 gcc_assert (name
!= NULL
);
17031 if ((fnmask
& RS6000_BTM_CELL
) != 0)
17032 error ("Builtin function %s is only valid for the cell processor", name
);
17033 else if ((fnmask
& RS6000_BTM_VSX
) != 0)
17034 error ("Builtin function %s requires the -mvsx option", name
);
17035 else if ((fnmask
& RS6000_BTM_HTM
) != 0)
17036 error ("Builtin function %s requires the -mhtm option", name
);
17037 else if ((fnmask
& RS6000_BTM_ALTIVEC
) != 0)
17038 error ("Builtin function %s requires the -maltivec option", name
);
17039 else if ((fnmask
& RS6000_BTM_PAIRED
) != 0)
17040 error ("Builtin function %s requires the -mpaired option", name
);
17041 else if ((fnmask
& RS6000_BTM_SPE
) != 0)
17042 error ("Builtin function %s requires the -mspe option", name
);
17043 else if ((fnmask
& (RS6000_BTM_DFP
| RS6000_BTM_P8_VECTOR
))
17044 == (RS6000_BTM_DFP
| RS6000_BTM_P8_VECTOR
))
17045 error ("Builtin function %s requires the -mhard-dfp and"
17046 " -mpower8-vector options", name
);
17047 else if ((fnmask
& RS6000_BTM_DFP
) != 0)
17048 error ("Builtin function %s requires the -mhard-dfp option", name
);
17049 else if ((fnmask
& RS6000_BTM_P8_VECTOR
) != 0)
17050 error ("Builtin function %s requires the -mpower8-vector option", name
);
17051 else if ((fnmask
& (RS6000_BTM_P9_VECTOR
| RS6000_BTM_64BIT
))
17052 == (RS6000_BTM_P9_VECTOR
| RS6000_BTM_64BIT
))
17053 error ("Builtin function %s requires the -mcpu=power9 and"
17054 " -m64 options", name
);
17055 else if ((fnmask
& RS6000_BTM_P9_VECTOR
) != 0)
17056 error ("Builtin function %s requires the -mcpu=power9 option", name
);
17057 else if ((fnmask
& (RS6000_BTM_P9_MISC
| RS6000_BTM_64BIT
))
17058 == (RS6000_BTM_P9_MISC
| RS6000_BTM_64BIT
))
17059 error ("Builtin function %s requires the -mcpu=power9 and"
17060 " -m64 options", name
);
17061 else if ((fnmask
& RS6000_BTM_P9_MISC
) == RS6000_BTM_P9_MISC
)
17062 error ("Builtin function %s requires the -mcpu=power9 option", name
);
17063 else if ((fnmask
& (RS6000_BTM_HARD_FLOAT
| RS6000_BTM_LDBL128
))
17064 == (RS6000_BTM_HARD_FLOAT
| RS6000_BTM_LDBL128
))
17065 error ("Builtin function %s requires the -mhard-float and"
17066 " -mlong-double-128 options", name
);
17067 else if ((fnmask
& RS6000_BTM_HARD_FLOAT
) != 0)
17068 error ("Builtin function %s requires the -mhard-float option", name
);
17069 else if ((fnmask
& RS6000_BTM_FLOAT128
) != 0)
17070 error ("Builtin function %s requires the -mfloat128 option", name
);
17072 error ("Builtin function %s is not supported with the current options",
17076 /* Target hook for early folding of built-ins, shamelessly stolen
17080 rs6000_fold_builtin (tree fndecl
, int n_args ATTRIBUTE_UNUSED
,
17081 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
17083 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
17085 enum rs6000_builtins fn_code
17086 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
17089 case RS6000_BUILTIN_NANQ
:
17090 case RS6000_BUILTIN_NANSQ
:
17092 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17093 const char *str
= c_getstr (*args
);
17094 int quiet
= fn_code
== RS6000_BUILTIN_NANQ
;
17095 REAL_VALUE_TYPE real
;
17097 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
17098 return build_real (type
, real
);
17101 case RS6000_BUILTIN_INFQ
:
17102 case RS6000_BUILTIN_HUGE_VALQ
:
17104 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17105 REAL_VALUE_TYPE inf
;
17107 return build_real (type
, inf
);
17113 #ifdef SUBTARGET_FOLD_BUILTIN
17114 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
17120 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
17121 a constant, use rs6000_fold_builtin.) */
17124 rs6000_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
17126 gimple
*stmt
= gsi_stmt (*gsi
);
17127 tree fndecl
= gimple_call_fndecl (stmt
);
17128 gcc_checking_assert (fndecl
&& DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
);
17129 enum rs6000_builtins fn_code
17130 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
17131 tree arg0
, arg1
, lhs
;
17135 /* Flavors of vec_add. We deliberately don't expand
17136 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
17137 TImode, resulting in much poorer code generation. */
17138 case ALTIVEC_BUILTIN_VADDUBM
:
17139 case ALTIVEC_BUILTIN_VADDUHM
:
17140 case ALTIVEC_BUILTIN_VADDUWM
:
17141 case P8V_BUILTIN_VADDUDM
:
17142 case ALTIVEC_BUILTIN_VADDFP
:
17143 case VSX_BUILTIN_XVADDDP
:
17145 arg0
= gimple_call_arg (stmt
, 0);
17146 arg1
= gimple_call_arg (stmt
, 1);
17147 lhs
= gimple_call_lhs (stmt
);
17148 gimple
*g
= gimple_build_assign (lhs
, PLUS_EXPR
, arg0
, arg1
);
17149 gimple_set_location (g
, gimple_location (stmt
));
17150 gsi_replace (gsi
, g
, true);
17153 /* Flavors of vec_sub. We deliberately don't expand
17154 P8V_BUILTIN_VSUBUQM. */
17155 case ALTIVEC_BUILTIN_VSUBUBM
:
17156 case ALTIVEC_BUILTIN_VSUBUHM
:
17157 case ALTIVEC_BUILTIN_VSUBUWM
:
17158 case P8V_BUILTIN_VSUBUDM
:
17159 case ALTIVEC_BUILTIN_VSUBFP
:
17160 case VSX_BUILTIN_XVSUBDP
:
17162 arg0
= gimple_call_arg (stmt
, 0);
17163 arg1
= gimple_call_arg (stmt
, 1);
17164 lhs
= gimple_call_lhs (stmt
);
17165 gimple
*g
= gimple_build_assign (lhs
, MINUS_EXPR
, arg0
, arg1
);
17166 gimple_set_location (g
, gimple_location (stmt
));
17167 gsi_replace (gsi
, g
, true);
17170 case VSX_BUILTIN_XVMULSP
:
17171 case VSX_BUILTIN_XVMULDP
:
17173 arg0
= gimple_call_arg (stmt
, 0);
17174 arg1
= gimple_call_arg (stmt
, 1);
17175 lhs
= gimple_call_lhs (stmt
);
17176 gimple
*g
= gimple_build_assign (lhs
, MULT_EXPR
, arg0
, arg1
);
17177 gimple_set_location (g
, gimple_location (stmt
));
17178 gsi_replace (gsi
, g
, true);
17181 /* Even element flavors of vec_mul (signed). */
17182 case ALTIVEC_BUILTIN_VMULESB
:
17183 case ALTIVEC_BUILTIN_VMULESH
:
17184 /* Even element flavors of vec_mul (unsigned). */
17185 case ALTIVEC_BUILTIN_VMULEUB
:
17186 case ALTIVEC_BUILTIN_VMULEUH
:
17188 arg0
= gimple_call_arg (stmt
, 0);
17189 arg1
= gimple_call_arg (stmt
, 1);
17190 lhs
= gimple_call_lhs (stmt
);
17191 gimple
*g
= gimple_build_assign (lhs
, VEC_WIDEN_MULT_EVEN_EXPR
, arg0
, arg1
);
17192 gimple_set_location (g
, gimple_location (stmt
));
17193 gsi_replace (gsi
, g
, true);
17196 /* Odd element flavors of vec_mul (signed). */
17197 case ALTIVEC_BUILTIN_VMULOSB
:
17198 case ALTIVEC_BUILTIN_VMULOSH
:
17199 /* Odd element flavors of vec_mul (unsigned). */
17200 case ALTIVEC_BUILTIN_VMULOUB
:
17201 case ALTIVEC_BUILTIN_VMULOUH
:
17203 arg0
= gimple_call_arg (stmt
, 0);
17204 arg1
= gimple_call_arg (stmt
, 1);
17205 lhs
= gimple_call_lhs (stmt
);
17206 gimple
*g
= gimple_build_assign (lhs
, VEC_WIDEN_MULT_ODD_EXPR
, arg0
, arg1
);
17207 gimple_set_location (g
, gimple_location (stmt
));
17208 gsi_replace (gsi
, g
, true);
17211 /* Flavors of vec_div (Integer). */
17212 case VSX_BUILTIN_DIV_V2DI
:
17213 case VSX_BUILTIN_UDIV_V2DI
:
17215 arg0
= gimple_call_arg (stmt
, 0);
17216 arg1
= gimple_call_arg (stmt
, 1);
17217 lhs
= gimple_call_lhs (stmt
);
17218 gimple
*g
= gimple_build_assign (lhs
, TRUNC_DIV_EXPR
, arg0
, arg1
);
17219 gimple_set_location (g
, gimple_location (stmt
));
17220 gsi_replace (gsi
, g
, true);
17223 /* Flavors of vec_div (Float). */
17224 case VSX_BUILTIN_XVDIVSP
:
17225 case VSX_BUILTIN_XVDIVDP
:
17227 arg0
= gimple_call_arg (stmt
, 0);
17228 arg1
= gimple_call_arg (stmt
, 1);
17229 lhs
= gimple_call_lhs (stmt
);
17230 gimple
*g
= gimple_build_assign (lhs
, RDIV_EXPR
, arg0
, arg1
);
17231 gimple_set_location (g
, gimple_location (stmt
));
17232 gsi_replace (gsi
, g
, true);
17235 /* Flavors of vec_and. */
17236 case ALTIVEC_BUILTIN_VAND
:
17238 arg0
= gimple_call_arg (stmt
, 0);
17239 arg1
= gimple_call_arg (stmt
, 1);
17240 lhs
= gimple_call_lhs (stmt
);
17241 gimple
*g
= gimple_build_assign (lhs
, BIT_AND_EXPR
, arg0
, arg1
);
17242 gimple_set_location (g
, gimple_location (stmt
));
17243 gsi_replace (gsi
, g
, true);
17246 /* Flavors of vec_andc. */
17247 case ALTIVEC_BUILTIN_VANDC
:
17249 arg0
= gimple_call_arg (stmt
, 0);
17250 arg1
= gimple_call_arg (stmt
, 1);
17251 lhs
= gimple_call_lhs (stmt
);
17252 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17253 gimple
*g
= gimple_build_assign(temp
, BIT_NOT_EXPR
, arg1
);
17254 gimple_set_location (g
, gimple_location (stmt
));
17255 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17256 g
= gimple_build_assign (lhs
, BIT_AND_EXPR
, arg0
, temp
);
17257 gimple_set_location (g
, gimple_location (stmt
));
17258 gsi_replace (gsi
, g
, true);
17261 /* Flavors of vec_nand. */
17262 case P8V_BUILTIN_VEC_NAND
:
17263 case P8V_BUILTIN_NAND_V16QI
:
17264 case P8V_BUILTIN_NAND_V8HI
:
17265 case P8V_BUILTIN_NAND_V4SI
:
17266 case P8V_BUILTIN_NAND_V4SF
:
17267 case P8V_BUILTIN_NAND_V2DF
:
17268 case P8V_BUILTIN_NAND_V2DI
:
17270 arg0
= gimple_call_arg (stmt
, 0);
17271 arg1
= gimple_call_arg (stmt
, 1);
17272 lhs
= gimple_call_lhs (stmt
);
17273 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17274 gimple
*g
= gimple_build_assign(temp
, BIT_AND_EXPR
, arg0
, arg1
);
17275 gimple_set_location (g
, gimple_location (stmt
));
17276 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17277 g
= gimple_build_assign (lhs
, BIT_NOT_EXPR
, temp
);
17278 gimple_set_location (g
, gimple_location (stmt
));
17279 gsi_replace (gsi
, g
, true);
17282 /* Flavors of vec_or. */
17283 case ALTIVEC_BUILTIN_VOR
:
17285 arg0
= gimple_call_arg (stmt
, 0);
17286 arg1
= gimple_call_arg (stmt
, 1);
17287 lhs
= gimple_call_lhs (stmt
);
17288 gimple
*g
= gimple_build_assign (lhs
, BIT_IOR_EXPR
, arg0
, arg1
);
17289 gimple_set_location (g
, gimple_location (stmt
));
17290 gsi_replace (gsi
, g
, true);
17293 /* flavors of vec_orc. */
17294 case P8V_BUILTIN_ORC_V16QI
:
17295 case P8V_BUILTIN_ORC_V8HI
:
17296 case P8V_BUILTIN_ORC_V4SI
:
17297 case P8V_BUILTIN_ORC_V4SF
:
17298 case P8V_BUILTIN_ORC_V2DF
:
17299 case P8V_BUILTIN_ORC_V2DI
:
17301 arg0
= gimple_call_arg (stmt
, 0);
17302 arg1
= gimple_call_arg (stmt
, 1);
17303 lhs
= gimple_call_lhs (stmt
);
17304 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17305 gimple
*g
= gimple_build_assign(temp
, BIT_NOT_EXPR
, arg1
);
17306 gimple_set_location (g
, gimple_location (stmt
));
17307 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17308 g
= gimple_build_assign (lhs
, BIT_IOR_EXPR
, arg0
, temp
);
17309 gimple_set_location (g
, gimple_location (stmt
));
17310 gsi_replace (gsi
, g
, true);
17313 /* Flavors of vec_xor. */
17314 case ALTIVEC_BUILTIN_VXOR
:
17316 arg0
= gimple_call_arg (stmt
, 0);
17317 arg1
= gimple_call_arg (stmt
, 1);
17318 lhs
= gimple_call_lhs (stmt
);
17319 gimple
*g
= gimple_build_assign (lhs
, BIT_XOR_EXPR
, arg0
, arg1
);
17320 gimple_set_location (g
, gimple_location (stmt
));
17321 gsi_replace (gsi
, g
, true);
17324 /* Flavors of vec_nor. */
17325 case ALTIVEC_BUILTIN_VNOR
:
17327 arg0
= gimple_call_arg (stmt
, 0);
17328 arg1
= gimple_call_arg (stmt
, 1);
17329 lhs
= gimple_call_lhs (stmt
);
17330 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17331 gimple
*g
= gimple_build_assign (temp
, BIT_IOR_EXPR
, arg0
, arg1
);
17332 gimple_set_location (g
, gimple_location (stmt
));
17333 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17334 g
= gimple_build_assign (lhs
, BIT_NOT_EXPR
, temp
);
17335 gimple_set_location (g
, gimple_location (stmt
));
17336 gsi_replace (gsi
, g
, true);
17346 /* Expand an expression EXP that calls a built-in function,
17347 with result going to TARGET if that's convenient
17348 (and in mode MODE if that's convenient).
17349 SUBTARGET may be used as the target for computing one of EXP's operands.
17350 IGNORE is nonzero if the value is to be ignored. */
17353 rs6000_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17354 machine_mode mode ATTRIBUTE_UNUSED
,
17355 int ignore ATTRIBUTE_UNUSED
)
17357 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17358 enum rs6000_builtins fcode
17359 = (enum rs6000_builtins
)DECL_FUNCTION_CODE (fndecl
);
17360 size_t uns_fcode
= (size_t)fcode
;
17361 const struct builtin_description
*d
;
17365 HOST_WIDE_INT mask
= rs6000_builtin_info
[uns_fcode
].mask
;
17366 bool func_valid_p
= ((rs6000_builtin_mask
& mask
) == mask
);
17368 if (TARGET_DEBUG_BUILTIN
)
17370 enum insn_code icode
= rs6000_builtin_info
[uns_fcode
].icode
;
17371 const char *name1
= rs6000_builtin_info
[uns_fcode
].name
;
17372 const char *name2
= ((icode
!= CODE_FOR_nothing
)
17373 ? get_insn_name ((int)icode
)
17377 switch (rs6000_builtin_info
[uns_fcode
].attr
& RS6000_BTC_TYPE_MASK
)
17379 default: name3
= "unknown"; break;
17380 case RS6000_BTC_SPECIAL
: name3
= "special"; break;
17381 case RS6000_BTC_UNARY
: name3
= "unary"; break;
17382 case RS6000_BTC_BINARY
: name3
= "binary"; break;
17383 case RS6000_BTC_TERNARY
: name3
= "ternary"; break;
17384 case RS6000_BTC_PREDICATE
: name3
= "predicate"; break;
17385 case RS6000_BTC_ABS
: name3
= "abs"; break;
17386 case RS6000_BTC_EVSEL
: name3
= "evsel"; break;
17387 case RS6000_BTC_DST
: name3
= "dst"; break;
17392 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
17393 (name1
) ? name1
: "---", fcode
,
17394 (name2
) ? name2
: "---", (int)icode
,
17396 func_valid_p
? "" : ", not valid");
17401 rs6000_invalid_builtin (fcode
);
17403 /* Given it is invalid, just generate a normal call. */
17404 return expand_call (exp
, target
, ignore
);
17409 case RS6000_BUILTIN_RECIP
:
17410 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3
, exp
, target
);
17412 case RS6000_BUILTIN_RECIPF
:
17413 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3
, exp
, target
);
17415 case RS6000_BUILTIN_RSQRTF
:
17416 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
17418 case RS6000_BUILTIN_RSQRT
:
17419 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2
, exp
, target
);
17421 case POWER7_BUILTIN_BPERMD
:
17422 return rs6000_expand_binop_builtin (((TARGET_64BIT
)
17423 ? CODE_FOR_bpermd_di
17424 : CODE_FOR_bpermd_si
), exp
, target
);
17426 case RS6000_BUILTIN_GET_TB
:
17427 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase
,
17430 case RS6000_BUILTIN_MFTB
:
17431 return rs6000_expand_zeroop_builtin (((TARGET_64BIT
)
17432 ? CODE_FOR_rs6000_mftb_di
17433 : CODE_FOR_rs6000_mftb_si
),
17436 case RS6000_BUILTIN_MFFS
:
17437 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs
, target
);
17439 case RS6000_BUILTIN_MTFSF
:
17440 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf
, exp
);
17442 case RS6000_BUILTIN_CPU_INIT
:
17443 case RS6000_BUILTIN_CPU_IS
:
17444 case RS6000_BUILTIN_CPU_SUPPORTS
:
17445 return cpu_expand_builtin (fcode
, exp
, target
);
17447 case ALTIVEC_BUILTIN_MASK_FOR_LOAD
:
17448 case ALTIVEC_BUILTIN_MASK_FOR_STORE
:
17450 int icode
= (BYTES_BIG_ENDIAN
? (int) CODE_FOR_altivec_lvsr_direct
17451 : (int) CODE_FOR_altivec_lvsl_direct
);
17452 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17453 machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
17457 gcc_assert (TARGET_ALTIVEC
);
17459 arg
= CALL_EXPR_ARG (exp
, 0);
17460 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
17461 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
17462 addr
= memory_address (mode
, op
);
17463 if (fcode
== ALTIVEC_BUILTIN_MASK_FOR_STORE
)
17467 /* For the load case need to negate the address. */
17468 op
= gen_reg_rtx (GET_MODE (addr
));
17469 emit_insn (gen_rtx_SET (op
, gen_rtx_NEG (GET_MODE (addr
), addr
)));
17471 op
= gen_rtx_MEM (mode
, op
);
17474 || GET_MODE (target
) != tmode
17475 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17476 target
= gen_reg_rtx (tmode
);
17478 pat
= GEN_FCN (icode
) (target
, op
);
17486 case ALTIVEC_BUILTIN_VCFUX
:
17487 case ALTIVEC_BUILTIN_VCFSX
:
17488 case ALTIVEC_BUILTIN_VCTUXS
:
17489 case ALTIVEC_BUILTIN_VCTSXS
:
17490 /* FIXME: There's got to be a nicer way to handle this case than
17491 constructing a new CALL_EXPR. */
17492 if (call_expr_nargs (exp
) == 1)
17494 exp
= build_call_nary (TREE_TYPE (exp
), CALL_EXPR_FN (exp
),
17495 2, CALL_EXPR_ARG (exp
, 0), integer_zero_node
);
17503 if (TARGET_ALTIVEC
)
17505 ret
= altivec_expand_builtin (exp
, target
, &success
);
17512 ret
= spe_expand_builtin (exp
, target
, &success
);
17517 if (TARGET_PAIRED_FLOAT
)
17519 ret
= paired_expand_builtin (exp
, target
, &success
);
17526 ret
= htm_expand_builtin (exp
, target
, &success
);
17532 unsigned attr
= rs6000_builtin_info
[uns_fcode
].attr
& RS6000_BTC_TYPE_MASK
;
17533 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17534 gcc_assert (attr
== RS6000_BTC_UNARY
17535 || attr
== RS6000_BTC_BINARY
17536 || attr
== RS6000_BTC_TERNARY
17537 || attr
== RS6000_BTC_SPECIAL
);
17539 /* Handle simple unary operations. */
17541 for (i
= 0; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17542 if (d
->code
== fcode
)
17543 return rs6000_expand_unop_builtin (d
->icode
, exp
, target
);
17545 /* Handle simple binary operations. */
17547 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17548 if (d
->code
== fcode
)
17549 return rs6000_expand_binop_builtin (d
->icode
, exp
, target
);
17551 /* Handle simple ternary operations. */
17553 for (i
= 0; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
17554 if (d
->code
== fcode
)
17555 return rs6000_expand_ternop_builtin (d
->icode
, exp
, target
);
17557 /* Handle simple no-argument operations. */
17559 for (i
= 0; i
< ARRAY_SIZE (bdesc_0arg
); i
++, d
++)
17560 if (d
->code
== fcode
)
17561 return rs6000_expand_zeroop_builtin (d
->icode
, target
);
17563 gcc_unreachable ();
17566 /* Create a builtin vector type with a name. Taking care not to give
17567 the canonical type a name. */
17570 rs6000_vector_type (const char *name
, tree elt_type
, unsigned num_elts
)
17572 tree result
= build_vector_type (elt_type
, num_elts
);
17574 /* Copy so we don't give the canonical type a name. */
17575 result
= build_variant_type_copy (result
);
17577 add_builtin_type (name
, result
);
17583 rs6000_init_builtins (void)
17589 if (TARGET_DEBUG_BUILTIN
)
17590 fprintf (stderr
, "rs6000_init_builtins%s%s%s%s\n",
17591 (TARGET_PAIRED_FLOAT
) ? ", paired" : "",
17592 (TARGET_SPE
) ? ", spe" : "",
17593 (TARGET_ALTIVEC
) ? ", altivec" : "",
17594 (TARGET_VSX
) ? ", vsx" : "");
17596 V2SI_type_node
= build_vector_type (intSI_type_node
, 2);
17597 V2SF_type_node
= build_vector_type (float_type_node
, 2);
17598 V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
? "__vector long"
17599 : "__vector long long",
17600 intDI_type_node
, 2);
17601 V2DF_type_node
= rs6000_vector_type ("__vector double", double_type_node
, 2);
17602 V4HI_type_node
= build_vector_type (intHI_type_node
, 4);
17603 V4SI_type_node
= rs6000_vector_type ("__vector signed int",
17604 intSI_type_node
, 4);
17605 V4SF_type_node
= rs6000_vector_type ("__vector float", float_type_node
, 4);
17606 V8HI_type_node
= rs6000_vector_type ("__vector signed short",
17607 intHI_type_node
, 8);
17608 V16QI_type_node
= rs6000_vector_type ("__vector signed char",
17609 intQI_type_node
, 16);
17611 unsigned_V16QI_type_node
= rs6000_vector_type ("__vector unsigned char",
17612 unsigned_intQI_type_node
, 16);
17613 unsigned_V8HI_type_node
= rs6000_vector_type ("__vector unsigned short",
17614 unsigned_intHI_type_node
, 8);
17615 unsigned_V4SI_type_node
= rs6000_vector_type ("__vector unsigned int",
17616 unsigned_intSI_type_node
, 4);
17617 unsigned_V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
17618 ? "__vector unsigned long"
17619 : "__vector unsigned long long",
17620 unsigned_intDI_type_node
, 2);
17622 opaque_V2SF_type_node
= build_opaque_vector_type (float_type_node
, 2);
17623 opaque_V2SI_type_node
= build_opaque_vector_type (intSI_type_node
, 2);
17624 opaque_p_V2SI_type_node
= build_pointer_type (opaque_V2SI_type_node
);
17625 opaque_V4SI_type_node
= build_opaque_vector_type (intSI_type_node
, 4);
17627 const_str_type_node
17628 = build_pointer_type (build_qualified_type (char_type_node
,
17631 /* We use V1TI mode as a special container to hold __int128_t items that
17632 must live in VSX registers. */
17633 if (intTI_type_node
)
17635 V1TI_type_node
= rs6000_vector_type ("__vector __int128",
17636 intTI_type_node
, 1);
17637 unsigned_V1TI_type_node
17638 = rs6000_vector_type ("__vector unsigned __int128",
17639 unsigned_intTI_type_node
, 1);
17642 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17643 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17644 'vector unsigned short'. */
17646 bool_char_type_node
= build_distinct_type_copy (unsigned_intQI_type_node
);
17647 bool_short_type_node
= build_distinct_type_copy (unsigned_intHI_type_node
);
17648 bool_int_type_node
= build_distinct_type_copy (unsigned_intSI_type_node
);
17649 bool_long_type_node
= build_distinct_type_copy (unsigned_intDI_type_node
);
17650 pixel_type_node
= build_distinct_type_copy (unsigned_intHI_type_node
);
17652 long_integer_type_internal_node
= long_integer_type_node
;
17653 long_unsigned_type_internal_node
= long_unsigned_type_node
;
17654 long_long_integer_type_internal_node
= long_long_integer_type_node
;
17655 long_long_unsigned_type_internal_node
= long_long_unsigned_type_node
;
17656 intQI_type_internal_node
= intQI_type_node
;
17657 uintQI_type_internal_node
= unsigned_intQI_type_node
;
17658 intHI_type_internal_node
= intHI_type_node
;
17659 uintHI_type_internal_node
= unsigned_intHI_type_node
;
17660 intSI_type_internal_node
= intSI_type_node
;
17661 uintSI_type_internal_node
= unsigned_intSI_type_node
;
17662 intDI_type_internal_node
= intDI_type_node
;
17663 uintDI_type_internal_node
= unsigned_intDI_type_node
;
17664 intTI_type_internal_node
= intTI_type_node
;
17665 uintTI_type_internal_node
= unsigned_intTI_type_node
;
17666 float_type_internal_node
= float_type_node
;
17667 double_type_internal_node
= double_type_node
;
17668 long_double_type_internal_node
= long_double_type_node
;
17669 dfloat64_type_internal_node
= dfloat64_type_node
;
17670 dfloat128_type_internal_node
= dfloat128_type_node
;
17671 void_type_internal_node
= void_type_node
;
17673 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17674 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17675 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17676 format that uses a pair of doubles, depending on the switches and
17679 We do not enable the actual __float128 keyword unless the user explicitly
17680 asks for it, because the library support is not yet complete.
17682 If we don't support for either 128-bit IBM double double or IEEE 128-bit
17683 floating point, we need make sure the type is non-zero or else self-test
17684 fails during bootstrap.
17686 We don't register a built-in type for __ibm128 if the type is the same as
17687 long double. Instead we add a #define for __ibm128 in
17688 rs6000_cpu_cpp_builtins to long double. */
17689 if (TARGET_LONG_DOUBLE_128
&& FLOAT128_IEEE_P (TFmode
))
17691 ibm128_float_type_node
= make_node (REAL_TYPE
);
17692 TYPE_PRECISION (ibm128_float_type_node
) = 128;
17693 SET_TYPE_MODE (ibm128_float_type_node
, IFmode
);
17694 layout_type (ibm128_float_type_node
);
17696 lang_hooks
.types
.register_builtin_type (ibm128_float_type_node
,
17700 ibm128_float_type_node
= long_double_type_node
;
17702 if (TARGET_FLOAT128_KEYWORD
)
17704 ieee128_float_type_node
= float128_type_node
;
17705 lang_hooks
.types
.register_builtin_type (ieee128_float_type_node
,
17709 else if (TARGET_FLOAT128_TYPE
)
17711 ieee128_float_type_node
= make_node (REAL_TYPE
);
17712 TYPE_PRECISION (ibm128_float_type_node
) = 128;
17713 SET_TYPE_MODE (ieee128_float_type_node
, KFmode
);
17714 layout_type (ieee128_float_type_node
);
17716 /* If we are not exporting the __float128/_Float128 keywords, we need a
17717 keyword to get the types created. Use __ieee128 as the dummy
17719 lang_hooks
.types
.register_builtin_type (ieee128_float_type_node
,
17724 ieee128_float_type_node
= long_double_type_node
;
17726 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17728 builtin_mode_to_type
[QImode
][0] = integer_type_node
;
17729 builtin_mode_to_type
[HImode
][0] = integer_type_node
;
17730 builtin_mode_to_type
[SImode
][0] = intSI_type_node
;
17731 builtin_mode_to_type
[SImode
][1] = unsigned_intSI_type_node
;
17732 builtin_mode_to_type
[DImode
][0] = intDI_type_node
;
17733 builtin_mode_to_type
[DImode
][1] = unsigned_intDI_type_node
;
17734 builtin_mode_to_type
[TImode
][0] = intTI_type_node
;
17735 builtin_mode_to_type
[TImode
][1] = unsigned_intTI_type_node
;
17736 builtin_mode_to_type
[SFmode
][0] = float_type_node
;
17737 builtin_mode_to_type
[DFmode
][0] = double_type_node
;
17738 builtin_mode_to_type
[IFmode
][0] = ibm128_float_type_node
;
17739 builtin_mode_to_type
[KFmode
][0] = ieee128_float_type_node
;
17740 builtin_mode_to_type
[TFmode
][0] = long_double_type_node
;
17741 builtin_mode_to_type
[DDmode
][0] = dfloat64_type_node
;
17742 builtin_mode_to_type
[TDmode
][0] = dfloat128_type_node
;
17743 builtin_mode_to_type
[V1TImode
][0] = V1TI_type_node
;
17744 builtin_mode_to_type
[V1TImode
][1] = unsigned_V1TI_type_node
;
17745 builtin_mode_to_type
[V2SImode
][0] = V2SI_type_node
;
17746 builtin_mode_to_type
[V2SFmode
][0] = V2SF_type_node
;
17747 builtin_mode_to_type
[V2DImode
][0] = V2DI_type_node
;
17748 builtin_mode_to_type
[V2DImode
][1] = unsigned_V2DI_type_node
;
17749 builtin_mode_to_type
[V2DFmode
][0] = V2DF_type_node
;
17750 builtin_mode_to_type
[V4HImode
][0] = V4HI_type_node
;
17751 builtin_mode_to_type
[V4SImode
][0] = V4SI_type_node
;
17752 builtin_mode_to_type
[V4SImode
][1] = unsigned_V4SI_type_node
;
17753 builtin_mode_to_type
[V4SFmode
][0] = V4SF_type_node
;
17754 builtin_mode_to_type
[V8HImode
][0] = V8HI_type_node
;
17755 builtin_mode_to_type
[V8HImode
][1] = unsigned_V8HI_type_node
;
17756 builtin_mode_to_type
[V16QImode
][0] = V16QI_type_node
;
17757 builtin_mode_to_type
[V16QImode
][1] = unsigned_V16QI_type_node
;
17759 tdecl
= add_builtin_type ("__bool char", bool_char_type_node
);
17760 TYPE_NAME (bool_char_type_node
) = tdecl
;
17762 tdecl
= add_builtin_type ("__bool short", bool_short_type_node
);
17763 TYPE_NAME (bool_short_type_node
) = tdecl
;
17765 tdecl
= add_builtin_type ("__bool int", bool_int_type_node
);
17766 TYPE_NAME (bool_int_type_node
) = tdecl
;
17768 tdecl
= add_builtin_type ("__pixel", pixel_type_node
);
17769 TYPE_NAME (pixel_type_node
) = tdecl
;
17771 bool_V16QI_type_node
= rs6000_vector_type ("__vector __bool char",
17772 bool_char_type_node
, 16);
17773 bool_V8HI_type_node
= rs6000_vector_type ("__vector __bool short",
17774 bool_short_type_node
, 8);
17775 bool_V4SI_type_node
= rs6000_vector_type ("__vector __bool int",
17776 bool_int_type_node
, 4);
17777 bool_V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
17778 ? "__vector __bool long"
17779 : "__vector __bool long long",
17780 bool_long_type_node
, 2);
17781 pixel_V8HI_type_node
= rs6000_vector_type ("__vector __pixel",
17782 pixel_type_node
, 8);
17784 /* Paired and SPE builtins are only available if you build a compiler with
17785 the appropriate options, so only create those builtins with the
17786 appropriate compiler option. Create Altivec and VSX builtins on machines
17787 with at least the general purpose extensions (970 and newer) to allow the
17788 use of the target attribute. */
17789 if (TARGET_PAIRED_FLOAT
)
17790 paired_init_builtins ();
17792 spe_init_builtins ();
17793 if (TARGET_EXTRA_BUILTINS
)
17794 altivec_init_builtins ();
17796 htm_init_builtins ();
17798 if (TARGET_EXTRA_BUILTINS
|| TARGET_SPE
|| TARGET_PAIRED_FLOAT
)
17799 rs6000_common_init_builtins ();
17801 ftype
= build_function_type_list (ieee128_float_type_node
,
17802 const_str_type_node
, NULL_TREE
);
17803 def_builtin ("__builtin_nanq", ftype
, RS6000_BUILTIN_NANQ
);
17804 def_builtin ("__builtin_nansq", ftype
, RS6000_BUILTIN_NANSQ
);
17806 ftype
= build_function_type_list (ieee128_float_type_node
, NULL_TREE
);
17807 def_builtin ("__builtin_infq", ftype
, RS6000_BUILTIN_INFQ
);
17808 def_builtin ("__builtin_huge_valq", ftype
, RS6000_BUILTIN_HUGE_VALQ
);
17810 ftype
= builtin_function_type (DFmode
, DFmode
, DFmode
, VOIDmode
,
17811 RS6000_BUILTIN_RECIP
, "__builtin_recipdiv");
17812 def_builtin ("__builtin_recipdiv", ftype
, RS6000_BUILTIN_RECIP
);
17814 ftype
= builtin_function_type (SFmode
, SFmode
, SFmode
, VOIDmode
,
17815 RS6000_BUILTIN_RECIPF
, "__builtin_recipdivf");
17816 def_builtin ("__builtin_recipdivf", ftype
, RS6000_BUILTIN_RECIPF
);
17818 ftype
= builtin_function_type (DFmode
, DFmode
, VOIDmode
, VOIDmode
,
17819 RS6000_BUILTIN_RSQRT
, "__builtin_rsqrt");
17820 def_builtin ("__builtin_rsqrt", ftype
, RS6000_BUILTIN_RSQRT
);
17822 ftype
= builtin_function_type (SFmode
, SFmode
, VOIDmode
, VOIDmode
,
17823 RS6000_BUILTIN_RSQRTF
, "__builtin_rsqrtf");
17824 def_builtin ("__builtin_rsqrtf", ftype
, RS6000_BUILTIN_RSQRTF
);
17826 mode
= (TARGET_64BIT
) ? DImode
: SImode
;
17827 ftype
= builtin_function_type (mode
, mode
, mode
, VOIDmode
,
17828 POWER7_BUILTIN_BPERMD
, "__builtin_bpermd");
17829 def_builtin ("__builtin_bpermd", ftype
, POWER7_BUILTIN_BPERMD
);
17831 ftype
= build_function_type_list (unsigned_intDI_type_node
,
17833 def_builtin ("__builtin_ppc_get_timebase", ftype
, RS6000_BUILTIN_GET_TB
);
17836 ftype
= build_function_type_list (unsigned_intDI_type_node
,
17839 ftype
= build_function_type_list (unsigned_intSI_type_node
,
17841 def_builtin ("__builtin_ppc_mftb", ftype
, RS6000_BUILTIN_MFTB
);
17843 ftype
= build_function_type_list (double_type_node
, NULL_TREE
);
17844 def_builtin ("__builtin_mffs", ftype
, RS6000_BUILTIN_MFFS
);
17846 ftype
= build_function_type_list (void_type_node
,
17847 intSI_type_node
, double_type_node
,
17849 def_builtin ("__builtin_mtfsf", ftype
, RS6000_BUILTIN_MTFSF
);
17851 ftype
= build_function_type_list (void_type_node
, NULL_TREE
);
17852 def_builtin ("__builtin_cpu_init", ftype
, RS6000_BUILTIN_CPU_INIT
);
17854 ftype
= build_function_type_list (bool_int_type_node
, const_ptr_type_node
,
17856 def_builtin ("__builtin_cpu_is", ftype
, RS6000_BUILTIN_CPU_IS
);
17857 def_builtin ("__builtin_cpu_supports", ftype
, RS6000_BUILTIN_CPU_SUPPORTS
);
17859 /* AIX libm provides clog as __clog. */
17860 if (TARGET_XCOFF
&&
17861 (tdecl
= builtin_decl_explicit (BUILT_IN_CLOG
)) != NULL_TREE
)
17862 set_user_assembler_name (tdecl
, "__clog");
17864 #ifdef SUBTARGET_INIT_BUILTINS
17865 SUBTARGET_INIT_BUILTINS
;
17869 /* Returns the rs6000 builtin decl for CODE. */
17872 rs6000_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
17874 HOST_WIDE_INT fnmask
;
17876 if (code
>= RS6000_BUILTIN_COUNT
)
17877 return error_mark_node
;
17879 fnmask
= rs6000_builtin_info
[code
].mask
;
17880 if ((fnmask
& rs6000_builtin_mask
) != fnmask
)
17882 rs6000_invalid_builtin ((enum rs6000_builtins
)code
);
17883 return error_mark_node
;
17886 return rs6000_builtin_decls
[code
];
17890 spe_init_builtins (void)
17892 tree puint_type_node
= build_pointer_type (unsigned_type_node
);
17893 tree pushort_type_node
= build_pointer_type (short_unsigned_type_node
);
17894 const struct builtin_description
*d
;
17896 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
17898 tree v2si_ftype_4_v2si
17899 = build_function_type_list (opaque_V2SI_type_node
,
17900 opaque_V2SI_type_node
,
17901 opaque_V2SI_type_node
,
17902 opaque_V2SI_type_node
,
17903 opaque_V2SI_type_node
,
17906 tree v2sf_ftype_4_v2sf
17907 = build_function_type_list (opaque_V2SF_type_node
,
17908 opaque_V2SF_type_node
,
17909 opaque_V2SF_type_node
,
17910 opaque_V2SF_type_node
,
17911 opaque_V2SF_type_node
,
17914 tree int_ftype_int_v2si_v2si
17915 = build_function_type_list (integer_type_node
,
17917 opaque_V2SI_type_node
,
17918 opaque_V2SI_type_node
,
17921 tree int_ftype_int_v2sf_v2sf
17922 = build_function_type_list (integer_type_node
,
17924 opaque_V2SF_type_node
,
17925 opaque_V2SF_type_node
,
17928 tree void_ftype_v2si_puint_int
17929 = build_function_type_list (void_type_node
,
17930 opaque_V2SI_type_node
,
17935 tree void_ftype_v2si_puint_char
17936 = build_function_type_list (void_type_node
,
17937 opaque_V2SI_type_node
,
17942 tree void_ftype_v2si_pv2si_int
17943 = build_function_type_list (void_type_node
,
17944 opaque_V2SI_type_node
,
17945 opaque_p_V2SI_type_node
,
17949 tree void_ftype_v2si_pv2si_char
17950 = build_function_type_list (void_type_node
,
17951 opaque_V2SI_type_node
,
17952 opaque_p_V2SI_type_node
,
17956 tree void_ftype_int
17957 = build_function_type_list (void_type_node
, integer_type_node
, NULL_TREE
);
17959 tree int_ftype_void
17960 = build_function_type_list (integer_type_node
, NULL_TREE
);
17962 tree v2si_ftype_pv2si_int
17963 = build_function_type_list (opaque_V2SI_type_node
,
17964 opaque_p_V2SI_type_node
,
17968 tree v2si_ftype_puint_int
17969 = build_function_type_list (opaque_V2SI_type_node
,
17974 tree v2si_ftype_pushort_int
17975 = build_function_type_list (opaque_V2SI_type_node
,
17980 tree v2si_ftype_signed_char
17981 = build_function_type_list (opaque_V2SI_type_node
,
17982 signed_char_type_node
,
17985 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node
);
17987 /* Initialize irregular SPE builtins. */
17989 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int
, SPE_BUILTIN_MTSPEFSCR
);
17990 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void
, SPE_BUILTIN_MFSPEFSCR
);
17991 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int
, SPE_BUILTIN_EVSTDDX
);
17992 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int
, SPE_BUILTIN_EVSTDHX
);
17993 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int
, SPE_BUILTIN_EVSTDWX
);
17994 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWHEX
);
17995 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWHOX
);
17996 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWWEX
);
17997 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWWOX
);
17998 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char
, SPE_BUILTIN_EVSTDD
);
17999 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char
, SPE_BUILTIN_EVSTDH
);
18000 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char
, SPE_BUILTIN_EVSTDW
);
18001 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWHE
);
18002 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWHO
);
18003 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWWE
);
18004 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWWO
);
18005 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char
, SPE_BUILTIN_EVSPLATFI
);
18006 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char
, SPE_BUILTIN_EVSPLATI
);
18009 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDDX
);
18010 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDWX
);
18011 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDHX
);
18012 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHEX
);
18013 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOUX
);
18014 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOSX
);
18015 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWWSPLATX
);
18016 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHSPLATX
);
18017 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHESPLATX
);
18018 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOUSPLATX
);
18019 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOSSPLATX
);
18020 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDD
);
18021 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDW
);
18022 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDH
);
18023 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHESPLAT
);
18024 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOSSPLAT
);
18025 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOUSPLAT
);
18026 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHE
);
18027 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOS
);
18028 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOU
);
18029 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHSPLAT
);
18030 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWWSPLAT
);
18033 d
= bdesc_spe_predicates
;
18034 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_predicates
); ++i
, d
++)
18037 HOST_WIDE_INT mask
= d
->mask
;
18039 if ((mask
& builtin_mask
) != mask
)
18041 if (TARGET_DEBUG_BUILTIN
)
18042 fprintf (stderr
, "spe_init_builtins, skip predicate %s\n",
18047 /* Cannot define builtin if the instruction is disabled. */
18048 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18049 switch (insn_data
[d
->icode
].operand
[1].mode
)
18052 type
= int_ftype_int_v2si_v2si
;
18055 type
= int_ftype_int_v2sf_v2sf
;
18058 gcc_unreachable ();
18061 def_builtin (d
->name
, type
, d
->code
);
18064 /* Evsel predicates. */
18065 d
= bdesc_spe_evsel
;
18066 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_evsel
); ++i
, d
++)
18069 HOST_WIDE_INT mask
= d
->mask
;
18071 if ((mask
& builtin_mask
) != mask
)
18073 if (TARGET_DEBUG_BUILTIN
)
18074 fprintf (stderr
, "spe_init_builtins, skip evsel %s\n",
18079 /* Cannot define builtin if the instruction is disabled. */
18080 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18081 switch (insn_data
[d
->icode
].operand
[1].mode
)
18084 type
= v2si_ftype_4_v2si
;
18087 type
= v2sf_ftype_4_v2sf
;
18090 gcc_unreachable ();
18093 def_builtin (d
->name
, type
, d
->code
);
18098 paired_init_builtins (void)
18100 const struct builtin_description
*d
;
18102 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
18104 tree int_ftype_int_v2sf_v2sf
18105 = build_function_type_list (integer_type_node
,
18110 tree pcfloat_type_node
=
18111 build_pointer_type (build_qualified_type
18112 (float_type_node
, TYPE_QUAL_CONST
));
18114 tree v2sf_ftype_long_pcfloat
= build_function_type_list (V2SF_type_node
,
18115 long_integer_type_node
,
18118 tree void_ftype_v2sf_long_pcfloat
=
18119 build_function_type_list (void_type_node
,
18121 long_integer_type_node
,
18126 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat
,
18127 PAIRED_BUILTIN_LX
);
18130 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat
,
18131 PAIRED_BUILTIN_STX
);
18134 d
= bdesc_paired_preds
;
18135 for (i
= 0; i
< ARRAY_SIZE (bdesc_paired_preds
); ++i
, d
++)
18138 HOST_WIDE_INT mask
= d
->mask
;
18140 if ((mask
& builtin_mask
) != mask
)
18142 if (TARGET_DEBUG_BUILTIN
)
18143 fprintf (stderr
, "paired_init_builtins, skip predicate %s\n",
18148 /* Cannot define builtin if the instruction is disabled. */
18149 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18151 if (TARGET_DEBUG_BUILTIN
)
18152 fprintf (stderr
, "paired pred #%d, insn = %s [%d], mode = %s\n",
18153 (int)i
, get_insn_name (d
->icode
), (int)d
->icode
,
18154 GET_MODE_NAME (insn_data
[d
->icode
].operand
[1].mode
));
18156 switch (insn_data
[d
->icode
].operand
[1].mode
)
18159 type
= int_ftype_int_v2sf_v2sf
;
18162 gcc_unreachable ();
18165 def_builtin (d
->name
, type
, d
->code
);
18170 altivec_init_builtins (void)
18172 const struct builtin_description
*d
;
18176 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
18178 tree pvoid_type_node
= build_pointer_type (void_type_node
);
18180 tree pcvoid_type_node
18181 = build_pointer_type (build_qualified_type (void_type_node
,
18184 tree int_ftype_opaque
18185 = build_function_type_list (integer_type_node
,
18186 opaque_V4SI_type_node
, NULL_TREE
);
18187 tree opaque_ftype_opaque
18188 = build_function_type_list (integer_type_node
, NULL_TREE
);
18189 tree opaque_ftype_opaque_int
18190 = build_function_type_list (opaque_V4SI_type_node
,
18191 opaque_V4SI_type_node
, integer_type_node
, NULL_TREE
);
18192 tree opaque_ftype_opaque_opaque_int
18193 = build_function_type_list (opaque_V4SI_type_node
,
18194 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
18195 integer_type_node
, NULL_TREE
);
18196 tree opaque_ftype_opaque_opaque_opaque
18197 = build_function_type_list (opaque_V4SI_type_node
,
18198 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
18199 opaque_V4SI_type_node
, NULL_TREE
);
18200 tree opaque_ftype_opaque_opaque
18201 = build_function_type_list (opaque_V4SI_type_node
,
18202 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
18204 tree int_ftype_int_opaque_opaque
18205 = build_function_type_list (integer_type_node
,
18206 integer_type_node
, opaque_V4SI_type_node
,
18207 opaque_V4SI_type_node
, NULL_TREE
);
18208 tree int_ftype_int_v4si_v4si
18209 = build_function_type_list (integer_type_node
,
18210 integer_type_node
, V4SI_type_node
,
18211 V4SI_type_node
, NULL_TREE
);
18212 tree int_ftype_int_v2di_v2di
18213 = build_function_type_list (integer_type_node
,
18214 integer_type_node
, V2DI_type_node
,
18215 V2DI_type_node
, NULL_TREE
);
18216 tree void_ftype_v4si
18217 = build_function_type_list (void_type_node
, V4SI_type_node
, NULL_TREE
);
18218 tree v8hi_ftype_void
18219 = build_function_type_list (V8HI_type_node
, NULL_TREE
);
18220 tree void_ftype_void
18221 = build_function_type_list (void_type_node
, NULL_TREE
);
18222 tree void_ftype_int
18223 = build_function_type_list (void_type_node
, integer_type_node
, NULL_TREE
);
18225 tree opaque_ftype_long_pcvoid
18226 = build_function_type_list (opaque_V4SI_type_node
,
18227 long_integer_type_node
, pcvoid_type_node
,
18229 tree v16qi_ftype_long_pcvoid
18230 = build_function_type_list (V16QI_type_node
,
18231 long_integer_type_node
, pcvoid_type_node
,
18233 tree v8hi_ftype_long_pcvoid
18234 = build_function_type_list (V8HI_type_node
,
18235 long_integer_type_node
, pcvoid_type_node
,
18237 tree v4si_ftype_long_pcvoid
18238 = build_function_type_list (V4SI_type_node
,
18239 long_integer_type_node
, pcvoid_type_node
,
18241 tree v4sf_ftype_long_pcvoid
18242 = build_function_type_list (V4SF_type_node
,
18243 long_integer_type_node
, pcvoid_type_node
,
18245 tree v2df_ftype_long_pcvoid
18246 = build_function_type_list (V2DF_type_node
,
18247 long_integer_type_node
, pcvoid_type_node
,
18249 tree v2di_ftype_long_pcvoid
18250 = build_function_type_list (V2DI_type_node
,
18251 long_integer_type_node
, pcvoid_type_node
,
18254 tree void_ftype_opaque_long_pvoid
18255 = build_function_type_list (void_type_node
,
18256 opaque_V4SI_type_node
, long_integer_type_node
,
18257 pvoid_type_node
, NULL_TREE
);
18258 tree void_ftype_v4si_long_pvoid
18259 = build_function_type_list (void_type_node
,
18260 V4SI_type_node
, long_integer_type_node
,
18261 pvoid_type_node
, NULL_TREE
);
18262 tree void_ftype_v16qi_long_pvoid
18263 = build_function_type_list (void_type_node
,
18264 V16QI_type_node
, long_integer_type_node
,
18265 pvoid_type_node
, NULL_TREE
);
18267 tree void_ftype_v16qi_pvoid_long
18268 = build_function_type_list (void_type_node
,
18269 V16QI_type_node
, pvoid_type_node
,
18270 long_integer_type_node
, NULL_TREE
);
18272 tree void_ftype_v8hi_long_pvoid
18273 = build_function_type_list (void_type_node
,
18274 V8HI_type_node
, long_integer_type_node
,
18275 pvoid_type_node
, NULL_TREE
);
18276 tree void_ftype_v4sf_long_pvoid
18277 = build_function_type_list (void_type_node
,
18278 V4SF_type_node
, long_integer_type_node
,
18279 pvoid_type_node
, NULL_TREE
);
18280 tree void_ftype_v2df_long_pvoid
18281 = build_function_type_list (void_type_node
,
18282 V2DF_type_node
, long_integer_type_node
,
18283 pvoid_type_node
, NULL_TREE
);
18284 tree void_ftype_v2di_long_pvoid
18285 = build_function_type_list (void_type_node
,
18286 V2DI_type_node
, long_integer_type_node
,
18287 pvoid_type_node
, NULL_TREE
);
18288 tree int_ftype_int_v8hi_v8hi
18289 = build_function_type_list (integer_type_node
,
18290 integer_type_node
, V8HI_type_node
,
18291 V8HI_type_node
, NULL_TREE
);
18292 tree int_ftype_int_v16qi_v16qi
18293 = build_function_type_list (integer_type_node
,
18294 integer_type_node
, V16QI_type_node
,
18295 V16QI_type_node
, NULL_TREE
);
18296 tree int_ftype_int_v4sf_v4sf
18297 = build_function_type_list (integer_type_node
,
18298 integer_type_node
, V4SF_type_node
,
18299 V4SF_type_node
, NULL_TREE
);
18300 tree int_ftype_int_v2df_v2df
18301 = build_function_type_list (integer_type_node
,
18302 integer_type_node
, V2DF_type_node
,
18303 V2DF_type_node
, NULL_TREE
);
18304 tree v2di_ftype_v2di
18305 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
18306 tree v4si_ftype_v4si
18307 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18308 tree v8hi_ftype_v8hi
18309 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18310 tree v16qi_ftype_v16qi
18311 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18312 tree v4sf_ftype_v4sf
18313 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18314 tree v2df_ftype_v2df
18315 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18316 tree void_ftype_pcvoid_int_int
18317 = build_function_type_list (void_type_node
,
18318 pcvoid_type_node
, integer_type_node
,
18319 integer_type_node
, NULL_TREE
);
18321 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si
, ALTIVEC_BUILTIN_MTVSCR
);
18322 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void
, ALTIVEC_BUILTIN_MFVSCR
);
18323 def_builtin ("__builtin_altivec_dssall", void_ftype_void
, ALTIVEC_BUILTIN_DSSALL
);
18324 def_builtin ("__builtin_altivec_dss", void_ftype_int
, ALTIVEC_BUILTIN_DSS
);
18325 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVSL
);
18326 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVSR
);
18327 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEBX
);
18328 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEHX
);
18329 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEWX
);
18330 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVXL
);
18331 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid
,
18332 ALTIVEC_BUILTIN_LVXL_V2DF
);
18333 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid
,
18334 ALTIVEC_BUILTIN_LVXL_V2DI
);
18335 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid
,
18336 ALTIVEC_BUILTIN_LVXL_V4SF
);
18337 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid
,
18338 ALTIVEC_BUILTIN_LVXL_V4SI
);
18339 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid
,
18340 ALTIVEC_BUILTIN_LVXL_V8HI
);
18341 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid
,
18342 ALTIVEC_BUILTIN_LVXL_V16QI
);
18343 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVX
);
18344 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid
,
18345 ALTIVEC_BUILTIN_LVX_V2DF
);
18346 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid
,
18347 ALTIVEC_BUILTIN_LVX_V2DI
);
18348 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid
,
18349 ALTIVEC_BUILTIN_LVX_V4SF
);
18350 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid
,
18351 ALTIVEC_BUILTIN_LVX_V4SI
);
18352 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid
,
18353 ALTIVEC_BUILTIN_LVX_V8HI
);
18354 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid
,
18355 ALTIVEC_BUILTIN_LVX_V16QI
);
18356 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVX
);
18357 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid
,
18358 ALTIVEC_BUILTIN_STVX_V2DF
);
18359 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid
,
18360 ALTIVEC_BUILTIN_STVX_V2DI
);
18361 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid
,
18362 ALTIVEC_BUILTIN_STVX_V4SF
);
18363 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid
,
18364 ALTIVEC_BUILTIN_STVX_V4SI
);
18365 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid
,
18366 ALTIVEC_BUILTIN_STVX_V8HI
);
18367 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid
,
18368 ALTIVEC_BUILTIN_STVX_V16QI
);
18369 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVEWX
);
18370 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVXL
);
18371 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid
,
18372 ALTIVEC_BUILTIN_STVXL_V2DF
);
18373 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid
,
18374 ALTIVEC_BUILTIN_STVXL_V2DI
);
18375 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid
,
18376 ALTIVEC_BUILTIN_STVXL_V4SF
);
18377 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid
,
18378 ALTIVEC_BUILTIN_STVXL_V4SI
);
18379 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid
,
18380 ALTIVEC_BUILTIN_STVXL_V8HI
);
18381 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid
,
18382 ALTIVEC_BUILTIN_STVXL_V16QI
);
18383 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVEBX
);
18384 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid
, ALTIVEC_BUILTIN_STVEHX
);
18385 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LD
);
18386 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LDE
);
18387 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LDL
);
18388 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVSL
);
18389 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVSR
);
18390 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEBX
);
18391 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEHX
);
18392 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEWX
);
18393 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_ST
);
18394 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STE
);
18395 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STL
);
18396 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEWX
);
18397 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEBX
);
18398 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEHX
);
18400 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid
,
18401 VSX_BUILTIN_LXVD2X_V2DF
);
18402 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid
,
18403 VSX_BUILTIN_LXVD2X_V2DI
);
18404 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid
,
18405 VSX_BUILTIN_LXVW4X_V4SF
);
18406 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid
,
18407 VSX_BUILTIN_LXVW4X_V4SI
);
18408 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid
,
18409 VSX_BUILTIN_LXVW4X_V8HI
);
18410 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid
,
18411 VSX_BUILTIN_LXVW4X_V16QI
);
18412 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid
,
18413 VSX_BUILTIN_STXVD2X_V2DF
);
18414 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid
,
18415 VSX_BUILTIN_STXVD2X_V2DI
);
18416 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid
,
18417 VSX_BUILTIN_STXVW4X_V4SF
);
18418 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid
,
18419 VSX_BUILTIN_STXVW4X_V4SI
);
18420 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid
,
18421 VSX_BUILTIN_STXVW4X_V8HI
);
18422 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid
,
18423 VSX_BUILTIN_STXVW4X_V16QI
);
18425 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid
,
18426 VSX_BUILTIN_LD_ELEMREV_V2DF
);
18427 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid
,
18428 VSX_BUILTIN_LD_ELEMREV_V2DI
);
18429 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid
,
18430 VSX_BUILTIN_LD_ELEMREV_V4SF
);
18431 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid
,
18432 VSX_BUILTIN_LD_ELEMREV_V4SI
);
18433 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid
,
18434 VSX_BUILTIN_ST_ELEMREV_V2DF
);
18435 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid
,
18436 VSX_BUILTIN_ST_ELEMREV_V2DI
);
18437 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid
,
18438 VSX_BUILTIN_ST_ELEMREV_V4SF
);
18439 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid
,
18440 VSX_BUILTIN_ST_ELEMREV_V4SI
);
18442 if (TARGET_P9_VECTOR
)
18444 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid
,
18445 VSX_BUILTIN_LD_ELEMREV_V8HI
);
18446 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid
,
18447 VSX_BUILTIN_LD_ELEMREV_V16QI
);
18448 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18449 void_ftype_v8hi_long_pvoid
, VSX_BUILTIN_ST_ELEMREV_V8HI
);
18450 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18451 void_ftype_v16qi_long_pvoid
, VSX_BUILTIN_ST_ELEMREV_V16QI
);
18455 rs6000_builtin_decls
[(int) VSX_BUILTIN_LD_ELEMREV_V8HI
]
18456 = rs6000_builtin_decls
[(int) VSX_BUILTIN_LXVW4X_V8HI
];
18457 rs6000_builtin_decls
[(int) VSX_BUILTIN_LD_ELEMREV_V16QI
]
18458 = rs6000_builtin_decls
[(int) VSX_BUILTIN_LXVW4X_V16QI
];
18459 rs6000_builtin_decls
[(int) VSX_BUILTIN_ST_ELEMREV_V8HI
]
18460 = rs6000_builtin_decls
[(int) VSX_BUILTIN_STXVW4X_V8HI
];
18461 rs6000_builtin_decls
[(int) VSX_BUILTIN_ST_ELEMREV_V16QI
]
18462 = rs6000_builtin_decls
[(int) VSX_BUILTIN_STXVW4X_V16QI
];
18465 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid
,
18466 VSX_BUILTIN_VEC_LD
);
18467 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid
,
18468 VSX_BUILTIN_VEC_ST
);
18469 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid
,
18470 VSX_BUILTIN_VEC_XL
);
18471 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid
,
18472 VSX_BUILTIN_VEC_XST
);
18474 def_builtin ("__builtin_vec_step", int_ftype_opaque
, ALTIVEC_BUILTIN_VEC_STEP
);
18475 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque
, ALTIVEC_BUILTIN_VEC_SPLATS
);
18476 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque
, ALTIVEC_BUILTIN_VEC_PROMOTE
);
18478 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int
, ALTIVEC_BUILTIN_VEC_SLD
);
18479 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_SPLAT
);
18480 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_EXTRACT
);
18481 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int
, ALTIVEC_BUILTIN_VEC_INSERT
);
18482 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTW
);
18483 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTH
);
18484 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTB
);
18485 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTF
);
18486 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VCFSX
);
18487 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VCFUX
);
18488 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTS
);
18489 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTU
);
18491 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque
,
18492 ALTIVEC_BUILTIN_VEC_ADDE
);
18493 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque
,
18494 ALTIVEC_BUILTIN_VEC_ADDEC
);
18495 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque
,
18496 ALTIVEC_BUILTIN_VEC_CMPNE
);
18497 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque
,
18498 ALTIVEC_BUILTIN_VEC_MUL
);
18500 /* Cell builtins. */
18501 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVLX
);
18502 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVLXL
);
18503 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVRX
);
18504 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVRXL
);
18506 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVLX
);
18507 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVLXL
);
18508 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVRX
);
18509 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVRXL
);
18511 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVLX
);
18512 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVLXL
);
18513 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVRX
);
18514 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVRXL
);
18516 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVLX
);
18517 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVLXL
);
18518 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVRX
);
18519 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVRXL
);
18521 if (TARGET_P9_VECTOR
)
18522 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long
,
18523 P9V_BUILTIN_STXVL
);
18525 /* Add the DST variants. */
18527 for (i
= 0; i
< ARRAY_SIZE (bdesc_dst
); i
++, d
++)
18529 HOST_WIDE_INT mask
= d
->mask
;
18531 /* It is expected that these dst built-in functions may have
18532 d->icode equal to CODE_FOR_nothing. */
18533 if ((mask
& builtin_mask
) != mask
)
18535 if (TARGET_DEBUG_BUILTIN
)
18536 fprintf (stderr
, "altivec_init_builtins, skip dst %s\n",
18540 def_builtin (d
->name
, void_ftype_pcvoid_int_int
, d
->code
);
18543 /* Initialize the predicates. */
18544 d
= bdesc_altivec_preds
;
18545 for (i
= 0; i
< ARRAY_SIZE (bdesc_altivec_preds
); i
++, d
++)
18547 machine_mode mode1
;
18549 HOST_WIDE_INT mask
= d
->mask
;
18551 if ((mask
& builtin_mask
) != mask
)
18553 if (TARGET_DEBUG_BUILTIN
)
18554 fprintf (stderr
, "altivec_init_builtins, skip predicate %s\n",
18559 if (rs6000_overloaded_builtin_p (d
->code
))
18563 /* Cannot define builtin if the instruction is disabled. */
18564 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18565 mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18571 type
= int_ftype_int_opaque_opaque
;
18574 type
= int_ftype_int_v2di_v2di
;
18577 type
= int_ftype_int_v4si_v4si
;
18580 type
= int_ftype_int_v8hi_v8hi
;
18583 type
= int_ftype_int_v16qi_v16qi
;
18586 type
= int_ftype_int_v4sf_v4sf
;
18589 type
= int_ftype_int_v2df_v2df
;
18592 gcc_unreachable ();
18595 def_builtin (d
->name
, type
, d
->code
);
18598 /* Initialize the abs* operators. */
18600 for (i
= 0; i
< ARRAY_SIZE (bdesc_abs
); i
++, d
++)
18602 machine_mode mode0
;
18604 HOST_WIDE_INT mask
= d
->mask
;
18606 if ((mask
& builtin_mask
) != mask
)
18608 if (TARGET_DEBUG_BUILTIN
)
18609 fprintf (stderr
, "altivec_init_builtins, skip abs %s\n",
18614 /* Cannot define builtin if the instruction is disabled. */
18615 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18616 mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18621 type
= v2di_ftype_v2di
;
18624 type
= v4si_ftype_v4si
;
18627 type
= v8hi_ftype_v8hi
;
18630 type
= v16qi_ftype_v16qi
;
18633 type
= v4sf_ftype_v4sf
;
18636 type
= v2df_ftype_v2df
;
18639 gcc_unreachable ();
18642 def_builtin (d
->name
, type
, d
->code
);
18645 /* Initialize target builtin that implements
18646 targetm.vectorize.builtin_mask_for_load. */
18648 decl
= add_builtin_function ("__builtin_altivec_mask_for_load",
18649 v16qi_ftype_long_pcvoid
,
18650 ALTIVEC_BUILTIN_MASK_FOR_LOAD
,
18651 BUILT_IN_MD
, NULL
, NULL_TREE
);
18652 TREE_READONLY (decl
) = 1;
18653 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18654 altivec_builtin_mask_for_load
= decl
;
18656 /* Access to the vec_init patterns. */
18657 ftype
= build_function_type_list (V4SI_type_node
, integer_type_node
,
18658 integer_type_node
, integer_type_node
,
18659 integer_type_node
, NULL_TREE
);
18660 def_builtin ("__builtin_vec_init_v4si", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V4SI
);
18662 ftype
= build_function_type_list (V8HI_type_node
, short_integer_type_node
,
18663 short_integer_type_node
,
18664 short_integer_type_node
,
18665 short_integer_type_node
,
18666 short_integer_type_node
,
18667 short_integer_type_node
,
18668 short_integer_type_node
,
18669 short_integer_type_node
, NULL_TREE
);
18670 def_builtin ("__builtin_vec_init_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V8HI
);
18672 ftype
= build_function_type_list (V16QI_type_node
, char_type_node
,
18673 char_type_node
, char_type_node
,
18674 char_type_node
, char_type_node
,
18675 char_type_node
, char_type_node
,
18676 char_type_node
, char_type_node
,
18677 char_type_node
, char_type_node
,
18678 char_type_node
, char_type_node
,
18679 char_type_node
, char_type_node
,
18680 char_type_node
, NULL_TREE
);
18681 def_builtin ("__builtin_vec_init_v16qi", ftype
,
18682 ALTIVEC_BUILTIN_VEC_INIT_V16QI
);
18684 ftype
= build_function_type_list (V4SF_type_node
, float_type_node
,
18685 float_type_node
, float_type_node
,
18686 float_type_node
, NULL_TREE
);
18687 def_builtin ("__builtin_vec_init_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V4SF
);
18689 /* VSX builtins. */
18690 ftype
= build_function_type_list (V2DF_type_node
, double_type_node
,
18691 double_type_node
, NULL_TREE
);
18692 def_builtin ("__builtin_vec_init_v2df", ftype
, VSX_BUILTIN_VEC_INIT_V2DF
);
18694 ftype
= build_function_type_list (V2DI_type_node
, intDI_type_node
,
18695 intDI_type_node
, NULL_TREE
);
18696 def_builtin ("__builtin_vec_init_v2di", ftype
, VSX_BUILTIN_VEC_INIT_V2DI
);
18698 /* Access to the vec_set patterns. */
18699 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
18701 integer_type_node
, NULL_TREE
);
18702 def_builtin ("__builtin_vec_set_v4si", ftype
, ALTIVEC_BUILTIN_VEC_SET_V4SI
);
18704 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
18706 integer_type_node
, NULL_TREE
);
18707 def_builtin ("__builtin_vec_set_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_SET_V8HI
);
18709 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18711 integer_type_node
, NULL_TREE
);
18712 def_builtin ("__builtin_vec_set_v16qi", ftype
, ALTIVEC_BUILTIN_VEC_SET_V16QI
);
18714 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
18716 integer_type_node
, NULL_TREE
);
18717 def_builtin ("__builtin_vec_set_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_SET_V4SF
);
18719 ftype
= build_function_type_list (V2DF_type_node
, V2DF_type_node
,
18721 integer_type_node
, NULL_TREE
);
18722 def_builtin ("__builtin_vec_set_v2df", ftype
, VSX_BUILTIN_VEC_SET_V2DF
);
18724 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18726 integer_type_node
, NULL_TREE
);
18727 def_builtin ("__builtin_vec_set_v2di", ftype
, VSX_BUILTIN_VEC_SET_V2DI
);
18729 /* Access to the vec_extract patterns. */
18730 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
18731 integer_type_node
, NULL_TREE
);
18732 def_builtin ("__builtin_vec_ext_v4si", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V4SI
);
18734 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
18735 integer_type_node
, NULL_TREE
);
18736 def_builtin ("__builtin_vec_ext_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V8HI
);
18738 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
18739 integer_type_node
, NULL_TREE
);
18740 def_builtin ("__builtin_vec_ext_v16qi", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V16QI
);
18742 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
18743 integer_type_node
, NULL_TREE
);
18744 def_builtin ("__builtin_vec_ext_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V4SF
);
18746 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
18747 integer_type_node
, NULL_TREE
);
18748 def_builtin ("__builtin_vec_ext_v2df", ftype
, VSX_BUILTIN_VEC_EXT_V2DF
);
18750 ftype
= build_function_type_list (intDI_type_node
, V2DI_type_node
,
18751 integer_type_node
, NULL_TREE
);
18752 def_builtin ("__builtin_vec_ext_v2di", ftype
, VSX_BUILTIN_VEC_EXT_V2DI
);
18755 if (V1TI_type_node
)
18757 tree v1ti_ftype_long_pcvoid
18758 = build_function_type_list (V1TI_type_node
,
18759 long_integer_type_node
, pcvoid_type_node
,
18761 tree void_ftype_v1ti_long_pvoid
18762 = build_function_type_list (void_type_node
,
18763 V1TI_type_node
, long_integer_type_node
,
18764 pvoid_type_node
, NULL_TREE
);
18765 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid
,
18766 VSX_BUILTIN_LXVD2X_V1TI
);
18767 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid
,
18768 VSX_BUILTIN_STXVD2X_V1TI
);
18769 ftype
= build_function_type_list (V1TI_type_node
, intTI_type_node
,
18770 NULL_TREE
, NULL_TREE
);
18771 def_builtin ("__builtin_vec_init_v1ti", ftype
, VSX_BUILTIN_VEC_INIT_V1TI
);
18772 ftype
= build_function_type_list (V1TI_type_node
, V1TI_type_node
,
18774 integer_type_node
, NULL_TREE
);
18775 def_builtin ("__builtin_vec_set_v1ti", ftype
, VSX_BUILTIN_VEC_SET_V1TI
);
18776 ftype
= build_function_type_list (intTI_type_node
, V1TI_type_node
,
18777 integer_type_node
, NULL_TREE
);
18778 def_builtin ("__builtin_vec_ext_v1ti", ftype
, VSX_BUILTIN_VEC_EXT_V1TI
);
18784 htm_init_builtins (void)
18786 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
18787 const struct builtin_description
*d
;
18791 for (i
= 0; i
< ARRAY_SIZE (bdesc_htm
); i
++, d
++)
18793 tree op
[MAX_HTM_OPERANDS
], type
;
18794 HOST_WIDE_INT mask
= d
->mask
;
18795 unsigned attr
= rs6000_builtin_info
[d
->code
].attr
;
18796 bool void_func
= (attr
& RS6000_BTC_VOID
);
18797 int attr_args
= (attr
& RS6000_BTC_TYPE_MASK
);
18799 tree gpr_type_node
;
18803 /* It is expected that these htm built-in functions may have
18804 d->icode equal to CODE_FOR_nothing. */
18806 if (TARGET_32BIT
&& TARGET_POWERPC64
)
18807 gpr_type_node
= long_long_unsigned_type_node
;
18809 gpr_type_node
= long_unsigned_type_node
;
18811 if (attr
& RS6000_BTC_SPR
)
18813 rettype
= gpr_type_node
;
18814 argtype
= gpr_type_node
;
18816 else if (d
->code
== HTM_BUILTIN_TABORTDC
18817 || d
->code
== HTM_BUILTIN_TABORTDCI
)
18819 rettype
= unsigned_type_node
;
18820 argtype
= gpr_type_node
;
18824 rettype
= unsigned_type_node
;
18825 argtype
= unsigned_type_node
;
18828 if ((mask
& builtin_mask
) != mask
)
18830 if (TARGET_DEBUG_BUILTIN
)
18831 fprintf (stderr
, "htm_builtin, skip binary %s\n", d
->name
);
18837 if (TARGET_DEBUG_BUILTIN
)
18838 fprintf (stderr
, "htm_builtin, bdesc_htm[%ld] no name\n",
18839 (long unsigned) i
);
18843 op
[nopnds
++] = (void_func
) ? void_type_node
: rettype
;
18845 if (attr_args
== RS6000_BTC_UNARY
)
18846 op
[nopnds
++] = argtype
;
18847 else if (attr_args
== RS6000_BTC_BINARY
)
18849 op
[nopnds
++] = argtype
;
18850 op
[nopnds
++] = argtype
;
18852 else if (attr_args
== RS6000_BTC_TERNARY
)
18854 op
[nopnds
++] = argtype
;
18855 op
[nopnds
++] = argtype
;
18856 op
[nopnds
++] = argtype
;
18862 type
= build_function_type_list (op
[0], NULL_TREE
);
18865 type
= build_function_type_list (op
[0], op
[1], NULL_TREE
);
18868 type
= build_function_type_list (op
[0], op
[1], op
[2], NULL_TREE
);
18871 type
= build_function_type_list (op
[0], op
[1], op
[2], op
[3],
18875 gcc_unreachable ();
18878 def_builtin (d
->name
, type
, d
->code
);
18882 /* Hash function for builtin functions with up to 3 arguments and a return
18885 builtin_hasher::hash (builtin_hash_struct
*bh
)
18890 for (i
= 0; i
< 4; i
++)
18892 ret
= (ret
* (unsigned)MAX_MACHINE_MODE
) + ((unsigned)bh
->mode
[i
]);
18893 ret
= (ret
* 2) + bh
->uns_p
[i
];
18899 /* Compare builtin hash entries H1 and H2 for equivalence. */
18901 builtin_hasher::equal (builtin_hash_struct
*p1
, builtin_hash_struct
*p2
)
18903 return ((p1
->mode
[0] == p2
->mode
[0])
18904 && (p1
->mode
[1] == p2
->mode
[1])
18905 && (p1
->mode
[2] == p2
->mode
[2])
18906 && (p1
->mode
[3] == p2
->mode
[3])
18907 && (p1
->uns_p
[0] == p2
->uns_p
[0])
18908 && (p1
->uns_p
[1] == p2
->uns_p
[1])
18909 && (p1
->uns_p
[2] == p2
->uns_p
[2])
18910 && (p1
->uns_p
[3] == p2
->uns_p
[3]));
18913 /* Map types for builtin functions with an explicit return type and up to 3
18914 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
18915 of the argument. */
18917 builtin_function_type (machine_mode mode_ret
, machine_mode mode_arg0
,
18918 machine_mode mode_arg1
, machine_mode mode_arg2
,
18919 enum rs6000_builtins builtin
, const char *name
)
18921 struct builtin_hash_struct h
;
18922 struct builtin_hash_struct
*h2
;
18925 tree ret_type
= NULL_TREE
;
18926 tree arg_type
[3] = { NULL_TREE
, NULL_TREE
, NULL_TREE
};
18928 /* Create builtin_hash_table. */
18929 if (builtin_hash_table
== NULL
)
18930 builtin_hash_table
= hash_table
<builtin_hasher
>::create_ggc (1500);
18932 h
.type
= NULL_TREE
;
18933 h
.mode
[0] = mode_ret
;
18934 h
.mode
[1] = mode_arg0
;
18935 h
.mode
[2] = mode_arg1
;
18936 h
.mode
[3] = mode_arg2
;
18942 /* If the builtin is a type that produces unsigned results or takes unsigned
18943 arguments, and it is returned as a decl for the vectorizer (such as
18944 widening multiplies, permute), make sure the arguments and return value
18945 are type correct. */
18948 /* unsigned 1 argument functions. */
18949 case CRYPTO_BUILTIN_VSBOX
:
18950 case P8V_BUILTIN_VGBBD
:
18951 case MISC_BUILTIN_CDTBCD
:
18952 case MISC_BUILTIN_CBCDTD
:
18957 /* unsigned 2 argument functions. */
18958 case ALTIVEC_BUILTIN_VMULEUB
:
18959 case ALTIVEC_BUILTIN_VMULEUH
:
18960 case ALTIVEC_BUILTIN_VMULOUB
:
18961 case ALTIVEC_BUILTIN_VMULOUH
:
18962 case CRYPTO_BUILTIN_VCIPHER
:
18963 case CRYPTO_BUILTIN_VCIPHERLAST
:
18964 case CRYPTO_BUILTIN_VNCIPHER
:
18965 case CRYPTO_BUILTIN_VNCIPHERLAST
:
18966 case CRYPTO_BUILTIN_VPMSUMB
:
18967 case CRYPTO_BUILTIN_VPMSUMH
:
18968 case CRYPTO_BUILTIN_VPMSUMW
:
18969 case CRYPTO_BUILTIN_VPMSUMD
:
18970 case CRYPTO_BUILTIN_VPMSUM
:
18971 case MISC_BUILTIN_ADDG6S
:
18972 case MISC_BUILTIN_DIVWEU
:
18973 case MISC_BUILTIN_DIVWEUO
:
18974 case MISC_BUILTIN_DIVDEU
:
18975 case MISC_BUILTIN_DIVDEUO
:
18976 case VSX_BUILTIN_UDIV_V2DI
:
18982 /* unsigned 3 argument functions. */
18983 case ALTIVEC_BUILTIN_VPERM_16QI_UNS
:
18984 case ALTIVEC_BUILTIN_VPERM_8HI_UNS
:
18985 case ALTIVEC_BUILTIN_VPERM_4SI_UNS
:
18986 case ALTIVEC_BUILTIN_VPERM_2DI_UNS
:
18987 case ALTIVEC_BUILTIN_VSEL_16QI_UNS
:
18988 case ALTIVEC_BUILTIN_VSEL_8HI_UNS
:
18989 case ALTIVEC_BUILTIN_VSEL_4SI_UNS
:
18990 case ALTIVEC_BUILTIN_VSEL_2DI_UNS
:
18991 case VSX_BUILTIN_VPERM_16QI_UNS
:
18992 case VSX_BUILTIN_VPERM_8HI_UNS
:
18993 case VSX_BUILTIN_VPERM_4SI_UNS
:
18994 case VSX_BUILTIN_VPERM_2DI_UNS
:
18995 case VSX_BUILTIN_XXSEL_16QI_UNS
:
18996 case VSX_BUILTIN_XXSEL_8HI_UNS
:
18997 case VSX_BUILTIN_XXSEL_4SI_UNS
:
18998 case VSX_BUILTIN_XXSEL_2DI_UNS
:
18999 case CRYPTO_BUILTIN_VPERMXOR
:
19000 case CRYPTO_BUILTIN_VPERMXOR_V2DI
:
19001 case CRYPTO_BUILTIN_VPERMXOR_V4SI
:
19002 case CRYPTO_BUILTIN_VPERMXOR_V8HI
:
19003 case CRYPTO_BUILTIN_VPERMXOR_V16QI
:
19004 case CRYPTO_BUILTIN_VSHASIGMAW
:
19005 case CRYPTO_BUILTIN_VSHASIGMAD
:
19006 case CRYPTO_BUILTIN_VSHASIGMA
:
19013 /* signed permute functions with unsigned char mask. */
19014 case ALTIVEC_BUILTIN_VPERM_16QI
:
19015 case ALTIVEC_BUILTIN_VPERM_8HI
:
19016 case ALTIVEC_BUILTIN_VPERM_4SI
:
19017 case ALTIVEC_BUILTIN_VPERM_4SF
:
19018 case ALTIVEC_BUILTIN_VPERM_2DI
:
19019 case ALTIVEC_BUILTIN_VPERM_2DF
:
19020 case VSX_BUILTIN_VPERM_16QI
:
19021 case VSX_BUILTIN_VPERM_8HI
:
19022 case VSX_BUILTIN_VPERM_4SI
:
19023 case VSX_BUILTIN_VPERM_4SF
:
19024 case VSX_BUILTIN_VPERM_2DI
:
19025 case VSX_BUILTIN_VPERM_2DF
:
19029 /* unsigned args, signed return. */
19030 case VSX_BUILTIN_XVCVUXDSP
:
19031 case VSX_BUILTIN_XVCVUXDDP_UNS
:
19032 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF
:
19036 /* signed args, unsigned return. */
19037 case VSX_BUILTIN_XVCVDPUXDS_UNS
:
19038 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI
:
19039 case MISC_BUILTIN_UNPACK_TD
:
19040 case MISC_BUILTIN_UNPACK_V1TI
:
19044 /* unsigned arguments for 128-bit pack instructions. */
19045 case MISC_BUILTIN_PACK_TD
:
19046 case MISC_BUILTIN_PACK_V1TI
:
19055 /* Figure out how many args are present. */
19056 while (num_args
> 0 && h
.mode
[num_args
] == VOIDmode
)
19059 ret_type
= builtin_mode_to_type
[h
.mode
[0]][h
.uns_p
[0]];
19060 if (!ret_type
&& h
.uns_p
[0])
19061 ret_type
= builtin_mode_to_type
[h
.mode
[0]][0];
19064 fatal_error (input_location
,
19065 "internal error: builtin function %s had an unexpected "
19066 "return type %s", name
, GET_MODE_NAME (h
.mode
[0]));
19068 for (i
= 0; i
< (int) ARRAY_SIZE (arg_type
); i
++)
19069 arg_type
[i
] = NULL_TREE
;
19071 for (i
= 0; i
< num_args
; i
++)
19073 int m
= (int) h
.mode
[i
+1];
19074 int uns_p
= h
.uns_p
[i
+1];
19076 arg_type
[i
] = builtin_mode_to_type
[m
][uns_p
];
19077 if (!arg_type
[i
] && uns_p
)
19078 arg_type
[i
] = builtin_mode_to_type
[m
][0];
19081 fatal_error (input_location
,
19082 "internal error: builtin function %s, argument %d "
19083 "had unexpected argument type %s", name
, i
,
19084 GET_MODE_NAME (m
));
19087 builtin_hash_struct
**found
= builtin_hash_table
->find_slot (&h
, INSERT
);
19088 if (*found
== NULL
)
19090 h2
= ggc_alloc
<builtin_hash_struct
> ();
19094 h2
->type
= build_function_type_list (ret_type
, arg_type
[0], arg_type
[1],
19095 arg_type
[2], NULL_TREE
);
19098 return (*found
)->type
;
19102 rs6000_common_init_builtins (void)
19104 const struct builtin_description
*d
;
19107 tree opaque_ftype_opaque
= NULL_TREE
;
19108 tree opaque_ftype_opaque_opaque
= NULL_TREE
;
19109 tree opaque_ftype_opaque_opaque_opaque
= NULL_TREE
;
19110 tree v2si_ftype
= NULL_TREE
;
19111 tree v2si_ftype_qi
= NULL_TREE
;
19112 tree v2si_ftype_v2si_qi
= NULL_TREE
;
19113 tree v2si_ftype_int_qi
= NULL_TREE
;
19114 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
19116 if (!TARGET_PAIRED_FLOAT
)
19118 builtin_mode_to_type
[V2SImode
][0] = opaque_V2SI_type_node
;
19119 builtin_mode_to_type
[V2SFmode
][0] = opaque_V2SF_type_node
;
19122 /* Paired and SPE builtins are only available if you build a compiler with
19123 the appropriate options, so only create those builtins with the
19124 appropriate compiler option. Create Altivec and VSX builtins on machines
19125 with at least the general purpose extensions (970 and newer) to allow the
19126 use of the target attribute.. */
19128 if (TARGET_EXTRA_BUILTINS
)
19129 builtin_mask
|= RS6000_BTM_COMMON
;
19131 /* Add the ternary operators. */
19133 for (i
= 0; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
19136 HOST_WIDE_INT mask
= d
->mask
;
19138 if ((mask
& builtin_mask
) != mask
)
19140 if (TARGET_DEBUG_BUILTIN
)
19141 fprintf (stderr
, "rs6000_builtin, skip ternary %s\n", d
->name
);
19145 if (rs6000_overloaded_builtin_p (d
->code
))
19147 if (! (type
= opaque_ftype_opaque_opaque_opaque
))
19148 type
= opaque_ftype_opaque_opaque_opaque
19149 = build_function_type_list (opaque_V4SI_type_node
,
19150 opaque_V4SI_type_node
,
19151 opaque_V4SI_type_node
,
19152 opaque_V4SI_type_node
,
19157 enum insn_code icode
= d
->icode
;
19160 if (TARGET_DEBUG_BUILTIN
)
19161 fprintf (stderr
, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
19167 if (icode
== CODE_FOR_nothing
)
19169 if (TARGET_DEBUG_BUILTIN
)
19170 fprintf (stderr
, "rs6000_builtin, skip ternary %s (no code)\n",
19176 type
= builtin_function_type (insn_data
[icode
].operand
[0].mode
,
19177 insn_data
[icode
].operand
[1].mode
,
19178 insn_data
[icode
].operand
[2].mode
,
19179 insn_data
[icode
].operand
[3].mode
,
19183 def_builtin (d
->name
, type
, d
->code
);
19186 /* Add the binary operators. */
19188 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19190 machine_mode mode0
, mode1
, mode2
;
19192 HOST_WIDE_INT mask
= d
->mask
;
19194 if ((mask
& builtin_mask
) != mask
)
19196 if (TARGET_DEBUG_BUILTIN
)
19197 fprintf (stderr
, "rs6000_builtin, skip binary %s\n", d
->name
);
19201 if (rs6000_overloaded_builtin_p (d
->code
))
19203 if (! (type
= opaque_ftype_opaque_opaque
))
19204 type
= opaque_ftype_opaque_opaque
19205 = build_function_type_list (opaque_V4SI_type_node
,
19206 opaque_V4SI_type_node
,
19207 opaque_V4SI_type_node
,
19212 enum insn_code icode
= d
->icode
;
19215 if (TARGET_DEBUG_BUILTIN
)
19216 fprintf (stderr
, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
19222 if (icode
== CODE_FOR_nothing
)
19224 if (TARGET_DEBUG_BUILTIN
)
19225 fprintf (stderr
, "rs6000_builtin, skip binary %s (no code)\n",
19231 mode0
= insn_data
[icode
].operand
[0].mode
;
19232 mode1
= insn_data
[icode
].operand
[1].mode
;
19233 mode2
= insn_data
[icode
].operand
[2].mode
;
19235 if (mode0
== V2SImode
&& mode1
== V2SImode
&& mode2
== QImode
)
19237 if (! (type
= v2si_ftype_v2si_qi
))
19238 type
= v2si_ftype_v2si_qi
19239 = build_function_type_list (opaque_V2SI_type_node
,
19240 opaque_V2SI_type_node
,
19245 else if (mode0
== V2SImode
&& GET_MODE_CLASS (mode1
) == MODE_INT
19246 && mode2
== QImode
)
19248 if (! (type
= v2si_ftype_int_qi
))
19249 type
= v2si_ftype_int_qi
19250 = build_function_type_list (opaque_V2SI_type_node
,
19257 type
= builtin_function_type (mode0
, mode1
, mode2
, VOIDmode
,
19261 def_builtin (d
->name
, type
, d
->code
);
19264 /* Add the simple unary operators. */
19266 for (i
= 0; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19268 machine_mode mode0
, mode1
;
19270 HOST_WIDE_INT mask
= d
->mask
;
19272 if ((mask
& builtin_mask
) != mask
)
19274 if (TARGET_DEBUG_BUILTIN
)
19275 fprintf (stderr
, "rs6000_builtin, skip unary %s\n", d
->name
);
19279 if (rs6000_overloaded_builtin_p (d
->code
))
19281 if (! (type
= opaque_ftype_opaque
))
19282 type
= opaque_ftype_opaque
19283 = build_function_type_list (opaque_V4SI_type_node
,
19284 opaque_V4SI_type_node
,
19289 enum insn_code icode
= d
->icode
;
19292 if (TARGET_DEBUG_BUILTIN
)
19293 fprintf (stderr
, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
19299 if (icode
== CODE_FOR_nothing
)
19301 if (TARGET_DEBUG_BUILTIN
)
19302 fprintf (stderr
, "rs6000_builtin, skip unary %s (no code)\n",
19308 mode0
= insn_data
[icode
].operand
[0].mode
;
19309 mode1
= insn_data
[icode
].operand
[1].mode
;
19311 if (mode0
== V2SImode
&& mode1
== QImode
)
19313 if (! (type
= v2si_ftype_qi
))
19314 type
= v2si_ftype_qi
19315 = build_function_type_list (opaque_V2SI_type_node
,
19321 type
= builtin_function_type (mode0
, mode1
, VOIDmode
, VOIDmode
,
19325 def_builtin (d
->name
, type
, d
->code
);
19328 /* Add the simple no-argument operators. */
19330 for (i
= 0; i
< ARRAY_SIZE (bdesc_0arg
); i
++, d
++)
19332 machine_mode mode0
;
19334 HOST_WIDE_INT mask
= d
->mask
;
19336 if ((mask
& builtin_mask
) != mask
)
19338 if (TARGET_DEBUG_BUILTIN
)
19339 fprintf (stderr
, "rs6000_builtin, skip no-argument %s\n", d
->name
);
19342 if (rs6000_overloaded_builtin_p (d
->code
))
19344 if (!opaque_ftype_opaque
)
19345 opaque_ftype_opaque
19346 = build_function_type_list (opaque_V4SI_type_node
, NULL_TREE
);
19347 type
= opaque_ftype_opaque
;
19351 enum insn_code icode
= d
->icode
;
19354 if (TARGET_DEBUG_BUILTIN
)
19355 fprintf (stderr
, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19356 (long unsigned) i
);
19359 if (icode
== CODE_FOR_nothing
)
19361 if (TARGET_DEBUG_BUILTIN
)
19363 "rs6000_builtin, skip no-argument %s (no code)\n",
19367 mode0
= insn_data
[icode
].operand
[0].mode
;
19368 if (mode0
== V2SImode
)
19371 if (! (type
= v2si_ftype
))
19374 = build_function_type_list (opaque_V2SI_type_node
,
19380 type
= builtin_function_type (mode0
, VOIDmode
, VOIDmode
, VOIDmode
,
19383 def_builtin (d
->name
, type
, d
->code
);
19387 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
19389 init_float128_ibm (machine_mode mode
)
19391 if (!TARGET_XL_COMPAT
)
19393 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
19394 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
19395 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
19396 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
19398 if (!(TARGET_HARD_FLOAT
&& (TARGET_FPRS
|| TARGET_E500_DOUBLE
)))
19400 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
19401 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
19402 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
19403 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
19404 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
19405 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
19406 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
19408 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
19409 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
19410 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
19411 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
19412 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
19413 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
19414 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
19415 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
19418 if (!(TARGET_HARD_FLOAT
&& TARGET_FPRS
))
19419 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
19423 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
19424 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
19425 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
19426 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
19429 /* Add various conversions for IFmode to use the traditional TFmode
19431 if (mode
== IFmode
)
19433 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf2");
19434 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf2");
19435 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctftd2");
19436 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd2");
19437 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd2");
19438 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtdtf2");
19440 if (TARGET_POWERPC64
)
19442 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
19443 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
19444 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
19445 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
19450 /* Set up IEEE 128-bit floating point routines. Use different names if the
19451 arguments can be passed in a vector register. The historical PowerPC
19452 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
19453 continue to use that if we aren't using vector registers to pass IEEE
19454 128-bit floating point. */
19457 init_float128_ieee (machine_mode mode
)
19459 if (FLOAT128_VECTOR_P (mode
))
19461 set_optab_libfunc (add_optab
, mode
, "__addkf3");
19462 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
19463 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
19464 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
19465 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
19466 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
19467 set_optab_libfunc (abs_optab
, mode
, "__abstkf2");
19469 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
19470 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
19471 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
19472 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
19473 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
19474 set_optab_libfunc (le_optab
, mode
, "__lekf2");
19475 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
19477 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
19478 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
19479 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
19480 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
19482 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__extendtfkf2");
19483 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
19484 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__extendtfkf2");
19486 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__trunckftf2");
19487 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
19488 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__trunckftf2");
19490 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf2");
19491 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf2");
19492 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunckftd2");
19493 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd2");
19494 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd2");
19495 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtdkf2");
19497 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
19498 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
19499 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
19500 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
19502 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
19503 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
19504 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
19505 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
19507 if (TARGET_POWERPC64
)
19509 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti");
19510 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti");
19511 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf");
19512 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf");
19518 set_optab_libfunc (add_optab
, mode
, "_q_add");
19519 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
19520 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
19521 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
19522 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
19523 if (TARGET_PPC_GPOPT
)
19524 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
19526 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
19527 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
19528 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
19529 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
19530 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
19531 set_optab_libfunc (le_optab
, mode
, "_q_fle");
19533 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
19534 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
19535 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
19536 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
19537 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
19538 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
19539 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
19540 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
19545 rs6000_init_libfuncs (void)
19547 /* __float128 support. */
19548 if (TARGET_FLOAT128_TYPE
)
19550 init_float128_ibm (IFmode
);
19551 init_float128_ieee (KFmode
);
19554 /* AIX/Darwin/64-bit Linux quad floating point routines. */
19555 if (TARGET_LONG_DOUBLE_128
)
19557 if (!TARGET_IEEEQUAD
)
19558 init_float128_ibm (TFmode
);
19560 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
19562 init_float128_ieee (TFmode
);
19567 /* Expand a block clear operation, and return 1 if successful. Return 0
19568 if we should let the compiler generate normal code.
19570 operands[0] is the destination
19571 operands[1] is the length
19572 operands[3] is the alignment */
19575 expand_block_clear (rtx operands
[])
19577 rtx orig_dest
= operands
[0];
19578 rtx bytes_rtx
= operands
[1];
19579 rtx align_rtx
= operands
[3];
19580 bool constp
= (GET_CODE (bytes_rtx
) == CONST_INT
);
19581 HOST_WIDE_INT align
;
19582 HOST_WIDE_INT bytes
;
19587 /* If this is not a fixed size move, just call memcpy */
19591 /* This must be a fixed size alignment */
19592 gcc_assert (GET_CODE (align_rtx
) == CONST_INT
);
19593 align
= INTVAL (align_rtx
) * BITS_PER_UNIT
;
19595 /* Anything to clear? */
19596 bytes
= INTVAL (bytes_rtx
);
19600 /* Use the builtin memset after a point, to avoid huge code bloat.
19601 When optimize_size, avoid any significant code bloat; calling
19602 memset is about 4 instructions, so allow for one instruction to
19603 load zero and three to do clearing. */
19604 if (TARGET_ALTIVEC
&& align
>= 128)
19606 else if (TARGET_POWERPC64
&& (align
>= 64 || !STRICT_ALIGNMENT
))
19608 else if (TARGET_SPE
&& align
>= 64)
19613 if (optimize_size
&& bytes
> 3 * clear_step
)
19615 if (! optimize_size
&& bytes
> 8 * clear_step
)
19618 for (offset
= 0; bytes
> 0; offset
+= clear_bytes
, bytes
-= clear_bytes
)
19620 machine_mode mode
= BLKmode
;
19623 if (bytes
>= 16 && TARGET_ALTIVEC
&& align
>= 128)
19628 else if (bytes
>= 8 && TARGET_SPE
&& align
>= 64)
19633 else if (bytes
>= 8 && TARGET_POWERPC64
19634 && (align
>= 64 || !STRICT_ALIGNMENT
))
19638 if (offset
== 0 && align
< 64)
19642 /* If the address form is reg+offset with offset not a
19643 multiple of four, reload into reg indirect form here
19644 rather than waiting for reload. This way we get one
19645 reload, not one per store. */
19646 addr
= XEXP (orig_dest
, 0);
19647 if ((GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
19648 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
19649 && (INTVAL (XEXP (addr
, 1)) & 3) != 0)
19651 addr
= copy_addr_to_reg (addr
);
19652 orig_dest
= replace_equiv_address (orig_dest
, addr
);
19656 else if (bytes
>= 4 && (align
>= 32 || !STRICT_ALIGNMENT
))
19657 { /* move 4 bytes */
19661 else if (bytes
>= 2 && (align
>= 16 || !STRICT_ALIGNMENT
))
19662 { /* move 2 bytes */
19666 else /* move 1 byte at a time */
19672 dest
= adjust_address (orig_dest
, mode
, offset
);
19674 emit_move_insn (dest
, CONST0_RTX (mode
));
19680 /* Emit a potentially record-form instruction, setting DST from SRC.
19681 If DOT is 0, that is all; otherwise, set CCREG to the result of the
19682 signed comparison of DST with zero. If DOT is 1, the generated RTL
19683 doesn't care about the DST result; if DOT is 2, it does. If CCREG
19684 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
19685 a separate COMPARE. */
19688 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
19692 emit_move_insn (dst
, src
);
19696 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
19698 emit_move_insn (dst
, src
);
19699 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
19703 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
19706 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
19707 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
19711 rtx set
= gen_rtx_SET (dst
, src
);
19712 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
19716 /* Figure out the correct instructions to generate to load data for
19717 block compare. MODE is used for the read from memory, and
19718 data is zero extended if REG is wider than MODE. If LE code
19719 is being generated, bswap loads are used.
19721 REG is the destination register to move the data into.
19722 MEM is the memory block being read.
19723 MODE is the mode of memory to use for the read. */
19725 do_load_for_compare (rtx reg
, rtx mem
, machine_mode mode
)
19727 switch (GET_MODE (reg
))
19733 emit_insn (gen_zero_extendqidi2 (reg
, mem
));
19738 if (!BYTES_BIG_ENDIAN
)
19740 src
= gen_reg_rtx (HImode
);
19741 emit_insn (gen_bswaphi2 (src
, mem
));
19743 emit_insn (gen_zero_extendhidi2 (reg
, src
));
19749 if (!BYTES_BIG_ENDIAN
)
19751 src
= gen_reg_rtx (SImode
);
19752 emit_insn (gen_bswapsi2 (src
, mem
));
19754 emit_insn (gen_zero_extendsidi2 (reg
, src
));
19758 if (!BYTES_BIG_ENDIAN
)
19759 emit_insn (gen_bswapdi2 (reg
, mem
));
19761 emit_insn (gen_movdi (reg
, mem
));
19764 gcc_unreachable ();
19772 emit_insn (gen_zero_extendqisi2 (reg
, mem
));
19777 if (!BYTES_BIG_ENDIAN
)
19779 src
= gen_reg_rtx (HImode
);
19780 emit_insn (gen_bswaphi2 (src
, mem
));
19782 emit_insn (gen_zero_extendhisi2 (reg
, src
));
19786 if (!BYTES_BIG_ENDIAN
)
19787 emit_insn (gen_bswapsi2 (reg
, mem
));
19789 emit_insn (gen_movsi (reg
, mem
));
19792 /* DImode is larger than the destination reg so is not expected. */
19793 gcc_unreachable ();
19796 gcc_unreachable ();
19800 gcc_unreachable ();
19805 /* Select the mode to be used for reading the next chunk of bytes
19808 OFFSET is the current read offset from the beginning of the block.
19809 BYTES is the number of bytes remaining to be read.
19810 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
19811 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19812 the largest allowable mode. */
19813 static machine_mode
19814 select_block_compare_mode (unsigned HOST_WIDE_INT offset
,
19815 unsigned HOST_WIDE_INT bytes
,
19816 unsigned HOST_WIDE_INT align
, bool word_mode_ok
)
19818 /* First see if we can do a whole load unit
19819 as that will be more efficient than a larger load + shift. */
19821 /* If big, use biggest chunk.
19822 If exactly chunk size, use that size.
19823 If remainder can be done in one piece with shifting, do that.
19824 Do largest chunk possible without violating alignment rules. */
19826 /* The most we can read without potential page crossing. */
19827 unsigned HOST_WIDE_INT maxread
= ROUND_UP (bytes
, align
);
19829 if (word_mode_ok
&& bytes
>= UNITS_PER_WORD
)
19831 else if (bytes
== GET_MODE_SIZE (SImode
))
19833 else if (bytes
== GET_MODE_SIZE (HImode
))
19835 else if (bytes
== GET_MODE_SIZE (QImode
))
19837 else if (bytes
< GET_MODE_SIZE (SImode
)
19838 && offset
>= GET_MODE_SIZE (SImode
) - bytes
)
19839 /* This matches the case were we have SImode and 3 bytes
19840 and offset >= 1 and permits us to move back one and overlap
19841 with the previous read, thus avoiding having to shift
19842 unwanted bytes off of the input. */
19844 else if (word_mode_ok
&& bytes
< UNITS_PER_WORD
19845 && offset
>= UNITS_PER_WORD
-bytes
)
19846 /* Similarly, if we can use DImode it will get matched here and
19847 can do an overlapping read that ends at the end of the block. */
19849 else if (word_mode_ok
&& maxread
>= UNITS_PER_WORD
)
19850 /* It is safe to do all remaining in one load of largest size,
19851 possibly with a shift to get rid of unwanted bytes. */
19853 else if (maxread
>= GET_MODE_SIZE (SImode
))
19854 /* It is safe to do all remaining in one SImode load,
19855 possibly with a shift to get rid of unwanted bytes. */
19857 else if (bytes
> GET_MODE_SIZE (SImode
))
19859 else if (bytes
> GET_MODE_SIZE (HImode
))
19862 /* final fallback is do one byte */
19866 /* Compute the alignment of pointer+OFFSET where the original alignment
19867 of pointer was BASE_ALIGN. */
19868 static unsigned HOST_WIDE_INT
19869 compute_current_alignment (unsigned HOST_WIDE_INT base_align
,
19870 unsigned HOST_WIDE_INT offset
)
19874 return min (base_align
, offset
& -offset
);
19877 /* Expand a block compare operation, and return true if successful.
19878 Return false if we should let the compiler generate normal code,
19879 probably a memcmp call.
19881 OPERANDS[0] is the target (result).
19882 OPERANDS[1] is the first source.
19883 OPERANDS[2] is the second source.
19884 OPERANDS[3] is the length.
19885 OPERANDS[4] is the alignment. */
19887 expand_block_compare (rtx operands
[])
19889 rtx target
= operands
[0];
19890 rtx orig_src1
= operands
[1];
19891 rtx orig_src2
= operands
[2];
19892 rtx bytes_rtx
= operands
[3];
19893 rtx align_rtx
= operands
[4];
19894 HOST_WIDE_INT cmp_bytes
= 0;
19895 rtx src1
= orig_src1
;
19896 rtx src2
= orig_src2
;
19898 /* This case is complicated to handle because the subtract
19899 with carry instructions do not generate the 64-bit
19900 carry and so we must emit code to calculate it ourselves.
19901 We choose not to implement this yet. */
19902 if (TARGET_32BIT
&& TARGET_POWERPC64
)
19905 /* If this is not a fixed size compare, just call memcmp. */
19906 if (!CONST_INT_P (bytes_rtx
))
19909 /* This must be a fixed size alignment. */
19910 if (!CONST_INT_P (align_rtx
))
19913 unsigned int base_align
= UINTVAL (align_rtx
) / BITS_PER_UNIT
;
19915 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
19916 if (SLOW_UNALIGNED_ACCESS (word_mode
, MEM_ALIGN (orig_src1
))
19917 || SLOW_UNALIGNED_ACCESS (word_mode
, MEM_ALIGN (orig_src2
)))
19920 gcc_assert (GET_MODE (target
) == SImode
);
19922 /* Anything to move? */
19923 unsigned HOST_WIDE_INT bytes
= UINTVAL (bytes_rtx
);
19927 /* The code generated for p7 and older is not faster than glibc
19928 memcmp if alignment is small and length is not short, so bail
19929 out to avoid those conditions. */
19930 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
19931 && ((base_align
== 1 && bytes
> 16)
19932 || (base_align
== 2 && bytes
> 32)))
19935 rtx tmp_reg_src1
= gen_reg_rtx (word_mode
);
19936 rtx tmp_reg_src2
= gen_reg_rtx (word_mode
);
19937 /* P7/P8 code uses cond for subfc. but P9 uses
19938 it for cmpld which needs CCUNSmode. */
19940 if (TARGET_P9_MISC
)
19941 cond
= gen_reg_rtx (CCUNSmode
);
19943 cond
= gen_reg_rtx (CCmode
);
19945 /* If we have an LE target without ldbrx and word_mode is DImode,
19946 then we must avoid using word_mode. */
19947 int word_mode_ok
= !(!BYTES_BIG_ENDIAN
&& !TARGET_LDBRX
19948 && word_mode
== DImode
);
19950 /* Strategy phase. How many ops will this take and should we expand it? */
19952 unsigned HOST_WIDE_INT offset
= 0;
19953 machine_mode load_mode
=
19954 select_block_compare_mode (offset
, bytes
, base_align
, word_mode_ok
);
19955 unsigned int load_mode_size
= GET_MODE_SIZE (load_mode
);
19957 /* We don't want to generate too much code. */
19958 unsigned HOST_WIDE_INT max_bytes
=
19959 load_mode_size
* (unsigned HOST_WIDE_INT
) rs6000_block_compare_inline_limit
;
19960 if (!IN_RANGE (bytes
, 1, max_bytes
))
19963 bool generate_6432_conversion
= false;
19964 rtx convert_label
= NULL
;
19965 rtx final_label
= NULL
;
19967 /* Example of generated code for 18 bytes aligned 1 byte.
19968 Compiled with -fno-reorder-blocks for clarity.
19986 .L6487: #convert_label
19990 .L6488: #final_label
19993 We start off with DImode for two blocks that jump to the DI->SI conversion
19994 if the difference is found there, then a final block of HImode that skips
19995 the DI->SI conversion. */
19999 unsigned int align
= compute_current_alignment (base_align
, offset
);
20000 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
)
20001 load_mode
= select_block_compare_mode (offset
, bytes
, align
,
20004 load_mode
= select_block_compare_mode (0, bytes
, align
, word_mode_ok
);
20005 load_mode_size
= GET_MODE_SIZE (load_mode
);
20006 if (bytes
>= load_mode_size
)
20007 cmp_bytes
= load_mode_size
;
20008 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
)
20010 /* Move this load back so it doesn't go past the end.
20011 P8/P9 can do this efficiently. */
20012 unsigned int extra_bytes
= load_mode_size
- bytes
;
20014 if (extra_bytes
< offset
)
20016 offset
-= extra_bytes
;
20017 cmp_bytes
= load_mode_size
;
20022 /* P7 and earlier can't do the overlapping load trick fast,
20023 so this forces a non-overlapping load and a shift to get
20024 rid of the extra bytes. */
20027 src1
= adjust_address (orig_src1
, load_mode
, offset
);
20028 src2
= adjust_address (orig_src2
, load_mode
, offset
);
20030 if (!REG_P (XEXP (src1
, 0)))
20032 rtx src1_reg
= copy_addr_to_reg (XEXP (src1
, 0));
20033 src1
= replace_equiv_address (src1
, src1_reg
);
20035 set_mem_size (src1
, cmp_bytes
);
20037 if (!REG_P (XEXP (src2
, 0)))
20039 rtx src2_reg
= copy_addr_to_reg (XEXP (src2
, 0));
20040 src2
= replace_equiv_address (src2
, src2_reg
);
20042 set_mem_size (src2
, cmp_bytes
);
20044 do_load_for_compare (tmp_reg_src1
, src1
, load_mode
);
20045 do_load_for_compare (tmp_reg_src2
, src2
, load_mode
);
20047 if (cmp_bytes
< load_mode_size
)
20049 /* Shift unneeded bytes off. */
20050 rtx sh
= GEN_INT (BITS_PER_UNIT
* (load_mode_size
- cmp_bytes
));
20051 if (word_mode
== DImode
)
20053 emit_insn (gen_lshrdi3 (tmp_reg_src1
, tmp_reg_src1
, sh
));
20054 emit_insn (gen_lshrdi3 (tmp_reg_src2
, tmp_reg_src2
, sh
));
20058 emit_insn (gen_lshrsi3 (tmp_reg_src1
, tmp_reg_src1
, sh
));
20059 emit_insn (gen_lshrsi3 (tmp_reg_src2
, tmp_reg_src2
, sh
));
20063 int remain
= bytes
- cmp_bytes
;
20064 if (GET_MODE_SIZE (GET_MODE (target
)) > GET_MODE_SIZE (load_mode
))
20066 /* Target is larger than load size so we don't need to
20067 reduce result size. */
20069 /* We previously did a block that need 64->32 conversion but
20070 the current block does not, so a label is needed to jump
20072 if (generate_6432_conversion
&& !final_label
)
20073 final_label
= gen_label_rtx ();
20077 /* This is not the last block, branch to the end if the result
20078 of this subtract is not zero. */
20080 final_label
= gen_label_rtx ();
20081 rtx fin_ref
= gen_rtx_LABEL_REF (VOIDmode
, final_label
);
20082 rtx tmp
= gen_rtx_MINUS (word_mode
, tmp_reg_src1
, tmp_reg_src2
);
20083 rtx cr
= gen_reg_rtx (CCmode
);
20084 rs6000_emit_dot_insn (tmp_reg_src2
, tmp
, 2, cr
);
20085 emit_insn (gen_movsi (target
,
20086 gen_lowpart (SImode
, tmp_reg_src2
)));
20087 rtx ne_rtx
= gen_rtx_NE (VOIDmode
, cr
, const0_rtx
);
20088 rtx ifelse
= gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
20090 rtx j
= emit_jump_insn (gen_rtx_SET (pc_rtx
, ifelse
));
20091 JUMP_LABEL (j
) = final_label
;
20092 LABEL_NUSES (final_label
) += 1;
20096 if (word_mode
== DImode
)
20098 emit_insn (gen_subdi3 (tmp_reg_src2
, tmp_reg_src1
,
20100 emit_insn (gen_movsi (target
,
20101 gen_lowpart (SImode
, tmp_reg_src2
)));
20104 emit_insn (gen_subsi3 (target
, tmp_reg_src1
, tmp_reg_src2
));
20108 rtx fin_ref
= gen_rtx_LABEL_REF (VOIDmode
, final_label
);
20109 rtx j
= emit_jump_insn (gen_rtx_SET (pc_rtx
, fin_ref
));
20110 JUMP_LABEL(j
) = final_label
;
20111 LABEL_NUSES (final_label
) += 1;
20118 /* Do we need a 64->32 conversion block? We need the 64->32
20119 conversion even if target size == load_mode size because
20120 the subtract generates one extra bit. */
20121 generate_6432_conversion
= true;
20125 if (!convert_label
)
20126 convert_label
= gen_label_rtx ();
20128 /* Compare to zero and branch to convert_label if not zero. */
20129 rtx cvt_ref
= gen_rtx_LABEL_REF (VOIDmode
, convert_label
);
20130 if (TARGET_P9_MISC
)
20132 /* Generate a compare, and convert with a setb later. */
20133 rtx cmp
= gen_rtx_COMPARE (CCUNSmode
, tmp_reg_src1
,
20135 emit_insn (gen_rtx_SET (cond
, cmp
));
20138 /* Generate a subfc. and use the longer
20139 sequence for conversion. */
20141 emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2
, tmp_reg_src2
,
20142 tmp_reg_src1
, cond
));
20144 emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2
, tmp_reg_src2
,
20145 tmp_reg_src1
, cond
));
20146 rtx ne_rtx
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
20147 rtx ifelse
= gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
20149 rtx j
= emit_jump_insn (gen_rtx_SET (pc_rtx
, ifelse
));
20150 JUMP_LABEL(j
) = convert_label
;
20151 LABEL_NUSES (convert_label
) += 1;
20155 /* Just do the subtract/compare. Since this is the last block
20156 the convert code will be generated immediately following. */
20157 if (TARGET_P9_MISC
)
20159 rtx cmp
= gen_rtx_COMPARE (CCUNSmode
, tmp_reg_src1
,
20161 emit_insn (gen_rtx_SET (cond
, cmp
));
20165 emit_insn (gen_subfdi3_carry (tmp_reg_src2
, tmp_reg_src2
,
20168 emit_insn (gen_subfsi3_carry (tmp_reg_src2
, tmp_reg_src2
,
20173 offset
+= cmp_bytes
;
20174 bytes
-= cmp_bytes
;
20177 if (generate_6432_conversion
)
20180 emit_label (convert_label
);
20182 /* We need to produce DI result from sub, then convert to target SI
20183 while maintaining <0 / ==0 / >0 properties. This sequence works:
20189 This is an alternate one Segher cooked up if somebody
20190 wants to expand this for something that doesn't have popcntd:
20197 And finally, p9 can just do this:
20201 if (TARGET_P9_MISC
)
20203 emit_insn (gen_setb_unsigned (target
, cond
));
20209 rtx tmp_reg_ca
= gen_reg_rtx (DImode
);
20210 emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca
));
20211 emit_insn (gen_popcntddi2 (tmp_reg_src2
, tmp_reg_src2
));
20212 emit_insn (gen_iordi3 (tmp_reg_src2
, tmp_reg_src2
, tmp_reg_ca
));
20213 emit_insn (gen_movsi (target
, gen_lowpart (SImode
, tmp_reg_src2
)));
20217 rtx tmp_reg_ca
= gen_reg_rtx (SImode
);
20218 emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca
));
20219 emit_insn (gen_popcntdsi2 (tmp_reg_src2
, tmp_reg_src2
));
20220 emit_insn (gen_iorsi3 (target
, tmp_reg_src2
, tmp_reg_ca
));
20226 emit_label (final_label
);
20228 gcc_assert (bytes
== 0);
20232 /* Generate alignment check and branch code to set up for
20233 strncmp when we don't have DI alignment.
20234 STRNCMP_LABEL is the label to branch if there is a page crossing.
20235 SRC is the string pointer to be examined.
20236 BYTES is the max number of bytes to compare. */
20238 expand_strncmp_align_check (rtx strncmp_label
, rtx src
, HOST_WIDE_INT bytes
)
20240 rtx lab_ref
= gen_rtx_LABEL_REF (VOIDmode
, strncmp_label
);
20241 rtx src_check
= copy_addr_to_reg (XEXP (src
, 0));
20242 if (GET_MODE (src_check
) == SImode
)
20243 emit_insn (gen_andsi3 (src_check
, src_check
, GEN_INT (0xfff)));
20245 emit_insn (gen_anddi3 (src_check
, src_check
, GEN_INT (0xfff)));
20246 rtx cond
= gen_reg_rtx (CCmode
);
20247 emit_move_insn (cond
, gen_rtx_COMPARE (CCmode
, src_check
,
20248 GEN_INT (4096 - bytes
)));
20250 rtx cmp_rtx
= gen_rtx_LT (VOIDmode
, cond
, const0_rtx
);
20252 rtx ifelse
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cmp_rtx
,
20254 rtx j
= emit_jump_insn (gen_rtx_SET (pc_rtx
, ifelse
));
20255 JUMP_LABEL (j
) = strncmp_label
;
20256 LABEL_NUSES (strncmp_label
) += 1;
20259 /* Expand a string compare operation with length, and return
20260 true if successful. Return false if we should let the
20261 compiler generate normal code, probably a strncmp call.
20263 OPERANDS[0] is the target (result).
20264 OPERANDS[1] is the first source.
20265 OPERANDS[2] is the second source.
20266 If NO_LENGTH is zero, then:
20267 OPERANDS[3] is the length.
20268 OPERANDS[4] is the alignment in bytes.
20269 If NO_LENGTH is nonzero, then:
20270 OPERANDS[3] is the alignment in bytes. */
20272 expand_strn_compare (rtx operands
[], int no_length
)
20274 rtx target
= operands
[0];
20275 rtx orig_src1
= operands
[1];
20276 rtx orig_src2
= operands
[2];
20277 rtx bytes_rtx
, align_rtx
;
20281 align_rtx
= operands
[3];
20285 bytes_rtx
= operands
[3];
20286 align_rtx
= operands
[4];
20288 unsigned HOST_WIDE_INT cmp_bytes
= 0;
20289 rtx src1
= orig_src1
;
20290 rtx src2
= orig_src2
;
20292 /* If we have a length, it must be constant. This simplifies things
20293 a bit as we don't have to generate code to check if we've exceeded
20294 the length. Later this could be expanded to handle this case. */
20295 if (!no_length
&& !CONST_INT_P (bytes_rtx
))
20298 /* This must be a fixed size alignment. */
20299 if (!CONST_INT_P (align_rtx
))
20302 unsigned int base_align
= UINTVAL (align_rtx
);
20303 int align1
= MEM_ALIGN (orig_src1
) / BITS_PER_UNIT
;
20304 int align2
= MEM_ALIGN (orig_src2
) / BITS_PER_UNIT
;
20306 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
20307 if (SLOW_UNALIGNED_ACCESS (word_mode
, align1
)
20308 || SLOW_UNALIGNED_ACCESS (word_mode
, align2
))
20311 gcc_assert (GET_MODE (target
) == SImode
);
20313 /* If we have an LE target without ldbrx and word_mode is DImode,
20314 then we must avoid using word_mode. */
20315 int word_mode_ok
= !(!BYTES_BIG_ENDIAN
&& !TARGET_LDBRX
20316 && word_mode
== DImode
);
20318 unsigned int word_mode_size
= GET_MODE_SIZE (word_mode
);
20320 unsigned HOST_WIDE_INT offset
= 0;
20321 unsigned HOST_WIDE_INT bytes
; /* N from the strncmp args if available. */
20322 unsigned HOST_WIDE_INT compare_length
; /* How much to compare inline. */
20324 /* Use this as a standin to determine the mode to use. */
20325 bytes
= rs6000_string_compare_inline_limit
* word_mode_size
;
20327 bytes
= UINTVAL (bytes_rtx
);
20329 machine_mode load_mode
=
20330 select_block_compare_mode (offset
, bytes
, base_align
, word_mode_ok
);
20331 unsigned int load_mode_size
= GET_MODE_SIZE (load_mode
);
20332 compare_length
= rs6000_string_compare_inline_limit
* load_mode_size
;
20334 /* If we have equality at the end of the last compare and we have not
20335 found the end of the string, we need to call strcmp/strncmp to
20336 compare the remainder. */
20337 bool equality_compare_rest
= false;
20341 bytes
= compare_length
;
20342 equality_compare_rest
= true;
20346 if (bytes
<= compare_length
)
20347 compare_length
= bytes
;
20349 equality_compare_rest
= true;
20352 rtx result_reg
= gen_reg_rtx (word_mode
);
20353 rtx final_move_label
= gen_label_rtx ();
20354 rtx final_label
= gen_label_rtx ();
20355 rtx begin_compare_label
= NULL
;
20357 if (base_align
< 8)
20359 /* Generate code that checks distance to 4k boundary for this case. */
20360 begin_compare_label
= gen_label_rtx ();
20361 rtx strncmp_label
= gen_label_rtx ();
20364 /* Strncmp for power8 in glibc does this:
20366 cmpldi cr7,r8,4096-16
20367 bgt cr7,L(pagecross) */
20369 /* Make sure that the length we use for the alignment test and
20370 the subsequent code generation are in agreement so we do not
20371 go past the length we tested for a 4k boundary crossing. */
20372 unsigned HOST_WIDE_INT align_test
= compare_length
;
20373 if (align_test
< 8)
20375 align_test
= HOST_WIDE_INT_1U
<< ceil_log2 (align_test
);
20376 base_align
= align_test
;
20380 align_test
= ROUND_UP (align_test
, 8);
20385 expand_strncmp_align_check (strncmp_label
, src1
, align_test
);
20387 expand_strncmp_align_check (strncmp_label
, src2
, align_test
);
20389 /* Now generate the following sequence:
20390 - branch to begin_compare
20393 - branch to final_label
20394 - begin_compare_label */
20396 rtx cmp_ref
= gen_rtx_LABEL_REF (VOIDmode
, begin_compare_label
);
20397 jmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, cmp_ref
));
20398 JUMP_LABEL (jmp
) = begin_compare_label
;
20399 LABEL_NUSES (begin_compare_label
) += 1;
20402 emit_label (strncmp_label
);
20404 if (!REG_P (XEXP (src1
, 0)))
20406 rtx src1_reg
= copy_addr_to_reg (XEXP (src1
, 0));
20407 src1
= replace_equiv_address (src1
, src1_reg
);
20410 if (!REG_P (XEXP (src2
, 0)))
20412 rtx src2_reg
= copy_addr_to_reg (XEXP (src2
, 0));
20413 src2
= replace_equiv_address (src2
, src2_reg
);
20418 tree fun
= builtin_decl_explicit (BUILT_IN_STRCMP
);
20419 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20420 target
, LCT_NORMAL
, GET_MODE (target
), 2,
20421 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20422 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
);
20426 /* -m32 -mpowerpc64 results in word_mode being DImode even
20427 though otherwise it is 32-bit. The length arg to strncmp
20428 is a size_t which will be the same size as pointers. */
20431 len_rtx
= gen_reg_rtx (DImode
);
20433 len_rtx
= gen_reg_rtx (SImode
);
20435 emit_move_insn (len_rtx
, bytes_rtx
);
20437 tree fun
= builtin_decl_explicit (BUILT_IN_STRNCMP
);
20438 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20439 target
, LCT_NORMAL
, GET_MODE (target
), 3,
20440 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20441 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
,
20442 len_rtx
, GET_MODE (len_rtx
));
20445 rtx fin_ref
= gen_rtx_LABEL_REF (VOIDmode
, final_label
);
20446 jmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, fin_ref
));
20447 JUMP_LABEL (jmp
) = final_label
;
20448 LABEL_NUSES (final_label
) += 1;
20450 emit_label (begin_compare_label
);
20453 rtx cleanup_label
= NULL
;
20454 rtx tmp_reg_src1
= gen_reg_rtx (word_mode
);
20455 rtx tmp_reg_src2
= gen_reg_rtx (word_mode
);
20457 /* Generate sequence of ld/ldbrx, cmpb to compare out
20458 to the length specified. */
20459 unsigned HOST_WIDE_INT bytes_to_compare
= compare_length
;
20460 while (bytes_to_compare
> 0)
20462 /* Compare sequence:
20463 check each 8B with: ld/ld cmpd bne
20464 If equal, use rldicr/cmpb to check for zero byte.
20465 cleanup code at end:
20466 cmpb get byte that differs
20467 cmpb look for zero byte
20469 cntlzd get bit of first zero/diff byte
20470 subfic convert for rldcl use
20471 rldcl rldcl extract diff/zero byte
20472 subf subtract for final result
20474 The last compare can branch around the cleanup code if the
20475 result is zero because the strings are exactly equal. */
20476 unsigned int align
= compute_current_alignment (base_align
, offset
);
20477 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
)
20478 load_mode
= select_block_compare_mode (offset
, bytes_to_compare
, align
,
20481 load_mode
= select_block_compare_mode (0, bytes_to_compare
, align
,
20483 load_mode_size
= GET_MODE_SIZE (load_mode
);
20484 if (bytes_to_compare
>= load_mode_size
)
20485 cmp_bytes
= load_mode_size
;
20486 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
)
20488 /* Move this load back so it doesn't go past the end.
20489 P8/P9 can do this efficiently. */
20490 unsigned int extra_bytes
= load_mode_size
- bytes_to_compare
;
20491 cmp_bytes
= bytes_to_compare
;
20492 if (extra_bytes
< offset
)
20494 offset
-= extra_bytes
;
20495 cmp_bytes
= load_mode_size
;
20496 bytes_to_compare
= cmp_bytes
;
20500 /* P7 and earlier can't do the overlapping load trick fast,
20501 so this forces a non-overlapping load and a shift to get
20502 rid of the extra bytes. */
20503 cmp_bytes
= bytes_to_compare
;
20505 src1
= adjust_address (orig_src1
, load_mode
, offset
);
20506 src2
= adjust_address (orig_src2
, load_mode
, offset
);
20508 if (!REG_P (XEXP (src1
, 0)))
20510 rtx src1_reg
= copy_addr_to_reg (XEXP (src1
, 0));
20511 src1
= replace_equiv_address (src1
, src1_reg
);
20513 set_mem_size (src1
, cmp_bytes
);
20515 if (!REG_P (XEXP (src2
, 0)))
20517 rtx src2_reg
= copy_addr_to_reg (XEXP (src2
, 0));
20518 src2
= replace_equiv_address (src2
, src2_reg
);
20520 set_mem_size (src2
, cmp_bytes
);
20522 do_load_for_compare (tmp_reg_src1
, src1
, load_mode
);
20523 do_load_for_compare (tmp_reg_src2
, src2
, load_mode
);
20525 /* We must always left-align the data we read, and
20526 clear any bytes to the right that are beyond the string.
20527 Otherwise the cmpb sequence won't produce the correct
20528 results. The beginning of the compare will be done
20529 with word_mode so will not have any extra shifts or
20532 if (load_mode_size
< word_mode_size
)
20534 /* Rotate left first. */
20535 rtx sh
= GEN_INT (BITS_PER_UNIT
* (word_mode_size
- load_mode_size
));
20536 if (word_mode
== DImode
)
20538 emit_insn (gen_rotldi3 (tmp_reg_src1
, tmp_reg_src1
, sh
));
20539 emit_insn (gen_rotldi3 (tmp_reg_src2
, tmp_reg_src2
, sh
));
20543 emit_insn (gen_rotlsi3 (tmp_reg_src1
, tmp_reg_src1
, sh
));
20544 emit_insn (gen_rotlsi3 (tmp_reg_src2
, tmp_reg_src2
, sh
));
20548 if (cmp_bytes
< word_mode_size
)
20550 /* Now clear right. This plus the rotate can be
20551 turned into a rldicr instruction. */
20552 HOST_WIDE_INT mb
= BITS_PER_UNIT
* (word_mode_size
- cmp_bytes
);
20553 rtx mask
= GEN_INT (HOST_WIDE_INT_M1U
<< mb
);
20554 if (word_mode
== DImode
)
20556 emit_insn (gen_anddi3_mask (tmp_reg_src1
, tmp_reg_src1
, mask
));
20557 emit_insn (gen_anddi3_mask (tmp_reg_src2
, tmp_reg_src2
, mask
));
20561 emit_insn (gen_andsi3_mask (tmp_reg_src1
, tmp_reg_src1
, mask
));
20562 emit_insn (gen_andsi3_mask (tmp_reg_src2
, tmp_reg_src2
, mask
));
20566 /* Cases to handle. A and B are chunks of the two strings.
20567 1: Not end of comparison:
20568 A != B: branch to cleanup code to compute result.
20569 A == B: check for 0 byte, next block if not found.
20570 2: End of the inline comparison:
20571 A != B: branch to cleanup code to compute result.
20572 A == B: check for 0 byte, call strcmp/strncmp
20573 3: compared requested N bytes:
20574 A == B: branch to result 0.
20575 A != B: cleanup code to compute result. */
20577 unsigned HOST_WIDE_INT remain
= bytes_to_compare
- cmp_bytes
;
20580 if (remain
> 0 || equality_compare_rest
)
20582 /* Branch to cleanup code, otherwise fall through to do
20584 if (!cleanup_label
)
20585 cleanup_label
= gen_label_rtx ();
20586 dst_label
= cleanup_label
;
20589 /* Branch to end and produce result of 0. */
20590 dst_label
= final_move_label
;
20592 rtx lab_ref
= gen_rtx_LABEL_REF (VOIDmode
, dst_label
);
20593 rtx cond
= gen_reg_rtx (CCmode
);
20595 /* Always produce the 0 result, it is needed if
20596 cmpb finds a 0 byte in this chunk. */
20597 rtx tmp
= gen_rtx_MINUS (word_mode
, tmp_reg_src1
, tmp_reg_src2
);
20598 rs6000_emit_dot_insn (result_reg
, tmp
, 1, cond
);
20601 if (remain
== 0 && !equality_compare_rest
)
20602 cmp_rtx
= gen_rtx_EQ (VOIDmode
, cond
, const0_rtx
);
20604 cmp_rtx
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
20606 rtx ifelse
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cmp_rtx
,
20608 rtx j
= emit_jump_insn (gen_rtx_SET (pc_rtx
, ifelse
));
20609 JUMP_LABEL (j
) = dst_label
;
20610 LABEL_NUSES (dst_label
) += 1;
20612 if (remain
> 0 || equality_compare_rest
)
20614 /* Generate a cmpb to test for a 0 byte and branch
20615 to final result if found. */
20616 rtx cmpb_zero
= gen_reg_rtx (word_mode
);
20617 rtx lab_ref_fin
= gen_rtx_LABEL_REF (VOIDmode
, final_move_label
);
20618 rtx condz
= gen_reg_rtx (CCmode
);
20619 rtx zero_reg
= gen_reg_rtx (word_mode
);
20620 if (word_mode
== SImode
)
20622 emit_insn (gen_movsi (zero_reg
, GEN_INT (0)));
20623 emit_insn (gen_cmpbsi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20624 if (cmp_bytes
< word_mode_size
)
20626 /* Don't want to look at zero bytes past end. */
20628 BITS_PER_UNIT
* (word_mode_size
- cmp_bytes
);
20629 rtx mask
= GEN_INT (HOST_WIDE_INT_M1U
<< mb
);
20630 emit_insn (gen_andsi3_mask (cmpb_zero
, cmpb_zero
, mask
));
20635 emit_insn (gen_movdi (zero_reg
, GEN_INT (0)));
20636 emit_insn (gen_cmpbdi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20637 if (cmp_bytes
< word_mode_size
)
20639 /* Don't want to look at zero bytes past end. */
20641 BITS_PER_UNIT
* (word_mode_size
- cmp_bytes
);
20642 rtx mask
= GEN_INT (HOST_WIDE_INT_M1U
<< mb
);
20643 emit_insn (gen_anddi3_mask (cmpb_zero
, cmpb_zero
, mask
));
20647 emit_move_insn (condz
, gen_rtx_COMPARE (CCmode
, cmpb_zero
, zero_reg
));
20648 rtx cmpnz_rtx
= gen_rtx_NE (VOIDmode
, condz
, const0_rtx
);
20649 rtx ifelse
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cmpnz_rtx
,
20650 lab_ref_fin
, pc_rtx
);
20651 rtx j2
= emit_jump_insn (gen_rtx_SET (pc_rtx
, ifelse
));
20652 JUMP_LABEL (j2
) = final_move_label
;
20653 LABEL_NUSES (final_move_label
) += 1;
20657 offset
+= cmp_bytes
;
20658 bytes_to_compare
-= cmp_bytes
;
20661 if (equality_compare_rest
)
20663 /* Update pointers past what has been compared already. */
20664 src1
= adjust_address (orig_src1
, load_mode
, offset
);
20665 src2
= adjust_address (orig_src2
, load_mode
, offset
);
20667 if (!REG_P (XEXP (src1
, 0)))
20669 rtx src1_reg
= copy_addr_to_reg (XEXP (src1
, 0));
20670 src1
= replace_equiv_address (src1
, src1_reg
);
20672 set_mem_size (src1
, cmp_bytes
);
20674 if (!REG_P (XEXP (src2
, 0)))
20676 rtx src2_reg
= copy_addr_to_reg (XEXP (src2
, 0));
20677 src2
= replace_equiv_address (src2
, src2_reg
);
20679 set_mem_size (src2
, cmp_bytes
);
20681 /* Construct call to strcmp/strncmp to compare the rest of the string. */
20684 tree fun
= builtin_decl_explicit (BUILT_IN_STRCMP
);
20685 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20686 target
, LCT_NORMAL
, GET_MODE (target
), 2,
20687 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20688 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
);
20694 len_rtx
= gen_reg_rtx (DImode
);
20696 len_rtx
= gen_reg_rtx (SImode
);
20698 emit_move_insn (len_rtx
, GEN_INT (bytes
- compare_length
));
20699 tree fun
= builtin_decl_explicit (BUILT_IN_STRNCMP
);
20700 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20701 target
, LCT_NORMAL
, GET_MODE (target
), 3,
20702 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20703 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
,
20704 len_rtx
, GET_MODE (len_rtx
));
20707 rtx fin_ref
= gen_rtx_LABEL_REF (VOIDmode
, final_label
);
20708 rtx jmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, fin_ref
));
20709 JUMP_LABEL (jmp
) = final_label
;
20710 LABEL_NUSES (final_label
) += 1;
20715 emit_label (cleanup_label
);
20717 /* Generate the final sequence that identifies the differing
20718 byte and generates the final result, taking into account
20721 cmpb cmpb_result1, src1, src2
20722 cmpb cmpb_result2, src1, zero
20723 orc cmpb_result1, cmp_result1, cmpb_result2
20724 cntlzd get bit of first zero/diff byte
20725 addi convert for rldcl use
20726 rldcl rldcl extract diff/zero byte
20727 subf subtract for final result
20730 rtx cmpb_diff
= gen_reg_rtx (word_mode
);
20731 rtx cmpb_zero
= gen_reg_rtx (word_mode
);
20732 rtx rot_amt
= gen_reg_rtx (word_mode
);
20733 rtx zero_reg
= gen_reg_rtx (word_mode
);
20735 rtx rot1_1
= gen_reg_rtx (word_mode
);
20736 rtx rot1_2
= gen_reg_rtx (word_mode
);
20737 rtx rot2_1
= gen_reg_rtx (word_mode
);
20738 rtx rot2_2
= gen_reg_rtx (word_mode
);
20740 if (word_mode
== SImode
)
20742 emit_insn (gen_cmpbsi3 (cmpb_diff
, tmp_reg_src1
, tmp_reg_src2
));
20743 emit_insn (gen_movsi (zero_reg
, GEN_INT (0)));
20744 emit_insn (gen_cmpbsi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20745 emit_insn (gen_one_cmplsi2 (cmpb_diff
,cmpb_diff
));
20746 emit_insn (gen_iorsi3 (cmpb_diff
, cmpb_diff
, cmpb_zero
));
20747 emit_insn (gen_clzsi2 (rot_amt
, cmpb_diff
));
20748 emit_insn (gen_addsi3 (rot_amt
, rot_amt
, GEN_INT (8)));
20749 emit_insn (gen_rotlsi3 (rot1_1
, tmp_reg_src1
,
20750 gen_lowpart (SImode
, rot_amt
)));
20751 emit_insn (gen_andsi3_mask (rot1_2
, rot1_1
, GEN_INT (0xff)));
20752 emit_insn (gen_rotlsi3 (rot2_1
, tmp_reg_src2
,
20753 gen_lowpart (SImode
, rot_amt
)));
20754 emit_insn (gen_andsi3_mask (rot2_2
, rot2_1
, GEN_INT (0xff)));
20755 emit_insn (gen_subsi3 (result_reg
, rot1_2
, rot2_2
));
20759 emit_insn (gen_cmpbdi3 (cmpb_diff
, tmp_reg_src1
, tmp_reg_src2
));
20760 emit_insn (gen_movdi (zero_reg
, GEN_INT (0)));
20761 emit_insn (gen_cmpbdi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20762 emit_insn (gen_one_cmpldi2 (cmpb_diff
,cmpb_diff
));
20763 emit_insn (gen_iordi3 (cmpb_diff
, cmpb_diff
, cmpb_zero
));
20764 emit_insn (gen_clzdi2 (rot_amt
, cmpb_diff
));
20765 emit_insn (gen_adddi3 (rot_amt
, rot_amt
, GEN_INT (8)));
20766 emit_insn (gen_rotldi3 (rot1_1
, tmp_reg_src1
,
20767 gen_lowpart (SImode
, rot_amt
)));
20768 emit_insn (gen_anddi3_mask (rot1_2
, rot1_1
, GEN_INT (0xff)));
20769 emit_insn (gen_rotldi3 (rot2_1
, tmp_reg_src2
,
20770 gen_lowpart (SImode
, rot_amt
)));
20771 emit_insn (gen_anddi3_mask (rot2_2
, rot2_1
, GEN_INT (0xff)));
20772 emit_insn (gen_subdi3 (result_reg
, rot1_2
, rot2_2
));
20775 emit_label (final_move_label
);
20776 emit_insn (gen_movsi (target
,
20777 gen_lowpart (SImode
, result_reg
)));
20778 emit_label (final_label
);
20782 /* Expand a block move operation, and return 1 if successful. Return 0
20783 if we should let the compiler generate normal code.
20785 operands[0] is the destination
20786 operands[1] is the source
20787 operands[2] is the length
20788 operands[3] is the alignment */
20790 #define MAX_MOVE_REG 4
20793 expand_block_move (rtx operands
[])
20795 rtx orig_dest
= operands
[0];
20796 rtx orig_src
= operands
[1];
20797 rtx bytes_rtx
= operands
[2];
20798 rtx align_rtx
= operands
[3];
20799 int constp
= (GET_CODE (bytes_rtx
) == CONST_INT
);
20804 rtx stores
[MAX_MOVE_REG
];
20807 /* If this is not a fixed size move, just call memcpy */
20811 /* This must be a fixed size alignment */
20812 gcc_assert (GET_CODE (align_rtx
) == CONST_INT
);
20813 align
= INTVAL (align_rtx
) * BITS_PER_UNIT
;
20815 /* Anything to move? */
20816 bytes
= INTVAL (bytes_rtx
);
20820 if (bytes
> rs6000_block_move_inline_limit
)
20823 for (offset
= 0; bytes
> 0; offset
+= move_bytes
, bytes
-= move_bytes
)
20826 rtx (*movmemsi
) (rtx
, rtx
, rtx
, rtx
);
20827 rtx (*mov
) (rtx
, rtx
);
20829 machine_mode mode
= BLKmode
;
20832 /* Altivec first, since it will be faster than a string move
20833 when it applies, and usually not significantly larger. */
20834 if (TARGET_ALTIVEC
&& bytes
>= 16 && align
>= 128)
20838 gen_func
.mov
= gen_movv4si
;
20840 else if (TARGET_SPE
&& bytes
>= 8 && align
>= 64)
20844 gen_func
.mov
= gen_movv2si
;
20846 else if (TARGET_STRING
20847 && bytes
> 24 /* move up to 32 bytes at a time */
20853 && ! fixed_regs
[10]
20854 && ! fixed_regs
[11]
20855 && ! fixed_regs
[12])
20857 move_bytes
= (bytes
> 32) ? 32 : bytes
;
20858 gen_func
.movmemsi
= gen_movmemsi_8reg
;
20860 else if (TARGET_STRING
20861 && bytes
> 16 /* move up to 24 bytes at a time */
20867 && ! fixed_regs
[10])
20869 move_bytes
= (bytes
> 24) ? 24 : bytes
;
20870 gen_func
.movmemsi
= gen_movmemsi_6reg
;
20872 else if (TARGET_STRING
20873 && bytes
> 8 /* move up to 16 bytes at a time */
20877 && ! fixed_regs
[8])
20879 move_bytes
= (bytes
> 16) ? 16 : bytes
;
20880 gen_func
.movmemsi
= gen_movmemsi_4reg
;
20882 else if (bytes
>= 8 && TARGET_POWERPC64
20883 && (align
>= 64 || !STRICT_ALIGNMENT
))
20887 gen_func
.mov
= gen_movdi
;
20888 if (offset
== 0 && align
< 64)
20892 /* If the address form is reg+offset with offset not a
20893 multiple of four, reload into reg indirect form here
20894 rather than waiting for reload. This way we get one
20895 reload, not one per load and/or store. */
20896 addr
= XEXP (orig_dest
, 0);
20897 if ((GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
20898 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
20899 && (INTVAL (XEXP (addr
, 1)) & 3) != 0)
20901 addr
= copy_addr_to_reg (addr
);
20902 orig_dest
= replace_equiv_address (orig_dest
, addr
);
20904 addr
= XEXP (orig_src
, 0);
20905 if ((GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
20906 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
20907 && (INTVAL (XEXP (addr
, 1)) & 3) != 0)
20909 addr
= copy_addr_to_reg (addr
);
20910 orig_src
= replace_equiv_address (orig_src
, addr
);
20914 else if (TARGET_STRING
&& bytes
> 4 && !TARGET_POWERPC64
)
20915 { /* move up to 8 bytes at a time */
20916 move_bytes
= (bytes
> 8) ? 8 : bytes
;
20917 gen_func
.movmemsi
= gen_movmemsi_2reg
;
20919 else if (bytes
>= 4 && (align
>= 32 || !STRICT_ALIGNMENT
))
20920 { /* move 4 bytes */
20923 gen_func
.mov
= gen_movsi
;
20925 else if (bytes
>= 2 && (align
>= 16 || !STRICT_ALIGNMENT
))
20926 { /* move 2 bytes */
20929 gen_func
.mov
= gen_movhi
;
20931 else if (TARGET_STRING
&& bytes
> 1)
20932 { /* move up to 4 bytes at a time */
20933 move_bytes
= (bytes
> 4) ? 4 : bytes
;
20934 gen_func
.movmemsi
= gen_movmemsi_1reg
;
20936 else /* move 1 byte at a time */
20940 gen_func
.mov
= gen_movqi
;
20943 src
= adjust_address (orig_src
, mode
, offset
);
20944 dest
= adjust_address (orig_dest
, mode
, offset
);
20946 if (mode
!= BLKmode
)
20948 rtx tmp_reg
= gen_reg_rtx (mode
);
20950 emit_insn ((*gen_func
.mov
) (tmp_reg
, src
));
20951 stores
[num_reg
++] = (*gen_func
.mov
) (dest
, tmp_reg
);
20954 if (mode
== BLKmode
|| num_reg
>= MAX_MOVE_REG
|| bytes
== move_bytes
)
20957 for (i
= 0; i
< num_reg
; i
++)
20958 emit_insn (stores
[i
]);
20962 if (mode
== BLKmode
)
20964 /* Move the address into scratch registers. The movmemsi
20965 patterns require zero offset. */
20966 if (!REG_P (XEXP (src
, 0)))
20968 rtx src_reg
= copy_addr_to_reg (XEXP (src
, 0));
20969 src
= replace_equiv_address (src
, src_reg
);
20971 set_mem_size (src
, move_bytes
);
20973 if (!REG_P (XEXP (dest
, 0)))
20975 rtx dest_reg
= copy_addr_to_reg (XEXP (dest
, 0));
20976 dest
= replace_equiv_address (dest
, dest_reg
);
20978 set_mem_size (dest
, move_bytes
);
20980 emit_insn ((*gen_func
.movmemsi
) (dest
, src
,
20981 GEN_INT (move_bytes
& 31),
20990 /* Return a string to perform a load_multiple operation.
20991 operands[0] is the vector.
20992 operands[1] is the source address.
20993 operands[2] is the first destination register. */
20996 rs6000_output_load_multiple (rtx operands
[3])
20998 /* We have to handle the case where the pseudo used to contain the address
20999 is assigned to one of the output registers. */
21001 int words
= XVECLEN (operands
[0], 0);
21004 if (XVECLEN (operands
[0], 0) == 1)
21005 return "lwz %2,0(%1)";
21007 for (i
= 0; i
< words
; i
++)
21008 if (refers_to_regno_p (REGNO (operands
[2]) + i
, operands
[1]))
21012 xop
[0] = GEN_INT (4 * (words
-1));
21013 xop
[1] = operands
[1];
21014 xop
[2] = operands
[2];
21015 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop
);
21020 xop
[0] = GEN_INT (4 * (words
-1));
21021 xop
[1] = operands
[1];
21022 xop
[2] = gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
21023 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop
);
21028 for (j
= 0; j
< words
; j
++)
21031 xop
[0] = GEN_INT (j
* 4);
21032 xop
[1] = operands
[1];
21033 xop
[2] = gen_rtx_REG (SImode
, REGNO (operands
[2]) + j
);
21034 output_asm_insn ("lwz %2,%0(%1)", xop
);
21036 xop
[0] = GEN_INT (i
* 4);
21037 xop
[1] = operands
[1];
21038 output_asm_insn ("lwz %1,%0(%1)", xop
);
21043 return "lswi %2,%1,%N0";
21047 /* A validation routine: say whether CODE, a condition code, and MODE
21048 match. The other alternatives either don't make sense or should
21049 never be generated. */
21052 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
21054 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
21055 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
21056 && GET_MODE_CLASS (mode
) == MODE_CC
);
21058 /* These don't make sense. */
21059 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
21060 || mode
!= CCUNSmode
);
21062 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
21063 || mode
== CCUNSmode
);
21065 gcc_assert (mode
== CCFPmode
21066 || (code
!= ORDERED
&& code
!= UNORDERED
21067 && code
!= UNEQ
&& code
!= LTGT
21068 && code
!= UNGT
&& code
!= UNLT
21069 && code
!= UNGE
&& code
!= UNLE
));
21071 /* These should never be generated except for
21072 flag_finite_math_only. */
21073 gcc_assert (mode
!= CCFPmode
21074 || flag_finite_math_only
21075 || (code
!= LE
&& code
!= GE
21076 && code
!= UNEQ
&& code
!= LTGT
21077 && code
!= UNGT
&& code
!= UNLT
));
21079 /* These are invalid; the information is not there. */
21080 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
21084 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
21085 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
21086 not zero, store there the bit offset (counted from the right) where
21087 the single stretch of 1 bits begins; and similarly for B, the bit
21088 offset where it ends. */
21091 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
21093 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
21094 unsigned HOST_WIDE_INT bit
;
21096 int n
= GET_MODE_PRECISION (mode
);
21098 if (mode
!= DImode
&& mode
!= SImode
)
21101 if (INTVAL (mask
) >= 0)
21104 ne
= exact_log2 (bit
);
21105 nb
= exact_log2 (val
+ bit
);
21107 else if (val
+ 1 == 0)
21116 nb
= exact_log2 (bit
);
21117 ne
= exact_log2 (val
+ bit
);
21122 ne
= exact_log2 (bit
);
21123 if (val
+ bit
== 0)
21131 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
21142 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
21143 or rldicr instruction, to implement an AND with it in mode MODE. */
21146 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
21150 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
21153 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
21155 if (mode
== DImode
)
21156 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
21158 /* For SImode, rlwinm can do everything. */
21159 if (mode
== SImode
)
21160 return (nb
< 32 && ne
< 32);
21165 /* Return the instruction template for an AND with mask in mode MODE, with
21166 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21169 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
21173 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
21174 gcc_unreachable ();
21176 if (mode
== DImode
&& ne
== 0)
21178 operands
[3] = GEN_INT (63 - nb
);
21180 return "rldicl. %0,%1,0,%3";
21181 return "rldicl %0,%1,0,%3";
21184 if (mode
== DImode
&& nb
== 63)
21186 operands
[3] = GEN_INT (63 - ne
);
21188 return "rldicr. %0,%1,0,%3";
21189 return "rldicr %0,%1,0,%3";
21192 if (nb
< 32 && ne
< 32)
21194 operands
[3] = GEN_INT (31 - nb
);
21195 operands
[4] = GEN_INT (31 - ne
);
21197 return "rlwinm. %0,%1,0,%3,%4";
21198 return "rlwinm %0,%1,0,%3,%4";
21201 gcc_unreachable ();
21204 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
21205 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
21206 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
21209 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
21213 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
21216 int n
= GET_MODE_PRECISION (mode
);
21219 if (CONST_INT_P (XEXP (shift
, 1)))
21221 sh
= INTVAL (XEXP (shift
, 1));
21222 if (sh
< 0 || sh
>= n
)
21226 rtx_code code
= GET_CODE (shift
);
21228 /* Convert any shift by 0 to a rotate, to simplify below code. */
21232 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21233 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
21235 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
21241 /* DImode rotates need rld*. */
21242 if (mode
== DImode
&& code
== ROTATE
)
21243 return (nb
== 63 || ne
== 0 || ne
== sh
);
21245 /* SImode rotates need rlw*. */
21246 if (mode
== SImode
&& code
== ROTATE
)
21247 return (nb
< 32 && ne
< 32 && sh
< 32);
21249 /* Wrap-around masks are only okay for rotates. */
21253 /* Variable shifts are only okay for rotates. */
21257 /* Don't allow ASHIFT if the mask is wrong for that. */
21258 if (code
== ASHIFT
&& ne
< sh
)
21261 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
21262 if the mask is wrong for that. */
21263 if (nb
< 32 && ne
< 32 && sh
< 32
21264 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
21267 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
21268 if the mask is wrong for that. */
21269 if (code
== LSHIFTRT
)
21271 if (nb
== 63 || ne
== 0 || ne
== sh
)
21272 return !(code
== LSHIFTRT
&& nb
>= sh
);
21277 /* Return the instruction template for a shift with mask in mode MODE, with
21278 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21281 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
21285 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
21286 gcc_unreachable ();
21288 if (mode
== DImode
&& ne
== 0)
21290 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
21291 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
21292 operands
[3] = GEN_INT (63 - nb
);
21294 return "rld%I2cl. %0,%1,%2,%3";
21295 return "rld%I2cl %0,%1,%2,%3";
21298 if (mode
== DImode
&& nb
== 63)
21300 operands
[3] = GEN_INT (63 - ne
);
21302 return "rld%I2cr. %0,%1,%2,%3";
21303 return "rld%I2cr %0,%1,%2,%3";
21307 && GET_CODE (operands
[4]) != LSHIFTRT
21308 && CONST_INT_P (operands
[2])
21309 && ne
== INTVAL (operands
[2]))
21311 operands
[3] = GEN_INT (63 - nb
);
21313 return "rld%I2c. %0,%1,%2,%3";
21314 return "rld%I2c %0,%1,%2,%3";
21317 if (nb
< 32 && ne
< 32)
21319 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
21320 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
21321 operands
[3] = GEN_INT (31 - nb
);
21322 operands
[4] = GEN_INT (31 - ne
);
21323 /* This insn can also be a 64-bit rotate with mask that really makes
21324 it just a shift right (with mask); the %h below are to adjust for
21325 that situation (shift count is >= 32 in that case). */
21327 return "rlw%I2nm. %0,%1,%h2,%3,%4";
21328 return "rlw%I2nm %0,%1,%h2,%3,%4";
21331 gcc_unreachable ();
21334 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21335 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21336 ASHIFT, or LSHIFTRT) in mode MODE. */
21339 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
21343 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
21346 int n
= GET_MODE_PRECISION (mode
);
21348 int sh
= INTVAL (XEXP (shift
, 1));
21349 if (sh
< 0 || sh
>= n
)
21352 rtx_code code
= GET_CODE (shift
);
21354 /* Convert any shift by 0 to a rotate, to simplify below code. */
21358 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21359 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
21361 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
21367 /* DImode rotates need rldimi. */
21368 if (mode
== DImode
&& code
== ROTATE
)
21371 /* SImode rotates need rlwimi. */
21372 if (mode
== SImode
&& code
== ROTATE
)
21373 return (nb
< 32 && ne
< 32 && sh
< 32);
21375 /* Wrap-around masks are only okay for rotates. */
21379 /* Don't allow ASHIFT if the mask is wrong for that. */
21380 if (code
== ASHIFT
&& ne
< sh
)
21383 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
21384 if the mask is wrong for that. */
21385 if (nb
< 32 && ne
< 32 && sh
< 32
21386 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
21389 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
21390 if the mask is wrong for that. */
21391 if (code
== LSHIFTRT
)
21394 return !(code
== LSHIFTRT
&& nb
>= sh
);
21399 /* Return the instruction template for an insert with mask in mode MODE, with
21400 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21403 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
21407 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
21408 gcc_unreachable ();
21410 /* Prefer rldimi because rlwimi is cracked. */
21411 if (TARGET_POWERPC64
21412 && (!dot
|| mode
== DImode
)
21413 && GET_CODE (operands
[4]) != LSHIFTRT
21414 && ne
== INTVAL (operands
[2]))
21416 operands
[3] = GEN_INT (63 - nb
);
21418 return "rldimi. %0,%1,%2,%3";
21419 return "rldimi %0,%1,%2,%3";
21422 if (nb
< 32 && ne
< 32)
21424 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
21425 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
21426 operands
[3] = GEN_INT (31 - nb
);
21427 operands
[4] = GEN_INT (31 - ne
);
21429 return "rlwimi. %0,%1,%2,%3,%4";
21430 return "rlwimi %0,%1,%2,%3,%4";
21433 gcc_unreachable ();
21436 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21437 using two machine instructions. */
21440 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
21442 /* There are two kinds of AND we can handle with two insns:
21443 1) those we can do with two rl* insn;
21446 We do not handle that last case yet. */
21448 /* If there is just one stretch of ones, we can do it. */
21449 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
21452 /* Otherwise, fill in the lowest "hole"; if we can do the result with
21453 one insn, we can do the whole thing with two. */
21454 unsigned HOST_WIDE_INT val
= INTVAL (c
);
21455 unsigned HOST_WIDE_INT bit1
= val
& -val
;
21456 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
21457 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
21458 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
21459 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
21462 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21463 If EXPAND is true, split rotate-and-mask instructions we generate to
21464 their constituent parts as well (this is used during expand); if DOT
21465 is 1, make the last insn a record-form instruction clobbering the
21466 destination GPR and setting the CC reg (from operands[3]); if 2, set
21467 that GPR as well as the CC reg. */
21470 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
21472 gcc_assert (!(expand
&& dot
));
21474 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
21476 /* If it is one stretch of ones, it is DImode; shift left, mask, then
21477 shift right. This generates better code than doing the masks without
21478 shifts, or shifting first right and then left. */
21480 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
21482 gcc_assert (mode
== DImode
);
21484 int shift
= 63 - nb
;
21487 rtx tmp1
= gen_reg_rtx (DImode
);
21488 rtx tmp2
= gen_reg_rtx (DImode
);
21489 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
21490 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
21491 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
21495 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
21496 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
21497 emit_move_insn (operands
[0], tmp
);
21498 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
21499 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21504 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21505 that does the rest. */
21506 unsigned HOST_WIDE_INT bit1
= val
& -val
;
21507 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
21508 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
21509 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
21511 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
21512 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
21514 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
21516 /* Two "no-rotate"-and-mask instructions, for SImode. */
21517 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
21519 gcc_assert (mode
== SImode
);
21521 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
21522 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
21523 emit_move_insn (reg
, tmp
);
21524 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
21525 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21529 gcc_assert (mode
== DImode
);
21531 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21532 insns; we have to do the first in SImode, because it wraps. */
21533 if (mask2
<= 0xffffffff
21534 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
21536 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
21537 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
21539 rtx reg_low
= gen_lowpart (SImode
, reg
);
21540 emit_move_insn (reg_low
, tmp
);
21541 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
21542 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21546 /* Two rld* insns: rotate, clear the hole in the middle (which now is
21547 at the top end), rotate back and clear the other hole. */
21548 int right
= exact_log2 (bit3
);
21549 int left
= 64 - right
;
21551 /* Rotate the mask too. */
21552 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
21556 rtx tmp1
= gen_reg_rtx (DImode
);
21557 rtx tmp2
= gen_reg_rtx (DImode
);
21558 rtx tmp3
= gen_reg_rtx (DImode
);
21559 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
21560 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
21561 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
21562 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
21566 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
21567 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
21568 emit_move_insn (operands
[0], tmp
);
21569 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
21570 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
21571 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21575 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
21576 for lfq and stfq insns iff the registers are hard registers. */
21579 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
21581 /* We might have been passed a SUBREG. */
21582 if (GET_CODE (reg1
) != REG
|| GET_CODE (reg2
) != REG
)
21585 /* We might have been passed non floating point registers. */
21586 if (!FP_REGNO_P (REGNO (reg1
))
21587 || !FP_REGNO_P (REGNO (reg2
)))
21590 return (REGNO (reg1
) == REGNO (reg2
) - 1);
21593 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
21594 addr1 and addr2 must be in consecutive memory locations
21595 (addr2 == addr1 + 8). */
21598 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
21601 unsigned int reg1
, reg2
;
21602 int offset1
, offset2
;
21604 /* The mems cannot be volatile. */
21605 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
21608 addr1
= XEXP (mem1
, 0);
21609 addr2
= XEXP (mem2
, 0);
21611 /* Extract an offset (if used) from the first addr. */
21612 if (GET_CODE (addr1
) == PLUS
)
21614 /* If not a REG, return zero. */
21615 if (GET_CODE (XEXP (addr1
, 0)) != REG
)
21619 reg1
= REGNO (XEXP (addr1
, 0));
21620 /* The offset must be constant! */
21621 if (GET_CODE (XEXP (addr1
, 1)) != CONST_INT
)
21623 offset1
= INTVAL (XEXP (addr1
, 1));
21626 else if (GET_CODE (addr1
) != REG
)
21630 reg1
= REGNO (addr1
);
21631 /* This was a simple (mem (reg)) expression. Offset is 0. */
21635 /* And now for the second addr. */
21636 if (GET_CODE (addr2
) == PLUS
)
21638 /* If not a REG, return zero. */
21639 if (GET_CODE (XEXP (addr2
, 0)) != REG
)
21643 reg2
= REGNO (XEXP (addr2
, 0));
21644 /* The offset must be constant. */
21645 if (GET_CODE (XEXP (addr2
, 1)) != CONST_INT
)
21647 offset2
= INTVAL (XEXP (addr2
, 1));
21650 else if (GET_CODE (addr2
) != REG
)
21654 reg2
= REGNO (addr2
);
21655 /* This was a simple (mem (reg)) expression. Offset is 0. */
21659 /* Both of these must have the same base register. */
21663 /* The offset for the second addr must be 8 more than the first addr. */
21664 if (offset2
!= offset1
+ 8)
21667 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
21674 rs6000_secondary_memory_needed_rtx (machine_mode mode
)
21676 static bool eliminated
= false;
21679 if (mode
!= SDmode
|| TARGET_NO_SDMODE_STACK
)
21680 ret
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
21683 rtx mem
= cfun
->machine
->sdmode_stack_slot
;
21684 gcc_assert (mem
!= NULL_RTX
);
21688 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
21689 cfun
->machine
->sdmode_stack_slot
= mem
;
21695 if (TARGET_DEBUG_ADDR
)
21697 fprintf (stderr
, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21698 GET_MODE_NAME (mode
));
21700 fprintf (stderr
, "\tNULL_RTX\n");
21708 /* Return the mode to be used for memory when a secondary memory
21709 location is needed. For SDmode values we need to use DDmode, in
21710 all other cases we can use the same mode. */
21712 rs6000_secondary_memory_needed_mode (machine_mode mode
)
21714 if (lra_in_progress
&& mode
== SDmode
)
21720 rs6000_check_sdmode (tree
*tp
, int *walk_subtrees
, void *data ATTRIBUTE_UNUSED
)
21722 /* Don't walk into types. */
21723 if (*tp
== NULL_TREE
|| *tp
== error_mark_node
|| TYPE_P (*tp
))
21725 *walk_subtrees
= 0;
21729 switch (TREE_CODE (*tp
))
21738 case VIEW_CONVERT_EXPR
:
21739 if (TYPE_MODE (TREE_TYPE (*tp
)) == SDmode
)
21749 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
21750 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21751 only work on the traditional altivec registers, note if an altivec register
21754 static enum rs6000_reg_type
21755 register_to_reg_type (rtx reg
, bool *is_altivec
)
21757 HOST_WIDE_INT regno
;
21758 enum reg_class rclass
;
21760 if (GET_CODE (reg
) == SUBREG
)
21761 reg
= SUBREG_REG (reg
);
21764 return NO_REG_TYPE
;
21766 regno
= REGNO (reg
);
21767 if (regno
>= FIRST_PSEUDO_REGISTER
)
21769 if (!lra_in_progress
&& !reload_in_progress
&& !reload_completed
)
21770 return PSEUDO_REG_TYPE
;
21772 regno
= true_regnum (reg
);
21773 if (regno
< 0 || regno
>= FIRST_PSEUDO_REGISTER
)
21774 return PSEUDO_REG_TYPE
;
21777 gcc_assert (regno
>= 0);
21779 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
21780 *is_altivec
= true;
21782 rclass
= rs6000_regno_regclass
[regno
];
21783 return reg_class_to_reg_type
[(int)rclass
];
21786 /* Helper function to return the cost of adding a TOC entry address. */
21789 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
21793 if (TARGET_CMODEL
!= CMODEL_SMALL
)
21794 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
21797 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
21802 /* Helper function for rs6000_secondary_reload to determine whether the memory
21803 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21804 needs reloading. Return negative if the memory is not handled by the memory
21805 helper functions and to try a different reload method, 0 if no additional
21806 instructions are need, and positive to give the extra cost for the
21810 rs6000_secondary_reload_memory (rtx addr
,
21811 enum reg_class rclass
,
21814 int extra_cost
= 0;
21815 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
21816 addr_mask_type addr_mask
;
21817 const char *type
= NULL
;
21818 const char *fail_msg
= NULL
;
21820 if (GPR_REG_CLASS_P (rclass
))
21821 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
21823 else if (rclass
== FLOAT_REGS
)
21824 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
21826 else if (rclass
== ALTIVEC_REGS
)
21827 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
21829 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21830 else if (rclass
== VSX_REGS
)
21831 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
21832 & ~RELOAD_REG_AND_M16
);
21834 /* If the register allocator hasn't made up its mind yet on the register
21835 class to use, settle on defaults to use. */
21836 else if (rclass
== NO_REGS
)
21838 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
21839 & ~RELOAD_REG_AND_M16
);
21841 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
21842 addr_mask
&= ~(RELOAD_REG_INDEXED
21843 | RELOAD_REG_PRE_INCDEC
21844 | RELOAD_REG_PRE_MODIFY
);
21850 /* If the register isn't valid in this register class, just return now. */
21851 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
21853 if (TARGET_DEBUG_ADDR
)
21856 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21857 "not valid in class\n",
21858 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
21865 switch (GET_CODE (addr
))
21867 /* Does the register class supports auto update forms for this mode? We
21868 don't need a scratch register, since the powerpc only supports
21869 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21872 reg
= XEXP (addr
, 0);
21873 if (!base_reg_operand (addr
, GET_MODE (reg
)))
21875 fail_msg
= "no base register #1";
21879 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
21887 reg
= XEXP (addr
, 0);
21888 plus_arg1
= XEXP (addr
, 1);
21889 if (!base_reg_operand (reg
, GET_MODE (reg
))
21890 || GET_CODE (plus_arg1
) != PLUS
21891 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
21893 fail_msg
= "bad PRE_MODIFY";
21897 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
21904 /* Do we need to simulate AND -16 to clear the bottom address bits used
21905 in VMX load/stores? Only allow the AND for vector sizes. */
21907 and_arg
= XEXP (addr
, 0);
21908 if (GET_MODE_SIZE (mode
) != 16
21909 || GET_CODE (XEXP (addr
, 1)) != CONST_INT
21910 || INTVAL (XEXP (addr
, 1)) != -16)
21912 fail_msg
= "bad Altivec AND #1";
21916 if (rclass
!= ALTIVEC_REGS
)
21918 if (legitimate_indirect_address_p (and_arg
, false))
21921 else if (legitimate_indexed_address_p (and_arg
, false))
21926 fail_msg
= "bad Altivec AND #2";
21934 /* If this is an indirect address, make sure it is a base register. */
21937 if (!legitimate_indirect_address_p (addr
, false))
21944 /* If this is an indexed address, make sure the register class can handle
21945 indexed addresses for this mode. */
21947 plus_arg0
= XEXP (addr
, 0);
21948 plus_arg1
= XEXP (addr
, 1);
21950 /* (plus (plus (reg) (constant)) (constant)) is generated during
21951 push_reload processing, so handle it now. */
21952 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
21954 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
21961 /* (plus (plus (reg) (constant)) (reg)) is also generated during
21962 push_reload processing, so handle it now. */
21963 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
21965 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
21968 type
= "indexed #2";
21972 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
21974 fail_msg
= "no base register #2";
21978 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
21980 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
21981 || !legitimate_indexed_address_p (addr
, false))
21988 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
21989 && CONST_INT_P (plus_arg1
))
21991 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
21994 type
= "vector d-form offset";
21998 /* Make sure the register class can handle offset addresses. */
21999 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
22001 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22004 type
= "offset #2";
22010 fail_msg
= "bad PLUS";
22017 /* Quad offsets are restricted and can't handle normal addresses. */
22018 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
22021 type
= "vector d-form lo_sum";
22024 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
22026 fail_msg
= "bad LO_SUM";
22030 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22037 /* Static addresses need to create a TOC entry. */
22041 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
22044 type
= "vector d-form lo_sum #2";
22050 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
22054 /* TOC references look like offsetable memory. */
22056 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
22058 fail_msg
= "bad UNSPEC";
22062 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
22065 type
= "vector d-form lo_sum #3";
22068 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22071 type
= "toc reference";
22077 fail_msg
= "bad address";
22082 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
22084 if (extra_cost
< 0)
22086 "rs6000_secondary_reload_memory error: mode = %s, "
22087 "class = %s, addr_mask = '%s', %s\n",
22088 GET_MODE_NAME (mode
),
22089 reg_class_names
[rclass
],
22090 rs6000_debug_addr_mask (addr_mask
, false),
22091 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
22095 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
22096 "addr_mask = '%s', extra cost = %d, %s\n",
22097 GET_MODE_NAME (mode
),
22098 reg_class_names
[rclass
],
22099 rs6000_debug_addr_mask (addr_mask
, false),
22101 (type
) ? type
: "<none>");
22109 /* Helper function for rs6000_secondary_reload to return true if a move to a
22110 different register classe is really a simple move. */
22113 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
22114 enum rs6000_reg_type from_type
,
22117 int size
= GET_MODE_SIZE (mode
);
22119 /* Add support for various direct moves available. In this function, we only
22120 look at cases where we don't need any extra registers, and one or more
22121 simple move insns are issued. Originally small integers are not allowed
22122 in FPR/VSX registers. Single precision binary floating is not a simple
22123 move because we need to convert to the single precision memory layout.
22124 The 4-byte SDmode can be moved. TDmode values are disallowed since they
22125 need special direct move handling, which we do not support yet. */
22126 if (TARGET_DIRECT_MOVE
22127 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
22128 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
22130 if (TARGET_POWERPC64
)
22132 /* ISA 2.07: MTVSRD or MVFVSRD. */
22136 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
22137 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
22141 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22142 if (TARGET_VSX_SMALL_INTEGER
)
22144 if (mode
== SImode
)
22147 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
22151 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22152 if (mode
== SDmode
)
22156 /* Power6+: MFTGPR or MFFGPR. */
22157 else if (TARGET_MFPGPR
&& TARGET_POWERPC64
&& size
== 8
22158 && ((to_type
== GPR_REG_TYPE
&& from_type
== FPR_REG_TYPE
)
22159 || (to_type
== FPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
22162 /* Move to/from SPR. */
22163 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
22164 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
22165 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
22171 /* Direct move helper function for rs6000_secondary_reload, handle all of the
22172 special direct moves that involve allocating an extra register, return the
22173 insn code of the helper function if there is such a function or
22174 CODE_FOR_nothing if not. */
22177 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
22178 enum rs6000_reg_type from_type
,
22180 secondary_reload_info
*sri
,
22184 enum insn_code icode
= CODE_FOR_nothing
;
22186 int size
= GET_MODE_SIZE (mode
);
22188 if (TARGET_POWERPC64
&& size
== 16)
22190 /* Handle moving 128-bit values from GPRs to VSX point registers on
22191 ISA 2.07 (power8, power9) when running in 64-bit mode using
22192 XXPERMDI to glue the two 64-bit values back together. */
22193 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
22195 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
22196 icode
= reg_addr
[mode
].reload_vsx_gpr
;
22199 /* Handle moving 128-bit values from VSX point registers to GPRs on
22200 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
22201 bottom 64-bit value. */
22202 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
22204 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
22205 icode
= reg_addr
[mode
].reload_gpr_vsx
;
22209 else if (TARGET_POWERPC64
&& mode
== SFmode
)
22211 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
22213 cost
= 3; /* xscvdpspn, mfvsrd, and. */
22214 icode
= reg_addr
[mode
].reload_gpr_vsx
;
22217 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
22219 cost
= 2; /* mtvsrz, xscvspdpn. */
22220 icode
= reg_addr
[mode
].reload_vsx_gpr
;
22224 else if (!TARGET_POWERPC64
&& size
== 8)
22226 /* Handle moving 64-bit values from GPRs to floating point registers on
22227 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
22228 32-bit values back together. Altivec register classes must be handled
22229 specially since a different instruction is used, and the secondary
22230 reload support requires a single instruction class in the scratch
22231 register constraint. However, right now TFmode is not allowed in
22232 Altivec registers, so the pattern will never match. */
22233 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
22235 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
22236 icode
= reg_addr
[mode
].reload_fpr_gpr
;
22240 if (icode
!= CODE_FOR_nothing
)
22245 sri
->icode
= icode
;
22246 sri
->extra_cost
= cost
;
22253 /* Return whether a move between two register classes can be done either
22254 directly (simple move) or via a pattern that uses a single extra temporary
22255 (using ISA 2.07's direct move in this case. */
22258 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
22259 enum rs6000_reg_type from_type
,
22261 secondary_reload_info
*sri
,
22264 /* Fall back to load/store reloads if either type is not a register. */
22265 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
22268 /* If we haven't allocated registers yet, assume the move can be done for the
22269 standard register types. */
22270 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
22271 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
22272 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
22275 /* Moves to the same set of registers is a simple move for non-specialized
22277 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
22280 /* Check whether a simple move can be done directly. */
22281 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
22285 sri
->icode
= CODE_FOR_nothing
;
22286 sri
->extra_cost
= 0;
22291 /* Now check if we can do it in a few steps. */
22292 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
22296 /* Inform reload about cases where moving X with a mode MODE to a register in
22297 RCLASS requires an extra scratch or immediate register. Return the class
22298 needed for the immediate register.
22300 For VSX and Altivec, we may need a register to convert sp+offset into
22303 For misaligned 64-bit gpr loads and stores we need a register to
22304 convert an offset address to indirect. */
22307 rs6000_secondary_reload (bool in_p
,
22309 reg_class_t rclass_i
,
22311 secondary_reload_info
*sri
)
22313 enum reg_class rclass
= (enum reg_class
) rclass_i
;
22314 reg_class_t ret
= ALL_REGS
;
22315 enum insn_code icode
;
22316 bool default_p
= false;
22317 bool done_p
= false;
22319 /* Allow subreg of memory before/during reload. */
22320 bool memory_p
= (MEM_P (x
)
22321 || (!reload_completed
&& GET_CODE (x
) == SUBREG
22322 && MEM_P (SUBREG_REG (x
))));
22324 sri
->icode
= CODE_FOR_nothing
;
22325 sri
->t_icode
= CODE_FOR_nothing
;
22326 sri
->extra_cost
= 0;
22328 ? reg_addr
[mode
].reload_load
22329 : reg_addr
[mode
].reload_store
);
22331 if (REG_P (x
) || register_operand (x
, mode
))
22333 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
22334 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
22335 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
22338 std::swap (to_type
, from_type
);
22340 /* Can we do a direct move of some sort? */
22341 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
22344 icode
= (enum insn_code
)sri
->icode
;
22351 /* Make sure 0.0 is not reloaded or forced into memory. */
22352 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
22359 /* If this is a scalar floating point value and we want to load it into the
22360 traditional Altivec registers, do it via a move via a traditional floating
22361 point register, unless we have D-form addressing. Also make sure that
22362 non-zero constants use a FPR. */
22363 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
22364 && !mode_supports_vmx_dform (mode
)
22365 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
22366 && (memory_p
|| (GET_CODE (x
) == CONST_DOUBLE
)))
22373 /* Handle reload of load/stores if we have reload helper functions. */
22374 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
22376 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
22379 if (extra_cost
>= 0)
22383 if (extra_cost
> 0)
22385 sri
->extra_cost
= extra_cost
;
22386 sri
->icode
= icode
;
22391 /* Handle unaligned loads and stores of integer registers. */
22392 if (!done_p
&& TARGET_POWERPC64
22393 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
22395 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
22397 rtx addr
= XEXP (x
, 0);
22398 rtx off
= address_offset (addr
);
22400 if (off
!= NULL_RTX
)
22402 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
22403 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
22405 /* We need a secondary reload when our legitimate_address_p
22406 says the address is good (as otherwise the entire address
22407 will be reloaded), and the offset is not a multiple of
22408 four or we have an address wrap. Address wrap will only
22409 occur for LO_SUMs since legitimate_offset_address_p
22410 rejects addresses for 16-byte mems that will wrap. */
22411 if (GET_CODE (addr
) == LO_SUM
22412 ? (1 /* legitimate_address_p allows any offset for lo_sum */
22413 && ((offset
& 3) != 0
22414 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
22415 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
22416 && (offset
& 3) != 0))
22418 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
22420 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
22421 : CODE_FOR_reload_di_load
);
22423 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
22424 : CODE_FOR_reload_di_store
);
22425 sri
->extra_cost
= 2;
22436 if (!done_p
&& !TARGET_POWERPC64
22437 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
22439 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
22441 rtx addr
= XEXP (x
, 0);
22442 rtx off
= address_offset (addr
);
22444 if (off
!= NULL_RTX
)
22446 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
22447 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
22449 /* We need a secondary reload when our legitimate_address_p
22450 says the address is good (as otherwise the entire address
22451 will be reloaded), and we have a wrap.
22453 legitimate_lo_sum_address_p allows LO_SUM addresses to
22454 have any offset so test for wrap in the low 16 bits.
22456 legitimate_offset_address_p checks for the range
22457 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22458 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
22459 [0x7ff4,0x7fff] respectively, so test for the
22460 intersection of these ranges, [0x7ffc,0x7fff] and
22461 [0x7ff4,0x7ff7] respectively.
22463 Note that the address we see here may have been
22464 manipulated by legitimize_reload_address. */
22465 if (GET_CODE (addr
) == LO_SUM
22466 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
22467 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
22470 sri
->icode
= CODE_FOR_reload_si_load
;
22472 sri
->icode
= CODE_FOR_reload_si_store
;
22473 sri
->extra_cost
= 2;
22488 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
22490 gcc_assert (ret
!= ALL_REGS
);
22492 if (TARGET_DEBUG_ADDR
)
22495 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22497 reg_class_names
[ret
],
22498 in_p
? "true" : "false",
22499 reg_class_names
[rclass
],
22500 GET_MODE_NAME (mode
));
22502 if (reload_completed
)
22503 fputs (", after reload", stderr
);
22506 fputs (", done_p not set", stderr
);
22509 fputs (", default secondary reload", stderr
);
22511 if (sri
->icode
!= CODE_FOR_nothing
)
22512 fprintf (stderr
, ", reload func = %s, extra cost = %d",
22513 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
22515 else if (sri
->extra_cost
> 0)
22516 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
22518 fputs ("\n", stderr
);
22525 /* Better tracing for rs6000_secondary_reload_inner. */
22528 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
22533 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
22535 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
22536 store_p
? "store" : "load");
22539 set
= gen_rtx_SET (mem
, reg
);
22541 set
= gen_rtx_SET (reg
, mem
);
22543 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
22544 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
22547 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
22548 ATTRIBUTE_NORETURN
;
22551 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
22554 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
22555 gcc_unreachable ();
22558 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
22559 reload helper functions. These were identified in
22560 rs6000_secondary_reload_memory, and if reload decided to use the secondary
22561 reload, it calls the insns:
22562 reload_<RELOAD:mode>_<P:mptrsize>_store
22563 reload_<RELOAD:mode>_<P:mptrsize>_load
22565 which in turn calls this function, to do whatever is necessary to create
22566 valid addresses. */
22569 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
22571 int regno
= true_regnum (reg
);
22572 machine_mode mode
= GET_MODE (reg
);
22573 addr_mask_type addr_mask
;
22576 rtx op_reg
, op0
, op1
;
22581 if (regno
< 0 || regno
>= FIRST_PSEUDO_REGISTER
|| !MEM_P (mem
)
22582 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
22583 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22585 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
22586 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
22588 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
22589 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
22591 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
22592 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
22595 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22597 /* Make sure the mode is valid in this register class. */
22598 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
22599 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22601 if (TARGET_DEBUG_ADDR
)
22602 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
22604 new_addr
= addr
= XEXP (mem
, 0);
22605 switch (GET_CODE (addr
))
22607 /* Does the register class support auto update forms for this mode? If
22608 not, do the update now. We don't need a scratch register, since the
22609 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
22612 op_reg
= XEXP (addr
, 0);
22613 if (!base_reg_operand (op_reg
, Pmode
))
22614 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22616 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
22618 emit_insn (gen_add2_insn (op_reg
, GEN_INT (GET_MODE_SIZE (mode
))));
22624 op0
= XEXP (addr
, 0);
22625 op1
= XEXP (addr
, 1);
22626 if (!base_reg_operand (op0
, Pmode
)
22627 || GET_CODE (op1
) != PLUS
22628 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
22629 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22631 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
22633 emit_insn (gen_rtx_SET (op0
, op1
));
22638 /* Do we need to simulate AND -16 to clear the bottom address bits used
22639 in VMX load/stores? */
22641 op0
= XEXP (addr
, 0);
22642 op1
= XEXP (addr
, 1);
22643 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
22645 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
22648 else if (GET_CODE (op1
) == PLUS
)
22650 emit_insn (gen_rtx_SET (scratch
, op1
));
22655 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22657 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
22658 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
22659 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
22660 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
22661 new_addr
= scratch
;
22665 /* If this is an indirect address, make sure it is a base register. */
22668 if (!base_reg_operand (addr
, GET_MODE (addr
)))
22670 emit_insn (gen_rtx_SET (scratch
, addr
));
22671 new_addr
= scratch
;
22675 /* If this is an indexed address, make sure the register class can handle
22676 indexed addresses for this mode. */
22678 op0
= XEXP (addr
, 0);
22679 op1
= XEXP (addr
, 1);
22680 if (!base_reg_operand (op0
, Pmode
))
22681 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22683 else if (int_reg_operand (op1
, Pmode
))
22685 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
22687 emit_insn (gen_rtx_SET (scratch
, addr
));
22688 new_addr
= scratch
;
22692 else if (mode_supports_vsx_dform_quad (mode
) && CONST_INT_P (op1
))
22694 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
22695 || !quad_address_p (addr
, mode
, false))
22697 emit_insn (gen_rtx_SET (scratch
, addr
));
22698 new_addr
= scratch
;
22702 /* Make sure the register class can handle offset addresses. */
22703 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
22705 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22707 emit_insn (gen_rtx_SET (scratch
, addr
));
22708 new_addr
= scratch
;
22713 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22718 op0
= XEXP (addr
, 0);
22719 op1
= XEXP (addr
, 1);
22720 if (!base_reg_operand (op0
, Pmode
))
22721 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22723 else if (int_reg_operand (op1
, Pmode
))
22725 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
22727 emit_insn (gen_rtx_SET (scratch
, addr
));
22728 new_addr
= scratch
;
22732 /* Quad offsets are restricted and can't handle normal addresses. */
22733 else if (mode_supports_vsx_dform_quad (mode
))
22735 emit_insn (gen_rtx_SET (scratch
, addr
));
22736 new_addr
= scratch
;
22739 /* Make sure the register class can handle offset addresses. */
22740 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
22742 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22744 emit_insn (gen_rtx_SET (scratch
, addr
));
22745 new_addr
= scratch
;
22750 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22757 rs6000_emit_move (scratch
, addr
, Pmode
);
22758 new_addr
= scratch
;
22762 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22765 /* Adjust the address if it changed. */
22766 if (addr
!= new_addr
)
22768 mem
= replace_equiv_address_nv (mem
, new_addr
);
22769 if (TARGET_DEBUG_ADDR
)
22770 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22773 /* Now create the move. */
22775 emit_insn (gen_rtx_SET (mem
, reg
));
22777 emit_insn (gen_rtx_SET (reg
, mem
));
22782 /* Convert reloads involving 64-bit gprs and misaligned offset
22783 addressing, or multiple 32-bit gprs and offsets that are too large,
22784 to use indirect addressing. */
22787 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
22789 int regno
= true_regnum (reg
);
22790 enum reg_class rclass
;
22792 rtx scratch_or_premodify
= scratch
;
22794 if (TARGET_DEBUG_ADDR
)
22796 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
22797 store_p
? "store" : "load");
22798 fprintf (stderr
, "reg:\n");
22800 fprintf (stderr
, "mem:\n");
22802 fprintf (stderr
, "scratch:\n");
22803 debug_rtx (scratch
);
22806 gcc_assert (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
);
22807 gcc_assert (GET_CODE (mem
) == MEM
);
22808 rclass
= REGNO_REG_CLASS (regno
);
22809 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
22810 addr
= XEXP (mem
, 0);
22812 if (GET_CODE (addr
) == PRE_MODIFY
)
22814 gcc_assert (REG_P (XEXP (addr
, 0))
22815 && GET_CODE (XEXP (addr
, 1)) == PLUS
22816 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
22817 scratch_or_premodify
= XEXP (addr
, 0);
22818 if (!HARD_REGISTER_P (scratch_or_premodify
))
22819 /* If we have a pseudo here then reload will have arranged
22820 to have it replaced, but only in the original insn.
22821 Use the replacement here too. */
22822 scratch_or_premodify
= find_replacement (&XEXP (addr
, 0));
22824 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22825 expressions from the original insn, without unsharing them.
22826 Any RTL that points into the original insn will of course
22827 have register replacements applied. That is why we don't
22828 need to look for replacements under the PLUS. */
22829 addr
= XEXP (addr
, 1);
22831 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
22833 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
22835 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
22837 /* Now create the move. */
22839 emit_insn (gen_rtx_SET (mem
, reg
));
22841 emit_insn (gen_rtx_SET (reg
, mem
));
22846 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22847 this function has any SDmode references. If we are on a power7 or later, we
22848 don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
22849 can load/store the value. */
22852 rs6000_alloc_sdmode_stack_slot (void)
22856 gimple_stmt_iterator gsi
;
22858 gcc_assert (cfun
->machine
->sdmode_stack_slot
== NULL_RTX
);
22859 /* We use a different approach for dealing with the secondary
22864 if (TARGET_NO_SDMODE_STACK
)
22867 FOR_EACH_BB_FN (bb
, cfun
)
22868 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
22870 tree ret
= walk_gimple_op (gsi_stmt (gsi
), rs6000_check_sdmode
, NULL
);
22873 rtx stack
= assign_stack_local (DDmode
, GET_MODE_SIZE (DDmode
), 0);
22874 cfun
->machine
->sdmode_stack_slot
= adjust_address_nv (stack
,
22880 /* Check for any SDmode parameters of the function. */
22881 for (t
= DECL_ARGUMENTS (cfun
->decl
); t
; t
= DECL_CHAIN (t
))
22883 if (TREE_TYPE (t
) == error_mark_node
)
22886 if (TYPE_MODE (TREE_TYPE (t
)) == SDmode
22887 || TYPE_MODE (DECL_ARG_TYPE (t
)) == SDmode
)
22889 rtx stack
= assign_stack_local (DDmode
, GET_MODE_SIZE (DDmode
), 0);
22890 cfun
->machine
->sdmode_stack_slot
= adjust_address_nv (stack
,
22898 rs6000_instantiate_decls (void)
22900 if (cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
)
22901 instantiate_decl_rtl (cfun
->machine
->sdmode_stack_slot
);
22904 /* Given an rtx X being reloaded into a reg required to be
22905 in class CLASS, return the class of reg to actually use.
22906 In general this is just CLASS; but on some machines
22907 in some cases it is preferable to use a more restrictive class.
22909 On the RS/6000, we have to return NO_REGS when we want to reload a
22910 floating-point CONST_DOUBLE to force it to be copied to memory.
22912 We also don't want to reload integer values into floating-point
22913 registers if we can at all help it. In fact, this can
22914 cause reload to die, if it tries to generate a reload of CTR
22915 into a FP register and discovers it doesn't have the memory location
22918 ??? Would it be a good idea to have reload do the converse, that is
22919 try to reload floating modes into FP registers if possible?
/* NOTE(review): fragmented extraction -- several original lines (most of the
   "return NO_REGS;"-style statements and the closing braces) are missing from
   this listing, so only hedged comments are added; code text untouched.
   Visible contract: given rtx X being reloaded into class RCLASS, return the
   register class to actually use (visible returns: ALTIVEC_REGS and
   GENERAL_REGS; other outcomes are on the missing lines).  */
22922 static enum reg_class
22923 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
22925 machine_mode mode
= GET_MODE (x
);
22926 bool is_constant
= CONSTANT_P (x
);
22928 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
22929 reload class for it. */
22930 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22931 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
22934 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
22935 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
22938 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
22939 the reloading of address expressions using PLUS into floating point
22941 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
22945 /* Zero is always allowed in all VSX registers. */
22946 if (x
== CONST0_RTX (mode
))
22949 /* If this is a vector constant that can be formed with a few Altivec
22950 instructions, we want altivec registers. */
22951 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
22952 return ALTIVEC_REGS
;
22954 /* If this is an integer constant that can easily be loaded into
22955 vector registers, allow it. */
22956 if (CONST_INT_P (x
))
22958 HOST_WIDE_INT value
= INTVAL (x
);
22960 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
22961 2.06 can generate it in the Altivec registers with
22965 if (TARGET_P8_VECTOR
)
22967 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22968 return ALTIVEC_REGS
;
22973 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
22974 a sign extend in the Altivec registers. */
22975 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
22976 && TARGET_VSX_SMALL_INTEGER
22977 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
22978 return ALTIVEC_REGS
;
22981 /* Force constant to memory. */
22985 /* D-form addressing can easily reload the value. */
22986 if (mode_supports_vmx_dform (mode
)
22987 || mode_supports_vsx_dform_quad (mode
))
22990 /* If this is a scalar floating point value and we don't have D-form
22991 addressing, prefer the traditional floating point registers so that we
22992 can use D-form (register+offset) addressing. */
22993 if (rclass
== VSX_REGS
22994 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
22997 /* Prefer the Altivec registers if Altivec is handling the vector
22998 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
23000 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
23001 || mode
== V1TImode
)
23002 return ALTIVEC_REGS
;
/* Non-VSX fallback: constants and PLUS addresses prefer integer classes.  */
23007 if (is_constant
|| GET_CODE (x
) == PLUS
)
23009 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
23010 return GENERAL_REGS
;
23011 if (reg_class_subset_p (BASE_REGS
, rclass
))
23016 if (GET_MODE_CLASS (mode
) == MODE_INT
&& rclass
== NON_SPECIAL_REGS
)
23017 return GENERAL_REGS
;
23022 /* Debug version of rs6000_preferred_reload_class. */
/* NOTE(review): fragmented extraction (the fprintf call's opening line and
   the final return are missing); code text untouched.
   Debug wrapper: calls rs6000_preferred_reload_class and traces the result
   before returning it.  */
23023 static enum reg_class
23024 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
23026 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
23029 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
23031 reg_class_names
[ret
], reg_class_names
[rclass
],
23032 GET_MODE_NAME (GET_MODE (x
)));
23038 /* If we are copying between FP or AltiVec registers and anything else, we need
23039 a memory location. The exception is when we are targeting ppc64 and the
23040 move to/from fpr to gpr instructions are available. Also, under VSX, you
23041 can copy vector registers from the FP register set to the Altivec register
23042 set and vice versa. */
/* NOTE(review): fragmented extraction -- the return-type line, the MODE
   parameter line and the return statements are missing; code text untouched.
   Visible contract: decide whether copying between FROM_CLASS and TO_CLASS
   needs a secondary memory location; a direct move
   (rs6000_secondary_reload_move) avoids it, FP/vector register types
   require it.  */
23045 rs6000_secondary_memory_needed (enum reg_class from_class
,
23046 enum reg_class to_class
,
23049 enum rs6000_reg_type from_type
, to_type
;
23050 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
23051 || (to_class
== ALTIVEC_REGS
));
23053 /* If a simple/direct move is available, we don't need secondary memory */
23054 from_type
= reg_class_to_reg_type
[(int)from_class
];
23055 to_type
= reg_class_to_reg_type
[(int)to_class
];
23057 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
23058 (secondary_reload_info
*)0, altivec_p
))
23061 /* If we have a floating point or vector register class, we need to use
23062 memory to transfer the data. */
23063 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
23069 /* Debug version of rs6000_secondary_memory_needed. */
/* NOTE(review): fragmented extraction (return type, MODE parameter, fprintf
   opening and final return are missing); code text untouched.
   Debug wrapper: calls rs6000_secondary_memory_needed and traces the
   decision before returning it.  */
23071 rs6000_debug_secondary_memory_needed (enum reg_class from_class
,
23072 enum reg_class to_class
,
23075 bool ret
= rs6000_secondary_memory_needed (from_class
, to_class
, mode
);
23078 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
23079 "to_class = %s, mode = %s\n",
23080 ret
? "true" : "false",
23081 reg_class_names
[from_class
],
23082 reg_class_names
[to_class
],
23083 GET_MODE_NAME (mode
));
23088 /* Return the register class of a scratch register needed to copy IN into
23089 or out of a register in RCLASS in MODE. If it can be done directly,
23090 NO_REGS is returned. */
/* NOTE(review): fragmented extraction -- the IN parameter line, regno
   declaration, braces and several "return ...;" lines are missing; code
   text untouched.
   Visible contract: return the class of a scratch register needed to copy
   IN into/out of RCLASS in MODE, or (on missing lines, presumably) NO_REGS
   when no scratch is needed.  */
23092 static enum reg_class
23093 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
23098 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
23100 && MACHOPIC_INDIRECT
23104 /* We cannot copy a symbolic operand directly into anything
23105 other than BASE_REGS for TARGET_ELF. So indicate that a
23106 register from BASE_REGS is needed as an intermediate
23109 On Darwin, pic addresses require a load from memory, which
23110 needs a base register. */
23111 if (rclass
!= BASE_REGS
23112 && (GET_CODE (in
) == SYMBOL_REF
23113 || GET_CODE (in
) == HIGH
23114 || GET_CODE (in
) == LABEL_REF
23115 || GET_CODE (in
) == CONST
))
/* Resolve IN to a hard register number where possible; pseudos are mapped
   through true_regnum.  */
23119 if (GET_CODE (in
) == REG
)
23121 regno
= REGNO (in
);
23122 if (regno
>= FIRST_PSEUDO_REGISTER
)
23124 regno
= true_regnum (in
);
23125 if (regno
>= FIRST_PSEUDO_REGISTER
)
23129 else if (GET_CODE (in
) == SUBREG
)
23131 regno
= true_regnum (in
);
23132 if (regno
>= FIRST_PSEUDO_REGISTER
)
23138 /* If we have VSX register moves, prefer moving scalar values between
23139 Altivec registers and GPR by going via an FPR (and then via memory)
23140 instead of reloading the secondary memory address for Altivec moves. */
23142 && GET_MODE_SIZE (mode
) < 16
23143 && !mode_supports_vmx_dform (mode
)
23144 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
23145 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
23146 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
23147 && (regno
>= 0 && INT_REGNO_P (regno
)))))
23150 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
23152 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
23153 || (regno
>= 0 && INT_REGNO_P (regno
)))
23156 /* Constants, memory, and VSX registers can go into VSX registers (both the
23157 traditional floating point and the altivec registers). */
23158 if (rclass
== VSX_REGS
23159 && (regno
== -1 || VSX_REGNO_P (regno
)))
23162 /* Constants, memory, and FP registers can go into FP registers. */
23163 if ((regno
== -1 || FP_REGNO_P (regno
))
23164 && (rclass
== FLOAT_REGS
|| rclass
== NON_SPECIAL_REGS
))
/* SDmode outside LRA needs a GPR scratch.  */
23165 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
23167 /* Memory, and AltiVec registers can go into AltiVec registers. */
23168 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
23169 && rclass
== ALTIVEC_REGS
)
23172 /* We can copy among the CR registers. */
23173 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
23174 && regno
>= 0 && CR_REGNO_P (regno
))
23177 /* Otherwise, we need GENERAL_REGS. */
23178 return GENERAL_REGS
;
23181 /* Debug version of rs6000_secondary_reload_class. */
/* NOTE(review): fragmented extraction (fprintf opening, debug_rtx dump and
   final return are missing); code text untouched.
   Debug wrapper: calls rs6000_secondary_reload_class and traces the result
   before returning it.  */
23182 static enum reg_class
23183 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
23184 machine_mode mode
, rtx in
)
23186 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
23188 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
23189 "mode = %s, input rtx:\n",
23190 reg_class_names
[ret
], reg_class_names
[rclass
],
23191 GET_MODE_NAME (mode
));
23197 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
/* NOTE(review): fragmented extraction -- the return-type line, the TO
   parameter, braces and most "return true/false;" lines are missing; code
   text untouched.
   Visible contract: for RCLASS, report whether a subreg mode change
   FROM -> TO is invalid (CANNOT_CHANGE_MODE_CLASS); the visible checks
   cover size changes in VSX/FP classes, TDmode little-endian pairs,
   E500 double, Altivec and SPE vector modes.  */
23200 rs6000_cannot_change_mode_class (machine_mode from
,
23202 enum reg_class rclass
)
23204 unsigned from_size
= GET_MODE_SIZE (from
);
23205 unsigned to_size
= GET_MODE_SIZE (to
);
23207 if (from_size
!= to_size
)
23209 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
23211 if (reg_classes_intersect_p (xclass
, rclass
))
23213 unsigned to_nregs
= hard_regno_nregs
[FIRST_FPR_REGNO
][to
];
23214 unsigned from_nregs
= hard_regno_nregs
[FIRST_FPR_REGNO
][from
];
23215 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
23216 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
23218 /* Don't allow 64-bit types to overlap with 128-bit types that take a
23219 single register under VSX because the scalar part of the register
23220 is in the upper 64-bits, and not the lower 64-bits. Types like
23221 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
23222 IEEE floating point can't overlap, and neither can small
23225 if (to_float128_vector_p
&& from_float128_vector_p
)
23228 else if (to_float128_vector_p
|| from_float128_vector_p
)
23231 /* TDmode in floating-mode registers must always go into a register
23232 pair with the most significant word in the even-numbered register
23233 to match ISA requirements. In little-endian mode, this does not
23234 match subreg numbering, so we cannot allow subregs. */
23235 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
23238 if (from_size
< 8 || to_size
< 8)
23241 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
23244 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
/* E500 double: forbid changes where exactly one side is one of these
   64/128-bit FP or integer modes (the "+ ... == 1" tests).  */
23253 if (TARGET_E500_DOUBLE
23254 && ((((to
) == DFmode
) + ((from
) == DFmode
)) == 1
23255 || (((to
) == TFmode
) + ((from
) == TFmode
)) == 1
23256 || (((to
) == IFmode
) + ((from
) == IFmode
)) == 1
23257 || (((to
) == KFmode
) + ((from
) == KFmode
)) == 1
23258 || (((to
) == DDmode
) + ((from
) == DDmode
)) == 1
23259 || (((to
) == TDmode
) + ((from
) == TDmode
)) == 1
23260 || (((to
) == DImode
) + ((from
) == DImode
)) == 1))
23263 /* Since the VSX register set includes traditional floating point registers
23264 and altivec registers, just check for the size being different instead of
23265 trying to check whether the modes are vector modes. Otherwise it won't
23266 allow say DF and DI to change classes. For types like TFmode and TDmode
23267 that take 2 64-bit registers, rather than a single 128-bit register, don't
23268 allow subregs of those types to other 128 bit types. */
23269 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
23271 unsigned num_regs
= (from_size
+ 15) / 16;
23272 if (hard_regno_nregs
[FIRST_FPR_REGNO
][to
] > num_regs
23273 || hard_regno_nregs
[FIRST_FPR_REGNO
][from
] > num_regs
)
23276 return (from_size
!= 8 && from_size
!= 16);
23279 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
23280 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
23283 if (TARGET_SPE
&& (SPE_VECTOR_MODE (from
) + SPE_VECTOR_MODE (to
)) == 1
23284 && reg_classes_intersect_p (GENERAL_REGS
, rclass
))
23290 /* Debug version of rs6000_cannot_change_mode_class. */
/* NOTE(review): fragmented extraction (return type, TO parameter, fprintf
   opening and final return are missing); code text untouched.
   Debug wrapper: calls rs6000_cannot_change_mode_class and traces the
   decision before returning it.  */
23292 rs6000_debug_cannot_change_mode_class (machine_mode from
,
23294 enum reg_class rclass
)
23296 bool ret
= rs6000_cannot_change_mode_class (from
, to
, rclass
);
23299 "rs6000_cannot_change_mode_class, return %s, from = %s, "
23300 "to = %s, rclass = %s\n",
23301 ret
? "true" : "false",
23302 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
23303 reg_class_names
[rclass
]);
23308 /* Return a string to do a move operation of 128 bits of data. */
/* NOTE(review): fragmented extraction -- the return-type line, regno
   declarations/initializers, braces and several alternative return
   statements are missing; code text untouched.
   Visible contract: return the assembler template string for a 128-bit
   move described by OPERANDS[0] (dest) and OPERANDS[1] (src): direct
   moves (mfvsrd/mtvsrdd), register copies (xxlor/vor), loads
   (lxv/lxvx/lxvw4x/lxvd2x/lvx), stores (stq/stxv/stxvx/stxvw4x/stxvd2x/
   stvx) and vector constants; otherwise fatal_insn.  */
23311 rs6000_output_move_128bit (rtx operands
[])
23313 rtx dest
= operands
[0];
23314 rtx src
= operands
[1];
23315 machine_mode mode
= GET_MODE (dest
);
23318 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
23319 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
/* Classify the destination register (the REG_P guard line is missing).  */
23323 dest_regno
= REGNO (dest
);
23324 dest_gpr_p
= INT_REGNO_P (dest_regno
);
23325 dest_fp_p
= FP_REGNO_P (dest_regno
);
23326 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
23327 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
23332 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
/* Classify the source register likewise.  */
23337 src_regno
= REGNO (src
);
23338 src_gpr_p
= INT_REGNO_P (src_regno
);
23339 src_fp_p
= FP_REGNO_P (src_regno
);
23340 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
23341 src_vsx_p
= src_fp_p
| src_vmx_p
;
23346 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
23349 /* Register moves. */
23350 if (dest_regno
>= 0 && src_regno
>= 0)
23357 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
23358 return (WORDS_BIG_ENDIAN
23359 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
23360 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
23362 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
23366 else if (TARGET_VSX
&& dest_vsx_p
)
23369 return "xxlor %x0,%x1,%x1";
23371 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
23372 return (WORDS_BIG_ENDIAN
23373 ? "mtvsrdd %x0,%1,%L1"
23374 : "mtvsrdd %x0,%L1,%1");
23376 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
23380 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
23381 return "vor %0,%1,%1";
23383 else if (dest_fp_p
&& src_fp_p
)
/* Loads: memory source into a register.  */
23388 else if (dest_regno
>= 0 && MEM_P (src
))
23392 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
23398 else if (TARGET_ALTIVEC
&& dest_vmx_p
23399 && altivec_indexed_or_indirect_operand (src
, mode
))
23400 return "lvx %0,%y1";
23402 else if (TARGET_VSX
&& dest_vsx_p
)
23404 if (mode_supports_vsx_dform_quad (mode
)
23405 && quad_address_p (XEXP (src
, 0), mode
, true))
23406 return "lxv %x0,%1";
23408 else if (TARGET_P9_VECTOR
)
23409 return "lxvx %x0,%y1";
23411 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
23412 return "lxvw4x %x0,%y1";
23415 return "lxvd2x %x0,%y1";
23418 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
23419 return "lvx %0,%y1";
23421 else if (dest_fp_p
)
/* Stores: register source into memory.  */
23426 else if (src_regno
>= 0 && MEM_P (dest
))
23430 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
23431 return "stq %1,%0";
23436 else if (TARGET_ALTIVEC
&& src_vmx_p
23437 && altivec_indexed_or_indirect_operand (src
, mode
))
23438 return "stvx %1,%y0";
23440 else if (TARGET_VSX
&& src_vsx_p
)
23442 if (mode_supports_vsx_dform_quad (mode
)
23443 && quad_address_p (XEXP (dest
, 0), mode
, true))
23444 return "stxv %x1,%0";
23446 else if (TARGET_P9_VECTOR
)
23447 return "stxvx %x1,%y0";
23449 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
23450 return "stxvw4x %x1,%y0";
23453 return "stxvd2x %x1,%y0";
23456 else if (TARGET_ALTIVEC
&& src_vmx_p
)
23457 return "stvx %1,%y0";
/* Constants into registers.  */
23464 else if (dest_regno
>= 0
23465 && (GET_CODE (src
) == CONST_INT
23466 || GET_CODE (src
) == CONST_WIDE_INT
23467 || GET_CODE (src
) == CONST_DOUBLE
23468 || GET_CODE (src
) == CONST_VECTOR
))
23473 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
23474 || (dest_vsx_p
&& TARGET_VSX
))
23475 return output_vec_const_move (operands
);
23478 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
23481 /* Validate a 128-bit move. */
/* NOTE(review): fragmented extraction (return-type line and braces missing);
   code text untouched.
   Validate a 128-bit move: true when at least one operand is a
   general-purpose-class register operand.  */
23483 rs6000_move_128bit_ok_p (rtx operands
[])
23485 machine_mode mode
= GET_MODE (operands
[0]);
23486 return (gpc_reg_operand (operands
[0], mode
)
23487 || gpc_reg_operand (operands
[1], mode
));
23490 /* Return true if a 128-bit move needs to be split. */
/* NOTE(review): fragmented extraction (return-type line and the
   "return true/false;" lines are missing); code text untouched.
   Visible checks: a 128-bit move is split only after reload, only for
   GPR-involving moves, and not for quad load/store pairs.  */
23492 rs6000_split_128bit_ok_p (rtx operands
[])
23494 if (!reload_completed
)
23497 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
23500 if (quad_load_store_p (operands
[0], operands
[1]))
23507 /* Given a comparison operation, return the bit number in CCR to test. We
23508 know this is a valid comparison.
23510 SCC_P is 1 if this is for an scc. That means that %D will have been
23511 used instead of %C, so the bits will be in different places.
23513 Return -1 if OP isn't a valid comparison for some reason. */
/* NOTE(review): fragmented extraction -- the return-type line, several local
   declarations (reg, cc_regnum, base_bit), the switch head and most case
   labels are missing; code text untouched.
   Contract (from the surviving header comment): return the bit number in
   CCR to test for comparison OP; SCC_P selects the scc bit layout; -1 on
   invalid input (that return is on a missing line).  */
23516 ccr_bit (rtx op
, int scc_p
)
23518 enum rtx_code code
= GET_CODE (op
);
23519 machine_mode cc_mode
;
23524 if (!COMPARISON_P (op
))
23527 reg
= XEXP (op
, 0);
23529 gcc_assert (GET_CODE (reg
) == REG
&& CR_REGNO_P (REGNO (reg
)));
/* base_bit is 4 * CR field number: each CR field holds 4 bits.  */
23531 cc_mode
= GET_MODE (reg
);
23532 cc_regnum
= REGNO (reg
);
23533 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
23535 validate_condition_mode (code
, cc_mode
);
23537 /* When generating a sCOND operation, only positive conditions are
23540 || code
== EQ
|| code
== GT
|| code
== LT
|| code
== UNORDERED
23541 || code
== GTU
|| code
== LTU
);
23546 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
23548 return base_bit
+ 2;
23549 case GT
: case GTU
: case UNLE
:
23550 return base_bit
+ 1;
23551 case LT
: case LTU
: case UNGE
:
23553 case ORDERED
: case UNORDERED
:
23554 return base_bit
+ 3;
23557 /* If scc, we will have done a cror to put the bit in the
23558 unordered position. So test that bit. For integer, this is ! LT
23559 unless this is an scc insn. */
23560 return scc_p
? base_bit
+ 3 : base_bit
;
23563 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
23566 gcc_unreachable ();
23570 /* Return the GOT register. */
/* NOTE(review): fragmented extraction (return-type line and braces missing);
   code text untouched.
   Return the GOT/PIC offset table register, marking it live and flagging
   crtl->uses_pic_offset_table on the way.  */
23573 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
23575 /* The second flow pass currently (June 1999) can't update
23576 regs_ever_live without disturbing other parts of the compiler, so
23577 update it here to make the prolog/epilogue code happy. */
23578 if (!can_create_pseudo_p ()
23579 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
23580 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
23582 crtl
->uses_pic_offset_table
= 1;
23584 return pic_offset_table_rtx
;
/* NOTE(review): fragmented extraction; code text untouched.
   File-scope cache for computed stack layout; reload_completed==0 marks it
   stale for the new function below.  */
23587 static rs6000_stack_t stack_info
;
23589 /* Function to init struct machine_function.
23590 This will be called, via a pointer variable,
23591 from push_function_context. */
23593 static struct machine_function
*
23594 rs6000_init_machine_status (void)
23596 stack_info
.reload_completed
= 0;
/* GC-allocated, zero-initialized machine_function (C++ template call --
   this file is compiled as C++).  */
23597 return ggc_cleared_alloc
<machine_function
> ();
/* NOTE(review): fragmented extraction -- the return-type line, braces, the
   switch case labels and the fputs of the period are missing; code text
   untouched.
   Write a function code label: for names not starting with '.', the
   ABI-dependent prefix handling (visible: "L." internal-label prefix on one
   path) precedes RS6000_OUTPUT_BASENAME.  */
23600 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
23602 /* Write out a function code label. */
23605 rs6000_output_function_entry (FILE *file
, const char *fname
)
23607 if (fname
[0] != '.')
23609 switch (DEFAULT_ABI
)
23612 gcc_unreachable ();
23618 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
23628 RS6000_OUTPUT_BASENAME (file
, fname
);
/* NOTE(review): fragmented extraction -- ALL 'case' labels of the big switch
   on CODE are missing from this listing, along with braces, several
   declarations and statements; only the handler bodies and their surviving
   comments remain, so no per-handler commentary is added (attributing a
   fragment to a specific %-letter would be a guess).  Code text untouched.
   Visible intent: print_operand emits one assembler operand for rtx X to
   FILE under the %-letter CODE (CR bit numbers, mtcrf masks, trap codes,
   VSX register numbers, AltiVec/SPE memory operands, TOC-relative
   addresses, etc.); invalid operands go through output_operand_lossage.  */
23631 /* Print an operand. Recognize special options, documented below. */
23634 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
23635 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
23637 #define SMALL_DATA_RELOC "sda21"
23638 #define SMALL_DATA_REG 0
23642 print_operand (FILE *file
, rtx x
, int code
)
23645 unsigned HOST_WIDE_INT uval
;
23649 /* %a is output_address. */
23651 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
23655 /* Like 'J' but get to the GT bit only. */
23656 gcc_assert (REG_P (x
));
23658 /* Bit 1 is GT bit. */
23659 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
23661 /* Add one for shift count in rlinm for scc. */
23662 fprintf (file
, "%d", i
+ 1);
23666 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
23669 output_operand_lossage ("invalid %%e value");
23674 if ((uval
& 0xffff) == 0 && uval
!= 0)
23679 /* X is a CR register. Print the number of the EQ bit of the CR */
23680 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
23681 output_operand_lossage ("invalid %%E value");
23683 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
23687 /* X is a CR register. Print the shift count needed to move it
23688 to the high-order four bits. */
23689 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
23690 output_operand_lossage ("invalid %%f value");
23692 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
23696 /* Similar, but print the count for the rotate in the opposite
23698 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
23699 output_operand_lossage ("invalid %%F value");
23701 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
23705 /* X is a constant integer. If it is negative, print "m",
23706 otherwise print "z". This is to make an aze or ame insn. */
23707 if (GET_CODE (x
) != CONST_INT
)
23708 output_operand_lossage ("invalid %%G value");
23709 else if (INTVAL (x
) >= 0)
23716 /* If constant, output low-order five bits. Otherwise, write
23719 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
23721 print_operand (file
, x
, 0);
23725 /* If constant, output low-order six bits. Otherwise, write
23728 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
23730 print_operand (file
, x
, 0);
23734 /* Print `i' if this is a constant, else nothing. */
23740 /* Write the bit number in CCR for jump. */
23741 i
= ccr_bit (x
, 0);
23743 output_operand_lossage ("invalid %%j code");
23745 fprintf (file
, "%d", i
);
23749 /* Similar, but add one for shift count in rlinm for scc and pass
23750 scc flag to `ccr_bit'. */
23751 i
= ccr_bit (x
, 1);
23753 output_operand_lossage ("invalid %%J code");
23755 /* If we want bit 31, write a shift count of zero, not 32. */
23756 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
23760 /* X must be a constant. Write the 1's complement of the
23763 output_operand_lossage ("invalid %%k value");
23765 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
23769 /* X must be a symbolic constant on ELF. Write an
23770 expression suitable for an 'addi' that adds in the low 16
23771 bits of the MEM. */
23772 if (GET_CODE (x
) == CONST
)
23774 if (GET_CODE (XEXP (x
, 0)) != PLUS
23775 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) != SYMBOL_REF
23776 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
23777 || GET_CODE (XEXP (XEXP (x
, 0), 1)) != CONST_INT
)
23778 output_operand_lossage ("invalid %%K value");
23780 print_operand_address (file
, x
);
23781 fputs ("@l", file
);
23784 /* %l is output_asm_label. */
23787 /* Write second word of DImode or DFmode reference. Works on register
23788 or non-indexed memory only. */
23790 fputs (reg_names
[REGNO (x
) + 1], file
);
23791 else if (MEM_P (x
))
23793 machine_mode mode
= GET_MODE (x
);
23794 /* Handle possible auto-increment. Since it is pre-increment and
23795 we have already done it, we can just use an offset of word. */
23796 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
23797 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
23798 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
23800 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
23801 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
23804 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
23808 if (small_data_operand (x
, GET_MODE (x
)))
23809 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
23810 reg_names
[SMALL_DATA_REG
]);
23815 /* Write the number of elements in the vector times 4. */
23816 if (GET_CODE (x
) != PARALLEL
)
23817 output_operand_lossage ("invalid %%N value");
23819 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
23823 /* Similar, but subtract 1 first. */
23824 if (GET_CODE (x
) != PARALLEL
)
23825 output_operand_lossage ("invalid %%O value");
23827 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
23831 /* X is a CONST_INT that is a power of two. Output the logarithm. */
23834 || (i
= exact_log2 (INTVAL (x
))) < 0)
23835 output_operand_lossage ("invalid %%p value");
23837 fprintf (file
, "%d", i
);
23841 /* The operand must be an indirect memory reference. The result
23842 is the register name. */
23843 if (GET_CODE (x
) != MEM
|| GET_CODE (XEXP (x
, 0)) != REG
23844 || REGNO (XEXP (x
, 0)) >= 32)
23845 output_operand_lossage ("invalid %%P value");
23847 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
23851 /* This outputs the logical code corresponding to a boolean
23852 expression. The expression may have one or both operands
23853 negated (if one, only the first one). For condition register
23854 logical operations, it will also treat the negated
23855 CR codes as NOTs, but not handle NOTs of them. */
23857 const char *const *t
= 0;
23859 enum rtx_code code
= GET_CODE (x
);
23860 static const char * const tbl
[3][3] = {
23861 { "and", "andc", "nor" },
23862 { "or", "orc", "nand" },
23863 { "xor", "eqv", "xor" } };
23867 else if (code
== IOR
)
23869 else if (code
== XOR
)
23872 output_operand_lossage ("invalid %%q value");
23874 if (GET_CODE (XEXP (x
, 0)) != NOT
)
23878 if (GET_CODE (XEXP (x
, 1)) == NOT
)
23889 if (! TARGET_MFCRF
)
23895 /* X is a CR register. Print the mask for `mtcrf'. */
23896 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
23897 output_operand_lossage ("invalid %%R value");
23899 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
23903 /* Low 5 bits of 32 - value */
23905 output_operand_lossage ("invalid %%s value");
23907 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
23911 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
23912 gcc_assert (REG_P (x
) && GET_MODE (x
) == CCmode
);
23914 /* Bit 3 is OV bit. */
23915 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
23917 /* If we want bit 31, write a shift count of zero, not 32. */
23918 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
23922 /* Print the symbolic name of a branch target register. */
23923 if (GET_CODE (x
) != REG
|| (REGNO (x
) != LR_REGNO
23924 && REGNO (x
) != CTR_REGNO
))
23925 output_operand_lossage ("invalid %%T value");
23926 else if (REGNO (x
) == LR_REGNO
)
23927 fputs ("lr", file
);
23929 fputs ("ctr", file
);
23933 /* High-order or low-order 16 bits of constant, whichever is non-zero,
23934 for use in unsigned operand. */
23937 output_operand_lossage ("invalid %%u value");
23942 if ((uval
& 0xffff) == 0)
23945 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
23949 /* High-order 16 bits of constant for use in signed operand. */
23951 output_operand_lossage ("invalid %%v value");
23953 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
23954 (INTVAL (x
) >> 16) & 0xffff);
23958 /* Print `u' if this has an auto-increment or auto-decrement. */
23960 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
23961 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
23962 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
23967 /* Print the trap code for this operand. */
23968 switch (GET_CODE (x
))
23971 fputs ("eq", file
); /* 4 */
23974 fputs ("ne", file
); /* 24 */
23977 fputs ("lt", file
); /* 16 */
23980 fputs ("le", file
); /* 20 */
23983 fputs ("gt", file
); /* 8 */
23986 fputs ("ge", file
); /* 12 */
23989 fputs ("llt", file
); /* 2 */
23992 fputs ("lle", file
); /* 6 */
23995 fputs ("lgt", file
); /* 1 */
23998 fputs ("lge", file
); /* 5 */
24001 gcc_unreachable ();
24006 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
24009 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
24010 ((INTVAL (x
) & 0xffff) ^ 0x8000) - 0x8000);
24012 print_operand (file
, x
, 0);
24016 /* X is a FPR or Altivec register used in a VSX context. */
24017 if (GET_CODE (x
) != REG
|| !VSX_REGNO_P (REGNO (x
)))
24018 output_operand_lossage ("invalid %%x value");
24021 int reg
= REGNO (x
);
24022 int vsx_reg
= (FP_REGNO_P (reg
)
24024 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
24026 #ifdef TARGET_REGNAMES
24027 if (TARGET_REGNAMES
)
24028 fprintf (file
, "%%vs%d", vsx_reg
);
24031 fprintf (file
, "%d", vsx_reg
);
24037 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
24038 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
24039 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
24044 /* Like 'L', for third word of TImode/PTImode */
24046 fputs (reg_names
[REGNO (x
) + 2], file
);
24047 else if (MEM_P (x
))
24049 machine_mode mode
= GET_MODE (x
);
24050 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
24051 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
24052 output_address (mode
, plus_constant (Pmode
,
24053 XEXP (XEXP (x
, 0), 0), 8));
24054 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
24055 output_address (mode
, plus_constant (Pmode
,
24056 XEXP (XEXP (x
, 0), 0), 8));
24058 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
24059 if (small_data_operand (x
, GET_MODE (x
)))
24060 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
24061 reg_names
[SMALL_DATA_REG
]);
24066 /* X is a SYMBOL_REF. Write out the name preceded by a
24067 period and without any trailing data in brackets. Used for function
24068 names. If we are configured for System V (or the embedded ABI) on
24069 the PowerPC, do not emit the period, since those systems do not use
24070 TOCs and the like. */
24071 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
24073 /* For macho, check to see if we need a stub. */
24076 const char *name
= XSTR (x
, 0);
24078 if (darwin_emit_branch_islands
24079 && MACHOPIC_INDIRECT
24080 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
24081 name
= machopic_indirection_name (x
, /*stub_p=*/true);
24083 assemble_name (file
, name
);
24085 else if (!DOT_SYMBOLS
)
24086 assemble_name (file
, XSTR (x
, 0));
24088 rs6000_output_function_entry (file
, XSTR (x
, 0));
24092 /* Like 'L', for last word of TImode/PTImode. */
24094 fputs (reg_names
[REGNO (x
) + 3], file
);
24095 else if (MEM_P (x
))
24097 machine_mode mode
= GET_MODE (x
);
24098 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
24099 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
24100 output_address (mode
, plus_constant (Pmode
,
24101 XEXP (XEXP (x
, 0), 0), 12));
24102 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
24103 output_address (mode
, plus_constant (Pmode
,
24104 XEXP (XEXP (x
, 0), 0), 12));
24106 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
24107 if (small_data_operand (x
, GET_MODE (x
)))
24108 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
24109 reg_names
[SMALL_DATA_REG
]);
24113 /* Print AltiVec or SPE memory operand. */
24118 gcc_assert (MEM_P (x
));
24122 /* Ugly hack because %y is overloaded. */
24123 if ((TARGET_SPE
|| TARGET_E500_DOUBLE
)
24124 && (GET_MODE_SIZE (GET_MODE (x
)) == 8
24125 || FLOAT128_2REG_P (GET_MODE (x
))
24126 || GET_MODE (x
) == TImode
24127 || GET_MODE (x
) == PTImode
))
24129 /* Handle [reg]. */
24132 fprintf (file
, "0(%s)", reg_names
[REGNO (tmp
)]);
24135 /* Handle [reg+UIMM]. */
24136 else if (GET_CODE (tmp
) == PLUS
&&
24137 GET_CODE (XEXP (tmp
, 1)) == CONST_INT
)
24141 gcc_assert (REG_P (XEXP (tmp
, 0)));
24143 x
= INTVAL (XEXP (tmp
, 1));
24144 fprintf (file
, "%d(%s)", x
, reg_names
[REGNO (XEXP (tmp
, 0))]);
24148 /* Fall through. Must be [reg+reg]. */
24150 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x
))
24151 && GET_CODE (tmp
) == AND
24152 && GET_CODE (XEXP (tmp
, 1)) == CONST_INT
24153 && INTVAL (XEXP (tmp
, 1)) == -16)
24154 tmp
= XEXP (tmp
, 0);
24155 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
24156 && GET_CODE (tmp
) == PRE_MODIFY
)
24157 tmp
= XEXP (tmp
, 1);
24159 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
24162 if (GET_CODE (tmp
) != PLUS
24163 || !REG_P (XEXP (tmp
, 0))
24164 || !REG_P (XEXP (tmp
, 1)))
24166 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
24170 if (REGNO (XEXP (tmp
, 0)) == 0)
24171 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
24172 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
24174 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
24175 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
24182 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
24183 else if (MEM_P (x
))
24185 /* We need to handle PRE_INC and PRE_DEC here, since we need to
24186 know the width from the mode. */
24187 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
24188 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
24189 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
24190 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
24191 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
24192 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
24193 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
24194 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
24196 output_address (GET_MODE (x
), XEXP (x
, 0));
24200 if (toc_relative_expr_p (x
, false))
24201 /* This hack along with a corresponding hack in
24202 rs6000_output_addr_const_extra arranges to output addends
24203 where the assembler expects to find them. eg.
24204 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
24205 without this hack would be output as "x@toc+4". We
24207 output_addr_const (file
, CONST_CAST_RTX (tocrel_base
));
24209 output_addr_const (file
, x
);
24214 if (const char *name
= get_some_local_dynamic_name ())
24215 assemble_name (file
, name
);
24217 output_operand_lossage ("'%%&' used without any "
24218 "local dynamic TLS references");
24222 output_operand_lossage ("invalid %%xn code");
24226 /* Print the address of an operand. */
/* print_operand_address: print a memory address operand X to FILE in
   PowerPC assembler syntax.  Visible arms handle: bare register
   ("0(rN)"), symbolic/const addresses (with small-data "@sdarel(r13)"
   style suffix), reg+reg ("rA,rB", with r0 forced into the second slot
   since r0 reads as literal 0 in an address), reg+const
   ("<disp>(rN)"), Darwin "lo16(sym)(rN)" and ELF "sym@l(rN)" LO_SUM
   forms, and TOC-relative expressions (paired with a hack in
   rs6000_output_addr_const_extra so addends print before "@toc").
   NOTE(review): extraction-mangled fragment -- statements are split
   across physical lines, original line numbers (24229..24290) are fused
   into the text, and interior lines are missing.  Code kept
   byte-identical; restore formatting from the upstream file.  */
24229 print_operand_address (FILE *file
, rtx x
)
24232 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
24233 else if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
24234 || GET_CODE (x
) == LABEL_REF
)
24236 output_addr_const (file
, x
);
24237 if (small_data_operand (x
, GET_MODE (x
)))
24238 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
24239 reg_names
[SMALL_DATA_REG
]);
24241 gcc_assert (!TARGET_TOC
);
/* reg+reg: if the first register is r0 it must go in the index slot,
   because r0 in the base slot means constant zero.  */
24243 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
24244 && REG_P (XEXP (x
, 1)))
24246 if (REGNO (XEXP (x
, 0)) == 0)
24247 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
24248 reg_names
[ REGNO (XEXP (x
, 0)) ]);
24250 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
24251 reg_names
[ REGNO (XEXP (x
, 1)) ]);
24253 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
24254 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
24255 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
24256 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
/* Two LO_SUM arms with identical guards: presumably the first is the
   Darwin/Mach-O "lo16()" form and the second the ELF "@l" form, with
   the selecting #if/#elif lines elided -- TODO confirm upstream.  */
24258 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
24259 && CONSTANT_P (XEXP (x
, 1)))
24261 fprintf (file
, "lo16(");
24262 output_addr_const (file
, XEXP (x
, 1));
24263 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
24267 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
24268 && CONSTANT_P (XEXP (x
, 1)))
24270 output_addr_const (file
, XEXP (x
, 1));
24271 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
24274 else if (toc_relative_expr_p (x
, false))
24276 /* This hack along with a corresponding hack in
24277 rs6000_output_addr_const_extra arranges to output addends
24278 where the assembler expects to find them. eg.
24280 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24281 without this hack would be output as "x@toc+8@l(9)". We
24282 want "x+8@toc@l(9)". */
24283 output_addr_const (file
, CONST_CAST_RTX (tocrel_base
));
24284 if (GET_CODE (x
) == LO_SUM
)
24285 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
24287 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base
, 0, 1))]);
24290 gcc_unreachable ();
24293 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
/* rs6000_output_addr_const_extra: implement
   TARGET_OUTPUT_ADDR_CONST_EXTRA.  Handles target-specific UNSPECs
   inside constant addresses: UNSPEC_TOCREL prints the symbol, an
   explicit signed offset when X is the recorded tocrel_base, then
   either the TOC label name (non-AIX, or ELF -mminimal-toc) or "@toc"
   (ELF); UNSPEC_MACHOPIC_OFFSET prints the symbol minus the Mach-O
   picbase.  NOTE(review): extraction-mangled fragment -- return type,
   braces, "return true/false" lines and other interior lines are
   elided; code kept byte-identical.  */
24296 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
24298 if (GET_CODE (x
) == UNSPEC
)
24299 switch (XINT (x
, 1))
24301 case UNSPEC_TOCREL
:
24302 gcc_checking_assert (GET_CODE (XVECEXP (x
, 0, 0)) == SYMBOL_REF
24303 && REG_P (XVECEXP (x
, 0, 1))
24304 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
24305 output_addr_const (file
, XVECEXP (x
, 0, 0));
/* Print the addend here (see the matching hack in print_operand_address)
   so it lands before the "@toc" modifier.  "+" is only needed for
   non-negative offsets; negative ones print their own sign.  */
24306 if (x
== tocrel_base
&& tocrel_offset
!= const0_rtx
)
24308 if (INTVAL (tocrel_offset
) >= 0)
24309 fprintf (file
, "+");
24310 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset
));
24312 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
24315 assemble_name (file
, toc_label_name
);
24318 else if (TARGET_ELF
)
24319 fputs ("@toc", file
);
24323 case UNSPEC_MACHOPIC_OFFSET
:
24324 output_addr_const (file
, XVECEXP (x
, 0, 0));
24326 machopic_output_function_base_name (file
);
24333 /* Target hook for assembling integer objects. The PowerPC version has
24334 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24335 is defined. It also needs to handle DI-mode objects on 64-bit
/* rs6000_assemble_integer: target hook for assembling an integer
   object X of SIZE bytes.  When RELOCATABLE_NEEDS_FIXUP is defined and
   the value is an aligned 4-byte non-constant-int address under the V4
   ABI with -mrelocatable/-fPIC (outside the already-relocated TOC
   section), it emits the value as "(expr)@fixup" plus an entry in the
   ".fixup" section naming an internal LCP label; it also strips leading
   '.' from -mcall-aixdesc function symbols so the descriptor address is
   emitted rather than the code address.  Falls through to
   default_assemble_integer.  NOTE(review): extraction-mangled fragment
   -- the `recurse' guard logic, buffer declaration and several interior
   lines are elided; code kept byte-identical.  */
24339 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
24341 #ifdef RELOCATABLE_NEEDS_FIXUP
24342 /* Special handling for SI values. */
24343 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
24345 static int recurse
= 0;
24347 /* For -mrelocatable, we mark all addresses that need to be fixed up in
24348 the .fixup section. Since the TOC section is already relocated, we
24349 don't need to mark it here. We used to skip the text section, but it
24350 should never be valid for relocated addresses to be placed in the text
24352 if (DEFAULT_ABI
== ABI_V4
24353 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
24354 && in_section
!= toc_section
24356 && !CONST_SCALAR_INT_P (x
)
24362 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
24364 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
24365 fprintf (asm_out_file
, "\t.long\t(");
24366 output_addr_const (asm_out_file
, x
);
24367 fprintf (asm_out_file
, ")@fixup\n");
24368 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
24369 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
24370 fprintf (asm_out_file
, "\t.long\t");
24371 assemble_name (asm_out_file
, buf
);
24372 fprintf (asm_out_file
, "\n\t.previous\n");
24376 /* Remove initial .'s to turn a -mcall-aixdesc function
24377 address into the address of the descriptor, not the function
24379 else if (GET_CODE (x
) == SYMBOL_REF
24380 && XSTR (x
, 0)[0] == '.'
24381 && DEFAULT_ABI
== ABI_AIX
)
24383 const char *name
= XSTR (x
, 0);
24384 while (*name
== '.')
24387 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
24391 #endif /* RELOCATABLE_NEEDS_FIXUP */
24392 return default_assemble_integer (x
, size
, aligned_p
);
24395 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24396 /* Emit an assembler directive to set symbol visibility for DECL to
24397 VISIBILITY_TYPE. */
/* rs6000_assemble_visibility: emit an assembler directive setting the
   ELF symbol visibility of DECL to VIS (index into visibility_types:
   1=protected, 2=hidden, 3=internal; 0/default emits nothing via the
   NULL entry).  Under the AIX ABI a FUNCTION_DECL gets two directives:
   one for the descriptor symbol "name" and one for the entry-point
   symbol ".name"; everything else defers to
   default_assemble_visibility.  NOTE(review): extraction-mangled
   fragment; interior lines (braces, the array terminator) are elided;
   code kept byte-identical.  */
24400 rs6000_assemble_visibility (tree decl
, int vis
)
24405 /* Functions need to have their entry point symbol visibility set as
24406 well as their descriptor symbol visibility. */
24407 if (DEFAULT_ABI
== ABI_AIX
24409 && TREE_CODE (decl
) == FUNCTION_DECL
)
24411 static const char * const visibility_types
[] = {
24412 NULL
, "protected", "hidden", "internal"
24415 const char *name
, *type
;
24417 name
= ((* targetm
.strip_name_encoding
)
24418 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
24419 type
= visibility_types
[vis
];
24421 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
24422 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
24425 default_assemble_visibility (decl
, vis
);
/* rs6000_reverse_condition: return the reverse of comparison CODE for
   a compare done in MODE.  For CCFPmode compares, unless
   -ffinite-math-only guarantees no NaNs (or CODE is already one of the
   unordered codes), use reverse_condition_maybe_unordered so that
   ordered <-> unordered is preserved; otherwise plain
   reverse_condition suffices.  NOTE(review): extraction-mangled
   fragment (return type and braces elided); code kept byte-identical.  */
24430 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
24432 /* Reversal of FP compares takes care -- an ordered compare
24433 becomes an unordered compare and vice versa. */
24434 if (mode
== CCFPmode
24435 && (!flag_finite_math_only
24436 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
24437 || code
== UNEQ
|| code
== LTGT
))
24438 return reverse_condition_maybe_unordered (code
);
24440 return reverse_condition (code
);
24443 /* Generate a compare for CODE. Return a brand-new rtx that
24444 represents the result of the compare. */
/* rs6000_generate_compare (part 1 of 2 in this extraction): generate a
   compare for CODE/CMP in MODE and return a fresh comparison rtx
   against const0_rtx.  Visible logic: pick the CC mode (CCmode for
   soft IEEE-128 vectors, CCFPmode for FP, CCUNSmode for unsigned
   orderings and for EQ/NE on known-unsigned operands, else CCmode);
   force negative CONST_INT operands into a register for unsigned
   compares; then either (a) emit E500 GPR-based FP compares
   (tst*/cmp* patterns, with LE/GE synthesized as LT/GT OR EQ via
   gen_e500_cr_ior_compare), or (b) for IEEE 128-bit soft-float, select
   a comparison libfunc via optab_libfunc and remap the rtx code
   (UNORDERED/ORDERED -> NE/EQ, GE/UNGE -> GE/GT, LE/UNLE -> LE/LT,
   continued in the next fragment).
   NOTE(review): extraction-mangled fragment -- switch labels, braces
   and many interior lines (24448..24710) are elided; code below kept
   byte-identical.  */
24447 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
24449 machine_mode comp_mode
;
24450 rtx compare_result
;
24451 enum rtx_code code
= GET_CODE (cmp
);
24452 rtx op0
= XEXP (cmp
, 0);
24453 rtx op1
= XEXP (cmp
, 1);
24455 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
24456 comp_mode
= CCmode
;
24457 else if (FLOAT_MODE_P (mode
))
24458 comp_mode
= CCFPmode
;
24459 else if (code
== GTU
|| code
== LTU
24460 || code
== GEU
|| code
== LEU
)
24461 comp_mode
= CCUNSmode
;
24462 else if ((code
== EQ
|| code
== NE
)
24463 && unsigned_reg_p (op0
)
24464 && (unsigned_reg_p (op1
)
24465 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
24466 /* These are unsigned values, perhaps there will be a later
24467 ordering compare that can be shared with this one. */
24468 comp_mode
= CCUNSmode
;
24470 comp_mode
= CCmode
;
24472 /* If we have an unsigned compare, make sure we don't have a signed value as
24474 if (comp_mode
== CCUNSmode
&& GET_CODE (op1
) == CONST_INT
24475 && INTVAL (op1
) < 0)
24477 op0
= copy_rtx_if_shared (op0
);
24478 op1
= force_reg (GET_MODE (op0
), op1
);
24479 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
24482 /* First, the compare. */
24483 compare_result
= gen_reg_rtx (comp_mode
);
24485 /* E500 FP compare instructions on the GPRs. Yuck! */
24486 if ((!TARGET_FPRS
&& TARGET_HARD_FLOAT
)
24487 && FLOAT_MODE_P (mode
))
24489 rtx cmp
, or_result
, compare_result2
;
24490 machine_mode op_mode
= GET_MODE (op0
);
24493 if (op_mode
== VOIDmode
)
24494 op_mode
= GET_MODE (op1
);
24496 /* First reverse the condition codes that aren't directly supported. */
24504 code
= reverse_condition_maybe_unordered (code
);
24517 gcc_unreachable ();
24520 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24521 This explains the following mess. */
/* NOTE(review): the selecting switch/case structure around the
   tst*/cmp* emitters below (per-code, per-mode SF/DF/TF) was elided by
   extraction; only the emitter calls survive.  */
24529 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24530 ? gen_tstsfeq_gpr (compare_result
, op0
, op1
)
24531 : gen_cmpsfeq_gpr (compare_result
, op0
, op1
);
24535 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24536 ? gen_tstdfeq_gpr (compare_result
, op0
, op1
)
24537 : gen_cmpdfeq_gpr (compare_result
, op0
, op1
);
24543 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24544 ? gen_tsttfeq_gpr (compare_result
, op0
, op1
)
24545 : gen_cmptfeq_gpr (compare_result
, op0
, op1
);
24549 gcc_unreachable ();
24558 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24559 ? gen_tstsfgt_gpr (compare_result
, op0
, op1
)
24560 : gen_cmpsfgt_gpr (compare_result
, op0
, op1
);
24564 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24565 ? gen_tstdfgt_gpr (compare_result
, op0
, op1
)
24566 : gen_cmpdfgt_gpr (compare_result
, op0
, op1
);
24572 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24573 ? gen_tsttfgt_gpr (compare_result
, op0
, op1
)
24574 : gen_cmptfgt_gpr (compare_result
, op0
, op1
);
24578 gcc_unreachable ();
24587 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24588 ? gen_tstsflt_gpr (compare_result
, op0
, op1
)
24589 : gen_cmpsflt_gpr (compare_result
, op0
, op1
);
24593 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24594 ? gen_tstdflt_gpr (compare_result
, op0
, op1
)
24595 : gen_cmpdflt_gpr (compare_result
, op0
, op1
);
24601 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24602 ? gen_tsttflt_gpr (compare_result
, op0
, op1
)
24603 : gen_cmptflt_gpr (compare_result
, op0
, op1
);
24607 gcc_unreachable ();
24612 gcc_unreachable ();
24615 /* Synthesize LE and GE from LT/GT || EQ. */
24616 if (code
== LE
|| code
== GE
)
24620 compare_result2
= gen_reg_rtx (CCFPmode
);
24626 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24627 ? gen_tstsfeq_gpr (compare_result2
, op0
, op1
)
24628 : gen_cmpsfeq_gpr (compare_result2
, op0
, op1
);
24632 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24633 ? gen_tstdfeq_gpr (compare_result2
, op0
, op1
)
24634 : gen_cmpdfeq_gpr (compare_result2
, op0
, op1
);
24640 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24641 ? gen_tsttfeq_gpr (compare_result2
, op0
, op1
)
24642 : gen_cmptfeq_gpr (compare_result2
, op0
, op1
);
24646 gcc_unreachable ();
24651 /* OR them together. */
24652 or_result
= gen_reg_rtx (CCFPmode
);
24653 cmp
= gen_e500_cr_ior_compare (or_result
, compare_result
,
24655 compare_result
= or_result
;
24658 code
= reverse_p
? NE
: EQ
;
24663 /* IEEE 128-bit support in VSX registers when we do not have hardware
24665 else if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
24667 rtx libfunc
= NULL_RTX
;
24668 bool check_nan
= false;
/* NOTE(review): the switch on CODE selecting the libfunc was elided;
   remaining arms map each comparison class to eq/ge/le/unord optabs.  */
24675 libfunc
= optab_libfunc (eq_optab
, mode
);
24680 libfunc
= optab_libfunc (ge_optab
, mode
);
24685 libfunc
= optab_libfunc (le_optab
, mode
);
24690 libfunc
= optab_libfunc (unord_optab
, mode
);
24691 code
= (code
== UNORDERED
) ? NE
: EQ
;
24697 libfunc
= optab_libfunc (ge_optab
, mode
);
24698 code
= (code
== UNGE
) ? GE
: GT
;
24704 libfunc
= optab_libfunc (le_optab
, mode
);
24705 code
= (code
== UNLE
) ? LE
: LT
;
24711 libfunc
= optab_libfunc (eq_optab
, mode
);
24712 code
= (code
= UNEQ
) ? EQ
: NE
;
/* rs6000_generate_compare (part 2 of 2 in this extraction): emit the
   selected IEEE-128 comparison libcall (with an extra unord_optab
   libcall and a branch-around when the library would signal on
   signalling NaNs), emit the XLC-compatible TFmode compare PARALLEL
   with eight DFmode scratch CLOBBERs plus one Pmode CLOBBER, handle
   UNSPEC_SP_TEST stack-protector compares via
   gen_stack_protect_test{di,si}, fall back to a plain
   SET (compare_result, COMPARE (op0, op1)), then synthesize
   LE/GE/UNEQ/LTGT/UNGT/UNLT as an IOR of two CR-bit tests in CCEQmode,
   and finally return gen_rtx_fmt_ee (code, VOIDmode, compare_result,
   const0_rtx).  NOTE(review): extraction-mangled fragment -- braces,
   switch heads and many interior lines are elided; code kept
   byte-identical.  */
24716 gcc_unreachable ();
24719 gcc_assert (libfunc
);
24722 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
24723 SImode
, 2, op0
, mode
, op1
, mode
);
24725 /* The library signals an exception for signalling NaNs, so we need to
24726 handle isgreater, etc. by first checking isordered. */
24729 rtx ne_rtx
, normal_dest
, unord_dest
;
24730 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
24731 rtx join_label
= gen_label_rtx ();
24732 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
24733 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
24736 /* Test for either value being a NaN. */
24737 gcc_assert (unord_func
);
24738 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
24739 SImode
, 2, op0
, mode
, op1
,
24742 /* Set value (0) if either value is a NaN, and jump to the join
24744 dest
= gen_reg_rtx (SImode
);
24745 emit_move_insn (dest
, const1_rtx
);
24746 emit_insn (gen_rtx_SET (unord_cmp
,
24747 gen_rtx_COMPARE (comp_mode
, unord_dest
,
24750 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
24751 emit_jump_insn (gen_rtx_SET (pc_rtx
,
24752 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
24756 /* Do the normal comparison, knowing that the values are not
24758 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
24759 SImode
, 2, op0
, mode
, op1
,
24762 emit_insn (gen_cstoresi4 (dest
,
24763 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
24765 normal_dest
, const0_rtx
));
24767 /* Join NaN and non-Nan paths. Compare dest against 0. */
24768 emit_label (join_label
);
24772 emit_insn (gen_rtx_SET (compare_result
,
24773 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
24778 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24779 CLOBBERs to match cmptf_internal2 pattern. */
24780 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
24781 && FLOAT128_IBM_P (GET_MODE (op0
))
24782 && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
24783 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
24785 gen_rtx_SET (compare_result
,
24786 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
24787 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24788 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24789 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24790 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24791 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24792 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24793 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24794 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24795 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
24796 else if (GET_CODE (op1
) == UNSPEC
24797 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
24799 rtx op1b
= XVECEXP (op1
, 0, 0);
24800 comp_mode
= CCEQmode
;
24801 compare_result
= gen_reg_rtx (CCEQmode
);
24803 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
24805 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
24808 emit_insn (gen_rtx_SET (compare_result
,
24809 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
24812 /* Some kinds of FP comparisons need an OR operation;
24813 under flag_finite_math_only we don't bother. */
24814 if (FLOAT_MODE_P (mode
)
24815 && (!FLOAT128_IEEE_P (mode
) || TARGET_FLOAT128_HW
)
24816 && !flag_finite_math_only
24817 && !(TARGET_HARD_FLOAT
&& !TARGET_FPRS
)
24818 && (code
== LE
|| code
== GE
24819 || code
== UNEQ
|| code
== LTGT
24820 || code
== UNGT
|| code
== UNLT
))
24822 enum rtx_code or1
, or2
;
24823 rtx or1_rtx
, or2_rtx
, compare2_rtx
;
24824 rtx or_result
= gen_reg_rtx (CCEQmode
);
24828 case LE
: or1
= LT
; or2
= EQ
; break;
24829 case GE
: or1
= GT
; or2
= EQ
; break;
24830 case UNEQ
: or1
= UNORDERED
; or2
= EQ
; break;
24831 case LTGT
: or1
= LT
; or2
= GT
; break;
24832 case UNGT
: or1
= UNORDERED
; or2
= GT
; break;
24833 case UNLT
: or1
= UNORDERED
; or2
= LT
; break;
24834 default: gcc_unreachable ();
24836 validate_condition_mode (or1
, comp_mode
);
24837 validate_condition_mode (or2
, comp_mode
);
24838 or1_rtx
= gen_rtx_fmt_ee (or1
, SImode
, compare_result
, const0_rtx
);
24839 or2_rtx
= gen_rtx_fmt_ee (or2
, SImode
, compare_result
, const0_rtx
);
24840 compare2_rtx
= gen_rtx_COMPARE (CCEQmode
,
24841 gen_rtx_IOR (SImode
, or1_rtx
, or2_rtx
),
24843 emit_insn (gen_rtx_SET (or_result
, compare2_rtx
));
24845 compare_result
= or_result
;
24849 validate_condition_mode (code
, GET_MODE (compare_result
));
24851 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
24855 /* Return the diagnostic message string if the binary operation OP is
24856 not permitted on TYPE1 and TYPE2, NULL otherwise. */
/* rs6000_invalid_binary_op: return a translatable diagnostic string if
   a binary operation on TYPE1/TYPE2 is not permitted, NULL otherwise.
   After reducing complex modes to their inner mode, it rejects (unless
   -mfloat128-convert) any mix of IEEE 754R 128-bit float (KFmode) with
   IBM extended double (IFmode), and mixes of either with `long double'
   (TFmode) when TFmode has the other format (per TARGET_IEEEQUAD).
   NOTE(review): extraction-mangled fragment -- the remaining
   parameters, braces, string continuations and `return NULL' are
   elided; code kept byte-identical.  */
24859 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
24863 machine_mode mode1
= TYPE_MODE (type1
);
24864 machine_mode mode2
= TYPE_MODE (type2
);
24866 /* For complex modes, use the inner type. */
24867 if (COMPLEX_MODE_P (mode1
))
24868 mode1
= GET_MODE_INNER (mode1
);
24870 if (COMPLEX_MODE_P (mode2
))
24871 mode2
= GET_MODE_INNER (mode2
);
24873 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24874 double to intermix unless -mfloat128-convert. */
24875 if (mode1
== mode2
)
24878 if (!TARGET_FLOAT128_CVT
)
24880 if ((mode1
== KFmode
&& mode2
== IFmode
)
24881 || (mode1
== IFmode
&& mode2
== KFmode
))
24882 return N_("__float128 and __ibm128 cannot be used in the same "
24885 if (TARGET_IEEEQUAD
24886 && ((mode1
== IFmode
&& mode2
== TFmode
)
24887 || (mode1
== TFmode
&& mode2
== IFmode
)))
24888 return N_("__ibm128 and long double cannot be used in the same "
24891 if (!TARGET_IEEEQUAD
24892 && ((mode1
== KFmode
&& mode2
== TFmode
)
24893 || (mode1
== TFmode
&& mode2
== KFmode
)))
24894 return N_("__float128 and long double cannot be used in the same "
24902 /* Expand floating point conversion to/from __float128 and __ibm128. */
/* rs6000_expand_float128_convert: expand a conversion between
   __float128/__ibm128/long double and DF/SF/SI/DI modes, writing the
   result to DEST.  UNSIGNED_P selects the unsigned integer variants.
   Strategy, in order: forbid same-mode "conversion"
   (gcc_unreachable); pull SRC out of memory; recurse through a
   temporary when DEST is in memory; pick either a hardware insn
   generator from the hw_conversions[2] table (index 0 = KFmode,
   index 1 = TFmode rows, each row holding the gen_* function for every
   supported direction) or a soft-float optab (sfloat/ufloat/... ),
   treat TFmode<->KFmode as a simple gen_lowpart move when both are the
   same underlying format, and otherwise emit the conversion libcall
   via convert_optab_libfunc / emit_library_call_value.
   NOTE(review): extraction-mangled fragment -- the struct's to_df/to_sf
   field declarations, the kf_or_tf assignments, several switch heads
   and braces are elided; code kept byte-identical.  */
24905 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
24907 machine_mode dest_mode
= GET_MODE (dest
);
24908 machine_mode src_mode
= GET_MODE (src
);
24909 convert_optab cvt
= unknown_optab
;
24910 bool do_move
= false;
24911 rtx libfunc
= NULL_RTX
;
24913 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
24914 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
24918 rtx_2func_t from_df
;
24919 rtx_2func_t from_sf
;
24920 rtx_2func_t from_si_sign
;
24921 rtx_2func_t from_si_uns
;
24922 rtx_2func_t from_di_sign
;
24923 rtx_2func_t from_di_uns
;
24926 rtx_2func_t to_si_sign
;
24927 rtx_2func_t to_si_uns
;
24928 rtx_2func_t to_di_sign
;
24929 rtx_2func_t to_di_uns
;
24930 } hw_conversions
[2] = {
24931 /* convertions to/from KFmode */
24933 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
24934 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
24935 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
24936 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
24937 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
24938 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
24939 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
24940 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
24941 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
24942 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
24943 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
24944 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
24947 /* convertions to/from TFmode */
24949 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
24950 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
24951 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
24952 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
24953 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
24954 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
24955 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
24956 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
24957 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
24958 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
24959 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
24960 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
24964 if (dest_mode
== src_mode
)
24965 gcc_unreachable ();
24967 /* Eliminate memory operations. */
24969 src
= force_reg (src_mode
, src
);
24973 rtx tmp
= gen_reg_rtx (dest_mode
);
24974 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
24975 rs6000_emit_move (dest
, tmp
, dest_mode
);
24979 /* Convert to IEEE 128-bit floating point. */
24980 if (FLOAT128_IEEE_P (dest_mode
))
24982 if (dest_mode
== KFmode
)
24984 else if (dest_mode
== TFmode
)
24987 gcc_unreachable ();
24993 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
24998 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
25004 if (FLOAT128_IBM_P (src_mode
))
25013 cvt
= ufloat_optab
;
25014 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
25018 cvt
= sfloat_optab
;
25019 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
25026 cvt
= ufloat_optab
;
25027 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
25031 cvt
= sfloat_optab
;
25032 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
25037 gcc_unreachable ();
25041 /* Convert from IEEE 128-bit floating point. */
25042 else if (FLOAT128_IEEE_P (src_mode
))
25044 if (src_mode
== KFmode
)
25046 else if (src_mode
== TFmode
)
25049 gcc_unreachable ();
25055 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
25060 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
25066 if (FLOAT128_IBM_P (dest_mode
))
25076 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
25081 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
25089 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
25094 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
25099 gcc_unreachable ();
25103 /* Both IBM format. */
25104 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
25108 gcc_unreachable ();
25110 /* Handle conversion between TFmode/KFmode. */
25112 emit_move_insn (dest
, gen_lowpart (dest_mode
, src
));
25114 /* Handle conversion if we have hardware support. */
25115 else if (TARGET_FLOAT128_HW
&& hw_convert
)
25116 emit_insn ((hw_convert
) (dest
, src
));
25118 /* Call an external function to do the conversion. */
25119 else if (cvt
!= unknown_optab
)
25121 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
25122 gcc_assert (libfunc
!= NULL_RTX
);
25124 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
, 1, src
,
25127 gcc_assert (dest2
!= NULL_RTX
);
25128 if (!rtx_equal_p (dest
, dest2
))
25129 emit_move_insn (dest
, dest2
);
25133 gcc_unreachable ();
25139 /* Emit the RTL for an sISEL pattern. */
/* rs6000_emit_sISEL: emit the RTL for an sISEL (set-on-condition via
   isel) pattern -- operands[0] = (operands[1] comparison holds) ? 1 : 0,
   delegated entirely to rs6000_emit_int_cmove with const1_rtx /
   const0_rtx as the two arms.  MODE is unused.
   NOTE(review): extraction-mangled; braces elided.  Kept byte-identical.  */
25142 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED
, rtx operands
[])
25144 rs6000_emit_int_cmove (operands
[0], operands
[1], const1_rtx
, const0_rtx
);
25147 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
25148 can be used as that dest register. Return the dest register. */
/* rs6000_emit_eqne: emit RTL that yields zero iff OP1 == OP2, using
   SCRATCH as the destination when it is a real register (a SCRATCH rtx
   is replaced by a fresh pseudo).  For OP2 == 0 the answer is OP1
   itself (elided return visible only as the guard below).  A
   logical_operand OP2 is handled with XOR; anything else with
   OP1 + (-OP2).  NOTE(review): extraction-mangled fragment -- the
   `return op1;'/`return scratch;' lines and braces are elided; code
   kept byte-identical.  */
25151 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
25153 if (op2
== const0_rtx
)
25156 if (GET_CODE (scratch
) == SCRATCH
)
25157 scratch
= gen_reg_rtx (mode
);
25159 if (logical_operand (op2
, mode
))
25160 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
25162 emit_insn (gen_rtx_SET (scratch
,
25163 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
/* rs6000_emit_sCOND: expand a "store condition" -- set operands[0] to
   the 0/1 truth value of comparison operands[1] in MODE.  Generates
   the compare via rs6000_generate_compare, then: on E500 (FP without
   FPRs) moves the CR GT bit (flipping it first for NE); for condition
   codes that are not directly testable (NE/GE/LE/GEU/LEU/ORDERED/
   UNGE/UNLE) builds the reversed compare in CCEQmode and tests EQ of
   that; finally converts the condition rtx through DImode on 64-bit
   (DImode operands or FP) or sets the result directly in SImode.
   NOTE(review): extraction-mangled fragment -- declarations of
   condition_rtx/t, braces and interior lines are elided; code kept
   byte-identical.  */
25169 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
25172 machine_mode op_mode
;
25173 enum rtx_code cond_code
;
25174 rtx result
= operands
[0];
25176 condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
25177 cond_code
= GET_CODE (condition_rtx
);
25179 if (FLOAT_MODE_P (mode
)
25180 && !TARGET_FPRS
&& TARGET_HARD_FLOAT
)
25184 PUT_MODE (condition_rtx
, SImode
);
25185 t
= XEXP (condition_rtx
, 0);
25187 gcc_assert (cond_code
== NE
|| cond_code
== EQ
);
25189 if (cond_code
== NE
)
25190 emit_insn (gen_e500_flip_gt_bit (t
, t
));
25192 emit_insn (gen_move_from_CR_gt_bit (result
, t
));
25196 if (cond_code
== NE
25197 || cond_code
== GE
|| cond_code
== LE
25198 || cond_code
== GEU
|| cond_code
== LEU
25199 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
25201 rtx not_result
= gen_reg_rtx (CCEQmode
);
25202 rtx not_op
, rev_cond_rtx
;
25203 machine_mode cc_mode
;
25205 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
25207 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
25208 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
25209 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
25210 emit_insn (gen_rtx_SET (not_result
, not_op
));
25211 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
25214 op_mode
= GET_MODE (XEXP (operands
[1], 0));
25215 if (op_mode
== VOIDmode
)
25216 op_mode
= GET_MODE (XEXP (operands
[1], 1));
25218 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
25220 PUT_MODE (condition_rtx
, DImode
);
25221 convert_move (result
, condition_rtx
, 0);
25225 PUT_MODE (condition_rtx
, SImode
);
25226 emit_insn (gen_rtx_SET (result
, condition_rtx
))
25230 /* Emit a branch of kind CODE to location LOC. */
/* rs6000_emit_cbranch: emit a conditional branch -- generate the
   compare for operands[0] in MODE, then a jump
   (set pc (if_then_else cond (label_ref operands[3]) pc)).
   NOTE(review): extraction-mangled; braces elided.  Kept byte-identical.  */
25233 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
25235 rtx condition_rtx
, loc_ref
;
25237 condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
25238 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
25239 emit_jump_insn (gen_rtx_SET (pc_rtx
,
25240 gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
,
25241 loc_ref
, pc_rtx
)));
25244 /* Return the string to output a conditional branch to LABEL, which is
25245 the operand template of the label, or NULL if the branch is really a
25246 conditional return.
25248 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
25249 condition code register and its mode specifies what kind of
25250 comparison we made.
25252 REVERSED is nonzero if we should reverse the sense of the comparison.
25254 INSN is the insn. */
/* output_cbranch: return the assembler string for a conditional branch
   to LABEL (or a conditional return when LABEL is NULL).  OP's first
   operand is the CC register; its mode says what compare was done.
   REVERSED flips the sense; a branch whose length attribute is 8
   (need_longbranch) is emitted as a reversed short branch over an
   unconditional "b LABEL".  Maps the rtx code to a bc mnemonic suffix
   (ne/eq/ge/gt/le/lt/un/nu/nl/ng), appends a +/- prediction hint from
   the REG_BR_PROB note (only trusted on newer CPUs when the profile is
   real), escapes a leading '%' in the CR register name, and builds the
   result into a static 64-byte buffer (so the returned string is only
   valid until the next call).  NOTE(review): extraction-mangled
   fragment -- the E500 CCFP remapping switch, `pred'/`s' declarations
   and several interior lines are elided; code kept byte-identical.  */
25257 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
25259 static char string
[64];
25260 enum rtx_code code
= GET_CODE (op
);
25261 rtx cc_reg
= XEXP (op
, 0);
25262 machine_mode mode
= GET_MODE (cc_reg
);
25263 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
25264 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
25265 int really_reversed
= reversed
^ need_longbranch
;
25271 validate_condition_mode (code
, mode
);
25273 /* Work out which way this really branches. We could use
25274 reverse_condition_maybe_unordered here always but this
25275 makes the resulting assembler clearer. */
25276 if (really_reversed
)
25278 /* Reversal of FP compares takes care -- an ordered compare
25279 becomes an unordered compare and vice versa. */
25280 if (mode
== CCFPmode
)
25281 code
= reverse_condition_maybe_unordered (code
);
25283 code
= reverse_condition (code
);
25286 if ((!TARGET_FPRS
&& TARGET_HARD_FLOAT
) && mode
== CCFPmode
)
25288 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25293 /* Opposite of GT. */
25302 gcc_unreachable ();
25308 /* Not all of these are actually distinct opcodes, but
25309 we distinguish them for clarity of the resulting assembler. */
25310 case NE
: case LTGT
:
25311 ccode
= "ne"; break;
25312 case EQ
: case UNEQ
:
25313 ccode
= "eq"; break;
25315 ccode
= "ge"; break;
25316 case GT
: case GTU
: case UNGT
:
25317 ccode
= "gt"; break;
25319 ccode
= "le"; break;
25320 case LT
: case LTU
: case UNLT
:
25321 ccode
= "lt"; break;
25322 case UNORDERED
: ccode
= "un"; break;
25323 case ORDERED
: ccode
= "nu"; break;
25324 case UNGE
: ccode
= "nl"; break;
25325 case UNLE
: ccode
= "ng"; break;
25327 gcc_unreachable ();
25330 /* Maybe we have a guess as to how likely the branch is. */
25332 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
25333 if (note
!= NULL_RTX
)
25335 /* PROB is the difference from 50%. */
25336 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
25337 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
25339 /* Only hint for highly probable/improbable branches on newer cpus when
25340 we have real profile data, as static prediction overrides processor
25341 dynamic prediction. For older cpus we may as well always hint, but
25342 assume not taken for branches that are very close to 50% as a
25343 mispredicted taken branch is more expensive than a
25344 mispredicted not-taken branch. */
25345 if (rs6000_always_hint
25346 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
25347 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
25348 && br_prob_note_reliable_p (note
)))
25350 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
25351 && ((prob
> 0) ^ need_longbranch
))
25359 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
25361 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
25363 /* We need to escape any '%' characters in the reg_names string.
25364 Assume they'd only be the first character.... */
25365 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
25367 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
25371 /* If the branch distance was too far, we may have to use an
25372 unconditional branch to go the distance. */
25373 if (need_longbranch
)
25374 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
25376 s
+= sprintf (s
, ",%s", label
);
25382 /* Return the string to flip the GT bit on a CR. */
/* output_e500_flip_gt_bit: return the assembler string (in a static
   64-byte buffer, valid until the next call) that inverts the GT bit
   of CR field SRC into CR field DST via "crnot".  Each CR field is 4
   bits wide; `+ 1' selects the GT bit within the field.  Both operands
   must be CR registers (asserted).  NOTE(review): extraction-mangled;
   declarations of a/b, `return string;' and braces elided.  Kept
   byte-identical.  */
25384 output_e500_flip_gt_bit (rtx dst
, rtx src
)
25386 static char string
[64];
25389 gcc_assert (GET_CODE (dst
) == REG
&& CR_REGNO_P (REGNO (dst
))
25390 && GET_CODE (src
) == REG
&& CR_REGNO_P (REGNO (src
)));
25393 a
= 4 * (REGNO (dst
) - CR0_REGNO
) + 1;
25394 b
= 4 * (REGNO (src
) - CR0_REGNO
) + 1;
25396 sprintf (string
, "crnot %d,%d", a
, b
);
25400 /* Return insn for VSX or Altivec comparisons. */
/* rs6000_emit_vector_compare_inner: emit a single VSX/Altivec vector
   compare insn for CODE on OP0/OP1 when the target supports that
   comparison directly, returning the mask register (elided return) or
   presumably NULL_RTX when it cannot -- TODO confirm against upstream.
   NOTE(review): extraction-mangled fragment -- the switch over CODE
   deciding which comparisons are directly available is elided; code
   kept byte-identical.  */
25403 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
25406 machine_mode mode
= GET_MODE (op0
);
25414 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
25425 mask
= gen_reg_rtx (mode
);
25426 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
25433 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
25434 DMODE is expected destination mode. This is a recursive function. */
/* rs6000_emit_vector_compare: emit a vector compare for RCODE on
   OP0/OP1, destination mode DMODE.  Recursive strategy: first try the
   comparison directly (rs6000_emit_vector_compare_inner); otherwise
   reverse the condition and complement the result (one_cmpl_optab),
   or synthesize GE-style codes as (GT|EQ) via ior_optab over two
   recursive compares, or swap the operands and retry once
   (try_again/swap_operands).  Asserts an Altivec/VSX unit for DMODE
   and that both operands share a mode.  NOTE(review):
   extraction-mangled fragment -- the switches over RCODE, the
   try_again loop structure and several returns are elided; code kept
   byte-identical.  */
25437 rs6000_emit_vector_compare (enum rtx_code rcode
,
25439 machine_mode dmode
)
25442 bool swap_operands
= false;
25443 bool try_again
= false;
25445 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
25446 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
25448 /* See if the comparison works as is. */
25449 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
25457 swap_operands
= true;
25462 swap_operands
= true;
25470 /* Invert condition and try again.
25471 e.g., A != B becomes ~(A==B). */
25473 enum rtx_code rev_code
;
25474 enum insn_code nor_code
;
25477 rev_code
= reverse_condition_maybe_unordered (rcode
);
25478 if (rev_code
== UNKNOWN
)
25481 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
25482 if (nor_code
== CODE_FOR_nothing
)
25485 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
25489 mask
= gen_reg_rtx (dmode
);
25490 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
25498 /* Try GT/GTU/LT/LTU OR EQ */
25501 enum insn_code ior_code
;
25502 enum rtx_code new_code
;
25523 gcc_unreachable ();
25526 ior_code
= optab_handler (ior_optab
, dmode
);
25527 if (ior_code
== CODE_FOR_nothing
)
25530 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
25534 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
25538 mask
= gen_reg_rtx (dmode
);
25539 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
25550 std::swap (op0
, op1
);
25552 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
25557 /* You only get two chances. */
25561 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
25562 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
25563 operands for the relation operation COND. */
25566 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
25567 rtx cond
, rtx cc_op0
, rtx cc_op1
)
25569 machine_mode dest_mode
= GET_MODE (dest
);
25570 machine_mode mask_mode
= GET_MODE (cc_op0
);
25571 enum rtx_code rcode
= GET_CODE (cond
);
25572 machine_mode cc_mode
= CCmode
;
25575 bool invert_move
= false;
25577 if (VECTOR_UNIT_NONE_P (dest_mode
))
25580 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
25581 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
25585 /* Swap operands if we can, and fall back to doing the operation as
25586 specified, and doing a NOR to invert the test. */
25592 /* Invert condition and try again.
25593 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
25594 invert_move
= true;
25595 rcode
= reverse_condition_maybe_unordered (rcode
);
25596 if (rcode
== UNKNOWN
)
25602 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
25604 /* Invert condition to avoid compound test. */
25605 invert_move
= true;
25606 rcode
= reverse_condition (rcode
);
25614 /* Mark unsigned tests with CCUNSmode. */
25615 cc_mode
= CCUNSmode
;
25617 /* Invert condition to avoid compound test if necessary. */
25618 if (rcode
== GEU
|| rcode
== LEU
)
25620 invert_move
= true;
25621 rcode
= reverse_condition (rcode
);
25629 /* Get the vector mask for the given relational operations. */
25630 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
25636 std::swap (op_true
, op_false
);
25638 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
25639 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
25640 && (GET_CODE (op_true
) == CONST_VECTOR
25641 || GET_CODE (op_false
) == CONST_VECTOR
))
25643 rtx constant_0
= CONST0_RTX (dest_mode
);
25644 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
25646 if (op_true
== constant_m1
&& op_false
== constant_0
)
25648 emit_move_insn (dest
, mask
);
25652 else if (op_true
== constant_0
&& op_false
== constant_m1
)
25654 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
25658 /* If we can't use the vector comparison directly, perhaps we can use
25659 the mask for the true or false fields, instead of loading up a
25661 if (op_true
== constant_m1
)
25664 if (op_false
== constant_0
)
25668 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
25669 op_true
= force_reg (dest_mode
, op_true
);
25671 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
25672 op_false
= force_reg (dest_mode
, op_false
);
25674 cond2
= gen_rtx_fmt_ee (NE
, cc_mode
, gen_lowpart (dest_mode
, mask
),
25675 CONST0_RTX (dest_mode
));
25676 emit_insn (gen_rtx_SET (dest
,
25677 gen_rtx_IF_THEN_ELSE (dest_mode
,
25684 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
25685 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
25686 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
25687 hardware has no such operation. */
25690 rs6000_emit_p9_fp_minmax (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
25692 enum rtx_code code
= GET_CODE (op
);
25693 rtx op0
= XEXP (op
, 0);
25694 rtx op1
= XEXP (op
, 1);
25695 machine_mode compare_mode
= GET_MODE (op0
);
25696 machine_mode result_mode
= GET_MODE (dest
);
25697 bool max_p
= false;
25699 if (result_mode
!= compare_mode
)
25702 if (code
== GE
|| code
== GT
)
25704 else if (code
== LE
|| code
== LT
)
25709 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
25712 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
))
25718 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
25722 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25723 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
25724 operands of the last comparison is nonzero/true, FALSE_COND if it is
25725 zero/false. Return 0 if the hardware has no such operation. */
25728 rs6000_emit_p9_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
25730 enum rtx_code code
= GET_CODE (op
);
25731 rtx op0
= XEXP (op
, 0);
25732 rtx op1
= XEXP (op
, 1);
25733 machine_mode result_mode
= GET_MODE (dest
);
25738 if (!can_create_pseudo_p ())
25751 code
= swap_condition (code
);
25752 std::swap (op0
, op1
);
25759 /* Generate: [(parallel [(set (dest)
25760 (if_then_else (op (cmp1) (cmp2))
25763 (clobber (scratch))])]. */
25765 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
25766 cmove_rtx
= gen_rtx_SET (dest
,
25767 gen_rtx_IF_THEN_ELSE (result_mode
,
25772 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
25773 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
25774 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
25779 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
25780 operands of the last comparison is nonzero/true, FALSE_COND if it
25781 is zero/false. Return 0 if the hardware has no such operation. */
25784 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
25786 enum rtx_code code
= GET_CODE (op
);
25787 rtx op0
= XEXP (op
, 0);
25788 rtx op1
= XEXP (op
, 1);
25789 machine_mode compare_mode
= GET_MODE (op0
);
25790 machine_mode result_mode
= GET_MODE (dest
);
25792 bool is_against_zero
;
25794 /* These modes should always match. */
25795 if (GET_MODE (op1
) != compare_mode
25796 /* In the isel case however, we can use a compare immediate, so
25797 op1 may be a small constant. */
25798 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
25800 if (GET_MODE (true_cond
) != result_mode
)
25802 if (GET_MODE (false_cond
) != result_mode
)
25805 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25806 if (TARGET_P9_MINMAX
25807 && (compare_mode
== SFmode
|| compare_mode
== DFmode
)
25808 && (result_mode
== SFmode
|| result_mode
== DFmode
))
25810 if (rs6000_emit_p9_fp_minmax (dest
, op
, true_cond
, false_cond
))
25813 if (rs6000_emit_p9_fp_cmove (dest
, op
, true_cond
, false_cond
))
25817 /* Don't allow using floating point comparisons for integer results for
25819 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
25822 /* First, work out if the hardware can do this at all, or
25823 if it's too slow.... */
25824 if (!FLOAT_MODE_P (compare_mode
))
25827 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
25830 else if (TARGET_HARD_FLOAT
&& !TARGET_FPRS
25831 && SCALAR_FLOAT_MODE_P (compare_mode
))
25834 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
25836 /* A floating-point subtract might overflow, underflow, or produce
25837 an inexact result, thus changing the floating-point flags, so it
25838 can't be generated if we care about that. It's safe if one side
25839 of the construct is zero, since then no subtract will be
25841 if (SCALAR_FLOAT_MODE_P (compare_mode
)
25842 && flag_trapping_math
&& ! is_against_zero
)
25845 /* Eliminate half of the comparisons by switching operands, this
25846 makes the remaining code simpler. */
25847 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
25848 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
25850 code
= reverse_condition_maybe_unordered (code
);
25852 true_cond
= false_cond
;
25856 /* UNEQ and LTGT take four instructions for a comparison with zero,
25857 it'll probably be faster to use a branch here too. */
25858 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
25861 /* We're going to try to implement comparisons by performing
25862 a subtract, then comparing against zero. Unfortunately,
25863 Inf - Inf is NaN which is not zero, and so if we don't
25864 know that the operand is finite and the comparison
25865 would treat EQ different to UNORDERED, we can't do it. */
25866 if (HONOR_INFINITIES (compare_mode
)
25867 && code
!= GT
&& code
!= UNGE
25868 && (GET_CODE (op1
) != CONST_DOUBLE
25869 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
25870 /* Constructs of the form (a OP b ? a : b) are safe. */
25871 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
25872 || (! rtx_equal_p (op0
, true_cond
)
25873 && ! rtx_equal_p (op1
, true_cond
))))
25876 /* At this point we know we can use fsel. */
25878 /* Reduce the comparison to a comparison against zero. */
25879 if (! is_against_zero
)
25881 temp
= gen_reg_rtx (compare_mode
);
25882 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
25884 op1
= CONST0_RTX (compare_mode
);
25887 /* If we don't care about NaNs we can reduce some of the comparisons
25888 down to faster ones. */
25889 if (! HONOR_NANS (compare_mode
))
25895 true_cond
= false_cond
;
25908 /* Now, reduce everything down to a GE. */
25915 temp
= gen_reg_rtx (compare_mode
);
25916 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
25921 temp
= gen_reg_rtx (compare_mode
);
25922 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
25927 temp
= gen_reg_rtx (compare_mode
);
25928 emit_insn (gen_rtx_SET (temp
,
25929 gen_rtx_NEG (compare_mode
,
25930 gen_rtx_ABS (compare_mode
, op0
))));
25935 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
25936 temp
= gen_reg_rtx (result_mode
);
25937 emit_insn (gen_rtx_SET (temp
,
25938 gen_rtx_IF_THEN_ELSE (result_mode
,
25939 gen_rtx_GE (VOIDmode
,
25941 true_cond
, false_cond
)));
25942 false_cond
= true_cond
;
25945 temp
= gen_reg_rtx (compare_mode
);
25946 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
25951 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
25952 temp
= gen_reg_rtx (result_mode
);
25953 emit_insn (gen_rtx_SET (temp
,
25954 gen_rtx_IF_THEN_ELSE (result_mode
,
25955 gen_rtx_GE (VOIDmode
,
25957 true_cond
, false_cond
)));
25958 true_cond
= false_cond
;
25961 temp
= gen_reg_rtx (compare_mode
);
25962 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
25967 gcc_unreachable ();
25970 emit_insn (gen_rtx_SET (dest
,
25971 gen_rtx_IF_THEN_ELSE (result_mode
,
25972 gen_rtx_GE (VOIDmode
,
25974 true_cond
, false_cond
)));
25978 /* Same as above, but for ints (isel). */
25981 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
25983 rtx condition_rtx
, cr
;
25984 machine_mode mode
= GET_MODE (dest
);
25985 enum rtx_code cond_code
;
25986 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
25989 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
25992 /* We still have to do the compare, because isel doesn't do a
25993 compare, it just looks at the CRx bits set by a previous compare
25995 condition_rtx
= rs6000_generate_compare (op
, mode
);
25996 cond_code
= GET_CODE (condition_rtx
);
25997 cr
= XEXP (condition_rtx
, 0);
25998 signedp
= GET_MODE (cr
) == CCmode
;
26000 isel_func
= (mode
== SImode
26001 ? (signedp
? gen_isel_signed_si
: gen_isel_unsigned_si
)
26002 : (signedp
? gen_isel_signed_di
: gen_isel_unsigned_di
));
26006 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
26007 /* isel handles these directly. */
26011 /* We need to swap the sense of the comparison. */
26013 std::swap (false_cond
, true_cond
);
26014 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
26019 false_cond
= force_reg (mode
, false_cond
);
26020 if (true_cond
!= const0_rtx
)
26021 true_cond
= force_reg (mode
, true_cond
);
26023 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
26029 output_isel (rtx
*operands
)
26031 enum rtx_code code
;
26033 code
= GET_CODE (operands
[1]);
26035 if (code
== GE
|| code
== GEU
|| code
== LE
|| code
== LEU
|| code
== NE
)
26037 gcc_assert (GET_CODE (operands
[2]) == REG
26038 && GET_CODE (operands
[3]) == REG
);
26039 PUT_CODE (operands
[1], reverse_condition (code
));
26040 return "isel %0,%3,%2,%j1";
26043 return "isel %0,%2,%3,%j1";
26047 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
26049 machine_mode mode
= GET_MODE (op0
);
26053 /* VSX/altivec have direct min/max insns. */
26054 if ((code
== SMAX
|| code
== SMIN
)
26055 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
26056 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))))
26058 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
26062 if (code
== SMAX
|| code
== SMIN
)
26067 if (code
== SMAX
|| code
== UMAX
)
26068 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
26069 op0
, op1
, mode
, 0);
26071 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
26072 op1
, op0
, mode
, 0);
26073 gcc_assert (target
);
26074 if (target
!= dest
)
26075 emit_move_insn (dest
, target
);
26078 /* Split a signbit operation on 64-bit machines with direct move. Also allow
26079 for the value to come from memory or if it is already loaded into a GPR. */
26082 rs6000_split_signbit (rtx dest
, rtx src
)
26084 machine_mode d_mode
= GET_MODE (dest
);
26085 machine_mode s_mode
= GET_MODE (src
);
26086 rtx dest_di
= (d_mode
== DImode
) ? dest
: gen_lowpart (DImode
, dest
);
26087 rtx shift_reg
= dest_di
;
26089 gcc_assert (FLOAT128_IEEE_P (s_mode
) && TARGET_POWERPC64
);
26093 rtx mem
= (WORDS_BIG_ENDIAN
26094 ? adjust_address (src
, DImode
, 0)
26095 : adjust_address (src
, DImode
, 8));
26096 emit_insn (gen_rtx_SET (dest_di
, mem
));
26101 unsigned int r
= reg_or_subregno (src
);
26103 if (INT_REGNO_P (r
))
26104 shift_reg
= gen_rtx_REG (DImode
, r
+ (BYTES_BIG_ENDIAN
== 0));
26108 /* Generate the special mfvsrd instruction to get it in a GPR. */
26109 gcc_assert (VSX_REGNO_P (r
));
26110 if (s_mode
== KFmode
)
26111 emit_insn (gen_signbitkf2_dm2 (dest_di
, src
));
26113 emit_insn (gen_signbittf2_dm2 (dest_di
, src
));
26117 emit_insn (gen_lshrdi3 (dest_di
, shift_reg
, GEN_INT (63)));
26121 /* A subroutine of the atomic operation splitters. Jump to LABEL if
26122 COND is true. Mark the jump as unlikely to be taken. */
26125 emit_unlikely_jump (rtx cond
, rtx label
)
26127 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
26128 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
26129 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
26132 /* A subroutine of the atomic operation splitters. Emit a load-locked
26133 instruction in MODE. For QI/HImode, possibly use a pattern than includes
26134 the zero_extend operation. */
26137 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
26139 rtx (*fn
) (rtx
, rtx
) = NULL
;
26144 fn
= gen_load_lockedqi
;
26147 fn
= gen_load_lockedhi
;
26150 if (GET_MODE (mem
) == QImode
)
26151 fn
= gen_load_lockedqi_si
;
26152 else if (GET_MODE (mem
) == HImode
)
26153 fn
= gen_load_lockedhi_si
;
26155 fn
= gen_load_lockedsi
;
26158 fn
= gen_load_lockeddi
;
26161 fn
= gen_load_lockedti
;
26164 gcc_unreachable ();
26166 emit_insn (fn (reg
, mem
));
26169 /* A subroutine of the atomic operation splitters. Emit a store-conditional
26170 instruction in MODE. */
26173 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
26175 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
26180 fn
= gen_store_conditionalqi
;
26183 fn
= gen_store_conditionalhi
;
26186 fn
= gen_store_conditionalsi
;
26189 fn
= gen_store_conditionaldi
;
26192 fn
= gen_store_conditionalti
;
26195 gcc_unreachable ();
26198 /* Emit sync before stwcx. to address PPC405 Erratum. */
26199 if (PPC405_ERRATUM77
)
26200 emit_insn (gen_hwsync ());
26202 emit_insn (fn (res
, mem
, val
));
26205 /* Expand barriers before and after a load_locked/store_cond sequence. */
26208 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
26210 rtx addr
= XEXP (mem
, 0);
26211 int strict_p
= (reload_in_progress
|| reload_completed
);
26213 if (!legitimate_indirect_address_p (addr
, strict_p
)
26214 && !legitimate_indexed_address_p (addr
, strict_p
))
26216 addr
= force_reg (Pmode
, addr
);
26217 mem
= replace_equiv_address_nv (mem
, addr
);
26222 case MEMMODEL_RELAXED
:
26223 case MEMMODEL_CONSUME
:
26224 case MEMMODEL_ACQUIRE
:
26226 case MEMMODEL_RELEASE
:
26227 case MEMMODEL_ACQ_REL
:
26228 emit_insn (gen_lwsync ());
26230 case MEMMODEL_SEQ_CST
:
26231 emit_insn (gen_hwsync ());
26234 gcc_unreachable ();
26240 rs6000_post_atomic_barrier (enum memmodel model
)
26244 case MEMMODEL_RELAXED
:
26245 case MEMMODEL_CONSUME
:
26246 case MEMMODEL_RELEASE
:
26248 case MEMMODEL_ACQUIRE
:
26249 case MEMMODEL_ACQ_REL
:
26250 case MEMMODEL_SEQ_CST
:
26251 emit_insn (gen_isync ());
26254 gcc_unreachable ();
26258 /* A subroutine of the various atomic expanders. For sub-word operations,
26259 we must adjust things to operate on SImode. Given the original MEM,
26260 return a new aligned memory. Also build and return the quantities by
26261 which to shift and mask. */
26264 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
26266 rtx addr
, align
, shift
, mask
, mem
;
26267 HOST_WIDE_INT shift_mask
;
26268 machine_mode mode
= GET_MODE (orig_mem
);
26270 /* For smaller modes, we have to implement this via SImode. */
26271 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
26273 addr
= XEXP (orig_mem
, 0);
26274 addr
= force_reg (GET_MODE (addr
), addr
);
26276 /* Aligned memory containing subword. Generate a new memory. We
26277 do not want any of the existing MEM_ATTR data, as we're now
26278 accessing memory outside the original object. */
26279 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
26280 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26281 mem
= gen_rtx_MEM (SImode
, align
);
26282 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
26283 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
26284 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
26286 /* Shift amount for subword relative to aligned word. */
26287 shift
= gen_reg_rtx (SImode
);
26288 addr
= gen_lowpart (SImode
, addr
);
26289 rtx tmp
= gen_reg_rtx (SImode
);
26290 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
26291 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
26292 if (BYTES_BIG_ENDIAN
)
26293 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
26294 shift
, 1, OPTAB_LIB_WIDEN
);
26297 /* Mask for insertion. */
26298 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
26299 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26305 /* A subroutine of the various atomic expanders. For sub-word operands,
26306 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
26309 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
26313 x
= gen_reg_rtx (SImode
);
26314 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
26315 gen_rtx_NOT (SImode
, mask
),
26318 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
26323 /* A subroutine of the various atomic expanders. For sub-word operands,
26324 extract WIDE to NARROW via SHIFT. */
26327 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
26329 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
26330 wide
, 1, OPTAB_LIB_WIDEN
);
26331 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
26334 /* Expand an atomic compare and swap operation. */
26337 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
26339 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
26340 rtx label1
, label2
, x
, mask
, shift
;
26341 machine_mode mode
, orig_mode
;
26342 enum memmodel mod_s
, mod_f
;
26345 boolval
= operands
[0];
26346 retval
= operands
[1];
26348 oldval
= operands
[3];
26349 newval
= operands
[4];
26350 is_weak
= (INTVAL (operands
[5]) != 0);
26351 mod_s
= memmodel_base (INTVAL (operands
[6]));
26352 mod_f
= memmodel_base (INTVAL (operands
[7]));
26353 orig_mode
= mode
= GET_MODE (mem
);
26355 mask
= shift
= NULL_RTX
;
26356 if (mode
== QImode
|| mode
== HImode
)
26358 /* Before power8, we didn't have access to lbarx/lharx, so generate a
26359 lwarx and shift/mask operations. With power8, we need to do the
26360 comparison in SImode, but the store is still done in QI/HImode. */
26361 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
26363 if (!TARGET_SYNC_HI_QI
)
26365 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
26367 /* Shift and mask OLDVAL into position with the word. */
26368 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
26369 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26371 /* Shift and mask NEWVAL into position within the word. */
26372 newval
= convert_modes (SImode
, mode
, newval
, 1);
26373 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
26374 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26377 /* Prepare to adjust the return value. */
26378 retval
= gen_reg_rtx (SImode
);
26381 else if (reg_overlap_mentioned_p (retval
, oldval
))
26382 oldval
= copy_to_reg (oldval
);
26384 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
26385 oldval
= copy_to_mode_reg (mode
, oldval
);
26387 if (reg_overlap_mentioned_p (retval
, newval
))
26388 newval
= copy_to_reg (newval
);
26390 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
26395 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
26396 emit_label (XEXP (label1
, 0));
26398 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
26400 emit_load_locked (mode
, retval
, mem
);
26404 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
26405 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26407 cond
= gen_reg_rtx (CCmode
);
26408 /* If we have TImode, synthesize a comparison. */
26409 if (mode
!= TImode
)
26410 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
26413 rtx xor1_result
= gen_reg_rtx (DImode
);
26414 rtx xor2_result
= gen_reg_rtx (DImode
);
26415 rtx or_result
= gen_reg_rtx (DImode
);
26416 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
26417 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
26418 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
26419 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
26421 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
26422 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
26423 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
26424 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
26427 emit_insn (gen_rtx_SET (cond
, x
));
26429 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26430 emit_unlikely_jump (x
, label2
);
26434 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
26436 emit_store_conditional (orig_mode
, cond
, mem
, x
);
26440 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26441 emit_unlikely_jump (x
, label1
);
26444 if (!is_mm_relaxed (mod_f
))
26445 emit_label (XEXP (label2
, 0));
26447 rs6000_post_atomic_barrier (mod_s
);
26449 if (is_mm_relaxed (mod_f
))
26450 emit_label (XEXP (label2
, 0));
26453 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
26454 else if (mode
!= GET_MODE (operands
[1]))
26455 convert_move (operands
[1], retval
, 1);
26457 /* In all cases, CR0 contains EQ on success, and NE on failure. */
26458 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
26459 emit_insn (gen_rtx_SET (boolval
, x
));
26462 /* Expand an atomic exchange operation. */
26465 rs6000_expand_atomic_exchange (rtx operands
[])
26467 rtx retval
, mem
, val
, cond
;
26469 enum memmodel model
;
26470 rtx label
, x
, mask
, shift
;
26472 retval
= operands
[0];
26475 model
= memmodel_base (INTVAL (operands
[3]));
26476 mode
= GET_MODE (mem
);
26478 mask
= shift
= NULL_RTX
;
26479 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
26481 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
26483 /* Shift and mask VAL into position with the word. */
26484 val
= convert_modes (SImode
, mode
, val
, 1);
26485 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
26486 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26488 /* Prepare to adjust the return value. */
26489 retval
= gen_reg_rtx (SImode
);
26493 mem
= rs6000_pre_atomic_barrier (mem
, model
);
26495 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
26496 emit_label (XEXP (label
, 0));
26498 emit_load_locked (mode
, retval
, mem
);
26502 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
26504 cond
= gen_reg_rtx (CCmode
);
26505 emit_store_conditional (mode
, cond
, mem
, x
);
26507 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26508 emit_unlikely_jump (x
, label
);
26510 rs6000_post_atomic_barrier (model
);
26513 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
26516 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
26517 to perform. MEM is the memory on which to operate. VAL is the second
26518 operand of the binary operator. BEFORE and AFTER are optional locations to
26519 return the value of MEM either before of after the operation. MODEL_RTX
26520 is a CONST_INT containing the memory model to use. */
26523 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
26524 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
26526 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
26527 machine_mode mode
= GET_MODE (mem
);
26528 machine_mode store_mode
= mode
;
26529 rtx label
, x
, cond
, mask
, shift
;
26530 rtx before
= orig_before
, after
= orig_after
;
26532 mask
= shift
= NULL_RTX
;
26533 /* On power8, we want to use SImode for the operation. On previous systems,
26534 use the operation in a subword and shift/mask to get the proper byte or
26536 if (mode
== QImode
|| mode
== HImode
)
26538 if (TARGET_SYNC_HI_QI
)
26540 val
= convert_modes (SImode
, mode
, val
, 1);
26542 /* Prepare to adjust the return value. */
26543 before
= gen_reg_rtx (SImode
);
26545 after
= gen_reg_rtx (SImode
);
26550 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
26552 /* Shift and mask VAL into position with the word. */
26553 val
= convert_modes (SImode
, mode
, val
, 1);
26554 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
26555 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26561 /* We've already zero-extended VAL. That is sufficient to
26562 make certain that it does not affect other bits. */
26567 /* If we make certain that all of the other bits in VAL are
26568 set, that will be sufficient to not affect other bits. */
26569 x
= gen_rtx_NOT (SImode
, mask
);
26570 x
= gen_rtx_IOR (SImode
, x
, val
);
26571 emit_insn (gen_rtx_SET (val
, x
));
26578 /* These will all affect bits outside the field and need
26579 adjustment via MASK within the loop. */
26583 gcc_unreachable ();
26586 /* Prepare to adjust the return value. */
26587 before
= gen_reg_rtx (SImode
);
26589 after
= gen_reg_rtx (SImode
);
26590 store_mode
= mode
= SImode
;
26594 mem
= rs6000_pre_atomic_barrier (mem
, model
);
26596 label
= gen_label_rtx ();
26597 emit_label (label
);
26598 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
26600 if (before
== NULL_RTX
)
26601 before
= gen_reg_rtx (mode
);
26603 emit_load_locked (mode
, before
, mem
);
26607 x
= expand_simple_binop (mode
, AND
, before
, val
,
26608 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26609 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
26613 after
= expand_simple_binop (mode
, code
, before
, val
,
26614 after
, 1, OPTAB_LIB_WIDEN
);
26620 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
26621 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26622 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
26624 else if (store_mode
!= mode
)
26625 x
= convert_modes (store_mode
, mode
, x
, 1);
26627 cond
= gen_reg_rtx (CCmode
);
26628 emit_store_conditional (store_mode
, cond
, mem
, x
);
26630 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26631 emit_unlikely_jump (x
, label
);
26633 rs6000_post_atomic_barrier (model
);
26637 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26638 then do the calcuations in a SImode register. */
26640 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
26642 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
26644 else if (store_mode
!= mode
)
26646 /* QImode/HImode on machines with lbarx/lharx where we do the native
26647 operation and then do the calcuations in a SImode register. */
26649 convert_move (orig_before
, before
, 1);
26651 convert_move (orig_after
, after
, 1);
26653 else if (orig_after
&& after
!= orig_after
)
26654 emit_move_insn (orig_after
, after
);
26657 /* Emit instructions to move SRC to DST. Called by splitters for
26658 multi-register moves. It will emit at most one instruction for
26659 each register that is accessed; that is, it won't emit li/lis pairs
26660 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26664 rs6000_split_multireg_move (rtx dst
, rtx src
)
26666 /* The register number of the first register being moved. */
26668 /* The mode that is to be moved. */
26670 /* The mode that the move is being done in, and its size. */
26671 machine_mode reg_mode
;
26673 /* The number of registers that will be moved. */
26676 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
26677 mode
= GET_MODE (dst
);
26678 nregs
= hard_regno_nregs
[reg
][mode
];
26679 if (FP_REGNO_P (reg
))
26680 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
26681 ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? DFmode
: SFmode
);
26682 else if (ALTIVEC_REGNO_P (reg
))
26683 reg_mode
= V16QImode
;
26684 else if (TARGET_E500_DOUBLE
&& FLOAT128_2REG_P (mode
))
26687 reg_mode
= word_mode
;
26688 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
26690 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
26692 /* TDmode residing in FP registers is special, since the ISA requires that
26693 the lower-numbered word of a register pair is always the most significant
26694 word, even in little-endian mode. This does not match the usual subreg
26695 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
26696 the appropriate constituent registers "by hand" in little-endian mode.
26698 Note we do not need to check for destructive overlap here since TDmode
26699 can only reside in even/odd register pairs. */
26700 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
26705 for (i
= 0; i
< nregs
; i
++)
26707 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
26708 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
26710 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
26711 i
* reg_mode_size
);
26713 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
26714 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
26716 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
26717 i
* reg_mode_size
);
26719 emit_insn (gen_rtx_SET (p_dst
, p_src
));
26725 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
26727 /* Move register range backwards, if we might have destructive
26730 for (i
= nregs
- 1; i
>= 0; i
--)
26731 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
26732 i
* reg_mode_size
),
26733 simplify_gen_subreg (reg_mode
, src
, mode
,
26734 i
* reg_mode_size
)));
26740 bool used_update
= false;
26741 rtx restore_basereg
= NULL_RTX
;
26743 if (MEM_P (src
) && INT_REGNO_P (reg
))
26747 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
26748 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
26751 breg
= XEXP (XEXP (src
, 0), 0);
26752 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
26753 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
26754 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
26755 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
26756 src
= replace_equiv_address (src
, breg
);
26758 else if (! rs6000_offsettable_memref_p (src
, reg_mode
))
26760 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
26762 rtx basereg
= XEXP (XEXP (src
, 0), 0);
26765 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
26766 emit_insn (gen_rtx_SET (ndst
,
26767 gen_rtx_MEM (reg_mode
,
26769 used_update
= true;
26772 emit_insn (gen_rtx_SET (basereg
,
26773 XEXP (XEXP (src
, 0), 1)));
26774 src
= replace_equiv_address (src
, basereg
);
26778 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
26779 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
26780 src
= replace_equiv_address (src
, basereg
);
26784 breg
= XEXP (src
, 0);
26785 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
26786 breg
= XEXP (breg
, 0);
26788 /* If the base register we are using to address memory is
26789 also a destination reg, then change that register last. */
26791 && REGNO (breg
) >= REGNO (dst
)
26792 && REGNO (breg
) < REGNO (dst
) + nregs
)
26793 j
= REGNO (breg
) - REGNO (dst
);
26795 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
26799 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
26800 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
26803 breg
= XEXP (XEXP (dst
, 0), 0);
26804 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
26805 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
26806 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
26808 /* We have to update the breg before doing the store.
26809 Use store with update, if available. */
26813 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
26814 emit_insn (TARGET_32BIT
26815 ? (TARGET_POWERPC64
26816 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
26817 : gen_movsi_update (breg
, breg
, delta_rtx
, nsrc
))
26818 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
26819 used_update
= true;
26822 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
26823 dst
= replace_equiv_address (dst
, breg
);
26825 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
)
26826 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
26828 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
26830 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
26833 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
26834 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
26837 used_update
= true;
26840 emit_insn (gen_rtx_SET (basereg
,
26841 XEXP (XEXP (dst
, 0), 1)));
26842 dst
= replace_equiv_address (dst
, basereg
);
26846 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
26847 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
26848 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
26850 && REG_P (offsetreg
)
26851 && REGNO (basereg
) != REGNO (offsetreg
));
26852 if (REGNO (basereg
) == 0)
26854 rtx tmp
= offsetreg
;
26855 offsetreg
= basereg
;
26858 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
26859 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
26860 dst
= replace_equiv_address (dst
, basereg
);
26863 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
26864 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
));
26867 for (i
= 0; i
< nregs
; i
++)
26869 /* Calculate index to next subword. */
26874 /* If compiler already emitted move of first word by
26875 store with update, no need to do anything. */
26876 if (j
== 0 && used_update
)
26879 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
26880 j
* reg_mode_size
),
26881 simplify_gen_subreg (reg_mode
, src
, mode
,
26882 j
* reg_mode_size
)));
26884 if (restore_basereg
!= NULL_RTX
)
26885 emit_insn (restore_basereg
);
26890 /* This page contains routines that are used to determine what the
26891 function prologue and epilogue code will do and write them out. */
26896 return !call_used_regs
[r
] && df_regs_ever_live_p (r
);
26899 /* Determine whether the gp REG is really used. */
26902 rs6000_reg_live_or_pic_offset_p (int reg
)
26904 /* We need to mark the PIC offset register live for the same conditions
26905 as it is set up, or otherwise it won't be saved before we clobber it. */
26907 if (reg
== RS6000_PIC_OFFSET_TABLE_REGNUM
&& !TARGET_SINGLE_PIC_BASE
)
26909 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
26910 && (crtl
->calls_eh_return
26911 || df_regs_ever_live_p (reg
)
26912 || !constant_pool_empty_p ()))
26915 if ((DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
26920 /* If the function calls eh_return, claim used all the registers that would
26921 be checked for liveness otherwise. */
26923 return ((crtl
->calls_eh_return
|| df_regs_ever_live_p (reg
))
26924 && !call_used_regs
[reg
]);
26927 /* Return the first fixed-point register that is required to be
26928 saved. 32 if none. */
26931 first_reg_to_save (void)
26935 /* Find lowest numbered live register. */
26936 for (first_reg
= 13; first_reg
<= 31; first_reg
++)
26937 if (save_reg_p (first_reg
))
26940 if (first_reg
> RS6000_PIC_OFFSET_TABLE_REGNUM
26941 && ((DEFAULT_ABI
== ABI_V4
&& flag_pic
!= 0)
26942 || (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
26943 || (TARGET_TOC
&& TARGET_MINIMAL_TOC
))
26944 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
26945 first_reg
= RS6000_PIC_OFFSET_TABLE_REGNUM
;
26949 && crtl
->uses_pic_offset_table
26950 && first_reg
> RS6000_PIC_OFFSET_TABLE_REGNUM
)
26951 return RS6000_PIC_OFFSET_TABLE_REGNUM
;
/* Similar, for FP regs.  Returns the first callee-saved FP register
   (f14 == regno 46) that is live, or 64 if none needs saving.  */

static int
first_fp_reg_to_save (void)
{
  int first_reg;

  /* Find lowest numbered live register.  */
  for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
    if (save_reg_p (first_reg))
      break;

  return first_reg;
}
26972 /* Similar, for AltiVec regs. */
26975 first_altivec_reg_to_save (void)
26979 /* Stack frame remains as is unless we are in AltiVec ABI. */
26980 if (! TARGET_ALTIVEC_ABI
)
26981 return LAST_ALTIVEC_REGNO
+ 1;
26983 /* On Darwin, the unwind routines are compiled without
26984 TARGET_ALTIVEC, and use save_world to save/restore the
26985 altivec registers when necessary. */
26986 if (DEFAULT_ABI
== ABI_DARWIN
&& crtl
->calls_eh_return
26987 && ! TARGET_ALTIVEC
)
26988 return FIRST_ALTIVEC_REGNO
+ 20;
26990 /* Find lowest numbered live register. */
26991 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
<= LAST_ALTIVEC_REGNO
; ++i
)
26992 if (save_reg_p (i
))
26998 /* Return a 32-bit mask of the AltiVec registers we need to set in
26999 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
27000 the 32-bit word is 0. */
27002 static unsigned int
27003 compute_vrsave_mask (void)
27005 unsigned int i
, mask
= 0;
27007 /* On Darwin, the unwind routines are compiled without
27008 TARGET_ALTIVEC, and use save_world to save/restore the
27009 call-saved altivec registers when necessary. */
27010 if (DEFAULT_ABI
== ABI_DARWIN
&& crtl
->calls_eh_return
27011 && ! TARGET_ALTIVEC
)
27014 /* First, find out if we use _any_ altivec registers. */
27015 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
27016 if (df_regs_ever_live_p (i
))
27017 mask
|= ALTIVEC_REG_BIT (i
);
27022 /* Next, remove the argument registers from the set. These must
27023 be in the VRSAVE mask set by the caller, so we don't need to add
27024 them in again. More importantly, the mask we compute here is
27025 used to generate CLOBBERs in the set_vrsave insn, and we do not
27026 wish the argument registers to die. */
27027 for (i
= ALTIVEC_ARG_MIN_REG
; i
< (unsigned) crtl
->args
.info
.vregno
; i
++)
27028 mask
&= ~ALTIVEC_REG_BIT (i
);
27030 /* Similarly, remove the return value from the set. */
27033 diddle_return_value (is_altivec_return_reg
, &yes
);
27035 mask
&= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN
);
27041 /* For a very restricted set of circumstances, we can cut down the
27042 size of prologues/epilogues by calling our own save/restore-the-world
27046 compute_save_world_info (rs6000_stack_t
*info
)
27048 info
->world_save_p
= 1;
27050 = (WORLD_SAVE_P (info
)
27051 && DEFAULT_ABI
== ABI_DARWIN
27052 && !cfun
->has_nonlocal_label
27053 && info
->first_fp_reg_save
== FIRST_SAVED_FP_REGNO
27054 && info
->first_gp_reg_save
== FIRST_SAVED_GP_REGNO
27055 && info
->first_altivec_reg_save
== FIRST_SAVED_ALTIVEC_REGNO
27056 && info
->cr_save_p
);
27058 /* This will not work in conjunction with sibcalls. Make sure there
27059 are none. (This check is expensive, but seldom executed.) */
27060 if (WORLD_SAVE_P (info
))
27063 for (insn
= get_last_insn_anywhere (); insn
; insn
= PREV_INSN (insn
))
27064 if (CALL_P (insn
) && SIBLING_CALL_P (insn
))
27066 info
->world_save_p
= 0;
27071 if (WORLD_SAVE_P (info
))
27073 /* Even if we're not touching VRsave, make sure there's room on the
27074 stack for it, if it looks like we're calling SAVE_WORLD, which
27075 will attempt to save it. */
27076 info
->vrsave_size
= 4;
27078 /* If we are going to save the world, we need to save the link register too. */
27079 info
->lr_save_p
= 1;
27081 /* "Save" the VRsave register too if we're saving the world. */
27082 if (info
->vrsave_mask
== 0)
27083 info
->vrsave_mask
= compute_vrsave_mask ();
27085 /* Because the Darwin register save/restore routines only handle
27086 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
27088 gcc_assert (info
->first_fp_reg_save
>= FIRST_SAVED_FP_REGNO
27089 && (info
->first_altivec_reg_save
27090 >= FIRST_SAVED_ALTIVEC_REGNO
));
27098 is_altivec_return_reg (rtx reg
, void *xyes
)
27100 bool *yes
= (bool *) xyes
;
27101 if (REGNO (reg
) == ALTIVEC_ARG_RETURN
)
27106 /* Return whether REG is a global user reg or has been specifed by
27107 -ffixed-REG. We should not restore these, and so cannot use
27108 lmw or out-of-line restore functions if there are any. We also
27109 can't save them (well, emit frame notes for them), because frame
27110 unwinding during exception handling will restore saved registers. */
27113 fixed_reg_p (int reg
)
27115 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
27116 backend sets it, overriding anything the user might have given. */
27117 if (reg
== RS6000_PIC_OFFSET_TABLE_REGNUM
27118 && ((DEFAULT_ABI
== ABI_V4
&& flag_pic
)
27119 || (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
27120 || (TARGET_TOC
&& TARGET_MINIMAL_TOC
)))
27123 return fixed_regs
[reg
];
/* Determine the strategy for savings/restoring registers.  Bit flags
   combined into the result of rs6000_savres_strategy.  */
enum {
  SAVE_MULTIPLE = 0x1,
  SAVE_INLINE_GPRS = 0x2,
  SAVE_INLINE_FPRS = 0x4,
  SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
  SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
  SAVE_INLINE_VRS = 0x20,
  REST_MULTIPLE = 0x100,
  REST_INLINE_GPRS = 0x200,
  REST_INLINE_FPRS = 0x400,
  REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
  REST_INLINE_VRS = 0x1000
};
27143 rs6000_savres_strategy (rs6000_stack_t
*info
,
27144 bool using_static_chain_p
)
27148 /* Select between in-line and out-of-line save and restore of regs.
27149 First, all the obvious cases where we don't use out-of-line. */
27150 if (crtl
->calls_eh_return
27151 || cfun
->machine
->ra_need_lr
)
27152 strategy
|= (SAVE_INLINE_FPRS
| REST_INLINE_FPRS
27153 | SAVE_INLINE_GPRS
| REST_INLINE_GPRS
27154 | SAVE_INLINE_VRS
| REST_INLINE_VRS
);
27156 if (info
->first_gp_reg_save
== 32)
27157 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27159 if (info
->first_fp_reg_save
== 64
27160 /* The out-of-line FP routines use double-precision stores;
27161 we can't use those routines if we don't have such stores. */
27162 || (TARGET_HARD_FLOAT
&& !TARGET_DOUBLE_FLOAT
))
27163 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27165 if (info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1)
27166 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27168 /* Define cutoff for using out-of-line functions to save registers. */
27169 if (DEFAULT_ABI
== ABI_V4
|| TARGET_ELF
)
27171 if (!optimize_size
)
27173 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27174 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27175 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27179 /* Prefer out-of-line restore if it will exit. */
27180 if (info
->first_fp_reg_save
> 61)
27181 strategy
|= SAVE_INLINE_FPRS
;
27182 if (info
->first_gp_reg_save
> 29)
27184 if (info
->first_fp_reg_save
== 64)
27185 strategy
|= SAVE_INLINE_GPRS
;
27187 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27189 if (info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
)
27190 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27193 else if (DEFAULT_ABI
== ABI_DARWIN
)
27195 if (info
->first_fp_reg_save
> 60)
27196 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27197 if (info
->first_gp_reg_save
> 29)
27198 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27199 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27203 gcc_checking_assert (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
);
27204 if ((flag_shrink_wrap_separate
&& optimize_function_for_speed_p (cfun
))
27205 || info
->first_fp_reg_save
> 61)
27206 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27207 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27208 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27211 /* Don't bother to try to save things out-of-line if r11 is occupied
27212 by the static chain. It would require too much fiddling and the
27213 static chain is rarely used anyway. FPRs are saved w.r.t the stack
27214 pointer on Darwin, and AIX uses r1 or r12. */
27215 if (using_static_chain_p
27216 && (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
))
27217 strategy
|= ((DEFAULT_ABI
== ABI_DARWIN
? 0 : SAVE_INLINE_FPRS
)
27219 | SAVE_INLINE_VRS
);
27221 /* Saving CR interferes with the exit routines used on the SPE, so
27224 && info
->spe_64bit_regs_used
27225 && info
->cr_save_p
)
27226 strategy
|= REST_INLINE_GPRS
;
27228 /* We can only use the out-of-line routines to restore fprs if we've
27229 saved all the registers from first_fp_reg_save in the prologue.
27230 Otherwise, we risk loading garbage. Of course, if we have saved
27231 out-of-line then we know we haven't skipped any fprs. */
27232 if ((strategy
& SAVE_INLINE_FPRS
)
27233 && !(strategy
& REST_INLINE_FPRS
))
27237 for (i
= info
->first_fp_reg_save
; i
< 64; i
++)
27238 if (fixed_regs
[i
] || !save_reg_p (i
))
27240 strategy
|= REST_INLINE_FPRS
;
27245 /* Similarly, for altivec regs. */
27246 if ((strategy
& SAVE_INLINE_VRS
)
27247 && !(strategy
& REST_INLINE_VRS
))
27251 for (i
= info
->first_altivec_reg_save
; i
< LAST_ALTIVEC_REGNO
+ 1; i
++)
27252 if (fixed_regs
[i
] || !save_reg_p (i
))
27254 strategy
|= REST_INLINE_VRS
;
27259 /* info->lr_save_p isn't yet set if the only reason lr needs to be
27260 saved is an out-of-line save or restore. Set up the value for
27261 the next test (excluding out-of-line gprs). */
27262 bool lr_save_p
= (info
->lr_save_p
27263 || !(strategy
& SAVE_INLINE_FPRS
)
27264 || !(strategy
& SAVE_INLINE_VRS
)
27265 || !(strategy
& REST_INLINE_FPRS
)
27266 || !(strategy
& REST_INLINE_VRS
));
27268 if (TARGET_MULTIPLE
27269 && !TARGET_POWERPC64
27270 && !(TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
)
27271 && info
->first_gp_reg_save
< 31
27272 && !(flag_shrink_wrap
27273 && flag_shrink_wrap_separate
27274 && optimize_function_for_speed_p (cfun
)))
27276 /* Prefer store multiple for saves over out-of-line routines,
27277 since the store-multiple instruction will always be smaller. */
27278 strategy
|= SAVE_INLINE_GPRS
| SAVE_MULTIPLE
;
27280 /* The situation is more complicated with load multiple. We'd
27281 prefer to use the out-of-line routines for restores, since the
27282 "exit" out-of-line routines can handle the restore of LR and the
27283 frame teardown. However if doesn't make sense to use the
27284 out-of-line routine if that is the only reason we'd need to save
27285 LR, and we can't use the "exit" out-of-line gpr restore if we
27286 have saved some fprs; In those cases it is advantageous to use
27287 load multiple when available. */
27288 if (info
->first_fp_reg_save
!= 64 || !lr_save_p
)
27289 strategy
|= REST_INLINE_GPRS
| REST_MULTIPLE
;
27292 /* Using the "exit" out-of-line routine does not improve code size
27293 if using it would require lr to be saved and if only saving one
27295 else if (!lr_save_p
&& info
->first_gp_reg_save
> 29)
27296 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27298 /* We can only use load multiple or the out-of-line routines to
27299 restore gprs if we've saved all the registers from
27300 first_gp_reg_save. Otherwise, we risk loading garbage.
27301 Of course, if we have saved out-of-line or used stmw then we know
27302 we haven't skipped any gprs. */
27303 if ((strategy
& (SAVE_INLINE_GPRS
| SAVE_MULTIPLE
)) == SAVE_INLINE_GPRS
27304 && (strategy
& (REST_INLINE_GPRS
| REST_MULTIPLE
)) != REST_INLINE_GPRS
)
27308 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
27309 if (fixed_reg_p (i
) || !save_reg_p (i
))
27311 strategy
|= REST_INLINE_GPRS
;
27312 strategy
&= ~REST_MULTIPLE
;
27317 if (TARGET_ELF
&& TARGET_64BIT
)
27319 if (!(strategy
& SAVE_INLINE_FPRS
))
27320 strategy
|= SAVE_NOINLINE_FPRS_SAVES_LR
;
27321 else if (!(strategy
& SAVE_INLINE_GPRS
)
27322 && info
->first_fp_reg_save
== 64)
27323 strategy
|= SAVE_NOINLINE_GPRS_SAVES_LR
;
27325 else if (TARGET_AIX
&& !(strategy
& REST_INLINE_FPRS
))
27326 strategy
|= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
;
27328 if (TARGET_MACHO
&& !(strategy
& SAVE_INLINE_FPRS
))
27329 strategy
|= SAVE_NOINLINE_FPRS_SAVES_LR
;
27334 /* Calculate the stack information for the current function. This is
27335 complicated by having two separate calling sequences, the AIX calling
27336 sequence and the V.4 calling sequence.
27338 AIX (and Darwin/Mac OS X) stack frames look like:
27340 SP----> +---------------------------------------+
27341 | back chain to caller | 0 0
27342 +---------------------------------------+
27343 | saved CR | 4 8 (8-11)
27344 +---------------------------------------+
27346 +---------------------------------------+
27347 | reserved for compilers | 12 24
27348 +---------------------------------------+
27349 | reserved for binders | 16 32
27350 +---------------------------------------+
27351 | saved TOC pointer | 20 40
27352 +---------------------------------------+
27353 | Parameter save area (+padding*) (P) | 24 48
27354 +---------------------------------------+
27355 | Alloca space (A) | 24+P etc.
27356 +---------------------------------------+
27357 | Local variable space (L) | 24+P+A
27358 +---------------------------------------+
27359 | Float/int conversion temporary (X) | 24+P+A+L
27360 +---------------------------------------+
27361 | Save area for AltiVec registers (W) | 24+P+A+L+X
27362 +---------------------------------------+
27363 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
27364 +---------------------------------------+
27365 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
27366 +---------------------------------------+
27367 | Save area for GP registers (G) | 24+P+A+X+L+X+W+Y+Z
27368 +---------------------------------------+
27369 | Save area for FP registers (F) | 24+P+A+X+L+X+W+Y+Z+G
27370 +---------------------------------------+
27371 old SP->| back chain to caller's caller |
27372 +---------------------------------------+
27374 * If the alloca area is present, the parameter save area is
27375 padded so that the former starts 16-byte aligned.
27377 The required alignment for AIX configurations is two words (i.e., 8
27380 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
27382 SP----> +---------------------------------------+
27383 | Back chain to caller | 0
27384 +---------------------------------------+
27385 | Save area for CR | 8
27386 +---------------------------------------+
27388 +---------------------------------------+
27389 | Saved TOC pointer | 24
27390 +---------------------------------------+
27391 | Parameter save area (+padding*) (P) | 32
27392 +---------------------------------------+
27393 | Alloca space (A) | 32+P
27394 +---------------------------------------+
27395 | Local variable space (L) | 32+P+A
27396 +---------------------------------------+
27397 | Save area for AltiVec registers (W) | 32+P+A+L
27398 +---------------------------------------+
27399 | AltiVec alignment padding (Y) | 32+P+A+L+W
27400 +---------------------------------------+
27401 | Save area for GP registers (G) | 32+P+A+L+W+Y
27402 +---------------------------------------+
27403 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
27404 +---------------------------------------+
27405 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
27406 +---------------------------------------+
27408 * If the alloca area is present, the parameter save area is
27409 padded so that the former starts 16-byte aligned.
27411 V.4 stack frames look like:
27413 SP----> +---------------------------------------+
27414 | back chain to caller | 0
27415 +---------------------------------------+
27416 | caller's saved LR | 4
27417 +---------------------------------------+
27418 | Parameter save area (+padding*) (P) | 8
27419 +---------------------------------------+
27420 | Alloca space (A) | 8+P
27421 +---------------------------------------+
27422 | Varargs save area (V) | 8+P+A
27423 +---------------------------------------+
27424 | Local variable space (L) | 8+P+A+V
27425 +---------------------------------------+
27426 | Float/int conversion temporary (X) | 8+P+A+V+L
27427 +---------------------------------------+
27428 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
27429 +---------------------------------------+
27430 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
27431 +---------------------------------------+
27432 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
27433 +---------------------------------------+
27434 | SPE: area for 64-bit GP registers |
27435 +---------------------------------------+
27436 | SPE alignment padding |
27437 +---------------------------------------+
27438 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
27439 +---------------------------------------+
27440 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
27441 +---------------------------------------+
27442 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
27443 +---------------------------------------+
27444 old SP->| back chain to caller's caller |
27445 +---------------------------------------+
27447 * If the alloca area is present and the required alignment is
27448 16 bytes, the parameter save area is padded so that the
27449 alloca area starts 16-byte aligned.
27451 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27452 given. (But note below and in sysv4.h that we require only 8 and
27453 may round up the size of our stack frame anyways. The historical
27454 reason is early versions of powerpc-linux which didn't properly
27455 align the stack at program startup. A happy side-effect is that
27456 -mno-eabi libraries can be used with -meabi programs.)
27458 The EABI configuration defaults to the V.4 layout. However,
27459 the stack alignment requirements may differ. If -mno-eabi is not
27460 given, the required stack alignment is 8 bytes; if -mno-eabi is
27461 given, the required alignment is 16 bytes. (But see V.4 comment
27464 #ifndef ABI_STACK_BOUNDARY
27465 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
27468 static rs6000_stack_t
*
27469 rs6000_stack_info (void)
27471 /* We should never be called for thunks, we are not set up for that. */
27472 gcc_assert (!cfun
->is_thunk
);
27474 rs6000_stack_t
*info
= &stack_info
;
27475 int reg_size
= TARGET_32BIT
? 4 : 8;
27480 HOST_WIDE_INT non_fixed_size
;
27481 bool using_static_chain_p
;
27483 if (reload_completed
&& info
->reload_completed
)
27486 memset (info
, 0, sizeof (*info
));
27487 info
->reload_completed
= reload_completed
;
27491 /* Cache value so we don't rescan instruction chain over and over. */
27492 if (cfun
->machine
->spe_insn_chain_scanned_p
== 0)
27493 cfun
->machine
->spe_insn_chain_scanned_p
27494 = spe_func_has_64bit_regs_p () + 1;
27495 info
->spe_64bit_regs_used
= cfun
->machine
->spe_insn_chain_scanned_p
- 1;
27498 /* Select which calling sequence. */
27499 info
->abi
= DEFAULT_ABI
;
27501 /* Calculate which registers need to be saved & save area size. */
27502 info
->first_gp_reg_save
= first_reg_to_save ();
27503 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27504 even if it currently looks like we won't. Reload may need it to
27505 get at a constant; if so, it will have already created a constant
27506 pool entry for it. */
27507 if (((TARGET_TOC
&& TARGET_MINIMAL_TOC
)
27508 || (flag_pic
== 1 && DEFAULT_ABI
== ABI_V4
)
27509 || (flag_pic
&& DEFAULT_ABI
== ABI_DARWIN
))
27510 && crtl
->uses_const_pool
27511 && info
->first_gp_reg_save
> RS6000_PIC_OFFSET_TABLE_REGNUM
)
27512 first_gp
= RS6000_PIC_OFFSET_TABLE_REGNUM
;
27514 first_gp
= info
->first_gp_reg_save
;
27516 info
->gp_size
= reg_size
* (32 - first_gp
);
27518 /* For the SPE, we have an additional upper 32-bits on each GPR.
27519 Ideally we should save the entire 64-bits only when the upper
27520 half is used in SIMD instructions. Since we only record
27521 registers live (not the size they are used in), this proves
27522 difficult because we'd have to traverse the instruction chain at
27523 the right time, taking reload into account. This is a real pain,
27524 so we opt to save the GPRs in 64-bits always if but one register
27525 gets used in 64-bits. Otherwise, all the registers in the frame
27526 get saved in 32-bits.
27528 So... since when we save all GPRs (except the SP) in 64-bits, the
27529 traditional GP save area will be empty. */
27530 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
27533 info
->first_fp_reg_save
= first_fp_reg_to_save ();
27534 info
->fp_size
= 8 * (64 - info
->first_fp_reg_save
);
27536 info
->first_altivec_reg_save
= first_altivec_reg_to_save ();
27537 info
->altivec_size
= 16 * (LAST_ALTIVEC_REGNO
+ 1
27538 - info
->first_altivec_reg_save
);
27540 /* Does this function call anything? */
27541 info
->calls_p
= (!crtl
->is_leaf
|| cfun
->machine
->ra_needs_full_frame
);
27543 /* Determine if we need to save the condition code registers. */
27544 if (save_reg_p (CR2_REGNO
)
27545 || save_reg_p (CR3_REGNO
)
27546 || save_reg_p (CR4_REGNO
))
27548 info
->cr_save_p
= 1;
27549 if (DEFAULT_ABI
== ABI_V4
)
27550 info
->cr_size
= reg_size
;
27553 /* If the current function calls __builtin_eh_return, then we need
27554 to allocate stack space for registers that will hold data for
27555 the exception handler. */
27556 if (crtl
->calls_eh_return
)
27559 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
27562 /* SPE saves EH registers in 64-bits. */
27563 ehrd_size
= i
* (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0
27564 ? UNITS_PER_SPE_WORD
: UNITS_PER_WORD
);
27569 /* In the ELFv2 ABI, we also need to allocate space for separate
27570 CR field save areas if the function calls __builtin_eh_return. */
27571 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
27573 /* This hard-codes that we have three call-saved CR fields. */
27574 ehcr_size
= 3 * reg_size
;
27575 /* We do *not* use the regular CR save mechanism. */
27576 info
->cr_save_p
= 0;
27581 /* Determine various sizes. */
27582 info
->reg_size
= reg_size
;
27583 info
->fixed_size
= RS6000_SAVE_AREA
;
27584 info
->vars_size
= RS6000_ALIGN (get_frame_size (), 8);
27585 if (cfun
->calls_alloca
)
27587 RS6000_ALIGN (crtl
->outgoing_args_size
+ info
->fixed_size
,
27588 STACK_BOUNDARY
/ BITS_PER_UNIT
) - info
->fixed_size
;
27590 info
->parm_size
= RS6000_ALIGN (crtl
->outgoing_args_size
,
27591 TARGET_ALTIVEC
? 16 : 8);
27592 if (FRAME_GROWS_DOWNWARD
)
27594 += RS6000_ALIGN (info
->fixed_size
+ info
->vars_size
+ info
->parm_size
,
27595 ABI_STACK_BOUNDARY
/ BITS_PER_UNIT
)
27596 - (info
->fixed_size
+ info
->vars_size
+ info
->parm_size
);
27598 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
27599 info
->spe_gp_size
= 8 * (32 - first_gp
);
27601 if (TARGET_ALTIVEC_ABI
)
27602 info
->vrsave_mask
= compute_vrsave_mask ();
27604 if (TARGET_ALTIVEC_VRSAVE
&& info
->vrsave_mask
)
27605 info
->vrsave_size
= 4;
27607 compute_save_world_info (info
);
27609 /* Calculate the offsets. */
27610 switch (DEFAULT_ABI
)
27614 gcc_unreachable ();
27619 info
->fp_save_offset
= -info
->fp_size
;
27620 info
->gp_save_offset
= info
->fp_save_offset
- info
->gp_size
;
27622 if (TARGET_ALTIVEC_ABI
)
27624 info
->vrsave_save_offset
= info
->gp_save_offset
- info
->vrsave_size
;
27626 /* Align stack so vector save area is on a quadword boundary.
27627 The padding goes above the vectors. */
27628 if (info
->altivec_size
!= 0)
27629 info
->altivec_padding_size
= info
->vrsave_save_offset
& 0xF;
27631 info
->altivec_save_offset
= info
->vrsave_save_offset
27632 - info
->altivec_padding_size
27633 - info
->altivec_size
;
27634 gcc_assert (info
->altivec_size
== 0
27635 || info
->altivec_save_offset
% 16 == 0);
27637 /* Adjust for AltiVec case. */
27638 info
->ehrd_offset
= info
->altivec_save_offset
- ehrd_size
;
27641 info
->ehrd_offset
= info
->gp_save_offset
- ehrd_size
;
27643 info
->ehcr_offset
= info
->ehrd_offset
- ehcr_size
;
27644 info
->cr_save_offset
= reg_size
; /* first word when 64-bit. */
27645 info
->lr_save_offset
= 2*reg_size
;
27649 info
->fp_save_offset
= -info
->fp_size
;
27650 info
->gp_save_offset
= info
->fp_save_offset
- info
->gp_size
;
27651 info
->cr_save_offset
= info
->gp_save_offset
- info
->cr_size
;
27653 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
27655 /* Align stack so SPE GPR save area is aligned on a
27656 double-word boundary. */
27657 if (info
->spe_gp_size
!= 0 && info
->cr_save_offset
!= 0)
27658 info
->spe_padding_size
= 8 - (-info
->cr_save_offset
% 8);
27660 info
->spe_padding_size
= 0;
27662 info
->spe_gp_save_offset
= info
->cr_save_offset
27663 - info
->spe_padding_size
27664 - info
->spe_gp_size
;
27666 /* Adjust for SPE case. */
27667 info
->ehrd_offset
= info
->spe_gp_save_offset
;
27669 else if (TARGET_ALTIVEC_ABI
)
27671 info
->vrsave_save_offset
= info
->cr_save_offset
- info
->vrsave_size
;
27673 /* Align stack so vector save area is on a quadword boundary. */
27674 if (info
->altivec_size
!= 0)
27675 info
->altivec_padding_size
= 16 - (-info
->vrsave_save_offset
% 16);
27677 info
->altivec_save_offset
= info
->vrsave_save_offset
27678 - info
->altivec_padding_size
27679 - info
->altivec_size
;
27681 /* Adjust for AltiVec case. */
27682 info
->ehrd_offset
= info
->altivec_save_offset
;
27685 info
->ehrd_offset
= info
->cr_save_offset
;
27687 info
->ehrd_offset
-= ehrd_size
;
27688 info
->lr_save_offset
= reg_size
;
27691 save_align
= (TARGET_ALTIVEC_ABI
|| DEFAULT_ABI
== ABI_DARWIN
) ? 16 : 8;
27692 info
->save_size
= RS6000_ALIGN (info
->fp_size
27694 + info
->altivec_size
27695 + info
->altivec_padding_size
27696 + info
->spe_gp_size
27697 + info
->spe_padding_size
27701 + info
->vrsave_size
,
27704 non_fixed_size
= info
->vars_size
+ info
->parm_size
+ info
->save_size
;
27706 info
->total_size
= RS6000_ALIGN (non_fixed_size
+ info
->fixed_size
,
27707 ABI_STACK_BOUNDARY
/ BITS_PER_UNIT
);
27709 /* Determine if we need to save the link register. */
27711 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
27713 && !TARGET_PROFILE_KERNEL
)
27714 || (DEFAULT_ABI
== ABI_V4
&& cfun
->calls_alloca
)
27715 #ifdef TARGET_RELOCATABLE
27716 || (DEFAULT_ABI
== ABI_V4
27717 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
27718 && !constant_pool_empty_p ())
27720 || rs6000_ra_ever_killed ())
27721 info
->lr_save_p
= 1;
27723 using_static_chain_p
= (cfun
->static_chain_decl
!= NULL_TREE
27724 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
)
27725 && call_used_regs
[STATIC_CHAIN_REGNUM
]);
27726 info
->savres_strategy
= rs6000_savres_strategy (info
, using_static_chain_p
);
27728 if (!(info
->savres_strategy
& SAVE_INLINE_GPRS
)
27729 || !(info
->savres_strategy
& SAVE_INLINE_FPRS
)
27730 || !(info
->savres_strategy
& SAVE_INLINE_VRS
)
27731 || !(info
->savres_strategy
& REST_INLINE_GPRS
)
27732 || !(info
->savres_strategy
& REST_INLINE_FPRS
)
27733 || !(info
->savres_strategy
& REST_INLINE_VRS
))
27734 info
->lr_save_p
= 1;
27736 if (info
->lr_save_p
)
27737 df_set_regs_ever_live (LR_REGNO
, true);
27739 /* Determine if we need to allocate any stack frame:
27741 For AIX we need to push the stack if a frame pointer is needed
27742 (because the stack might be dynamically adjusted), if we are
27743 debugging, if we make calls, or if the sum of fp_save, gp_save,
27744 and local variables are more than the space needed to save all
27745 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27746 + 18*8 = 288 (GPR13 reserved).
27748 For V.4 we don't have the stack cushion that AIX uses, but assume
27749 that the debugger can handle stackless frames. */
27754 else if (DEFAULT_ABI
== ABI_V4
)
27755 info
->push_p
= non_fixed_size
!= 0;
27757 else if (frame_pointer_needed
)
27760 else if (TARGET_XCOFF
&& write_symbols
!= NO_DEBUG
)
27764 info
->push_p
= non_fixed_size
> (TARGET_32BIT
? 220 : 288);
27769 /* Return true if the current function uses any GPRs in 64-bit SIMD
27773 spe_func_has_64bit_regs_p (void)
27775 rtx_insn
*insns
, *insn
;
27777 /* Functions that save and restore all the call-saved registers will
27778 need to save/restore the registers in 64-bits. */
27779 if (crtl
->calls_eh_return
27780 || cfun
->calls_setjmp
27781 || crtl
->has_nonlocal_goto
)
27784 insns
= get_insns ();
27786 for (insn
= NEXT_INSN (insns
); insn
!= NULL_RTX
; insn
= NEXT_INSN (insn
))
27792 /* FIXME: This should be implemented with attributes...
27794 (set_attr "spe64" "true")....then,
27795 if (get_spe64(insn)) return true;
27797 It's the only reliable way to do the stuff below. */
27799 i
= PATTERN (insn
);
27800 if (GET_CODE (i
) == SET
)
27802 machine_mode mode
= GET_MODE (SET_SRC (i
));
27804 if (SPE_VECTOR_MODE (mode
))
27806 if (TARGET_E500_DOUBLE
27807 && (mode
== DFmode
|| FLOAT128_2REG_P (mode
)))
27817 debug_stack_info (rs6000_stack_t
*info
)
27819 const char *abi_string
;
27822 info
= rs6000_stack_info ();
27824 fprintf (stderr
, "\nStack information for function %s:\n",
27825 ((current_function_decl
&& DECL_NAME (current_function_decl
))
27826 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl
))
27831 default: abi_string
= "Unknown"; break;
27832 case ABI_NONE
: abi_string
= "NONE"; break;
27833 case ABI_AIX
: abi_string
= "AIX"; break;
27834 case ABI_ELFv2
: abi_string
= "ELFv2"; break;
27835 case ABI_DARWIN
: abi_string
= "Darwin"; break;
27836 case ABI_V4
: abi_string
= "V.4"; break;
27839 fprintf (stderr
, "\tABI = %5s\n", abi_string
);
27841 if (TARGET_ALTIVEC_ABI
)
27842 fprintf (stderr
, "\tALTIVEC ABI extensions enabled.\n");
27844 if (TARGET_SPE_ABI
)
27845 fprintf (stderr
, "\tSPE ABI extensions enabled.\n");
27847 if (info
->first_gp_reg_save
!= 32)
27848 fprintf (stderr
, "\tfirst_gp_reg_save = %5d\n", info
->first_gp_reg_save
);
27850 if (info
->first_fp_reg_save
!= 64)
27851 fprintf (stderr
, "\tfirst_fp_reg_save = %5d\n", info
->first_fp_reg_save
);
27853 if (info
->first_altivec_reg_save
<= LAST_ALTIVEC_REGNO
)
27854 fprintf (stderr
, "\tfirst_altivec_reg_save = %5d\n",
27855 info
->first_altivec_reg_save
);
27857 if (info
->lr_save_p
)
27858 fprintf (stderr
, "\tlr_save_p = %5d\n", info
->lr_save_p
);
27860 if (info
->cr_save_p
)
27861 fprintf (stderr
, "\tcr_save_p = %5d\n", info
->cr_save_p
);
27863 if (info
->vrsave_mask
)
27864 fprintf (stderr
, "\tvrsave_mask = 0x%x\n", info
->vrsave_mask
);
27867 fprintf (stderr
, "\tpush_p = %5d\n", info
->push_p
);
27870 fprintf (stderr
, "\tcalls_p = %5d\n", info
->calls_p
);
27873 fprintf (stderr
, "\tgp_save_offset = %5d\n", info
->gp_save_offset
);
27876 fprintf (stderr
, "\tfp_save_offset = %5d\n", info
->fp_save_offset
);
27878 if (info
->altivec_size
)
27879 fprintf (stderr
, "\taltivec_save_offset = %5d\n",
27880 info
->altivec_save_offset
);
27882 if (info
->spe_gp_size
)
27883 fprintf (stderr
, "\tspe_gp_save_offset = %5d\n",
27884 info
->spe_gp_save_offset
);
27886 if (info
->vrsave_size
)
27887 fprintf (stderr
, "\tvrsave_save_offset = %5d\n",
27888 info
->vrsave_save_offset
);
27890 if (info
->lr_save_p
)
27891 fprintf (stderr
, "\tlr_save_offset = %5d\n", info
->lr_save_offset
);
27893 if (info
->cr_save_p
)
27894 fprintf (stderr
, "\tcr_save_offset = %5d\n", info
->cr_save_offset
);
27896 if (info
->varargs_save_offset
)
27897 fprintf (stderr
, "\tvarargs_save_offset = %5d\n", info
->varargs_save_offset
);
27899 if (info
->total_size
)
27900 fprintf (stderr
, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC
"\n",
27903 if (info
->vars_size
)
27904 fprintf (stderr
, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC
"\n",
27907 if (info
->parm_size
)
27908 fprintf (stderr
, "\tparm_size = %5d\n", info
->parm_size
);
27910 if (info
->fixed_size
)
27911 fprintf (stderr
, "\tfixed_size = %5d\n", info
->fixed_size
);
27914 fprintf (stderr
, "\tgp_size = %5d\n", info
->gp_size
);
27916 if (info
->spe_gp_size
)
27917 fprintf (stderr
, "\tspe_gp_size = %5d\n", info
->spe_gp_size
);
27920 fprintf (stderr
, "\tfp_size = %5d\n", info
->fp_size
);
27922 if (info
->altivec_size
)
27923 fprintf (stderr
, "\taltivec_size = %5d\n", info
->altivec_size
);
27925 if (info
->vrsave_size
)
27926 fprintf (stderr
, "\tvrsave_size = %5d\n", info
->vrsave_size
);
27928 if (info
->altivec_padding_size
)
27929 fprintf (stderr
, "\taltivec_padding_size= %5d\n",
27930 info
->altivec_padding_size
);
27932 if (info
->spe_padding_size
)
27933 fprintf (stderr
, "\tspe_padding_size = %5d\n",
27934 info
->spe_padding_size
);
27937 fprintf (stderr
, "\tcr_size = %5d\n", info
->cr_size
);
27939 if (info
->save_size
)
27940 fprintf (stderr
, "\tsave_size = %5d\n", info
->save_size
);
27942 if (info
->reg_size
!= 4)
27943 fprintf (stderr
, "\treg_size = %5d\n", info
->reg_size
);
27945 fprintf (stderr
, "\tsave-strategy = %04x\n", info
->savres_strategy
);
27947 fprintf (stderr
, "\n");
27951 rs6000_return_addr (int count
, rtx frame
)
27953 /* Currently we don't optimize very well between prolog and body
27954 code and for PIC code the code can be actually quite bad, so
27955 don't try to be too clever here. */
27957 || ((DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
) && flag_pic
))
27959 cfun
->machine
->ra_needs_full_frame
= 1;
27966 plus_constant (Pmode
,
27968 (gen_rtx_MEM (Pmode
,
27969 memory_address (Pmode
, frame
))),
27970 RETURN_ADDRESS_OFFSET
)));
27973 cfun
->machine
->ra_need_lr
= 1;
27974 return get_hard_reg_initial_val (Pmode
, LR_REGNO
);
27977 /* Say whether a function is a candidate for sibcall handling or not. */
27980 rs6000_function_ok_for_sibcall (tree decl
, tree exp
)
27985 fntype
= TREE_TYPE (decl
);
27987 fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
27989 /* We can't do it if the called function has more vector parameters
27990 than the current function; there's nowhere to put the VRsave code. */
27991 if (TARGET_ALTIVEC_ABI
27992 && TARGET_ALTIVEC_VRSAVE
27993 && !(decl
&& decl
== current_function_decl
))
27995 function_args_iterator args_iter
;
27999 /* Functions with vector parameters are required to have a
28000 prototype, so the argument type info must be available
28002 FOREACH_FUNCTION_ARGS(fntype
, type
, args_iter
)
28003 if (TREE_CODE (type
) == VECTOR_TYPE
28004 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type
)))
28007 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl
), type
, args_iter
)
28008 if (TREE_CODE (type
) == VECTOR_TYPE
28009 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type
)))
28016 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
28017 functions, because the callee may have a different TOC pointer to
28018 the caller and there's no way to ensure we restore the TOC when
28019 we return. With the secure-plt SYSV ABI we can't make non-local
28020 calls when -fpic/PIC because the plt call stubs use r30. */
28021 if (DEFAULT_ABI
== ABI_DARWIN
28022 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
28024 && !DECL_EXTERNAL (decl
)
28025 && !DECL_WEAK (decl
)
28026 && (*targetm
.binds_local_p
) (decl
))
28027 || (DEFAULT_ABI
== ABI_V4
28028 && (!TARGET_SECURE_PLT
28031 && (*targetm
.binds_local_p
) (decl
)))))
28033 tree attr_list
= TYPE_ATTRIBUTES (fntype
);
28035 if (!lookup_attribute ("longcall", attr_list
)
28036 || lookup_attribute ("shortcall", attr_list
))
28044 rs6000_ra_ever_killed (void)
28050 if (cfun
->is_thunk
)
28053 if (cfun
->machine
->lr_save_state
)
28054 return cfun
->machine
->lr_save_state
- 1;
28056 /* regs_ever_live has LR marked as used if any sibcalls are present,
28057 but this should not force saving and restoring in the
28058 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
28059 clobbers LR, so that is inappropriate. */
28061 /* Also, the prologue can generate a store into LR that
28062 doesn't really count, like this:
28065 bcl to set PIC register
28069 When we're called from the epilogue, we need to avoid counting
28070 this as a store. */
28072 push_topmost_sequence ();
28073 top
= get_insns ();
28074 pop_topmost_sequence ();
28075 reg
= gen_rtx_REG (Pmode
, LR_REGNO
);
28077 for (insn
= NEXT_INSN (top
); insn
!= NULL_RTX
; insn
= NEXT_INSN (insn
))
28083 if (!SIBLING_CALL_P (insn
))
28086 else if (find_regno_note (insn
, REG_INC
, LR_REGNO
))
28088 else if (set_of (reg
, insn
) != NULL_RTX
28089 && !prologue_epilogue_contains (insn
))
28096 /* Emit instructions needed to load the TOC register.
28097 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
28098 a constant pool; or for SVR4 -fpic. */
28101 rs6000_emit_load_toc_table (int fromprolog
)
28104 dest
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
28106 if (TARGET_ELF
&& TARGET_SECURE_PLT
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
)
28109 rtx lab
, tmp1
, tmp2
, got
;
28111 lab
= gen_label_rtx ();
28112 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (lab
));
28113 lab
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
28116 got
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
28120 got
= rs6000_got_sym ();
28121 tmp1
= tmp2
= dest
;
28124 tmp1
= gen_reg_rtx (Pmode
);
28125 tmp2
= gen_reg_rtx (Pmode
);
28127 emit_insn (gen_load_toc_v4_PIC_1 (lab
));
28128 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
28129 emit_insn (gen_load_toc_v4_PIC_3b (tmp2
, tmp1
, got
, lab
));
28130 emit_insn (gen_load_toc_v4_PIC_3c (dest
, tmp2
, got
, lab
));
28132 else if (TARGET_ELF
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
28134 emit_insn (gen_load_toc_v4_pic_si ());
28135 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
28137 else if (TARGET_ELF
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
28140 rtx temp0
= (fromprolog
28141 ? gen_rtx_REG (Pmode
, 0)
28142 : gen_reg_rtx (Pmode
));
28148 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
28149 symF
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
28151 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCL", rs6000_pic_labelno
);
28152 symL
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
28154 emit_insn (gen_load_toc_v4_PIC_1 (symF
));
28155 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
28156 emit_insn (gen_load_toc_v4_PIC_2 (temp0
, dest
, symL
, symF
));
28162 tocsym
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
28164 lab
= gen_label_rtx ();
28165 emit_insn (gen_load_toc_v4_PIC_1b (tocsym
, lab
));
28166 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
28167 if (TARGET_LINK_STACK
)
28168 emit_insn (gen_addsi3 (dest
, dest
, GEN_INT (4)));
28169 emit_move_insn (temp0
, gen_rtx_MEM (Pmode
, dest
));
28171 emit_insn (gen_addsi3 (dest
, temp0
, dest
));
28173 else if (TARGET_ELF
&& !TARGET_AIX
&& flag_pic
== 0 && TARGET_MINIMAL_TOC
)
28175 /* This is for AIX code running in non-PIC ELF32. */
28176 rtx realsym
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
28179 emit_insn (gen_elf_high (dest
, realsym
));
28180 emit_insn (gen_elf_low (dest
, dest
, realsym
));
28184 gcc_assert (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
);
28187 emit_insn (gen_load_toc_aix_si (dest
));
28189 emit_insn (gen_load_toc_aix_di (dest
));
28193 /* Emit instructions to restore the link register after determining where
28194 its value has been stored. */
28197 rs6000_emit_eh_reg_restore (rtx source
, rtx scratch
)
28199 rs6000_stack_t
*info
= rs6000_stack_info ();
28202 operands
[0] = source
;
28203 operands
[1] = scratch
;
28205 if (info
->lr_save_p
)
28207 rtx frame_rtx
= stack_pointer_rtx
;
28208 HOST_WIDE_INT sp_offset
= 0;
28211 if (frame_pointer_needed
28212 || cfun
->calls_alloca
28213 || info
->total_size
> 32767)
28215 tmp
= gen_frame_mem (Pmode
, frame_rtx
);
28216 emit_move_insn (operands
[1], tmp
);
28217 frame_rtx
= operands
[1];
28219 else if (info
->push_p
)
28220 sp_offset
= info
->total_size
;
28222 tmp
= plus_constant (Pmode
, frame_rtx
,
28223 info
->lr_save_offset
+ sp_offset
);
28224 tmp
= gen_frame_mem (Pmode
, tmp
);
28225 emit_move_insn (tmp
, operands
[0]);
28228 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNO
), operands
[0]);
28230 /* Freeze lr_save_p. We've just emitted rtl that depends on the
28231 state of lr_save_p so any change from here on would be a bug. In
28232 particular, stop rs6000_ra_ever_killed from considering the SET
28233 of lr we may have added just above. */
28234 cfun
->machine
->lr_save_state
= info
->lr_save_p
+ 1;
28237 static GTY(()) alias_set_type set
= -1;
28240 get_TOC_alias_set (void)
28243 set
= new_alias_set ();
28247 /* This returns nonzero if the current function uses the TOC. This is
28248 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
28249 is generated by the ABI_V4 load_toc_* patterns. */
28256 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
28259 rtx pat
= PATTERN (insn
);
28262 if (GET_CODE (pat
) == PARALLEL
)
28263 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
28265 rtx sub
= XVECEXP (pat
, 0, i
);
28266 if (GET_CODE (sub
) == USE
)
28268 sub
= XEXP (sub
, 0);
28269 if (GET_CODE (sub
) == UNSPEC
28270 && XINT (sub
, 1) == UNSPEC_TOC
)
28280 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
28282 rtx tocrel
, tocreg
, hi
;
28284 if (TARGET_DEBUG_ADDR
)
28286 if (GET_CODE (symbol
) == SYMBOL_REF
)
28287 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28291 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
28292 GET_RTX_NAME (GET_CODE (symbol
)));
28293 debug_rtx (symbol
);
28297 if (!can_create_pseudo_p ())
28298 df_set_regs_ever_live (TOC_REGISTER
, true);
28300 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
28301 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
28302 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
28305 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
28306 if (largetoc_reg
!= NULL
)
28308 emit_move_insn (largetoc_reg
, hi
);
28311 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
28314 /* Issue assembly directives that create a reference to the given DWARF
28315 FRAME_TABLE_LABEL from the current function section. */
28317 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label
)
28319 fprintf (asm_out_file
, "\t.ref %s\n",
28320 (* targetm
.strip_name_encoding
) (frame_table_label
));
28323 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28324 and the change to the stack pointer. */
28327 rs6000_emit_stack_tie (rtx fp
, bool hard_frame_needed
)
28334 regs
[i
++] = gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
28335 if (hard_frame_needed
)
28336 regs
[i
++] = gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
28337 if (!(REGNO (fp
) == STACK_POINTER_REGNUM
28338 || (hard_frame_needed
28339 && REGNO (fp
) == HARD_FRAME_POINTER_REGNUM
)))
28342 p
= rtvec_alloc (i
);
28345 rtx mem
= gen_frame_mem (BLKmode
, regs
[i
]);
28346 RTVEC_ELT (p
, i
) = gen_rtx_SET (mem
, const0_rtx
);
28349 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode
, p
)));
28352 /* Emit the correct code for allocating stack space, as insns.
28353 If COPY_REG, make sure a copy of the old frame is left there.
28354 The generated code may use hard register 0 as a temporary. */
28357 rs6000_emit_allocate_stack (HOST_WIDE_INT size
, rtx copy_reg
, int copy_off
)
28360 rtx stack_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
28361 rtx tmp_reg
= gen_rtx_REG (Pmode
, 0);
28362 rtx todec
= gen_int_mode (-size
, Pmode
);
28365 if (INTVAL (todec
) != -size
)
28367 warning (0, "stack frame too large");
28368 emit_insn (gen_trap ());
28372 if (crtl
->limit_stack
)
28374 if (REG_P (stack_limit_rtx
)
28375 && REGNO (stack_limit_rtx
) > 1
28376 && REGNO (stack_limit_rtx
) <= 31)
28378 emit_insn (gen_add3_insn (tmp_reg
, stack_limit_rtx
, GEN_INT (size
)));
28379 emit_insn (gen_cond_trap (LTU
, stack_reg
, tmp_reg
,
28382 else if (GET_CODE (stack_limit_rtx
) == SYMBOL_REF
28384 && DEFAULT_ABI
== ABI_V4
28387 rtx toload
= gen_rtx_CONST (VOIDmode
,
28388 gen_rtx_PLUS (Pmode
,
28392 emit_insn (gen_elf_high (tmp_reg
, toload
));
28393 emit_insn (gen_elf_low (tmp_reg
, tmp_reg
, toload
));
28394 emit_insn (gen_cond_trap (LTU
, stack_reg
, tmp_reg
,
28398 warning (0, "stack limit expression is not supported");
28404 emit_insn (gen_add3_insn (copy_reg
, stack_reg
, GEN_INT (copy_off
)));
28406 emit_move_insn (copy_reg
, stack_reg
);
28411 /* Need a note here so that try_split doesn't get confused. */
28412 if (get_last_insn () == NULL_RTX
)
28413 emit_note (NOTE_INSN_DELETED
);
28414 insn
= emit_move_insn (tmp_reg
, todec
);
28415 try_split (PATTERN (insn
), insn
, 0);
28419 insn
= emit_insn (TARGET_32BIT
28420 ? gen_movsi_update_stack (stack_reg
, stack_reg
,
28422 : gen_movdi_di_update_stack (stack_reg
, stack_reg
,
28423 todec
, stack_reg
));
28424 /* Since we didn't use gen_frame_mem to generate the MEM, grab
28425 it now and set the alias set/attributes. The above gen_*_update
28426 calls will generate a PARALLEL with the MEM set being the first
28428 par
= PATTERN (insn
);
28429 gcc_assert (GET_CODE (par
) == PARALLEL
);
28430 set
= XVECEXP (par
, 0, 0);
28431 gcc_assert (GET_CODE (set
) == SET
);
28432 mem
= SET_DEST (set
);
28433 gcc_assert (MEM_P (mem
));
28434 MEM_NOTRAP_P (mem
) = 1;
28435 set_mem_alias_set (mem
, get_frame_alias_set ());
28437 RTX_FRAME_RELATED_P (insn
) = 1;
28438 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
28439 gen_rtx_SET (stack_reg
, gen_rtx_PLUS (Pmode
, stack_reg
,
28440 GEN_INT (-size
))));
28444 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28446 #if PROBE_INTERVAL > 32768
28447 #error Cannot use indexed addressing mode for stack probing
28450 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28451 inclusive. These are offsets from the current stack pointer. */
28454 rs6000_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
28456 /* See if we have a constant small number of probes to generate. If so,
28457 that's the easy case. */
28458 if (first
+ size
<= 32768)
28462 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28463 it exceeds SIZE. If only one probe is needed, this will not
28464 generate any code. Then probe at FIRST + SIZE. */
28465 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
28466 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
28469 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
28473 /* Otherwise, do the same as above, but in a loop. Note that we must be
28474 extra careful with variables wrapping around because we might be at
28475 the very top (or the very bottom) of the address space and we have
28476 to be able to handle this case properly; in particular, we use an
28477 equality test for the loop condition. */
28480 HOST_WIDE_INT rounded_size
;
28481 rtx r12
= gen_rtx_REG (Pmode
, 12);
28482 rtx r0
= gen_rtx_REG (Pmode
, 0);
28484 /* Sanity check for the addressing mode we're going to use. */
28485 gcc_assert (first
<= 32768);
28487 /* Step 1: round SIZE to the previous multiple of the interval. */
28489 rounded_size
= ROUND_DOWN (size
, PROBE_INTERVAL
);
28492 /* Step 2: compute initial and final value of the loop counter. */
28494 /* TEST_ADDR = SP + FIRST. */
28495 emit_insn (gen_rtx_SET (r12
, plus_constant (Pmode
, stack_pointer_rtx
,
28498 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
28499 if (rounded_size
> 32768)
28501 emit_move_insn (r0
, GEN_INT (-rounded_size
));
28502 emit_insn (gen_rtx_SET (r0
, gen_rtx_PLUS (Pmode
, r12
, r0
)));
28505 emit_insn (gen_rtx_SET (r0
, plus_constant (Pmode
, r12
,
28509 /* Step 3: the loop
28513 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28516 while (TEST_ADDR != LAST_ADDR)
28518 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28519 until it is equal to ROUNDED_SIZE. */
28522 emit_insn (gen_probe_stack_rangedi (r12
, r12
, r0
));
28524 emit_insn (gen_probe_stack_rangesi (r12
, r12
, r0
));
28527 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28528 that SIZE is equal to ROUNDED_SIZE. */
28530 if (size
!= rounded_size
)
28531 emit_stack_probe (plus_constant (Pmode
, r12
, rounded_size
- size
));
28535 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
28536 absolute addresses. */
28539 output_probe_stack_range (rtx reg1
, rtx reg2
)
28541 static int labelno
= 0;
28545 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
28548 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
28550 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
28552 xops
[1] = GEN_INT (-PROBE_INTERVAL
);
28553 output_asm_insn ("addi %0,%0,%1", xops
);
28555 /* Probe at TEST_ADDR. */
28556 xops
[1] = gen_rtx_REG (Pmode
, 0);
28557 output_asm_insn ("stw %1,0(%0)", xops
);
28559 /* Test if TEST_ADDR == LAST_ADDR. */
28562 output_asm_insn ("cmpd 0,%0,%1", xops
);
28564 output_asm_insn ("cmpw 0,%0,%1", xops
);
28567 fputs ("\tbne 0,", asm_out_file
);
28568 assemble_name_raw (asm_out_file
, loop_lab
);
28569 fputc ('\n', asm_out_file
);
28574 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28575 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28576 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
28577 deduce these equivalences by itself so it wasn't necessary to hold
28578 its hand so much. Don't be tempted to always supply d2_f_d_e with
28579 the actual cfa register, ie. r31 when we are using a hard frame
28580 pointer. That fails when saving regs off r1, and sched moves the
28581 r31 setup past the reg saves. */
28584 rs6000_frame_related (rtx_insn
*insn
, rtx reg
, HOST_WIDE_INT val
,
28585 rtx reg2
, rtx repl2
)
28589 if (REGNO (reg
) == STACK_POINTER_REGNUM
)
28591 gcc_checking_assert (val
== 0);
28595 repl
= gen_rtx_PLUS (Pmode
, gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
),
28598 rtx pat
= PATTERN (insn
);
28599 if (!repl
&& !reg2
)
28601 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
28602 if (GET_CODE (pat
) == PARALLEL
)
28603 for (int i
= 0; i
< XVECLEN (pat
, 0); i
++)
28604 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
28606 rtx set
= XVECEXP (pat
, 0, i
);
28608 /* If this PARALLEL has been emitted for out-of-line
28609 register save functions, or store multiple, then omit
28610 eh_frame info for any user-defined global regs. If
28611 eh_frame info is supplied, frame unwinding will
28612 restore a user reg. */
28613 if (!REG_P (SET_SRC (set
))
28614 || !fixed_reg_p (REGNO (SET_SRC (set
))))
28615 RTX_FRAME_RELATED_P (set
) = 1;
28617 RTX_FRAME_RELATED_P (insn
) = 1;
28621 /* We expect that 'pat' is either a SET or a PARALLEL containing
28622 SETs (and possibly other stuff). In a PARALLEL, all the SETs
28623 are important so they all have to be marked RTX_FRAME_RELATED_P.
28624 Call simplify_replace_rtx on the SETs rather than the whole insn
28625 so as to leave the other stuff alone (for example USE of r12). */
28627 set_used_flags (pat
);
28628 if (GET_CODE (pat
) == SET
)
28631 pat
= simplify_replace_rtx (pat
, reg
, repl
);
28633 pat
= simplify_replace_rtx (pat
, reg2
, repl2
);
28635 else if (GET_CODE (pat
) == PARALLEL
)
28637 pat
= shallow_copy_rtx (pat
);
28638 XVEC (pat
, 0) = shallow_copy_rtvec (XVEC (pat
, 0));
28640 for (int i
= 0; i
< XVECLEN (pat
, 0); i
++)
28641 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
28643 rtx set
= XVECEXP (pat
, 0, i
);
28646 set
= simplify_replace_rtx (set
, reg
, repl
);
28648 set
= simplify_replace_rtx (set
, reg2
, repl2
);
28649 XVECEXP (pat
, 0, i
) = set
;
28651 /* Omit eh_frame info for any user-defined global regs. */
28652 if (!REG_P (SET_SRC (set
))
28653 || !fixed_reg_p (REGNO (SET_SRC (set
))))
28654 RTX_FRAME_RELATED_P (set
) = 1;
28658 gcc_unreachable ();
28660 RTX_FRAME_RELATED_P (insn
) = 1;
28661 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, copy_rtx_if_shared (pat
));
28666 /* Returns an insn that has a vrsave set operation with the
28667 appropriate CLOBBERs. */
28670 generate_set_vrsave (rtx reg
, rs6000_stack_t
*info
, int epiloguep
)
28673 rtx insn
, clobs
[TOTAL_ALTIVEC_REGS
+ 1];
28674 rtx vrsave
= gen_rtx_REG (SImode
, VRSAVE_REGNO
);
28677 = gen_rtx_SET (vrsave
,
28678 gen_rtx_UNSPEC_VOLATILE (SImode
,
28679 gen_rtvec (2, reg
, vrsave
),
28680 UNSPECV_SET_VRSAVE
));
28684 /* We need to clobber the registers in the mask so the scheduler
28685 does not move sets to VRSAVE before sets of AltiVec registers.
28687 However, if the function receives nonlocal gotos, reload will set
28688 all call saved registers live. We will end up with:
28690 (set (reg 999) (mem))
28691 (parallel [ (set (reg vrsave) (unspec blah))
28692 (clobber (reg 999))])
28694 The clobber will cause the store into reg 999 to be dead, and
28695 flow will attempt to delete an epilogue insn. In this case, we
28696 need an unspec use/set of the register. */
28698 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
28699 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
28701 if (!epiloguep
|| call_used_regs
[i
])
28702 clobs
[nclobs
++] = gen_rtx_CLOBBER (VOIDmode
,
28703 gen_rtx_REG (V4SImode
, i
));
28706 rtx reg
= gen_rtx_REG (V4SImode
, i
);
28709 = gen_rtx_SET (reg
,
28710 gen_rtx_UNSPEC (V4SImode
,
28711 gen_rtvec (1, reg
), 27));
28715 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nclobs
));
28717 for (i
= 0; i
< nclobs
; ++i
)
28718 XVECEXP (insn
, 0, i
) = clobs
[i
];
28724 gen_frame_set (rtx reg
, rtx frame_reg
, int offset
, bool store
)
28728 addr
= gen_rtx_PLUS (Pmode
, frame_reg
, GEN_INT (offset
));
28729 mem
= gen_frame_mem (GET_MODE (reg
), addr
);
28730 return gen_rtx_SET (store
? mem
: reg
, store
? reg
: mem
);
28734 gen_frame_load (rtx reg
, rtx frame_reg
, int offset
)
28736 return gen_frame_set (reg
, frame_reg
, offset
, false);
28740 gen_frame_store (rtx reg
, rtx frame_reg
, int offset
)
28742 return gen_frame_set (reg
, frame_reg
, offset
, true);
28745 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28746 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28749 emit_frame_save (rtx frame_reg
, machine_mode mode
,
28750 unsigned int regno
, int offset
, HOST_WIDE_INT frame_reg_to_sp
)
28754 /* Some cases that need register indexed addressing. */
28755 gcc_checking_assert (!((TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
28756 || (TARGET_VSX
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
28757 || (TARGET_E500_DOUBLE
&& mode
== DFmode
)
28759 && SPE_VECTOR_MODE (mode
)
28760 && !SPE_CONST_OFFSET_OK (offset
))));
28762 reg
= gen_rtx_REG (mode
, regno
);
28763 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, frame_reg
, offset
));
28764 return rs6000_frame_related (insn
, frame_reg
, frame_reg_to_sp
,
28765 NULL_RTX
, NULL_RTX
);
28768 /* Emit an offset memory reference suitable for a frame store, while
28769 converting to a valid addressing mode. */
28772 gen_frame_mem_offset (machine_mode mode
, rtx reg
, int offset
)
28774 rtx int_rtx
, offset_rtx
;
28776 int_rtx
= GEN_INT (offset
);
28778 if ((TARGET_SPE_ABI
&& SPE_VECTOR_MODE (mode
) && !SPE_CONST_OFFSET_OK (offset
))
28779 || (TARGET_E500_DOUBLE
&& mode
== DFmode
))
28781 offset_rtx
= gen_rtx_REG (Pmode
, FIXED_SCRATCH
);
28782 emit_move_insn (offset_rtx
, int_rtx
);
28785 offset_rtx
= int_rtx
;
28787 return gen_frame_mem (mode
, gen_rtx_PLUS (Pmode
, reg
, offset_rtx
));
28790 #ifndef TARGET_FIX_AND_CONTINUE
28791 #define TARGET_FIX_AND_CONTINUE 0
28794 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
28795 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28796 #define LAST_SAVRES_REGISTER 31
28797 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28808 static GTY(()) rtx savres_routine_syms
[N_SAVRES_REGISTERS
][12];
28810 /* Temporary holding space for an out-of-line register save/restore
28812 static char savres_routine_name
[30];
28814 /* Return the name for an out-of-line register save/restore routine.
28815 We are saving/restoring GPRs if GPR is true. */
28818 rs6000_savres_routine_name (rs6000_stack_t
*info
, int regno
, int sel
)
28820 const char *prefix
= "";
28821 const char *suffix
= "";
28823 /* Different targets are supposed to define
28824 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28825 routine name could be defined with:
28827 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28829 This is a nice idea in practice, but in reality, things are
28830 complicated in several ways:
28832 - ELF targets have save/restore routines for GPRs.
28834 - SPE targets use different prefixes for 32/64-bit registers, and
28835 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28837 - PPC64 ELF targets have routines for save/restore of GPRs that
28838 differ in what they do with the link register, so having a set
28839 prefix doesn't work. (We only use one of the save routines at
28840 the moment, though.)
28842 - PPC32 elf targets have "exit" versions of the restore routines
28843 that restore the link register and can save some extra space.
28844 These require an extra suffix. (There are also "tail" versions
28845 of the restore routines and "GOT" versions of the save routines,
28846 but we don't generate those at present. Same problems apply,
28849 We deal with all this by synthesizing our own prefix/suffix and
28850 using that for the simple sprintf call shown above. */
28853 /* No floating point saves on the SPE. */
28854 gcc_assert ((sel
& SAVRES_REG
) == SAVRES_GPR
);
28856 if ((sel
& SAVRES_SAVE
))
28857 prefix
= info
->spe_64bit_regs_used
? "_save64gpr_" : "_save32gpr_";
28859 prefix
= info
->spe_64bit_regs_used
? "_rest64gpr_" : "_rest32gpr_";
28861 if ((sel
& SAVRES_LR
))
28864 else if (DEFAULT_ABI
== ABI_V4
)
28869 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
28870 prefix
= (sel
& SAVRES_SAVE
) ? "_savegpr_" : "_restgpr_";
28871 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
28872 prefix
= (sel
& SAVRES_SAVE
) ? "_savefpr_" : "_restfpr_";
28873 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
28874 prefix
= (sel
& SAVRES_SAVE
) ? "_savevr_" : "_restvr_";
28878 if ((sel
& SAVRES_LR
))
28881 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
28883 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28884 /* No out-of-line save/restore routines for GPRs on AIX. */
28885 gcc_assert (!TARGET_AIX
|| (sel
& SAVRES_REG
) != SAVRES_GPR
);
28889 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
28890 prefix
= ((sel
& SAVRES_SAVE
)
28891 ? ((sel
& SAVRES_LR
) ? "_savegpr0_" : "_savegpr1_")
28892 : ((sel
& SAVRES_LR
) ? "_restgpr0_" : "_restgpr1_"));
28893 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
28895 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
28896 if ((sel
& SAVRES_LR
))
28897 prefix
= ((sel
& SAVRES_SAVE
) ? "_savefpr_" : "_restfpr_");
28901 prefix
= (sel
& SAVRES_SAVE
) ? SAVE_FP_PREFIX
: RESTORE_FP_PREFIX
;
28902 suffix
= (sel
& SAVRES_SAVE
) ? SAVE_FP_SUFFIX
: RESTORE_FP_SUFFIX
;
28905 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
28906 prefix
= (sel
& SAVRES_SAVE
) ? "_savevr_" : "_restvr_";
28911 if (DEFAULT_ABI
== ABI_DARWIN
)
28913 /* The Darwin approach is (slightly) different, in order to be
28914 compatible with code generated by the system toolchain. There is a
28915 single symbol for the start of save sequence, and the code here
28916 embeds an offset into that code on the basis of the first register
28918 prefix
= (sel
& SAVRES_SAVE
) ? "save" : "rest" ;
28919 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
28920 sprintf (savres_routine_name
, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix
,
28921 ((sel
& SAVRES_LR
) ? "x" : ""), (regno
== 13 ? "" : "+"),
28922 (regno
- 13) * 4, prefix
, regno
);
28923 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
28924 sprintf (savres_routine_name
, "*%sFP%s%.0d ; %s f%d-f31", prefix
,
28925 (regno
== 14 ? "" : "+"), (regno
- 14) * 4, prefix
, regno
);
28926 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
28927 sprintf (savres_routine_name
, "*%sVEC%s%.0d ; %s v%d-v31", prefix
,
28928 (regno
== 20 ? "" : "+"), (regno
- 20) * 8, prefix
, regno
);
28933 sprintf (savres_routine_name
, "%s%d%s", prefix
, regno
, suffix
);
28935 return savres_routine_name
;
28938 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
28939 We are saving/restoring GPRs if GPR is true. */
28942 rs6000_savres_routine_sym (rs6000_stack_t
*info
, int sel
)
28944 int regno
= ((sel
& SAVRES_REG
) == SAVRES_GPR
28945 ? info
->first_gp_reg_save
28946 : (sel
& SAVRES_REG
) == SAVRES_FPR
28947 ? info
->first_fp_reg_save
- 32
28948 : (sel
& SAVRES_REG
) == SAVRES_VR
28949 ? info
->first_altivec_reg_save
- FIRST_ALTIVEC_REGNO
28954 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
28955 versions of the gpr routines. */
28956 if (TARGET_SPE_ABI
&& (sel
& SAVRES_REG
) == SAVRES_GPR
28957 && info
->spe_64bit_regs_used
)
28958 select
^= SAVRES_FPR
^ SAVRES_GPR
;
28960 /* Don't generate bogus routine names. */
28961 gcc_assert (FIRST_SAVRES_REGISTER
<= regno
28962 && regno
<= LAST_SAVRES_REGISTER
28963 && select
>= 0 && select
<= 12);
28965 sym
= savres_routine_syms
[regno
-FIRST_SAVRES_REGISTER
][select
];
28971 name
= rs6000_savres_routine_name (info
, regno
, sel
);
28973 sym
= savres_routine_syms
[regno
-FIRST_SAVRES_REGISTER
][select
]
28974 = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
28975 SYMBOL_REF_FLAGS (sym
) |= SYMBOL_FLAG_FUNCTION
;
28981 /* Emit a sequence of insns, including a stack tie if needed, for
28982 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
28983 reset the stack pointer, but move the base of the frame into
28984 reg UPDT_REGNO for use by out-of-line register restore routines. */
28987 rs6000_emit_stack_reset (rs6000_stack_t
*info
,
28988 rtx frame_reg_rtx
, HOST_WIDE_INT frame_off
,
28989 unsigned updt_regno
)
28991 /* If there is nothing to do, don't do anything. */
28992 if (frame_off
== 0 && REGNO (frame_reg_rtx
) == updt_regno
)
28995 rtx updt_reg_rtx
= gen_rtx_REG (Pmode
, updt_regno
);
28997 /* This blockage is needed so that sched doesn't decide to move
28998 the sp change before the register restores. */
28999 if (DEFAULT_ABI
== ABI_V4
29001 && info
->spe_64bit_regs_used
!= 0
29002 && info
->first_gp_reg_save
!= 32))
29003 return emit_insn (gen_stack_restore_tie (updt_reg_rtx
, frame_reg_rtx
,
29004 GEN_INT (frame_off
)));
29006 /* If we are restoring registers out-of-line, we will be using the
29007 "exit" variants of the restore routines, which will reset the
29008 stack for us. But we do need to point updt_reg into the
29009 right place for those routines. */
29010 if (frame_off
!= 0)
29011 return emit_insn (gen_add3_insn (updt_reg_rtx
,
29012 frame_reg_rtx
, GEN_INT (frame_off
)));
29014 return emit_move_insn (updt_reg_rtx
, frame_reg_rtx
);
29019 /* Return the register number used as a pointer by out-of-line
29020 save/restore functions. */
29022 static inline unsigned
29023 ptr_regno_for_savres (int sel
)
29025 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
29026 return (sel
& SAVRES_REG
) == SAVRES_FPR
|| (sel
& SAVRES_LR
) ? 1 : 12;
29027 return DEFAULT_ABI
== ABI_DARWIN
&& (sel
& SAVRES_REG
) == SAVRES_FPR
? 1 : 11;
29030 /* Construct a parallel rtx describing the effect of a call to an
29031 out-of-line register save/restore routine, and emit the insn
29032 or jump_insn as appropriate. */
29035 rs6000_emit_savres_rtx (rs6000_stack_t
*info
,
29036 rtx frame_reg_rtx
, int save_area_offset
, int lr_offset
,
29037 machine_mode reg_mode
, int sel
)
29040 int offset
, start_reg
, end_reg
, n_regs
, use_reg
;
29041 int reg_size
= GET_MODE_SIZE (reg_mode
);
29048 start_reg
= ((sel
& SAVRES_REG
) == SAVRES_GPR
29049 ? info
->first_gp_reg_save
29050 : (sel
& SAVRES_REG
) == SAVRES_FPR
29051 ? info
->first_fp_reg_save
29052 : (sel
& SAVRES_REG
) == SAVRES_VR
29053 ? info
->first_altivec_reg_save
29055 end_reg
= ((sel
& SAVRES_REG
) == SAVRES_GPR
29057 : (sel
& SAVRES_REG
) == SAVRES_FPR
29059 : (sel
& SAVRES_REG
) == SAVRES_VR
29060 ? LAST_ALTIVEC_REGNO
+ 1
29062 n_regs
= end_reg
- start_reg
;
29063 p
= rtvec_alloc (3 + ((sel
& SAVRES_LR
) ? 1 : 0)
29064 + ((sel
& SAVRES_REG
) == SAVRES_VR
? 1 : 0)
29067 if (!(sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
29068 RTVEC_ELT (p
, offset
++) = ret_rtx
;
29070 RTVEC_ELT (p
, offset
++)
29071 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
29073 sym
= rs6000_savres_routine_sym (info
, sel
);
29074 RTVEC_ELT (p
, offset
++) = gen_rtx_USE (VOIDmode
, sym
);
29076 use_reg
= ptr_regno_for_savres (sel
);
29077 if ((sel
& SAVRES_REG
) == SAVRES_VR
)
29079 /* Vector regs are saved/restored using [reg+reg] addressing. */
29080 RTVEC_ELT (p
, offset
++)
29081 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, use_reg
));
29082 RTVEC_ELT (p
, offset
++)
29083 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, 0));
29086 RTVEC_ELT (p
, offset
++)
29087 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, use_reg
));
29089 for (i
= 0; i
< end_reg
- start_reg
; i
++)
29090 RTVEC_ELT (p
, i
+ offset
)
29091 = gen_frame_set (gen_rtx_REG (reg_mode
, start_reg
+ i
),
29092 frame_reg_rtx
, save_area_offset
+ reg_size
* i
,
29093 (sel
& SAVRES_SAVE
) != 0);
29095 if ((sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
29096 RTVEC_ELT (p
, i
+ offset
)
29097 = gen_frame_store (gen_rtx_REG (Pmode
, 0), frame_reg_rtx
, lr_offset
);
29099 par
= gen_rtx_PARALLEL (VOIDmode
, p
);
29101 if (!(sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
29103 insn
= emit_jump_insn (par
);
29104 JUMP_LABEL (insn
) = ret_rtx
;
29107 insn
= emit_insn (par
);
29111 /* Emit code to store CR fields that need to be saved into REG. */
29114 rs6000_emit_move_from_cr (rtx reg
)
29116 /* Only the ELFv2 ABI allows storing only selected fields. */
29117 if (DEFAULT_ABI
== ABI_ELFv2
&& TARGET_MFCRF
)
29119 int i
, cr_reg
[8], count
= 0;
29121 /* Collect CR fields that must be saved. */
29122 for (i
= 0; i
< 8; i
++)
29123 if (save_reg_p (CR0_REGNO
+ i
))
29124 cr_reg
[count
++] = i
;
29126 /* If it's just a single one, use mfcrf. */
29129 rtvec p
= rtvec_alloc (1);
29130 rtvec r
= rtvec_alloc (2);
29131 RTVEC_ELT (r
, 0) = gen_rtx_REG (CCmode
, CR0_REGNO
+ cr_reg
[0]);
29132 RTVEC_ELT (r
, 1) = GEN_INT (1 << (7 - cr_reg
[0]));
29134 = gen_rtx_SET (reg
,
29135 gen_rtx_UNSPEC (SImode
, r
, UNSPEC_MOVESI_FROM_CR
));
29137 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
29141 /* ??? It might be better to handle count == 2 / 3 cases here
29142 as well, using logical operations to combine the values. */
29145 emit_insn (gen_movesi_from_cr (reg
));
29148 /* Return whether the split-stack arg pointer (r12) is used. */
29151 split_stack_arg_pointer_used_p (void)
29153 /* If the pseudo holding the arg pointer is no longer a pseudo,
29154 then the arg pointer is used. */
29155 if (cfun
->machine
->split_stack_arg_pointer
!= NULL_RTX
29156 && (!REG_P (cfun
->machine
->split_stack_arg_pointer
)
29157 || (REGNO (cfun
->machine
->split_stack_arg_pointer
)
29158 < FIRST_PSEUDO_REGISTER
)))
29161 /* Unfortunately we also need to do some code scanning, since
29162 r12 may have been substituted for the pseudo. */
29164 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
29165 FOR_BB_INSNS (bb
, insn
)
29166 if (NONDEBUG_INSN_P (insn
))
29168 /* A call destroys r12. */
29173 FOR_EACH_INSN_USE (use
, insn
)
29175 rtx x
= DF_REF_REG (use
);
29176 if (REG_P (x
) && REGNO (x
) == 12)
29180 FOR_EACH_INSN_DEF (def
, insn
)
29182 rtx x
= DF_REF_REG (def
);
29183 if (REG_P (x
) && REGNO (x
) == 12)
29187 return bitmap_bit_p (DF_LR_OUT (bb
), 12);
29190 /* Return whether we need to emit an ELFv2 global entry point prologue. */
29193 rs6000_global_entry_point_needed_p (void)
29195 /* Only needed for the ELFv2 ABI. */
29196 if (DEFAULT_ABI
!= ABI_ELFv2
)
29199 /* With -msingle-pic-base, we assume the whole program shares the same
29200 TOC, so no global entry point prologues are needed anywhere. */
29201 if (TARGET_SINGLE_PIC_BASE
)
29204 /* Ensure we have a global entry point for thunks. ??? We could
29205 avoid that if the target routine doesn't need a global entry point,
29206 but we do not know whether this is the case at this point. */
29207 if (cfun
->is_thunk
)
29210 /* For regular functions, rs6000_emit_prologue sets this flag if the
29211 routine ever uses the TOC pointer. */
29212 return cfun
->machine
->r2_setup_needed
;
29215 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
29217 rs6000_get_separate_components (void)
29219 rs6000_stack_t
*info
= rs6000_stack_info ();
29221 if (WORLD_SAVE_P (info
))
29224 if (TARGET_SPE_ABI
)
29227 gcc_assert (!(info
->savres_strategy
& SAVE_MULTIPLE
)
29228 && !(info
->savres_strategy
& REST_MULTIPLE
));
29230 /* Component 0 is the save/restore of LR (done via GPR0).
29231 Components 13..31 are the save/restore of GPR13..GPR31.
29232 Components 46..63 are the save/restore of FPR14..FPR31. */
29234 cfun
->machine
->n_components
= 64;
29236 sbitmap components
= sbitmap_alloc (cfun
->machine
->n_components
);
29237 bitmap_clear (components
);
29239 int reg_size
= TARGET_32BIT
? 4 : 8;
29240 int fp_reg_size
= 8;
29242 /* The GPRs we need saved to the frame. */
29243 if ((info
->savres_strategy
& SAVE_INLINE_GPRS
)
29244 && (info
->savres_strategy
& REST_INLINE_GPRS
))
29246 int offset
= info
->gp_save_offset
;
29248 offset
+= info
->total_size
;
29250 for (unsigned regno
= info
->first_gp_reg_save
; regno
< 32; regno
++)
29252 if (IN_RANGE (offset
, -0x8000, 0x7fff)
29253 && rs6000_reg_live_or_pic_offset_p (regno
))
29254 bitmap_set_bit (components
, regno
);
29256 offset
+= reg_size
;
29260 /* Don't mess with the hard frame pointer. */
29261 if (frame_pointer_needed
)
29262 bitmap_clear_bit (components
, HARD_FRAME_POINTER_REGNUM
);
29264 /* Don't mess with the fixed TOC register. */
29265 if ((TARGET_TOC
&& TARGET_MINIMAL_TOC
)
29266 || (flag_pic
== 1 && DEFAULT_ABI
== ABI_V4
)
29267 || (flag_pic
&& DEFAULT_ABI
== ABI_DARWIN
))
29268 bitmap_clear_bit (components
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
29270 /* The FPRs we need saved to the frame. */
29271 if ((info
->savres_strategy
& SAVE_INLINE_FPRS
)
29272 && (info
->savres_strategy
& REST_INLINE_FPRS
))
29274 int offset
= info
->fp_save_offset
;
29276 offset
+= info
->total_size
;
29278 for (unsigned regno
= info
->first_fp_reg_save
; regno
< 64; regno
++)
29280 if (IN_RANGE (offset
, -0x8000, 0x7fff) && save_reg_p (regno
))
29281 bitmap_set_bit (components
, regno
);
29283 offset
+= fp_reg_size
;
29287 /* Optimize LR save and restore if we can. This is component 0. Any
29288 out-of-line register save/restore routines need LR. */
29289 if (info
->lr_save_p
29290 && !(flag_pic
&& (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
))
29291 && (info
->savres_strategy
& SAVE_INLINE_GPRS
)
29292 && (info
->savres_strategy
& REST_INLINE_GPRS
)
29293 && (info
->savres_strategy
& SAVE_INLINE_FPRS
)
29294 && (info
->savres_strategy
& REST_INLINE_FPRS
)
29295 && (info
->savres_strategy
& SAVE_INLINE_VRS
)
29296 && (info
->savres_strategy
& REST_INLINE_VRS
))
29298 int offset
= info
->lr_save_offset
;
29300 offset
+= info
->total_size
;
29301 if (IN_RANGE (offset
, -0x8000, 0x7fff))
29302 bitmap_set_bit (components
, 0);
29308 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29310 rs6000_components_for_bb (basic_block bb
)
29312 rs6000_stack_t
*info
= rs6000_stack_info ();
29314 bitmap in
= DF_LIVE_IN (bb
);
29315 bitmap gen
= &DF_LIVE_BB_INFO (bb
)->gen
;
29316 bitmap kill
= &DF_LIVE_BB_INFO (bb
)->kill
;
29318 sbitmap components
= sbitmap_alloc (cfun
->machine
->n_components
);
29319 bitmap_clear (components
);
29321 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
29324 for (unsigned regno
= info
->first_gp_reg_save
; regno
< 32; regno
++)
29325 if (bitmap_bit_p (in
, regno
)
29326 || bitmap_bit_p (gen
, regno
)
29327 || bitmap_bit_p (kill
, regno
))
29328 bitmap_set_bit (components
, regno
);
29331 for (unsigned regno
= info
->first_fp_reg_save
; regno
< 64; regno
++)
29332 if (bitmap_bit_p (in
, regno
)
29333 || bitmap_bit_p (gen
, regno
)
29334 || bitmap_bit_p (kill
, regno
))
29335 bitmap_set_bit (components
, regno
);
29337 /* The link register. */
29338 if (bitmap_bit_p (in
, LR_REGNO
)
29339 || bitmap_bit_p (gen
, LR_REGNO
)
29340 || bitmap_bit_p (kill
, LR_REGNO
))
29341 bitmap_set_bit (components
, 0);
29346 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29348 rs6000_disqualify_components (sbitmap components
, edge e
,
29349 sbitmap edge_components
, bool /*is_prologue*/)
29351 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29352 live where we want to place that code. */
29353 if (bitmap_bit_p (edge_components
, 0)
29354 && bitmap_bit_p (DF_LIVE_IN (e
->dest
), 0))
29357 fprintf (dump_file
, "Disqualifying LR because GPR0 is live "
29358 "on entry to bb %d\n", e
->dest
->index
);
29359 bitmap_clear_bit (components
, 0);
29363 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29365 rs6000_emit_prologue_components (sbitmap components
)
29367 rs6000_stack_t
*info
= rs6000_stack_info ();
29368 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
29369 ? HARD_FRAME_POINTER_REGNUM
29370 : STACK_POINTER_REGNUM
);
29372 machine_mode reg_mode
= Pmode
;
29373 int reg_size
= TARGET_32BIT
? 4 : 8;
29374 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
29376 int fp_reg_size
= 8;
29378 /* Prologue for LR. */
29379 if (bitmap_bit_p (components
, 0))
29381 rtx reg
= gen_rtx_REG (reg_mode
, 0);
29382 rtx_insn
*insn
= emit_move_insn (reg
, gen_rtx_REG (reg_mode
, LR_REGNO
));
29383 RTX_FRAME_RELATED_P (insn
) = 1;
29384 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
29386 int offset
= info
->lr_save_offset
;
29388 offset
+= info
->total_size
;
29390 insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
29391 RTX_FRAME_RELATED_P (insn
) = 1;
29392 rtx lr
= gen_rtx_REG (reg_mode
, LR_REGNO
);
29393 rtx mem
= copy_rtx (SET_DEST (single_set (insn
)));
29394 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (mem
, lr
));
29397 /* Prologue for the GPRs. */
29398 int offset
= info
->gp_save_offset
;
29400 offset
+= info
->total_size
;
29402 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
29404 if (bitmap_bit_p (components
, i
))
29406 rtx reg
= gen_rtx_REG (reg_mode
, i
);
29407 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
29408 RTX_FRAME_RELATED_P (insn
) = 1;
29409 rtx set
= copy_rtx (single_set (insn
));
29410 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
29413 offset
+= reg_size
;
29416 /* Prologue for the FPRs. */
29417 offset
= info
->fp_save_offset
;
29419 offset
+= info
->total_size
;
29421 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29423 if (bitmap_bit_p (components
, i
))
29425 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
29426 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
29427 RTX_FRAME_RELATED_P (insn
) = 1;
29428 rtx set
= copy_rtx (single_set (insn
));
29429 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
29432 offset
+= fp_reg_size
;
29436 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29438 rs6000_emit_epilogue_components (sbitmap components
)
29440 rs6000_stack_t
*info
= rs6000_stack_info ();
29441 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
29442 ? HARD_FRAME_POINTER_REGNUM
29443 : STACK_POINTER_REGNUM
);
29445 machine_mode reg_mode
= Pmode
;
29446 int reg_size
= TARGET_32BIT
? 4 : 8;
29448 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
29450 int fp_reg_size
= 8;
29452 /* Epilogue for the FPRs. */
29453 int offset
= info
->fp_save_offset
;
29455 offset
+= info
->total_size
;
29457 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29459 if (bitmap_bit_p (components
, i
))
29461 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
29462 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
29463 RTX_FRAME_RELATED_P (insn
) = 1;
29464 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
29467 offset
+= fp_reg_size
;
29470 /* Epilogue for the GPRs. */
29471 offset
= info
->gp_save_offset
;
29473 offset
+= info
->total_size
;
29475 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
29477 if (bitmap_bit_p (components
, i
))
29479 rtx reg
= gen_rtx_REG (reg_mode
, i
);
29480 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
29481 RTX_FRAME_RELATED_P (insn
) = 1;
29482 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
29485 offset
+= reg_size
;
29488 /* Epilogue for LR. */
29489 if (bitmap_bit_p (components
, 0))
29491 int offset
= info
->lr_save_offset
;
29493 offset
+= info
->total_size
;
29495 rtx reg
= gen_rtx_REG (reg_mode
, 0);
29496 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
29498 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
29499 insn
= emit_move_insn (lr
, reg
);
29500 RTX_FRAME_RELATED_P (insn
) = 1;
29501 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
29505 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29507 rs6000_set_handled_components (sbitmap components
)
29509 rs6000_stack_t
*info
= rs6000_stack_info ();
29511 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
29512 if (bitmap_bit_p (components
, i
))
29513 cfun
->machine
->gpr_is_wrapped_separately
[i
] = true;
29515 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29516 if (bitmap_bit_p (components
, i
))
29517 cfun
->machine
->fpr_is_wrapped_separately
[i
- 32] = true;
29519 if (bitmap_bit_p (components
, 0))
29520 cfun
->machine
->lr_is_wrapped_separately
= true;
29523 /* Emit function prologue as insns. */
29526 rs6000_emit_prologue (void)
29528 rs6000_stack_t
*info
= rs6000_stack_info ();
29529 machine_mode reg_mode
= Pmode
;
29530 int reg_size
= TARGET_32BIT
? 4 : 8;
29531 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
29533 int fp_reg_size
= 8;
29534 rtx sp_reg_rtx
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
29535 rtx frame_reg_rtx
= sp_reg_rtx
;
29536 unsigned int cr_save_regno
;
29537 rtx cr_save_rtx
= NULL_RTX
;
29540 int using_static_chain_p
= (cfun
->static_chain_decl
!= NULL_TREE
29541 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
)
29542 && call_used_regs
[STATIC_CHAIN_REGNUM
]);
29543 int using_split_stack
= (flag_split_stack
29544 && (lookup_attribute ("no_split_stack",
29545 DECL_ATTRIBUTES (cfun
->decl
))
29548 /* Offset to top of frame for frame_reg and sp respectively. */
29549 HOST_WIDE_INT frame_off
= 0;
29550 HOST_WIDE_INT sp_off
= 0;
29551 /* sp_adjust is the stack adjusting instruction, tracked so that the
29552 insn setting up the split-stack arg pointer can be emitted just
29553 prior to it, when r12 is not used here for other purposes. */
29554 rtx_insn
*sp_adjust
= 0;
29557 /* Track and check usage of r0, r11, r12. */
29558 int reg_inuse
= using_static_chain_p
? 1 << 11 : 0;
29559 #define START_USE(R) do \
29561 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29562 reg_inuse |= 1 << (R); \
29564 #define END_USE(R) do \
29566 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
29567 reg_inuse &= ~(1 << (R)); \
29569 #define NOT_INUSE(R) do \
29571 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29574 #define START_USE(R) do {} while (0)
29575 #define END_USE(R) do {} while (0)
29576 #define NOT_INUSE(R) do {} while (0)
29579 if (DEFAULT_ABI
== ABI_ELFv2
29580 && !TARGET_SINGLE_PIC_BASE
)
29582 cfun
->machine
->r2_setup_needed
= df_regs_ever_live_p (TOC_REGNUM
);
29584 /* With -mminimal-toc we may generate an extra use of r2 below. */
29585 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
29586 && !constant_pool_empty_p ())
29587 cfun
->machine
->r2_setup_needed
= true;
29591 if (flag_stack_usage_info
)
29592 current_function_static_stack_size
= info
->total_size
;
29594 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
29596 HOST_WIDE_INT size
= info
->total_size
;
29598 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
29600 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
29601 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT
,
29602 size
- STACK_CHECK_PROTECT
);
29605 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
29608 if (TARGET_FIX_AND_CONTINUE
)
29610 /* gdb on darwin arranges to forward a function from the old
29611 address by modifying the first 5 instructions of the function
29612 to branch to the overriding function. This is necessary to
29613 permit function pointers that point to the old function to
29614 actually forward to the new function. */
29615 emit_insn (gen_nop ());
29616 emit_insn (gen_nop ());
29617 emit_insn (gen_nop ());
29618 emit_insn (gen_nop ());
29619 emit_insn (gen_nop ());
29622 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
29624 reg_mode
= V2SImode
;
29628 /* Handle world saves specially here. */
29629 if (WORLD_SAVE_P (info
))
29636 /* save_world expects lr in r0. */
29637 reg0
= gen_rtx_REG (Pmode
, 0);
29638 if (info
->lr_save_p
)
29640 insn
= emit_move_insn (reg0
,
29641 gen_rtx_REG (Pmode
, LR_REGNO
));
29642 RTX_FRAME_RELATED_P (insn
) = 1;
29645 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29646 assumptions about the offsets of various bits of the stack
29648 gcc_assert (info
->gp_save_offset
== -220
29649 && info
->fp_save_offset
== -144
29650 && info
->lr_save_offset
== 8
29651 && info
->cr_save_offset
== 4
29654 && (!crtl
->calls_eh_return
29655 || info
->ehrd_offset
== -432)
29656 && info
->vrsave_save_offset
== -224
29657 && info
->altivec_save_offset
== -416);
29659 treg
= gen_rtx_REG (SImode
, 11);
29660 emit_move_insn (treg
, GEN_INT (-info
->total_size
));
29662 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29663 in R11. It also clobbers R12, so beware! */
29665 /* Preserve CR2 for save_world prologues */
29667 sz
+= 32 - info
->first_gp_reg_save
;
29668 sz
+= 64 - info
->first_fp_reg_save
;
29669 sz
+= LAST_ALTIVEC_REGNO
- info
->first_altivec_reg_save
+ 1;
29670 p
= rtvec_alloc (sz
);
29672 RTVEC_ELT (p
, j
++) = gen_rtx_CLOBBER (VOIDmode
,
29673 gen_rtx_REG (SImode
,
29675 RTVEC_ELT (p
, j
++) = gen_rtx_USE (VOIDmode
,
29676 gen_rtx_SYMBOL_REF (Pmode
,
29678 /* We do floats first so that the instruction pattern matches
29680 for (i
= 0; i
< 64 - info
->first_fp_reg_save
; i
++)
29682 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
29684 info
->first_fp_reg_save
+ i
),
29686 info
->fp_save_offset
+ frame_off
+ 8 * i
);
29687 for (i
= 0; info
->first_altivec_reg_save
+ i
<= LAST_ALTIVEC_REGNO
; i
++)
29689 = gen_frame_store (gen_rtx_REG (V4SImode
,
29690 info
->first_altivec_reg_save
+ i
),
29692 info
->altivec_save_offset
+ frame_off
+ 16 * i
);
29693 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
29695 = gen_frame_store (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
29697 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
29699 /* CR register traditionally saved as CR2. */
29701 = gen_frame_store (gen_rtx_REG (SImode
, CR2_REGNO
),
29702 frame_reg_rtx
, info
->cr_save_offset
+ frame_off
);
29703 /* Explain about use of R0. */
29704 if (info
->lr_save_p
)
29706 = gen_frame_store (reg0
,
29707 frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
29708 /* Explain what happens to the stack pointer. */
29710 rtx newval
= gen_rtx_PLUS (Pmode
, sp_reg_rtx
, treg
);
29711 RTVEC_ELT (p
, j
++) = gen_rtx_SET (sp_reg_rtx
, newval
);
29714 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
29715 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
29716 treg
, GEN_INT (-info
->total_size
));
29717 sp_off
= frame_off
= info
->total_size
;
29720 strategy
= info
->savres_strategy
;
29722 /* For V.4, update stack before we do any saving and set back pointer. */
29723 if (! WORLD_SAVE_P (info
)
29725 && (DEFAULT_ABI
== ABI_V4
29726 || crtl
->calls_eh_return
))
29728 bool need_r11
= (TARGET_SPE
29729 ? (!(strategy
& SAVE_INLINE_GPRS
)
29730 && info
->spe_64bit_regs_used
== 0)
29731 : (!(strategy
& SAVE_INLINE_FPRS
)
29732 || !(strategy
& SAVE_INLINE_GPRS
)
29733 || !(strategy
& SAVE_INLINE_VRS
)));
29734 int ptr_regno
= -1;
29735 rtx ptr_reg
= NULL_RTX
;
29738 if (info
->total_size
< 32767)
29739 frame_off
= info
->total_size
;
29742 else if (info
->cr_save_p
29744 || info
->first_fp_reg_save
< 64
29745 || info
->first_gp_reg_save
< 32
29746 || info
->altivec_size
!= 0
29747 || info
->vrsave_size
!= 0
29748 || crtl
->calls_eh_return
)
29752 /* The prologue won't be saving any regs so there is no need
29753 to set up a frame register to access any frame save area.
29754 We also won't be using frame_off anywhere below, but set
29755 the correct value anyway to protect against future
29756 changes to this function. */
29757 frame_off
= info
->total_size
;
29759 if (ptr_regno
!= -1)
29761 /* Set up the frame offset to that needed by the first
29762 out-of-line save function. */
29763 START_USE (ptr_regno
);
29764 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
29765 frame_reg_rtx
= ptr_reg
;
29766 if (!(strategy
& SAVE_INLINE_FPRS
) && info
->fp_size
!= 0)
29767 gcc_checking_assert (info
->fp_save_offset
+ info
->fp_size
== 0);
29768 else if (!(strategy
& SAVE_INLINE_GPRS
) && info
->first_gp_reg_save
< 32)
29769 ptr_off
= info
->gp_save_offset
+ info
->gp_size
;
29770 else if (!(strategy
& SAVE_INLINE_VRS
) && info
->altivec_size
!= 0)
29771 ptr_off
= info
->altivec_save_offset
+ info
->altivec_size
;
29772 frame_off
= -ptr_off
;
29774 sp_adjust
= rs6000_emit_allocate_stack (info
->total_size
,
29776 if (REGNO (frame_reg_rtx
) == 12)
29778 sp_off
= info
->total_size
;
29779 if (frame_reg_rtx
!= sp_reg_rtx
)
29780 rs6000_emit_stack_tie (frame_reg_rtx
, false);
29783 /* If we use the link register, get it into r0. */
29784 if (!WORLD_SAVE_P (info
) && info
->lr_save_p
29785 && !cfun
->machine
->lr_is_wrapped_separately
)
29787 rtx addr
, reg
, mem
;
29789 reg
= gen_rtx_REG (Pmode
, 0);
29791 insn
= emit_move_insn (reg
, gen_rtx_REG (Pmode
, LR_REGNO
));
29792 RTX_FRAME_RELATED_P (insn
) = 1;
29794 if (!(strategy
& (SAVE_NOINLINE_GPRS_SAVES_LR
29795 | SAVE_NOINLINE_FPRS_SAVES_LR
)))
29797 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
29798 GEN_INT (info
->lr_save_offset
+ frame_off
));
29799 mem
= gen_rtx_MEM (Pmode
, addr
);
29800 /* This should not be of rs6000_sr_alias_set, because of
29801 __builtin_return_address. */
29803 insn
= emit_move_insn (mem
, reg
);
29804 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
29805 NULL_RTX
, NULL_RTX
);
29810 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
29811 r12 will be needed by out-of-line gpr restore. */
29812 cr_save_regno
= ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
29813 && !(strategy
& (SAVE_INLINE_GPRS
29814 | SAVE_NOINLINE_GPRS_SAVES_LR
))
29816 if (!WORLD_SAVE_P (info
)
29818 && REGNO (frame_reg_rtx
) != cr_save_regno
29819 && !(using_static_chain_p
&& cr_save_regno
== 11)
29820 && !(using_split_stack
&& cr_save_regno
== 12 && sp_adjust
))
29822 cr_save_rtx
= gen_rtx_REG (SImode
, cr_save_regno
);
29823 START_USE (cr_save_regno
);
29824 rs6000_emit_move_from_cr (cr_save_rtx
);
29827 /* Do any required saving of fpr's. If only one or two to save, do
29828 it ourselves. Otherwise, call function. */
29829 if (!WORLD_SAVE_P (info
) && (strategy
& SAVE_INLINE_FPRS
))
29831 int offset
= info
->fp_save_offset
+ frame_off
;
29832 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29835 && !cfun
->machine
->fpr_is_wrapped_separately
[i
- 32])
29836 emit_frame_save (frame_reg_rtx
, fp_reg_mode
, i
, offset
,
29837 sp_off
- frame_off
);
29839 offset
+= fp_reg_size
;
29842 else if (!WORLD_SAVE_P (info
) && info
->first_fp_reg_save
!= 64)
29844 bool lr
= (strategy
& SAVE_NOINLINE_FPRS_SAVES_LR
) != 0;
29845 int sel
= SAVRES_SAVE
| SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
29846 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
29847 rtx ptr_reg
= frame_reg_rtx
;
29849 if (REGNO (frame_reg_rtx
) == ptr_regno
)
29850 gcc_checking_assert (frame_off
== 0);
29853 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
29854 NOT_INUSE (ptr_regno
);
29855 emit_insn (gen_add3_insn (ptr_reg
,
29856 frame_reg_rtx
, GEN_INT (frame_off
)));
29858 insn
= rs6000_emit_savres_rtx (info
, ptr_reg
,
29859 info
->fp_save_offset
,
29860 info
->lr_save_offset
,
29862 rs6000_frame_related (insn
, ptr_reg
, sp_off
,
29863 NULL_RTX
, NULL_RTX
);
29868 /* Save GPRs. This is done as a PARALLEL if we are using
29869 the store-multiple instructions. */
29870 if (!WORLD_SAVE_P (info
)
29872 && info
->spe_64bit_regs_used
!= 0
29873 && info
->first_gp_reg_save
!= 32)
29876 rtx spe_save_area_ptr
;
29877 HOST_WIDE_INT save_off
;
29878 int ool_adjust
= 0;
29880 /* Determine whether we can address all of the registers that need
29881 to be saved with an offset from frame_reg_rtx that fits in
29882 the small const field for SPE memory instructions. */
29883 int spe_regs_addressable
29884 = (SPE_CONST_OFFSET_OK (info
->spe_gp_save_offset
+ frame_off
29885 + reg_size
* (32 - info
->first_gp_reg_save
- 1))
29886 && (strategy
& SAVE_INLINE_GPRS
));
29888 if (spe_regs_addressable
)
29890 spe_save_area_ptr
= frame_reg_rtx
;
29891 save_off
= frame_off
;
29895 /* Make r11 point to the start of the SPE save area. We need
29896 to be careful here if r11 is holding the static chain. If
29897 it is, then temporarily save it in r0. */
29898 HOST_WIDE_INT offset
;
29900 if (!(strategy
& SAVE_INLINE_GPRS
))
29901 ool_adjust
= 8 * (info
->first_gp_reg_save
- FIRST_SAVED_GP_REGNO
);
29902 offset
= info
->spe_gp_save_offset
+ frame_off
- ool_adjust
;
29903 spe_save_area_ptr
= gen_rtx_REG (Pmode
, 11);
29904 save_off
= frame_off
- offset
;
29906 if (using_static_chain_p
)
29908 rtx r0
= gen_rtx_REG (Pmode
, 0);
29911 gcc_assert (info
->first_gp_reg_save
> 11);
29913 emit_move_insn (r0
, spe_save_area_ptr
);
29915 else if (REGNO (frame_reg_rtx
) != 11)
29918 emit_insn (gen_addsi3 (spe_save_area_ptr
,
29919 frame_reg_rtx
, GEN_INT (offset
)));
29920 if (!using_static_chain_p
&& REGNO (frame_reg_rtx
) == 11)
29921 frame_off
= -info
->spe_gp_save_offset
+ ool_adjust
;
29924 if ((strategy
& SAVE_INLINE_GPRS
))
29926 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
29927 if (rs6000_reg_live_or_pic_offset_p (info
->first_gp_reg_save
+ i
))
29928 emit_frame_save (spe_save_area_ptr
, reg_mode
,
29929 info
->first_gp_reg_save
+ i
,
29930 (info
->spe_gp_save_offset
+ save_off
29932 sp_off
- save_off
);
29936 insn
= rs6000_emit_savres_rtx (info
, spe_save_area_ptr
,
29937 info
->spe_gp_save_offset
+ save_off
,
29939 SAVRES_SAVE
| SAVRES_GPR
);
29941 rs6000_frame_related (insn
, spe_save_area_ptr
, sp_off
- save_off
,
29942 NULL_RTX
, NULL_RTX
);
29945 /* Move the static chain pointer back. */
29946 if (!spe_regs_addressable
)
29948 if (using_static_chain_p
)
29950 emit_move_insn (spe_save_area_ptr
, gen_rtx_REG (Pmode
, 0));
29953 else if (REGNO (frame_reg_rtx
) != 11)
29957 else if (!WORLD_SAVE_P (info
) && !(strategy
& SAVE_INLINE_GPRS
))
29959 bool lr
= (strategy
& SAVE_NOINLINE_GPRS_SAVES_LR
) != 0;
29960 int sel
= SAVRES_SAVE
| SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
29961 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
29962 rtx ptr_reg
= frame_reg_rtx
;
29963 bool ptr_set_up
= REGNO (ptr_reg
) == ptr_regno
;
29964 int end_save
= info
->gp_save_offset
+ info
->gp_size
;
29967 if (ptr_regno
== 12)
29970 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
29972 /* Need to adjust r11 (r12) if we saved any FPRs. */
29973 if (end_save
+ frame_off
!= 0)
29975 rtx offset
= GEN_INT (end_save
+ frame_off
);
29978 frame_off
= -end_save
;
29980 NOT_INUSE (ptr_regno
);
29981 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
29983 else if (!ptr_set_up
)
29985 NOT_INUSE (ptr_regno
);
29986 emit_move_insn (ptr_reg
, frame_reg_rtx
);
29988 ptr_off
= -end_save
;
29989 insn
= rs6000_emit_savres_rtx (info
, ptr_reg
,
29990 info
->gp_save_offset
+ ptr_off
,
29991 info
->lr_save_offset
+ ptr_off
,
29993 rs6000_frame_related (insn
, ptr_reg
, sp_off
- ptr_off
,
29994 NULL_RTX
, NULL_RTX
);
29998 else if (!WORLD_SAVE_P (info
) && (strategy
& SAVE_MULTIPLE
))
30002 p
= rtvec_alloc (32 - info
->first_gp_reg_save
);
30003 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
30005 = gen_frame_store (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
30007 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
30008 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
30009 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
30010 NULL_RTX
, NULL_RTX
);
30012 else if (!WORLD_SAVE_P (info
))
30014 int offset
= info
->gp_save_offset
+ frame_off
;
30015 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
30017 if (rs6000_reg_live_or_pic_offset_p (i
)
30018 && !cfun
->machine
->gpr_is_wrapped_separately
[i
])
30019 emit_frame_save (frame_reg_rtx
, reg_mode
, i
, offset
,
30020 sp_off
- frame_off
);
30022 offset
+= reg_size
;
30026 if (crtl
->calls_eh_return
)
30033 unsigned int regno
= EH_RETURN_DATA_REGNO (i
);
30034 if (regno
== INVALID_REGNUM
)
30038 p
= rtvec_alloc (i
);
30042 unsigned int regno
= EH_RETURN_DATA_REGNO (i
);
30043 if (regno
== INVALID_REGNUM
)
30047 = gen_frame_store (gen_rtx_REG (reg_mode
, regno
),
30049 info
->ehrd_offset
+ sp_off
+ reg_size
* (int) i
);
30050 RTVEC_ELT (p
, i
) = set
;
30051 RTX_FRAME_RELATED_P (set
) = 1;
30054 insn
= emit_insn (gen_blockage ());
30055 RTX_FRAME_RELATED_P (insn
) = 1;
30056 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, gen_rtx_PARALLEL (VOIDmode
, p
));
30059 /* In AIX ABI we need to make sure r2 is really saved. */
30060 if (TARGET_AIX
&& crtl
->calls_eh_return
)
30062 rtx tmp_reg
, tmp_reg_si
, hi
, lo
, compare_result
, toc_save_done
, jump
;
30063 rtx join_insn
, note
;
30064 rtx_insn
*save_insn
;
30065 long toc_restore_insn
;
30067 tmp_reg
= gen_rtx_REG (Pmode
, 11);
30068 tmp_reg_si
= gen_rtx_REG (SImode
, 11);
30069 if (using_static_chain_p
)
30072 emit_move_insn (gen_rtx_REG (Pmode
, 0), tmp_reg
);
30076 emit_move_insn (tmp_reg
, gen_rtx_REG (Pmode
, LR_REGNO
));
30077 /* Peek at instruction to which this function returns. If it's
30078 restoring r2, then we know we've already saved r2. We can't
30079 unconditionally save r2 because the value we have will already
30080 be updated if we arrived at this function via a plt call or
30081 toc adjusting stub. */
30082 emit_move_insn (tmp_reg_si
, gen_rtx_MEM (SImode
, tmp_reg
));
30083 toc_restore_insn
= ((TARGET_32BIT
? 0x80410000 : 0xE8410000)
30084 + RS6000_TOC_SAVE_SLOT
);
30085 hi
= gen_int_mode (toc_restore_insn
& ~0xffff, SImode
);
30086 emit_insn (gen_xorsi3 (tmp_reg_si
, tmp_reg_si
, hi
));
30087 compare_result
= gen_rtx_REG (CCUNSmode
, CR0_REGNO
);
30088 validate_condition_mode (EQ
, CCUNSmode
);
30089 lo
= gen_int_mode (toc_restore_insn
& 0xffff, SImode
);
30090 emit_insn (gen_rtx_SET (compare_result
,
30091 gen_rtx_COMPARE (CCUNSmode
, tmp_reg_si
, lo
)));
30092 toc_save_done
= gen_label_rtx ();
30093 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
30094 gen_rtx_EQ (VOIDmode
, compare_result
,
30096 gen_rtx_LABEL_REF (VOIDmode
, toc_save_done
),
30098 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
30099 JUMP_LABEL (jump
) = toc_save_done
;
30100 LABEL_NUSES (toc_save_done
) += 1;
30102 save_insn
= emit_frame_save (frame_reg_rtx
, reg_mode
,
30103 TOC_REGNUM
, frame_off
+ RS6000_TOC_SAVE_SLOT
,
30104 sp_off
- frame_off
);
30106 emit_label (toc_save_done
);
30108 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
30109 have a CFG that has different saves along different paths.
30110 Move the note to a dummy blockage insn, which describes that
30111 R2 is unconditionally saved after the label. */
30112 /* ??? An alternate representation might be a special insn pattern
30113 containing both the branch and the store. That might let the
30114 code that minimizes the number of DW_CFA_advance opcodes better
30115 freedom in placing the annotations. */
30116 note
= find_reg_note (save_insn
, REG_FRAME_RELATED_EXPR
, NULL
);
30118 remove_note (save_insn
, note
);
30120 note
= alloc_reg_note (REG_FRAME_RELATED_EXPR
,
30121 copy_rtx (PATTERN (save_insn
)), NULL_RTX
);
30122 RTX_FRAME_RELATED_P (save_insn
) = 0;
30124 join_insn
= emit_insn (gen_blockage ());
30125 REG_NOTES (join_insn
) = note
;
30126 RTX_FRAME_RELATED_P (join_insn
) = 1;
30128 if (using_static_chain_p
)
30130 emit_move_insn (tmp_reg
, gen_rtx_REG (Pmode
, 0));
30137 /* Save CR if we use any that must be preserved. */
30138 if (!WORLD_SAVE_P (info
) && info
->cr_save_p
)
30140 rtx addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
30141 GEN_INT (info
->cr_save_offset
+ frame_off
));
30142 rtx mem
= gen_frame_mem (SImode
, addr
);
30144 /* If we didn't copy cr before, do so now using r0. */
30145 if (cr_save_rtx
== NULL_RTX
)
30148 cr_save_rtx
= gen_rtx_REG (SImode
, 0);
30149 rs6000_emit_move_from_cr (cr_save_rtx
);
30152 /* Saving CR requires a two-instruction sequence: one instruction
30153 to move the CR to a general-purpose register, and a second
30154 instruction that stores the GPR to memory.
30156 We do not emit any DWARF CFI records for the first of these,
30157 because we cannot properly represent the fact that CR is saved in
30158 a register. One reason is that we cannot express that multiple
30159 CR fields are saved; another reason is that on 64-bit, the size
30160 of the CR register in DWARF (4 bytes) differs from the size of
30161 a general-purpose register.
30163 This means if any intervening instruction were to clobber one of
30164 the call-saved CR fields, we'd have incorrect CFI. To prevent
30165 this from happening, we mark the store to memory as a use of
30166 those CR fields, which prevents any such instruction from being
30167 scheduled in between the two instructions. */
30172 crsave_v
[n_crsave
++] = gen_rtx_SET (mem
, cr_save_rtx
);
30173 for (i
= 0; i
< 8; i
++)
30174 if (save_reg_p (CR0_REGNO
+ i
))
30175 crsave_v
[n_crsave
++]
30176 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (CCmode
, CR0_REGNO
+ i
));
30178 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
,
30179 gen_rtvec_v (n_crsave
, crsave_v
)));
30180 END_USE (REGNO (cr_save_rtx
));
30182 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
30183 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
30184 so we need to construct a frame expression manually. */
30185 RTX_FRAME_RELATED_P (insn
) = 1;
30187 /* Update address to be stack-pointer relative, like
30188 rs6000_frame_related would do. */
30189 addr
= gen_rtx_PLUS (Pmode
, gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
),
30190 GEN_INT (info
->cr_save_offset
+ sp_off
));
30191 mem
= gen_frame_mem (SImode
, addr
);
30193 if (DEFAULT_ABI
== ABI_ELFv2
)
30195 /* In the ELFv2 ABI we generate separate CFI records for each
30196 CR field that was actually saved. They all point to the
30197 same 32-bit stack slot. */
30201 for (i
= 0; i
< 8; i
++)
30202 if (save_reg_p (CR0_REGNO
+ i
))
30205 = gen_rtx_SET (mem
, gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
30207 RTX_FRAME_RELATED_P (crframe
[n_crframe
]) = 1;
30211 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
30212 gen_rtx_PARALLEL (VOIDmode
,
30213 gen_rtvec_v (n_crframe
, crframe
)));
30217 /* In other ABIs, by convention, we use a single CR regnum to
30218 represent the fact that all call-saved CR fields are saved.
30219 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
30220 rtx set
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, CR2_REGNO
));
30221 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, set
);
30225 /* In the ELFv2 ABI we need to save all call-saved CR fields into
30226 *separate* slots if the routine calls __builtin_eh_return, so
30227 that they can be independently restored by the unwinder. */
30228 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
30230 int i
, cr_off
= info
->ehcr_offset
;
30233 /* ??? We might get better performance by using multiple mfocrf
30235 crsave
= gen_rtx_REG (SImode
, 0);
30236 emit_insn (gen_movesi_from_cr (crsave
));
30238 for (i
= 0; i
< 8; i
++)
30239 if (!call_used_regs
[CR0_REGNO
+ i
])
30241 rtvec p
= rtvec_alloc (2);
30243 = gen_frame_store (crsave
, frame_reg_rtx
, cr_off
+ frame_off
);
30245 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (CCmode
, CR0_REGNO
+ i
));
30247 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
30249 RTX_FRAME_RELATED_P (insn
) = 1;
30250 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
30251 gen_frame_store (gen_rtx_REG (SImode
, CR0_REGNO
+ i
),
30252 sp_reg_rtx
, cr_off
+ sp_off
));
30254 cr_off
+= reg_size
;
30258 /* Update stack and set back pointer unless this is V.4,
30259 for which it was done previously. */
30260 if (!WORLD_SAVE_P (info
) && info
->push_p
30261 && !(DEFAULT_ABI
== ABI_V4
|| crtl
->calls_eh_return
))
30263 rtx ptr_reg
= NULL
;
30266 /* If saving altivec regs we need to be able to address all save
30267 locations using a 16-bit offset. */
30268 if ((strategy
& SAVE_INLINE_VRS
) == 0
30269 || (info
->altivec_size
!= 0
30270 && (info
->altivec_save_offset
+ info
->altivec_size
- 16
30271 + info
->total_size
- frame_off
) > 32767)
30272 || (info
->vrsave_size
!= 0
30273 && (info
->vrsave_save_offset
30274 + info
->total_size
- frame_off
) > 32767))
30276 int sel
= SAVRES_SAVE
| SAVRES_VR
;
30277 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
30279 if (using_static_chain_p
30280 && ptr_regno
== STATIC_CHAIN_REGNUM
)
30282 if (REGNO (frame_reg_rtx
) != ptr_regno
)
30283 START_USE (ptr_regno
);
30284 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
30285 frame_reg_rtx
= ptr_reg
;
30286 ptr_off
= info
->altivec_save_offset
+ info
->altivec_size
;
30287 frame_off
= -ptr_off
;
30289 else if (REGNO (frame_reg_rtx
) == 1)
30290 frame_off
= info
->total_size
;
30291 sp_adjust
= rs6000_emit_allocate_stack (info
->total_size
,
30293 if (REGNO (frame_reg_rtx
) == 12)
30295 sp_off
= info
->total_size
;
30296 if (frame_reg_rtx
!= sp_reg_rtx
)
30297 rs6000_emit_stack_tie (frame_reg_rtx
, false);
30300 /* Set frame pointer, if needed. */
30301 if (frame_pointer_needed
)
30303 insn
= emit_move_insn (gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
),
30305 RTX_FRAME_RELATED_P (insn
) = 1;
30308 /* Save AltiVec registers if needed. Save here because the red zone does
30309 not always include AltiVec registers. */
30310 if (!WORLD_SAVE_P (info
)
30311 && info
->altivec_size
!= 0 && (strategy
& SAVE_INLINE_VRS
) == 0)
30313 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
30315 /* Oddly, the vector save/restore functions point r0 at the end
30316 of the save area, then use r11 or r12 to load offsets for
30317 [reg+reg] addressing. */
30318 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
30319 int scratch_regno
= ptr_regno_for_savres (SAVRES_SAVE
| SAVRES_VR
);
30320 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
30322 gcc_checking_assert (scratch_regno
== 11 || scratch_regno
== 12);
30324 if (scratch_regno
== 12)
30326 if (end_save
+ frame_off
!= 0)
30328 rtx offset
= GEN_INT (end_save
+ frame_off
);
30330 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
30333 emit_move_insn (ptr_reg
, frame_reg_rtx
);
30335 ptr_off
= -end_save
;
30336 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
30337 info
->altivec_save_offset
+ ptr_off
,
30338 0, V4SImode
, SAVRES_SAVE
| SAVRES_VR
);
30339 rs6000_frame_related (insn
, scratch_reg
, sp_off
- ptr_off
,
30340 NULL_RTX
, NULL_RTX
);
30341 if (REGNO (frame_reg_rtx
) == REGNO (scratch_reg
))
30343 /* The oddity mentioned above clobbered our frame reg. */
30344 emit_move_insn (frame_reg_rtx
, ptr_reg
);
30345 frame_off
= ptr_off
;
30348 else if (!WORLD_SAVE_P (info
)
30349 && info
->altivec_size
!= 0)
30353 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
30354 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
30356 rtx areg
, savereg
, mem
;
30357 HOST_WIDE_INT offset
;
30359 offset
= (info
->altivec_save_offset
+ frame_off
30360 + 16 * (i
- info
->first_altivec_reg_save
));
30362 savereg
= gen_rtx_REG (V4SImode
, i
);
30364 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
30366 mem
= gen_frame_mem (V4SImode
,
30367 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
30368 GEN_INT (offset
)));
30369 insn
= emit_insn (gen_rtx_SET (mem
, savereg
));
30375 areg
= gen_rtx_REG (Pmode
, 0);
30376 emit_move_insn (areg
, GEN_INT (offset
));
30378 /* AltiVec addressing mode is [reg+reg]. */
30379 mem
= gen_frame_mem (V4SImode
,
30380 gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
));
30382 /* Rather than emitting a generic move, force use of the stvx
30383 instruction, which we always want on ISA 2.07 (power8) systems.
30384 In particular we don't want xxpermdi/stxvd2x for little
30386 insn
= emit_insn (gen_altivec_stvx_v4si_internal (mem
, savereg
));
30389 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
30390 areg
, GEN_INT (offset
));
30394 /* VRSAVE is a bit vector representing which AltiVec registers
30395 are used. The OS uses this to determine which vector
30396 registers to save on a context switch. We need to save
30397 VRSAVE on the stack frame, add whatever AltiVec registers we
30398 used in this function, and do the corresponding magic in the
30401 if (!WORLD_SAVE_P (info
)
30402 && info
->vrsave_size
!= 0)
30408 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
30409 be using r12 as frame_reg_rtx and r11 as the static chain
30410 pointer for nested functions. */
30412 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
30413 && !using_static_chain_p
)
30415 else if (using_split_stack
|| REGNO (frame_reg_rtx
) == 12)
30418 if (using_static_chain_p
)
30422 NOT_INUSE (save_regno
);
30423 reg
= gen_rtx_REG (SImode
, save_regno
);
30424 vrsave
= gen_rtx_REG (SImode
, VRSAVE_REGNO
);
30426 emit_insn (gen_get_vrsave_internal (reg
));
30428 emit_insn (gen_rtx_SET (reg
, vrsave
));
30431 offset
= info
->vrsave_save_offset
+ frame_off
;
30432 insn
= emit_insn (gen_frame_store (reg
, frame_reg_rtx
, offset
));
30434 /* Include the registers in the mask. */
30435 emit_insn (gen_iorsi3 (reg
, reg
, GEN_INT ((int) info
->vrsave_mask
)));
30437 insn
= emit_insn (generate_set_vrsave (reg
, info
, 0));
30440 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
30441 if (!TARGET_SINGLE_PIC_BASE
30442 && ((TARGET_TOC
&& TARGET_MINIMAL_TOC
30443 && !constant_pool_empty_p ())
30444 || (DEFAULT_ABI
== ABI_V4
30445 && (flag_pic
== 1 || (flag_pic
&& TARGET_SECURE_PLT
))
30446 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))))
30448 /* If emit_load_toc_table will use the link register, we need to save
30449 it. We use R12 for this purpose because emit_load_toc_table
30450 can use register 0. This allows us to use a plain 'blr' to return
30451 from the procedure more often. */
30452 int save_LR_around_toc_setup
= (TARGET_ELF
30453 && DEFAULT_ABI
== ABI_V4
30455 && ! info
->lr_save_p
30456 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
) > 0);
30457 if (save_LR_around_toc_setup
)
30459 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
30460 rtx tmp
= gen_rtx_REG (Pmode
, 12);
30463 insn
= emit_move_insn (tmp
, lr
);
30464 RTX_FRAME_RELATED_P (insn
) = 1;
30466 rs6000_emit_load_toc_table (TRUE
);
30468 insn
= emit_move_insn (lr
, tmp
);
30469 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
30470 RTX_FRAME_RELATED_P (insn
) = 1;
30473 rs6000_emit_load_toc_table (TRUE
);
30477 if (!TARGET_SINGLE_PIC_BASE
30478 && DEFAULT_ABI
== ABI_DARWIN
30479 && flag_pic
&& crtl
->uses_pic_offset_table
)
30481 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
30482 rtx src
= gen_rtx_SYMBOL_REF (Pmode
, MACHOPIC_FUNCTION_BASE_NAME
);
30484 /* Save and restore LR locally around this call (in R0). */
30485 if (!info
->lr_save_p
)
30486 emit_move_insn (gen_rtx_REG (Pmode
, 0), lr
);
30488 emit_insn (gen_load_macho_picbase (src
));
30490 emit_move_insn (gen_rtx_REG (Pmode
,
30491 RS6000_PIC_OFFSET_TABLE_REGNUM
),
30494 if (!info
->lr_save_p
)
30495 emit_move_insn (lr
, gen_rtx_REG (Pmode
, 0));
30499 /* If we need to, save the TOC register after doing the stack setup.
30500 Do not emit eh frame info for this save. The unwinder wants info,
30501 conceptually attached to instructions in this function, about
30502 register values in the caller of this function. This R2 may have
30503 already been changed from the value in the caller.
30504 We don't attempt to write accurate DWARF EH frame info for R2
30505 because code emitted by gcc for a (non-pointer) function call
30506 doesn't save and restore R2. Instead, R2 is managed out-of-line
30507 by a linker generated plt call stub when the function resides in
30508 a shared library. This behavior is costly to describe in DWARF,
30509 both in terms of the size of DWARF info and the time taken in the
30510 unwinder to interpret it. R2 changes, apart from the
30511 calls_eh_return case earlier in this function, are handled by
30512 linux-unwind.h frob_update_context. */
30513 if (rs6000_save_toc_in_prologue_p ())
30515 rtx reg
= gen_rtx_REG (reg_mode
, TOC_REGNUM
);
30516 emit_insn (gen_frame_store (reg
, sp_reg_rtx
, RS6000_TOC_SAVE_SLOT
));
30519 if (using_split_stack
&& split_stack_arg_pointer_used_p ())
30521 /* Set up the arg pointer (r12) for -fsplit-stack code. If
30522 __morestack was called, it left the arg pointer to the old
30523 stack in r29. Otherwise, the arg pointer is the top of the
30525 cfun
->machine
->split_stack_argp_used
= true;
30528 rtx r12
= gen_rtx_REG (Pmode
, 12);
30529 rtx set_r12
= gen_rtx_SET (r12
, sp_reg_rtx
);
30530 emit_insn_before (set_r12
, sp_adjust
);
30532 else if (frame_off
!= 0 || REGNO (frame_reg_rtx
) != 12)
30534 rtx r12
= gen_rtx_REG (Pmode
, 12);
30535 if (frame_off
== 0)
30536 emit_move_insn (r12
, frame_reg_rtx
);
30538 emit_insn (gen_add3_insn (r12
, frame_reg_rtx
, GEN_INT (frame_off
)));
30542 rtx r12
= gen_rtx_REG (Pmode
, 12);
30543 rtx r29
= gen_rtx_REG (Pmode
, 29);
30544 rtx cr7
= gen_rtx_REG (CCUNSmode
, CR7_REGNO
);
30545 rtx not_more
= gen_label_rtx ();
30548 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
30549 gen_rtx_GEU (VOIDmode
, cr7
, const0_rtx
),
30550 gen_rtx_LABEL_REF (VOIDmode
, not_more
),
30552 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
30553 JUMP_LABEL (jump
) = not_more
;
30554 LABEL_NUSES (not_more
) += 1;
30555 emit_move_insn (r12
, r29
);
30556 emit_label (not_more
);
/* Output .extern statements for the save/restore routines we use.  */

static void
rs6000_output_savres_externs (FILE *file)
{
  rs6000_stack_t *info = rs6000_stack_info ();

  if (TARGET_DEBUG_STACK)
    debug_stack_info (info);

  /* Write .extern for any function we will call to save and restore
     fp values.  NOTE(review): the extracted text dropped the other
     arms of this condition; reconstructed as the usual
     !TARGET_MACHO && !TARGET_ELF guard -- confirm against the tree.  */
  if (info->first_fp_reg_save < 64
      && !TARGET_MACHO
      && !TARGET_ELF)
    {
      char *name;
      /* Out-of-line FPR routines are named by the first FP register
	 saved; f32 corresponds to index 0.  */
      int regno = info->first_fp_reg_save - 32;

      if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
	{
	  bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
	  int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
	  name = rs6000_savres_routine_name (info, regno, sel);
	  fprintf (file, "\t.extern %s\n", name);
	}
      if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
	{
	  /* Note the inverted sense: the flag says the routine does NOT
	     restore LR, so lr is true when the flag is clear.  */
	  bool lr = (info->savres_strategy
		     & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
	  int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
	  name = rs6000_savres_routine_name (info, regno, sel);
	  fprintf (file, "\t.extern %s\n", name);
	}
    }
}
/* Write function prologue.  */

static void
rs6000_output_function_prologue (FILE *file)
{
  /* Emit .extern directives for any out-of-line save/restore routines,
     except for thunks, which never use them.  */
  if (!cfun->is_thunk)
    rs6000_output_savres_externs (file);

  /* ELFv2 ABI r2 setup code and local entry point.  This must follow
     immediately after the global entry point label.  */
  if (rs6000_global_entry_point_needed_p ())
    {
      const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

      (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);

      if (TARGET_CMODEL != CMODEL_LARGE)
	{
	  /* In the small and medium code models, we assume the TOC is less
	     2 GB away from the text section, so it can be computed via the
	     following two-instruction sequence.  */
	  char buf[256];

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  fprintf (file, "0:\taddis 2,12,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@ha\n");
	  fprintf (file, "\taddi 2,2,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@l\n");
	}
      else
	{
	  /* In the large code model, we allow arbitrary offsets between the
	     TOC and the text section, so we have to load the offset from
	     memory.  The data field is emitted directly before the global
	     entry point in rs6000_elf_declare_function_name.  */
	  char buf[256];

#ifdef HAVE_AS_ENTRY_MARKERS
	  /* If supported by the linker, emit a marker relocation.  If the
	     total code size of the final executable or shared library
	     happens to fit into 2 GB after all, the linker will replace
	     this code sequence with the sequence for the small or medium
	     code model.  */
	  fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
#endif

	  fprintf (file, "\tld 2,");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "-");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "(12)\n");
	  fprintf (file, "\tadd 2,2,12\n");
	}

      /* Mark the local entry point: offset from the global entry point
	 at which callers with a valid r2 (TOC) may enter.  */
      fputs ("\t.localentry\t", file);
      assemble_name (file, name);
      fputs (",.-", file);
      assemble_name (file, name);
      fputs ("\n", file);
    }

  /* Output -mprofile-kernel code.  This needs to be done here instead of
     in output_function_profile since it must go after the ELFv2 ABI
     local entry point.  */
  if (TARGET_PROFILE_KERNEL && crtl->profile)
    {
      gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      gcc_assert (!TARGET_32BIT);

      asm_fprintf (file, "\tmflr %s\n", reg_names[0]);

      /* In the ELFv2 ABI we have no compiler stack word.  It must be
	 the responsibility of _mcount to preserve the static chain
	 register if required.  */
      if (DEFAULT_ABI != ABI_ELFv2
	  && cfun->static_chain_decl != NULL)
	{
	  asm_fprintf (file, "\tstd %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	  fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
	  asm_fprintf (file, "\tld %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	}
      else
	fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
    }

  rs6000_pic_labelno++;
}
/* -mprofile-kernel code calls mcount before the function prolog,
   so a profiled leaf function should stay a leaf function.

   Implements the TARGET_KEEP_LEAF_WHEN_PROFILED hook; returns true
   exactly when -mprofile-kernel is in effect.  */

static bool
rs6000_keep_leaf_when_profiled ()
{
  return TARGET_PROFILE_KERNEL;
}
30699 /* Non-zero if vmx regs are restored before the frame pop, zero if
30700 we restore after the pop when possible. */
30701 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30703 /* Restoring cr is a two step process: loading a reg from the frame
30704 save, then moving the reg to cr. For ABI_V4 we must let the
30705 unwinder know that the stack location is no longer valid at or
30706 before the stack deallocation, but we can't emit a cfa_restore for
30707 cr at the stack deallocation like we do for other registers.
30708 The trouble is that it is possible for the move to cr to be
30709 scheduled after the stack deallocation. So say exactly where cr
30710 is located on each of the two insns. */
30713 load_cr_save (int regno
, rtx frame_reg_rtx
, int offset
, bool exit_func
)
30715 rtx mem
= gen_frame_mem_offset (SImode
, frame_reg_rtx
, offset
);
30716 rtx reg
= gen_rtx_REG (SImode
, regno
);
30717 rtx_insn
*insn
= emit_move_insn (reg
, mem
);
30719 if (!exit_func
&& DEFAULT_ABI
== ABI_V4
)
30721 rtx cr
= gen_rtx_REG (SImode
, CR2_REGNO
);
30722 rtx set
= gen_rtx_SET (reg
, cr
);
30724 add_reg_note (insn
, REG_CFA_REGISTER
, set
);
30725 RTX_FRAME_RELATED_P (insn
) = 1;
/* Reload CR from REG.  REG is a GPR holding the saved CR image;
   USING_MFCR_MULTIPLE selects the multi-field mtcrf form; EXIT_FUNC is
   true when this is the final epilogue, in which case no CFA_RESTORE
   notes are needed.  */

static void
restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
{
  int count = 0;
  int i;

  if (using_mfcr_multiple)
    {
      /* Count how many call-saved CR fields must be restored; a single
	 mtcrf-style insn can scatter REG into all of them at once.  */
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  count++;
      gcc_assert (count);
    }

  if (using_mfcr_multiple && count > 1)
    {
      rtx_insn *insn;
      rtvec p;
      int ndx;

      p = rtvec_alloc (count);

      ndx = 0;
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  {
	    rtvec r = rtvec_alloc (2);
	    RTVEC_ELT (r, 0) = reg;
	    /* Bit (7 - i) selects CR field i in the mtcrf mask.  */
	    RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
	    RTVEC_ELT (p, ndx) =
	      gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
			   gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
	    ndx++;
	  }
      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
      gcc_assert (ndx == count);

      /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	 CR field separately.  */
      if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	{
	  for (i = 0; i < 8; i++)
	    if (save_reg_p (CR0_REGNO + i))
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    /* Restore each needed CR field with its own movsi_to_cr_one.  */
    for (i = 0; i < 8; i++)
      if (save_reg_p (CR0_REGNO + i))
	{
	  rtx insn = emit_insn (gen_movsi_to_cr_one
				 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));

	  /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	     CR field separately, attached to the insn that in fact
	     restores this particular CR field.  */
	  if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	    {
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

  /* For other ABIs, we just generate a single CFA_RESTORE for CR2.  */
  if (!exit_func && DEFAULT_ABI != ABI_ELFv2
      && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
    {
      rtx_insn *insn = get_last_insn ();
      rtx cr = gen_rtx_REG (SImode, CR2_REGNO);

      add_reg_note (insn, REG_CFA_RESTORE, cr);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
30812 /* Like cr, the move to lr instruction can be scheduled after the
30813 stack deallocation, but unlike cr, its stack frame save is still
30814 valid. So we only need to emit the cfa_restore on the correct
30818 load_lr_save (int regno
, rtx frame_reg_rtx
, int offset
)
30820 rtx mem
= gen_frame_mem_offset (Pmode
, frame_reg_rtx
, offset
);
30821 rtx reg
= gen_rtx_REG (Pmode
, regno
);
30823 emit_move_insn (reg
, mem
);
30827 restore_saved_lr (int regno
, bool exit_func
)
30829 rtx reg
= gen_rtx_REG (Pmode
, regno
);
30830 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
30831 rtx_insn
*insn
= emit_move_insn (lr
, reg
);
30833 if (!exit_func
&& flag_shrink_wrap
)
30835 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
30836 RTX_FRAME_RELATED_P (insn
) = 1;
/* Prepend CFA_RESTORE notes for CR and LR to the CFA_RESTORES chain
   and return the new chain head.  For ELFv2, each call-saved CR field
   gets its own note; other ABIs use a single note on CR2 by
   convention.  */

static rtx
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
{
  if (DEFAULT_ABI == ABI_ELFv2)
    {
      int i;
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  {
	    rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
					   cfa_restores);
	  }
    }
  else if (info->cr_save_p)
    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
				   gen_rtx_REG (SImode, CR2_REGNO),
				   cfa_restores);

  if (info->lr_save_p)
    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
				   gen_rtx_REG (Pmode, LR_REGNO),
				   cfa_restores);
  return cfa_restores;
}
30866 /* Return true if OFFSET from stack pointer can be clobbered by signals.
30867 V.4 doesn't have any stack cushion, AIX ABIs have 220 or 288 bytes
30868 below stack pointer not cloberred by signals. */
30871 offset_below_red_zone_p (HOST_WIDE_INT offset
)
30873 return offset
< (DEFAULT_ABI
== ABI_V4
30875 : TARGET_32BIT
? -220 : -288);
30878 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30881 emit_cfa_restores (rtx cfa_restores
)
30883 rtx_insn
*insn
= get_last_insn ();
30884 rtx
*loc
= ®_NOTES (insn
);
30887 loc
= &XEXP (*loc
, 1);
30888 *loc
= cfa_restores
;
30889 RTX_FRAME_RELATED_P (insn
) = 1;
30892 /* Emit function epilogue as insns. */
30895 rs6000_emit_epilogue (int sibcall
)
30897 rs6000_stack_t
*info
;
30898 int restoring_GPRs_inline
;
30899 int restoring_FPRs_inline
;
30900 int using_load_multiple
;
30901 int using_mtcr_multiple
;
30902 int use_backchain_to_restore_sp
;
30905 HOST_WIDE_INT frame_off
= 0;
30906 rtx sp_reg_rtx
= gen_rtx_REG (Pmode
, 1);
30907 rtx frame_reg_rtx
= sp_reg_rtx
;
30908 rtx cfa_restores
= NULL_RTX
;
30910 rtx cr_save_reg
= NULL_RTX
;
30911 machine_mode reg_mode
= Pmode
;
30912 int reg_size
= TARGET_32BIT
? 4 : 8;
30913 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
30915 int fp_reg_size
= 8;
30918 unsigned ptr_regno
;
30920 info
= rs6000_stack_info ();
30922 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
30924 reg_mode
= V2SImode
;
30928 strategy
= info
->savres_strategy
;
30929 using_load_multiple
= strategy
& REST_MULTIPLE
;
30930 restoring_FPRs_inline
= sibcall
|| (strategy
& REST_INLINE_FPRS
);
30931 restoring_GPRs_inline
= sibcall
|| (strategy
& REST_INLINE_GPRS
);
30932 using_mtcr_multiple
= (rs6000_cpu
== PROCESSOR_PPC601
30933 || rs6000_cpu
== PROCESSOR_PPC603
30934 || rs6000_cpu
== PROCESSOR_PPC750
30936 /* Restore via the backchain when we have a large frame, since this
30937 is more efficient than an addis, addi pair. The second condition
30938 here will not trigger at the moment; We don't actually need a
30939 frame pointer for alloca, but the generic parts of the compiler
30940 give us one anyway. */
30941 use_backchain_to_restore_sp
= (info
->total_size
+ (info
->lr_save_p
30942 ? info
->lr_save_offset
30944 || (cfun
->calls_alloca
30945 && !frame_pointer_needed
));
30946 restore_lr
= (info
->lr_save_p
30947 && (restoring_FPRs_inline
30948 || (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
))
30949 && (restoring_GPRs_inline
30950 || info
->first_fp_reg_save
< 64)
30951 && !cfun
->machine
->lr_is_wrapped_separately
);
30954 if (WORLD_SAVE_P (info
))
30958 const char *alloc_rname
;
30961 /* eh_rest_world_r10 will return to the location saved in the LR
30962 stack slot (which is not likely to be our caller.)
30963 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
30964 rest_world is similar, except any R10 parameter is ignored.
30965 The exception-handling stuff that was here in 2.95 is no
30966 longer necessary. */
30969 + 32 - info
->first_gp_reg_save
30970 + LAST_ALTIVEC_REGNO
+ 1 - info
->first_altivec_reg_save
30971 + 63 + 1 - info
->first_fp_reg_save
);
30973 strcpy (rname
, ((crtl
->calls_eh_return
) ?
30974 "*eh_rest_world_r10" : "*rest_world"));
30975 alloc_rname
= ggc_strdup (rname
);
30978 RTVEC_ELT (p
, j
++) = ret_rtx
;
30980 = gen_rtx_USE (VOIDmode
, gen_rtx_SYMBOL_REF (Pmode
, alloc_rname
));
30981 /* The instruction pattern requires a clobber here;
30982 it is shared with the restVEC helper. */
30984 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, 11));
30987 /* CR register traditionally saved as CR2. */
30988 rtx reg
= gen_rtx_REG (SImode
, CR2_REGNO
);
30990 = gen_frame_load (reg
, frame_reg_rtx
, info
->cr_save_offset
);
30991 if (flag_shrink_wrap
)
30993 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
,
30994 gen_rtx_REG (Pmode
, LR_REGNO
),
30996 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31000 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
31002 rtx reg
= gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
);
31004 = gen_frame_load (reg
,
31005 frame_reg_rtx
, info
->gp_save_offset
+ reg_size
* i
);
31006 if (flag_shrink_wrap
)
31007 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31009 for (i
= 0; info
->first_altivec_reg_save
+ i
<= LAST_ALTIVEC_REGNO
; i
++)
31011 rtx reg
= gen_rtx_REG (V4SImode
, info
->first_altivec_reg_save
+ i
);
31013 = gen_frame_load (reg
,
31014 frame_reg_rtx
, info
->altivec_save_offset
+ 16 * i
);
31015 if (flag_shrink_wrap
)
31016 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31018 for (i
= 0; info
->first_fp_reg_save
+ i
<= 63; i
++)
31020 rtx reg
= gen_rtx_REG ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
31021 ? DFmode
: SFmode
),
31022 info
->first_fp_reg_save
+ i
);
31024 = gen_frame_load (reg
, frame_reg_rtx
, info
->fp_save_offset
+ 8 * i
);
31025 if (flag_shrink_wrap
)
31026 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31029 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, 0));
31031 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 12));
31033 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 7));
31035 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 8));
31037 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
, 10));
31038 insn
= emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
31040 if (flag_shrink_wrap
)
31042 REG_NOTES (insn
) = cfa_restores
;
31043 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
31044 RTX_FRAME_RELATED_P (insn
) = 1;
31049 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
31051 frame_off
= info
->total_size
;
31053 /* Restore AltiVec registers if we must do so before adjusting the
31055 if (info
->altivec_size
!= 0
31056 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31057 || (DEFAULT_ABI
!= ABI_V4
31058 && offset_below_red_zone_p (info
->altivec_save_offset
))))
31061 int scratch_regno
= ptr_regno_for_savres (SAVRES_VR
);
31063 gcc_checking_assert (scratch_regno
== 11 || scratch_regno
== 12);
31064 if (use_backchain_to_restore_sp
)
31066 int frame_regno
= 11;
31068 if ((strategy
& REST_INLINE_VRS
) == 0)
31070 /* Of r11 and r12, select the one not clobbered by an
31071 out-of-line restore function for the frame register. */
31072 frame_regno
= 11 + 12 - scratch_regno
;
31074 frame_reg_rtx
= gen_rtx_REG (Pmode
, frame_regno
);
31075 emit_move_insn (frame_reg_rtx
,
31076 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
31079 else if (frame_pointer_needed
)
31080 frame_reg_rtx
= hard_frame_pointer_rtx
;
31082 if ((strategy
& REST_INLINE_VRS
) == 0)
31084 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
31086 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
31087 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
31089 if (end_save
+ frame_off
!= 0)
31091 rtx offset
= GEN_INT (end_save
+ frame_off
);
31093 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
31096 emit_move_insn (ptr_reg
, frame_reg_rtx
);
31098 ptr_off
= -end_save
;
31099 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
31100 info
->altivec_save_offset
+ ptr_off
,
31101 0, V4SImode
, SAVRES_VR
);
31105 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31106 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
31108 rtx addr
, areg
, mem
, insn
;
31109 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31110 HOST_WIDE_INT offset
31111 = (info
->altivec_save_offset
+ frame_off
31112 + 16 * (i
- info
->first_altivec_reg_save
));
31114 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
31116 mem
= gen_frame_mem (V4SImode
,
31117 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
31118 GEN_INT (offset
)));
31119 insn
= gen_rtx_SET (reg
, mem
);
31123 areg
= gen_rtx_REG (Pmode
, 0);
31124 emit_move_insn (areg
, GEN_INT (offset
));
31126 /* AltiVec addressing mode is [reg+reg]. */
31127 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
);
31128 mem
= gen_frame_mem (V4SImode
, addr
);
31130 /* Rather than emitting a generic move, force use of the
31131 lvx instruction, which we always want. In particular we
31132 don't want lxvd2x/xxpermdi for little endian. */
31133 insn
= gen_altivec_lvx_v4si_internal (reg
, mem
);
31136 (void) emit_insn (insn
);
31140 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31141 if (((strategy
& REST_INLINE_VRS
) == 0
31142 || (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
)) != 0)
31143 && (flag_shrink_wrap
31144 || (offset_below_red_zone_p
31145 (info
->altivec_save_offset
31146 + 16 * (i
- info
->first_altivec_reg_save
)))))
31148 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31149 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31153 /* Restore VRSAVE if we must do so before adjusting the stack. */
31154 if (info
->vrsave_size
!= 0
31155 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31156 || (DEFAULT_ABI
!= ABI_V4
31157 && offset_below_red_zone_p (info
->vrsave_save_offset
))))
31161 if (frame_reg_rtx
== sp_reg_rtx
)
31163 if (use_backchain_to_restore_sp
)
31165 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31166 emit_move_insn (frame_reg_rtx
,
31167 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
31170 else if (frame_pointer_needed
)
31171 frame_reg_rtx
= hard_frame_pointer_rtx
;
31174 reg
= gen_rtx_REG (SImode
, 12);
31175 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31176 info
->vrsave_save_offset
+ frame_off
));
31178 emit_insn (generate_set_vrsave (reg
, info
, 1));
31182 /* If we have a large stack frame, restore the old stack pointer
31183 using the backchain. */
31184 if (use_backchain_to_restore_sp
)
31186 if (frame_reg_rtx
== sp_reg_rtx
)
31188 /* Under V.4, don't reset the stack pointer until after we're done
31189 loading the saved registers. */
31190 if (DEFAULT_ABI
== ABI_V4
)
31191 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31193 insn
= emit_move_insn (frame_reg_rtx
,
31194 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
31197 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31198 && DEFAULT_ABI
== ABI_V4
)
31199 /* frame_reg_rtx has been set up by the altivec restore. */
31203 insn
= emit_move_insn (sp_reg_rtx
, frame_reg_rtx
);
31204 frame_reg_rtx
= sp_reg_rtx
;
31207 /* If we have a frame pointer, we can restore the old stack pointer
31209 else if (frame_pointer_needed
)
31211 frame_reg_rtx
= sp_reg_rtx
;
31212 if (DEFAULT_ABI
== ABI_V4
)
31213 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31214 /* Prevent reordering memory accesses against stack pointer restore. */
31215 else if (cfun
->calls_alloca
31216 || offset_below_red_zone_p (-info
->total_size
))
31217 rs6000_emit_stack_tie (frame_reg_rtx
, true);
31219 insn
= emit_insn (gen_add3_insn (frame_reg_rtx
, hard_frame_pointer_rtx
,
31220 GEN_INT (info
->total_size
)));
31223 else if (info
->push_p
31224 && DEFAULT_ABI
!= ABI_V4
31225 && !crtl
->calls_eh_return
)
31227 /* Prevent reordering memory accesses against stack pointer restore. */
31228 if (cfun
->calls_alloca
31229 || offset_below_red_zone_p (-info
->total_size
))
31230 rs6000_emit_stack_tie (frame_reg_rtx
, false);
31231 insn
= emit_insn (gen_add3_insn (sp_reg_rtx
, sp_reg_rtx
,
31232 GEN_INT (info
->total_size
)));
31235 if (insn
&& frame_reg_rtx
== sp_reg_rtx
)
31239 REG_NOTES (insn
) = cfa_restores
;
31240 cfa_restores
= NULL_RTX
;
31242 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
31243 RTX_FRAME_RELATED_P (insn
) = 1;
31246 /* Restore AltiVec registers if we have not done so already. */
31247 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31248 && info
->altivec_size
!= 0
31249 && (DEFAULT_ABI
== ABI_V4
31250 || !offset_below_red_zone_p (info
->altivec_save_offset
)))
31254 if ((strategy
& REST_INLINE_VRS
) == 0)
31256 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
31258 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
31259 int scratch_regno
= ptr_regno_for_savres (SAVRES_VR
);
31260 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
31262 if (end_save
+ frame_off
!= 0)
31264 rtx offset
= GEN_INT (end_save
+ frame_off
);
31266 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
31269 emit_move_insn (ptr_reg
, frame_reg_rtx
);
31271 ptr_off
= -end_save
;
31272 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
31273 info
->altivec_save_offset
+ ptr_off
,
31274 0, V4SImode
, SAVRES_VR
);
31275 if (REGNO (frame_reg_rtx
) == REGNO (scratch_reg
))
31277 /* Frame reg was clobbered by out-of-line save. Restore it
31278 from ptr_reg, and if we are calling out-of-line gpr or
31279 fpr restore set up the correct pointer and offset. */
31280 unsigned newptr_regno
= 1;
31281 if (!restoring_GPRs_inline
)
31283 bool lr
= info
->gp_save_offset
+ info
->gp_size
== 0;
31284 int sel
= SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
31285 newptr_regno
= ptr_regno_for_savres (sel
);
31286 end_save
= info
->gp_save_offset
+ info
->gp_size
;
31288 else if (!restoring_FPRs_inline
)
31290 bool lr
= !(strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
);
31291 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
31292 newptr_regno
= ptr_regno_for_savres (sel
);
31293 end_save
= info
->fp_save_offset
+ info
->fp_size
;
31296 if (newptr_regno
!= 1 && REGNO (frame_reg_rtx
) != newptr_regno
)
31297 frame_reg_rtx
= gen_rtx_REG (Pmode
, newptr_regno
);
31299 if (end_save
+ ptr_off
!= 0)
31301 rtx offset
= GEN_INT (end_save
+ ptr_off
);
31303 frame_off
= -end_save
;
31305 emit_insn (gen_addsi3_carry (frame_reg_rtx
,
31308 emit_insn (gen_adddi3_carry (frame_reg_rtx
,
31313 frame_off
= ptr_off
;
31314 emit_move_insn (frame_reg_rtx
, ptr_reg
);
31320 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31321 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
31323 rtx addr
, areg
, mem
, insn
;
31324 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31325 HOST_WIDE_INT offset
31326 = (info
->altivec_save_offset
+ frame_off
31327 + 16 * (i
- info
->first_altivec_reg_save
));
31329 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
31331 mem
= gen_frame_mem (V4SImode
,
31332 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
31333 GEN_INT (offset
)));
31334 insn
= gen_rtx_SET (reg
, mem
);
31338 areg
= gen_rtx_REG (Pmode
, 0);
31339 emit_move_insn (areg
, GEN_INT (offset
));
31341 /* AltiVec addressing mode is [reg+reg]. */
31342 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
);
31343 mem
= gen_frame_mem (V4SImode
, addr
);
31345 /* Rather than emitting a generic move, force use of the
31346 lvx instruction, which we always want. In particular we
31347 don't want lxvd2x/xxpermdi for little endian. */
31348 insn
= gen_altivec_lvx_v4si_internal (reg
, mem
);
31351 (void) emit_insn (insn
);
31355 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31356 if (((strategy
& REST_INLINE_VRS
) == 0
31357 || (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
)) != 0)
31358 && (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
))
31360 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31361 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31365 /* Restore VRSAVE if we have not done so already. */
31366 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31367 && info
->vrsave_size
!= 0
31368 && (DEFAULT_ABI
== ABI_V4
31369 || !offset_below_red_zone_p (info
->vrsave_save_offset
)))
31373 reg
= gen_rtx_REG (SImode
, 12);
31374 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31375 info
->vrsave_save_offset
+ frame_off
));
31377 emit_insn (generate_set_vrsave (reg
, info
, 1));
31380 /* If we exit by an out-of-line restore function on ABI_V4 then that
31381 function will deallocate the stack, so we don't need to worry
31382 about the unwinder restoring cr from an invalid stack frame
31384 exit_func
= (!restoring_FPRs_inline
31385 || (!restoring_GPRs_inline
31386 && info
->first_fp_reg_save
== 64));
31388 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31389 *separate* slots if the routine calls __builtin_eh_return, so
31390 that they can be independently restored by the unwinder. */
31391 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
31393 int i
, cr_off
= info
->ehcr_offset
;
31395 for (i
= 0; i
< 8; i
++)
31396 if (!call_used_regs
[CR0_REGNO
+ i
])
31398 rtx reg
= gen_rtx_REG (SImode
, 0);
31399 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31400 cr_off
+ frame_off
));
31402 insn
= emit_insn (gen_movsi_to_cr_one
31403 (gen_rtx_REG (CCmode
, CR0_REGNO
+ i
), reg
));
31405 if (!exit_func
&& flag_shrink_wrap
)
31407 add_reg_note (insn
, REG_CFA_RESTORE
,
31408 gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
31410 RTX_FRAME_RELATED_P (insn
) = 1;
31413 cr_off
+= reg_size
;
31417 /* Get the old lr if we saved it. If we are restoring registers
31418 out-of-line, then the out-of-line routines can do this for us. */
31419 if (restore_lr
&& restoring_GPRs_inline
)
31420 load_lr_save (0, frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
31422 /* Get the old cr if we saved it. */
31423 if (info
->cr_save_p
)
31425 unsigned cr_save_regno
= 12;
31427 if (!restoring_GPRs_inline
)
31429 /* Ensure we don't use the register used by the out-of-line
31430 gpr register restore below. */
31431 bool lr
= info
->gp_save_offset
+ info
->gp_size
== 0;
31432 int sel
= SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
31433 int gpr_ptr_regno
= ptr_regno_for_savres (sel
);
31435 if (gpr_ptr_regno
== 12)
31436 cr_save_regno
= 11;
31437 gcc_checking_assert (REGNO (frame_reg_rtx
) != cr_save_regno
);
31439 else if (REGNO (frame_reg_rtx
) == 12)
31440 cr_save_regno
= 11;
31442 cr_save_reg
= load_cr_save (cr_save_regno
, frame_reg_rtx
,
31443 info
->cr_save_offset
+ frame_off
,
31447 /* Set LR here to try to overlap restores below. */
31448 if (restore_lr
&& restoring_GPRs_inline
)
31449 restore_saved_lr (0, exit_func
);
31451 /* Load exception handler data registers, if needed. */
31452 if (crtl
->calls_eh_return
)
31454 unsigned int i
, regno
;
31458 rtx reg
= gen_rtx_REG (reg_mode
, 2);
31459 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31460 frame_off
+ RS6000_TOC_SAVE_SLOT
));
31467 regno
= EH_RETURN_DATA_REGNO (i
);
31468 if (regno
== INVALID_REGNUM
)
31471 /* Note: possible use of r0 here to address SPE regs. */
31472 mem
= gen_frame_mem_offset (reg_mode
, frame_reg_rtx
,
31473 info
->ehrd_offset
+ frame_off
31474 + reg_size
* (int) i
);
31476 emit_move_insn (gen_rtx_REG (reg_mode
, regno
), mem
);
31480 /* Restore GPRs. This is done as a PARALLEL if we are using
31481 the load-multiple instructions. */
31483 && info
->spe_64bit_regs_used
31484 && info
->first_gp_reg_save
!= 32)
31486 /* Determine whether we can address all of the registers that need
31487 to be saved with an offset from frame_reg_rtx that fits in
31488 the small const field for SPE memory instructions. */
31489 int spe_regs_addressable
31490 = (SPE_CONST_OFFSET_OK (info
->spe_gp_save_offset
+ frame_off
31491 + reg_size
* (32 - info
->first_gp_reg_save
- 1))
31492 && restoring_GPRs_inline
);
31494 if (!spe_regs_addressable
)
31496 int ool_adjust
= 0;
31497 rtx old_frame_reg_rtx
= frame_reg_rtx
;
31498 /* Make r11 point to the start of the SPE save area. We worried about
31499 not clobbering it when we were saving registers in the prologue.
31500 There's no need to worry here because the static chain is passed
31501 anew to every function. */
31503 if (!restoring_GPRs_inline
)
31504 ool_adjust
= 8 * (info
->first_gp_reg_save
- FIRST_SAVED_GP_REGNO
);
31505 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31506 emit_insn (gen_addsi3 (frame_reg_rtx
, old_frame_reg_rtx
,
31507 GEN_INT (info
->spe_gp_save_offset
31510 /* Keep the invariant that frame_reg_rtx + frame_off points
31511 at the top of the stack frame. */
31512 frame_off
= -info
->spe_gp_save_offset
+ ool_adjust
;
31515 if (restoring_GPRs_inline
)
31517 HOST_WIDE_INT spe_offset
= info
->spe_gp_save_offset
+ frame_off
;
31519 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
31520 if (rs6000_reg_live_or_pic_offset_p (info
->first_gp_reg_save
+ i
))
31522 rtx offset
, addr
, mem
, reg
;
31524 /* We're doing all this to ensure that the immediate offset
31525 fits into the immediate field of 'evldd'. */
31526 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset
+ reg_size
* i
));
31528 offset
= GEN_INT (spe_offset
+ reg_size
* i
);
31529 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, offset
);
31530 mem
= gen_rtx_MEM (V2SImode
, addr
);
31531 reg
= gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
);
31533 emit_move_insn (reg
, mem
);
31537 rs6000_emit_savres_rtx (info
, frame_reg_rtx
,
31538 info
->spe_gp_save_offset
+ frame_off
,
31539 info
->lr_save_offset
+ frame_off
,
31541 SAVRES_GPR
| SAVRES_LR
);
31543 else if (!restoring_GPRs_inline
)
31545 /* We are jumping to an out-of-line function. */
31547 int end_save
= info
->gp_save_offset
+ info
->gp_size
;
31548 bool can_use_exit
= end_save
== 0;
31549 int sel
= SAVRES_GPR
| (can_use_exit
? SAVRES_LR
: 0);
31552 /* Emit stack reset code if we need it. */
31553 ptr_regno
= ptr_regno_for_savres (sel
);
31554 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
31556 rs6000_emit_stack_reset (info
, frame_reg_rtx
, frame_off
, ptr_regno
);
31557 else if (end_save
+ frame_off
!= 0)
31558 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
,
31559 GEN_INT (end_save
+ frame_off
)));
31560 else if (REGNO (frame_reg_rtx
) != ptr_regno
)
31561 emit_move_insn (ptr_reg
, frame_reg_rtx
);
31562 if (REGNO (frame_reg_rtx
) == ptr_regno
)
31563 frame_off
= -end_save
;
31565 if (can_use_exit
&& info
->cr_save_p
)
31566 restore_saved_cr (cr_save_reg
, using_mtcr_multiple
, true);
31568 ptr_off
= -end_save
;
31569 rs6000_emit_savres_rtx (info
, ptr_reg
,
31570 info
->gp_save_offset
+ ptr_off
,
31571 info
->lr_save_offset
+ ptr_off
,
31574 else if (using_load_multiple
)
31577 p
= rtvec_alloc (32 - info
->first_gp_reg_save
);
31578 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
31580 = gen_frame_load (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
31582 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
31583 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
31587 int offset
= info
->gp_save_offset
+ frame_off
;
31588 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
31590 if (rs6000_reg_live_or_pic_offset_p (i
)
31591 && !cfun
->machine
->gpr_is_wrapped_separately
[i
])
31593 rtx reg
= gen_rtx_REG (reg_mode
, i
);
31594 emit_insn (gen_frame_load (reg
, frame_reg_rtx
, offset
));
31597 offset
+= reg_size
;
31601 if (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
)
31603 /* If the frame pointer was used then we can't delay emitting
31604 a REG_CFA_DEF_CFA note. This must happen on the insn that
31605 restores the frame pointer, r31. We may have already emitted
31606 a REG_CFA_DEF_CFA note, but that's OK; A duplicate is
31607 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31608 be harmless if emitted. */
31609 if (frame_pointer_needed
)
31611 insn
= get_last_insn ();
31612 add_reg_note (insn
, REG_CFA_DEF_CFA
,
31613 plus_constant (Pmode
, frame_reg_rtx
, frame_off
));
31614 RTX_FRAME_RELATED_P (insn
) = 1;
31617 /* Set up cfa_restores. We always need these when
31618 shrink-wrapping. If not shrink-wrapping then we only need
31619 the cfa_restore when the stack location is no longer valid.
31620 The cfa_restores must be emitted on or before the insn that
31621 invalidates the stack, and of course must not be emitted
31622 before the insn that actually does the restore. The latter
31623 is why it is a bad idea to emit the cfa_restores as a group
31624 on the last instruction here that actually does a restore:
31625 That insn may be reordered with respect to others doing
31627 if (flag_shrink_wrap
31628 && !restoring_GPRs_inline
31629 && info
->first_fp_reg_save
== 64)
31630 cfa_restores
= add_crlr_cfa_restore (info
, cfa_restores
);
31632 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
31633 if (!restoring_GPRs_inline
31634 || using_load_multiple
31635 || rs6000_reg_live_or_pic_offset_p (i
))
31637 if (cfun
->machine
->gpr_is_wrapped_separately
[i
])
31640 rtx reg
= gen_rtx_REG (reg_mode
, i
);
31641 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31645 if (!restoring_GPRs_inline
31646 && info
->first_fp_reg_save
== 64)
31648 /* We are jumping to an out-of-line function. */
31650 emit_cfa_restores (cfa_restores
);
31654 if (restore_lr
&& !restoring_GPRs_inline
)
31656 load_lr_save (0, frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
31657 restore_saved_lr (0, exit_func
);
31660 /* Restore fpr's if we need to do it without calling a function. */
31661 if (restoring_FPRs_inline
)
31663 int offset
= info
->fp_save_offset
+ frame_off
;
31664 for (i
= info
->first_fp_reg_save
; i
< 64; i
++)
31667 && !cfun
->machine
->fpr_is_wrapped_separately
[i
- 32])
31669 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
31670 emit_insn (gen_frame_load (reg
, frame_reg_rtx
, offset
));
31671 if (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
)
31672 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
,
31676 offset
+= fp_reg_size
;
31680 /* If we saved cr, restore it here. Just those that were used. */
31681 if (info
->cr_save_p
)
31682 restore_saved_cr (cr_save_reg
, using_mtcr_multiple
, exit_func
);
31684 /* If this is V.4, unwind the stack pointer after all of the loads
31685 have been done, or set up r11 if we are restoring fp out of line. */
31687 if (!restoring_FPRs_inline
)
31689 bool lr
= (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
31690 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
31691 ptr_regno
= ptr_regno_for_savres (sel
);
31694 insn
= rs6000_emit_stack_reset (info
, frame_reg_rtx
, frame_off
, ptr_regno
);
31695 if (REGNO (frame_reg_rtx
) == ptr_regno
)
31698 if (insn
&& restoring_FPRs_inline
)
31702 REG_NOTES (insn
) = cfa_restores
;
31703 cfa_restores
= NULL_RTX
;
31705 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
31706 RTX_FRAME_RELATED_P (insn
) = 1;
31709 if (crtl
->calls_eh_return
)
31711 rtx sa
= EH_RETURN_STACKADJ_RTX
;
31712 emit_insn (gen_add3_insn (sp_reg_rtx
, sp_reg_rtx
, sa
));
31715 if (!sibcall
&& restoring_FPRs_inline
)
31719 /* We can't hang the cfa_restores off a simple return,
31720 since the shrink-wrap code sometimes uses an existing
31721 return. This means there might be a path from
31722 pre-prologue code to this return, and dwarf2cfi code
31723 wants the eh_frame unwinder state to be the same on
31724 all paths to any point. So we need to emit the
31725 cfa_restores before the return. For -m64 we really
31726 don't need epilogue cfa_restores at all, except for
31727 this irritating dwarf2cfi with shrink-wrap
31728 requirement; The stack red-zone means eh_frame info
31729 from the prologue telling the unwinder to restore
31730 from the stack is perfectly good right to the end of
31732 emit_insn (gen_blockage ());
31733 emit_cfa_restores (cfa_restores
);
31734 cfa_restores
= NULL_RTX
;
31737 emit_jump_insn (targetm
.gen_simple_return ());
31740 if (!sibcall
&& !restoring_FPRs_inline
)
31742 bool lr
= (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
31743 rtvec p
= rtvec_alloc (3 + !!lr
+ 64 - info
->first_fp_reg_save
);
31745 RTVEC_ELT (p
, elt
++) = ret_rtx
;
31747 RTVEC_ELT (p
, elt
++)
31748 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
31750 /* We have to restore more than two FP registers, so branch to the
31751 restore function. It will return to our caller. */
31756 if (flag_shrink_wrap
)
31757 cfa_restores
= add_crlr_cfa_restore (info
, cfa_restores
);
31759 sym
= rs6000_savres_routine_sym (info
, SAVRES_FPR
| (lr
? SAVRES_LR
: 0));
31760 RTVEC_ELT (p
, elt
++) = gen_rtx_USE (VOIDmode
, sym
);
31761 reg
= (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)? 1 : 11;
31762 RTVEC_ELT (p
, elt
++) = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, reg
));
31764 for (i
= 0; i
< 64 - info
->first_fp_reg_save
; i
++)
31766 rtx reg
= gen_rtx_REG (DFmode
, info
->first_fp_reg_save
+ i
);
31768 RTVEC_ELT (p
, elt
++)
31769 = gen_frame_load (reg
, sp_reg_rtx
, info
->fp_save_offset
+ 8 * i
);
31770 if (flag_shrink_wrap
)
31771 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31774 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
31780 /* Ensure the cfa_restores are hung off an insn that won't
31781 be reordered above other restores. */
31782 emit_insn (gen_blockage ());
31784 emit_cfa_restores (cfa_restores
);
31788 /* Write function epilogue. */
31791 rs6000_output_function_epilogue (FILE *file
)
31794 macho_branch_islands ();
31797 rtx_insn
*insn
= get_last_insn ();
31798 rtx_insn
*deleted_debug_label
= NULL
;
31800 /* Mach-O doesn't support labels at the end of objects, so if
31801 it looks like we might want one, take special action.
31803 First, collect any sequence of deleted debug labels. */
31806 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
31808 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
31809 notes only, instead set their CODE_LABEL_NUMBER to -1,
31810 otherwise there would be code generation differences
31811 in between -g and -g0. */
31812 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
31813 deleted_debug_label
= insn
;
31814 insn
= PREV_INSN (insn
);
31817 /* Second, if we have:
31820 then this needs to be detected, so skip past the barrier. */
31822 if (insn
&& BARRIER_P (insn
))
31823 insn
= PREV_INSN (insn
);
31825 /* Up to now we've only seen notes or barriers. */
31830 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
31831 /* Trailing label: <barrier>. */
31832 fputs ("\tnop\n", file
);
31835 /* Lastly, see if we have a completely empty function body. */
31836 while (insn
&& ! INSN_P (insn
))
31837 insn
= PREV_INSN (insn
);
31838 /* If we don't find any insns, we've got an empty function body;
31839 I.e. completely empty - without a return or branch. This is
31840 taken as the case where a function body has been removed
31841 because it contains an inline __builtin_unreachable(). GCC
31842 states that reaching __builtin_unreachable() means UB so we're
31843 not obliged to do anything special; however, we want
31844 non-zero-sized function bodies. To meet this, and help the
31845 user out, let's trap the case. */
31847 fputs ("\ttrap\n", file
);
31850 else if (deleted_debug_label
)
31851 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
31852 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
31853 CODE_LABEL_NUMBER (insn
) = -1;
31857 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31860 We don't output a traceback table if -finhibit-size-directive was
31861 used. The documentation for -finhibit-size-directive reads
31862 ``don't output a @code{.size} assembler directive, or anything
31863 else that would cause trouble if the function is split in the
31864 middle, and the two halves are placed at locations far apart in
31865 memory.'' The traceback table has this property, since it
31866 includes the offset from the start of the function to the
31867 traceback table itself.
31869 System V.4 Powerpc's (and the embedded ABI derived from it) use a
31870 different traceback table. */
31871 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
31872 && ! flag_inhibit_size_directive
31873 && rs6000_traceback
!= traceback_none
&& !cfun
->is_thunk
)
31875 const char *fname
= NULL
;
31876 const char *language_string
= lang_hooks
.name
;
31877 int fixed_parms
= 0, float_parms
= 0, parm_info
= 0;
31879 int optional_tbtab
;
31880 rs6000_stack_t
*info
= rs6000_stack_info ();
31882 if (rs6000_traceback
== traceback_full
)
31883 optional_tbtab
= 1;
31884 else if (rs6000_traceback
== traceback_part
)
31885 optional_tbtab
= 0;
31887 optional_tbtab
= !optimize_size
&& !TARGET_ELF
;
31889 if (optional_tbtab
)
31891 fname
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
31892 while (*fname
== '.') /* V.4 encodes . in the name */
31895 /* Need label immediately before tbtab, so we can compute
31896 its offset from the function start. */
31897 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LT");
31898 ASM_OUTPUT_LABEL (file
, fname
);
31901 /* The .tbtab pseudo-op can only be used for the first eight
31902 expressions, since it can't handle the possibly variable
31903 length fields that follow. However, if you omit the optional
31904 fields, the assembler outputs zeros for all optional fields
31905 anyways, giving each variable length field is minimum length
31906 (as defined in sys/debug.h). Thus we can not use the .tbtab
31907 pseudo-op at all. */
31909 /* An all-zero word flags the start of the tbtab, for debuggers
31910 that have to find it by searching forward from the entry
31911 point or from the current pc. */
31912 fputs ("\t.long 0\n", file
);
31914 /* Tbtab format type. Use format type 0. */
31915 fputs ("\t.byte 0,", file
);
31917 /* Language type. Unfortunately, there does not seem to be any
31918 official way to discover the language being compiled, so we
31919 use language_string.
31920 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
31921 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
31922 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
31923 either, so for now use 0. */
31925 || ! strcmp (language_string
, "GNU GIMPLE")
31926 || ! strcmp (language_string
, "GNU Go")
31927 || ! strcmp (language_string
, "libgccjit"))
31929 else if (! strcmp (language_string
, "GNU F77")
31930 || lang_GNU_Fortran ())
31932 else if (! strcmp (language_string
, "GNU Pascal"))
31934 else if (! strcmp (language_string
, "GNU Ada"))
31936 else if (lang_GNU_CXX ()
31937 || ! strcmp (language_string
, "GNU Objective-C++"))
31939 else if (! strcmp (language_string
, "GNU Java"))
31941 else if (! strcmp (language_string
, "GNU Objective-C"))
31944 gcc_unreachable ();
31945 fprintf (file
, "%d,", i
);
31947 /* 8 single bit fields: global linkage (not set for C extern linkage,
31948 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
31949 from start of procedure stored in tbtab, internal function, function
31950 has controlled storage, function has no toc, function uses fp,
31951 function logs/aborts fp operations. */
31952 /* Assume that fp operations are used if any fp reg must be saved. */
31953 fprintf (file
, "%d,",
31954 (optional_tbtab
<< 5) | ((info
->first_fp_reg_save
!= 64) << 1));
31956 /* 6 bitfields: function is interrupt handler, name present in
31957 proc table, function calls alloca, on condition directives
31958 (controls stack walks, 3 bits), saves condition reg, saves
31960 /* The `function calls alloca' bit seems to be set whenever reg 31 is
31961 set up as a frame pointer, even when there is no alloca call. */
31962 fprintf (file
, "%d,",
31963 ((optional_tbtab
<< 6)
31964 | ((optional_tbtab
& frame_pointer_needed
) << 5)
31965 | (info
->cr_save_p
<< 1)
31966 | (info
->lr_save_p
)));
31968 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
31970 fprintf (file
, "%d,",
31971 (info
->push_p
<< 7) | (64 - info
->first_fp_reg_save
));
31973 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
31974 fprintf (file
, "%d,", (32 - first_reg_to_save ()));
31976 if (optional_tbtab
)
31978 /* Compute the parameter info from the function decl argument
31981 int next_parm_info_bit
= 31;
31983 for (decl
= DECL_ARGUMENTS (current_function_decl
);
31984 decl
; decl
= DECL_CHAIN (decl
))
31986 rtx parameter
= DECL_INCOMING_RTL (decl
);
31987 machine_mode mode
= GET_MODE (parameter
);
31989 if (GET_CODE (parameter
) == REG
)
31991 if (SCALAR_FLOAT_MODE_P (mode
))
32014 gcc_unreachable ();
32017 /* If only one bit will fit, don't or in this entry. */
32018 if (next_parm_info_bit
> 0)
32019 parm_info
|= (bits
<< (next_parm_info_bit
- 1));
32020 next_parm_info_bit
-= 2;
32024 fixed_parms
+= ((GET_MODE_SIZE (mode
)
32025 + (UNITS_PER_WORD
- 1))
32027 next_parm_info_bit
-= 1;
32033 /* Number of fixed point parameters. */
32034 /* This is actually the number of words of fixed point parameters; thus
32035 an 8 byte struct counts as 2; and thus the maximum value is 8. */
32036 fprintf (file
, "%d,", fixed_parms
);
32038 /* 2 bitfields: number of floating point parameters (7 bits), parameters
32040 /* This is actually the number of fp registers that hold parameters;
32041 and thus the maximum value is 13. */
32042 /* Set parameters on stack bit if parameters are not in their original
32043 registers, regardless of whether they are on the stack? Xlc
32044 seems to set the bit when not optimizing. */
32045 fprintf (file
, "%d\n", ((float_parms
<< 1) | (! optimize
)));
32047 if (optional_tbtab
)
32049 /* Optional fields follow. Some are variable length. */
32051 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
32052 float, 11 double float. */
32053 /* There is an entry for each parameter in a register, in the order
32054 that they occur in the parameter list. Any intervening arguments
32055 on the stack are ignored. If the list overflows a long (max
32056 possible length 34 bits) then completely leave off all elements
32058 /* Only emit this long if there was at least one parameter. */
32059 if (fixed_parms
|| float_parms
)
32060 fprintf (file
, "\t.long %d\n", parm_info
);
32062 /* Offset from start of code to tb table. */
32063 fputs ("\t.long ", file
);
32064 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LT");
32065 RS6000_OUTPUT_BASENAME (file
, fname
);
32067 rs6000_output_function_entry (file
, fname
);
32070 /* Interrupt handler mask. */
32071 /* Omit this long, since we never set the interrupt handler bit
32074 /* Number of CTL (controlled storage) anchors. */
32075 /* Omit this long, since the has_ctl bit is never set above. */
32077 /* Displacement into stack of each CTL anchor. */
32078 /* Omit this list of longs, because there are no CTL anchors. */
32080 /* Length of function name. */
32083 fprintf (file
, "\t.short %d\n", (int) strlen (fname
));
32085 /* Function name. */
32086 assemble_string (fname
, strlen (fname
));
32088 /* Register for alloca automatic storage; this is always reg 31.
32089 Only emit this if the alloca bit was set above. */
32090 if (frame_pointer_needed
)
32091 fputs ("\t.byte 31\n", file
);
32093 fputs ("\t.align 2\n", file
);
32097 /* Arrange to define .LCTOC1 label, if not already done. */
32101 if (!toc_initialized
)
32103 switch_to_section (toc_section
);
32104 switch_to_section (current_function_section ());
32109 /* -fsplit-stack support. */
32111 /* A SYMBOL_REF for __morestack. */
32112 static GTY(()) rtx morestack_ref
;
32115 gen_add3_const (rtx rt
, rtx ra
, long c
)
32118 return gen_adddi3 (rt
, ra
, GEN_INT (c
));
32120 return gen_addsi3 (rt
, ra
, GEN_INT (c
));
32123 /* Emit -fsplit-stack prologue, which goes before the regular function
32124 prologue (at local entry point in the case of ELFv2). */
32127 rs6000_expand_split_stack_prologue (void)
32129 rs6000_stack_t
*info
= rs6000_stack_info ();
32130 unsigned HOST_WIDE_INT allocate
;
32131 long alloc_hi
, alloc_lo
;
32132 rtx r0
, r1
, r12
, lr
, ok_label
, compare
, jump
, call_fusage
;
32135 gcc_assert (flag_split_stack
&& reload_completed
);
32140 if (global_regs
[29])
32142 error ("-fsplit-stack uses register r29");
32143 inform (DECL_SOURCE_LOCATION (global_regs_decl
[29]),
32144 "conflicts with %qD", global_regs_decl
[29]);
32147 allocate
= info
->total_size
;
32148 if (allocate
> (unsigned HOST_WIDE_INT
) 1 << 31)
32150 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
32153 if (morestack_ref
== NULL_RTX
)
32155 morestack_ref
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
32156 SYMBOL_REF_FLAGS (morestack_ref
) |= (SYMBOL_FLAG_LOCAL
32157 | SYMBOL_FLAG_FUNCTION
);
32160 r0
= gen_rtx_REG (Pmode
, 0);
32161 r1
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
32162 r12
= gen_rtx_REG (Pmode
, 12);
32163 emit_insn (gen_load_split_stack_limit (r0
));
32164 /* Always emit two insns here to calculate the requested stack,
32165 so that the linker can edit them when adjusting size for calling
32166 non-split-stack code. */
32167 alloc_hi
= (-allocate
+ 0x8000) & ~0xffffL
;
32168 alloc_lo
= -allocate
- alloc_hi
;
32171 emit_insn (gen_add3_const (r12
, r1
, alloc_hi
));
32173 emit_insn (gen_add3_const (r12
, r12
, alloc_lo
));
32175 emit_insn (gen_nop ());
32179 emit_insn (gen_add3_const (r12
, r1
, alloc_lo
));
32180 emit_insn (gen_nop ());
32183 compare
= gen_rtx_REG (CCUNSmode
, CR7_REGNO
);
32184 emit_insn (gen_rtx_SET (compare
, gen_rtx_COMPARE (CCUNSmode
, r12
, r0
)));
32185 ok_label
= gen_label_rtx ();
32186 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
32187 gen_rtx_GEU (VOIDmode
, compare
, const0_rtx
),
32188 gen_rtx_LABEL_REF (VOIDmode
, ok_label
),
32190 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
32191 JUMP_LABEL (insn
) = ok_label
;
32192 /* Mark the jump as very likely to be taken. */
32193 add_reg_br_prob_note (insn
, profile_probability::very_likely ());
32195 lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
32196 insn
= emit_move_insn (r0
, lr
);
32197 RTX_FRAME_RELATED_P (insn
) = 1;
32198 insn
= emit_insn (gen_frame_store (r0
, r1
, info
->lr_save_offset
));
32199 RTX_FRAME_RELATED_P (insn
) = 1;
32201 insn
= emit_call_insn (gen_call (gen_rtx_MEM (SImode
, morestack_ref
),
32202 const0_rtx
, const0_rtx
));
32203 call_fusage
= NULL_RTX
;
32204 use_reg (&call_fusage
, r12
);
32205 /* Say the call uses r0, even though it doesn't, to stop regrename
32206 from twiddling with the insns saving lr, trashing args for cfun.
32207 The insns restoring lr are similarly protected by making
32208 split_stack_return use r0. */
32209 use_reg (&call_fusage
, r0
);
32210 add_function_usage_to (insn
, call_fusage
);
32211 /* Indicate that this function can't jump to non-local gotos. */
32212 make_reg_eh_region_note_nothrow_nononlocal (insn
);
32213 emit_insn (gen_frame_load (r0
, r1
, info
->lr_save_offset
));
32214 insn
= emit_move_insn (lr
, r0
);
32215 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
32216 RTX_FRAME_RELATED_P (insn
) = 1;
32217 emit_insn (gen_split_stack_return ());
32219 emit_label (ok_label
);
32220 LABEL_NUSES (ok_label
) = 1;
32223 /* Return the internal arg pointer used for function incoming
32224 arguments. When -fsplit-stack, the arg pointer is r12 so we need
32225 to copy it to a pseudo in order for it to be preserved over calls
32226 and suchlike. We'd really like to use a pseudo here for the
32227 internal arg pointer but data-flow analysis is not prepared to
32228 accept pseudos as live at the beginning of a function. */
32231 rs6000_internal_arg_pointer (void)
32233 if (flag_split_stack
32234 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun
->decl
))
32238 if (cfun
->machine
->split_stack_arg_pointer
== NULL_RTX
)
32242 cfun
->machine
->split_stack_arg_pointer
= gen_reg_rtx (Pmode
);
32243 REG_POINTER (cfun
->machine
->split_stack_arg_pointer
) = 1;
32245 /* Put the pseudo initialization right after the note at the
32246 beginning of the function. */
32247 pat
= gen_rtx_SET (cfun
->machine
->split_stack_arg_pointer
,
32248 gen_rtx_REG (Pmode
, 12));
32249 push_topmost_sequence ();
32250 emit_insn_after (pat
, get_insns ());
32251 pop_topmost_sequence ();
32253 return plus_constant (Pmode
, cfun
->machine
->split_stack_arg_pointer
,
32254 FIRST_PARM_OFFSET (current_function_decl
));
32256 return virtual_incoming_args_rtx
;
32259 /* We may have to tell the dataflow pass that the split stack prologue
32260 is initializing a register. */
32263 rs6000_live_on_entry (bitmap regs
)
32265 if (flag_split_stack
)
32266 bitmap_set_bit (regs
, 12);
32269 /* Emit -fsplit-stack dynamic stack allocation space check. */
32272 rs6000_split_stack_space_check (rtx size
, rtx label
)
32274 rtx sp
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
32275 rtx limit
= gen_reg_rtx (Pmode
);
32276 rtx requested
= gen_reg_rtx (Pmode
);
32277 rtx cmp
= gen_reg_rtx (CCUNSmode
);
32280 emit_insn (gen_load_split_stack_limit (limit
));
32281 if (CONST_INT_P (size
))
32282 emit_insn (gen_add3_insn (requested
, sp
, GEN_INT (-INTVAL (size
))));
32285 size
= force_reg (Pmode
, size
);
32286 emit_move_insn (requested
, gen_rtx_MINUS (Pmode
, sp
, size
));
32288 emit_insn (gen_rtx_SET (cmp
, gen_rtx_COMPARE (CCUNSmode
, requested
, limit
)));
32289 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
32290 gen_rtx_GEU (VOIDmode
, cmp
, const0_rtx
),
32291 gen_rtx_LABEL_REF (VOIDmode
, label
),
32293 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
32294 JUMP_LABEL (jump
) = label
;
32297 /* A C compound statement that outputs the assembler code for a thunk
32298 function, used to implement C++ virtual function calls with
32299 multiple inheritance. The thunk acts as a wrapper around a virtual
32300 function, adjusting the implicit object parameter before handing
32301 control off to the real function.
32303 First, emit code to add the integer DELTA to the location that
32304 contains the incoming first argument. Assume that this argument
32305 contains a pointer, and is the one used to pass the `this' pointer
32306 in C++. This is the incoming argument *before* the function
32307 prologue, e.g. `%o0' on a sparc. The addition must preserve the
32308 values of all other incoming arguments.
32310 After the addition, emit code to jump to FUNCTION, which is a
32311 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
32312 not touch the return address. Hence returning from FUNCTION will
32313 return to whoever called the current `thunk'.
32315 The effect must be as if FUNCTION had been called directly with the
32316 adjusted first argument. This macro is responsible for emitting
32317 all of the code for a thunk function; output_function_prologue()
32318 and output_function_epilogue() are not invoked.
32320 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
32321 been extracted from it.) It might possibly be useful on some
32322 targets, but probably not.
32324 If you do not define this macro, the target-independent code in the
32325 C++ frontend will generate a less efficient heavyweight thunk that
32326 calls FUNCTION instead of jumping to it. The generic approach does
32327 not support varargs. */
32330 rs6000_output_mi_thunk (FILE *file
, tree thunk_fndecl ATTRIBUTE_UNUSED
,
32331 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
32334 rtx this_rtx
, funexp
;
32337 reload_completed
= 1;
32338 epilogue_completed
= 1;
32340 /* Mark the end of the (empty) prologue. */
32341 emit_note (NOTE_INSN_PROLOGUE_END
);
32343 /* Find the "this" pointer. If the function returns a structure,
32344 the structure return pointer is in r3. */
32345 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
32346 this_rtx
= gen_rtx_REG (Pmode
, 4);
32348 this_rtx
= gen_rtx_REG (Pmode
, 3);
32350 /* Apply the constant offset, if required. */
32352 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, GEN_INT (delta
)));
32354 /* Apply the offset from the vtable, if required. */
32357 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
32358 rtx tmp
= gen_rtx_REG (Pmode
, 12);
32360 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this_rtx
));
32361 if (((unsigned HOST_WIDE_INT
) vcall_offset
) + 0x8000 >= 0x10000)
32363 emit_insn (gen_add3_insn (tmp
, tmp
, vcall_offset_rtx
));
32364 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
32368 rtx loc
= gen_rtx_PLUS (Pmode
, tmp
, vcall_offset_rtx
);
32370 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, loc
));
32372 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, tmp
));
32375 /* Generate a tail call to the target function. */
32376 if (!TREE_USED (function
))
32378 assemble_external (function
);
32379 TREE_USED (function
) = 1;
32381 funexp
= XEXP (DECL_RTL (function
), 0);
32382 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
32385 if (MACHOPIC_INDIRECT
)
32386 funexp
= machopic_indirect_call_target (funexp
);
32389 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32390 generate sibcall RTL explicitly. */
32391 insn
= emit_call_insn (
32392 gen_rtx_PARALLEL (VOIDmode
,
32394 gen_rtx_CALL (VOIDmode
,
32395 funexp
, const0_rtx
),
32396 gen_rtx_USE (VOIDmode
, const0_rtx
),
32397 simple_return_rtx
)));
32398 SIBLING_CALL_P (insn
) = 1;
32401 /* Run just enough of rest_of_compilation to get the insns emitted.
32402 There's not really enough bulk here to make other passes such as
32403 instruction scheduling worth while. Note that use_thunk calls
32404 assemble_start_function and assemble_end_function. */
32405 insn
= get_insns ();
32406 shorten_branches (insn
);
32407 final_start_function (insn
, file
, 1);
32408 final (insn
, file
, 1);
32409 final_end_function ();
32411 reload_completed
= 0;
32412 epilogue_completed
= 0;
/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

   Target	Flags		Name		One table per
   AIX		(none)		AIX TOC		object file
   AIX		-mfull-toc	AIX TOC		object file
   AIX		-mminimal-toc	AIX minimal TOC	translation unit
   SVR4/EABI	(none)		SVR4 SDATA	object file
   SVR4/EABI	-fpic		SVR4 pic	object file
   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
   SVR4/EABI	-mrelocatable	EABI TOC	function
   SVR4/EABI	-maix		AIX TOC		object file
   SVR4/EABI	-maix -mminimal-toc
				AIX minimal TOC	translation unit

   Name			Reg.	Set by	entries	      contains:
				made by	addrs?	fp?	sum?

   AIX TOC		2	crt0	as	Y	option	option
   AIX minimal TOC	30	prolog	gcc	Y	Y	option
   SVR4 SDATA		13	crt0	gcc	N	Y	N
   SVR4 pic		30	prolog	ld	Y	not yet	N
   SVR4 PIC		30	prolog	gcc	Y	option	option
   EABI TOC		30	prolog	gcc	Y	option	option

*/
32442 /* Hash functions for the hash table. */
32445 rs6000_hash_constant (rtx k
)
32447 enum rtx_code code
= GET_CODE (k
);
32448 machine_mode mode
= GET_MODE (k
);
32449 unsigned result
= (code
<< 3) ^ mode
;
32450 const char *format
;
32453 format
= GET_RTX_FORMAT (code
);
32454 flen
= strlen (format
);
32460 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
32462 case CONST_WIDE_INT
:
32465 flen
= CONST_WIDE_INT_NUNITS (k
);
32466 for (i
= 0; i
< flen
; i
++)
32467 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
32472 if (mode
!= VOIDmode
)
32473 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
32485 for (; fidx
< flen
; fidx
++)
32486 switch (format
[fidx
])
32491 const char *str
= XSTR (k
, fidx
);
32492 len
= strlen (str
);
32493 result
= result
* 613 + len
;
32494 for (i
= 0; i
< len
; i
++)
32495 result
= result
* 613 + (unsigned) str
[i
];
32500 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
32504 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
32507 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
32508 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
32512 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
32513 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
32520 gcc_unreachable ();
32527 toc_hasher::hash (toc_hash_struct
*thc
)
32529 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
32532 /* Compare H1 and H2 for equivalence. */
32535 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
32540 if (h1
->key_mode
!= h2
->key_mode
)
32543 return rtx_equal_p (r1
, r2
);
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

/* NOTE(review): the NAME parameter is never expanded in the body; the
   macro reads the variable `name' at each use site — preserved as
   written.  */
#define VTABLE_NAME_P(NAME)				\
  (strncmp ("_vt.", name, strlen ("_vt.")) == 0		\
   || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0	\
   || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0	\
   || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0	\
   || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.
   Returns NAME itself when it contains no '$' past the first char.  */

static const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
32589 rs6000_output_symbol_ref (FILE *file
, rtx x
)
32591 const char *name
= XSTR (x
, 0);
32593 /* Currently C++ toc references to vtables can be emitted before it
32594 is decided whether the vtable is public or private. If this is
32595 the case, then the linker will eventually complain that there is
32596 a reference to an unknown section. Thus, for vtables only,
32597 we emit the TOC reference to reference the identifier and not the
32599 if (VTABLE_NAME_P (name
))
32601 RS6000_OUTPUT_BASENAME (file
, name
);
32604 assemble_name (file
, name
);
32607 /* Output a TOC entry. We derive the entry name from what is being
32611 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
32614 const char *name
= buf
;
32616 HOST_WIDE_INT offset
= 0;
32618 gcc_assert (!TARGET_NO_TOC
);
32620 /* When the linker won't eliminate them, don't output duplicate
32621 TOC entries (this happens on AIX if there is any kind of TOC,
32622 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
32624 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
32626 struct toc_hash_struct
*h
;
32628 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
32629 time because GGC is not initialized at that point. */
32630 if (toc_hash_table
== NULL
)
32631 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
32633 h
= ggc_alloc
<toc_hash_struct
> ();
32635 h
->key_mode
= mode
;
32636 h
->labelno
= labelno
;
32638 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
32639 if (*found
== NULL
)
32641 else /* This is indeed a duplicate.
32642 Set this label equal to that label. */
32644 fputs ("\t.set ", file
);
32645 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
32646 fprintf (file
, "%d,", labelno
);
32647 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
32648 fprintf (file
, "%d\n", ((*found
)->labelno
));
32651 if (TARGET_XCOFF
&& GET_CODE (x
) == SYMBOL_REF
32652 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
32653 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
32655 fputs ("\t.set ", file
);
32656 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
32657 fprintf (file
, "%d,", labelno
);
32658 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
32659 fprintf (file
, "%d\n", ((*found
)->labelno
));
32666 /* If we're going to put a double constant in the TOC, make sure it's
32667 aligned properly when strict alignment is on. */
32668 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
32669 && STRICT_ALIGNMENT
32670 && GET_MODE_BITSIZE (mode
) >= 64
32671 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
32672 ASM_OUTPUT_ALIGN (file
, 3);
32675 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
32677 /* Handle FP constants specially. Note that if we have a minimal
32678 TOC, things we put here aren't actually in the TOC, so we can allow
32680 if (GET_CODE (x
) == CONST_DOUBLE
&&
32681 (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
32682 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
32686 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
32687 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32689 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32693 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32694 fputs (DOUBLE_INT_ASM_OP
, file
);
32696 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32697 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
32698 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
32699 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
32700 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
32701 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
32702 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
32703 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
32708 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32709 fputs ("\t.long ", file
);
32711 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32712 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
32713 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
32714 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32715 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
32716 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
32720 else if (GET_CODE (x
) == CONST_DOUBLE
&&
32721 (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
32725 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
32726 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32728 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32732 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32733 fputs (DOUBLE_INT_ASM_OP
, file
);
32735 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
32736 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
32737 fprintf (file
, "0x%lx%08lx\n",
32738 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
32739 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
32744 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32745 fputs ("\t.long ", file
);
32747 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
32748 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
32749 fprintf (file
, "0x%lx,0x%lx\n",
32750 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
32754 else if (GET_CODE (x
) == CONST_DOUBLE
&&
32755 (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
32759 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
32760 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
32762 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
32766 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32767 fputs (DOUBLE_INT_ASM_OP
, file
);
32769 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
32770 if (WORDS_BIG_ENDIAN
)
32771 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
32773 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
32778 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32779 fputs ("\t.long ", file
);
32781 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
32782 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
32786 else if (GET_MODE (x
) == VOIDmode
&& GET_CODE (x
) == CONST_INT
)
32788 unsigned HOST_WIDE_INT low
;
32789 HOST_WIDE_INT high
;
32791 low
= INTVAL (x
) & 0xffffffff;
32792 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
32794 /* TOC entries are always Pmode-sized, so when big-endian
32795 smaller integer constants in the TOC need to be padded.
32796 (This is still a win over putting the constants in
32797 a separate constant pool, because then we'd have
32798 to have both a TOC entry _and_ the actual constant.)
32800 For a 32-bit target, CONST_INT values are loaded and shifted
32801 entirely within `low' and can be stored in one TOC entry. */
32803 /* It would be easy to make this work, but it doesn't now. */
32804 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
32806 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
32809 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
32810 high
= (HOST_WIDE_INT
) low
>> 32;
32816 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32817 fputs (DOUBLE_INT_ASM_OP
, file
);
32819 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
32820 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32821 fprintf (file
, "0x%lx%08lx\n",
32822 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32827 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
32829 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32830 fputs ("\t.long ", file
);
32832 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
32833 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32834 fprintf (file
, "0x%lx,0x%lx\n",
32835 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32839 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32840 fputs ("\t.long ", file
);
32842 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
32843 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
32849 if (GET_CODE (x
) == CONST
)
32851 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
32852 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
);
32854 base
= XEXP (XEXP (x
, 0), 0);
32855 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
32858 switch (GET_CODE (base
))
32861 name
= XSTR (base
, 0);
32865 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
32866 CODE_LABEL_NUMBER (XEXP (base
, 0)));
32870 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
32874 gcc_unreachable ();
32877 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32878 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
32881 fputs ("\t.tc ", file
);
32882 RS6000_OUTPUT_BASENAME (file
, name
);
32885 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
32887 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
32889 /* Mark large TOC symbols on AIX with [TE] so they are mapped
32890 after other TOC symbols, reducing overflow of small TOC access
32891 to [TC] symbols. */
32892 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
32893 ? "[TE]," : "[TC],", file
);
32896 /* Currently C++ toc references to vtables can be emitted before it
32897 is decided whether the vtable is public or private. If this is
32898 the case, then the linker will eventually complain that there is
32899 a TOC reference to an unknown section. Thus, for vtables only,
32900 we emit the TOC reference to reference the symbol and not the
32902 if (VTABLE_NAME_P (name
))
32904 RS6000_OUTPUT_BASENAME (file
, name
);
32906 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
32907 else if (offset
> 0)
32908 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
32911 output_addr_const (file
, x
);
32914 if (TARGET_XCOFF
&& GET_CODE (base
) == SYMBOL_REF
)
32916 switch (SYMBOL_REF_TLS_MODEL (base
))
32920 case TLS_MODEL_LOCAL_EXEC
:
32921 fputs ("@le", file
);
32923 case TLS_MODEL_INITIAL_EXEC
:
32924 fputs ("@ie", file
);
32926 /* Use global-dynamic for local-dynamic. */
32927 case TLS_MODEL_GLOBAL_DYNAMIC
:
32928 case TLS_MODEL_LOCAL_DYNAMIC
:
32930 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
32931 fputs ("\t.tc .", file
);
32932 RS6000_OUTPUT_BASENAME (file
, name
);
32933 fputs ("[TC],", file
);
32934 output_addr_const (file
, x
);
32935 fputs ("@m", file
);
32938 gcc_unreachable ();
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
33013 /* Generate a unique section name for FILENAME for a section type
33014 represented by SECTION_DESC. Output goes into BUF.
33016 SECTION_DESC can be any string, as long as it is different for each
33017 possible section type.
33019 We name the section in the same manner as xlc. The name begins with an
33020 underscore followed by the filename (after stripping any leading directory
33021 names) with the last period replaced by the string SECTION_DESC. If
33022 FILENAME does not contain a period, SECTION_DESC is appended to the end of
33026 rs6000_gen_section_name (char **buf
, const char *filename
,
33027 const char *section_desc
)
33029 const char *q
, *after_last_slash
, *last_period
= 0;
33033 after_last_slash
= filename
;
33034 for (q
= filename
; *q
; q
++)
33037 after_last_slash
= q
+ 1;
33038 else if (*q
== '.')
33042 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
33043 *buf
= (char *) xmalloc (len
);
33048 for (q
= after_last_slash
; *q
; q
++)
33050 if (q
== last_period
)
33052 strcpy (p
, section_desc
);
33053 p
+= strlen (section_desc
);
33057 else if (ISALNUM (*q
))
33061 if (last_period
== 0)
33062 strcpy (p
, section_desc
);
33067 /* Emit profile function. */
33070 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
33072 /* Non-standard profiling for kernels, which just saves LR then calls
33073 _mcount without worrying about arg saves. The idea is to change
33074 the function prologue as little as possible as it isn't easy to
33075 account for arg save/restore code added just for _mcount. */
33076 if (TARGET_PROFILE_KERNEL
)
33079 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
33081 #ifndef NO_PROFILE_COUNTERS
33082 # define NO_PROFILE_COUNTERS 0
33084 if (NO_PROFILE_COUNTERS
)
33085 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
33086 LCT_NORMAL
, VOIDmode
, 0);
33090 const char *label_name
;
33093 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
33094 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
33095 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
33097 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
33098 LCT_NORMAL
, VOIDmode
, 1, fun
, Pmode
);
33101 else if (DEFAULT_ABI
== ABI_DARWIN
)
33103 const char *mcount_name
= RS6000_MCOUNT
;
33104 int caller_addr_regno
= LR_REGNO
;
33106 /* Be conservative and always set this, at least for now. */
33107 crtl
->uses_pic_offset_table
= 1;
33110 /* For PIC code, set up a stub and collect the caller's address
33111 from r0, which is where the prologue puts it. */
33112 if (MACHOPIC_INDIRECT
33113 && crtl
->uses_pic_offset_table
)
33114 caller_addr_regno
= 0;
33116 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
33117 LCT_NORMAL
, VOIDmode
, 1,
33118 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
33122 /* Write function profiler code. */
33125 output_function_profiler (FILE *file
, int labelno
)
33129 switch (DEFAULT_ABI
)
33132 gcc_unreachable ();
33137 warning (0, "no profiling of 64-bit code for this ABI");
33140 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
33141 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
33142 if (NO_PROFILE_COUNTERS
)
33144 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33145 reg_names
[0], reg_names
[1]);
33147 else if (TARGET_SECURE_PLT
&& flag_pic
)
33149 if (TARGET_LINK_STACK
)
33152 get_ppc476_thunk_name (name
);
33153 asm_fprintf (file
, "\tbl %s\n", name
);
33156 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
33157 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33158 reg_names
[0], reg_names
[1]);
33159 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
33160 asm_fprintf (file
, "\taddis %s,%s,",
33161 reg_names
[12], reg_names
[12]);
33162 assemble_name (file
, buf
);
33163 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
33164 assemble_name (file
, buf
);
33165 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
33167 else if (flag_pic
== 1)
33169 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
33170 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33171 reg_names
[0], reg_names
[1]);
33172 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
33173 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
33174 assemble_name (file
, buf
);
33175 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
33177 else if (flag_pic
> 1)
33179 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33180 reg_names
[0], reg_names
[1]);
33181 /* Now, we need to get the address of the label. */
33182 if (TARGET_LINK_STACK
)
33185 get_ppc476_thunk_name (name
);
33186 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
33187 assemble_name (file
, buf
);
33188 fputs ("-.\n1:", file
);
33189 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
33190 asm_fprintf (file
, "\taddi %s,%s,4\n",
33191 reg_names
[11], reg_names
[11]);
33195 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
33196 assemble_name (file
, buf
);
33197 fputs ("-.\n1:", file
);
33198 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
33200 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
33201 reg_names
[0], reg_names
[11]);
33202 asm_fprintf (file
, "\tadd %s,%s,%s\n",
33203 reg_names
[0], reg_names
[0], reg_names
[11]);
33207 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
33208 assemble_name (file
, buf
);
33209 fputs ("@ha\n", file
);
33210 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33211 reg_names
[0], reg_names
[1]);
33212 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
33213 assemble_name (file
, buf
);
33214 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
33217 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
33218 fprintf (file
, "\tbl %s%s\n",
33219 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
33225 /* Don't do anything, done in output_profile_hook (). */
33232 /* The following variable value is the last issued insn. */
33234 static rtx_insn
*last_scheduled_insn
;
33236 /* The following variable helps to balance issuing of load and
33237 store instructions */
33239 static int load_store_pendulum
;
33241 /* The following variable helps pair divide insns during scheduling. */
33242 static int divide_cnt
;
33243 /* The following variable helps pair and alternate vector and vector load
33244 insns during scheduling. */
33245 static int vec_pairing
;
33248 /* Power4 load update and store update instructions are cracked into a
33249 load or store and an integer insn which are executed in the same cycle.
33250 Branches have their own dispatch slot which does not count against the
33251 GCC issue rate, but it changes the program flow so there are no other
33252 instructions to issue in this cycle. */
33255 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
33257 last_scheduled_insn
= insn
;
33258 if (GET_CODE (PATTERN (insn
)) == USE
33259 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33261 cached_can_issue_more
= more
;
33262 return cached_can_issue_more
;
33265 if (insn_terminates_group_p (insn
, current_group
))
33267 cached_can_issue_more
= 0;
33268 return cached_can_issue_more
;
33271 /* If no reservation, but reach here */
33272 if (recog_memoized (insn
) < 0)
33275 if (rs6000_sched_groups
)
33277 if (is_microcoded_insn (insn
))
33278 cached_can_issue_more
= 0;
33279 else if (is_cracked_insn (insn
))
33280 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
33282 cached_can_issue_more
= more
- 1;
33284 return cached_can_issue_more
;
33287 if (rs6000_cpu_attr
== CPU_CELL
&& is_nonpipeline_insn (insn
))
33290 cached_can_issue_more
= more
- 1;
33291 return cached_can_issue_more
;
33295 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
33297 int r
= rs6000_variable_issue_1 (insn
, more
);
33299 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
33303 /* Adjust the cost of a scheduling dependency. Return the new cost of
33304 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
33307 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
33310 enum attr_type attr_type
;
33312 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
33319 /* Data dependency; DEP_INSN writes a register that INSN reads
33320 some cycles later. */
33322 /* Separate a load from a narrower, dependent store. */
33323 if ((rs6000_sched_groups
|| rs6000_cpu_attr
== CPU_POWER9
)
33324 && GET_CODE (PATTERN (insn
)) == SET
33325 && GET_CODE (PATTERN (dep_insn
)) == SET
33326 && GET_CODE (XEXP (PATTERN (insn
), 1)) == MEM
33327 && GET_CODE (XEXP (PATTERN (dep_insn
), 0)) == MEM
33328 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
33329 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
33332 attr_type
= get_attr_type (insn
);
33337 /* Tell the first scheduling pass about the latency between
33338 a mtctr and bctr (and mtlr and br/blr). The first
33339 scheduling pass will not know about this latency since
33340 the mtctr instruction, which has the latency associated
33341 to it, will be generated by reload. */
33344 /* Leave some extra cycles between a compare and its
33345 dependent branch, to inhibit expensive mispredicts. */
33346 if ((rs6000_cpu_attr
== CPU_PPC603
33347 || rs6000_cpu_attr
== CPU_PPC604
33348 || rs6000_cpu_attr
== CPU_PPC604E
33349 || rs6000_cpu_attr
== CPU_PPC620
33350 || rs6000_cpu_attr
== CPU_PPC630
33351 || rs6000_cpu_attr
== CPU_PPC750
33352 || rs6000_cpu_attr
== CPU_PPC7400
33353 || rs6000_cpu_attr
== CPU_PPC7450
33354 || rs6000_cpu_attr
== CPU_PPCE5500
33355 || rs6000_cpu_attr
== CPU_PPCE6500
33356 || rs6000_cpu_attr
== CPU_POWER4
33357 || rs6000_cpu_attr
== CPU_POWER5
33358 || rs6000_cpu_attr
== CPU_POWER7
33359 || rs6000_cpu_attr
== CPU_POWER8
33360 || rs6000_cpu_attr
== CPU_POWER9
33361 || rs6000_cpu_attr
== CPU_CELL
)
33362 && recog_memoized (dep_insn
)
33363 && (INSN_CODE (dep_insn
) >= 0))
33365 switch (get_attr_type (dep_insn
))
33368 case TYPE_FPCOMPARE
:
33369 case TYPE_CR_LOGICAL
:
33370 case TYPE_DELAYED_CR
:
33374 if (get_attr_dot (dep_insn
) == DOT_YES
)
33379 if (get_attr_dot (dep_insn
) == DOT_YES
33380 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
33391 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33392 && recog_memoized (dep_insn
)
33393 && (INSN_CODE (dep_insn
) >= 0))
33396 if (GET_CODE (PATTERN (insn
)) != SET
)
33397 /* If this happens, we have to extend this to schedule
33398 optimally. Return default for now. */
33401 /* Adjust the cost for the case where the value written
33402 by a fixed point operation is used as the address
33403 gen value on a store. */
33404 switch (get_attr_type (dep_insn
))
33409 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33410 return get_attr_sign_extend (dep_insn
)
33411 == SIGN_EXTEND_YES
? 6 : 4;
33416 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33417 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
33427 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33435 if (get_attr_update (dep_insn
) == UPDATE_YES
33436 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
33442 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33448 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33449 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
33459 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33460 && recog_memoized (dep_insn
)
33461 && (INSN_CODE (dep_insn
) >= 0))
33464 /* Adjust the cost for the case where the value written
33465 by a fixed point instruction is used within the address
33466 gen portion of a subsequent load(u)(x) */
33467 switch (get_attr_type (dep_insn
))
33472 if (set_to_load_agen (dep_insn
, insn
))
33473 return get_attr_sign_extend (dep_insn
)
33474 == SIGN_EXTEND_YES
? 6 : 4;
33479 if (set_to_load_agen (dep_insn
, insn
))
33480 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
33490 if (set_to_load_agen (dep_insn
, insn
))
33498 if (get_attr_update (dep_insn
) == UPDATE_YES
33499 && set_to_load_agen (dep_insn
, insn
))
33505 if (set_to_load_agen (dep_insn
, insn
))
33511 if (set_to_load_agen (dep_insn
, insn
))
33512 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
33522 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33523 && get_attr_update (insn
) == UPDATE_NO
33524 && recog_memoized (dep_insn
)
33525 && (INSN_CODE (dep_insn
) >= 0)
33526 && (get_attr_type (dep_insn
) == TYPE_MFFGPR
))
33533 /* Fall out to return default cost. */
33537 case REG_DEP_OUTPUT
:
33538 /* Output dependency; DEP_INSN writes a register that INSN writes some
33540 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33541 && recog_memoized (dep_insn
)
33542 && (INSN_CODE (dep_insn
) >= 0))
33544 attr_type
= get_attr_type (insn
);
33549 case TYPE_FPSIMPLE
:
33550 if (get_attr_type (dep_insn
) == TYPE_FP
33551 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
33555 if (get_attr_update (insn
) == UPDATE_NO
33556 && get_attr_type (dep_insn
) == TYPE_MFFGPR
)
33563 /* Fall through, no cost for output dependency. */
33567 /* Anti dependency; DEP_INSN reads a register that INSN writes some
33572 gcc_unreachable ();
33578 /* Debug version of rs6000_adjust_cost. */
33581 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
33582 int cost
, unsigned int dw
)
33584 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
33592 default: dep
= "unknown depencency"; break;
33593 case REG_DEP_TRUE
: dep
= "data dependency"; break;
33594 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
33595 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
33599 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33600 "%s, insn:\n", ret
, cost
, dep
);
33608 /* The function returns a true if INSN is microcoded.
33609 Return false otherwise. */
33612 is_microcoded_insn (rtx_insn
*insn
)
33614 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33615 || GET_CODE (PATTERN (insn
)) == USE
33616 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33619 if (rs6000_cpu_attr
== CPU_CELL
)
33620 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
33622 if (rs6000_sched_groups
33623 && (rs6000_cpu
== PROCESSOR_POWER4
|| rs6000_cpu
== PROCESSOR_POWER5
))
33625 enum attr_type type
= get_attr_type (insn
);
33626 if ((type
== TYPE_LOAD
33627 && get_attr_update (insn
) == UPDATE_YES
33628 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
33629 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
33630 && get_attr_update (insn
) == UPDATE_YES
33631 && get_attr_indexed (insn
) == INDEXED_YES
)
33632 || type
== TYPE_MFCR
)
33639 /* The function returns true if INSN is cracked into 2 instructions
33640 by the processor (and therefore occupies 2 issue slots). */
33643 is_cracked_insn (rtx_insn
*insn
)
33645 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33646 || GET_CODE (PATTERN (insn
)) == USE
33647 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33650 if (rs6000_sched_groups
33651 && (rs6000_cpu
== PROCESSOR_POWER4
|| rs6000_cpu
== PROCESSOR_POWER5
))
33653 enum attr_type type
= get_attr_type (insn
);
33654 if ((type
== TYPE_LOAD
33655 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
33656 && get_attr_update (insn
) == UPDATE_NO
)
33657 || (type
== TYPE_LOAD
33658 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
33659 && get_attr_update (insn
) == UPDATE_YES
33660 && get_attr_indexed (insn
) == INDEXED_NO
)
33661 || (type
== TYPE_STORE
33662 && get_attr_update (insn
) == UPDATE_YES
33663 && get_attr_indexed (insn
) == INDEXED_NO
)
33664 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
33665 && get_attr_update (insn
) == UPDATE_YES
)
33666 || type
== TYPE_DELAYED_CR
33667 || (type
== TYPE_EXTS
33668 && get_attr_dot (insn
) == DOT_YES
)
33669 || (type
== TYPE_SHIFT
33670 && get_attr_dot (insn
) == DOT_YES
33671 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
33672 || (type
== TYPE_MUL
33673 && get_attr_dot (insn
) == DOT_YES
)
33674 || type
== TYPE_DIV
33675 || (type
== TYPE_INSERT
33676 && get_attr_size (insn
) == SIZE_32
))
33683 /* The function returns true if INSN can be issued only from
33684 the branch slot. */
33687 is_branch_slot_insn (rtx_insn
*insn
)
33689 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33690 || GET_CODE (PATTERN (insn
)) == USE
33691 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33694 if (rs6000_sched_groups
)
33696 enum attr_type type
= get_attr_type (insn
);
33697 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
33705 /* The function returns true if out_inst sets a value that is
33706 used in the address generation computation of in_insn */
33708 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
33710 rtx out_set
, in_set
;
33712 /* For performance reasons, only handle the simple case where
33713 both loads are a single_set. */
33714 out_set
= single_set (out_insn
);
33717 in_set
= single_set (in_insn
);
33719 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
33725 /* Try to determine base/offset/size parts of the given MEM.
33726 Return true if successful, false if all the values couldn't
33729 This function only looks for REG or REG+CONST address forms.
33730 REG+REG address form will return false. */
33733 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
33734 HOST_WIDE_INT
*size
)
33737 if MEM_SIZE_KNOWN_P (mem
)
33738 *size
= MEM_SIZE (mem
);
33742 addr_rtx
= (XEXP (mem
, 0));
33743 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
33744 addr_rtx
= XEXP (addr_rtx
, 1);
33747 while (GET_CODE (addr_rtx
) == PLUS
33748 && CONST_INT_P (XEXP (addr_rtx
, 1)))
33750 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
33751 addr_rtx
= XEXP (addr_rtx
, 0);
33753 if (!REG_P (addr_rtx
))
33760 /* The function returns true if the target storage location of
33761 mem1 is adjacent to the target storage location of mem2 */
33762 /* Return 1 if memory locations are adjacent. */
33765 adjacent_mem_locations (rtx mem1
, rtx mem2
)
33768 HOST_WIDE_INT off1
, size1
, off2
, size2
;
33770 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
33771 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
33772 return ((REGNO (reg1
) == REGNO (reg2
))
33773 && ((off1
+ size1
== off2
)
33774 || (off2
+ size2
== off1
)));
33779 /* This function returns true if it can be determined that the two MEM
33780 locations overlap by at least 1 byte based on base reg/offset/size. */
33783 mem_locations_overlap (rtx mem1
, rtx mem2
)
33786 HOST_WIDE_INT off1
, size1
, off2
, size2
;
33788 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
33789 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
33790 return ((REGNO (reg1
) == REGNO (reg2
))
33791 && (((off1
<= off2
) && (off1
+ size1
> off2
))
33792 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
33797 /* A C statement (sans semicolon) to update the integer scheduling
33798 priority INSN_PRIORITY (INSN). Increase the priority to execute the
33799 INSN earlier, reduce the priority to execute INSN later. Do not
33800 define this macro if you do not need to adjust the scheduling
33801 priorities of insns. */
33804 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
33806 rtx load_mem
, str_mem
;
33807 /* On machines (like the 750) which have asymmetric integer units,
33808 where one integer unit can do multiply and divides and the other
33809 can't, reduce the priority of multiply/divide so it is scheduled
33810 before other integer operations. */
33813 if (! INSN_P (insn
))
33816 if (GET_CODE (PATTERN (insn
)) == USE
)
33819 switch (rs6000_cpu_attr
) {
33821 switch (get_attr_type (insn
))
33828 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
33829 priority
, priority
);
33830 if (priority
>= 0 && priority
< 0x01000000)
33837 if (insn_must_be_first_in_group (insn
)
33838 && reload_completed
33839 && current_sched_info
->sched_max_insns_priority
33840 && rs6000_sched_restricted_insns_priority
)
33843 /* Prioritize insns that can be dispatched only in the first
33845 if (rs6000_sched_restricted_insns_priority
== 1)
33846 /* Attach highest priority to insn. This means that in
33847 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33848 precede 'priority' (critical path) considerations. */
33849 return current_sched_info
->sched_max_insns_priority
;
33850 else if (rs6000_sched_restricted_insns_priority
== 2)
33851 /* Increase priority of insn by a minimal amount. This means that in
33852 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33853 considerations precede dispatch-slot restriction considerations. */
33854 return (priority
+ 1);
33857 if (rs6000_cpu
== PROCESSOR_POWER6
33858 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
33859 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
33860 /* Attach highest priority to insn if the scheduler has just issued two
33861 stores and this instruction is a load, or two loads and this instruction
33862 is a store. Power6 wants loads and stores scheduled alternately
33864 return current_sched_info
->sched_max_insns_priority
;
33869 /* Return true if the instruction is nonpipelined on the Cell. */
33871 is_nonpipeline_insn (rtx_insn
*insn
)
33873 enum attr_type type
;
33874 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33875 || GET_CODE (PATTERN (insn
)) == USE
33876 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33879 type
= get_attr_type (insn
);
33880 if (type
== TYPE_MUL
33881 || type
== TYPE_DIV
33882 || type
== TYPE_SDIV
33883 || type
== TYPE_DDIV
33884 || type
== TYPE_SSQRT
33885 || type
== TYPE_DSQRT
33886 || type
== TYPE_MFCR
33887 || type
== TYPE_MFCRF
33888 || type
== TYPE_MFJMPR
)
33896 /* Return how many instructions the machine can issue per cycle. */
33899 rs6000_issue_rate (void)
33901 /* Unless scheduling for register pressure, use issue rate of 1 for
33902 first scheduling pass to decrease degradation. */
33903 if (!reload_completed
&& !flag_sched_pressure
)
33906 switch (rs6000_cpu_attr
) {
33908 case CPU_PPC601
: /* ? */
33918 case CPU_PPCE300C2
:
33919 case CPU_PPCE300C3
:
33920 case CPU_PPCE500MC
:
33921 case CPU_PPCE500MC64
:
33946 /* Return how many instructions to look ahead for better insn
33950 rs6000_use_sched_lookahead (void)
33952 switch (rs6000_cpu_attr
)
33959 return (reload_completed
? 8 : 0);
33966 /* We are choosing insn from the ready queue. Return zero if INSN can be
33969 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
33971 if (ready_index
== 0)
33974 if (rs6000_cpu_attr
!= CPU_CELL
)
33977 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
33979 if (!reload_completed
33980 || is_nonpipeline_insn (insn
)
33981 || is_microcoded_insn (insn
))
33987 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
33988 and return true. */
33991 find_mem_ref (rtx pat
, rtx
*mem_ref
)
33996 /* stack_tie does not produce any real memory traffic. */
33997 if (tie_operand (pat
, VOIDmode
))
34000 if (GET_CODE (pat
) == MEM
)
34006 /* Recursively process the pattern. */
34007 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
34009 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
34013 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
34016 else if (fmt
[i
] == 'E')
34017 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
34019 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
34027 /* Determine if PAT is a PATTERN of a load insn. */
34030 is_load_insn1 (rtx pat
, rtx
*load_mem
)
34032 if (!pat
|| pat
== NULL_RTX
)
34035 if (GET_CODE (pat
) == SET
)
34036 return find_mem_ref (SET_SRC (pat
), load_mem
);
34038 if (GET_CODE (pat
) == PARALLEL
)
34042 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
34043 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
34050 /* Determine if INSN loads from memory. */
34053 is_load_insn (rtx insn
, rtx
*load_mem
)
34055 if (!insn
|| !INSN_P (insn
))
34061 return is_load_insn1 (PATTERN (insn
), load_mem
);
34064 /* Determine if PAT is a PATTERN of a store insn. */
34067 is_store_insn1 (rtx pat
, rtx
*str_mem
)
34069 if (!pat
|| pat
== NULL_RTX
)
34072 if (GET_CODE (pat
) == SET
)
34073 return find_mem_ref (SET_DEST (pat
), str_mem
);
34075 if (GET_CODE (pat
) == PARALLEL
)
34079 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
34080 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
34087 /* Determine if INSN stores to memory. */
34090 is_store_insn (rtx insn
, rtx
*str_mem
)
34092 if (!insn
|| !INSN_P (insn
))
34095 return is_store_insn1 (PATTERN (insn
), str_mem
);
34098 /* Return whether TYPE is a Power9 pairable vector instruction type. */
34101 is_power9_pairable_vec_type (enum attr_type type
)
34105 case TYPE_VECSIMPLE
:
34106 case TYPE_VECCOMPLEX
:
34110 case TYPE_VECFLOAT
:
34112 case TYPE_VECDOUBLE
:
34120 /* Returns whether the dependence between INSN and NEXT is considered
34121 costly by the given target. */
34124 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
34128 rtx load_mem
, str_mem
;
34130 /* If the flag is not enabled - no dependence is considered costly;
34131 allow all dependent insns in the same group.
34132 This is the most aggressive option. */
34133 if (rs6000_sched_costly_dep
== no_dep_costly
)
34136 /* If the flag is set to 1 - a dependence is always considered costly;
34137 do not allow dependent instructions in the same group.
34138 This is the most conservative option. */
34139 if (rs6000_sched_costly_dep
== all_deps_costly
)
34142 insn
= DEP_PRO (dep
);
34143 next
= DEP_CON (dep
);
34145 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
34146 && is_load_insn (next
, &load_mem
)
34147 && is_store_insn (insn
, &str_mem
))
34148 /* Prevent load after store in the same group. */
34151 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
34152 && is_load_insn (next
, &load_mem
)
34153 && is_store_insn (insn
, &str_mem
)
34154 && DEP_TYPE (dep
) == REG_DEP_TRUE
34155 && mem_locations_overlap(str_mem
, load_mem
))
34156 /* Prevent load after store in the same group if it is a true
34160 /* The flag is set to X; dependences with latency >= X are considered costly,
34161 and will not be scheduled in the same group. */
34162 if (rs6000_sched_costly_dep
<= max_dep_latency
34163 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
34169 /* Return the next insn after INSN that is found before TAIL is reached,
34170 skipping any "non-active" insns - insns that will not actually occupy
34171 an issue slot. Return NULL_RTX if such an insn is not found. */
34174 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
34176 if (insn
== NULL_RTX
|| insn
== tail
)
34181 insn
= NEXT_INSN (insn
);
34182 if (insn
== NULL_RTX
|| insn
== tail
)
34186 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
34187 || (NONJUMP_INSN_P (insn
)
34188 && GET_CODE (PATTERN (insn
)) != USE
34189 && GET_CODE (PATTERN (insn
)) != CLOBBER
34190 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
34196 /* Do Power9 specific sched_reorder2 reordering of ready list. */
34199 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
34204 enum attr_type type
, type2
;
34206 type
= get_attr_type (last_scheduled_insn
);
34208 /* Try to issue fixed point divides back-to-back in pairs so they will be
34209 routed to separate execution units and execute in parallel. */
34210 if (type
== TYPE_DIV
&& divide_cnt
== 0)
34212 /* First divide has been scheduled. */
34215 /* Scan the ready list looking for another divide, if found move it
34216 to the end of the list so it is chosen next. */
34220 if (recog_memoized (ready
[pos
]) >= 0
34221 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
34224 for (i
= pos
; i
< lastpos
; i
++)
34225 ready
[i
] = ready
[i
+ 1];
34226 ready
[lastpos
] = tmp
;
34234 /* Last insn was the 2nd divide or not a divide, reset the counter. */
34237 /* The best dispatch throughput for vector and vector load insns can be
34238 achieved by interleaving a vector and vector load such that they'll
34239 dispatch to the same superslice. If this pairing cannot be achieved
34240 then it is best to pair vector insns together and vector load insns
34243 To aid in this pairing, vec_pairing maintains the current state with
34244 the following values:
34246 0 : Initial state, no vecload/vector pairing has been started.
34248 1 : A vecload or vector insn has been issued and a candidate for
34249 pairing has been found and moved to the end of the ready
34251 if (type
== TYPE_VECLOAD
)
34253 /* Issued a vecload. */
34254 if (vec_pairing
== 0)
34256 int vecload_pos
= -1;
34257 /* We issued a single vecload, look for a vector insn to pair it
34258 with. If one isn't found, try to pair another vecload. */
34262 if (recog_memoized (ready
[pos
]) >= 0)
34264 type2
= get_attr_type (ready
[pos
]);
34265 if (is_power9_pairable_vec_type (type2
))
34267 /* Found a vector insn to pair with, move it to the
34268 end of the ready list so it is scheduled next. */
34270 for (i
= pos
; i
< lastpos
; i
++)
34271 ready
[i
] = ready
[i
+ 1];
34272 ready
[lastpos
] = tmp
;
34274 return cached_can_issue_more
;
34276 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
34277 /* Remember position of first vecload seen. */
34282 if (vecload_pos
>= 0)
34284 /* Didn't find a vector to pair with but did find a vecload,
34285 move it to the end of the ready list. */
34286 tmp
= ready
[vecload_pos
];
34287 for (i
= vecload_pos
; i
< lastpos
; i
++)
34288 ready
[i
] = ready
[i
+ 1];
34289 ready
[lastpos
] = tmp
;
34291 return cached_can_issue_more
;
34295 else if (is_power9_pairable_vec_type (type
))
34297 /* Issued a vector operation. */
34298 if (vec_pairing
== 0)
34301 /* We issued a single vector insn, look for a vecload to pair it
34302 with. If one isn't found, try to pair another vector. */
34306 if (recog_memoized (ready
[pos
]) >= 0)
34308 type2
= get_attr_type (ready
[pos
]);
34309 if (type2
== TYPE_VECLOAD
)
34311 /* Found a vecload insn to pair with, move it to the
34312 end of the ready list so it is scheduled next. */
34314 for (i
= pos
; i
< lastpos
; i
++)
34315 ready
[i
] = ready
[i
+ 1];
34316 ready
[lastpos
] = tmp
;
34318 return cached_can_issue_more
;
34320 else if (is_power9_pairable_vec_type (type2
)
34322 /* Remember position of first vector insn seen. */
34329 /* Didn't find a vecload to pair with but did find a vector
34330 insn, move it to the end of the ready list. */
34331 tmp
= ready
[vec_pos
];
34332 for (i
= vec_pos
; i
< lastpos
; i
++)
34333 ready
[i
] = ready
[i
+ 1];
34334 ready
[lastpos
] = tmp
;
34336 return cached_can_issue_more
;
34341 /* We've either finished a vec/vecload pair, couldn't find an insn to
34342 continue the current pair, or the last insn had nothing to do with
34343 with pairing. In any case, reset the state. */
34347 return cached_can_issue_more
;
34350 /* We are about to begin issuing insns for this clock cycle. */
34353 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
34354 rtx_insn
**ready ATTRIBUTE_UNUSED
,
34355 int *pn_ready ATTRIBUTE_UNUSED
,
34356 int clock_var ATTRIBUTE_UNUSED
)
34358 int n_ready
= *pn_ready
;
34361 fprintf (dump
, "// rs6000_sched_reorder :\n");
34363 /* Reorder the ready list, if the second to last ready insn
34364 is a nonepipeline insn. */
34365 if (rs6000_cpu_attr
== CPU_CELL
&& n_ready
> 1)
34367 if (is_nonpipeline_insn (ready
[n_ready
- 1])
34368 && (recog_memoized (ready
[n_ready
- 2]) > 0))
34369 /* Simply swap first two insns. */
34370 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
34373 if (rs6000_cpu
== PROCESSOR_POWER6
)
34374 load_store_pendulum
= 0;
34376 return rs6000_issue_rate ();
34379 /* Like rs6000_sched_reorder, but called after issuing each insn. */
34382 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
34383 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
34386 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
34388 /* For Power6, we need to handle some special cases to try and keep the
34389 store queue from overflowing and triggering expensive flushes.
34391 This code monitors how load and store instructions are being issued
34392 and skews the ready list one way or the other to increase the likelihood
34393 that a desired instruction is issued at the proper time.
34395 A couple of things are done. First, we maintain a "load_store_pendulum"
34396 to track the current state of load/store issue.
34398 - If the pendulum is at zero, then no loads or stores have been
34399 issued in the current cycle so we do nothing.
34401 - If the pendulum is 1, then a single load has been issued in this
34402 cycle and we attempt to locate another load in the ready list to
34405 - If the pendulum is -2, then two stores have already been
34406 issued in this cycle, so we increase the priority of the first load
34407 in the ready list to increase it's likelihood of being chosen first
34410 - If the pendulum is -1, then a single store has been issued in this
34411 cycle and we attempt to locate another store in the ready list to
34412 issue with it, preferring a store to an adjacent memory location to
34413 facilitate store pairing in the store queue.
34415 - If the pendulum is 2, then two loads have already been
34416 issued in this cycle, so we increase the priority of the first store
34417 in the ready list to increase it's likelihood of being chosen first
34420 - If the pendulum < -2 or > 2, then do nothing.
34422 Note: This code covers the most common scenarios. There exist non
34423 load/store instructions which make use of the LSU and which
34424 would need to be accounted for to strictly model the behavior
34425 of the machine. Those instructions are currently unaccounted
34426 for to help minimize compile time overhead of this code.
34428 if (rs6000_cpu
== PROCESSOR_POWER6
&& last_scheduled_insn
)
34433 rtx load_mem
, str_mem
;
34435 if (is_store_insn (last_scheduled_insn
, &str_mem
))
34436 /* Issuing a store, swing the load_store_pendulum to the left */
34437 load_store_pendulum
--;
34438 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
34439 /* Issuing a load, swing the load_store_pendulum to the right */
34440 load_store_pendulum
++;
34442 return cached_can_issue_more
;
34444 /* If the pendulum is balanced, or there is only one instruction on
34445 the ready list, then all is well, so return. */
34446 if ((load_store_pendulum
== 0) || (*pn_ready
<= 1))
34447 return cached_can_issue_more
;
34449 if (load_store_pendulum
== 1)
34451 /* A load has been issued in this cycle. Scan the ready list
34452 for another load to issue with it */
34457 if (is_load_insn (ready
[pos
], &load_mem
))
34459 /* Found a load. Move it to the head of the ready list,
34460 and adjust it's priority so that it is more likely to
34463 for (i
=pos
; i
<*pn_ready
-1; i
++)
34464 ready
[i
] = ready
[i
+ 1];
34465 ready
[*pn_ready
-1] = tmp
;
34467 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
34468 INSN_PRIORITY (tmp
)++;
34474 else if (load_store_pendulum
== -2)
34476 /* Two stores have been issued in this cycle. Increase the
34477 priority of the first load in the ready list to favor it for
34478 issuing in the next cycle. */
34483 if (is_load_insn (ready
[pos
], &load_mem
)
34485 && INSN_PRIORITY_KNOWN (ready
[pos
]))
34487 INSN_PRIORITY (ready
[pos
])++;
34489 /* Adjust the pendulum to account for the fact that a load
34490 was found and increased in priority. This is to prevent
34491 increasing the priority of multiple loads */
34492 load_store_pendulum
--;
34499 else if (load_store_pendulum
== -1)
34501 /* A store has been issued in this cycle. Scan the ready list for
34502 another store to issue with it, preferring a store to an adjacent
34504 int first_store_pos
= -1;
34510 if (is_store_insn (ready
[pos
], &str_mem
))
34513 /* Maintain the index of the first store found on the
34515 if (first_store_pos
== -1)
34516 first_store_pos
= pos
;
34518 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
34519 && adjacent_mem_locations (str_mem
, str_mem2
))
34521 /* Found an adjacent store. Move it to the head of the
34522 ready list, and adjust it's priority so that it is
34523 more likely to stay there */
34525 for (i
=pos
; i
<*pn_ready
-1; i
++)
34526 ready
[i
] = ready
[i
+ 1];
34527 ready
[*pn_ready
-1] = tmp
;
34529 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
34530 INSN_PRIORITY (tmp
)++;
34532 first_store_pos
= -1;
34540 if (first_store_pos
>= 0)
34542 /* An adjacent store wasn't found, but a non-adjacent store was,
34543 so move the non-adjacent store to the front of the ready
34544 list, and adjust its priority so that it is more likely to
34546 tmp
= ready
[first_store_pos
];
34547 for (i
=first_store_pos
; i
<*pn_ready
-1; i
++)
34548 ready
[i
] = ready
[i
+ 1];
34549 ready
[*pn_ready
-1] = tmp
;
34550 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
34551 INSN_PRIORITY (tmp
)++;
34554 else if (load_store_pendulum
== 2)
34556 /* Two loads have been issued in this cycle. Increase the priority
34557 of the first store in the ready list to favor it for issuing in
34563 if (is_store_insn (ready
[pos
], &str_mem
)
34565 && INSN_PRIORITY_KNOWN (ready
[pos
]))
34567 INSN_PRIORITY (ready
[pos
])++;
34569 /* Adjust the pendulum to account for the fact that a store
34570 was found and increased in priority. This is to prevent
34571 increasing the priority of multiple stores */
34572 load_store_pendulum
++;
34581 /* Do Power9 dependent reordering if necessary. */
34582 if (rs6000_cpu
== PROCESSOR_POWER9
&& last_scheduled_insn
34583 && recog_memoized (last_scheduled_insn
) >= 0)
34584 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
34586 return cached_can_issue_more
;
34589 /* Return whether the presence of INSN causes a dispatch group termination
34590 of group WHICH_GROUP.
34592 If WHICH_GROUP == current_group, this function will return true if INSN
34593 causes the termination of the current group (i.e, the dispatch group to
34594 which INSN belongs). This means that INSN will be the last insn in the
34595 group it belongs to.
34597 If WHICH_GROUP == previous_group, this function will return true if INSN
34598 causes the termination of the previous group (i.e, the dispatch group that
34599 precedes the group to which INSN belongs). This means that INSN will be
34600 the first insn in the group it belongs to). */
34603 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
34610 first
= insn_must_be_first_in_group (insn
);
34611 last
= insn_must_be_last_in_group (insn
);
34616 if (which_group
== current_group
)
34618 else if (which_group
== previous_group
)
34626 insn_must_be_first_in_group (rtx_insn
*insn
)
34628 enum attr_type type
;
34632 || DEBUG_INSN_P (insn
)
34633 || GET_CODE (PATTERN (insn
)) == USE
34634 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
34637 switch (rs6000_cpu
)
34639 case PROCESSOR_POWER5
:
34640 if (is_cracked_insn (insn
))
34643 case PROCESSOR_POWER4
:
34644 if (is_microcoded_insn (insn
))
34647 if (!rs6000_sched_groups
)
34650 type
= get_attr_type (insn
);
34657 case TYPE_DELAYED_CR
:
34658 case TYPE_CR_LOGICAL
:
34671 case PROCESSOR_POWER6
:
34672 type
= get_attr_type (insn
);
34681 case TYPE_FPCOMPARE
:
34692 if (get_attr_dot (insn
) == DOT_NO
34693 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
34698 if (get_attr_size (insn
) == SIZE_32
)
34706 if (get_attr_update (insn
) == UPDATE_YES
)
34714 case PROCESSOR_POWER7
:
34715 type
= get_attr_type (insn
);
34719 case TYPE_CR_LOGICAL
:
34733 if (get_attr_dot (insn
) == DOT_YES
)
34738 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34739 || get_attr_update (insn
) == UPDATE_YES
)
34746 if (get_attr_update (insn
) == UPDATE_YES
)
34754 case PROCESSOR_POWER8
:
34755 type
= get_attr_type (insn
);
34759 case TYPE_CR_LOGICAL
:
34760 case TYPE_DELAYED_CR
:
34768 case TYPE_VECSTORE
:
34775 if (get_attr_dot (insn
) == DOT_YES
)
34780 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34781 || get_attr_update (insn
) == UPDATE_YES
)
34786 if (get_attr_update (insn
) == UPDATE_YES
34787 && get_attr_indexed (insn
) == INDEXED_YES
)
34803 insn_must_be_last_in_group (rtx_insn
*insn
)
34805 enum attr_type type
;
34809 || DEBUG_INSN_P (insn
)
34810 || GET_CODE (PATTERN (insn
)) == USE
34811 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
34814 switch (rs6000_cpu
) {
34815 case PROCESSOR_POWER4
:
34816 case PROCESSOR_POWER5
:
34817 if (is_microcoded_insn (insn
))
34820 if (is_branch_slot_insn (insn
))
34824 case PROCESSOR_POWER6
:
34825 type
= get_attr_type (insn
);
34833 case TYPE_FPCOMPARE
:
34844 if (get_attr_dot (insn
) == DOT_NO
34845 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
34850 if (get_attr_size (insn
) == SIZE_32
)
34858 case PROCESSOR_POWER7
:
34859 type
= get_attr_type (insn
);
34869 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34870 && get_attr_update (insn
) == UPDATE_YES
)
34875 if (get_attr_update (insn
) == UPDATE_YES
34876 && get_attr_indexed (insn
) == INDEXED_YES
)
34884 case PROCESSOR_POWER8
:
34885 type
= get_attr_type (insn
);
34897 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34898 && get_attr_update (insn
) == UPDATE_YES
)
34903 if (get_attr_update (insn
) == UPDATE_YES
34904 && get_attr_indexed (insn
) == INDEXED_YES
)
34919 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
34920 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
34923 is_costly_group (rtx
*group_insns
, rtx next_insn
)
34926 int issue_rate
= rs6000_issue_rate ();
34928 for (i
= 0; i
< issue_rate
; i
++)
34930 sd_iterator_def sd_it
;
34932 rtx insn
= group_insns
[i
];
34937 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
34939 rtx next
= DEP_CON (dep
);
34941 if (next
== next_insn
34942 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
34950 /* Utility of the function redefine_groups.
34951 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
34952 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
34953 to keep it "far" (in a separate group) from GROUP_INSNS, following
34954 one of the following schemes, depending on the value of the flag
34955 -minsert_sched_nops = X:
34956 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
34957 in order to force NEXT_INSN into a separate group.
34958 (2) X < sched_finish_regroup_exact: insert exactly X nops.
34959 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
34960 insertion (has a group just ended, how many vacant issue slots remain in the
34961 last group, and how many dispatch groups were encountered so far). */
34964 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
34965 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
34970 int issue_rate
= rs6000_issue_rate ();
34971 bool end
= *group_end
;
34974 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
34975 return can_issue_more
;
34977 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
34978 return can_issue_more
;
34980 force
= is_costly_group (group_insns
, next_insn
);
34982 return can_issue_more
;
34984 if (sched_verbose
> 6)
34985 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
34986 *group_count
,can_issue_more
);
34988 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
34991 can_issue_more
= 0;
34993 /* Since only a branch can be issued in the last issue_slot, it is
34994 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
34995 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
34996 in this case the last nop will start a new group and the branch
34997 will be forced to the new group. */
34998 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
35001 /* Do we have a special group ending nop? */
35002 if (rs6000_cpu_attr
== CPU_POWER6
|| rs6000_cpu_attr
== CPU_POWER7
35003 || rs6000_cpu_attr
== CPU_POWER8
)
35005 nop
= gen_group_ending_nop ();
35006 emit_insn_before (nop
, next_insn
);
35007 can_issue_more
= 0;
35010 while (can_issue_more
> 0)
35013 emit_insn_before (nop
, next_insn
);
35021 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
35023 int n_nops
= rs6000_sched_insert_nops
;
35025 /* Nops can't be issued from the branch slot, so the effective
35026 issue_rate for nops is 'issue_rate - 1'. */
35027 if (can_issue_more
== 0)
35028 can_issue_more
= issue_rate
;
35030 if (can_issue_more
== 0)
35032 can_issue_more
= issue_rate
- 1;
35035 for (i
= 0; i
< issue_rate
; i
++)
35037 group_insns
[i
] = 0;
35044 emit_insn_before (nop
, next_insn
);
35045 if (can_issue_more
== issue_rate
- 1) /* new group begins */
35048 if (can_issue_more
== 0)
35050 can_issue_more
= issue_rate
- 1;
35053 for (i
= 0; i
< issue_rate
; i
++)
35055 group_insns
[i
] = 0;
35061 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
35064 /* Is next_insn going to start a new group? */
35067 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
35068 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
35069 || (can_issue_more
< issue_rate
&&
35070 insn_terminates_group_p (next_insn
, previous_group
)));
35071 if (*group_end
&& end
)
35074 if (sched_verbose
> 6)
35075 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
35076 *group_count
, can_issue_more
);
35077 return can_issue_more
;
35080 return can_issue_more
;
35083 /* This function tries to synch the dispatch groups that the compiler "sees"
35084 with the dispatch groups that the processor dispatcher is expected to
35085 form in practice. It tries to achieve this synchronization by forcing the
35086 estimated processor grouping on the compiler (as opposed to the function
35087 'pad_goups' which tries to force the scheduler's grouping on the processor).
35089 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
35090 examines the (estimated) dispatch groups that will be formed by the processor
35091 dispatcher. It marks these group boundaries to reflect the estimated
35092 processor grouping, overriding the grouping that the scheduler had marked.
35093 Depending on the value of the flag '-minsert-sched-nops' this function can
35094 force certain insns into separate groups or force a certain distance between
35095 them by inserting nops, for example, if there exists a "costly dependence"
35098 The function estimates the group boundaries that the processor will form as
35099 follows: It keeps track of how many vacant issue slots are available after
35100 each insn. A subsequent insn will start a new group if one of the following
35102 - no more vacant issue slots remain in the current dispatch group.
35103 - only the last issue slot, which is the branch slot, is vacant, but the next
35104 insn is not a branch.
35105 - only the last 2 or less issue slots, including the branch slot, are vacant,
35106 which means that a cracked insn (which occupies two issue slots) can't be
35107 issued in this group.
35108 - less than 'issue_rate' slots are vacant, and the next insn always needs to
35109 start a new group. */
35112 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
35115 rtx_insn
*insn
, *next_insn
;
35117 int can_issue_more
;
35120 int group_count
= 0;
35124 issue_rate
= rs6000_issue_rate ();
35125 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
35126 for (i
= 0; i
< issue_rate
; i
++)
35128 group_insns
[i
] = 0;
35130 can_issue_more
= issue_rate
;
35132 insn
= get_next_active_insn (prev_head_insn
, tail
);
35135 while (insn
!= NULL_RTX
)
35137 slot
= (issue_rate
- can_issue_more
);
35138 group_insns
[slot
] = insn
;
35140 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
35141 if (insn_terminates_group_p (insn
, current_group
))
35142 can_issue_more
= 0;
35144 next_insn
= get_next_active_insn (insn
, tail
);
35145 if (next_insn
== NULL_RTX
)
35146 return group_count
+ 1;
35148 /* Is next_insn going to start a new group? */
35150 = (can_issue_more
== 0
35151 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
35152 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
35153 || (can_issue_more
< issue_rate
&&
35154 insn_terminates_group_p (next_insn
, previous_group
)));
35156 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
35157 next_insn
, &group_end
, can_issue_more
,
35163 can_issue_more
= 0;
35164 for (i
= 0; i
< issue_rate
; i
++)
35166 group_insns
[i
] = 0;
35170 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
35171 PUT_MODE (next_insn
, VOIDmode
);
35172 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
35173 PUT_MODE (next_insn
, TImode
);
35176 if (can_issue_more
== 0)
35177 can_issue_more
= issue_rate
;
35180 return group_count
;
35183 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
35184 dispatch group boundaries that the scheduler had marked. Pad with nops
35185 any dispatch groups which have vacant issue slots, in order to force the
35186 scheduler's grouping on the processor dispatcher. The function
35187 returns the number of dispatch groups found. */
35190 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
35193 rtx_insn
*insn
, *next_insn
;
35196 int can_issue_more
;
35198 int group_count
= 0;
35200 /* Initialize issue_rate. */
35201 issue_rate
= rs6000_issue_rate ();
35202 can_issue_more
= issue_rate
;
35204 insn
= get_next_active_insn (prev_head_insn
, tail
);
35205 next_insn
= get_next_active_insn (insn
, tail
);
35207 while (insn
!= NULL_RTX
)
35210 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
35212 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
35214 if (next_insn
== NULL_RTX
)
35219 /* If the scheduler had marked group termination at this location
35220 (between insn and next_insn), and neither insn nor next_insn will
35221 force group termination, pad the group with nops to force group
35224 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
35225 && !insn_terminates_group_p (insn
, current_group
)
35226 && !insn_terminates_group_p (next_insn
, previous_group
))
35228 if (!is_branch_slot_insn (next_insn
))
35231 while (can_issue_more
)
35234 emit_insn_before (nop
, next_insn
);
35239 can_issue_more
= issue_rate
;
35244 next_insn
= get_next_active_insn (insn
, tail
);
35247 return group_count
;
35250 /* We're beginning a new block. Initialize data structures as necessary. */
35253 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
35254 int sched_verbose ATTRIBUTE_UNUSED
,
35255 int max_ready ATTRIBUTE_UNUSED
)
35257 last_scheduled_insn
= NULL
;
35258 load_store_pendulum
= 0;
35263 /* The following function is called at the end of scheduling BB.
35264 After reload, it inserts nops at insn group bundling. */
35267 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
35272 fprintf (dump
, "=== Finishing schedule.\n");
35274 if (reload_completed
&& rs6000_sched_groups
)
35276 /* Do not run sched_finish hook when selective scheduling enabled. */
35277 if (sel_sched_p ())
35280 if (rs6000_sched_insert_nops
== sched_finish_none
)
35283 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
35284 n_groups
= pad_groups (dump
, sched_verbose
,
35285 current_sched_info
->prev_head
,
35286 current_sched_info
->next_tail
);
35288 n_groups
= redefine_groups (dump
, sched_verbose
,
35289 current_sched_info
->prev_head
,
35290 current_sched_info
->next_tail
);
35292 if (sched_verbose
>= 6)
35294 fprintf (dump
, "ngroups = %d\n", n_groups
);
35295 print_rtl (dump
, current_sched_info
->prev_head
);
35296 fprintf (dump
, "Done finish_sched\n");
35301 struct rs6000_sched_context
35303 short cached_can_issue_more
;
35304 rtx_insn
*last_scheduled_insn
;
35305 int load_store_pendulum
;
35310 typedef struct rs6000_sched_context rs6000_sched_context_def
;
35311 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
35313 /* Allocate store for new scheduling context. */
35315 rs6000_alloc_sched_context (void)
35317 return xmalloc (sizeof (rs6000_sched_context_def
));
35320 /* If CLEAN_P is true then initializes _SC with clean data,
35321 and from the global context otherwise. */
35323 rs6000_init_sched_context (void *_sc
, bool clean_p
)
35325 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
35329 sc
->cached_can_issue_more
= 0;
35330 sc
->last_scheduled_insn
= NULL
;
35331 sc
->load_store_pendulum
= 0;
35332 sc
->divide_cnt
= 0;
35333 sc
->vec_pairing
= 0;
35337 sc
->cached_can_issue_more
= cached_can_issue_more
;
35338 sc
->last_scheduled_insn
= last_scheduled_insn
;
35339 sc
->load_store_pendulum
= load_store_pendulum
;
35340 sc
->divide_cnt
= divide_cnt
;
35341 sc
->vec_pairing
= vec_pairing
;
35345 /* Sets the global scheduling context to the one pointed to by _SC. */
35347 rs6000_set_sched_context (void *_sc
)
35349 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
35351 gcc_assert (sc
!= NULL
);
35353 cached_can_issue_more
= sc
->cached_can_issue_more
;
35354 last_scheduled_insn
= sc
->last_scheduled_insn
;
35355 load_store_pendulum
= sc
->load_store_pendulum
;
35356 divide_cnt
= sc
->divide_cnt
;
35357 vec_pairing
= sc
->vec_pairing
;
35362 rs6000_free_sched_context (void *_sc
)
35364 gcc_assert (_sc
!= NULL
);
35370 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
35372 switch (get_attr_type (insn
))
35387 /* Length in units of the trampoline for entering a nested function. */
35390 rs6000_trampoline_size (void)
35394 switch (DEFAULT_ABI
)
35397 gcc_unreachable ();
35400 ret
= (TARGET_32BIT
) ? 12 : 24;
35404 gcc_assert (!TARGET_32BIT
);
35410 ret
= (TARGET_32BIT
) ? 40 : 48;
35417 /* Emit RTL insns to initialize the variable parts of a trampoline.
35418 FNADDR is an RTX for the address of the function's pure code.
35419 CXT is an RTX for the static chain value for the function. */
35422 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
35424 int regsize
= (TARGET_32BIT
) ? 4 : 8;
35425 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
35426 rtx ctx_reg
= force_reg (Pmode
, cxt
);
35427 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
35429 switch (DEFAULT_ABI
)
35432 gcc_unreachable ();
35434 /* Under AIX, just build the 3 word function descriptor */
35437 rtx fnmem
, fn_reg
, toc_reg
;
35439 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
35440 error ("You cannot take the address of a nested function if you use "
35441 "the -mno-pointers-to-nested-functions option.");
35443 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
35444 fn_reg
= gen_reg_rtx (Pmode
);
35445 toc_reg
= gen_reg_rtx (Pmode
);
35447 /* Macro to shorten the code expansions below. */
35448 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
35450 m_tramp
= replace_equiv_address (m_tramp
, addr
);
35452 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
35453 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
35454 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
35455 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
35456 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
35462 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
35466 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
35467 LCT_NORMAL
, VOIDmode
, 4,
35469 GEN_INT (rs6000_trampoline_size ()), SImode
,
35477 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
35478 identifier as an argument, so the front end shouldn't look it up. */
35481 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
35483 return is_attribute_p ("altivec", attr_id
);
35486 /* Handle the "altivec" attribute. The attribute may have
35487 arguments as follows:
35489 __attribute__((altivec(vector__)))
35490 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
35491 __attribute__((altivec(bool__))) (always followed by 'unsigned')
35493 and may appear more than once (e.g., 'vector bool char') in a
35494 given declaration. */
35497 rs6000_handle_altivec_attribute (tree
*node
,
35498 tree name ATTRIBUTE_UNUSED
,
35500 int flags ATTRIBUTE_UNUSED
,
35501 bool *no_add_attrs
)
35503 tree type
= *node
, result
= NULL_TREE
;
35507 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
35508 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
35509 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
35512 while (POINTER_TYPE_P (type
)
35513 || TREE_CODE (type
) == FUNCTION_TYPE
35514 || TREE_CODE (type
) == METHOD_TYPE
35515 || TREE_CODE (type
) == ARRAY_TYPE
)
35516 type
= TREE_TYPE (type
);
35518 mode
= TYPE_MODE (type
);
35520 /* Check for invalid AltiVec type qualifiers. */
35521 if (type
== long_double_type_node
)
35522 error ("use of %<long double%> in AltiVec types is invalid");
35523 else if (type
== boolean_type_node
)
35524 error ("use of boolean types in AltiVec types is invalid");
35525 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
35526 error ("use of %<complex%> in AltiVec types is invalid");
35527 else if (DECIMAL_FLOAT_MODE_P (mode
))
35528 error ("use of decimal floating point types in AltiVec types is invalid");
35529 else if (!TARGET_VSX
)
35531 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
35534 error ("use of %<long%> in AltiVec types is invalid for "
35535 "64-bit code without -mvsx");
35536 else if (rs6000_warn_altivec_long
)
35537 warning (0, "use of %<long%> in AltiVec types is deprecated; "
35540 else if (type
== long_long_unsigned_type_node
35541 || type
== long_long_integer_type_node
)
35542 error ("use of %<long long%> in AltiVec types is invalid without "
35544 else if (type
== double_type_node
)
35545 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
35548 switch (altivec_type
)
35551 unsigned_p
= TYPE_UNSIGNED (type
);
35555 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
35558 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
35561 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
35564 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
35567 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
35569 case E_SFmode
: result
= V4SF_type_node
; break;
35570 case E_DFmode
: result
= V2DF_type_node
; break;
35571 /* If the user says 'vector int bool', we may be handed the 'bool'
35572 attribute _before_ the 'vector' attribute, and so select the
35573 proper type in the 'b' case below. */
35574 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
35575 case E_V2DImode
: case E_V2DFmode
:
35583 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
35584 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
35585 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
35586 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
35593 case E_V8HImode
: result
= pixel_V8HI_type_node
;
35599 /* Propagate qualifiers attached to the element type
35600 onto the vector type. */
35601 if (result
&& result
!= type
&& TYPE_QUALS (type
))
35602 result
= build_qualified_type (result
, TYPE_QUALS (type
));
35604 *no_add_attrs
= true; /* No need to hang on to the attribute. */
35607 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
35612 /* AltiVec defines four built-in scalar types that serve as vector
35613 elements; we must teach the compiler how to mangle them. */
35615 static const char *
35616 rs6000_mangle_type (const_tree type
)
35618 type
= TYPE_MAIN_VARIANT (type
);
35620 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
35621 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
35624 if (type
== bool_char_type_node
) return "U6__boolc";
35625 if (type
== bool_short_type_node
) return "U6__bools";
35626 if (type
== pixel_type_node
) return "u7__pixel";
35627 if (type
== bool_int_type_node
) return "U6__booli";
35628 if (type
== bool_long_type_node
) return "U6__booll";
35630 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
35631 "g" for IBM extended double, no matter whether it is long double (using
35632 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
35633 if (TARGET_FLOAT128_TYPE
)
35635 if (type
== ieee128_float_type_node
)
35636 return "U10__float128";
35638 if (type
== ibm128_float_type_node
)
35641 if (type
== long_double_type_node
&& TARGET_LONG_DOUBLE_128
)
35642 return (TARGET_IEEEQUAD
) ? "U10__float128" : "g";
35645 /* Mangle IBM extended float long double as `g' (__float128) on
35646 powerpc*-linux where long-double-64 previously was the default. */
35647 if (TYPE_MAIN_VARIANT (type
) == long_double_type_node
35649 && TARGET_LONG_DOUBLE_128
35650 && !TARGET_IEEEQUAD
)
35653 /* For all other types, use normal C++ mangling. */
35657 /* Handle a "longcall" or "shortcall" attribute; arguments as in
35658 struct attribute_spec.handler. */
35661 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
35662 tree args ATTRIBUTE_UNUSED
,
35663 int flags ATTRIBUTE_UNUSED
,
35664 bool *no_add_attrs
)
35666 if (TREE_CODE (*node
) != FUNCTION_TYPE
35667 && TREE_CODE (*node
) != FIELD_DECL
35668 && TREE_CODE (*node
) != TYPE_DECL
)
35670 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35672 *no_add_attrs
= true;
35678 /* Set longcall attributes on all functions declared when
35679 rs6000_default_long_calls is true. */
35681 rs6000_set_default_type_attributes (tree type
)
35683 if (rs6000_default_long_calls
35684 && (TREE_CODE (type
) == FUNCTION_TYPE
35685 || TREE_CODE (type
) == METHOD_TYPE
))
35686 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
35688 TYPE_ATTRIBUTES (type
));
35691 darwin_set_default_type_attributes (type
);
35695 /* Return a reference suitable for calling a function with the
35696 longcall attribute. */
35699 rs6000_longcall_ref (rtx call_ref
)
35701 const char *call_name
;
35704 if (GET_CODE (call_ref
) != SYMBOL_REF
)
35707 /* System V adds '.' to the internal name, so skip them. */
35708 call_name
= XSTR (call_ref
, 0);
35709 if (*call_name
== '.')
35711 while (*call_name
== '.')
35714 node
= get_identifier (call_name
);
35715 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
35718 return force_reg (Pmode
, call_ref
);
35721 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
35722 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
35725 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35726 struct attribute_spec.handler. */
35728 rs6000_handle_struct_attribute (tree
*node
, tree name
,
35729 tree args ATTRIBUTE_UNUSED
,
35730 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35733 if (DECL_P (*node
))
35735 if (TREE_CODE (*node
) == TYPE_DECL
)
35736 type
= &TREE_TYPE (*node
);
35741 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
35742 || TREE_CODE (*type
) == UNION_TYPE
)))
35744 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
35745 *no_add_attrs
= true;
35748 else if ((is_attribute_p ("ms_struct", name
)
35749 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
35750 || ((is_attribute_p ("gcc_struct", name
)
35751 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
35753 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
35755 *no_add_attrs
= true;
35762 rs6000_ms_bitfield_layout_p (const_tree record_type
)
35764 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
35765 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
35766 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
35769 #ifdef USING_ELFOS_H
35771 /* A get_unnamed_section callback, used for switching to toc_section. */
35774 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
35776 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
35777 && TARGET_MINIMAL_TOC
)
35779 if (!toc_initialized
)
35781 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
35782 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35783 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
35784 fprintf (asm_out_file
, "\t.tc ");
35785 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
35786 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
35787 fprintf (asm_out_file
, "\n");
35789 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
35790 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35791 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
35792 fprintf (asm_out_file
, " = .+32768\n");
35793 toc_initialized
= 1;
35796 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
35798 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
35800 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
35801 if (!toc_initialized
)
35803 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35804 toc_initialized
= 1;
35809 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
35810 if (!toc_initialized
)
35812 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35813 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
35814 fprintf (asm_out_file
, " = .+32768\n");
35815 toc_initialized
= 1;
35820 /* Implement TARGET_ASM_INIT_SECTIONS. */
35823 rs6000_elf_asm_init_sections (void)
35826 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
35829 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
35830 SDATA2_SECTION_ASM_OP
);
35833 /* Implement TARGET_SELECT_RTX_SECTION. */
35836 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
35837 unsigned HOST_WIDE_INT align
)
35839 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
35840 return toc_section
;
35842 return default_elf_select_rtx_section (mode
, x
, align
);
35845 /* For a SYMBOL_REF, set generic flags and then perform some
35846 target-specific processing.
35848 When the AIX ABI is requested on a non-AIX system, replace the
35849 function name with the real name (with a leading .) rather than the
35850 function descriptor name. This saves a lot of overriding code to
35851 read the prefixes. */
35853 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
35855 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
35857 default_encode_section_info (decl
, rtl
, first
);
35860 && TREE_CODE (decl
) == FUNCTION_DECL
35862 && DEFAULT_ABI
== ABI_AIX
)
35864 rtx sym_ref
= XEXP (rtl
, 0);
35865 size_t len
= strlen (XSTR (sym_ref
, 0));
35866 char *str
= XALLOCAVEC (char, len
+ 2);
35868 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
35869 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
35874 compare_section_name (const char *section
, const char *templ
)
35878 len
= strlen (templ
);
35879 return (strncmp (section
, templ
, len
) == 0
35880 && (section
[len
] == 0 || section
[len
] == '.'));
35884 rs6000_elf_in_small_data_p (const_tree decl
)
35886 if (rs6000_sdata
== SDATA_NONE
)
35889 /* We want to merge strings, so we never consider them small data. */
35890 if (TREE_CODE (decl
) == STRING_CST
)
35893 /* Functions are never in the small data area. */
35894 if (TREE_CODE (decl
) == FUNCTION_DECL
)
35897 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
35899 const char *section
= DECL_SECTION_NAME (decl
);
35900 if (compare_section_name (section
, ".sdata")
35901 || compare_section_name (section
, ".sdata2")
35902 || compare_section_name (section
, ".gnu.linkonce.s")
35903 || compare_section_name (section
, ".sbss")
35904 || compare_section_name (section
, ".sbss2")
35905 || compare_section_name (section
, ".gnu.linkonce.sb")
35906 || strcmp (section
, ".PPC.EMB.sdata0") == 0
35907 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
35912 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
35915 && size
<= g_switch_value
35916 /* If it's not public, and we're not going to reference it there,
35917 there's no need to put it in the small data section. */
35918 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
35925 #endif /* USING_ELFOS_H */
35927 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
35930 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
35932 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
35935 /* Do not place thread-local symbols refs in the object blocks. */
35938 rs6000_use_blocks_for_decl_p (const_tree decl
)
35940 return !DECL_THREAD_LOCAL_P (decl
);
35943 /* Return a REG that occurs in ADDR with coefficient 1.
35944 ADDR can be effectively incremented by incrementing REG.
35946 r0 is special and we must not select it as an address
35947 register by this routine since our caller will try to
35948 increment the returned register via an "la" instruction. */
35951 find_addr_reg (rtx addr
)
35953 while (GET_CODE (addr
) == PLUS
)
35955 if (GET_CODE (XEXP (addr
, 0)) == REG
35956 && REGNO (XEXP (addr
, 0)) != 0)
35957 addr
= XEXP (addr
, 0);
35958 else if (GET_CODE (XEXP (addr
, 1)) == REG
35959 && REGNO (XEXP (addr
, 1)) != 0)
35960 addr
= XEXP (addr
, 1);
35961 else if (CONSTANT_P (XEXP (addr
, 0)))
35962 addr
= XEXP (addr
, 1);
35963 else if (CONSTANT_P (XEXP (addr
, 1)))
35964 addr
= XEXP (addr
, 0);
35966 gcc_unreachable ();
35968 gcc_assert (GET_CODE (addr
) == REG
&& REGNO (addr
) != 0);
35973 rs6000_fatal_bad_address (rtx op
)
35975 fatal_insn ("bad address", op
);
35980 typedef struct branch_island_d
{
35981 tree function_name
;
35987 static vec
<branch_island
, va_gc
> *branch_islands
;
35989 /* Remember to generate a branch island for far calls to the given
35993 add_compiler_branch_island (tree label_name
, tree function_name
,
35996 branch_island bi
= {function_name
, label_name
, line_number
};
35997 vec_safe_push (branch_islands
, bi
);
36000 /* Generate far-jump branch islands for everything recorded in
36001 branch_islands. Invoked immediately after the last instruction of
36002 the epilogue has been emitted; the branch islands must be appended
36003 to, and contiguous with, the function body. Mach-O stubs are
36004 generated in machopic_output_stub(). */
36007 macho_branch_islands (void)
36011 while (!vec_safe_is_empty (branch_islands
))
36013 branch_island
*bi
= &branch_islands
->last ();
36014 const char *label
= IDENTIFIER_POINTER (bi
->label_name
);
36015 const char *name
= IDENTIFIER_POINTER (bi
->function_name
);
36016 char name_buf
[512];
36017 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
36018 if (name
[0] == '*' || name
[0] == '&')
36019 strcpy (name_buf
, name
+1);
36023 strcpy (name_buf
+1, name
);
36025 strcpy (tmp_buf
, "\n");
36026 strcat (tmp_buf
, label
);
36027 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36028 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
36029 dbxout_stabd (N_SLINE
, bi
->line_number
);
36030 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36033 if (TARGET_LINK_STACK
)
36036 get_ppc476_thunk_name (name
);
36037 strcat (tmp_buf
, ":\n\tmflr r0\n\tbl ");
36038 strcat (tmp_buf
, name
);
36039 strcat (tmp_buf
, "\n");
36040 strcat (tmp_buf
, label
);
36041 strcat (tmp_buf
, "_pic:\n\tmflr r11\n");
36045 strcat (tmp_buf
, ":\n\tmflr r0\n\tbcl 20,31,");
36046 strcat (tmp_buf
, label
);
36047 strcat (tmp_buf
, "_pic\n");
36048 strcat (tmp_buf
, label
);
36049 strcat (tmp_buf
, "_pic:\n\tmflr r11\n");
36052 strcat (tmp_buf
, "\taddis r11,r11,ha16(");
36053 strcat (tmp_buf
, name_buf
);
36054 strcat (tmp_buf
, " - ");
36055 strcat (tmp_buf
, label
);
36056 strcat (tmp_buf
, "_pic)\n");
36058 strcat (tmp_buf
, "\tmtlr r0\n");
36060 strcat (tmp_buf
, "\taddi r12,r11,lo16(");
36061 strcat (tmp_buf
, name_buf
);
36062 strcat (tmp_buf
, " - ");
36063 strcat (tmp_buf
, label
);
36064 strcat (tmp_buf
, "_pic)\n");
36066 strcat (tmp_buf
, "\tmtctr r12\n\tbctr\n");
36070 strcat (tmp_buf
, ":\nlis r12,hi16(");
36071 strcat (tmp_buf
, name_buf
);
36072 strcat (tmp_buf
, ")\n\tori r12,r12,lo16(");
36073 strcat (tmp_buf
, name_buf
);
36074 strcat (tmp_buf
, ")\n\tmtctr r12\n\tbctr");
36076 output_asm_insn (tmp_buf
, 0);
36077 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36078 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
36079 dbxout_stabd (N_SLINE
, bi
->line_number
);
36080 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36081 branch_islands
->pop ();
36085 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
36086 already there or not. */
36089 no_previous_def (tree function_name
)
36094 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
36095 if (function_name
== bi
->function_name
)
36100 /* GET_PREV_LABEL gets the label name from the previous definition of
36104 get_prev_label (tree function_name
)
36109 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
36110 if (function_name
== bi
->function_name
)
36111 return bi
->label_name
;
36115 /* INSN is either a function call or a millicode call. It may have an
36116 unconditional jump in its delay slot.
36118 CALL_DEST is the routine we are calling. */
36121 output_call (rtx_insn
*insn
, rtx
*operands
, int dest_operand_number
,
36122 int cookie_operand_number
)
36124 static char buf
[256];
36125 if (darwin_emit_branch_islands
36126 && GET_CODE (operands
[dest_operand_number
]) == SYMBOL_REF
36127 && (INTVAL (operands
[cookie_operand_number
]) & CALL_LONG
))
36130 tree funname
= get_identifier (XSTR (operands
[dest_operand_number
], 0));
36132 if (no_previous_def (funname
))
36134 rtx label_rtx
= gen_label_rtx ();
36135 char *label_buf
, temp_buf
[256];
36136 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
36137 CODE_LABEL_NUMBER (label_rtx
));
36138 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
36139 labelname
= get_identifier (label_buf
);
36140 add_compiler_branch_island (labelname
, funname
, insn_line (insn
));
36143 labelname
= get_prev_label (funname
);
36145 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
36146 instruction will reach 'foo', otherwise link as 'bl L42'".
36147 "L42" should be a 'branch island', that will do a far jump to
36148 'foo'. Branch islands are generated in
36149 macho_branch_islands(). */
36150 sprintf (buf
, "jbsr %%z%d,%.246s",
36151 dest_operand_number
, IDENTIFIER_POINTER (labelname
));
36154 sprintf (buf
, "bl %%z%d", dest_operand_number
);
36158 /* Generate PIC and indirect symbol stubs. */
36161 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
36163 unsigned int length
;
36164 char *symbol_name
, *lazy_ptr_name
;
36165 char *local_label_0
;
36166 static int label
= 0;
36168 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
36169 symb
= (*targetm
.strip_name_encoding
) (symb
);
36172 length
= strlen (symb
);
36173 symbol_name
= XALLOCAVEC (char, length
+ 32);
36174 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
36176 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
36177 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
36180 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
36182 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
36186 fprintf (file
, "\t.align 5\n");
36188 fprintf (file
, "%s:\n", stub
);
36189 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36192 local_label_0
= XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
36193 sprintf (local_label_0
, "\"L%011d$spb\"", label
);
36195 fprintf (file
, "\tmflr r0\n");
36196 if (TARGET_LINK_STACK
)
36199 get_ppc476_thunk_name (name
);
36200 fprintf (file
, "\tbl %s\n", name
);
36201 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
36205 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
36206 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
36208 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
36209 lazy_ptr_name
, local_label_0
);
36210 fprintf (file
, "\tmtlr r0\n");
36211 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
36212 (TARGET_64BIT
? "ldu" : "lwzu"),
36213 lazy_ptr_name
, local_label_0
);
36214 fprintf (file
, "\tmtctr r12\n");
36215 fprintf (file
, "\tbctr\n");
36219 fprintf (file
, "\t.align 4\n");
36221 fprintf (file
, "%s:\n", stub
);
36222 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36224 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
36225 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
36226 (TARGET_64BIT
? "ldu" : "lwzu"),
36228 fprintf (file
, "\tmtctr r12\n");
36229 fprintf (file
, "\tbctr\n");
36232 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
36233 fprintf (file
, "%s:\n", lazy_ptr_name
);
36234 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36235 fprintf (file
, "%sdyld_stub_binding_helper\n",
36236 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
36239 /* Legitimize PIC addresses. If the address is already
36240 position-independent, we return ORIG. Newly generated
36241 position-independent addresses go into a reg. This is REG if non
36242 zero, otherwise we allocate register(s) as necessary. */
36244 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
36247 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
36252 if (reg
== NULL
&& ! reload_in_progress
&& ! reload_completed
)
36253 reg
= gen_reg_rtx (Pmode
);
36255 if (GET_CODE (orig
) == CONST
)
36259 if (GET_CODE (XEXP (orig
, 0)) == PLUS
36260 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
36263 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
36265 /* Use a different reg for the intermediate value, as
36266 it will be marked UNCHANGING. */
36267 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
36268 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
36271 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
36274 if (GET_CODE (offset
) == CONST_INT
)
36276 if (SMALL_INT (offset
))
36277 return plus_constant (Pmode
, base
, INTVAL (offset
));
36278 else if (! reload_in_progress
&& ! reload_completed
)
36279 offset
= force_reg (Pmode
, offset
);
36282 rtx mem
= force_const_mem (Pmode
, orig
);
36283 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
36286 return gen_rtx_PLUS (Pmode
, base
, offset
);
36289 /* Fall back on generic machopic code. */
36290 return machopic_legitimize_pic_address (orig
, mode
, reg
);
36293 /* Output a .machine directive for the Darwin assembler, and call
36294 the generic start_file routine. */
36297 rs6000_darwin_file_start (void)
36299 static const struct
36303 HOST_WIDE_INT if_set
;
36305 { "ppc64", "ppc64", MASK_64BIT
},
36306 { "970", "ppc970", MASK_PPC_GPOPT
| MASK_MFCRF
| MASK_POWERPC64
},
36307 { "power4", "ppc970", 0 },
36308 { "G5", "ppc970", 0 },
36309 { "7450", "ppc7450", 0 },
36310 { "7400", "ppc7400", MASK_ALTIVEC
},
36311 { "G4", "ppc7400", 0 },
36312 { "750", "ppc750", 0 },
36313 { "740", "ppc750", 0 },
36314 { "G3", "ppc750", 0 },
36315 { "604e", "ppc604e", 0 },
36316 { "604", "ppc604", 0 },
36317 { "603e", "ppc603", 0 },
36318 { "603", "ppc603", 0 },
36319 { "601", "ppc601", 0 },
36320 { NULL
, "ppc", 0 } };
36321 const char *cpu_id
= "";
36324 rs6000_file_start ();
36325 darwin_file_start ();
36327 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
36329 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
36330 cpu_id
= rs6000_default_cpu
;
36332 if (global_options_set
.x_rs6000_cpu_index
)
36333 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
36335 /* Look through the mapping array. Pick the first name that either
36336 matches the argument, has a bit set in IF_SET that is also set
36337 in the target flags, or has a NULL name. */
36340 while (mapping
[i
].arg
!= NULL
36341 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
36342 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
36345 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
36348 #endif /* TARGET_MACHO */
36352 rs6000_elf_reloc_rw_mask (void)
36356 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
36362 /* Record an element in the table of global constructors. SYMBOL is
36363 a SYMBOL_REF of the function to be called; PRIORITY is a number
36364 between 0 and MAX_INIT_PRIORITY.
36366 This differs from default_named_section_asm_out_constructor in
36367 that we have special handling for -mrelocatable. */
36369 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
36371 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
36373 const char *section
= ".ctors";
36376 if (priority
!= DEFAULT_INIT_PRIORITY
)
36378 sprintf (buf
, ".ctors.%.5u",
36379 /* Invert the numbering so the linker puts us in the proper
36380 order; constructors are run from right to left, and the
36381 linker sorts in increasing order. */
36382 MAX_INIT_PRIORITY
- priority
);
36386 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
36387 assemble_align (POINTER_SIZE
);
36389 if (DEFAULT_ABI
== ABI_V4
36390 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
36392 fputs ("\t.long (", asm_out_file
);
36393 output_addr_const (asm_out_file
, symbol
);
36394 fputs (")@fixup\n", asm_out_file
);
36397 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
36400 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
36402 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
36404 const char *section
= ".dtors";
36407 if (priority
!= DEFAULT_INIT_PRIORITY
)
36409 sprintf (buf
, ".dtors.%.5u",
36410 /* Invert the numbering so the linker puts us in the proper
36411 order; constructors are run from right to left, and the
36412 linker sorts in increasing order. */
36413 MAX_INIT_PRIORITY
- priority
);
36417 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
36418 assemble_align (POINTER_SIZE
);
36420 if (DEFAULT_ABI
== ABI_V4
36421 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
36423 fputs ("\t.long (", asm_out_file
);
36424 output_addr_const (asm_out_file
, symbol
);
36425 fputs (")@fixup\n", asm_out_file
);
36428 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
36432 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
36434 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
36436 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
36437 ASM_OUTPUT_LABEL (file
, name
);
36438 fputs (DOUBLE_INT_ASM_OP
, file
);
36439 rs6000_output_function_entry (file
, name
);
36440 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
36443 fputs ("\t.size\t", file
);
36444 assemble_name (file
, name
);
36445 fputs (",24\n\t.type\t.", file
);
36446 assemble_name (file
, name
);
36447 fputs (",@function\n", file
);
36448 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
36450 fputs ("\t.globl\t.", file
);
36451 assemble_name (file
, name
);
36456 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
36457 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
36458 rs6000_output_function_entry (file
, name
);
36459 fputs (":\n", file
);
36463 if (DEFAULT_ABI
== ABI_V4
36464 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
36465 && !TARGET_SECURE_PLT
36466 && (!constant_pool_empty_p () || crtl
->profile
)
36471 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
36473 fprintf (file
, "\t.long ");
36474 assemble_name (file
, toc_label_name
);
36477 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
36478 assemble_name (file
, buf
);
36482 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
36483 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
36485 if (TARGET_CMODEL
== CMODEL_LARGE
&& rs6000_global_entry_point_needed_p ())
36489 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
36491 fprintf (file
, "\t.quad .TOC.-");
36492 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
36493 assemble_name (file
, buf
);
36497 if (DEFAULT_ABI
== ABI_AIX
)
36499 const char *desc_name
, *orig_name
;
36501 orig_name
= (*targetm
.strip_name_encoding
) (name
);
36502 desc_name
= orig_name
;
36503 while (*desc_name
== '.')
36506 if (TREE_PUBLIC (decl
))
36507 fprintf (file
, "\t.globl %s\n", desc_name
);
36509 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
36510 fprintf (file
, "%s:\n", desc_name
);
36511 fprintf (file
, "\t.long %s\n", orig_name
);
36512 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
36513 fputs ("\t.long 0\n", file
);
36514 fprintf (file
, "\t.previous\n");
36516 ASM_OUTPUT_LABEL (file
, name
);
36519 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
36521 rs6000_elf_file_end (void)
36523 #ifdef HAVE_AS_GNU_ATTRIBUTE
36524 /* ??? The value emitted depends on options active at file end.
36525 Assume anyone using #pragma or attributes that might change
36526 options knows what they are doing. */
36527 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
36528 && rs6000_passes_float
)
36532 if (TARGET_DF_FPR
| TARGET_DF_SPE
)
36534 else if (TARGET_SF_FPR
| TARGET_SF_SPE
)
36538 if (rs6000_passes_long_double
)
36540 if (!TARGET_LONG_DOUBLE_128
)
36542 else if (TARGET_IEEEQUAD
)
36547 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
36549 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
36551 if (rs6000_passes_vector
)
36552 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
36553 (TARGET_ALTIVEC_ABI
? 2
36554 : TARGET_SPE_ABI
? 3
36556 if (rs6000_returns_struct
)
36557 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
36558 aix_struct_return
? 2 : 1);
36561 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
36562 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
36563 file_end_indicate_exec_stack ();
36566 if (flag_split_stack
)
36567 file_end_indicate_split_stack ();
36571 /* We have expanded a CPU builtin, so we need to emit a reference to
36572 the special symbol that LIBC uses to declare it supports the
36573 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
36574 switch_to_section (data_section
);
36575 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
36576 fprintf (asm_out_file
, "\t%s %s\n",
36577 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
36584 #ifndef HAVE_XCOFF_DWARF_EXTRAS
36585 #define HAVE_XCOFF_DWARF_EXTRAS 0
36588 static enum unwind_info_type
36589 rs6000_xcoff_debug_unwind_info (void)
36595 rs6000_xcoff_asm_output_anchor (rtx symbol
)
36599 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
36600 SYMBOL_REF_BLOCK_OFFSET (symbol
));
36601 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
36602 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
36603 fprintf (asm_out_file
, ",");
36604 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
36605 fprintf (asm_out_file
, "\n");
36609 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
36611 fputs (GLOBAL_ASM_OP
, stream
);
36612 RS6000_OUTPUT_BASENAME (stream
, name
);
36613 putc ('\n', stream
);
36616 /* A get_unnamed_decl callback, used for read-only sections. PTR
36617 points to the section string variable. */
36620 rs6000_xcoff_output_readonly_section_asm_op (const void *directive
)
36622 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
36623 *(const char *const *) directive
,
36624 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
36627 /* Likewise for read-write sections. */
36630 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive
)
36632 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
36633 *(const char *const *) directive
,
36634 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
36638 rs6000_xcoff_output_tls_section_asm_op (const void *directive
)
36640 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
36641 *(const char *const *) directive
,
36642 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
36645 /* A get_unnamed_section callback, used for switching to toc_section. */
36648 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
36650 if (TARGET_MINIMAL_TOC
)
36652 /* toc_section is always selected at least once from
36653 rs6000_xcoff_file_start, so this is guaranteed to
36654 always be defined once and only once in each file. */
36655 if (!toc_initialized
)
36657 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
36658 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
36659 toc_initialized
= 1;
36661 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
36662 (TARGET_32BIT
? "" : ",3"));
36665 fputs ("\t.toc\n", asm_out_file
);
36668 /* Implement TARGET_ASM_INIT_SECTIONS. */
36671 rs6000_xcoff_asm_init_sections (void)
36673 read_only_data_section
36674 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
36675 &xcoff_read_only_section_name
);
36677 private_data_section
36678 = get_unnamed_section (SECTION_WRITE
,
36679 rs6000_xcoff_output_readwrite_section_asm_op
,
36680 &xcoff_private_data_section_name
);
36683 = get_unnamed_section (SECTION_TLS
,
36684 rs6000_xcoff_output_tls_section_asm_op
,
36685 &xcoff_tls_data_section_name
);
36687 tls_private_data_section
36688 = get_unnamed_section (SECTION_TLS
,
36689 rs6000_xcoff_output_tls_section_asm_op
,
36690 &xcoff_private_data_section_name
);
36692 read_only_private_data_section
36693 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
36694 &xcoff_private_data_section_name
);
36697 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
36699 readonly_data_section
= read_only_data_section
;
36703 rs6000_xcoff_reloc_rw_mask (void)
36709 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
36710 tree decl ATTRIBUTE_UNUSED
)
36713 static const char * const suffix
[5] = { "PR", "RO", "RW", "TL", "XO" };
36715 if (flags
& SECTION_EXCLUDE
)
36717 else if (flags
& SECTION_DEBUG
)
36719 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
36722 else if (flags
& SECTION_CODE
)
36724 else if (flags
& SECTION_TLS
)
36726 else if (flags
& SECTION_WRITE
)
36731 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
36732 (flags
& SECTION_CODE
) ? "." : "",
36733 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
36736 #define IN_NAMED_SECTION(DECL) \
36737 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36738 && DECL_SECTION_NAME (DECL) != NULL)
36741 rs6000_xcoff_select_section (tree decl
, int reloc
,
36742 unsigned HOST_WIDE_INT align
)
36744 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36746 if (align
> BIGGEST_ALIGNMENT
)
36748 resolve_unique_section (decl
, reloc
, true);
36749 if (IN_NAMED_SECTION (decl
))
36750 return get_named_section (decl
, NULL
, reloc
);
36753 if (decl_readonly_section (decl
, reloc
))
36755 if (TREE_PUBLIC (decl
))
36756 return read_only_data_section
;
36758 return read_only_private_data_section
;
36763 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
36765 if (TREE_PUBLIC (decl
))
36766 return tls_data_section
;
36767 else if (bss_initializer_p (decl
))
36769 /* Convert to COMMON to emit in BSS. */
36770 DECL_COMMON (decl
) = 1;
36771 return tls_comm_section
;
36774 return tls_private_data_section
;
36778 if (TREE_PUBLIC (decl
))
36779 return data_section
;
36781 return private_data_section
;
36786 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
36790 /* Use select_section for private data and uninitialized data with
36791 alignment <= BIGGEST_ALIGNMENT. */
36792 if (!TREE_PUBLIC (decl
)
36793 || DECL_COMMON (decl
)
36794 || (DECL_INITIAL (decl
) == NULL_TREE
36795 && DECL_ALIGN (decl
) <= BIGGEST_ALIGNMENT
)
36796 || DECL_INITIAL (decl
) == error_mark_node
36797 || (flag_zero_initialized_in_bss
36798 && initializer_zerop (DECL_INITIAL (decl
))))
36801 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
36802 name
= (*targetm
.strip_name_encoding
) (name
);
36803 set_decl_section_name (decl
, name
);
36806 /* Select section for constant in constant pool.
36808 On RS/6000, all constants are in the private read-only data area.
36809 However, if this is being placed in the TOC it must be output as a
36813 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
36814 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
36816 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
36817 return toc_section
;
36819 return read_only_private_data_section
;
36822 /* Remove any trailing [DS] or the like from the symbol name. */
36824 static const char *
36825 rs6000_xcoff_strip_name_encoding (const char *name
)
36830 len
= strlen (name
);
36831 if (name
[len
- 1] == ']')
36832 return ggc_alloc_string (name
, len
- 4);
36837 /* Section attributes. AIX is always PIC. */
36839 static unsigned int
36840 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
36842 unsigned int align
;
36843 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
36845 /* Align to at least UNIT size. */
36846 if ((flags
& SECTION_CODE
) != 0 || !decl
|| !DECL_P (decl
))
36847 align
= MIN_UNITS_PER_WORD
;
36849 /* Increase alignment of large objects if not already stricter. */
36850 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
36851 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
36852 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
36854 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
36857 /* Output at beginning of assembler file.
36859 Initialize the section names for the RS/6000 at this point.
36861 Specify filename, including full path, to assembler.
36863 We want to go into the TOC section so at least one .toc will be emitted.
36864 Also, in order to output proper .bs/.es pairs, we need at least one static
36865 [RW] section emitted.
36867 Finally, declare mcount when profiling to make the assembler happy. */
36870 rs6000_xcoff_file_start (void)
36872 rs6000_gen_section_name (&xcoff_bss_section_name
,
36873 main_input_filename
, ".bss_");
36874 rs6000_gen_section_name (&xcoff_private_data_section_name
,
36875 main_input_filename
, ".rw_");
36876 rs6000_gen_section_name (&xcoff_read_only_section_name
,
36877 main_input_filename
, ".ro_");
36878 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
36879 main_input_filename
, ".tls_");
36880 rs6000_gen_section_name (&xcoff_tbss_section_name
,
36881 main_input_filename
, ".tbss_[UL]");
36883 fputs ("\t.file\t", asm_out_file
);
36884 output_quoted_string (asm_out_file
, main_input_filename
);
36885 fputc ('\n', asm_out_file
);
36886 if (write_symbols
!= NO_DEBUG
)
36887 switch_to_section (private_data_section
);
36888 switch_to_section (toc_section
);
36889 switch_to_section (text_section
);
36891 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
36892 rs6000_file_start ();
36895 /* Output at end of assembler file.
36896 On the RS/6000, referencing data should automatically pull in text. */
36899 rs6000_xcoff_file_end (void)
36901 switch_to_section (text_section
);
36902 fputs ("_section_.text:\n", asm_out_file
);
36903 switch_to_section (data_section
);
36904 fputs (TARGET_32BIT
36905 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
36909 struct declare_alias_data
36912 bool function_descriptor
;
36915 /* Declare alias N. A helper function for for_node_and_aliases. */
36918 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
36920 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
36921 /* Main symbol is output specially, because varasm machinery does part of
36922 the job for us - we do not need to declare .globl/lglobs and such. */
36923 if (!n
->alias
|| n
->weakref
)
36926 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
36929 /* Prevent assemble_alias from trying to use .set pseudo operation
36930 that does not behave as expected by the middle-end. */
36931 TREE_ASM_WRITTEN (n
->decl
) = true;
36933 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
36934 char *buffer
= (char *) alloca (strlen (name
) + 2);
36936 int dollar_inside
= 0;
36938 strcpy (buffer
, name
);
36939 p
= strchr (buffer
, '$');
36943 p
= strchr (p
+ 1, '$');
36945 if (TREE_PUBLIC (n
->decl
))
36947 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
36949 if (dollar_inside
) {
36950 if (data
->function_descriptor
)
36951 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
36952 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
36954 if (data
->function_descriptor
)
36956 fputs ("\t.globl .", data
->file
);
36957 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
36958 putc ('\n', data
->file
);
36960 fputs ("\t.globl ", data
->file
);
36961 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
36962 putc ('\n', data
->file
);
36964 #ifdef ASM_WEAKEN_DECL
36965 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
36966 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
36973 if (data
->function_descriptor
)
36974 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
36975 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
36977 if (data
->function_descriptor
)
36979 fputs ("\t.lglobl .", data
->file
);
36980 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
36981 putc ('\n', data
->file
);
36983 fputs ("\t.lglobl ", data
->file
);
36984 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
36985 putc ('\n', data
->file
);
36987 if (data
->function_descriptor
)
36988 fputs (".", data
->file
);
36989 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
36990 fputs (":\n", data
->file
);
36995 #ifdef HAVE_GAS_HIDDEN
36996 /* Helper function to calculate visibility of a DECL
36997 and return the value as a const string. */
36999 static const char *
37000 rs6000_xcoff_visibility (tree decl
)
37002 static const char * const visibility_types
[] = {
37003 "", ",protected", ",hidden", ",internal"
37006 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
37008 if (TREE_CODE (decl
) == FUNCTION_DECL
37009 && cgraph_node::get (decl
)
37010 && cgraph_node::get (decl
)->instrumentation_clone
37011 && cgraph_node::get (decl
)->instrumented_version
)
37012 vis
= DECL_VISIBILITY (cgraph_node::get (decl
)->instrumented_version
->decl
);
37014 return visibility_types
[vis
];
37019 /* This macro produces the initial definition of a function name.
37020 On the RS/6000, we need to place an extra '.' in the function name and
37021 output the function descriptor.
37022 Dollar signs are converted to underscores.
37024 The csect for the function will have already been created when
37025 text_section was selected. We do have to go back to that csect, however.
37027 The third and fourth parameters to the .function pseudo-op (16 and 044)
37028 are placeholders which no longer have any use.
37030 Because AIX assembler's .set command has unexpected semantics, we output
37031 all aliases as alternative labels in front of the definition. */
37034 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
37036 char *buffer
= (char *) alloca (strlen (name
) + 1);
37038 int dollar_inside
= 0;
37039 struct declare_alias_data data
= {file
, false};
37041 strcpy (buffer
, name
);
37042 p
= strchr (buffer
, '$');
37046 p
= strchr (p
+ 1, '$');
37048 if (TREE_PUBLIC (decl
))
37050 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
37052 if (dollar_inside
) {
37053 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
37054 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
37056 fputs ("\t.globl .", file
);
37057 RS6000_OUTPUT_BASENAME (file
, buffer
);
37058 #ifdef HAVE_GAS_HIDDEN
37059 fputs (rs6000_xcoff_visibility (decl
), file
);
37066 if (dollar_inside
) {
37067 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
37068 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
37070 fputs ("\t.lglobl .", file
);
37071 RS6000_OUTPUT_BASENAME (file
, buffer
);
37074 fputs ("\t.csect ", file
);
37075 RS6000_OUTPUT_BASENAME (file
, buffer
);
37076 fputs (TARGET_32BIT
? "[DS]\n" : "[DS],3\n", file
);
37077 RS6000_OUTPUT_BASENAME (file
, buffer
);
37078 fputs (":\n", file
);
37079 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
37081 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
37082 RS6000_OUTPUT_BASENAME (file
, buffer
);
37083 fputs (", TOC[tc0], 0\n", file
);
37085 switch_to_section (function_section (decl
));
37087 RS6000_OUTPUT_BASENAME (file
, buffer
);
37088 fputs (":\n", file
);
37089 data
.function_descriptor
= true;
37090 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
37092 if (!DECL_IGNORED_P (decl
))
37094 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
37095 xcoffout_declare_function (file
, decl
, buffer
);
37096 else if (write_symbols
== DWARF2_DEBUG
)
37098 name
= (*targetm
.strip_name_encoding
) (name
);
37099 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
37106 /* Output assembly language to globalize a symbol from a DECL,
37107 possibly with visibility. */
37110 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
37112 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
37113 fputs (GLOBAL_ASM_OP
, stream
);
37114 RS6000_OUTPUT_BASENAME (stream
, name
);
37115 #ifdef HAVE_GAS_HIDDEN
37116 fputs (rs6000_xcoff_visibility (decl
), stream
);
37118 putc ('\n', stream
);
37121 /* Output assembly language to define a symbol as COMMON from a DECL,
37122 possibly with visibility. */
37125 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
37126 tree decl ATTRIBUTE_UNUSED
,
37128 unsigned HOST_WIDE_INT size
,
37129 unsigned HOST_WIDE_INT align
)
37131 unsigned HOST_WIDE_INT align2
= 2;
37134 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
37138 fputs (COMMON_ASM_OP
, stream
);
37139 RS6000_OUTPUT_BASENAME (stream
, name
);
37142 "," HOST_WIDE_INT_PRINT_UNSIGNED
"," HOST_WIDE_INT_PRINT_UNSIGNED
,
37145 #ifdef HAVE_GAS_HIDDEN
37146 fputs (rs6000_xcoff_visibility (decl
), stream
);
37148 putc ('\n', stream
);
37151 /* This macro produces the initial definition of a object (variable) name.
37152 Because AIX assembler's .set command has unexpected semantics, we output
37153 all aliases as alternative labels in front of the definition. */
37156 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
37158 struct declare_alias_data data
= {file
, false};
37159 RS6000_OUTPUT_BASENAME (file
, name
);
37160 fputs (":\n", file
);
37161 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
37165 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
37168 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
37170 fputs (integer_asm_op (size
, FALSE
), file
);
37171 assemble_name (file
, label
);
37172 fputs ("-$", file
);
37175 /* Output a symbol offset relative to the dbase for the current object.
37176 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
37179 __gcc_unwind_dbase is embedded in all executables/libraries through
37180 libgcc/config/rs6000/crtdbase.S. */
37183 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
37185 fputs (integer_asm_op (size
, FALSE
), file
);
37186 assemble_name (file
, label
);
37187 fputs("-__gcc_unwind_dbase", file
);
37192 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
37196 const char *symname
;
37198 default_encode_section_info (decl
, rtl
, first
);
37200 /* Careful not to prod global register variables. */
37203 symbol
= XEXP (rtl
, 0);
37204 if (GET_CODE (symbol
) != SYMBOL_REF
)
37207 flags
= SYMBOL_REF_FLAGS (symbol
);
37209 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
37210 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
37212 SYMBOL_REF_FLAGS (symbol
) = flags
;
37214 /* Append mapping class to extern decls. */
37215 symname
= XSTR (symbol
, 0);
37216 if (decl
/* sync condition with assemble_external () */
37217 && DECL_P (decl
) && DECL_EXTERNAL (decl
) && TREE_PUBLIC (decl
)
37218 && ((TREE_CODE (decl
) == VAR_DECL
&& !DECL_THREAD_LOCAL_P (decl
))
37219 || TREE_CODE (decl
) == FUNCTION_DECL
)
37220 && symname
[strlen (symname
) - 1] != ']')
37222 char *newname
= (char *) alloca (strlen (symname
) + 5);
37223 strcpy (newname
, symname
);
37224 strcat (newname
, (TREE_CODE (decl
) == FUNCTION_DECL
37225 ? "[DS]" : "[UA]"));
37226 XSTR (symbol
, 0) = ggc_strdup (newname
);
37229 #endif /* HAVE_AS_TLS */
37230 #endif /* TARGET_XCOFF */
37233 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
37234 const char *name
, const char *val
)
37236 fputs ("\t.weak\t", stream
);
37237 RS6000_OUTPUT_BASENAME (stream
, name
);
37238 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
37239 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
37242 fputs ("[DS]", stream
);
37243 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37245 fputs (rs6000_xcoff_visibility (decl
), stream
);
37247 fputs ("\n\t.weak\t.", stream
);
37248 RS6000_OUTPUT_BASENAME (stream
, name
);
37250 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37252 fputs (rs6000_xcoff_visibility (decl
), stream
);
37254 fputc ('\n', stream
);
37257 #ifdef ASM_OUTPUT_DEF
37258 ASM_OUTPUT_DEF (stream
, name
, val
);
37260 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
37261 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
37263 fputs ("\t.set\t.", stream
);
37264 RS6000_OUTPUT_BASENAME (stream
, name
);
37265 fputs (",.", stream
);
37266 RS6000_OUTPUT_BASENAME (stream
, val
);
37267 fputc ('\n', stream
);
37273 /* Return true if INSN should not be copied. */
37276 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
37278 return recog_memoized (insn
) >= 0
37279 && get_attr_cannot_copy (insn
);
37282 /* Compute a (partial) cost for rtx X. Return true if the complete
37283 cost has been computed, and false if subexpressions should be
37284 scanned. In either case, *TOTAL contains the cost result. */
37287 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
37288 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
37290 int code
= GET_CODE (x
);
37294 /* On the RS/6000, if it is valid in the insn, it is free. */
37296 if (((outer_code
== SET
37297 || outer_code
== PLUS
37298 || outer_code
== MINUS
)
37299 && (satisfies_constraint_I (x
)
37300 || satisfies_constraint_L (x
)))
37301 || (outer_code
== AND
37302 && (satisfies_constraint_K (x
)
37304 ? satisfies_constraint_L (x
)
37305 : satisfies_constraint_J (x
))))
37306 || ((outer_code
== IOR
|| outer_code
== XOR
)
37307 && (satisfies_constraint_K (x
)
37309 ? satisfies_constraint_L (x
)
37310 : satisfies_constraint_J (x
))))
37311 || outer_code
== ASHIFT
37312 || outer_code
== ASHIFTRT
37313 || outer_code
== LSHIFTRT
37314 || outer_code
== ROTATE
37315 || outer_code
== ROTATERT
37316 || outer_code
== ZERO_EXTRACT
37317 || (outer_code
== MULT
37318 && satisfies_constraint_I (x
))
37319 || ((outer_code
== DIV
|| outer_code
== UDIV
37320 || outer_code
== MOD
|| outer_code
== UMOD
)
37321 && exact_log2 (INTVAL (x
)) >= 0)
37322 || (outer_code
== COMPARE
37323 && (satisfies_constraint_I (x
)
37324 || satisfies_constraint_K (x
)))
37325 || ((outer_code
== EQ
|| outer_code
== NE
)
37326 && (satisfies_constraint_I (x
)
37327 || satisfies_constraint_K (x
)
37329 ? satisfies_constraint_L (x
)
37330 : satisfies_constraint_J (x
))))
37331 || (outer_code
== GTU
37332 && satisfies_constraint_I (x
))
37333 || (outer_code
== LTU
37334 && satisfies_constraint_P (x
)))
37339 else if ((outer_code
== PLUS
37340 && reg_or_add_cint_operand (x
, VOIDmode
))
37341 || (outer_code
== MINUS
37342 && reg_or_sub_cint_operand (x
, VOIDmode
))
37343 || ((outer_code
== SET
37344 || outer_code
== IOR
37345 || outer_code
== XOR
)
37347 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
37349 *total
= COSTS_N_INSNS (1);
37355 case CONST_WIDE_INT
:
37359 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37363 /* When optimizing for size, MEM should be slightly more expensive
37364 than generating address, e.g., (plus (reg) (const)).
37365 L1 cache latency is about two instructions. */
37366 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37367 if (SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (x
)))
37368 *total
+= COSTS_N_INSNS (100);
37377 if (FLOAT_MODE_P (mode
))
37378 *total
= rs6000_cost
->fp
;
37380 *total
= COSTS_N_INSNS (1);
37384 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
37385 && satisfies_constraint_I (XEXP (x
, 1)))
37387 if (INTVAL (XEXP (x
, 1)) >= -256
37388 && INTVAL (XEXP (x
, 1)) <= 255)
37389 *total
= rs6000_cost
->mulsi_const9
;
37391 *total
= rs6000_cost
->mulsi_const
;
37393 else if (mode
== SFmode
)
37394 *total
= rs6000_cost
->fp
;
37395 else if (FLOAT_MODE_P (mode
))
37396 *total
= rs6000_cost
->dmul
;
37397 else if (mode
== DImode
)
37398 *total
= rs6000_cost
->muldi
;
37400 *total
= rs6000_cost
->mulsi
;
37404 if (mode
== SFmode
)
37405 *total
= rs6000_cost
->fp
;
37407 *total
= rs6000_cost
->dmul
;
37412 if (FLOAT_MODE_P (mode
))
37414 *total
= mode
== DFmode
? rs6000_cost
->ddiv
37415 : rs6000_cost
->sdiv
;
37422 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
37423 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
37425 if (code
== DIV
|| code
== MOD
)
37427 *total
= COSTS_N_INSNS (2);
37430 *total
= COSTS_N_INSNS (1);
37434 if (GET_MODE (XEXP (x
, 1)) == DImode
)
37435 *total
= rs6000_cost
->divdi
;
37437 *total
= rs6000_cost
->divsi
;
37439 /* Add in shift and subtract for MOD unless we have a mod instruction. */
37440 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
37441 *total
+= COSTS_N_INSNS (2);
37445 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
37449 *total
= COSTS_N_INSNS (4);
37453 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
37457 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
37461 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
37464 *total
= COSTS_N_INSNS (1);
37468 if (CONST_INT_P (XEXP (x
, 1)))
37470 rtx left
= XEXP (x
, 0);
37471 rtx_code left_code
= GET_CODE (left
);
37473 /* rotate-and-mask: 1 insn. */
37474 if ((left_code
== ROTATE
37475 || left_code
== ASHIFT
37476 || left_code
== LSHIFTRT
)
37477 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
37479 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
37480 if (!CONST_INT_P (XEXP (left
, 1)))
37481 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
37482 *total
+= COSTS_N_INSNS (1);
37486 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
37487 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
37488 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
37489 || (val
& 0xffff) == val
37490 || (val
& 0xffff0000) == val
37491 || ((val
& 0xffff) == 0 && mode
== SImode
))
37493 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
37494 *total
+= COSTS_N_INSNS (1);
37499 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
37501 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
37502 *total
+= COSTS_N_INSNS (2);
37507 *total
= COSTS_N_INSNS (1);
37512 *total
= COSTS_N_INSNS (1);
37518 *total
= COSTS_N_INSNS (1);
37522 /* The EXTSWSLI instruction is a combined instruction. Don't count both
37523 the sign extend and shift separately within the insn. */
37524 if (TARGET_EXTSWSLI
&& mode
== DImode
37525 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
37526 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
37537 /* Handle mul_highpart. */
37538 if (outer_code
== TRUNCATE
37539 && GET_CODE (XEXP (x
, 0)) == MULT
)
37541 if (mode
== DImode
)
37542 *total
= rs6000_cost
->muldi
;
37544 *total
= rs6000_cost
->mulsi
;
37547 else if (outer_code
== AND
)
37550 *total
= COSTS_N_INSNS (1);
37555 if (GET_CODE (XEXP (x
, 0)) == MEM
)
37558 *total
= COSTS_N_INSNS (1);
37564 if (!FLOAT_MODE_P (mode
))
37566 *total
= COSTS_N_INSNS (1);
37572 case UNSIGNED_FLOAT
:
37575 case FLOAT_TRUNCATE
:
37576 *total
= rs6000_cost
->fp
;
37580 if (mode
== DFmode
)
37581 *total
= rs6000_cost
->sfdf_convert
;
37583 *total
= rs6000_cost
->fp
;
37587 switch (XINT (x
, 1))
37590 *total
= rs6000_cost
->fp
;
37602 *total
= COSTS_N_INSNS (1);
37605 else if (FLOAT_MODE_P (mode
)
37606 && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
&& TARGET_FPRS
)
37608 *total
= rs6000_cost
->fp
;
37617 /* Carry bit requires mode == Pmode.
37618 NEG or PLUS already counted so only add one. */
37620 && (outer_code
== NEG
|| outer_code
== PLUS
))
37622 *total
= COSTS_N_INSNS (1);
37625 if (outer_code
== SET
)
37627 if (XEXP (x
, 1) == const0_rtx
)
37629 if (TARGET_ISEL
&& !TARGET_MFCRF
)
37630 *total
= COSTS_N_INSNS (8);
37632 *total
= COSTS_N_INSNS (2);
37637 *total
= COSTS_N_INSNS (3);
37646 if (outer_code
== SET
&& (XEXP (x
, 1) == const0_rtx
))
37648 if (TARGET_ISEL
&& !TARGET_MFCRF
)
37649 *total
= COSTS_N_INSNS (8);
37651 *total
= COSTS_N_INSNS (2);
37655 if (outer_code
== COMPARE
)
37669 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
37672 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
37673 int opno
, int *total
, bool speed
)
37675 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
37678 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
37679 "opno = %d, total = %d, speed = %s, x:\n",
37680 ret
? "complete" : "scan inner",
37681 GET_MODE_NAME (mode
),
37682 GET_RTX_NAME (outer_code
),
37685 speed
? "true" : "false");
37692 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
37695 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
37696 addr_space_t as
, bool speed
)
37698 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
37700 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
37701 ret
, speed
? "true" : "false");
37708 /* A C expression returning the cost of moving data from a register of class
37709 CLASS1 to one of CLASS2. */
37712 rs6000_register_move_cost (machine_mode mode
,
37713 reg_class_t from
, reg_class_t to
)
37717 if (TARGET_DEBUG_COST
)
37720 /* Moves from/to GENERAL_REGS. */
37721 if (reg_classes_intersect_p (to
, GENERAL_REGS
)
37722 || reg_classes_intersect_p (from
, GENERAL_REGS
))
37724 reg_class_t rclass
= from
;
37726 if (! reg_classes_intersect_p (to
, GENERAL_REGS
))
37729 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
37730 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
37731 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
37733 /* It's more expensive to move CR_REGS than CR0_REGS because of the
37735 else if (rclass
== CR_REGS
)
37738 /* For those processors that have slow LR/CTR moves, make them more
37739 expensive than memory in order to bias spills to memory .*/
37740 else if ((rs6000_cpu
== PROCESSOR_POWER6
37741 || rs6000_cpu
== PROCESSOR_POWER7
37742 || rs6000_cpu
== PROCESSOR_POWER8
37743 || rs6000_cpu
== PROCESSOR_POWER9
)
37744 && reg_classes_intersect_p (rclass
, LINK_OR_CTR_REGS
))
37745 ret
= 6 * hard_regno_nregs
[0][mode
];
37748 /* A move will cost one instruction per GPR moved. */
37749 ret
= 2 * hard_regno_nregs
[0][mode
];
37752 /* If we have VSX, we can easily move between FPR or Altivec registers. */
37753 else if (VECTOR_MEM_VSX_P (mode
)
37754 && reg_classes_intersect_p (to
, VSX_REGS
)
37755 && reg_classes_intersect_p (from
, VSX_REGS
))
37756 ret
= 2 * hard_regno_nregs
[FIRST_FPR_REGNO
][mode
];
37758 /* Moving between two similar registers is just one instruction. */
37759 else if (reg_classes_intersect_p (to
, from
))
37760 ret
= (FLOAT128_2REG_P (mode
)) ? 4 : 2;
37762 /* Everything else has to go through GENERAL_REGS. */
37764 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
37765 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
37767 if (TARGET_DEBUG_COST
)
37769 if (dbg_cost_ctrl
== 1)
37771 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37772 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
37773 reg_class_names
[to
]);
37780 /* A C expressions returning the cost of moving data of MODE from a register to
37784 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
37785 bool in ATTRIBUTE_UNUSED
)
37789 if (TARGET_DEBUG_COST
)
37792 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
37793 ret
= 4 * hard_regno_nregs
[0][mode
];
37794 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
37795 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
37796 ret
= 4 * hard_regno_nregs
[32][mode
];
37797 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
37798 ret
= 4 * hard_regno_nregs
[FIRST_ALTIVEC_REGNO
][mode
];
37800 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
37802 if (TARGET_DEBUG_COST
)
37804 if (dbg_cost_ctrl
== 1)
37806 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37807 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
37814 /* Returns a code for a target-specific builtin that implements
37815 reciprocal of the function, or NULL_TREE if not available. */
37818 rs6000_builtin_reciprocal (tree fndecl
)
37820 switch (DECL_FUNCTION_CODE (fndecl
))
37822 case VSX_BUILTIN_XVSQRTDP
:
37823 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode
))
37826 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
37828 case VSX_BUILTIN_XVSQRTSP
:
37829 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode
))
37832 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_4SF
];
37839 /* Load up a constant. If the mode is a vector mode, splat the value across
37840 all of the vector elements. */
37843 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
37847 if (mode
== SFmode
|| mode
== DFmode
)
37849 rtx d
= const_double_from_real_value (dconst
, mode
);
37850 reg
= force_reg (mode
, d
);
37852 else if (mode
== V4SFmode
)
37854 rtx d
= const_double_from_real_value (dconst
, SFmode
);
37855 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
37856 reg
= gen_reg_rtx (mode
);
37857 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
37859 else if (mode
== V2DFmode
)
37861 rtx d
= const_double_from_real_value (dconst
, DFmode
);
37862 rtvec v
= gen_rtvec (2, d
, d
);
37863 reg
= gen_reg_rtx (mode
);
37864 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
37867 gcc_unreachable ();
37872 /* Generate an FMA instruction. */
37875 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
37877 machine_mode mode
= GET_MODE (target
);
37880 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
37881 gcc_assert (dst
!= NULL
);
37884 emit_move_insn (target
, dst
);
37887 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
37890 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
37892 machine_mode mode
= GET_MODE (dst
);
37895 /* This is a tad more complicated, since the fnma_optab is for
37896 a different expression: fma(-m1, m2, a), which is the same
37897 thing except in the case of signed zeros.
37899 Fortunately we know that if FMA is supported that FNMSUB is
37900 also supported in the ISA. Just expand it directly. */
37902 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
37904 r
= gen_rtx_NEG (mode
, a
);
37905 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
37906 r
= gen_rtx_NEG (mode
, r
);
37907 emit_insn (gen_rtx_SET (dst
, r
));
37910 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
37911 add a reg_note saying that this was a division. Support both scalar and
37912 vector divide. Assumes no trapping math and finite arguments. */
37915 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
37917 machine_mode mode
= GET_MODE (dst
);
37918 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
37921 /* Low precision estimates guarantee 5 bits of accuracy. High
37922 precision estimates guarantee 14 bits of accuracy. SFmode
37923 requires 23 bits of accuracy. DFmode requires 52 bits of
37924 accuracy. Each pass at least doubles the accuracy, leading
37925 to the following. */
37926 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
37927 if (mode
== DFmode
|| mode
== V2DFmode
)
37930 enum insn_code code
= optab_handler (smul_optab
, mode
);
37931 insn_gen_fn gen_mul
= GEN_FCN (code
);
37933 gcc_assert (code
!= CODE_FOR_nothing
);
37935 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
37937 /* x0 = 1./d estimate */
37938 x0
= gen_reg_rtx (mode
);
37939 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
37942 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
37945 /* e0 = 1. - d * x0 */
37946 e0
= gen_reg_rtx (mode
);
37947 rs6000_emit_nmsub (e0
, d
, x0
, one
);
37949 /* x1 = x0 + e0 * x0 */
37950 x1
= gen_reg_rtx (mode
);
37951 rs6000_emit_madd (x1
, e0
, x0
, x0
);
37953 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
37954 ++i
, xprev
= xnext
, eprev
= enext
) {
37956 /* enext = eprev * eprev */
37957 enext
= gen_reg_rtx (mode
);
37958 emit_insn (gen_mul (enext
, eprev
, eprev
));
37960 /* xnext = xprev + enext * xprev */
37961 xnext
= gen_reg_rtx (mode
);
37962 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
37968 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
37970 /* u = n * xprev */
37971 u
= gen_reg_rtx (mode
);
37972 emit_insn (gen_mul (u
, n
, xprev
));
37974 /* v = n - (d * u) */
37975 v
= gen_reg_rtx (mode
);
37976 rs6000_emit_nmsub (v
, d
, u
, n
);
37978 /* dst = (v * xprev) + u */
37979 rs6000_emit_madd (dst
, v
, xprev
, u
);
37982 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
37985 /* Goldschmidt's Algorithm for single/double-precision floating point
37986 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
37989 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
37991 machine_mode mode
= GET_MODE (src
);
37992 rtx e
= gen_reg_rtx (mode
);
37993 rtx g
= gen_reg_rtx (mode
);
37994 rtx h
= gen_reg_rtx (mode
);
37996 /* Low precision estimates guarantee 5 bits of accuracy. High
37997 precision estimates guarantee 14 bits of accuracy. SFmode
37998 requires 23 bits of accuracy. DFmode requires 52 bits of
37999 accuracy. Each pass at least doubles the accuracy, leading
38000 to the following. */
38001 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
38002 if (mode
== DFmode
|| mode
== V2DFmode
)
38007 enum insn_code code
= optab_handler (smul_optab
, mode
);
38008 insn_gen_fn gen_mul
= GEN_FCN (code
);
38010 gcc_assert (code
!= CODE_FOR_nothing
);
38012 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
38014 /* e = rsqrt estimate */
38015 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
38018 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
38021 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
38023 if (mode
== SFmode
)
38025 rtx target
= emit_conditional_move (e
, GT
, src
, zero
, mode
,
38028 emit_move_insn (e
, target
);
38032 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
38033 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
38037 /* g = sqrt estimate. */
38038 emit_insn (gen_mul (g
, e
, src
));
38039 /* h = 1/(2*sqrt) estimate. */
38040 emit_insn (gen_mul (h
, e
, mhalf
));
38046 rtx t
= gen_reg_rtx (mode
);
38047 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
38048 /* Apply correction directly to 1/rsqrt estimate. */
38049 rs6000_emit_madd (dst
, e
, t
, e
);
38053 for (i
= 0; i
< passes
; i
++)
38055 rtx t1
= gen_reg_rtx (mode
);
38056 rtx g1
= gen_reg_rtx (mode
);
38057 rtx h1
= gen_reg_rtx (mode
);
38059 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
38060 rs6000_emit_madd (g1
, g
, t1
, g
);
38061 rs6000_emit_madd (h1
, h
, t1
, h
);
38066 /* Multiply by 2 for 1/rsqrt. */
38067 emit_insn (gen_add3_insn (dst
, h
, h
));
38072 rtx t
= gen_reg_rtx (mode
);
38073 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
38074 rs6000_emit_madd (dst
, g
, t
, g
);
38080 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
38081 (Power7) targets. DST is the target, and SRC is the argument operand. */
38084 rs6000_emit_popcount (rtx dst
, rtx src
)
38086 machine_mode mode
= GET_MODE (dst
);
38089 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
38090 if (TARGET_POPCNTD
)
38092 if (mode
== SImode
)
38093 emit_insn (gen_popcntdsi2 (dst
, src
));
38095 emit_insn (gen_popcntddi2 (dst
, src
));
38099 tmp1
= gen_reg_rtx (mode
);
38101 if (mode
== SImode
)
38103 emit_insn (gen_popcntbsi2 (tmp1
, src
));
38104 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
38106 tmp2
= force_reg (SImode
, tmp2
);
38107 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
38111 emit_insn (gen_popcntbdi2 (tmp1
, src
));
38112 tmp2
= expand_mult (DImode
, tmp1
,
38113 GEN_INT ((HOST_WIDE_INT
)
38114 0x01010101 << 32 | 0x01010101),
38116 tmp2
= force_reg (DImode
, tmp2
);
38117 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
38122 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
38123 target, and SRC is the argument operand. */
38126 rs6000_emit_parity (rtx dst
, rtx src
)
38128 machine_mode mode
= GET_MODE (dst
);
38131 tmp
= gen_reg_rtx (mode
);
38133 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
38136 if (mode
== SImode
)
38138 emit_insn (gen_popcntbsi2 (tmp
, src
));
38139 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
38143 emit_insn (gen_popcntbdi2 (tmp
, src
));
38144 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
38149 if (mode
== SImode
)
38151 /* Is mult+shift >= shift+xor+shift+xor? */
38152 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
38154 rtx tmp1
, tmp2
, tmp3
, tmp4
;
38156 tmp1
= gen_reg_rtx (SImode
);
38157 emit_insn (gen_popcntbsi2 (tmp1
, src
));
38159 tmp2
= gen_reg_rtx (SImode
);
38160 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
38161 tmp3
= gen_reg_rtx (SImode
);
38162 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
38164 tmp4
= gen_reg_rtx (SImode
);
38165 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
38166 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
38169 rs6000_emit_popcount (tmp
, src
);
38170 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
38174 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
38175 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
38177 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
38179 tmp1
= gen_reg_rtx (DImode
);
38180 emit_insn (gen_popcntbdi2 (tmp1
, src
));
38182 tmp2
= gen_reg_rtx (DImode
);
38183 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
38184 tmp3
= gen_reg_rtx (DImode
);
38185 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
38187 tmp4
= gen_reg_rtx (DImode
);
38188 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
38189 tmp5
= gen_reg_rtx (DImode
);
38190 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
38192 tmp6
= gen_reg_rtx (DImode
);
38193 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
38194 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
38197 rs6000_emit_popcount (tmp
, src
);
38198 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
38202 /* Expand an Altivec constant permutation for little endian mode.
38203 There are two issues: First, the two input operands must be
38204 swapped so that together they form a double-wide array in LE
38205 order. Second, the vperm instruction has surprising behavior
38206 in LE mode: it interprets the elements of the source vectors
38207 in BE mode ("left to right") and interprets the elements of
38208 the destination vector in LE mode ("right to left"). To
38209 correct for this, we must subtract each element of the permute
38210 control vector from 31.
38212 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
38213 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
38214 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
38215 serve as the permute control vector. Then, in BE mode,
38219 places the desired result in vr9. However, in LE mode the
38220 vector contents will be
38222 vr10 = 00000003 00000002 00000001 00000000
38223 vr11 = 00000007 00000006 00000005 00000004
38225 The result of the vperm using the same permute control vector is
38227 vr9 = 05000000 07000000 01000000 03000000
38229 That is, the leftmost 4 bytes of vr10 are interpreted as the
38230 source for the rightmost 4 bytes of vr9, and so on.
38232 If we change the permute control vector to
38234 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
38242 vr9 = 00000006 00000004 00000002 00000000. */
38245 altivec_expand_vec_perm_const_le (rtx operands
[4])
38249 rtx constv
, unspec
;
38250 rtx target
= operands
[0];
38251 rtx op0
= operands
[1];
38252 rtx op1
= operands
[2];
38253 rtx sel
= operands
[3];
38255 /* Unpack and adjust the constant selector. */
38256 for (i
= 0; i
< 16; ++i
)
38258 rtx e
= XVECEXP (sel
, 0, i
);
38259 unsigned int elt
= 31 - (INTVAL (e
) & 31);
38260 perm
[i
] = GEN_INT (elt
);
38263 /* Expand to a permute, swapping the inputs and using the
38264 adjusted selector. */
38266 op0
= force_reg (V16QImode
, op0
);
38268 op1
= force_reg (V16QImode
, op1
);
38270 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
38271 constv
= force_reg (V16QImode
, constv
);
38272 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
38274 if (!REG_P (target
))
38276 rtx tmp
= gen_reg_rtx (V16QImode
);
38277 emit_move_insn (tmp
, unspec
);
38281 emit_move_insn (target
, unspec
);
38284 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
38285 permute control vector. But here it's not a constant, so we must
38286 generate a vector NAND or NOR to do the adjustment. */
38289 altivec_expand_vec_perm_le (rtx operands
[4])
38291 rtx notx
, iorx
, unspec
;
38292 rtx target
= operands
[0];
38293 rtx op0
= operands
[1];
38294 rtx op1
= operands
[2];
38295 rtx sel
= operands
[3];
38297 rtx norreg
= gen_reg_rtx (V16QImode
);
38298 machine_mode mode
= GET_MODE (target
);
38300 /* Get everything in regs so the pattern matches. */
38302 op0
= force_reg (mode
, op0
);
38304 op1
= force_reg (mode
, op1
);
38306 sel
= force_reg (V16QImode
, sel
);
38307 if (!REG_P (target
))
38308 tmp
= gen_reg_rtx (mode
);
38310 if (TARGET_P9_VECTOR
)
38312 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op0
, op1
, sel
),
38317 /* Invert the selector with a VNAND if available, else a VNOR.
38318 The VNAND is preferred for future fusion opportunities. */
38319 notx
= gen_rtx_NOT (V16QImode
, sel
);
38320 iorx
= (TARGET_P8_VECTOR
38321 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
38322 : gen_rtx_AND (V16QImode
, notx
, notx
));
38323 emit_insn (gen_rtx_SET (norreg
, iorx
));
38325 /* Permute with operands reversed and adjusted selector. */
38326 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
38330 /* Copy into target, possibly by way of a register. */
38331 if (!REG_P (target
))
38333 emit_move_insn (tmp
, unspec
);
38337 emit_move_insn (target
, unspec
);
38340 /* Expand an Altivec constant permutation. Return true if we match
38341 an efficient implementation; false to fall back to VPERM. */
38344 altivec_expand_vec_perm_const (rtx operands
[4])
38346 struct altivec_perm_insn
{
38347 HOST_WIDE_INT mask
;
38348 enum insn_code impl
;
38349 unsigned char perm
[16];
38351 static const struct altivec_perm_insn patterns
[] = {
38352 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuhum_direct
,
38353 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
38354 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuwum_direct
,
38355 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
38356 { OPTION_MASK_ALTIVEC
,
38357 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
38358 : CODE_FOR_altivec_vmrglb_direct
),
38359 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
38360 { OPTION_MASK_ALTIVEC
,
38361 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
38362 : CODE_FOR_altivec_vmrglh_direct
),
38363 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
38364 { OPTION_MASK_ALTIVEC
,
38365 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct
38366 : CODE_FOR_altivec_vmrglw_direct
),
38367 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
38368 { OPTION_MASK_ALTIVEC
,
38369 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
38370 : CODE_FOR_altivec_vmrghb_direct
),
38371 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
38372 { OPTION_MASK_ALTIVEC
,
38373 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
38374 : CODE_FOR_altivec_vmrghh_direct
),
38375 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
38376 { OPTION_MASK_ALTIVEC
,
38377 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct
38378 : CODE_FOR_altivec_vmrghw_direct
),
38379 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
38380 { OPTION_MASK_P8_VECTOR
, CODE_FOR_p8_vmrgew
,
38381 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
38382 { OPTION_MASK_P8_VECTOR
, CODE_FOR_p8_vmrgow
,
38383 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
38386 unsigned int i
, j
, elt
, which
;
38387 unsigned char perm
[16];
38388 rtx target
, op0
, op1
, sel
, x
;
38391 target
= operands
[0];
38396 /* Unpack the constant selector. */
38397 for (i
= which
= 0; i
< 16; ++i
)
38399 rtx e
= XVECEXP (sel
, 0, i
);
38400 elt
= INTVAL (e
) & 31;
38401 which
|= (elt
< 16 ? 1 : 2);
38405 /* Simplify the constant selector based on operands. */
38409 gcc_unreachable ();
38413 if (!rtx_equal_p (op0
, op1
))
38418 for (i
= 0; i
< 16; ++i
)
38430 /* Look for splat patterns. */
38435 for (i
= 0; i
< 16; ++i
)
38436 if (perm
[i
] != elt
)
38440 if (!BYTES_BIG_ENDIAN
)
38442 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
38448 for (i
= 0; i
< 16; i
+= 2)
38449 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
38453 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
38454 x
= gen_reg_rtx (V8HImode
);
38455 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
38457 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
38464 for (i
= 0; i
< 16; i
+= 4)
38466 || perm
[i
+ 1] != elt
+ 1
38467 || perm
[i
+ 2] != elt
+ 2
38468 || perm
[i
+ 3] != elt
+ 3)
38472 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
38473 x
= gen_reg_rtx (V4SImode
);
38474 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
38476 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
38482 /* Look for merge and pack patterns. */
38483 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
38487 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
38490 elt
= patterns
[j
].perm
[0];
38491 if (perm
[0] == elt
)
38493 else if (perm
[0] == elt
+ 16)
38497 for (i
= 1; i
< 16; ++i
)
38499 elt
= patterns
[j
].perm
[i
];
38501 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
38502 else if (one_vec
&& elt
>= 16)
38504 if (perm
[i
] != elt
)
38509 enum insn_code icode
= patterns
[j
].impl
;
38510 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
38511 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
38513 /* For little-endian, don't use vpkuwum and vpkuhum if the
38514 underlying vector type is not V4SI and V8HI, respectively.
38515 For example, using vpkuwum with a V8HI picks up the even
38516 halfwords (BE numbering) when the even halfwords (LE
38517 numbering) are what we need. */
38518 if (!BYTES_BIG_ENDIAN
38519 && icode
== CODE_FOR_altivec_vpkuwum_direct
38520 && ((GET_CODE (op0
) == REG
38521 && GET_MODE (op0
) != V4SImode
)
38522 || (GET_CODE (op0
) == SUBREG
38523 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
38525 if (!BYTES_BIG_ENDIAN
38526 && icode
== CODE_FOR_altivec_vpkuhum_direct
38527 && ((GET_CODE (op0
) == REG
38528 && GET_MODE (op0
) != V8HImode
)
38529 || (GET_CODE (op0
) == SUBREG
38530 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
38533 /* For little-endian, the two input operands must be swapped
38534 (or swapped back) to ensure proper right-to-left numbering
38536 if (swapped
^ !BYTES_BIG_ENDIAN
)
38537 std::swap (op0
, op1
);
38538 if (imode
!= V16QImode
)
38540 op0
= gen_lowpart (imode
, op0
);
38541 op1
= gen_lowpart (imode
, op1
);
38543 if (omode
== V16QImode
)
38546 x
= gen_reg_rtx (omode
);
38547 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
38548 if (omode
!= V16QImode
)
38549 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
38554 if (!BYTES_BIG_ENDIAN
)
38556 altivec_expand_vec_perm_const_le (operands
);
38563 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38564 Return true if we match an efficient implementation. */
38567 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
38568 unsigned char perm0
, unsigned char perm1
)
38572 /* If both selectors come from the same operand, fold to single op. */
38573 if ((perm0
& 2) == (perm1
& 2))
38580 /* If both operands are equal, fold to simpler permutation. */
38581 if (rtx_equal_p (op0
, op1
))
38584 perm1
= (perm1
& 1) + 2;
38586 /* If the first selector comes from the second operand, swap. */
38587 else if (perm0
& 2)
38593 std::swap (op0
, op1
);
38595 /* If the second selector does not come from the second operand, fail. */
38596 else if ((perm1
& 2) == 0)
38600 if (target
!= NULL
)
38602 machine_mode vmode
, dmode
;
38605 vmode
= GET_MODE (target
);
38606 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
38607 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4);
38608 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
38609 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
38610 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
38611 emit_insn (gen_rtx_SET (target
, x
));
38617 rs6000_expand_vec_perm_const (rtx operands
[4])
38619 rtx target
, op0
, op1
, sel
;
38620 unsigned char perm0
, perm1
;
38622 target
= operands
[0];
38627 /* Unpack the constant selector. */
38628 perm0
= INTVAL (XVECEXP (sel
, 0, 0)) & 3;
38629 perm1
= INTVAL (XVECEXP (sel
, 0, 1)) & 3;
38631 return rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, perm0
, perm1
);
38634 /* Test whether a constant permutation is supported. */
38637 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode
,
38638 const unsigned char *sel
)
38640 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38641 if (TARGET_ALTIVEC
)
38644 /* Check for ps_merge* or evmerge* insns. */
38645 if ((TARGET_PAIRED_FLOAT
&& vmode
== V2SFmode
)
38646 || (TARGET_SPE
&& vmode
== V2SImode
))
38648 rtx op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
38649 rtx op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
38650 return rs6000_expand_vec_perm_const_1 (NULL
, op0
, op1
, sel
[0], sel
[1]);
38656 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
38659 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
38660 machine_mode vmode
, unsigned nelt
, rtx perm
[])
38662 machine_mode imode
;
38666 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
)
38667 imode
= mode_for_vector
38668 (int_mode_for_mode (GET_MODE_INNER (vmode
)).require (), nelt
);
38670 x
= gen_rtx_CONST_VECTOR (imode
, gen_rtvec_v (nelt
, perm
));
38671 x
= expand_vec_perm (vmode
, op0
, op1
, x
, target
);
38673 emit_move_insn (target
, x
);
38676 /* Expand an extract even operation. */
38679 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
38681 machine_mode vmode
= GET_MODE (target
);
38682 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
38685 for (i
= 0; i
< nelt
; i
++)
38686 perm
[i
] = GEN_INT (i
* 2);
38688 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, nelt
, perm
);
38691 /* Expand a vector interleave operation. */
38694 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
38696 machine_mode vmode
= GET_MODE (target
);
38697 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
38700 high
= (highp
? 0 : nelt
/ 2);
38701 for (i
= 0; i
< nelt
/ 2; i
++)
38703 perm
[i
* 2] = GEN_INT (i
+ high
);
38704 perm
[i
* 2 + 1] = GEN_INT (i
+ nelt
+ high
);
38707 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, nelt
, perm
);
38710 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
38712 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
38714 HOST_WIDE_INT
hwi_scale (scale
);
38715 REAL_VALUE_TYPE r_pow
;
38716 rtvec v
= rtvec_alloc (2);
38718 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
38719 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
38720 elt
= const_double_from_real_value (r_pow
, DFmode
);
38721 RTVEC_ELT (v
, 0) = elt
;
38722 RTVEC_ELT (v
, 1) = elt
;
38723 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
38724 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
38727 /* Return an RTX representing where to find the function value of a
38728 function returning MODE. */
38730 rs6000_complex_function_value (machine_mode mode
)
38732 unsigned int regno
;
38734 machine_mode inner
= GET_MODE_INNER (mode
);
38735 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
38737 if (TARGET_FLOAT128_TYPE
38739 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
38740 regno
= ALTIVEC_ARG_RETURN
;
38742 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38743 regno
= FP_ARG_RETURN
;
38747 regno
= GP_ARG_RETURN
;
38749 /* 32-bit is OK since it'll go in r3/r4. */
38750 if (TARGET_32BIT
&& inner_bytes
>= 4)
38751 return gen_rtx_REG (mode
, regno
);
38754 if (inner_bytes
>= 8)
38755 return gen_rtx_REG (mode
, regno
);
38757 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
38759 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
38760 GEN_INT (inner_bytes
));
38761 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
38764 /* Return an rtx describing a return value of MODE as a PARALLEL
38765 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38766 stride REG_STRIDE. */
38769 rs6000_parallel_return (machine_mode mode
,
38770 int n_elts
, machine_mode elt_mode
,
38771 unsigned int regno
, unsigned int reg_stride
)
38773 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
38776 for (i
= 0; i
< n_elts
; i
++)
38778 rtx r
= gen_rtx_REG (elt_mode
, regno
);
38779 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
38780 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
38781 regno
+= reg_stride
;
38787 /* Target hook for TARGET_FUNCTION_VALUE.
38789 On the SPE, both FPs and vectors are returned in r3.
38791 On RS/6000 an integer value is in r3 and a floating-point value is in
38792 fp1, unless -msoft-float. */
38795 rs6000_function_value (const_tree valtype
,
38796 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
38797 bool outgoing ATTRIBUTE_UNUSED
)
38800 unsigned int regno
;
38801 machine_mode elt_mode
;
38804 /* Special handling for structs in darwin64. */
38806 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
38808 CUMULATIVE_ARGS valcum
;
38812 valcum
.fregno
= FP_ARG_MIN_REG
;
38813 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
38814 /* Do a trial code generation as if this were going to be passed as
38815 an argument; if any part goes in memory, we return NULL. */
38816 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
38819 /* Otherwise fall through to standard ABI rules. */
38822 mode
= TYPE_MODE (valtype
);
38824 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
38825 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
38827 int first_reg
, n_regs
;
38829 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
38831 /* _Decimal128 must use even/odd register pairs. */
38832 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
38833 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
38837 first_reg
= ALTIVEC_ARG_RETURN
;
38841 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
38844 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
38845 if (TARGET_32BIT
&& TARGET_POWERPC64
)
38854 int count
= GET_MODE_SIZE (mode
) / 4;
38855 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
38858 if ((INTEGRAL_TYPE_P (valtype
)
38859 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
38860 || POINTER_TYPE_P (valtype
))
38861 mode
= TARGET_32BIT
? SImode
: DImode
;
38863 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38864 /* _Decimal128 must use an even/odd register pair. */
38865 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
38866 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
38867 && !FLOAT128_VECTOR_P (mode
)
38868 && ((TARGET_SINGLE_FLOAT
&& (mode
== SFmode
)) || TARGET_DOUBLE_FLOAT
))
38869 regno
= FP_ARG_RETURN
;
38870 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
38871 && targetm
.calls
.split_complex_arg
)
38872 return rs6000_complex_function_value (mode
);
38873 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38874 return register is used in both cases, and we won't see V2DImode/V2DFmode
38875 for pure altivec, combine the two cases. */
38876 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| FLOAT128_VECTOR_P (mode
))
38877 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
38878 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
38879 regno
= ALTIVEC_ARG_RETURN
;
38880 else if (TARGET_E500_DOUBLE
&& TARGET_HARD_FLOAT
38881 && (mode
== DFmode
|| mode
== DCmode
38882 || FLOAT128_IBM_P (mode
) || mode
== TCmode
))
38883 return spe_build_register_parallel (mode
, GP_ARG_RETURN
);
38885 regno
= GP_ARG_RETURN
;
38887 return gen_rtx_REG (mode
, regno
);
38890 /* Define how to find the value returned by a library function
38891 assuming the value has mode MODE. */
38893 rs6000_libcall_value (machine_mode mode
)
38895 unsigned int regno
;
38897 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
38898 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
38899 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
38901 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38902 /* _Decimal128 must use an even/odd register pair. */
38903 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
38904 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
)
38905 && TARGET_HARD_FLOAT
&& TARGET_FPRS
38906 && ((TARGET_SINGLE_FLOAT
&& mode
== SFmode
) || TARGET_DOUBLE_FLOAT
))
38907 regno
= FP_ARG_RETURN
;
38908 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38909 return register is used in both cases, and we won't see V2DImode/V2DFmode
38910 for pure altivec, combine the two cases. */
38911 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
38912 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
38913 regno
= ALTIVEC_ARG_RETURN
;
38914 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
38915 return rs6000_complex_function_value (mode
);
38916 else if (TARGET_E500_DOUBLE
&& TARGET_HARD_FLOAT
38917 && (mode
== DFmode
|| mode
== DCmode
38918 || FLOAT128_IBM_P (mode
) || mode
== TCmode
))
38919 return spe_build_register_parallel (mode
, GP_ARG_RETURN
);
38921 regno
= GP_ARG_RETURN
;
38923 return gen_rtx_REG (mode
, regno
);
38927 /* Return true if we use LRA instead of reload pass. */
/* Implements the TARGET_LRA_P hook.  NOTE(review): only the signature
   survives in this extraction; the body (presumably returning the -mlra
   option flag) is missing here -- restore it from upstream before use.  */
38929 rs6000_lra_p (void)
38934 /* Compute register pressure classes. We implement the target hook to avoid
38935 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
38936 lead to incorrect estimates of number of available registers and therefor
38937 increased register pressure/spill. */
38939 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
38944 pressure_classes
[n
++] = GENERAL_REGS
;
38946 pressure_classes
[n
++] = VSX_REGS
;
38949 if (TARGET_ALTIVEC
)
38950 pressure_classes
[n
++] = ALTIVEC_REGS
;
38951 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38952 pressure_classes
[n
++] = FLOAT_REGS
;
38954 pressure_classes
[n
++] = CR_REGS
;
38955 pressure_classes
[n
++] = SPECIAL_REGS
;
38960 /* Given FROM and TO register numbers, say whether this elimination is allowed.
38961 Frame pointer elimination is automatically handled.
38963 For the RS/6000, if frame pointer elimination is being done, we would like
38964 to convert ap into fp, not sp.
38966 We need r30 if -mminimal-toc was specified, and there are constant pool
38970 rs6000_can_eliminate (const int from
, const int to
)
38972 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
38973 ? ! frame_pointer_needed
38974 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
38975 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC
38976 || constant_pool_empty_p ()
38980 /* Define the offset between two registers, FROM to be eliminated and its
38981 replacement TO, at the start of a routine. */
38983 rs6000_initial_elimination_offset (int from
, int to
)
38985 rs6000_stack_t
*info
= rs6000_stack_info ();
38986 HOST_WIDE_INT offset
;
38988 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
38989 offset
= info
->push_p
? 0 : -info
->total_size
;
38990 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
38992 offset
= info
->push_p
? 0 : -info
->total_size
;
38993 if (FRAME_GROWS_DOWNWARD
)
38994 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
38996 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
38997 offset
= FRAME_GROWS_DOWNWARD
38998 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
39000 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
39001 offset
= info
->total_size
;
39002 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
39003 offset
= info
->push_p
? info
->total_size
: 0;
39004 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
39007 gcc_unreachable ();
39013 rs6000_dwarf_register_span (rtx reg
)
39017 unsigned regno
= REGNO (reg
);
39018 machine_mode mode
= GET_MODE (reg
);
39022 && (SPE_VECTOR_MODE (GET_MODE (reg
))
39023 || (TARGET_E500_DOUBLE
&& FLOAT_MODE_P (mode
)
39024 && mode
!= SFmode
&& mode
!= SDmode
&& mode
!= SCmode
)))
39029 regno
= REGNO (reg
);
39031 /* The duality of the SPE register size wreaks all kinds of havoc.
39032 This is a way of distinguishing r0 in 32-bits from r0 in
39034 words
= (GET_MODE_SIZE (mode
) + UNITS_PER_FP_WORD
- 1) / UNITS_PER_FP_WORD
;
39035 gcc_assert (words
<= 4);
39036 for (i
= 0; i
< words
; i
++, regno
++)
39038 if (BYTES_BIG_ENDIAN
)
39040 parts
[2 * i
] = gen_rtx_REG (SImode
, regno
+ FIRST_SPE_HIGH_REGNO
);
39041 parts
[2 * i
+ 1] = gen_rtx_REG (SImode
, regno
);
39045 parts
[2 * i
] = gen_rtx_REG (SImode
, regno
);
39046 parts
[2 * i
+ 1] = gen_rtx_REG (SImode
, regno
+ FIRST_SPE_HIGH_REGNO
);
39050 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (words
* 2, parts
));
39053 /* Fill in sizes for SPE register high parts in table used by unwinder. */
39056 rs6000_init_dwarf_reg_sizes_extra (tree address
)
39061 machine_mode mode
= TYPE_MODE (char_type_node
);
39062 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
39063 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
39064 rtx value
= gen_int_mode (4, mode
);
39066 for (i
= FIRST_SPE_HIGH_REGNO
; i
< LAST_SPE_HIGH_REGNO
+1; i
++)
39068 int column
= DWARF_REG_TO_UNWIND_COLUMN
39069 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
39070 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
39072 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
39076 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
39079 machine_mode mode
= TYPE_MODE (char_type_node
);
39080 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
39081 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
39082 rtx value
= gen_int_mode (16, mode
);
39084 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
39085 The unwinder still needs to know the size of Altivec registers. */
39087 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
39089 int column
= DWARF_REG_TO_UNWIND_COLUMN
39090 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
39091 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
39093 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
39098 /* Map internal gcc register numbers to debug format register numbers.
39099 FORMAT specifies the type of debug register number to use:
39100 0 -- debug information, except for frame-related sections
39101 1 -- DWARF .debug_frame section
39102 2 -- DWARF .eh_frame section */
39105 rs6000_dbx_register_number (unsigned int regno
, unsigned int format
)
39107 /* We never use the GCC internal number for SPE high registers.
39108 Those are mapped to the 1200..1231 range for all debug formats. */
39109 if (SPE_HIGH_REGNO_P (regno
))
39110 return regno
- FIRST_SPE_HIGH_REGNO
+ 1200;
39112 /* Except for the above, we use the internal number for non-DWARF
39113 debug information, and also for .eh_frame. */
39114 if ((format
== 0 && write_symbols
!= DWARF2_DEBUG
) || format
== 2)
39117 /* On some platforms, we use the standard DWARF register
39118 numbering for .debug_info and .debug_frame. */
39119 #ifdef RS6000_USE_DWARF_NUMBERING
39122 if (regno
== LR_REGNO
)
39124 if (regno
== CTR_REGNO
)
39126 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
39127 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
39128 The actual code emitted saves the whole of CR, so we map CR2_REGNO
39129 to the DWARF reg for CR. */
39130 if (format
== 1 && regno
== CR2_REGNO
)
39132 if (CR_REGNO_P (regno
))
39133 return regno
- CR0_REGNO
+ 86;
39134 if (regno
== CA_REGNO
)
39135 return 101; /* XER */
39136 if (ALTIVEC_REGNO_P (regno
))
39137 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
39138 if (regno
== VRSAVE_REGNO
)
39140 if (regno
== VSCR_REGNO
)
39142 if (regno
== SPE_ACC_REGNO
)
39144 if (regno
== SPEFSCR_REGNO
)
39150 /* target hook eh_return_filter_mode */
39151 static scalar_int_mode
39152 rs6000_eh_return_filter_mode (void)
39154 return TARGET_32BIT
? SImode
: word_mode
;
39157 /* Target hook for scalar_mode_supported_p. */
39159 rs6000_scalar_mode_supported_p (machine_mode mode
)
39161 /* -m32 does not support TImode. This is the default, from
39162 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
39163 same ABI as for -m32. But default_scalar_mode_supported_p allows
39164 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
39165 for -mpowerpc64. */
39166 if (TARGET_32BIT
&& mode
== TImode
)
39169 if (DECIMAL_FLOAT_MODE_P (mode
))
39170 return default_decimal_float_supported_p ();
39171 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
39174 return default_scalar_mode_supported_p (mode
);
39177 /* Target hook for vector_mode_supported_p. */
39179 rs6000_vector_mode_supported_p (machine_mode mode
)
39182 if (TARGET_PAIRED_FLOAT
&& PAIRED_VECTOR_MODE (mode
))
39185 if (TARGET_SPE
&& SPE_VECTOR_MODE (mode
))
39188 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
39189 128-bit, the compiler might try to widen IEEE 128-bit to IBM
39191 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
39198 /* Target hook for floatn_mode. */
39199 static opt_scalar_float_mode
39200 rs6000_floatn_mode (int n
, bool extended
)
39210 if (TARGET_FLOAT128_KEYWORD
)
39211 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
39213 return opt_scalar_float_mode ();
39216 return opt_scalar_float_mode ();
39219 /* Those are the only valid _FloatNx types. */
39220 gcc_unreachable ();
39234 if (TARGET_FLOAT128_KEYWORD
)
39235 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
39237 return opt_scalar_float_mode ();
39240 return opt_scalar_float_mode ();
39246 /* Target hook for c_mode_for_suffix. */
39247 static machine_mode
39248 rs6000_c_mode_for_suffix (char suffix
)
39250 if (TARGET_FLOAT128_TYPE
)
39252 if (suffix
== 'q' || suffix
== 'Q')
39253 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
39255 /* At the moment, we are not defining a suffix for IBM extended double.
39256 If/when the default for -mabi=ieeelongdouble is changed, and we want
39257 to support __ibm128 constants in legacy library code, we may need to
39258 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
39259 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
39260 __float80 constants. */
39266 /* Target hook for invalid_arg_for_unprototyped_fn. */
39267 static const char *
39268 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
39270 return (!rs6000_darwin64_abi
39272 && TREE_CODE (TREE_TYPE (val
)) == VECTOR_TYPE
39273 && (funcdecl
== NULL_TREE
39274 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
39275 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
39276 ? N_("AltiVec argument passed to unprototyped function")
39280 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
39281 setup by using __stack_chk_fail_local hidden function instead of
39282 calling __stack_chk_fail directly. Otherwise it is better to call
39283 __stack_chk_fail directly. */
39285 static tree ATTRIBUTE_UNUSED
39286 rs6000_stack_protect_fail (void)
39288 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
39289 ? default_hidden_stack_protect_fail ()
39290 : default_external_stack_protect_fail ();
39294 rs6000_final_prescan_insn (rtx_insn
*insn
, rtx
*operand ATTRIBUTE_UNUSED
,
39295 int num_operands ATTRIBUTE_UNUSED
)
39297 if (rs6000_warn_cell_microcode
)
39300 int insn_code_number
= recog_memoized (insn
);
39301 location_t location
= INSN_LOCATION (insn
);
39303 /* Punt on insns we cannot recognize. */
39304 if (insn_code_number
< 0)
39307 /* get_insn_template can modify recog_data, so save and restore it. */
39308 struct recog_data_d recog_data_save
= recog_data
;
39309 for (int i
= 0; i
< recog_data
.n_operands
; i
++)
39310 recog_data
.operand
[i
] = copy_rtx (recog_data
.operand
[i
]);
39311 temp
= get_insn_template (insn_code_number
, insn
);
39312 recog_data
= recog_data_save
;
39314 if (get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
)
39315 warning_at (location
, OPT_mwarn_cell_microcode
,
39316 "emitting microcode insn %s\t[%s] #%d",
39317 temp
, insn_data
[INSN_CODE (insn
)].name
, INSN_UID (insn
));
39318 else if (get_attr_cell_micro (insn
) == CELL_MICRO_CONDITIONAL
)
39319 warning_at (location
, OPT_mwarn_cell_microcode
,
39320 "emitting conditional microcode insn %s\t[%s] #%d",
39321 temp
, insn_data
[INSN_CODE (insn
)].name
, INSN_UID (insn
));
39325 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
39328 static unsigned HOST_WIDE_INT
39329 rs6000_asan_shadow_offset (void)
39331 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
39335 /* Mask options that we want to support inside of attribute((target)) and
39336 #pragma GCC target operations. Note, we do not include things like
39337 64/32-bit, endianness, hard/soft floating point, etc. that would have
39338 different calling sequences. */
39340 struct rs6000_opt_mask
{
39341 const char *name
; /* option name */
39342 HOST_WIDE_INT mask
; /* mask to set */
39343 bool invert
; /* invert sense of mask */
39344 bool valid_target
; /* option is a target option */
39347 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
39349 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
39350 { "cmpb", OPTION_MASK_CMPB
, false, true },
39351 { "crypto", OPTION_MASK_CRYPTO
, false, true },
39352 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
39353 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
39354 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
39356 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, false },
39357 { "float128-type", OPTION_MASK_FLOAT128_TYPE
, false, false },
39358 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, false },
39359 { "fprnd", OPTION_MASK_FPRND
, false, true },
39360 { "hard-dfp", OPTION_MASK_DFP
, false, true },
39361 { "htm", OPTION_MASK_HTM
, false, true },
39362 { "isel", OPTION_MASK_ISEL
, false, true },
39363 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
39364 { "mfpgpr", OPTION_MASK_MFPGPR
, false, true },
39365 { "modulo", OPTION_MASK_MODULO
, false, true },
39366 { "mulhw", OPTION_MASK_MULHW
, false, true },
39367 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
39368 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
39369 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
39370 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
39371 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
39372 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
39373 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR
, false, true },
39374 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR
, false, true },
39375 { "power9-fusion", OPTION_MASK_P9_FUSION
, false, true },
39376 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
39377 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
39378 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
39379 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
39380 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
39381 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
39382 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
39383 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
39384 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
39385 { "string", OPTION_MASK_STRING
, false, true },
39386 { "toc-fusion", OPTION_MASK_TOC_FUSION
, false, true },
39387 { "update", OPTION_MASK_NO_UPDATE
, true , true },
39388 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI
, false, true },
39389 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF
, false, true },
39390 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF
, false, true },
39391 { "vsx", OPTION_MASK_VSX
, false, true },
39392 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER
, false, true },
39393 { "vsx-timode", OPTION_MASK_VSX_TIMODE
, false, true },
39394 #ifdef OPTION_MASK_64BIT
39396 { "aix64", OPTION_MASK_64BIT
, false, false },
39397 { "aix32", OPTION_MASK_64BIT
, true, false },
39399 { "64", OPTION_MASK_64BIT
, false, false },
39400 { "32", OPTION_MASK_64BIT
, true, false },
39403 #ifdef OPTION_MASK_EABI
39404 { "eabi", OPTION_MASK_EABI
, false, false },
39406 #ifdef OPTION_MASK_LITTLE_ENDIAN
39407 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
39408 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
39410 #ifdef OPTION_MASK_RELOCATABLE
39411 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
39413 #ifdef OPTION_MASK_STRICT_ALIGN
39414 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
39416 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
39417 { "string", OPTION_MASK_STRING
, false, false },
39420 /* Builtin mask mapping for printing the flags. */
39421 static struct rs6000_opt_mask
const rs6000_builtin_mask_names
[] =
39423 { "altivec", RS6000_BTM_ALTIVEC
, false, false },
39424 { "vsx", RS6000_BTM_VSX
, false, false },
39425 { "spe", RS6000_BTM_SPE
, false, false },
39426 { "paired", RS6000_BTM_PAIRED
, false, false },
39427 { "fre", RS6000_BTM_FRE
, false, false },
39428 { "fres", RS6000_BTM_FRES
, false, false },
39429 { "frsqrte", RS6000_BTM_FRSQRTE
, false, false },
39430 { "frsqrtes", RS6000_BTM_FRSQRTES
, false, false },
39431 { "popcntd", RS6000_BTM_POPCNTD
, false, false },
39432 { "cell", RS6000_BTM_CELL
, false, false },
39433 { "power8-vector", RS6000_BTM_P8_VECTOR
, false, false },
39434 { "power9-vector", RS6000_BTM_P9_VECTOR
, false, false },
39435 { "power9-misc", RS6000_BTM_P9_MISC
, false, false },
39436 { "crypto", RS6000_BTM_CRYPTO
, false, false },
39437 { "htm", RS6000_BTM_HTM
, false, false },
39438 { "hard-dfp", RS6000_BTM_DFP
, false, false },
39439 { "hard-float", RS6000_BTM_HARD_FLOAT
, false, false },
39440 { "long-double-128", RS6000_BTM_LDBL128
, false, false },
39441 { "float128", RS6000_BTM_FLOAT128
, false, false },
39444 /* Option variables that we want to support inside attribute((target)) and
39445 #pragma GCC target operations. */
39447 struct rs6000_opt_var
{
39448 const char *name
; /* option name */
39449 size_t global_offset
; /* offset of the option in global_options. */
39450 size_t target_offset
; /* offset of the option in target options. */
39453 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
39456 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
39457 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
39458 { "avoid-indexed-addresses",
39459 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
39460 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
39462 offsetof (struct gcc_options
, x_rs6000_paired_float
),
39463 offsetof (struct cl_target_option
, x_rs6000_paired_float
), },
39465 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
39466 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
39467 { "optimize-swaps",
39468 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
39469 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
39470 { "allow-movmisalign",
39471 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
39472 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
39473 { "allow-df-permute",
39474 offsetof (struct gcc_options
, x_TARGET_ALLOW_DF_PERMUTE
),
39475 offsetof (struct cl_target_option
, x_TARGET_ALLOW_DF_PERMUTE
), },
39477 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
39478 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
39480 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
39481 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
39482 { "align-branch-targets",
39483 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
39484 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
39485 { "vectorize-builtins",
39486 offsetof (struct gcc_options
, x_TARGET_VECTORIZE_BUILTINS
),
39487 offsetof (struct cl_target_option
, x_TARGET_VECTORIZE_BUILTINS
), },
39489 offsetof (struct gcc_options
, x_tls_markers
),
39490 offsetof (struct cl_target_option
, x_tls_markers
), },
39492 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
39493 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
39495 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
39496 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
39497 { "gen-cell-microcode",
39498 offsetof (struct gcc_options
, x_rs6000_gen_cell_microcode
),
39499 offsetof (struct cl_target_option
, x_rs6000_gen_cell_microcode
), },
39500 { "warn-cell-microcode",
39501 offsetof (struct gcc_options
, x_rs6000_warn_cell_microcode
),
39502 offsetof (struct cl_target_option
, x_rs6000_warn_cell_microcode
), },
39505 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39506 parsing. Return true if there were no errors. */
39509 rs6000_inner_target_options (tree args
, bool attr_p
)
39513 if (args
== NULL_TREE
)
39516 else if (TREE_CODE (args
) == STRING_CST
)
39518 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
39521 while ((q
= strtok (p
, ",")) != NULL
)
39523 bool error_p
= false;
39524 bool not_valid_p
= false;
39525 const char *cpu_opt
= NULL
;
39528 if (strncmp (q
, "cpu=", 4) == 0)
39530 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
39531 if (cpu_index
>= 0)
39532 rs6000_cpu_index
= cpu_index
;
39539 else if (strncmp (q
, "tune=", 5) == 0)
39541 int tune_index
= rs6000_cpu_name_lookup (q
+5);
39542 if (tune_index
>= 0)
39543 rs6000_tune_index
= tune_index
;
39553 bool invert
= false;
39557 if (strncmp (r
, "no-", 3) == 0)
39563 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
39564 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
39566 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
39568 if (!rs6000_opt_masks
[i
].valid_target
)
39569 not_valid_p
= true;
39573 rs6000_isa_flags_explicit
|= mask
;
39575 /* VSX needs altivec, so -mvsx automagically sets
39576 altivec and disables -mavoid-indexed-addresses. */
39579 if (mask
== OPTION_MASK_VSX
)
39581 mask
|= OPTION_MASK_ALTIVEC
;
39582 TARGET_AVOID_XFORM
= 0;
39586 if (rs6000_opt_masks
[i
].invert
)
39590 rs6000_isa_flags
&= ~mask
;
39592 rs6000_isa_flags
|= mask
;
39597 if (error_p
&& !not_valid_p
)
39599 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
39600 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
39602 size_t j
= rs6000_opt_vars
[i
].global_offset
;
39603 *((int *) ((char *)&global_options
+ j
)) = !invert
;
39605 not_valid_p
= false;
39613 const char *eprefix
, *esuffix
;
39618 eprefix
= "__attribute__((__target__(";
39623 eprefix
= "#pragma GCC target ";
39628 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt
, eprefix
,
39630 else if (not_valid_p
)
39631 error ("%s\"%s\"%s is not allowed", eprefix
, q
, esuffix
);
39633 error ("%s\"%s\"%s is invalid", eprefix
, q
, esuffix
);
39638 else if (TREE_CODE (args
) == TREE_LIST
)
39642 tree value
= TREE_VALUE (args
);
39645 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
39649 args
= TREE_CHAIN (args
);
39651 while (args
!= NULL_TREE
);
39656 error ("attribute %<target%> argument not a string");
39663 /* Print out the target options as a list for -mdebug=target. */
39666 rs6000_debug_target_options (tree args
, const char *prefix
)
39668 if (args
== NULL_TREE
)
39669 fprintf (stderr
, "%s<NULL>", prefix
);
39671 else if (TREE_CODE (args
) == STRING_CST
)
39673 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
39676 while ((q
= strtok (p
, ",")) != NULL
)
39679 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
39684 else if (TREE_CODE (args
) == TREE_LIST
)
39688 tree value
= TREE_VALUE (args
);
39691 rs6000_debug_target_options (value
, prefix
);
39694 args
= TREE_CHAIN (args
);
39696 while (args
!= NULL_TREE
);
39700 gcc_unreachable ();
39706 /* Hook to validate attribute((target("..."))). */
39709 rs6000_valid_attribute_p (tree fndecl
,
39710 tree
ARG_UNUSED (name
),
39714 struct cl_target_option cur_target
;
39716 tree old_optimize
= build_optimization_node (&global_options
);
39717 tree new_target
, new_optimize
;
39718 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
39720 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
39722 if (TARGET_DEBUG_TARGET
)
39724 tree tname
= DECL_NAME (fndecl
);
39725 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
39727 fprintf (stderr
, "function: %.*s\n",
39728 (int) IDENTIFIER_LENGTH (tname
),
39729 IDENTIFIER_POINTER (tname
));
39731 fprintf (stderr
, "function: unknown\n");
39733 fprintf (stderr
, "args:");
39734 rs6000_debug_target_options (args
, " ");
39735 fprintf (stderr
, "\n");
39738 fprintf (stderr
, "flags: 0x%x\n", flags
);
39740 fprintf (stderr
, "--------------------\n");
39743 old_optimize
= build_optimization_node (&global_options
);
39744 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
39746 /* If the function changed the optimization levels as well as setting target
39747 options, start with the optimizations specified. */
39748 if (func_optimize
&& func_optimize
!= old_optimize
)
39749 cl_optimization_restore (&global_options
,
39750 TREE_OPTIMIZATION (func_optimize
));
39752 /* The target attributes may also change some optimization flags, so update
39753 the optimization options if necessary. */
39754 cl_target_option_save (&cur_target
, &global_options
);
39755 rs6000_cpu_index
= rs6000_tune_index
= -1;
39756 ret
= rs6000_inner_target_options (args
, true);
39758 /* Set up any additional state. */
39761 ret
= rs6000_option_override_internal (false);
39762 new_target
= build_target_option_node (&global_options
);
39767 new_optimize
= build_optimization_node (&global_options
);
39774 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
39776 if (old_optimize
!= new_optimize
)
39777 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
39780 cl_target_option_restore (&global_options
, &cur_target
);
39782 if (old_optimize
!= new_optimize
)
39783 cl_optimization_restore (&global_options
,
39784 TREE_OPTIMIZATION (old_optimize
));
39790 /* Hook to validate the current #pragma GCC target and set the state, and
39791 update the macros based on what was changed. If ARGS is NULL, then
39792 POP_TARGET is used to reset the options. */
39795 rs6000_pragma_target_parse (tree args
, tree pop_target
)
39797 tree prev_tree
= build_target_option_node (&global_options
);
39799 struct cl_target_option
*prev_opt
, *cur_opt
;
39800 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
39801 HOST_WIDE_INT prev_bumask
, cur_bumask
, diff_bumask
;
39803 if (TARGET_DEBUG_TARGET
)
39805 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
39806 fprintf (stderr
, "args:");
39807 rs6000_debug_target_options (args
, " ");
39808 fprintf (stderr
, "\n");
39812 fprintf (stderr
, "pop_target:\n");
39813 debug_tree (pop_target
);
39816 fprintf (stderr
, "pop_target: <NULL>\n");
39818 fprintf (stderr
, "--------------------\n");
39823 cur_tree
= ((pop_target
)
39825 : target_option_default_node
);
39826 cl_target_option_restore (&global_options
,
39827 TREE_TARGET_OPTION (cur_tree
));
39831 rs6000_cpu_index
= rs6000_tune_index
= -1;
39832 if (!rs6000_inner_target_options (args
, false)
39833 || !rs6000_option_override_internal (false)
39834 || (cur_tree
= build_target_option_node (&global_options
))
39837 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
39838 fprintf (stderr
, "invalid pragma\n");
39844 target_option_current_node
= cur_tree
;
39846 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39847 change the macros that are defined. */
39848 if (rs6000_target_modify_macros_ptr
)
39850 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
39851 prev_bumask
= prev_opt
->x_rs6000_builtin_mask
;
39852 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
39854 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
39855 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
39856 cur_bumask
= cur_opt
->x_rs6000_builtin_mask
;
39858 diff_bumask
= (prev_bumask
^ cur_bumask
);
39859 diff_flags
= (prev_flags
^ cur_flags
);
39861 if ((diff_flags
!= 0) || (diff_bumask
!= 0))
39863 /* Delete old macros. */
39864 rs6000_target_modify_macros_ptr (false,
39865 prev_flags
& diff_flags
,
39866 prev_bumask
& diff_bumask
);
39868 /* Define new macros. */
39869 rs6000_target_modify_macros_ptr (true,
39870 cur_flags
& diff_flags
,
39871 cur_bumask
& diff_bumask
);
39879 /* Remember the last target of rs6000_set_current_function. */
39880 static GTY(()) tree rs6000_previous_fndecl
;
39882 /* Establish appropriate back-end context for processing the function
39883 FNDECL. The argument might be NULL to indicate processing at top
39884 level, outside of any function scope. */
39886 rs6000_set_current_function (tree fndecl
)
39888 tree old_tree
= (rs6000_previous_fndecl
39889 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
)
39892 tree new_tree
= (fndecl
39893 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
39896 if (TARGET_DEBUG_TARGET
)
39898 bool print_final
= false;
39899 fprintf (stderr
, "\n==================== rs6000_set_current_function");
39902 fprintf (stderr
, ", fndecl %s (%p)",
39903 (DECL_NAME (fndecl
)
39904 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
39905 : "<unknown>"), (void *)fndecl
);
39907 if (rs6000_previous_fndecl
)
39908 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
39910 fprintf (stderr
, "\n");
39913 fprintf (stderr
, "\nnew fndecl target specific options:\n");
39914 debug_tree (new_tree
);
39915 print_final
= true;
39920 fprintf (stderr
, "\nold fndecl target specific options:\n");
39921 debug_tree (old_tree
);
39922 print_final
= true;
39926 fprintf (stderr
, "--------------------\n");
39929 /* Only change the context if the function changes. This hook is called
39930 several times in the course of compiling a function, and we don't want to
39931 slow things down too much or call target_reinit when it isn't safe. */
39932 if (fndecl
&& fndecl
!= rs6000_previous_fndecl
)
39934 rs6000_previous_fndecl
= fndecl
;
39935 if (old_tree
== new_tree
)
39938 else if (new_tree
&& new_tree
!= target_option_default_node
)
39940 cl_target_option_restore (&global_options
,
39941 TREE_TARGET_OPTION (new_tree
));
39942 if (TREE_TARGET_GLOBALS (new_tree
))
39943 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
39945 TREE_TARGET_GLOBALS (new_tree
)
39946 = save_target_globals_default_opts ();
39949 else if (old_tree
&& old_tree
!= target_option_default_node
)
39951 new_tree
= target_option_current_node
;
39952 cl_target_option_restore (&global_options
,
39953 TREE_TARGET_OPTION (new_tree
));
39954 if (TREE_TARGET_GLOBALS (new_tree
))
39955 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
39956 else if (new_tree
== target_option_default_node
)
39957 restore_target_globals (&default_target_globals
);
39959 TREE_TARGET_GLOBALS (new_tree
)
39960 = save_target_globals_default_opts ();
39966 /* Save the current options */
39969 rs6000_function_specific_save (struct cl_target_option
*ptr
,
39970 struct gcc_options
*opts
)
39972 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
39973 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
39976 /* Restore the current options */
39979 rs6000_function_specific_restore (struct gcc_options
*opts
,
39980 struct cl_target_option
*ptr
)
39983 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
39984 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
39985 (void) rs6000_option_override_internal (false);
39988 /* Print the current options */
39991 rs6000_function_specific_print (FILE *file
, int indent
,
39992 struct cl_target_option
*ptr
)
39994 rs6000_print_isa_options (file
, indent
, "Isa options set",
39995 ptr
->x_rs6000_isa_flags
);
39997 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
39998 ptr
->x_rs6000_isa_flags_explicit
);
40001 /* Helper function to print the current isa or misc options on a line. */
40004 rs6000_print_options_internal (FILE *file
,
40006 const char *string
,
40007 HOST_WIDE_INT flags
,
40008 const char *prefix
,
40009 const struct rs6000_opt_mask
*opts
,
40010 size_t num_elements
)
40013 size_t start_column
= 0;
40015 size_t max_column
= 120;
40016 size_t prefix_len
= strlen (prefix
);
40017 size_t comma_len
= 0;
40018 const char *comma
= "";
40021 start_column
+= fprintf (file
, "%*s", indent
, "");
40025 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
40029 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
40031 /* Print the various mask options. */
40032 cur_column
= start_column
;
40033 for (i
= 0; i
< num_elements
; i
++)
40035 bool invert
= opts
[i
].invert
;
40036 const char *name
= opts
[i
].name
;
40037 const char *no_str
= "";
40038 HOST_WIDE_INT mask
= opts
[i
].mask
;
40039 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
40043 if ((flags
& mask
) == 0)
40046 len
+= sizeof ("no-") - 1;
40054 if ((flags
& mask
) != 0)
40057 len
+= sizeof ("no-") - 1;
40064 if (cur_column
> max_column
)
40066 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
40067 cur_column
= start_column
+ len
;
40071 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
40073 comma_len
= sizeof (", ") - 1;
40076 fputs ("\n", file
);
40079 /* Helper function to print the current isa options on a line. */
40082 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
40083 HOST_WIDE_INT flags
)
40085 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
40086 &rs6000_opt_masks
[0],
40087 ARRAY_SIZE (rs6000_opt_masks
));
40091 rs6000_print_builtin_options (FILE *file
, int indent
, const char *string
,
40092 HOST_WIDE_INT flags
)
40094 rs6000_print_options_internal (file
, indent
, string
, flags
, "",
40095 &rs6000_builtin_mask_names
[0],
40096 ARRAY_SIZE (rs6000_builtin_mask_names
));
40099 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
40100 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
40101 -mvsx-timode, -mupper-regs-df).
40103 If the user used -mno-power8-vector, we need to turn off all of the implicit
40104 ISA 2.07 and 3.0 options that relate to the vector unit.
40106 If the user used -mno-power9-vector, we need to turn off all of the implicit
40107 ISA 3.0 options that relate to the vector unit.
40109 This function does not handle explicit options such as the user specifying
40110 -mdirect-move. These are handled in rs6000_option_override_internal, and
40111 the appropriate error is given if needed.
40113 We return a mask of all of the implicit options that should not be enabled
40116 static HOST_WIDE_INT
40117 rs6000_disable_incompatible_switches (void)
40119 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
40122 static const struct {
40123 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
40124 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
40125 const char *const name
; /* name of the switch. */
40127 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
40128 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
40129 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
40132 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
40134 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
40136 if ((rs6000_isa_flags
& no_flag
) == 0
40137 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
40139 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
40140 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
40146 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
40147 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
40149 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
40150 error ("-mno-%s turns off -m%s",
40152 rs6000_opt_masks
[j
].name
);
40155 gcc_assert (!set_flags
);
40158 rs6000_isa_flags
&= ~dep_flags
;
40159 ignore_masks
|= no_flag
| dep_flags
;
40163 if (!TARGET_P9_VECTOR
40164 && (rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) != 0
40165 && TARGET_P9_DFORM_BOTH
> 0)
40167 error ("-mno-power9-vector turns off -mpower9-dform");
40168 TARGET_P9_DFORM_BOTH
= 0;
40171 return ignore_masks
;
40175 /* Hook to determine if one function can safely inline another. */
40178 rs6000_can_inline_p (tree caller
, tree callee
)
40181 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
40182 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
40184 /* If callee has no option attributes, then it is ok to inline. */
40188 /* If caller has no option attributes, but callee does then it is not ok to
40190 else if (!caller_tree
)
40195 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
40196 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
40198 /* Callee's options should a subset of the caller's, i.e. a vsx function
40199 can inline an altivec function but a non-vsx function can't inline a
40201 if ((caller_opts
->x_rs6000_isa_flags
& callee_opts
->x_rs6000_isa_flags
)
40202 == callee_opts
->x_rs6000_isa_flags
)
40206 if (TARGET_DEBUG_TARGET
)
40207 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
40208 (DECL_NAME (caller
)
40209 ? IDENTIFIER_POINTER (DECL_NAME (caller
))
40211 (DECL_NAME (callee
)
40212 ? IDENTIFIER_POINTER (DECL_NAME (callee
))
40214 (ret
? "can" : "cannot"));
40219 /* Allocate a stack temp and fixup the address so it meets the particular
40220 memory requirements (either offetable or REG+REG addressing). */
40223 rs6000_allocate_stack_temp (machine_mode mode
,
40224 bool offsettable_p
,
40227 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
40228 rtx addr
= XEXP (stack
, 0);
40229 int strict_p
= (reload_in_progress
|| reload_completed
);
40231 if (!legitimate_indirect_address_p (addr
, strict_p
))
40234 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
40235 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
40237 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
40238 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
40244 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
40245 to such a form to deal with memory reference instructions like STFIWX that
40246 only take reg+reg addressing. */
40249 rs6000_address_for_fpconvert (rtx x
)
40251 int strict_p
= (reload_in_progress
|| reload_completed
);
40254 gcc_assert (MEM_P (x
));
40255 addr
= XEXP (x
, 0);
40256 if (! legitimate_indirect_address_p (addr
, strict_p
)
40257 && ! legitimate_indexed_address_p (addr
, strict_p
))
40259 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
40261 rtx reg
= XEXP (addr
, 0);
40262 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
40263 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
40264 gcc_assert (REG_P (reg
));
40265 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
40268 else if (GET_CODE (addr
) == PRE_MODIFY
)
40270 rtx reg
= XEXP (addr
, 0);
40271 rtx expr
= XEXP (addr
, 1);
40272 gcc_assert (REG_P (reg
));
40273 gcc_assert (GET_CODE (expr
) == PLUS
);
40274 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
40278 x
= replace_equiv_address (x
, copy_addr_to_reg (addr
));
40284 /* Given a memory reference, if it is not in the form for altivec memory
40285 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
40286 convert to the altivec format. */
40289 rs6000_address_for_altivec (rtx x
)
40291 gcc_assert (MEM_P (x
));
40292 if (!altivec_indexed_or_indirect_operand (x
, GET_MODE (x
)))
40294 rtx addr
= XEXP (x
, 0);
40295 int strict_p
= (reload_in_progress
|| reload_completed
);
40297 if (!legitimate_indexed_address_p (addr
, strict_p
)
40298 && !legitimate_indirect_address_p (addr
, strict_p
))
40299 addr
= copy_to_mode_reg (Pmode
, addr
);
40301 addr
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
40302 x
= change_address (x
, GET_MODE (x
), addr
);
40308 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
40310 On the RS/6000, all integer constants are acceptable, most won't be valid
40311 for particular insns, though. Only easy FP constants are acceptable. */
40314 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
40316 if (TARGET_ELF
&& tls_referenced_p (x
))
40319 return ((GET_CODE (x
) != CONST_DOUBLE
&& GET_CODE (x
) != CONST_VECTOR
)
40320 || GET_MODE (x
) == VOIDmode
40321 || (TARGET_POWERPC64
&& mode
== DImode
)
40322 || easy_fp_constant (x
, mode
)
40323 || easy_vector_constant (x
, mode
));
40327 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
40330 chain_already_loaded (rtx_insn
*last
)
40332 for (; last
!= NULL
; last
= PREV_INSN (last
))
40334 if (NONJUMP_INSN_P (last
))
40336 rtx patt
= PATTERN (last
);
40338 if (GET_CODE (patt
) == SET
)
40340 rtx lhs
= XEXP (patt
, 0);
40342 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
40350 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
40353 rs6000_call_aix (rtx value
, rtx func_desc
, rtx flag
, rtx cookie
)
40355 const bool direct_call_p
40356 = GET_CODE (func_desc
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (func_desc
);
40357 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
40358 rtx toc_load
= NULL_RTX
;
40359 rtx toc_restore
= NULL_RTX
;
40361 rtx abi_reg
= NULL_RTX
;
40366 /* Handle longcall attributes. */
40367 if (INTVAL (cookie
) & CALL_LONG
)
40368 func_desc
= rs6000_longcall_ref (func_desc
);
40370 /* Handle indirect calls. */
40371 if (GET_CODE (func_desc
) != SYMBOL_REF
40372 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func_desc
)))
40374 /* Save the TOC into its reserved slot before the call,
40375 and prepare to restore it after the call. */
40376 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
40377 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
40378 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
40379 gen_rtx_PLUS (Pmode
, stack_ptr
,
40380 stack_toc_offset
));
40381 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
40382 gen_rtvec (1, stack_toc_offset
),
40384 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
40386 /* Can we optimize saving the TOC in the prologue or
40387 do we need to do it at every call? */
40388 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
40389 cfun
->machine
->save_toc_in_prologue
= true;
40392 MEM_VOLATILE_P (stack_toc_mem
) = 1;
40393 emit_move_insn (stack_toc_mem
, toc_reg
);
40396 if (DEFAULT_ABI
== ABI_ELFv2
)
40398 /* A function pointer in the ELFv2 ABI is just a plain address, but
40399 the ABI requires it to be loaded into r12 before the call. */
40400 func_addr
= gen_rtx_REG (Pmode
, 12);
40401 emit_move_insn (func_addr
, func_desc
);
40402 abi_reg
= func_addr
;
40406 /* A function pointer under AIX is a pointer to a data area whose
40407 first word contains the actual address of the function, whose
40408 second word contains a pointer to its TOC, and whose third word
40409 contains a value to place in the static chain register (r11).
40410 Note that if we load the static chain, our "trampoline" need
40411 not have any executable code. */
40413 /* Load up address of the actual function. */
40414 func_desc
= force_reg (Pmode
, func_desc
);
40415 func_addr
= gen_reg_rtx (Pmode
);
40416 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func_desc
));
40418 /* Prepare to load the TOC of the called function. Note that the
40419 TOC load must happen immediately before the actual call so
40420 that unwinding the TOC registers works correctly. See the
40421 comment in frob_update_context. */
40422 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
40423 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
40424 gen_rtx_PLUS (Pmode
, func_desc
,
40426 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
40428 /* If we have a static chain, load it up. But, if the call was
40429 originally direct, the 3rd word has not been written since no
40430 trampoline has been built, so we ought not to load it, lest we
40431 override a static chain value. */
40433 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
40434 && !chain_already_loaded (get_current_sequence ()->next
->last
))
40436 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
40437 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
40438 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
40439 gen_rtx_PLUS (Pmode
, func_desc
,
40441 emit_move_insn (sc_reg
, func_sc_mem
);
40448 /* Direct calls use the TOC: for local calls, the callee will
40449 assume the TOC register is set; for non-local calls, the
40450 PLT stub needs the TOC register. */
40452 func_addr
= func_desc
;
40455 /* Create the call. */
40456 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), flag
);
40457 if (value
!= NULL_RTX
)
40458 call
[0] = gen_rtx_SET (value
, call
[0]);
40462 call
[n_call
++] = toc_load
;
40464 call
[n_call
++] = toc_restore
;
40466 call
[n_call
++] = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
40468 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
40469 insn
= emit_call_insn (insn
);
40471 /* Mention all registers defined by the ABI to hold information
40472 as uses in CALL_INSN_FUNCTION_USAGE. */
40474 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
40477 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
40480 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx flag
, rtx cookie
)
40485 gcc_assert (INTVAL (cookie
) == 0);
40487 /* Create the call. */
40488 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_desc
), flag
);
40489 if (value
!= NULL_RTX
)
40490 call
[0] = gen_rtx_SET (value
, call
[0]);
40492 call
[1] = simple_return_rtx
;
40494 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
40495 insn
= emit_call_insn (insn
);
40497 /* Note use of the TOC register. */
40498 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), gen_rtx_REG (Pmode
, TOC_REGNUM
));
40501 /* Return whether we need to always update the saved TOC pointer when we update
40502 the stack pointer. */
40505 rs6000_save_toc_in_prologue_p (void)
40507 return (cfun
&& cfun
->machine
&& cfun
->machine
->save_toc_in_prologue
);
40510 #ifdef HAVE_GAS_HIDDEN
40511 # define USE_HIDDEN_LINKONCE 1
40513 # define USE_HIDDEN_LINKONCE 0
40516 /* Fills in the label name that should be used for a 476 link stack thunk. */
40519 get_ppc476_thunk_name (char name
[32])
40521 gcc_assert (TARGET_LINK_STACK
);
40523 if (USE_HIDDEN_LINKONCE
)
40524 sprintf (name
, "__ppc476.get_thunk");
40526 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
40529 /* This function emits the simple thunk routine that is used to preserve
40530 the link stack on the 476 cpu. */
40532 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
40534 rs6000_code_end (void)
40539 if (!TARGET_LINK_STACK
)
40542 get_ppc476_thunk_name (name
);
40544 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
40545 build_function_type_list (void_type_node
, NULL_TREE
));
40546 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
40547 NULL_TREE
, void_type_node
);
40548 TREE_PUBLIC (decl
) = 1;
40549 TREE_STATIC (decl
) = 1;
40552 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
40554 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
40555 targetm
.asm_out
.unique_section (decl
, 0);
40556 switch_to_section (get_named_section (decl
, NULL
, 0));
40557 DECL_WEAK (decl
) = 1;
40558 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
40559 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
40560 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
40561 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
40566 switch_to_section (text_section
);
40567 ASM_OUTPUT_LABEL (asm_out_file
, name
);
40570 DECL_INITIAL (decl
) = make_node (BLOCK
);
40571 current_function_decl
= decl
;
40572 allocate_struct_function (decl
, false);
40573 init_function_start (decl
);
40574 first_function_block_is_cold
= false;
40575 /* Make sure unwind info is emitted for the thunk if needed. */
40576 final_start_function (emit_barrier (), asm_out_file
, 1);
40578 fputs ("\tblr\n", asm_out_file
);
40580 final_end_function ();
40581 init_insn_lengths ();
40582 free_after_compilation (cfun
);
40584 current_function_decl
= NULL
;
40587 /* Add r30 to hard reg set if the prologue sets it up and it is not
40588 pic_offset_table_rtx. */
40591 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
40593 if (!TARGET_SINGLE_PIC_BASE
40595 && TARGET_MINIMAL_TOC
40596 && !constant_pool_empty_p ())
40597 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
40598 if (cfun
->machine
->split_stack_argp_used
)
40599 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
40603 /* Helper function for rs6000_split_logical to emit a logical instruction after
40604 spliting the operation to single GPR registers.
40606 DEST is the destination register.
40607 OP1 and OP2 are the input source registers.
40608 CODE is the base operation (AND, IOR, XOR, NOT).
40609 MODE is the machine mode.
40610 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40611 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40612 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40615 rs6000_split_logical_inner (rtx dest
,
40618 enum rtx_code code
,
40620 bool complement_final_p
,
40621 bool complement_op1_p
,
40622 bool complement_op2_p
)
40626 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
40627 if (op2
&& GET_CODE (op2
) == CONST_INT
40628 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
40629 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
40631 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
40632 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
40634 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
40639 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
40643 else if (value
== mask
)
40645 if (!rtx_equal_p (dest
, op1
))
40646 emit_insn (gen_rtx_SET (dest
, op1
));
40651 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
40652 into separate ORI/ORIS or XORI/XORIS instrucitons. */
40653 else if (code
== IOR
|| code
== XOR
)
40657 if (!rtx_equal_p (dest
, op1
))
40658 emit_insn (gen_rtx_SET (dest
, op1
));
40664 if (code
== AND
&& mode
== SImode
40665 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
40667 emit_insn (gen_andsi3 (dest
, op1
, op2
));
40671 if (complement_op1_p
)
40672 op1
= gen_rtx_NOT (mode
, op1
);
40674 if (complement_op2_p
)
40675 op2
= gen_rtx_NOT (mode
, op2
);
40677 /* For canonical RTL, if only one arm is inverted it is the first. */
40678 if (!complement_op1_p
&& complement_op2_p
)
40679 std::swap (op1
, op2
);
40681 bool_rtx
= ((code
== NOT
)
40682 ? gen_rtx_NOT (mode
, op1
)
40683 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
40685 if (complement_final_p
)
40686 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
40688 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
40691 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40692 operations are split immediately during RTL generation to allow for more
40693 optimizations of the AND/IOR/XOR.
40695 OPERANDS is an array containing the destination and two input operands.
40696 CODE is the base operation (AND, IOR, XOR, NOT).
40697 MODE is the machine mode.
40698 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40699 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40700 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
40701 CLOBBER_REG is either NULL or a scratch register of type CC to allow
40702 formation of the AND instructions. */
40705 rs6000_split_logical_di (rtx operands
[3],
40706 enum rtx_code code
,
40707 bool complement_final_p
,
40708 bool complement_op1_p
,
40709 bool complement_op2_p
)
40711 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
40712 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
40713 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
40714 enum hi_lo
{ hi
= 0, lo
= 1 };
40715 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
40718 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
40719 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
40720 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
40721 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
40724 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
40727 if (GET_CODE (operands
[2]) != CONST_INT
)
40729 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
40730 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
40734 HOST_WIDE_INT value
= INTVAL (operands
[2]);
40735 HOST_WIDE_INT value_hi_lo
[2];
40737 gcc_assert (!complement_final_p
);
40738 gcc_assert (!complement_op1_p
);
40739 gcc_assert (!complement_op2_p
);
40741 value_hi_lo
[hi
] = value
>> 32;
40742 value_hi_lo
[lo
] = value
& lower_32bits
;
40744 for (i
= 0; i
< 2; i
++)
40746 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
40748 if (sub_value
& sign_bit
)
40749 sub_value
|= upper_32bits
;
40751 op2_hi_lo
[i
] = GEN_INT (sub_value
);
40753 /* If this is an AND instruction, check to see if we need to load
40754 the value in a register. */
40755 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
40756 && !and_operand (op2_hi_lo
[i
], SImode
))
40757 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
40762 for (i
= 0; i
< 2; i
++)
40764 /* Split large IOR/XOR operations. */
40765 if ((code
== IOR
|| code
== XOR
)
40766 && GET_CODE (op2_hi_lo
[i
]) == CONST_INT
40767 && !complement_final_p
40768 && !complement_op1_p
40769 && !complement_op2_p
40770 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
40772 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
40773 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
40774 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
40775 rtx tmp
= gen_reg_rtx (SImode
);
40777 /* Make sure the constant is sign extended. */
40778 if ((hi_16bits
& sign_bit
) != 0)
40779 hi_16bits
|= upper_32bits
;
40781 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
40782 code
, SImode
, false, false, false);
40784 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
40785 code
, SImode
, false, false, false);
40788 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
40789 code
, SImode
, complement_final_p
,
40790 complement_op1_p
, complement_op2_p
);
40796 /* Split the insns that make up boolean operations operating on multiple GPR
40797 registers. The boolean MD patterns ensure that the inputs either are
40798 exactly the same as the output registers, or there is no overlap.
40800 OPERANDS is an array containing the destination and two input operands.
40801 CODE is the base operation (AND, IOR, XOR, NOT).
40802 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40803 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40804 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40807 rs6000_split_logical (rtx operands
[3],
40808 enum rtx_code code
,
40809 bool complement_final_p
,
40810 bool complement_op1_p
,
40811 bool complement_op2_p
)
40813 machine_mode mode
= GET_MODE (operands
[0]);
40814 machine_mode sub_mode
;
40816 int sub_size
, regno0
, regno1
, nregs
, i
;
40818 /* If this is DImode, use the specialized version that can run before
40819 register allocation. */
40820 if (mode
== DImode
&& !TARGET_POWERPC64
)
40822 rs6000_split_logical_di (operands
, code
, complement_final_p
,
40823 complement_op1_p
, complement_op2_p
);
40829 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
40830 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
40831 sub_size
= GET_MODE_SIZE (sub_mode
);
40832 regno0
= REGNO (op0
);
40833 regno1
= REGNO (op1
);
40835 gcc_assert (reload_completed
);
40836 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
40837 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
40839 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
40840 gcc_assert (nregs
> 1);
40842 if (op2
&& REG_P (op2
))
40843 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
40845 for (i
= 0; i
< nregs
; i
++)
40847 int offset
= i
* sub_size
;
40848 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
40849 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
40850 rtx sub_op2
= ((code
== NOT
)
40852 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
40854 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
40855 complement_final_p
, complement_op1_p
,
40863 /* Return true if the peephole2 can combine a load involving a combination of
40864 an addis instruction and a load with an offset that can be fused together on
40868 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
40869 rtx addis_value
, /* addis value. */
40870 rtx target
, /* target register that is loaded. */
40871 rtx mem
) /* bottom part of the memory addr. */
40876 /* Validate arguments. */
40877 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
40880 if (!base_reg_operand (target
, GET_MODE (target
)))
40883 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
40886 /* Allow sign/zero extension. */
40887 if (GET_CODE (mem
) == ZERO_EXTEND
40888 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
40889 mem
= XEXP (mem
, 0);
40894 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
40897 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
40898 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
40901 /* Validate that the register used to load the high value is either the
40902 register being loaded, or we can safely replace its use.
40904 This function is only called from the peephole2 pass and we assume that
40905 there are 2 instructions in the peephole (addis and load), so we want to
40906 check if the target register was not used in the memory address and the
40907 register to hold the addis result is dead after the peephole. */
40908 if (REGNO (addis_reg
) != REGNO (target
))
40910 if (reg_mentioned_p (target
, mem
))
40913 if (!peep2_reg_dead_p (2, addis_reg
))
40916 /* If the target register being loaded is the stack pointer, we must
40917 avoid loading any other value into it, even temporarily. */
40918 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
40922 base_reg
= XEXP (addr
, 0);
40923 return REGNO (addis_reg
) == REGNO (base_reg
);
40926 /* During the peephole2 pass, adjust and expand the insns for a load fusion
40927 sequence. We adjust the addis register to use the target register. If the
40928 load sign extends, we adjust the code to do the zero extending load, and an
40929 explicit sign extension later since the fusion only covers zero extending
40933 operands[0] register set with addis (to be replaced with target)
40934 operands[1] value set via addis
40935 operands[2] target register being loaded
40936 operands[3] D-form memory reference using operands[0]. */
40939 expand_fusion_gpr_load (rtx
*operands
)
40941 rtx addis_value
= operands
[1];
40942 rtx target
= operands
[2];
40943 rtx orig_mem
= operands
[3];
40944 rtx new_addr
, new_mem
, orig_addr
, offset
;
40945 enum rtx_code plus_or_lo_sum
;
40946 machine_mode target_mode
= GET_MODE (target
);
40947 machine_mode extend_mode
= target_mode
;
40948 machine_mode ptr_mode
= Pmode
;
40949 enum rtx_code extend
= UNKNOWN
;
40951 if (GET_CODE (orig_mem
) == ZERO_EXTEND
40952 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
40954 extend
= GET_CODE (orig_mem
);
40955 orig_mem
= XEXP (orig_mem
, 0);
40956 target_mode
= GET_MODE (orig_mem
);
40959 gcc_assert (MEM_P (orig_mem
));
40961 orig_addr
= XEXP (orig_mem
, 0);
40962 plus_or_lo_sum
= GET_CODE (orig_addr
);
40963 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
40965 offset
= XEXP (orig_addr
, 1);
40966 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
40967 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
40969 if (extend
!= UNKNOWN
)
40970 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
40972 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
40973 UNSPEC_FUSION_GPR
);
40974 emit_insn (gen_rtx_SET (target
, new_mem
));
40976 if (extend
== SIGN_EXTEND
)
40978 int sub_off
= ((BYTES_BIG_ENDIAN
)
40979 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
40982 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
40984 emit_insn (gen_rtx_SET (target
,
40985 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
40991 /* Emit the addis instruction that will be part of a fused instruction
40995 emit_fusion_addis (rtx target
, rtx addis_value
, const char *comment
,
40996 const char *mode_name
)
40999 char insn_template
[80];
41000 const char *addis_str
= NULL
;
41001 const char *comment_str
= ASM_COMMENT_START
;
41003 if (*comment_str
== ' ')
41006 /* Emit the addis instruction. */
41007 fuse_ops
[0] = target
;
41008 if (satisfies_constraint_L (addis_value
))
41010 fuse_ops
[1] = addis_value
;
41011 addis_str
= "lis %0,%v1";
41014 else if (GET_CODE (addis_value
) == PLUS
)
41016 rtx op0
= XEXP (addis_value
, 0);
41017 rtx op1
= XEXP (addis_value
, 1);
41019 if (REG_P (op0
) && CONST_INT_P (op1
)
41020 && satisfies_constraint_L (op1
))
41024 addis_str
= "addis %0,%1,%v2";
41028 else if (GET_CODE (addis_value
) == HIGH
)
41030 rtx value
= XEXP (addis_value
, 0);
41031 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
41033 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
41034 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
41036 addis_str
= "addis %0,%2,%1@toc@ha";
41038 else if (TARGET_XCOFF
)
41039 addis_str
= "addis %0,%1@u(%2)";
41042 gcc_unreachable ();
41045 else if (GET_CODE (value
) == PLUS
)
41047 rtx op0
= XEXP (value
, 0);
41048 rtx op1
= XEXP (value
, 1);
41050 if (GET_CODE (op0
) == UNSPEC
41051 && XINT (op0
, 1) == UNSPEC_TOCREL
41052 && CONST_INT_P (op1
))
41054 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
41055 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
41058 addis_str
= "addis %0,%2,%1+%3@toc@ha";
41060 else if (TARGET_XCOFF
)
41061 addis_str
= "addis %0,%1+%3@u(%2)";
41064 gcc_unreachable ();
41068 else if (satisfies_constraint_L (value
))
41070 fuse_ops
[1] = value
;
41071 addis_str
= "lis %0,%v1";
41074 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
41076 fuse_ops
[1] = value
;
41077 addis_str
= "lis %0,%1@ha";
41082 fatal_insn ("Could not generate addis value for fusion", addis_value
);
41084 sprintf (insn_template
, "%s\t\t%s %s, type %s", addis_str
, comment_str
,
41085 comment
, mode_name
);
41086 output_asm_insn (insn_template
, fuse_ops
);
41089 /* Emit a D-form load or store instruction that is the second instruction
41090 of a fusion sequence. */
41093 emit_fusion_load_store (rtx load_store_reg
, rtx addis_reg
, rtx offset
,
41094 const char *insn_str
)
41097 char insn_template
[80];
41099 fuse_ops
[0] = load_store_reg
;
41100 fuse_ops
[1] = addis_reg
;
41102 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
41104 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
41105 fuse_ops
[2] = offset
;
41106 output_asm_insn (insn_template
, fuse_ops
);
41109 else if (GET_CODE (offset
) == UNSPEC
41110 && XINT (offset
, 1) == UNSPEC_TOCREL
)
41113 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
41115 else if (TARGET_XCOFF
)
41116 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
41119 gcc_unreachable ();
41121 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
41122 output_asm_insn (insn_template
, fuse_ops
);
41125 else if (GET_CODE (offset
) == PLUS
41126 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
41127 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
41128 && CONST_INT_P (XEXP (offset
, 1)))
41130 rtx tocrel_unspec
= XEXP (offset
, 0);
41132 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
41134 else if (TARGET_XCOFF
)
41135 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
41138 gcc_unreachable ();
41140 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
41141 fuse_ops
[3] = XEXP (offset
, 1);
41142 output_asm_insn (insn_template
, fuse_ops
);
41145 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
41147 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
41149 fuse_ops
[2] = offset
;
41150 output_asm_insn (insn_template
, fuse_ops
);
41154 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
41159 /* Wrap a TOC address that can be fused to indicate that special fusion
41160 processing is needed. */
41163 fusion_wrap_memory_address (rtx old_mem
)
41165 rtx old_addr
= XEXP (old_mem
, 0);
41166 rtvec v
= gen_rtvec (1, old_addr
);
41167 rtx new_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_FUSION_ADDIS
);
41168 return replace_equiv_address_nv (old_mem
, new_addr
, false);
41171 /* Given an address, convert it into the addis and load offset parts. Addresses
41172 created during the peephole2 process look like:
41173 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
41174 (unspec [(...)] UNSPEC_TOCREL))
41176 Addresses created via toc fusion look like:
41177 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
41180 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
41184 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_FUSION_ADDIS
)
41186 lo
= XVECEXP (addr
, 0, 0);
41187 hi
= gen_rtx_HIGH (Pmode
, lo
);
41189 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
41191 hi
= XEXP (addr
, 0);
41192 lo
= XEXP (addr
, 1);
41195 gcc_unreachable ();
41201 /* Return a string to fuse an addis instruction with a gpr load to the same
41202 register that we loaded up the addis instruction. The address that is used
41203 is the logical address that was formed during peephole2:
41204 (lo_sum (high) (low-part))
41206 Or the address is the TOC address that is wrapped before register allocation:
41207 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
41209 The code is complicated, so we call output_asm_insn directly, and just
41213 emit_fusion_gpr_load (rtx target
, rtx mem
)
41218 const char *load_str
= NULL
;
41219 const char *mode_name
= NULL
;
41222 if (GET_CODE (mem
) == ZERO_EXTEND
)
41223 mem
= XEXP (mem
, 0);
41225 gcc_assert (REG_P (target
) && MEM_P (mem
));
41227 addr
= XEXP (mem
, 0);
41228 fusion_split_address (addr
, &addis_value
, &load_offset
);
41230 /* Now emit the load instruction to the same register. */
41231 mode
= GET_MODE (mem
);
41235 mode_name
= "char";
41240 mode_name
= "short";
41246 mode_name
= (mode
== SFmode
) ? "float" : "int";
41252 gcc_assert (TARGET_POWERPC64
);
41253 mode_name
= (mode
== DFmode
) ? "double" : "long";
41258 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
41261 /* Emit the addis instruction. */
41262 emit_fusion_addis (target
, addis_value
, "gpr load fusion", mode_name
);
41264 /* Emit the D-form load instruction. */
41265 emit_fusion_load_store (target
, target
, load_offset
, load_str
);
41271 /* Return true if the peephole2 can combine a load/store involving a
41272 combination of an addis instruction and the memory operation. This was
41273 added to the ISA 3.0 (power9) hardware. */
41276 fusion_p9_p (rtx addis_reg
, /* register set via addis. */
41277 rtx addis_value
, /* addis value. */
41278 rtx dest
, /* destination (memory or register). */
41279 rtx src
) /* source (register or memory). */
41281 rtx addr
, mem
, offset
;
41282 machine_mode mode
= GET_MODE (src
);
41284 /* Validate arguments. */
41285 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
41288 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
41291 /* Ignore extend operations that are part of the load. */
41292 if (GET_CODE (src
) == FLOAT_EXTEND
|| GET_CODE (src
) == ZERO_EXTEND
)
41293 src
= XEXP (src
, 0);
41295 /* Test for memory<-register or register<-memory. */
41296 if (fpr_reg_operand (src
, mode
) || int_reg_operand (src
, mode
))
41304 else if (MEM_P (src
))
41306 if (!fpr_reg_operand (dest
, mode
) && !int_reg_operand (dest
, mode
))
41315 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
41316 if (GET_CODE (addr
) == PLUS
)
41318 if (!rtx_equal_p (addis_reg
, XEXP (addr
, 0)))
41321 return satisfies_constraint_I (XEXP (addr
, 1));
41324 else if (GET_CODE (addr
) == LO_SUM
)
41326 if (!rtx_equal_p (addis_reg
, XEXP (addr
, 0)))
41329 offset
= XEXP (addr
, 1);
41330 if (TARGET_XCOFF
|| (TARGET_ELF
&& TARGET_POWERPC64
))
41331 return small_toc_ref (offset
, GET_MODE (offset
));
41333 else if (TARGET_ELF
&& !TARGET_POWERPC64
)
41334 return CONSTANT_P (offset
);
41340 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41344 operands[0] register set with addis
41345 operands[1] value set via addis
41346 operands[2] target register being loaded
41347 operands[3] D-form memory reference using operands[0].
41349 This is similar to the fusion introduced with power8, except it scales to
41350 both loads/stores and does not require the result register to be the same as
41351 the base register. At the moment, we only do this if register set with addis
41355 expand_fusion_p9_load (rtx
*operands
)
41357 rtx tmp_reg
= operands
[0];
41358 rtx addis_value
= operands
[1];
41359 rtx target
= operands
[2];
41360 rtx orig_mem
= operands
[3];
41361 rtx new_addr
, new_mem
, orig_addr
, offset
, set
, clobber
, insn
;
41362 enum rtx_code plus_or_lo_sum
;
41363 machine_mode target_mode
= GET_MODE (target
);
41364 machine_mode extend_mode
= target_mode
;
41365 machine_mode ptr_mode
= Pmode
;
41366 enum rtx_code extend
= UNKNOWN
;
41368 if (GET_CODE (orig_mem
) == FLOAT_EXTEND
|| GET_CODE (orig_mem
) == ZERO_EXTEND
)
41370 extend
= GET_CODE (orig_mem
);
41371 orig_mem
= XEXP (orig_mem
, 0);
41372 target_mode
= GET_MODE (orig_mem
);
41375 gcc_assert (MEM_P (orig_mem
));
41377 orig_addr
= XEXP (orig_mem
, 0);
41378 plus_or_lo_sum
= GET_CODE (orig_addr
);
41379 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
41381 offset
= XEXP (orig_addr
, 1);
41382 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
41383 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
41385 if (extend
!= UNKNOWN
)
41386 new_mem
= gen_rtx_fmt_e (extend
, extend_mode
, new_mem
);
41388 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
41391 set
= gen_rtx_SET (target
, new_mem
);
41392 clobber
= gen_rtx_CLOBBER (VOIDmode
, tmp_reg
);
41393 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
));
41399 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41403 operands[0] register set with addis
41404 operands[1] value set via addis
41405 operands[2] target D-form memory being stored to
41406 operands[3] register being stored
41408 This is similar to the fusion introduced with power8, except it scales to
41409 both loads/stores and does not require the result register to be the same as
41410 the base register. At the moment, we only do this if register set with addis
41414 expand_fusion_p9_store (rtx
*operands
)
41416 rtx tmp_reg
= operands
[0];
41417 rtx addis_value
= operands
[1];
41418 rtx orig_mem
= operands
[2];
41419 rtx src
= operands
[3];
41420 rtx new_addr
, new_mem
, orig_addr
, offset
, set
, clobber
, insn
, new_src
;
41421 enum rtx_code plus_or_lo_sum
;
41422 machine_mode target_mode
= GET_MODE (orig_mem
);
41423 machine_mode ptr_mode
= Pmode
;
41425 gcc_assert (MEM_P (orig_mem
));
41427 orig_addr
= XEXP (orig_mem
, 0);
41428 plus_or_lo_sum
= GET_CODE (orig_addr
);
41429 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
41431 offset
= XEXP (orig_addr
, 1);
41432 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
41433 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
41435 new_src
= gen_rtx_UNSPEC (target_mode
, gen_rtvec (1, src
),
41438 set
= gen_rtx_SET (new_mem
, new_src
);
41439 clobber
= gen_rtx_CLOBBER (VOIDmode
, tmp_reg
);
41440 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
));
41446 /* Return a string to fuse an addis instruction with a load using extended
41447 fusion. The address that is used is the logical address that was formed
41448 during peephole2: (lo_sum (high) (low-part))
41450 The code is complicated, so we call output_asm_insn directly, and just
41454 emit_fusion_p9_load (rtx reg
, rtx mem
, rtx tmp_reg
)
41456 machine_mode mode
= GET_MODE (reg
);
41460 const char *load_string
;
41463 if (GET_CODE (mem
) == FLOAT_EXTEND
|| GET_CODE (mem
) == ZERO_EXTEND
)
41465 mem
= XEXP (mem
, 0);
41466 mode
= GET_MODE (mem
);
41469 if (GET_CODE (reg
) == SUBREG
)
41471 gcc_assert (SUBREG_BYTE (reg
) == 0);
41472 reg
= SUBREG_REG (reg
);
41476 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg
);
41479 if (FP_REGNO_P (r
))
41481 if (mode
== SFmode
)
41482 load_string
= "lfs";
41483 else if (mode
== DFmode
|| mode
== DImode
)
41484 load_string
= "lfd";
41486 gcc_unreachable ();
41488 else if (ALTIVEC_REGNO_P (r
) && TARGET_P9_DFORM_SCALAR
)
41490 if (mode
== SFmode
)
41491 load_string
= "lxssp";
41492 else if (mode
== DFmode
|| mode
== DImode
)
41493 load_string
= "lxsd";
41495 gcc_unreachable ();
41497 else if (INT_REGNO_P (r
))
41502 load_string
= "lbz";
41505 load_string
= "lhz";
41509 load_string
= "lwz";
41513 if (!TARGET_POWERPC64
)
41514 gcc_unreachable ();
41515 load_string
= "ld";
41518 gcc_unreachable ();
41522 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg
);
41525 fatal_insn ("emit_fusion_p9_load not MEM", mem
);
41527 addr
= XEXP (mem
, 0);
41528 fusion_split_address (addr
, &hi
, &lo
);
41530 /* Emit the addis instruction. */
41531 emit_fusion_addis (tmp_reg
, hi
, "power9 load fusion", GET_MODE_NAME (mode
));
41533 /* Emit the D-form load instruction. */
41534 emit_fusion_load_store (reg
, tmp_reg
, lo
, load_string
);
41539 /* Return a string to fuse an addis instruction with a store using extended
41540 fusion. The address that is used is the logical address that was formed
41541 during peephole2: (lo_sum (high) (low-part))
41543 The code is complicated, so we call output_asm_insn directly, and just
41547 emit_fusion_p9_store (rtx mem
, rtx reg
, rtx tmp_reg
)
41549 machine_mode mode
= GET_MODE (reg
);
41553 const char *store_string
;
41556 if (GET_CODE (reg
) == SUBREG
)
41558 gcc_assert (SUBREG_BYTE (reg
) == 0);
41559 reg
= SUBREG_REG (reg
);
41563 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg
);
41566 if (FP_REGNO_P (r
))
41568 if (mode
== SFmode
)
41569 store_string
= "stfs";
41570 else if (mode
== DFmode
)
41571 store_string
= "stfd";
41573 gcc_unreachable ();
41575 else if (ALTIVEC_REGNO_P (r
) && TARGET_P9_DFORM_SCALAR
)
41577 if (mode
== SFmode
)
41578 store_string
= "stxssp";
41579 else if (mode
== DFmode
|| mode
== DImode
)
41580 store_string
= "stxsd";
41582 gcc_unreachable ();
41584 else if (INT_REGNO_P (r
))
41589 store_string
= "stb";
41592 store_string
= "sth";
41596 store_string
= "stw";
41600 if (!TARGET_POWERPC64
)
41601 gcc_unreachable ();
41602 store_string
= "std";
41605 gcc_unreachable ();
41609 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg
);
41612 fatal_insn ("emit_fusion_p9_store not MEM", mem
);
41614 addr
= XEXP (mem
, 0);
41615 fusion_split_address (addr
, &hi
, &lo
);
41617 /* Emit the addis instruction. */
41618 emit_fusion_addis (tmp_reg
, hi
, "power9 store fusion", GET_MODE_NAME (mode
));
41620 /* Emit the D-form load instruction. */
41621 emit_fusion_load_store (reg
, tmp_reg
, lo
, store_string
);
41627 /* Analyze vector computations and remove unnecessary doubleword
41628 swaps (xxswapdi instructions). This pass is performed only
41629 for little-endian VSX code generation.
41631 For this specific case, loads and stores of 4x32 and 2x64 vectors
   are inefficient.  These are implemented using the lvxd2x and
   stvxd2x instructions, which invert the order of doublewords in
41634 a vector register. Thus the code generation inserts an xxswapdi
41635 after each such load, and prior to each such store. (For spill
41636 code after register assignment, an additional xxswapdi is inserted
41637 following each store in order to return a hard register to its
41640 The extra xxswapdi instructions reduce performance. This can be
41641 particularly bad for vectorized code. The purpose of this pass
41642 is to reduce the number of xxswapdi instructions required for
41645 The primary insight is that much code that operates on vectors
41646 does not care about the relative order of elements in a register,
41647 so long as the correct memory order is preserved. If we have
41648 a computation where all input values are provided by lvxd2x/xxswapdi
41649 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
41650 and all intermediate computations are pure SIMD (independent of
41651 element order), then all the xxswapdi's associated with the loads
41652 and stores may be removed.
41654 This pass uses some of the infrastructure and logical ideas from
41655 the "web" pass in web.c. We create maximal webs of computations
41656 fitting the description above using union-find. Each such web is
41657 then optimized by removing its unnecessary xxswapdi instructions.
41659 The pass is placed prior to global optimization so that we can
41660 perform the optimization in the safest and simplest way possible;
41661 that is, by replacing each xxswapdi insn with a register copy insn.
41662 Subsequent forward propagation will remove copies where possible.
41664 There are some operations sensitive to element order for which we
41665 can still allow the operation, provided we modify those operations.
41666 These include CONST_VECTORs, for which we must swap the first and
41667 second halves of the constant vector; and SUBREGs, for which we
41668 must adjust the byte offset to account for the swapped doublewords.
41669 A remaining opportunity would be non-immediate-form splats, for
41670 which we should adjust the selected lane of the input. We should
41671 also make code generation adjustments for sum-across operations,
41672 since this is a common vectorizer reduction.
41674 Because we run prior to the first split, we can see loads and stores
41675 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
41676 vector loads and stores that have not yet been split into a permuting
41677 load/store and a swap. (One way this can happen is with a builtin
41678 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
41679 than deleting a swap, we convert the load/store into a permuting
41680 load/store (which effectively removes the swap). */
41682 /* Notes on Permutes
41684 We do not currently handle computations that contain permutes. There
41685 is a general transformation that can be performed correctly, but it
41686 may introduce more expensive code than it replaces. To handle these
41687 would require a cost model to determine when to perform the optimization.
41688 This commentary records how this could be done if desired.
41690 The most general permute is something like this (example for V16QI):
41692 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41693 (parallel [(const_int a0) (const_int a1)
41695 (const_int a14) (const_int a15)]))
41697 where a0,...,a15 are in [0,31] and select elements from op1 and op2
41698 to produce in the result.
41700 Regardless of mode, we can convert the PARALLEL to a mask of 16
41701 byte-element selectors. Let's call this M, with M[i] representing
41702 the ith byte-element selector value. Then if we swap doublewords
41703 throughout the computation, we can get correct behavior by replacing
41704 M with M' as follows:
41706 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
41707 { ((M[i]+8)%16)+16 : M[i] in [16,31]
41709 This seems promising at first, since we are just replacing one mask
41710 with another. But certain masks are preferable to others. If M
41711 is a mask that matches a vmrghh pattern, for example, M' certainly
41712 will not. Instead of a single vmrghh, we would generate a load of
41713 M' and a vperm. So we would need to know how many xxswapd's we can
41714 remove as a result of this transformation to determine if it's
41715 profitable; and preferably the logic would need to be aware of all
41716 the special preferable masks.
41718 Another form of permute is an UNSPEC_VPERM, in which the mask is
41719 already in a register. In some cases, this mask may be a constant
41720 that we can discover with ud-chains, in which case the above
41721 transformation is ok. However, the common usage here is for the
41722 mask to be produced by an UNSPEC_LVSL, in which case the mask
41723 cannot be known at compile time. In such a case we would have to
41724 generate several instructions to compute M' as above at run time,
41725 and a cost model is needed again.
41727 However, when the mask M for an UNSPEC_VPERM is loaded from the
41728 constant pool, we can replace M with M' as above at no cost
41729 beyond adding a constant pool entry. */
41731 /* This is based on the union-find logic in web.c. web_entry_base is
41732 defined in df.h. */
41733 class swap_web_entry
: public web_entry_base
41736 /* Pointer to the insn. */
41738 /* Set if insn contains a mention of a vector register. All other
41739 fields are undefined if this field is unset. */
41740 unsigned int is_relevant
: 1;
41741 /* Set if insn is a load. */
41742 unsigned int is_load
: 1;
41743 /* Set if insn is a store. */
41744 unsigned int is_store
: 1;
41745 /* Set if insn is a doubleword swap. This can either be a register swap
41746 or a permuting load or store (test is_load and is_store for this). */
41747 unsigned int is_swap
: 1;
41748 /* Set if the insn has a live-in use of a parameter register. */
41749 unsigned int is_live_in
: 1;
41750 /* Set if the insn has a live-out def of a return register. */
41751 unsigned int is_live_out
: 1;
41752 /* Set if the insn contains a subreg reference of a vector register. */
41753 unsigned int contains_subreg
: 1;
41754 /* Set if the insn contains a 128-bit integer operand. */
41755 unsigned int is_128_int
: 1;
41756 /* Set if this is a call-insn. */
41757 unsigned int is_call
: 1;
41758 /* Set if this insn does not perform a vector operation for which
41759 element order matters, or if we know how to fix it up if it does.
41760 Undefined if is_swap is set. */
41761 unsigned int is_swappable
: 1;
41762 /* A nonzero value indicates what kind of special handling for this
41763 insn is required if doublewords are swapped. Undefined if
41764 is_swappable is not set. */
41765 unsigned int special_handling
: 4;
41766 /* Set if the web represented by this entry cannot be optimized. */
41767 unsigned int web_not_optimizable
: 1;
41768 /* Set if this insn should be deleted. */
41769 unsigned int will_delete
: 1;
/* Kinds of special handling an otherwise-swappable insn may need.
   SH_NONE must be zero so that a freshly-initialized *special means
   "no special handling".  */
enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};
41785 /* Union INSN with all insns containing definitions that reach USE.
41786 Detect whether USE is live-in to the current function. */
41788 union_defs (swap_web_entry
*insn_entry
, rtx insn
, df_ref use
)
41790 struct df_link
*link
= DF_REF_CHAIN (use
);
41793 insn_entry
[INSN_UID (insn
)].is_live_in
= 1;
41797 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
41798 insn_entry
[INSN_UID (insn
)].is_live_in
= 1;
41800 if (DF_REF_INSN_INFO (link
->ref
))
41802 rtx def_insn
= DF_REF_INSN (link
->ref
);
41803 (void)unionfind_union (insn_entry
+ INSN_UID (insn
),
41804 insn_entry
+ INSN_UID (def_insn
));
41811 /* Union INSN with all insns containing uses reached from DEF.
41812 Detect whether DEF is live-out from the current function. */
41814 union_uses (swap_web_entry
*insn_entry
, rtx insn
, df_ref def
)
41816 struct df_link
*link
= DF_REF_CHAIN (def
);
41819 insn_entry
[INSN_UID (insn
)].is_live_out
= 1;
41823 /* This could be an eh use or some other artificial use;
41824 we treat these all the same (killing the optimization). */
41825 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
41826 insn_entry
[INSN_UID (insn
)].is_live_out
= 1;
41828 if (DF_REF_INSN_INFO (link
->ref
))
41830 rtx use_insn
= DF_REF_INSN (link
->ref
);
41831 (void)unionfind_union (insn_entry
+ INSN_UID (insn
),
41832 insn_entry
+ INSN_UID (use_insn
));
41839 /* Return 1 iff INSN is a load insn, including permuting loads that
41840 represent an lvxd2x instruction; else return 0. */
41841 static unsigned int
41842 insn_is_load_p (rtx insn
)
41844 rtx body
= PATTERN (insn
);
41846 if (GET_CODE (body
) == SET
)
41848 if (GET_CODE (SET_SRC (body
)) == MEM
)
41851 if (GET_CODE (SET_SRC (body
)) == VEC_SELECT
41852 && GET_CODE (XEXP (SET_SRC (body
), 0)) == MEM
)
41858 if (GET_CODE (body
) != PARALLEL
)
41861 rtx set
= XVECEXP (body
, 0, 0);
41863 if (GET_CODE (set
) == SET
&& GET_CODE (SET_SRC (set
)) == MEM
)
41869 /* Return 1 iff INSN is a store insn, including permuting stores that
41870 represent an stvxd2x instruction; else return 0. */
41871 static unsigned int
41872 insn_is_store_p (rtx insn
)
41874 rtx body
= PATTERN (insn
);
41875 if (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == MEM
)
41877 if (GET_CODE (body
) != PARALLEL
)
41879 rtx set
= XVECEXP (body
, 0, 0);
41880 if (GET_CODE (set
) == SET
&& GET_CODE (SET_DEST (set
)) == MEM
)
41885 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41886 a permuting load, or a permuting store. */
41887 static unsigned int
41888 insn_is_swap_p (rtx insn
)
41890 rtx body
= PATTERN (insn
);
41891 if (GET_CODE (body
) != SET
)
41893 rtx rhs
= SET_SRC (body
);
41894 if (GET_CODE (rhs
) != VEC_SELECT
)
41896 rtx parallel
= XEXP (rhs
, 1);
41897 if (GET_CODE (parallel
) != PARALLEL
)
41899 unsigned int len
= XVECLEN (parallel
, 0);
41900 if (len
!= 2 && len
!= 4 && len
!= 8 && len
!= 16)
41902 for (unsigned int i
= 0; i
< len
/ 2; ++i
)
41904 rtx op
= XVECEXP (parallel
, 0, i
);
41905 if (GET_CODE (op
) != CONST_INT
|| INTVAL (op
) != len
/ 2 + i
)
41908 for (unsigned int i
= len
/ 2; i
< len
; ++i
)
41910 rtx op
= XVECEXP (parallel
, 0, i
);
41911 if (GET_CODE (op
) != CONST_INT
|| INTVAL (op
) != i
- len
/ 2)
41917 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
41919 const_load_sequence_p (swap_web_entry
*insn_entry
, rtx insn
)
41921 unsigned uid
= INSN_UID (insn
);
41922 if (!insn_entry
[uid
].is_swap
|| insn_entry
[uid
].is_load
)
41925 /* Find the unique use in the swap and locate its def. If the def
41926 isn't unique, punt. */
41927 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
41929 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
41931 struct df_link
*def_link
= DF_REF_CHAIN (use
);
41932 if (!def_link
|| def_link
->next
)
41935 rtx def_insn
= DF_REF_INSN (def_link
->ref
);
41936 unsigned uid2
= INSN_UID (def_insn
);
41937 if (!insn_entry
[uid2
].is_load
|| !insn_entry
[uid2
].is_swap
)
41940 rtx body
= PATTERN (def_insn
);
41941 if (GET_CODE (body
) != SET
41942 || GET_CODE (SET_SRC (body
)) != VEC_SELECT
41943 || GET_CODE (XEXP (SET_SRC (body
), 0)) != MEM
)
41946 rtx mem
= XEXP (SET_SRC (body
), 0);
41947 rtx base_reg
= XEXP (mem
, 0);
41950 insn_info
= DF_INSN_INFO_GET (def_insn
);
41951 FOR_EACH_INSN_INFO_USE (base_use
, insn_info
)
41953 if (!rtx_equal_p (DF_REF_REG (base_use
), base_reg
))
41956 struct df_link
*base_def_link
= DF_REF_CHAIN (base_use
);
41957 if (!base_def_link
|| base_def_link
->next
)
41960 rtx tocrel_insn
= DF_REF_INSN (base_def_link
->ref
);
41961 rtx tocrel_body
= PATTERN (tocrel_insn
);
41963 if (GET_CODE (tocrel_body
) != SET
)
41965 /* There is an extra level of indirection for small/large
41967 rtx tocrel_expr
= SET_SRC (tocrel_body
);
41968 if (GET_CODE (tocrel_expr
) == MEM
)
41969 tocrel_expr
= XEXP (tocrel_expr
, 0);
41970 if (!toc_relative_expr_p (tocrel_expr
, false))
41972 split_const (XVECEXP (tocrel_base
, 0, 0), &base
, &offset
);
41973 if (GET_CODE (base
) != SYMBOL_REF
|| !CONSTANT_POOL_ADDRESS_P (base
))
41980 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
41981 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
41983 v2df_reduction_p (rtx op
)
41985 if (GET_MODE (op
) != V2DFmode
)
41988 enum rtx_code code
= GET_CODE (op
);
41989 if (code
!= PLUS
&& code
!= SMIN
&& code
!= SMAX
)
41992 rtx concat
= XEXP (op
, 0);
41993 if (GET_CODE (concat
) != VEC_CONCAT
)
41996 rtx select0
= XEXP (concat
, 0);
41997 rtx select1
= XEXP (concat
, 1);
41998 if (GET_CODE (select0
) != VEC_SELECT
|| GET_CODE (select1
) != VEC_SELECT
)
42001 rtx reg0
= XEXP (select0
, 0);
42002 rtx reg1
= XEXP (select1
, 0);
42003 if (!rtx_equal_p (reg0
, reg1
) || !REG_P (reg0
))
42006 rtx parallel0
= XEXP (select0
, 1);
42007 rtx parallel1
= XEXP (select1
, 1);
42008 if (GET_CODE (parallel0
) != PARALLEL
|| GET_CODE (parallel1
) != PARALLEL
)
42011 if (!rtx_equal_p (XVECEXP (parallel0
, 0, 0), const1_rtx
)
42012 || !rtx_equal_p (XVECEXP (parallel1
, 0, 0), const0_rtx
))
42018 /* Return 1 iff OP is an operand that will not be affected by having
42019 vector doublewords swapped in memory. */
42020 static unsigned int
42021 rtx_is_swappable_p (rtx op
, unsigned int *special
)
42023 enum rtx_code code
= GET_CODE (op
);
42042 *special
= SH_CONST_VECTOR
;
42046 case VEC_DUPLICATE
:
42047 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
42048 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
42049 it represents a vector splat for which we can do special
42051 if (GET_CODE (XEXP (op
, 0)) == CONST_INT
)
42053 else if (REG_P (XEXP (op
, 0))
42054 && GET_MODE_INNER (GET_MODE (op
)) == GET_MODE (XEXP (op
, 0)))
42055 /* This catches V2DF and V2DI splat, at a minimum. */
42057 else if (GET_CODE (XEXP (op
, 0)) == TRUNCATE
42058 && REG_P (XEXP (XEXP (op
, 0), 0))
42059 && GET_MODE_INNER (GET_MODE (op
)) == GET_MODE (XEXP (op
, 0)))
42060 /* This catches splat of a truncated value. */
42062 else if (GET_CODE (XEXP (op
, 0)) == VEC_SELECT
)
42063 /* If the duplicated item is from a select, defer to the select
42064 processing to see if we can change the lane for the splat. */
42065 return rtx_is_swappable_p (XEXP (op
, 0), special
);
42070 /* A vec_extract operation is ok if we change the lane. */
42071 if (GET_CODE (XEXP (op
, 0)) == REG
42072 && GET_MODE_INNER (GET_MODE (XEXP (op
, 0))) == GET_MODE (op
)
42073 && GET_CODE ((parallel
= XEXP (op
, 1))) == PARALLEL
42074 && XVECLEN (parallel
, 0) == 1
42075 && GET_CODE (XVECEXP (parallel
, 0, 0)) == CONST_INT
)
42077 *special
= SH_EXTRACT
;
42080 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
42081 XXPERMDI is a swap operation, it will be identified by
42082 insn_is_swap_p and therefore we won't get here. */
42083 else if (GET_CODE (XEXP (op
, 0)) == VEC_CONCAT
42084 && (GET_MODE (XEXP (op
, 0)) == V4DFmode
42085 || GET_MODE (XEXP (op
, 0)) == V4DImode
)
42086 && GET_CODE ((parallel
= XEXP (op
, 1))) == PARALLEL
42087 && XVECLEN (parallel
, 0) == 2
42088 && GET_CODE (XVECEXP (parallel
, 0, 0)) == CONST_INT
42089 && GET_CODE (XVECEXP (parallel
, 0, 1)) == CONST_INT
)
42091 *special
= SH_XXPERMDI
;
42094 else if (v2df_reduction_p (op
))
42101 /* Various operations are unsafe for this optimization, at least
42102 without significant additional work. Permutes are obviously
42103 problematic, as both the permute control vector and the ordering
42104 of the target values are invalidated by doubleword swapping.
42105 Vector pack and unpack modify the number of vector lanes.
42106 Merge-high/low will not operate correctly on swapped operands.
42107 Vector shifts across element boundaries are clearly uncool,
42108 as are vector select and concatenate operations. Vector
42109 sum-across instructions define one operand with a specific
42110 order-dependent element, so additional fixup code would be
42111 needed to make those work. Vector set and non-immediate-form
42112 vector splat are element-order sensitive. A few of these
42113 cases might be workable with special handling if required.
42114 Adding cost modeling would be appropriate in some cases. */
42115 int val
= XINT (op
, 1);
42120 case UNSPEC_VMRGH_DIRECT
:
42121 case UNSPEC_VMRGL_DIRECT
:
42122 case UNSPEC_VPACK_SIGN_SIGN_SAT
:
42123 case UNSPEC_VPACK_SIGN_UNS_SAT
:
42124 case UNSPEC_VPACK_UNS_UNS_MOD
:
42125 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
:
42126 case UNSPEC_VPACK_UNS_UNS_SAT
:
42128 case UNSPEC_VPERM_UNS
:
42129 case UNSPEC_VPERMHI
:
42130 case UNSPEC_VPERMSI
:
42132 case UNSPEC_VSLDOI
:
42135 case UNSPEC_VSUM2SWS
:
42136 case UNSPEC_VSUM4S
:
42137 case UNSPEC_VSUM4UBS
:
42138 case UNSPEC_VSUMSWS
:
42139 case UNSPEC_VSUMSWS_DIRECT
:
42140 case UNSPEC_VSX_CONCAT
:
42141 case UNSPEC_VSX_SET
:
42142 case UNSPEC_VSX_SLDWI
:
42143 case UNSPEC_VUNPACK_HI_SIGN
:
42144 case UNSPEC_VUNPACK_HI_SIGN_DIRECT
:
42145 case UNSPEC_VUNPACK_LO_SIGN
:
42146 case UNSPEC_VUNPACK_LO_SIGN_DIRECT
:
42147 case UNSPEC_VUPKHPX
:
42148 case UNSPEC_VUPKHS_V4SF
:
42149 case UNSPEC_VUPKHU_V4SF
:
42150 case UNSPEC_VUPKLPX
:
42151 case UNSPEC_VUPKLS_V4SF
:
42152 case UNSPEC_VUPKLU_V4SF
:
42153 case UNSPEC_VSX_CVDPSPN
:
42154 case UNSPEC_VSX_CVSPDP
:
42155 case UNSPEC_VSX_CVSPDPN
:
42156 case UNSPEC_VSX_EXTRACT
:
42157 case UNSPEC_VSX_VSLO
:
42158 case UNSPEC_VSX_VEC_INIT
:
42160 case UNSPEC_VSPLT_DIRECT
:
42161 case UNSPEC_VSX_XXSPLTD
:
42162 *special
= SH_SPLAT
;
42164 case UNSPEC_REDUC_PLUS
:
42174 const char *fmt
= GET_RTX_FORMAT (code
);
42177 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
42178 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
42180 unsigned int special_op
= SH_NONE
;
42181 ok
&= rtx_is_swappable_p (XEXP (op
, i
), &special_op
);
42182 if (special_op
== SH_NONE
)
42184 /* Ensure we never have two kinds of special handling
42185 for the same insn. */
42186 if (*special
!= SH_NONE
&& *special
!= special_op
)
42188 *special
= special_op
;
42190 else if (fmt
[i
] == 'E')
42191 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
42193 unsigned int special_op
= SH_NONE
;
42194 ok
&= rtx_is_swappable_p (XVECEXP (op
, i
, j
), &special_op
);
42195 if (special_op
== SH_NONE
)
42197 /* Ensure we never have two kinds of special handling
42198 for the same insn. */
42199 if (*special
!= SH_NONE
&& *special
!= special_op
)
42201 *special
= special_op
;
42207 /* Return 1 iff INSN is an operand that will not be affected by
42208 having vector doublewords swapped in memory (in which case
42209 *SPECIAL is unchanged), or that can be modified to be correct
42210 if vector doublewords are swapped in memory (in which case
42211 *SPECIAL is changed to a value indicating how). */
42212 static unsigned int
42213 insn_is_swappable_p (swap_web_entry
*insn_entry
, rtx insn
,
42214 unsigned int *special
)
42216 /* Calls are always bad. */
42217 if (GET_CODE (insn
) == CALL_INSN
)
42220 /* Loads and stores seen here are not permuting, but we can still
42221 fix them up by converting them to permuting ones. Exceptions:
42222 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
42223 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
42224 for the SET source. Also we must now make an exception for lvx
42225 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
42226 explicit "& -16") since this leads to unrecognizable insns. */
42227 rtx body
= PATTERN (insn
);
42228 int i
= INSN_UID (insn
);
42230 if (insn_entry
[i
].is_load
)
42232 if (GET_CODE (body
) == SET
)
42234 rtx rhs
= SET_SRC (body
);
42235 /* Even without a swap, the RHS might be a vec_select for, say,
42236 a byte-reversing load. */
42237 if (GET_CODE (rhs
) != MEM
)
42239 if (GET_CODE (XEXP (rhs
, 0)) == AND
)
42242 *special
= SH_NOSWAP_LD
;
42249 if (insn_entry
[i
].is_store
)
42251 if (GET_CODE (body
) == SET
42252 && GET_CODE (SET_SRC (body
)) != UNSPEC
)
42254 rtx lhs
= SET_DEST (body
);
42255 /* Even without a swap, the LHS might be a vec_select for, say,
42256 a byte-reversing store. */
42257 if (GET_CODE (lhs
) != MEM
)
42259 if (GET_CODE (XEXP (lhs
, 0)) == AND
)
42262 *special
= SH_NOSWAP_ST
;
42269 /* A convert to single precision can be left as is provided that
42270 all of its uses are in xxspltw instructions that splat BE element
42272 if (GET_CODE (body
) == SET
42273 && GET_CODE (SET_SRC (body
)) == UNSPEC
42274 && XINT (SET_SRC (body
), 1) == UNSPEC_VSX_CVDPSPN
)
42277 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42279 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
42281 struct df_link
*link
= DF_REF_CHAIN (def
);
42285 for (; link
; link
= link
->next
) {
42286 rtx use_insn
= DF_REF_INSN (link
->ref
);
42287 rtx use_body
= PATTERN (use_insn
);
42288 if (GET_CODE (use_body
) != SET
42289 || GET_CODE (SET_SRC (use_body
)) != UNSPEC
42290 || XINT (SET_SRC (use_body
), 1) != UNSPEC_VSX_XXSPLTW
42291 || XVECEXP (SET_SRC (use_body
), 0, 1) != const0_rtx
)
42299 /* A concatenation of two doublewords is ok if we reverse the
42300 order of the inputs. */
42301 if (GET_CODE (body
) == SET
42302 && GET_CODE (SET_SRC (body
)) == VEC_CONCAT
42303 && (GET_MODE (SET_SRC (body
)) == V2DFmode
42304 || GET_MODE (SET_SRC (body
)) == V2DImode
))
42306 *special
= SH_CONCAT
;
42310 /* V2DF reductions are always swappable. */
42311 if (GET_CODE (body
) == PARALLEL
)
42313 rtx expr
= XVECEXP (body
, 0, 0);
42314 if (GET_CODE (expr
) == SET
42315 && v2df_reduction_p (SET_SRC (expr
)))
42319 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
42321 if (GET_CODE (body
) == SET
42322 && GET_CODE (SET_SRC (body
)) == UNSPEC
42323 && XINT (SET_SRC (body
), 1) == UNSPEC_VPERM
42324 && XVECLEN (SET_SRC (body
), 0) == 3
42325 && GET_CODE (XVECEXP (SET_SRC (body
), 0, 2)) == REG
)
42327 rtx mask_reg
= XVECEXP (SET_SRC (body
), 0, 2);
42328 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42330 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42331 if (rtx_equal_p (DF_REF_REG (use
), mask_reg
))
42333 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42334 /* Punt if multiple definitions for this reg. */
42335 if (def_link
&& !def_link
->next
&&
42336 const_load_sequence_p (insn_entry
,
42337 DF_REF_INSN (def_link
->ref
)))
42339 *special
= SH_VPERM
;
42345 /* Otherwise check the operands for vector lane violations. */
42346 return rtx_is_swappable_p (body
, special
);
/* Direction in which chain_contains_only_swaps walks a df chain:
   def-use chains for loads, use-def chains for stores.  */
enum chain_purpose { FOR_LOADS, FOR_STORES };
42351 /* Return true if the UD or DU chain headed by LINK is non-empty,
42352 and every entry on the chain references an insn that is a
42353 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
42354 register swap must have only permuting loads as reaching defs.
42355 If PURPOSE is FOR_STORES, each such register swap must have only
42356 register swaps or permuting stores as reached uses. */
42358 chain_contains_only_swaps (swap_web_entry
*insn_entry
, struct df_link
*link
,
42359 enum chain_purpose purpose
)
42364 for (; link
; link
= link
->next
)
42366 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link
->ref
))))
42369 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
42372 rtx reached_insn
= DF_REF_INSN (link
->ref
);
42373 unsigned uid
= INSN_UID (reached_insn
);
42374 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (reached_insn
);
42376 if (!insn_entry
[uid
].is_swap
|| insn_entry
[uid
].is_load
42377 || insn_entry
[uid
].is_store
)
42380 if (purpose
== FOR_LOADS
)
42383 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42385 struct df_link
*swap_link
= DF_REF_CHAIN (use
);
42389 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
42392 rtx swap_def_insn
= DF_REF_INSN (swap_link
->ref
);
42393 unsigned uid2
= INSN_UID (swap_def_insn
);
42395 /* Only permuting loads are allowed. */
42396 if (!insn_entry
[uid2
].is_swap
|| !insn_entry
[uid2
].is_load
)
42399 swap_link
= swap_link
->next
;
42403 else if (purpose
== FOR_STORES
)
42406 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
42408 struct df_link
*swap_link
= DF_REF_CHAIN (def
);
42412 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
42415 rtx swap_use_insn
= DF_REF_INSN (swap_link
->ref
);
42416 unsigned uid2
= INSN_UID (swap_use_insn
);
42418 /* Permuting stores or register swaps are allowed. */
42419 if (!insn_entry
[uid2
].is_swap
|| insn_entry
[uid2
].is_load
)
42422 swap_link
= swap_link
->next
;
42431 /* Mark the xxswapdi instructions associated with permuting loads and
42432 stores for removal. Note that we only flag them for deletion here,
42433 as there is a possibility of a swap being reached from multiple
42436 mark_swaps_for_removal (swap_web_entry
*insn_entry
, unsigned int i
)
42438 rtx insn
= insn_entry
[i
].insn
;
42439 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42441 if (insn_entry
[i
].is_load
)
42444 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
42446 struct df_link
*link
= DF_REF_CHAIN (def
);
42448 /* We know by now that these are swaps, so we can delete
42449 them confidently. */
42452 rtx use_insn
= DF_REF_INSN (link
->ref
);
42453 insn_entry
[INSN_UID (use_insn
)].will_delete
= 1;
42458 else if (insn_entry
[i
].is_store
)
42461 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42463 /* Ignore uses for addressability. */
42464 machine_mode mode
= GET_MODE (DF_REF_REG (use
));
42465 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
42468 struct df_link
*link
= DF_REF_CHAIN (use
);
42470 /* We know by now that these are swaps, so we can delete
42471 them confidently. */
42474 rtx def_insn
= DF_REF_INSN (link
->ref
);
42475 insn_entry
[INSN_UID (def_insn
)].will_delete
= 1;
42482 /* OP is either a CONST_VECTOR or an expression containing one.
42483 Swap the first half of the vector with the second in the first
42484 case. Recurse to find it in the second. */
42486 swap_const_vector_halves (rtx op
)
42489 enum rtx_code code
= GET_CODE (op
);
42490 if (GET_CODE (op
) == CONST_VECTOR
)
42492 int half_units
= GET_MODE_NUNITS (GET_MODE (op
)) / 2;
42493 for (i
= 0; i
< half_units
; ++i
)
42495 rtx temp
= CONST_VECTOR_ELT (op
, i
);
42496 CONST_VECTOR_ELT (op
, i
) = CONST_VECTOR_ELT (op
, i
+ half_units
);
42497 CONST_VECTOR_ELT (op
, i
+ half_units
) = temp
;
42503 const char *fmt
= GET_RTX_FORMAT (code
);
42504 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
42505 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
42506 swap_const_vector_halves (XEXP (op
, i
));
42507 else if (fmt
[i
] == 'E')
42508 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
42509 swap_const_vector_halves (XVECEXP (op
, i
, j
));
42513 /* Find all subregs of a vector expression that perform a narrowing,
42514 and adjust the subreg index to account for doubleword swapping. */
42516 adjust_subreg_index (rtx op
)
42518 enum rtx_code code
= GET_CODE (op
);
42520 && (GET_MODE_SIZE (GET_MODE (op
))
42521 < GET_MODE_SIZE (GET_MODE (XEXP (op
, 0)))))
42523 unsigned int index
= SUBREG_BYTE (op
);
42528 SUBREG_BYTE (op
) = index
;
42531 const char *fmt
= GET_RTX_FORMAT (code
);
42533 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
42534 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
42535 adjust_subreg_index (XEXP (op
, i
));
42536 else if (fmt
[i
] == 'E')
42537 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
42538 adjust_subreg_index (XVECEXP (op
, i
, j
));
42541 /* Convert the non-permuting load INSN to a permuting one. */
42543 permute_load (rtx_insn
*insn
)
42545 rtx body
= PATTERN (insn
);
42546 rtx mem_op
= SET_SRC (body
);
42547 rtx tgt_reg
= SET_DEST (body
);
42548 machine_mode mode
= GET_MODE (tgt_reg
);
42549 int n_elts
= GET_MODE_NUNITS (mode
);
42550 int half_elts
= n_elts
/ 2;
42551 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
42553 for (i
= 0, j
= half_elts
; i
< half_elts
; ++i
, ++j
)
42554 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42555 for (i
= half_elts
, j
= 0; j
< half_elts
; ++i
, ++j
)
42556 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42557 rtx sel
= gen_rtx_VEC_SELECT (mode
, mem_op
, par
);
42558 SET_SRC (body
) = sel
;
42559 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42560 df_insn_rescan (insn
);
42563 fprintf (dump_file
, "Replacing load %d with permuted load\n",
42567 /* Convert the non-permuting store INSN to a permuting one. */
42569 permute_store (rtx_insn
*insn
)
42571 rtx body
= PATTERN (insn
);
42572 rtx src_reg
= SET_SRC (body
);
42573 machine_mode mode
= GET_MODE (src_reg
);
42574 int n_elts
= GET_MODE_NUNITS (mode
);
42575 int half_elts
= n_elts
/ 2;
42576 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
42578 for (i
= 0, j
= half_elts
; i
< half_elts
; ++i
, ++j
)
42579 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42580 for (i
= half_elts
, j
= 0; j
< half_elts
; ++i
, ++j
)
42581 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42582 rtx sel
= gen_rtx_VEC_SELECT (mode
, src_reg
, par
);
42583 SET_SRC (body
) = sel
;
42584 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42585 df_insn_rescan (insn
);
42588 fprintf (dump_file
, "Replacing store %d with permuted store\n",
42592 /* Given OP that contains a vector extract operation, adjust the index
42593 of the extracted lane to account for the doubleword swap. */
42595 adjust_extract (rtx_insn
*insn
)
42597 rtx pattern
= PATTERN (insn
);
42598 if (GET_CODE (pattern
) == PARALLEL
)
42599 pattern
= XVECEXP (pattern
, 0, 0);
42600 rtx src
= SET_SRC (pattern
);
42601 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42602 account for that. */
42603 rtx sel
= GET_CODE (src
) == VEC_DUPLICATE
? XEXP (src
, 0) : src
;
42604 rtx par
= XEXP (sel
, 1);
42605 int half_elts
= GET_MODE_NUNITS (GET_MODE (XEXP (sel
, 0))) >> 1;
42606 int lane
= INTVAL (XVECEXP (par
, 0, 0));
42607 lane
= lane
>= half_elts
? lane
- half_elts
: lane
+ half_elts
;
42608 XVECEXP (par
, 0, 0) = GEN_INT (lane
);
42609 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42610 df_insn_rescan (insn
);
42613 fprintf (dump_file
, "Changing lane for extract %d\n", INSN_UID (insn
));
42616 /* Given OP that contains a vector direct-splat operation, adjust the index
42617 of the source lane to account for the doubleword swap. */
42619 adjust_splat (rtx_insn
*insn
)
42621 rtx body
= PATTERN (insn
);
42622 rtx unspec
= XEXP (body
, 1);
42623 int half_elts
= GET_MODE_NUNITS (GET_MODE (unspec
)) >> 1;
42624 int lane
= INTVAL (XVECEXP (unspec
, 0, 1));
42625 lane
= lane
>= half_elts
? lane
- half_elts
: lane
+ half_elts
;
42626 XVECEXP (unspec
, 0, 1) = GEN_INT (lane
);
42627 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42628 df_insn_rescan (insn
);
42631 fprintf (dump_file
, "Changing lane for splat %d\n", INSN_UID (insn
));
42634 /* Given OP that contains an XXPERMDI operation (that is not a doubleword
42635 swap), reverse the order of the source operands and adjust the indices
42636 of the source lanes to account for doubleword reversal. */
42638 adjust_xxpermdi (rtx_insn
*insn
)
42640 rtx set
= PATTERN (insn
);
42641 rtx select
= XEXP (set
, 1);
42642 rtx concat
= XEXP (select
, 0);
42643 rtx src0
= XEXP (concat
, 0);
42644 XEXP (concat
, 0) = XEXP (concat
, 1);
42645 XEXP (concat
, 1) = src0
;
42646 rtx parallel
= XEXP (select
, 1);
42647 int lane0
= INTVAL (XVECEXP (parallel
, 0, 0));
42648 int lane1
= INTVAL (XVECEXP (parallel
, 0, 1));
42649 int new_lane0
= 3 - lane1
;
42650 int new_lane1
= 3 - lane0
;
42651 XVECEXP (parallel
, 0, 0) = GEN_INT (new_lane0
);
42652 XVECEXP (parallel
, 0, 1) = GEN_INT (new_lane1
);
42653 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42654 df_insn_rescan (insn
);
42657 fprintf (dump_file
, "Changing lanes for xxpermdi %d\n", INSN_UID (insn
));
42660 /* Given OP that contains a VEC_CONCAT operation of two doublewords,
42661 reverse the order of those inputs. */
42663 adjust_concat (rtx_insn
*insn
)
42665 rtx set
= PATTERN (insn
);
42666 rtx concat
= XEXP (set
, 1);
42667 rtx src0
= XEXP (concat
, 0);
42668 XEXP (concat
, 0) = XEXP (concat
, 1);
42669 XEXP (concat
, 1) = src0
;
42670 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42671 df_insn_rescan (insn
);
42674 fprintf (dump_file
, "Reversing inputs for concat %d\n", INSN_UID (insn
));
42677 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
42678 constant pool to reflect swapped doublewords. */
42680 adjust_vperm (rtx_insn
*insn
)
42682 /* We previously determined that the UNSPEC_VPERM was fed by a
42683 swap of a swapping load of a TOC-relative constant pool symbol.
42684 Find the MEM in the swapping load and replace it with a MEM for
42685 the adjusted mask constant. */
42686 rtx set
= PATTERN (insn
);
42687 rtx mask_reg
= XVECEXP (SET_SRC (set
), 0, 2);
42689 /* Find the swap. */
42690 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42692 rtx_insn
*swap_insn
= 0;
42693 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42694 if (rtx_equal_p (DF_REF_REG (use
), mask_reg
))
42696 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42697 gcc_assert (def_link
&& !def_link
->next
);
42698 swap_insn
= DF_REF_INSN (def_link
->ref
);
42701 gcc_assert (swap_insn
);
42703 /* Find the load. */
42704 insn_info
= DF_INSN_INFO_GET (swap_insn
);
42705 rtx_insn
*load_insn
= 0;
42706 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42708 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42709 gcc_assert (def_link
&& !def_link
->next
);
42710 load_insn
= DF_REF_INSN (def_link
->ref
);
42713 gcc_assert (load_insn
);
42715 /* Find the TOC-relative symbol access. */
42716 insn_info
= DF_INSN_INFO_GET (load_insn
);
42717 rtx_insn
*tocrel_insn
= 0;
42718 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42720 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42721 gcc_assert (def_link
&& !def_link
->next
);
42722 tocrel_insn
= DF_REF_INSN (def_link
->ref
);
42725 gcc_assert (tocrel_insn
);
42727 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
42728 to set tocrel_base; otherwise it would be unnecessary as we've
42729 already established it will return true. */
42731 rtx tocrel_expr
= SET_SRC (PATTERN (tocrel_insn
));
42732 /* There is an extra level of indirection for small/large code models. */
42733 if (GET_CODE (tocrel_expr
) == MEM
)
42734 tocrel_expr
= XEXP (tocrel_expr
, 0);
42735 if (!toc_relative_expr_p (tocrel_expr
, false))
42736 gcc_unreachable ();
42737 split_const (XVECEXP (tocrel_base
, 0, 0), &base
, &offset
);
42738 rtx const_vector
= get_pool_constant (base
);
42739 /* With the extra indirection, get_pool_constant will produce the
42740 real constant from the reg_equal expression, so get the real
42742 if (GET_CODE (const_vector
) == SYMBOL_REF
)
42743 const_vector
= get_pool_constant (const_vector
);
42744 gcc_assert (GET_CODE (const_vector
) == CONST_VECTOR
);
42746 /* Create an adjusted mask from the initial mask. */
42747 unsigned int new_mask
[16], i
, val
;
42748 for (i
= 0; i
< 16; ++i
) {
42749 val
= INTVAL (XVECEXP (const_vector
, 0, i
));
42751 new_mask
[i
] = (val
+ 8) % 16;
42753 new_mask
[i
] = ((val
+ 8) % 16) + 16;
42756 /* Create a new CONST_VECTOR and a MEM that references it. */
42757 rtx vals
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
42758 for (i
= 0; i
< 16; ++i
)
42759 XVECEXP (vals
, 0, i
) = GEN_INT (new_mask
[i
]);
42760 rtx new_const_vector
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (vals
, 0));
42761 rtx new_mem
= force_const_mem (V16QImode
, new_const_vector
);
42762 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
42763 can't recognize. Force the SYMBOL_REF into a register. */
42764 if (!REG_P (XEXP (new_mem
, 0))) {
42765 rtx base_reg
= force_reg (Pmode
, XEXP (new_mem
, 0));
42766 XEXP (new_mem
, 0) = base_reg
;
42767 /* Move the newly created insn ahead of the load insn. */
42768 rtx_insn
*force_insn
= get_last_insn ();
42769 remove_insn (force_insn
);
42770 rtx_insn
*before_load_insn
= PREV_INSN (load_insn
);
42771 add_insn_after (force_insn
, before_load_insn
, BLOCK_FOR_INSN (load_insn
));
42772 df_insn_rescan (before_load_insn
);
42773 df_insn_rescan (force_insn
);
42776 /* Replace the MEM in the load instruction and rescan it. */
42777 XEXP (SET_SRC (PATTERN (load_insn
)), 0) = new_mem
;
42778 INSN_CODE (load_insn
) = -1; /* Force re-recognition. */
42779 df_insn_rescan (load_insn
);
42782 fprintf (dump_file
, "Adjusting mask for vperm %d\n", INSN_UID (insn
));
42785 /* The insn described by INSN_ENTRY[I] can be swapped, but only
42786 with special handling. Take care of that here. */
42788 handle_special_swappables (swap_web_entry
*insn_entry
, unsigned i
)
42790 rtx_insn
*insn
= insn_entry
[i
].insn
;
42791 rtx body
= PATTERN (insn
);
42793 switch (insn_entry
[i
].special_handling
)
42796 gcc_unreachable ();
42797 case SH_CONST_VECTOR
:
42799 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
42800 gcc_assert (GET_CODE (body
) == SET
);
42801 rtx rhs
= SET_SRC (body
);
42802 swap_const_vector_halves (rhs
);
42804 fprintf (dump_file
, "Swapping constant halves in insn %d\n", i
);
42808 /* A subreg of the same size is already safe. For subregs that
42809 select a smaller portion of a reg, adjust the index for
42810 swapped doublewords. */
42811 adjust_subreg_index (body
);
42813 fprintf (dump_file
, "Adjusting subreg in insn %d\n", i
);
42816 /* Convert a non-permuting load to a permuting one. */
42817 permute_load (insn
);
42820 /* Convert a non-permuting store to a permuting one. */
42821 permute_store (insn
);
42824 /* Change the lane on an extract operation. */
42825 adjust_extract (insn
);
42828 /* Change the lane on a direct-splat operation. */
42829 adjust_splat (insn
);
42832 /* Change the lanes on an XXPERMDI operation. */
42833 adjust_xxpermdi (insn
);
42836 /* Reverse the order of a concatenation operation. */
42837 adjust_concat (insn
);
42840 /* Change the mask loaded from the constant pool for a VPERM. */
42841 adjust_vperm (insn
);
42846 /* Find the insn from the Ith table entry, which is known to be a
42847 register swap Y = SWAP(X). Replace it with a copy Y = X. */
42849 replace_swap_with_copy (swap_web_entry
*insn_entry
, unsigned i
)
42851 rtx_insn
*insn
= insn_entry
[i
].insn
;
42852 rtx body
= PATTERN (insn
);
42853 rtx src_reg
= XEXP (SET_SRC (body
), 0);
42854 rtx copy
= gen_rtx_SET (SET_DEST (body
), src_reg
);
42855 rtx_insn
*new_insn
= emit_insn_before (copy
, insn
);
42856 set_block_for_insn (new_insn
, BLOCK_FOR_INSN (insn
));
42857 df_insn_rescan (new_insn
);
42861 unsigned int new_uid
= INSN_UID (new_insn
);
42862 fprintf (dump_file
, "Replacing swap %d with copy %d\n", i
, new_uid
);
42865 df_insn_delete (insn
);
42866 remove_insn (insn
);
42867 insn
->set_deleted ();
42870 /* Dump the swap table to DUMP_FILE. */
42872 dump_swap_insn_table (swap_web_entry
*insn_entry
)
42874 int e
= get_max_uid ();
42875 fprintf (dump_file
, "\nRelevant insns with their flag settings\n\n");
42877 for (int i
= 0; i
< e
; ++i
)
42878 if (insn_entry
[i
].is_relevant
)
42880 swap_web_entry
*pred_entry
= (swap_web_entry
*)insn_entry
[i
].pred ();
42881 fprintf (dump_file
, "%6d %6d ", i
,
42882 pred_entry
&& pred_entry
->insn
42883 ? INSN_UID (pred_entry
->insn
) : 0);
42884 if (insn_entry
[i
].is_load
)
42885 fputs ("load ", dump_file
);
42886 if (insn_entry
[i
].is_store
)
42887 fputs ("store ", dump_file
);
42888 if (insn_entry
[i
].is_swap
)
42889 fputs ("swap ", dump_file
);
42890 if (insn_entry
[i
].is_live_in
)
42891 fputs ("live-in ", dump_file
);
42892 if (insn_entry
[i
].is_live_out
)
42893 fputs ("live-out ", dump_file
);
42894 if (insn_entry
[i
].contains_subreg
)
42895 fputs ("subreg ", dump_file
);
42896 if (insn_entry
[i
].is_128_int
)
42897 fputs ("int128 ", dump_file
);
42898 if (insn_entry
[i
].is_call
)
42899 fputs ("call ", dump_file
);
42900 if (insn_entry
[i
].is_swappable
)
42902 fputs ("swappable ", dump_file
);
42903 if (insn_entry
[i
].special_handling
== SH_CONST_VECTOR
)
42904 fputs ("special:constvec ", dump_file
);
42905 else if (insn_entry
[i
].special_handling
== SH_SUBREG
)
42906 fputs ("special:subreg ", dump_file
);
42907 else if (insn_entry
[i
].special_handling
== SH_NOSWAP_LD
)
42908 fputs ("special:load ", dump_file
);
42909 else if (insn_entry
[i
].special_handling
== SH_NOSWAP_ST
)
42910 fputs ("special:store ", dump_file
);
42911 else if (insn_entry
[i
].special_handling
== SH_EXTRACT
)
42912 fputs ("special:extract ", dump_file
);
42913 else if (insn_entry
[i
].special_handling
== SH_SPLAT
)
42914 fputs ("special:splat ", dump_file
);
42915 else if (insn_entry
[i
].special_handling
== SH_XXPERMDI
)
42916 fputs ("special:xxpermdi ", dump_file
);
42917 else if (insn_entry
[i
].special_handling
== SH_CONCAT
)
42918 fputs ("special:concat ", dump_file
);
42919 else if (insn_entry
[i
].special_handling
== SH_VPERM
)
42920 fputs ("special:vperm ", dump_file
);
42922 if (insn_entry
[i
].web_not_optimizable
)
42923 fputs ("unoptimizable ", dump_file
);
42924 if (insn_entry
[i
].will_delete
)
42925 fputs ("delete ", dump_file
);
42926 fputs ("\n", dump_file
);
42928 fputs ("\n", dump_file
);
42931 /* Return RTX with its address canonicalized to (reg) or (+ reg reg).
42932 Here RTX is an (& addr (const_int -16)). Always return a new copy
42933 to avoid problems with combine. */
42935 alignment_with_canonical_addr (rtx align
)
42938 rtx addr
= XEXP (align
, 0);
42943 else if (GET_CODE (addr
) == PLUS
)
42945 rtx addrop0
= XEXP (addr
, 0);
42946 rtx addrop1
= XEXP (addr
, 1);
42948 if (!REG_P (addrop0
))
42949 addrop0
= force_reg (GET_MODE (addrop0
), addrop0
);
42951 if (!REG_P (addrop1
))
42952 addrop1
= force_reg (GET_MODE (addrop1
), addrop1
);
42954 canon
= gen_rtx_PLUS (GET_MODE (addr
), addrop0
, addrop1
);
42958 canon
= force_reg (GET_MODE (addr
), addr
);
42960 return gen_rtx_AND (GET_MODE (align
), canon
, GEN_INT (-16));
42963 /* Check whether an rtx is an alignment mask, and if so, return
42964 a fully-expanded rtx for the masking operation. */
42966 alignment_mask (rtx_insn
*insn
)
42968 rtx body
= PATTERN (insn
);
42970 if (GET_CODE (body
) != SET
42971 || GET_CODE (SET_SRC (body
)) != AND
42972 || !REG_P (XEXP (SET_SRC (body
), 0)))
42975 rtx mask
= XEXP (SET_SRC (body
), 1);
42977 if (GET_CODE (mask
) == CONST_INT
)
42979 if (INTVAL (mask
) == -16)
42980 return alignment_with_canonical_addr (SET_SRC (body
));
42988 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42992 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42994 if (!rtx_equal_p (DF_REF_REG (use
), mask
))
42997 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42998 if (!def_link
|| def_link
->next
)
43001 rtx_insn
*const_insn
= DF_REF_INSN (def_link
->ref
);
43002 rtx const_body
= PATTERN (const_insn
);
43003 if (GET_CODE (const_body
) != SET
)
43006 real_mask
= SET_SRC (const_body
);
43008 if (GET_CODE (real_mask
) != CONST_INT
43009 || INTVAL (real_mask
) != -16)
43013 if (real_mask
== 0)
43016 return alignment_with_canonical_addr (SET_SRC (body
));
43019 /* Given INSN that's a load or store based at BASE_REG, look for a
43020 feeding computation that aligns its address on a 16-byte boundary. */
43022 find_alignment_op (rtx_insn
*insn
, rtx base_reg
)
43025 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43026 rtx and_operation
= 0;
43028 FOR_EACH_INSN_INFO_USE (base_use
, insn_info
)
43030 if (!rtx_equal_p (DF_REF_REG (base_use
), base_reg
))
43033 struct df_link
*base_def_link
= DF_REF_CHAIN (base_use
);
43034 if (!base_def_link
|| base_def_link
->next
)
43037 /* With stack-protector code enabled, and possibly in other
43038 circumstances, there may not be an associated insn for
43040 if (DF_REF_IS_ARTIFICIAL (base_def_link
->ref
))
43043 rtx_insn
*and_insn
= DF_REF_INSN (base_def_link
->ref
);
43044 and_operation
= alignment_mask (and_insn
);
43045 if (and_operation
!= 0)
43049 return and_operation
;
/* Per-uid bookkeeping for swap insns that the lvx/stvx recombination
   has chosen to replace with copies; actual replacement is deferred
   until after the insn walk.  */
struct del_info { bool replace; rtx_insn *replace_insn; };
43054 /* If INSN is the load for an lvx pattern, put it in canonical form. */
43056 recombine_lvx_pattern (rtx_insn
*insn
, del_info
*to_delete
)
43058 rtx body
= PATTERN (insn
);
43059 gcc_assert (GET_CODE (body
) == SET
43060 && GET_CODE (SET_SRC (body
)) == VEC_SELECT
43061 && GET_CODE (XEXP (SET_SRC (body
), 0)) == MEM
);
43063 rtx mem
= XEXP (SET_SRC (body
), 0);
43064 rtx base_reg
= XEXP (mem
, 0);
43066 rtx and_operation
= find_alignment_op (insn
, base_reg
);
43068 if (and_operation
!= 0)
43071 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43072 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
43074 struct df_link
*link
= DF_REF_CHAIN (def
);
43075 if (!link
|| link
->next
)
43078 rtx_insn
*swap_insn
= DF_REF_INSN (link
->ref
);
43079 if (!insn_is_swap_p (swap_insn
)
43080 || insn_is_load_p (swap_insn
)
43081 || insn_is_store_p (swap_insn
))
43084 /* Expected lvx pattern found. Change the swap to
43085 a copy, and propagate the AND operation into the
43087 to_delete
[INSN_UID (swap_insn
)].replace
= true;
43088 to_delete
[INSN_UID (swap_insn
)].replace_insn
= swap_insn
;
43090 XEXP (mem
, 0) = and_operation
;
43091 SET_SRC (body
) = mem
;
43092 INSN_CODE (insn
) = -1; /* Force re-recognition. */
43093 df_insn_rescan (insn
);
43096 fprintf (dump_file
, "lvx opportunity found at %d\n",
43102 /* If INSN is the store for an stvx pattern, put it in canonical form. */
43104 recombine_stvx_pattern (rtx_insn
*insn
, del_info
*to_delete
)
43106 rtx body
= PATTERN (insn
);
43107 gcc_assert (GET_CODE (body
) == SET
43108 && GET_CODE (SET_DEST (body
)) == MEM
43109 && GET_CODE (SET_SRC (body
)) == VEC_SELECT
);
43110 rtx mem
= SET_DEST (body
);
43111 rtx base_reg
= XEXP (mem
, 0);
43113 rtx and_operation
= find_alignment_op (insn
, base_reg
);
43115 if (and_operation
!= 0)
43117 rtx src_reg
= XEXP (SET_SRC (body
), 0);
43119 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43120 FOR_EACH_INSN_INFO_USE (src_use
, insn_info
)
43122 if (!rtx_equal_p (DF_REF_REG (src_use
), src_reg
))
43125 struct df_link
*link
= DF_REF_CHAIN (src_use
);
43126 if (!link
|| link
->next
)
43129 rtx_insn
*swap_insn
= DF_REF_INSN (link
->ref
);
43130 if (!insn_is_swap_p (swap_insn
)
43131 || insn_is_load_p (swap_insn
)
43132 || insn_is_store_p (swap_insn
))
43135 /* Expected stvx pattern found. Change the swap to
43136 a copy, and propagate the AND operation into the
43138 to_delete
[INSN_UID (swap_insn
)].replace
= true;
43139 to_delete
[INSN_UID (swap_insn
)].replace_insn
= swap_insn
;
43141 XEXP (mem
, 0) = and_operation
;
43142 SET_SRC (body
) = src_reg
;
43143 INSN_CODE (insn
) = -1; /* Force re-recognition. */
43144 df_insn_rescan (insn
);
43147 fprintf (dump_file
, "stvx opportunity found at %d\n",
43153 /* Look for patterns created from builtin lvx and stvx calls, and
43154 canonicalize them to be properly recognized as such. */
43156 recombine_lvx_stvx_patterns (function
*fun
)
43162 int num_insns
= get_max_uid ();
43163 del_info
*to_delete
= XCNEWVEC (del_info
, num_insns
);
43165 FOR_ALL_BB_FN (bb
, fun
)
43166 FOR_BB_INSNS (bb
, insn
)
43168 if (!NONDEBUG_INSN_P (insn
))
43171 if (insn_is_load_p (insn
) && insn_is_swap_p (insn
))
43172 recombine_lvx_pattern (insn
, to_delete
);
43173 else if (insn_is_store_p (insn
) && insn_is_swap_p (insn
))
43174 recombine_stvx_pattern (insn
, to_delete
);
43177 /* Turning swaps into copies is delayed until now, to avoid problems
43178 with deleting instructions during the insn walk. */
43179 for (i
= 0; i
< num_insns
; i
++)
43180 if (to_delete
[i
].replace
)
43182 rtx swap_body
= PATTERN (to_delete
[i
].replace_insn
);
43183 rtx src_reg
= XEXP (SET_SRC (swap_body
), 0);
43184 rtx copy
= gen_rtx_SET (SET_DEST (swap_body
), src_reg
);
43185 rtx_insn
*new_insn
= emit_insn_before (copy
,
43186 to_delete
[i
].replace_insn
);
43187 set_block_for_insn (new_insn
,
43188 BLOCK_FOR_INSN (to_delete
[i
].replace_insn
));
43189 df_insn_rescan (new_insn
);
43190 df_insn_delete (to_delete
[i
].replace_insn
);
43191 remove_insn (to_delete
[i
].replace_insn
);
43192 to_delete
[i
].replace_insn
->set_deleted ();
43198 /* Main entry point for this pass. */
43200 rs6000_analyze_swaps (function
*fun
)
43202 swap_web_entry
*insn_entry
;
43204 rtx_insn
*insn
, *curr_insn
= 0;
43206 /* Dataflow analysis for use-def chains. */
43207 df_set_flags (DF_RD_PRUNE_DEAD_DEFS
);
43208 df_chain_add_problem (DF_DU_CHAIN
| DF_UD_CHAIN
);
43210 df_set_flags (DF_DEFER_INSN_RESCAN
);
43212 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
43213 recombine_lvx_stvx_patterns (fun
);
43215 /* Allocate structure to represent webs of insns. */
43216 insn_entry
= XCNEWVEC (swap_web_entry
, get_max_uid ());
43218 /* Walk the insns to gather basic data. */
43219 FOR_ALL_BB_FN (bb
, fun
)
43220 FOR_BB_INSNS_SAFE (bb
, insn
, curr_insn
)
43222 unsigned int uid
= INSN_UID (insn
);
43223 if (NONDEBUG_INSN_P (insn
))
43225 insn_entry
[uid
].insn
= insn
;
43227 if (GET_CODE (insn
) == CALL_INSN
)
43228 insn_entry
[uid
].is_call
= 1;
43230 /* Walk the uses and defs to see if we mention vector regs.
43231 Record any constraints on optimization of such mentions. */
43232 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43234 FOR_EACH_INSN_INFO_USE (mention
, insn_info
)
43236 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43237 machine_mode mode
= GET_MODE (DF_REF_REAL_REG (mention
));
43239 /* If a use gets its value from a call insn, it will be
43240 a hard register and will look like (reg:V4SI 3 3).
43241 The df analysis creates two mentions for GPR3 and GPR4,
43242 both DImode. We must recognize this and treat it as a
43243 vector mention to ensure the call is unioned with this
43245 if (mode
== DImode
&& DF_REF_INSN_INFO (mention
))
43247 rtx feeder
= DF_REF_INSN (mention
);
43248 /* FIXME: It is pretty hard to get from the df mention
43249 to the mode of the use in the insn. We arbitrarily
43250 pick a vector mode here, even though the use might
43251 be a real DImode. We can be too conservative
43252 (create a web larger than necessary) because of
43253 this, so consider eventually fixing this. */
43254 if (GET_CODE (feeder
) == CALL_INSN
)
43258 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
) || mode
== TImode
)
43260 insn_entry
[uid
].is_relevant
= 1;
43261 if (mode
== TImode
|| mode
== V1TImode
43262 || FLOAT128_VECTOR_P (mode
))
43263 insn_entry
[uid
].is_128_int
= 1;
43264 if (DF_REF_INSN_INFO (mention
))
43265 insn_entry
[uid
].contains_subreg
43266 = !rtx_equal_p (DF_REF_REG (mention
),
43267 DF_REF_REAL_REG (mention
));
43268 union_defs (insn_entry
, insn
, mention
);
43271 FOR_EACH_INSN_INFO_DEF (mention
, insn_info
)
43273 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43274 machine_mode mode
= GET_MODE (DF_REF_REAL_REG (mention
));
43276 /* If we're loading up a hard vector register for a call,
43277 it looks like (set (reg:V4SI 9 9) (...)). The df
43278 analysis creates two mentions for GPR9 and GPR10, both
43279 DImode. So relying on the mode from the mentions
43280 isn't sufficient to ensure we union the call into the
43281 web with the parameter setup code. */
43282 if (mode
== DImode
&& GET_CODE (insn
) == SET
43283 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn
))))
43284 mode
= GET_MODE (SET_DEST (insn
));
43286 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
) || mode
== TImode
)
43288 insn_entry
[uid
].is_relevant
= 1;
43289 if (mode
== TImode
|| mode
== V1TImode
43290 || FLOAT128_VECTOR_P (mode
))
43291 insn_entry
[uid
].is_128_int
= 1;
43292 if (DF_REF_INSN_INFO (mention
))
43293 insn_entry
[uid
].contains_subreg
43294 = !rtx_equal_p (DF_REF_REG (mention
),
43295 DF_REF_REAL_REG (mention
));
43296 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
43297 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention
)))
43298 insn_entry
[uid
].is_live_out
= 1;
43299 union_uses (insn_entry
, insn
, mention
);
43303 if (insn_entry
[uid
].is_relevant
)
43305 /* Determine if this is a load or store. */
43306 insn_entry
[uid
].is_load
= insn_is_load_p (insn
);
43307 insn_entry
[uid
].is_store
= insn_is_store_p (insn
);
43309 /* Determine if this is a doubleword swap. If not,
43310 determine whether it can legally be swapped. */
43311 if (insn_is_swap_p (insn
))
43312 insn_entry
[uid
].is_swap
= 1;
43315 unsigned int special
= SH_NONE
;
43316 insn_entry
[uid
].is_swappable
43317 = insn_is_swappable_p (insn_entry
, insn
, &special
);
43318 if (special
!= SH_NONE
&& insn_entry
[uid
].contains_subreg
)
43319 insn_entry
[uid
].is_swappable
= 0;
43320 else if (special
!= SH_NONE
)
43321 insn_entry
[uid
].special_handling
= special
;
43322 else if (insn_entry
[uid
].contains_subreg
)
43323 insn_entry
[uid
].special_handling
= SH_SUBREG
;
43331 fprintf (dump_file
, "\nSwap insn entry table when first built\n");
43332 dump_swap_insn_table (insn_entry
);
43335 /* Record unoptimizable webs. */
43336 unsigned e
= get_max_uid (), i
;
43337 for (i
= 0; i
< e
; ++i
)
43339 if (!insn_entry
[i
].is_relevant
)
43342 swap_web_entry
*root
43343 = (swap_web_entry
*)(&insn_entry
[i
])->unionfind_root ();
43345 if (insn_entry
[i
].is_live_in
|| insn_entry
[i
].is_live_out
43346 || (insn_entry
[i
].contains_subreg
43347 && insn_entry
[i
].special_handling
!= SH_SUBREG
)
43348 || insn_entry
[i
].is_128_int
|| insn_entry
[i
].is_call
43349 || !(insn_entry
[i
].is_swappable
|| insn_entry
[i
].is_swap
))
43350 root
->web_not_optimizable
= 1;
43352 /* If we have loads or stores that aren't permuting then the
43353 optimization isn't appropriate. */
43354 else if ((insn_entry
[i
].is_load
|| insn_entry
[i
].is_store
)
43355 && !insn_entry
[i
].is_swap
&& !insn_entry
[i
].is_swappable
)
43356 root
->web_not_optimizable
= 1;
43358 /* If we have permuting loads or stores that are not accompanied
43359 by a register swap, the optimization isn't appropriate. */
43360 else if (insn_entry
[i
].is_load
&& insn_entry
[i
].is_swap
)
43362 rtx insn
= insn_entry
[i
].insn
;
43363 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43366 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
43368 struct df_link
*link
= DF_REF_CHAIN (def
);
43370 if (!chain_contains_only_swaps (insn_entry
, link
, FOR_LOADS
))
43372 root
->web_not_optimizable
= 1;
43377 else if (insn_entry
[i
].is_store
&& insn_entry
[i
].is_swap
)
43379 rtx insn
= insn_entry
[i
].insn
;
43380 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43383 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
43385 struct df_link
*link
= DF_REF_CHAIN (use
);
43387 if (!chain_contains_only_swaps (insn_entry
, link
, FOR_STORES
))
43389 root
->web_not_optimizable
= 1;
43398 fprintf (dump_file
, "\nSwap insn entry table after web analysis\n");
43399 dump_swap_insn_table (insn_entry
);
43402 /* For each load and store in an optimizable web (which implies
43403 the loads and stores are permuting), find the associated
43404 register swaps and mark them for removal. Due to various
43405 optimizations we may mark the same swap more than once. Also
43406 perform special handling for swappable insns that require it. */
43407 for (i
= 0; i
< e
; ++i
)
43408 if ((insn_entry
[i
].is_load
|| insn_entry
[i
].is_store
)
43409 && insn_entry
[i
].is_swap
)
43411 swap_web_entry
* root_entry
43412 = (swap_web_entry
*)((&insn_entry
[i
])->unionfind_root ());
43413 if (!root_entry
->web_not_optimizable
)
43414 mark_swaps_for_removal (insn_entry
, i
);
43416 else if (insn_entry
[i
].is_swappable
&& insn_entry
[i
].special_handling
)
43418 swap_web_entry
* root_entry
43419 = (swap_web_entry
*)((&insn_entry
[i
])->unionfind_root ());
43420 if (!root_entry
->web_not_optimizable
)
43421 handle_special_swappables (insn_entry
, i
);
43424 /* Now delete the swaps marked for removal. */
43425 for (i
= 0; i
< e
; ++i
)
43426 if (insn_entry
[i
].will_delete
)
43427 replace_swap_with_copy (insn_entry
, i
);
43434 const pass_data pass_data_analyze_swaps
=
43436 RTL_PASS
, /* type */
43437 "swaps", /* name */
43438 OPTGROUP_NONE
, /* optinfo_flags */
43439 TV_NONE
, /* tv_id */
43440 0, /* properties_required */
43441 0, /* properties_provided */
43442 0, /* properties_destroyed */
43443 0, /* todo_flags_start */
43444 TODO_df_finish
, /* todo_flags_finish */
43447 class pass_analyze_swaps
: public rtl_opt_pass
43450 pass_analyze_swaps(gcc::context
*ctxt
)
43451 : rtl_opt_pass(pass_data_analyze_swaps
, ctxt
)
43454 /* opt_pass methods: */
43455 virtual bool gate (function
*)
43457 return (optimize
> 0 && !BYTES_BIG_ENDIAN
&& TARGET_VSX
43458 && !TARGET_P9_VECTOR
&& rs6000_optimize_swaps
);
43461 virtual unsigned int execute (function
*fun
)
43463 return rs6000_analyze_swaps (fun
);
43468 return new pass_analyze_swaps (m_ctxt
);
43471 }; // class pass_analyze_swaps
43474 make_pass_analyze_swaps (gcc::context
*ctxt
)
43476 return new pass_analyze_swaps (ctxt
);
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.
   Lazily-built FUNCTION_DECLs for the glibc __atomic_fe* helpers,
   created on first use and cached here.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
43484 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
43487 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
43489 if (!TARGET_HARD_FLOAT
|| !TARGET_FPRS
)
43491 #ifdef RS6000_GLIBC_ATOMIC_FENV
43492 if (atomic_hold_decl
== NULL_TREE
)
43495 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
43496 get_identifier ("__atomic_feholdexcept"),
43497 build_function_type_list (void_type_node
,
43498 double_ptr_type_node
,
43500 TREE_PUBLIC (atomic_hold_decl
) = 1;
43501 DECL_EXTERNAL (atomic_hold_decl
) = 1;
43504 if (atomic_clear_decl
== NULL_TREE
)
43507 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
43508 get_identifier ("__atomic_feclearexcept"),
43509 build_function_type_list (void_type_node
,
43511 TREE_PUBLIC (atomic_clear_decl
) = 1;
43512 DECL_EXTERNAL (atomic_clear_decl
) = 1;
43515 tree const_double
= build_qualified_type (double_type_node
,
43517 tree const_double_ptr
= build_pointer_type (const_double
);
43518 if (atomic_update_decl
== NULL_TREE
)
43521 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
43522 get_identifier ("__atomic_feupdateenv"),
43523 build_function_type_list (void_type_node
,
43526 TREE_PUBLIC (atomic_update_decl
) = 1;
43527 DECL_EXTERNAL (atomic_update_decl
) = 1;
43530 tree fenv_var
= create_tmp_var_raw (double_type_node
);
43531 TREE_ADDRESSABLE (fenv_var
) = 1;
43532 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
, fenv_var
);
43534 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
43535 *clear
= build_call_expr (atomic_clear_decl
, 0);
43536 *update
= build_call_expr (atomic_update_decl
, 1,
43537 fold_convert (const_double_ptr
, fenv_addr
));
43542 tree mffs
= rs6000_builtin_decls
[RS6000_BUILTIN_MFFS
];
43543 tree mtfsf
= rs6000_builtin_decls
[RS6000_BUILTIN_MTFSF
];
43544 tree call_mffs
= build_call_expr (mffs
, 0);
43546 /* Generates the equivalent of feholdexcept (&fenv_var)
43548 *fenv_var = __builtin_mffs ();
43550 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
43551 __builtin_mtfsf (0xff, fenv_hold); */
43553 /* Mask to clear everything except for the rounding modes and non-IEEE
43554 arithmetic flag. */
43555 const unsigned HOST_WIDE_INT hold_exception_mask
=
43556 HOST_WIDE_INT_C (0xffffffff00000007);
43558 tree fenv_var
= create_tmp_var_raw (double_type_node
);
43560 tree hold_mffs
= build2 (MODIFY_EXPR
, void_type_node
, fenv_var
, call_mffs
);
43562 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
43563 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
43564 build_int_cst (uint64_type_node
,
43565 hold_exception_mask
));
43567 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
43570 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
43571 build_int_cst (unsigned_type_node
, 0xff),
43574 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
43576 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
43578 double fenv_clear = __builtin_mffs ();
43579 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
43580 __builtin_mtfsf (0xff, fenv_clear); */
43582 /* Mask to clear everything except for the rounding modes and non-IEEE
43583 arithmetic flag. */
43584 const unsigned HOST_WIDE_INT clear_exception_mask
=
43585 HOST_WIDE_INT_C (0xffffffff00000000);
43587 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
43589 tree clear_mffs
= build2 (MODIFY_EXPR
, void_type_node
, fenv_clear
, call_mffs
);
43591 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
43592 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
43594 build_int_cst (uint64_type_node
,
43595 clear_exception_mask
));
43597 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
43598 fenv_clear_llu_and
);
43600 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
43601 build_int_cst (unsigned_type_node
, 0xff),
43604 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
43606 /* Generates the equivalent of feupdateenv (&fenv_var)
43608 double old_fenv = __builtin_mffs ();
43609 double fenv_update;
43610 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
43611 (*(uint64_t*)fenv_var 0x1ff80fff);
43612 __builtin_mtfsf (0xff, fenv_update); */
43614 const unsigned HOST_WIDE_INT update_exception_mask
=
43615 HOST_WIDE_INT_C (0xffffffff1fffff00);
43616 const unsigned HOST_WIDE_INT new_exception_mask
=
43617 HOST_WIDE_INT_C (0x1ff80fff);
43619 tree old_fenv
= create_tmp_var_raw (double_type_node
);
43620 tree update_mffs
= build2 (MODIFY_EXPR
, void_type_node
, old_fenv
, call_mffs
);
43622 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
43623 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
43624 build_int_cst (uint64_type_node
,
43625 update_exception_mask
));
43627 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
43628 build_int_cst (uint64_type_node
,
43629 new_exception_mask
));
43631 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
43632 old_llu_and
, new_llu_and
);
43634 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
43637 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
43638 build_int_cst (unsigned_type_node
, 0xff),
43639 fenv_update_mtfsf
);
43641 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
43644 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
43647 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
43648 optimization_type opt_type
)
43653 return (opt_type
== OPTIMIZE_FOR_SPEED
43654 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
/* Instantiate the target hook vector from the TARGET_xxx macro
   definitions established earlier in this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-powerpcspe.h"